diff options
Diffstat (limited to 'src/gallium')
620 files changed, 55215 insertions, 12208 deletions
diff --git a/src/gallium/Makefile.template b/src/gallium/Makefile.template index 63983c52201..136423513c6 100644 --- a/src/gallium/Makefile.template +++ b/src/gallium/Makefile.template @@ -54,13 +54,13 @@ install: ##### RULES ##### .c.o: - $(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@ + $(CC) -c $(INCLUDES) $(DEFINES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@ .cpp.o: - $(CXX) -c $(INCLUDES) $(CXXFLAGS) $(LIBRARY_DEFINES) $< -o $@ + $(CXX) -c $(INCLUDES) $(DEFINES) $(CXXFLAGS) $(LIBRARY_DEFINES) $< -o $@ .S.o: - $(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@ + $(CC) -c $(INCLUDES) $(DEFINES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@ sinclude depend diff --git a/src/gallium/SConscript b/src/gallium/SConscript index 8be84cddbe7..eea32b1314b 100644 --- a/src/gallium/SConscript +++ b/src/gallium/SConscript @@ -2,29 +2,7 @@ import os Import('*') -env = env.Clone() - -auxiliaries = [] - -Export('auxiliaries') - - -if llvm: - SConscript(['auxiliary/gallivm/SConscript']) - -SConscript([ - # NOTE: order matters! - 'auxiliary/util/SConscript', - 'auxiliary/rtasm/SConscript', - 'auxiliary/tgsi/SConscript', - 'auxiliary/cso_cache/SConscript', - 'auxiliary/translate/SConscript', - 'auxiliary/draw/SConscript', - 'auxiliary/pipebuffer/SConscript', - 'auxiliary/indices/SConscript', - 'auxiliary/rbug/SConscript', - 'auxiliary/vl/SConscript', -]) +SConscript('auxiliary/SConscript') for driver in env['drivers']: SConscript(os.path.join('drivers', driver, 'SConscript')) diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index 5446eb68a98..e3af41c6e04 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -1,12 +1,177 @@ -# src/gallium/auxiliary/Makefile TOP = ../../.. include $(TOP)/configs/current -SUBDIRS = $(GALLIUM_AUXILIARY_DIRS) +LIBNAME = gallium -default install clean: - @for dir in $(SUBDIRS) ; do \ - if [ -d $$dir ] ; then \ - (cd $$dir && $(MAKE) $@) || exit 1; \ - fi \ - done +C_SOURCES = \ + cso_cache/cso_context.c \ + cso_cache/cso_cache.c \ + cso_cache/cso_hash.c \ + draw/draw_context.c \ + draw/draw_gs.c \ + draw/draw_pipe.c \ + draw/draw_pipe_aaline.c \ + draw/draw_pipe_aapoint.c \ + draw/draw_pipe_clip.c \ + draw/draw_pipe_cull.c \ + draw/draw_pipe_flatshade.c \ + draw/draw_pipe_offset.c \ + draw/draw_pipe_pstipple.c \ + draw/draw_pipe_stipple.c \ + draw/draw_pipe_twoside.c \ + draw/draw_pipe_unfilled.c \ + draw/draw_pipe_util.c \ + draw/draw_pipe_validate.c \ + draw/draw_pipe_vbuf.c \ + draw/draw_pipe_wide_line.c \ + draw/draw_pipe_wide_point.c \ + draw/draw_pt.c \ + draw/draw_pt_elts.c \ + draw/draw_pt_emit.c \ + draw/draw_pt_fetch.c \ + draw/draw_pt_fetch_emit.c \ + draw/draw_pt_fetch_shade_emit.c \ + draw/draw_pt_fetch_shade_pipeline.c \ + draw/draw_pt_post_vs.c \ + draw/draw_pt_util.c \ + draw/draw_pt_varray.c \ + draw/draw_pt_vcache.c \ + draw/draw_vertex.c \ + draw/draw_vs.c \ + draw/draw_vs_varient.c \ + draw/draw_vs_aos.c \ + draw/draw_vs_aos_io.c \ + draw/draw_vs_aos_machine.c \ + draw/draw_vs_exec.c \ + draw/draw_vs_llvm.c \ + draw/draw_vs_ppc.c \ + draw/draw_vs_sse.c \ + indices/u_indices_gen.c \ + indices/u_unfilled_gen.c \ + pipebuffer/pb_buffer_fenced.c \ + pipebuffer/pb_buffer_malloc.c \ + pipebuffer/pb_bufmgr_alt.c \ + pipebuffer/pb_bufmgr_cache.c \ + pipebuffer/pb_bufmgr_debug.c \ + pipebuffer/pb_bufmgr_fenced.c \ + pipebuffer/pb_bufmgr_mm.c \ + pipebuffer/pb_bufmgr_ondemand.c \ + pipebuffer/pb_bufmgr_pool.c \ + pipebuffer/pb_bufmgr_slab.c \ + pipebuffer/pb_validate.c \ + rbug/rbug_connection.c \ + rbug/rbug_core.c \ + rbug/rbug_texture.c \ + rbug/rbug_context.c \ + rbug/rbug_shader.c \ + rbug/rbug_demarshal.c \ + rtasm/rtasm_cpu.c \ + rtasm/rtasm_execmem.c \ + rtasm/rtasm_x86sse.c \ + rtasm/rtasm_ppc.c \ + rtasm/rtasm_ppc_spe.c \ + tgsi/tgsi_sanity.c \ + tgsi/tgsi_build.c \ + tgsi/tgsi_dump.c \ + tgsi/tgsi_exec.c \ + tgsi/tgsi_info.c \ + tgsi/tgsi_iterate.c \ + tgsi/tgsi_parse.c \ + tgsi/tgsi_ppc.c \ + tgsi/tgsi_scan.c \ + tgsi/tgsi_sse2.c \ + tgsi/tgsi_text.c \ + tgsi/tgsi_transform.c \ + tgsi/tgsi_ureg.c \ + tgsi/tgsi_util.c \ + translate/translate_generic.c \ + translate/translate_sse.c \ + translate/translate.c \ + translate/translate_cache.c \ + util/u_debug.c \ + util/u_debug_dump.c \ + util/u_debug_symbol.c \ + util/u_debug_stack.c \ + util/u_blit.c \ + util/u_blitter.c \ + util/u_cache.c \ + util/u_cpu_detect.c \ + util/u_dl.c \ + util/u_draw_quad.c \ + util/u_format.c \ + util/u_format_access.c \ + util/u_format_table.c \ + util/u_gen_mipmap.c \ + util/u_handle_table.c \ + util/u_hash_table.c \ + util/u_hash.c \ + util/u_keymap.c \ + util/u_linear.c \ + util/u_network.c \ + util/u_math.c \ + util/u_mm.c \ + util/u_rect.c \ + util/u_simple_shaders.c \ + util/u_snprintf.c \ + util/u_stream_stdc.c \ + util/u_stream_wd.c \ + util/u_surface.c \ + util/u_texture.c \ + util/u_tile.c \ + util/u_time.c \ + util/u_timed_winsys.c \ + util/u_upload_mgr.c \ + util/u_simple_screen.c \ + vl/vl_bitstream_parser.c \ + vl/vl_mpeg12_mc_renderer.c \ + vl/vl_compositor.c \ + vl/vl_csc.c \ + vl/vl_shader_build.c + +GALLIVM_SOURCES = \ + gallivm/gallivm.cpp \ + gallivm/gallivm_cpu.cpp \ + gallivm/instructions.cpp \ + gallivm/loweringpass.cpp \ + gallivm/tgsitollvm.cpp \ + gallivm/storage.cpp \ + gallivm/storagesoa.cpp \ + gallivm/instructionssoa.cpp + +INC_SOURCES = \ + gallivm/gallivm_builtins.cpp \ + gallivm/gallivmsoabuiltins.cpp + +# XXX: gallivm doesn't build correctly so disable for now +#ifeq ($(MESA_LLVM),1) +#DEFINES += -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS +#CPP_SOURCES += \ +# $(GALLIVM_SOURCES) +#endif + + +include ../Makefile.template + + +gallivm/gallivm_builtins.cpp: gallivm/llvm_builtins.c + clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp1.bin + (echo "static const unsigned char llvm_builtins_data[] = {"; od -txC temp1.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/,0x00};/") >$@ + rm temp1.bin + +gallivm/gallivmsoabuiltins.cpp: gallivm/soabuiltins.c + clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp2.bin + (echo "static const unsigned char soabuiltins_data[] = {"; od -txC temp2.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/,0x00};/") >$@ + rm temp2.bin + + +indices/u_indices_gen.c: indices/u_indices_gen.py + python $< > $@ + +indices/u_unfilled_gen.c: indices/u_unfilled_gen.py + python $< > $@ + +util/u_format_table.c: util/u_format_table.py util/u_format_parse.py util/u_format.csv + python util/u_format_table.py util/u_format.csv > $@ + +util/u_format_access.c: util/u_format_access.py util/u_format_parse.py util/u_format.csv + python util/u_format_access.py util/u_format.csv > $@ diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript new file mode 100644 index 00000000000..782eb533863 --- /dev/null +++ b/src/gallium/auxiliary/SConscript @@ -0,0 +1,185 @@ +Import('*') + +from sys import executable as python_cmd + +env.Append(CPPPATH = [ + 'indices', + 'util', +]) + +env.CodeGenerate( + target = 'indices/u_indices_gen.c', + script = 'indices/u_indices_gen.py', + source = [], + command = python_cmd + ' $SCRIPT > $TARGET' +) + +env.CodeGenerate( + target = 'indices/u_unfilled_gen.c', + script = 'indices/u_unfilled_gen.py', + source = [], + command = python_cmd + ' $SCRIPT > $TARGET' +) + +env.CodeGenerate( + target = 'util/u_format_table.c', + script = 'util/u_format_table.py', + source = ['util/u_format.csv'], + command = 'python $SCRIPT $SOURCE > $TARGET' +) + +env.CodeGenerate( + target = 'util/u_format_access.c', + script = 'util/u_format_access.py', + source = ['util/u_format.csv'], + command = 'python $SCRIPT $SOURCE > $TARGET' +) + +source = [ + 'cso_cache/cso_context.c', + 'cso_cache/cso_cache.c', + 'cso_cache/cso_hash.c', + 'draw/draw_context.c', + 'draw/draw_pipe.c', + 'draw/draw_pipe_aaline.c', + 'draw/draw_pipe_aapoint.c', + 'draw/draw_pipe_clip.c', + 'draw/draw_pipe_cull.c', + 'draw/draw_pipe_flatshade.c', + 'draw/draw_pipe_offset.c', + 'draw/draw_pipe_pstipple.c', + 'draw/draw_pipe_stipple.c', + 'draw/draw_pipe_twoside.c', + 'draw/draw_pipe_unfilled.c', + 'draw/draw_pipe_util.c', + 'draw/draw_pipe_validate.c', + 'draw/draw_pipe_vbuf.c', + 'draw/draw_pipe_wide_line.c', + 'draw/draw_pipe_wide_point.c', + 'draw/draw_pt.c', + 'draw/draw_pt_elts.c', + 'draw/draw_pt_emit.c', + 'draw/draw_pt_fetch.c', + 'draw/draw_pt_fetch_emit.c', + 'draw/draw_pt_fetch_shade_emit.c', + 'draw/draw_pt_fetch_shade_pipeline.c', + 'draw/draw_pt_post_vs.c', + 'draw/draw_pt_util.c', + 'draw/draw_pt_varray.c', + 'draw/draw_pt_vcache.c', + 'draw/draw_vertex.c', + 'draw/draw_vs.c', + 'draw/draw_vs_aos.c', + 'draw/draw_vs_aos_io.c', + 'draw/draw_vs_aos_machine.c', + 'draw/draw_vs_exec.c', + 'draw/draw_vs_llvm.c', + 'draw/draw_vs_ppc.c', + 'draw/draw_vs_sse.c', + 'draw/draw_vs_varient.c', + 'draw/draw_gs.c', + #'indices/u_indices.c', + #'indices/u_unfilled_indices.c', + 'indices/u_indices_gen.c', + 'indices/u_unfilled_gen.c', + 'pipebuffer/pb_buffer_fenced.c', + 'pipebuffer/pb_buffer_malloc.c', + 'pipebuffer/pb_bufmgr_alt.c', + 'pipebuffer/pb_bufmgr_cache.c', + 'pipebuffer/pb_bufmgr_debug.c', + 'pipebuffer/pb_bufmgr_fenced.c', + 'pipebuffer/pb_bufmgr_mm.c', + 'pipebuffer/pb_bufmgr_ondemand.c', + 'pipebuffer/pb_bufmgr_pool.c', + 'pipebuffer/pb_bufmgr_slab.c', + 'pipebuffer/pb_validate.c', + 'rbug/rbug_core.c', + 'rbug/rbug_shader.c', + 'rbug/rbug_context.c', + 'rbug/rbug_texture.c', + 'rbug/rbug_demarshal.c', + 'rbug/rbug_connection.c', + 'rtasm/rtasm_cpu.c', + 'rtasm/rtasm_execmem.c', + 'rtasm/rtasm_x86sse.c', + 'rtasm/rtasm_ppc.c', + 'rtasm/rtasm_ppc_spe.c', + 'tgsi/tgsi_build.c', + 'tgsi/tgsi_dump.c', + 'tgsi/tgsi_dump_c.c', + 'tgsi/tgsi_exec.c', + 'tgsi/tgsi_info.c', + 'tgsi/tgsi_iterate.c', + 'tgsi/tgsi_parse.c', + 'tgsi/tgsi_sanity.c', + 'tgsi/tgsi_scan.c', + 'tgsi/tgsi_ppc.c', + 'tgsi/tgsi_sse2.c', + 'tgsi/tgsi_text.c', + 'tgsi/tgsi_transform.c', + 'tgsi/tgsi_ureg.c', + 'tgsi/tgsi_util.c', + 'translate/translate_generic.c', + 'translate/translate_sse.c', + 'translate/translate.c', + 'translate/translate_cache.c', + 'util/u_bitmask.c', + 'util/u_blit.c', + 'util/u_blitter.c', + 'util/u_cache.c', + 'util/u_cpu_detect.c', + 'util/u_debug.c', + 'util/u_debug_dump.c', + 'util/u_debug_memory.c', + 'util/u_debug_stack.c', + 'util/u_debug_symbol.c', + 'util/u_dl.c', + 'util/u_draw_quad.c', + 'util/u_format.c', + 'util/u_format_access.c', + 'util/u_format_table.c', + 'util/u_gen_mipmap.c', + 'util/u_handle_table.c', + 'util/u_hash.c', + 'util/u_hash_table.c', + 'util/u_keymap.c', + 'util/u_network.c', + 'util/u_math.c', + 'util/u_mm.c', + 'util/u_rect.c', + 'util/u_simple_shaders.c', + 'util/u_snprintf.c', + 'util/u_stream_stdc.c', + 'util/u_stream_wd.c', + 'util/u_surface.c', + 'util/u_texture.c', + 'util/u_tile.c', + 'util/u_time.c', + 'util/u_timed_winsys.c', + 'util/u_upload_mgr.c', + 'util/u_simple_screen.c', + 'vl/vl_bitstream_parser.c', + 'vl/vl_mpeg12_mc_renderer.c', + 'vl/vl_compositor.c', + 'vl/vl_csc.c', + 'vl/vl_shader_build.c', +] + +if env['llvm']: + source += [ + 'gallivm/gallivm.cpp', + 'gallivm/gallivm_cpu.cpp', + 'gallivm/instructions.cpp', + 'gallivm/loweringpass.cpp', + 'gallivm/tgsitollvm.cpp', + 'gallivm/storage.cpp', + 'gallivm/storagesoa.cpp', + 'gallivm/instructionssoa.cpp', + ] + +gallium = env.ConvenienceLibrary( + target = 'gallium', + source = source, +) + +Export('gallium') diff --git a/src/gallium/auxiliary/cso_cache/Makefile b/src/gallium/auxiliary/cso_cache/Makefile deleted file mode 100644 index 8726afcd949..00000000000 --- a/src/gallium/auxiliary/cso_cache/Makefile +++ /dev/null @@ -1,11 +0,0 @@ -TOP = ../../../.. -include $(TOP)/configs/current - -LIBNAME = cso_cache - -C_SOURCES = \ - cso_context.c \ - cso_cache.c \ - cso_hash.c - -include ../../Makefile.template diff --git a/src/gallium/auxiliary/cso_cache/SConscript b/src/gallium/auxiliary/cso_cache/SConscript deleted file mode 100644 index 651e68a191a..00000000000 --- a/src/gallium/auxiliary/cso_cache/SConscript +++ /dev/null @@ -1,11 +0,0 @@ -Import('*') - -cso_cache = env.ConvenienceLibrary( - target = 'cso_cache', - source = [ - 'cso_context.c', - 'cso_cache.c', - 'cso_hash.c', - ]) - -auxiliaries.insert(0, cso_cache) diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 4f13b3e2bad..2b16332e143 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -42,6 +42,7 @@ #include "cso_cache/cso_context.h" #include "cso_cache/cso_cache.h" #include "cso_cache/cso_hash.h" +#include "cso_context.h" struct cso_context { struct pipe_context *pipe; @@ -50,28 +51,43 @@ struct cso_context { struct { void *samplers[PIPE_MAX_SAMPLERS]; unsigned nr_samplers; + + void *vertex_samplers[PIPE_MAX_VERTEX_SAMPLERS]; + unsigned nr_vertex_samplers; } hw; void *samplers[PIPE_MAX_SAMPLERS]; unsigned nr_samplers; + void *vertex_samplers[PIPE_MAX_VERTEX_SAMPLERS]; + unsigned nr_vertex_samplers; + unsigned nr_samplers_saved; void *samplers_saved[PIPE_MAX_SAMPLERS]; + unsigned nr_vertex_samplers_saved; + void *vertex_samplers_saved[PIPE_MAX_VERTEX_SAMPLERS]; + struct pipe_texture *textures[PIPE_MAX_SAMPLERS]; uint nr_textures; + struct pipe_texture *vertex_textures[PIPE_MAX_VERTEX_SAMPLERS]; + uint nr_vertex_textures; + uint nr_textures_saved; struct pipe_texture *textures_saved[PIPE_MAX_SAMPLERS]; + uint nr_vertex_textures_saved; + struct pipe_texture *vertex_textures_saved[PIPE_MAX_SAMPLERS]; + /** Current and saved state. * The saved state is used as a 1-deep stack. */ void *blend, *blend_saved; void *depth_stencil, *depth_stencil_saved; void *rasterizer, *rasterizer_saved; - void *fragment_shader, *fragment_shader_saved; - void *vertex_shader, *vertex_shader_saved; + void *fragment_shader, *fragment_shader_saved, *geometry_shader; + void *vertex_shader, *vertex_shader_saved, *geometry_shader_saved; struct pipe_framebuffer_state fb, fb_saved; struct pipe_viewport_state vp, vp_saved; @@ -244,7 +260,9 @@ void cso_release_all( struct cso_context *ctx ) if (ctx->pipe) { ctx->pipe->bind_blend_state( ctx->pipe, NULL ); ctx->pipe->bind_rasterizer_state( ctx->pipe, NULL ); - ctx->pipe->bind_sampler_states( ctx->pipe, 0, NULL ); + ctx->pipe->bind_fragment_sampler_states( ctx->pipe, 0, NULL ); + if (ctx->pipe->bind_vertex_sampler_states) + ctx->pipe->bind_vertex_sampler_states(ctx->pipe, 0, NULL); ctx->pipe->bind_depth_stencil_alpha_state( ctx->pipe, NULL ); ctx->pipe->bind_fs_state( ctx->pipe, NULL ); ctx->pipe->bind_vs_state( ctx->pipe, NULL ); @@ -255,6 +273,11 @@ void cso_release_all( struct cso_context *ctx ) pipe_texture_reference(&ctx->textures_saved[i], NULL); } + for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { + pipe_texture_reference(&ctx->vertex_textures[i], NULL); + pipe_texture_reference(&ctx->vertex_textures_saved[i], NULL); + } + free_framebuffer_state(&ctx->fb); free_framebuffer_state(&ctx->fb_saved); @@ -378,6 +401,46 @@ enum pipe_error cso_single_sampler(struct cso_context *ctx, return PIPE_OK; } +enum pipe_error +cso_single_vertex_sampler(struct cso_context *ctx, + unsigned idx, + const struct pipe_sampler_state *templ) +{ + void *handle = NULL; + + if (templ != NULL) { + unsigned hash_key = cso_construct_key((void*)templ, sizeof(struct pipe_sampler_state)); + struct cso_hash_iter iter = cso_find_state_template(ctx->cache, + hash_key, CSO_SAMPLER, + (void*)templ); + + if (cso_hash_iter_is_null(iter)) { + struct cso_sampler *cso = MALLOC(sizeof(struct cso_sampler)); + if (!cso) + return PIPE_ERROR_OUT_OF_MEMORY; + + memcpy(&cso->state, templ, sizeof(*templ)); + cso->data = ctx->pipe->create_sampler_state(ctx->pipe, &cso->state); + cso->delete_state = (cso_state_callback)ctx->pipe->delete_sampler_state; + cso->context = ctx->pipe; + + iter = cso_insert_state(ctx->cache, hash_key, CSO_SAMPLER, cso); + if (cso_hash_iter_is_null(iter)) { + FREE(cso); + return PIPE_ERROR_OUT_OF_MEMORY; + } + + handle = cso->data; + } + else { + handle = ((struct cso_sampler *)cso_hash_iter_data(iter))->data; + } + } + + ctx->vertex_samplers[idx] = handle; + return PIPE_OK; +} + void cso_single_sampler_done( struct cso_context *ctx ) { unsigned i; @@ -398,7 +461,36 @@ void cso_single_sampler_done( struct cso_context *ctx ) memcpy(ctx->hw.samplers, ctx->samplers, ctx->nr_samplers * sizeof(void *)); ctx->hw.nr_samplers = ctx->nr_samplers; - ctx->pipe->bind_sampler_states(ctx->pipe, ctx->nr_samplers, ctx->samplers); + ctx->pipe->bind_fragment_sampler_states(ctx->pipe, ctx->nr_samplers, ctx->samplers); + } +} + +void +cso_single_vertex_sampler_done(struct cso_context *ctx) +{ + unsigned i; + + /* find highest non-null sampler */ + for (i = PIPE_MAX_VERTEX_SAMPLERS; i > 0; i--) { + if (ctx->vertex_samplers[i - 1] != NULL) + break; + } + + ctx->nr_vertex_samplers = i; + + if (ctx->hw.nr_vertex_samplers != ctx->nr_vertex_samplers || + memcmp(ctx->hw.vertex_samplers, + ctx->vertex_samplers, + ctx->nr_vertex_samplers * sizeof(void *)) != 0) + { + memcpy(ctx->hw.vertex_samplers, + ctx->vertex_samplers, + ctx->nr_vertex_samplers * sizeof(void *)); + ctx->hw.nr_vertex_samplers = ctx->nr_vertex_samplers; + + ctx->pipe->bind_vertex_sampler_states(ctx->pipe, + ctx->nr_vertex_samplers, + ctx->vertex_samplers); } } @@ -447,6 +539,21 @@ void cso_restore_samplers(struct cso_context *ctx) cso_single_sampler_done( ctx ); } +void +cso_save_vertex_samplers(struct cso_context *ctx) +{ + ctx->nr_vertex_samplers_saved = ctx->nr_vertex_samplers; + memcpy(ctx->vertex_samplers_saved, ctx->vertex_samplers, sizeof(ctx->vertex_samplers)); +} + +void +cso_restore_vertex_samplers(struct cso_context *ctx) +{ + ctx->nr_vertex_samplers = ctx->nr_vertex_samplers_saved; + memcpy(ctx->vertex_samplers, ctx->vertex_samplers_saved, sizeof(ctx->vertex_samplers)); + cso_single_vertex_sampler_done(ctx); +} + enum pipe_error cso_set_sampler_textures( struct cso_context *ctx, uint count, @@ -461,7 +568,7 @@ enum pipe_error cso_set_sampler_textures( struct cso_context *ctx, for ( ; i < PIPE_MAX_SAMPLERS; i++) pipe_texture_reference(&ctx->textures[i], NULL); - ctx->pipe->set_sampler_textures(ctx->pipe, count, textures); + ctx->pipe->set_fragment_sampler_textures(ctx->pipe, count, textures); return PIPE_OK; } @@ -491,13 +598,71 @@ void cso_restore_sampler_textures( struct cso_context *ctx ) for ( ; i < PIPE_MAX_SAMPLERS; i++) pipe_texture_reference(&ctx->textures[i], NULL); - ctx->pipe->set_sampler_textures(ctx->pipe, ctx->nr_textures, ctx->textures); + ctx->pipe->set_fragment_sampler_textures(ctx->pipe, ctx->nr_textures, ctx->textures); ctx->nr_textures_saved = 0; } +enum pipe_error +cso_set_vertex_sampler_textures(struct cso_context *ctx, + uint count, + struct pipe_texture **textures) +{ + uint i; + + ctx->nr_vertex_textures = count; + + for (i = 0; i < count; i++) { + pipe_texture_reference(&ctx->vertex_textures[i], textures[i]); + } + for ( ; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { + pipe_texture_reference(&ctx->vertex_textures[i], NULL); + } + + ctx->pipe->set_vertex_sampler_textures(ctx->pipe, count, textures); + + return PIPE_OK; +} + +void +cso_save_vertex_sampler_textures(struct cso_context *ctx) +{ + uint i; + + ctx->nr_vertex_textures_saved = ctx->nr_vertex_textures; + for (i = 0; i < ctx->nr_vertex_textures; i++) { + assert(!ctx->vertex_textures_saved[i]); + pipe_texture_reference(&ctx->vertex_textures_saved[i], ctx->vertex_textures[i]); + } +} + +void +cso_restore_vertex_sampler_textures(struct cso_context *ctx) +{ + uint i; + + ctx->nr_vertex_textures = ctx->nr_vertex_textures_saved; + + for (i = 0; i < ctx->nr_vertex_textures; i++) { + pipe_texture_reference(&ctx->vertex_textures[i], NULL); + ctx->vertex_textures[i] = ctx->vertex_textures_saved[i]; + ctx->vertex_textures_saved[i] = NULL; + } + for ( ; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { + pipe_texture_reference(&ctx->vertex_textures[i], NULL); + } + + ctx->pipe->set_vertex_sampler_textures(ctx->pipe, + ctx->nr_vertex_textures, + ctx->vertex_textures); + + ctx->nr_vertex_textures_saved = 0; +} + + + enum pipe_error cso_set_depth_stencil_alpha(struct cso_context *ctx, const struct pipe_depth_stencil_alpha_state *templ) { @@ -863,3 +1028,38 @@ enum pipe_error cso_set_blend_color(struct cso_context *ctx, } return PIPE_OK; } + +enum pipe_error cso_set_geometry_shader_handle(struct cso_context *ctx, + void *handle) +{ + if (ctx->geometry_shader != handle) { + ctx->geometry_shader = handle; + ctx->pipe->bind_gs_state(ctx->pipe, handle); + } + return PIPE_OK; +} + +void cso_delete_geometry_shader(struct cso_context *ctx, void *handle) +{ + if (handle == ctx->geometry_shader) { + /* unbind before deleting */ + ctx->pipe->bind_gs_state(ctx->pipe, NULL); + ctx->geometry_shader = NULL; + } + ctx->pipe->delete_gs_state(ctx->pipe, handle); +} + +void cso_save_geometry_shader(struct cso_context *ctx) +{ + assert(!ctx->geometry_shader_saved); + ctx->geometry_shader_saved = ctx->geometry_shader; +} + +void cso_restore_geometry_shader(struct cso_context *ctx) +{ + if (ctx->geometry_shader_saved != ctx->geometry_shader) { + ctx->pipe->bind_gs_state(ctx->pipe, ctx->geometry_shader_saved); + ctx->geometry_shader = ctx->geometry_shader_saved; + } + ctx->geometry_shader_saved = NULL; +} diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h index 69630e98bae..b9e313e32d6 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.h +++ b/src/gallium/auxiliary/cso_cache/cso_context.h @@ -84,6 +84,20 @@ enum pipe_error cso_single_sampler( struct cso_context *cso, void cso_single_sampler_done( struct cso_context *cso ); +void +cso_save_vertex_samplers(struct cso_context *cso); + +void +cso_restore_vertex_samplers(struct cso_context *cso); + +enum pipe_error +cso_single_vertex_sampler(struct cso_context *cso, + unsigned nr, + const struct pipe_sampler_state *states); + +void +cso_single_vertex_sampler_done(struct cso_context *cso); + enum pipe_error cso_set_sampler_textures( struct cso_context *cso, @@ -94,6 +108,17 @@ void cso_restore_sampler_textures( struct cso_context *cso ); +enum pipe_error +cso_set_vertex_sampler_textures(struct cso_context *cso, + uint count, + struct pipe_texture **textures); +void +cso_save_vertex_sampler_textures(struct cso_context *cso); +void +cso_restore_vertex_sampler_textures(struct cso_context *cso); + + + /* These aren't really sensible -- most of the time the api provides * object semantics for shaders anyway, and the cases where it doesn't * (eg mesa's internall-generated texenv programs), it will be up to @@ -121,6 +146,13 @@ void cso_save_vertex_shader(struct cso_context *cso); void cso_restore_vertex_shader(struct cso_context *cso); +enum pipe_error cso_set_geometry_shader_handle(struct cso_context *ctx, + void *handle); +void cso_delete_geometry_shader(struct cso_context *ctx, void *handle); +void cso_save_geometry_shader(struct cso_context *cso); +void cso_restore_geometry_shader(struct cso_context *cso); + + enum pipe_error cso_set_framebuffer(struct cso_context *cso, const struct pipe_framebuffer_state *fb); diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile deleted file mode 100644 index 5041dcc072b..00000000000 --- a/src/gallium/auxiliary/draw/Makefile +++ /dev/null @@ -1,46 +0,0 @@ -TOP = ../../../.. -include $(TOP)/configs/current - -LIBNAME = draw - -C_SOURCES = \ - draw_context.c \ - draw_pipe.c \ - draw_pipe_aaline.c \ - draw_pipe_aapoint.c \ - draw_pipe_clip.c \ - draw_pipe_cull.c \ - draw_pipe_flatshade.c \ - draw_pipe_offset.c \ - draw_pipe_pstipple.c \ - draw_pipe_stipple.c \ - draw_pipe_twoside.c \ - draw_pipe_unfilled.c \ - draw_pipe_util.c \ - draw_pipe_validate.c \ - draw_pipe_vbuf.c \ - draw_pipe_wide_line.c \ - draw_pipe_wide_point.c \ - draw_pt.c \ - draw_pt_elts.c \ - draw_pt_emit.c \ - draw_pt_fetch.c \ - draw_pt_fetch_emit.c \ - draw_pt_fetch_shade_emit.c \ - draw_pt_fetch_shade_pipeline.c \ - draw_pt_post_vs.c \ - draw_pt_util.c \ - draw_pt_varray.c \ - draw_pt_vcache.c \ - draw_vertex.c \ - draw_vs.c \ - draw_vs_varient.c \ - draw_vs_aos.c \ - draw_vs_aos_io.c \ - draw_vs_aos_machine.c \ - draw_vs_exec.c \ - draw_vs_llvm.c \ - draw_vs_ppc.c \ - draw_vs_sse.c - -include ../../Makefile.template diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript deleted file mode 100644 index 5f05aa324a5..00000000000 --- a/src/gallium/auxiliary/draw/SConscript +++ /dev/null @@ -1,46 +0,0 @@ -Import('*') - -draw = env.ConvenienceLibrary( - target = 'draw', - source = [ - 'draw_context.c', - 'draw_pipe.c', - 'draw_pipe_aaline.c', - 'draw_pipe_aapoint.c', - 'draw_pipe_clip.c', - 'draw_pipe_cull.c', - 'draw_pipe_flatshade.c', - 'draw_pipe_offset.c', - 'draw_pipe_pstipple.c', - 'draw_pipe_stipple.c', - 'draw_pipe_twoside.c', - 'draw_pipe_unfilled.c', - 'draw_pipe_util.c', - 'draw_pipe_validate.c', - 'draw_pipe_vbuf.c', - 'draw_pipe_wide_line.c', - 'draw_pipe_wide_point.c', - 'draw_pt.c', - 'draw_pt_elts.c', - 'draw_pt_emit.c', - 'draw_pt_fetch.c', - 'draw_pt_fetch_emit.c', - 'draw_pt_fetch_shade_emit.c', - 'draw_pt_fetch_shade_pipeline.c', - 'draw_pt_post_vs.c', - 'draw_pt_util.c', - 'draw_pt_varray.c', - 'draw_pt_vcache.c', - 'draw_vertex.c', - 'draw_vs.c', - 'draw_vs_aos.c', - 'draw_vs_aos_io.c', - 'draw_vs_aos_machine.c', - 'draw_vs_exec.c', - 'draw_vs_llvm.c', - 'draw_vs_ppc.c', - 'draw_vs_sse.c', - 'draw_vs_varient.c' - ]) - -auxiliaries.insert(0, draw) diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index a4f1fcddc1a..667aa46b208 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -36,6 +36,7 @@ #include "draw_context.h" #include "draw_vbuf.h" #include "draw_vs.h" +#include "draw_gs.h" #include "draw_pt.h" #include "draw_pipe.h" @@ -67,6 +68,9 @@ struct draw_context *draw_create( void ) if (!draw_vs_init( draw )) goto fail; + if (!draw_gs_init( draw )) + goto fail; + return draw; fail: @@ -231,11 +235,19 @@ draw_set_mapped_vertex_buffer(struct draw_context *draw, void draw_set_mapped_constant_buffer(struct draw_context *draw, - const void *buffer, + unsigned shader_type, + const void *buffer, unsigned size ) { - draw->pt.user.constants = buffer; - draw_vs_set_constants( draw, (const float (*)[4])buffer, size ); + debug_assert(shader_type == PIPE_SHADER_VERTEX || + shader_type == PIPE_SHADER_GEOMETRY); + if (shader_type == PIPE_SHADER_VERTEX) { + draw->pt.user.vs_constants = buffer; + draw_vs_set_constants( draw, (const float (*)[4])buffer, size ); + } else if (shader_type == PIPE_SHADER_GEOMETRY) { + draw->pt.user.gs_constants = buffer; + draw_gs_set_constants( draw, (const float (*)[4])buffer, size ); + } } @@ -298,7 +310,7 @@ draw_set_force_passthrough( struct draw_context *draw, boolean enable ) * a post-transformed vertex. * * With this function, drivers that use the draw module should have no reason - * to track the current vertex shader. + * to track the current vertex/geometry shader. * * Note that the draw module may sometimes generate vertices with extra * attributes (such as texcoords for AA lines). The driver can call this @@ -309,43 +321,59 @@ draw_set_force_passthrough( struct draw_context *draw, boolean enable ) * work for the drivers. */ int -draw_find_vs_output(const struct draw_context *draw, - uint semantic_name, uint semantic_index) +draw_find_shader_output(const struct draw_context *draw, + uint semantic_name, uint semantic_index) { const struct draw_vertex_shader *vs = draw->vs.vertex_shader; + const struct draw_geometry_shader *gs = draw->gs.geometry_shader; uint i; - for (i = 0; i < vs->info.num_outputs; i++) { - if (vs->info.output_semantic_name[i] == semantic_name && - vs->info.output_semantic_index[i] == semantic_index) + const struct tgsi_shader_info *info = &vs->info; + + if (gs) + info = &gs->info; + + for (i = 0; i < info->num_outputs; i++) { + if (info->output_semantic_name[i] == semantic_name && + info->output_semantic_index[i] == semantic_index) return i; } /* XXX there may be more than one extra vertex attrib. * For example, simulated gl_FragCoord and gl_PointCoord. */ - if (draw->extra_vp_outputs.semantic_name == semantic_name && - draw->extra_vp_outputs.semantic_index == semantic_index) { - return draw->extra_vp_outputs.slot; + if (draw->extra_shader_outputs.semantic_name == semantic_name && + draw->extra_shader_outputs.semantic_index == semantic_index) { + return draw->extra_shader_outputs.slot; } + return 0; } /** - * Return number of vertex shader outputs. + * Return number of the shader outputs. + * + * If geometry shader is present, its output will be returned, + * if not vertex shader is used. */ uint -draw_num_vs_outputs(const struct draw_context *draw) +draw_num_shader_outputs(const struct draw_context *draw) { uint count = draw->vs.vertex_shader->info.num_outputs; - if (draw->extra_vp_outputs.slot > 0) + + /* if geometry shader is present, its outputs go to te + * driver, not the vertex shaders */ + if (draw->gs.geometry_shader) + count = draw->gs.geometry_shader->info.num_outputs; + + if (draw->extra_shader_outputs.slot > 0) count++; return count; } /** - * Provide TGSI sampler objects for vertex shaders that use texture fetches. + * Provide TGSI sampler objects for vertex/geometry shaders that use texture fetches. * This might only be used by software drivers for the time being. */ void @@ -355,6 +383,8 @@ draw_texture_samplers(struct draw_context *draw, { draw->vs.num_samplers = num_samplers; draw->vs.samplers = samplers; + draw->gs.num_samplers = num_samplers; + draw->gs.samplers = samplers; } @@ -366,13 +396,6 @@ void draw_set_render( struct draw_context *draw, draw->render = render; } -void draw_set_edgeflags( struct draw_context *draw, - const unsigned *edgeflag ) -{ - draw->pt.user.edgeflag = edgeflag; -} - - /** @@ -428,3 +451,18 @@ void draw_do_flush( struct draw_context *draw, unsigned flags ) draw->flushing = FALSE; } } + + +int draw_current_shader_outputs(struct draw_context *draw) +{ + if (draw->gs.geometry_shader) + return draw->gs.num_gs_outputs; + return draw->vs.num_vs_outputs; +} + +int draw_current_shader_position_output(struct draw_context *draw) +{ + if (draw->gs.geometry_shader) + return draw->gs.position_output; + return draw->vs.position_output; +} diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index d529e4e9a27..b716209df29 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -45,6 +45,7 @@ struct pipe_context; struct draw_context; struct draw_stage; struct draw_vertex_shader; +struct draw_geometry_shader; struct tgsi_sampler; @@ -85,11 +86,11 @@ draw_install_pstipple_stage(struct draw_context *draw, struct pipe_context *pipe int -draw_find_vs_output(const struct draw_context *draw, - uint semantic_name, uint semantic_index); +draw_find_shader_output(const struct draw_context *draw, + uint semantic_name, uint semantic_index); uint -draw_num_vs_outputs(const struct draw_context *draw); +draw_num_shader_outputs(const struct draw_context *draw); void @@ -112,6 +113,17 @@ void draw_delete_vertex_shader(struct draw_context *draw, struct draw_vertex_shader *dvs); +/* + * Geometry shader functions + */ +struct draw_geometry_shader * +draw_create_geometry_shader(struct draw_context *draw, + const struct pipe_shader_state *shader); +void draw_bind_geometry_shader(struct draw_context *draw, + struct draw_geometry_shader *dvs); +void draw_delete_geometry_shader(struct draw_context *draw, + struct draw_geometry_shader *dvs); + /* * Vertex data functions @@ -140,12 +152,10 @@ void draw_set_mapped_vertex_buffer(struct draw_context *draw, unsigned attr, const void *buffer); void draw_set_mapped_constant_buffer(struct draw_context *draw, + unsigned shader_type, const void *buffer, unsigned size ); -void draw_set_edgeflags( struct draw_context *draw, - const unsigned *edgeflag ); - /*********************************************************************** * draw_prim.c diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c new file mode 100644 index 00000000000..5db2e755423 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_gs.c @@ -0,0 +1,338 @@ +/************************************************************************** + * + * Copyright 2009 VMWare Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "draw_gs.h" + +#include "draw_private.h" +#include "draw_context.h" + +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_exec.h" + +#include "pipe/p_shader_tokens.h" + +#include "util/u_math.h" +#include "util/u_memory.h" + +#define MAX_PRIM_VERTICES 6 +/* fixme: move it from here */ +#define MAX_PRIMITIVES 64 + +boolean +draw_gs_init( struct draw_context *draw ) +{ + draw->gs.machine = tgsi_exec_machine_create(); + if (!draw->gs.machine) + return FALSE; + + draw->gs.machine->Primitives = align_malloc( + MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector), 16); + if (!draw->gs.machine->Primitives) + return FALSE; + memset(draw->gs.machine->Primitives, 0, + MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector)); + + return TRUE; +} + + +void draw_gs_set_constants( struct draw_context *draw, + const float (*constants)[4], + unsigned size ) +{ +} + + +struct draw_geometry_shader * +draw_create_geometry_shader(struct draw_context *draw, + const struct pipe_shader_state *state) +{ + struct draw_geometry_shader *gs; + int i; + + gs = CALLOC_STRUCT(draw_geometry_shader); + + if (!gs) + return NULL; + + gs->state = *state; + gs->state.tokens = tgsi_dup_tokens(state->tokens); + if (!gs->state.tokens) { + FREE(gs); + return NULL; + } + + tgsi_scan_shader(state->tokens, &gs->info); + + /* setup the defaults */ + gs->input_primitive = PIPE_PRIM_TRIANGLES; + gs->output_primitive = PIPE_PRIM_TRIANGLE_STRIP; + gs->max_output_vertices = 32; + + for (i = 0; i < gs->info.num_properties; ++i) { + if (gs->info.properties[i].name == + TGSI_PROPERTY_GS_INPUT_PRIM) + gs->input_primitive = gs->info.properties[i].data[0]; + else if (gs->info.properties[i].name == + TGSI_PROPERTY_GS_OUTPUT_PRIM) + gs->output_primitive = gs->info.properties[i].data[0]; + else if (gs->info.properties[i].name == + TGSI_PROPERTY_GS_MAX_VERTICES) + gs->max_output_vertices = gs->info.properties[i].data[0]; + } + + gs->machine = draw->gs.machine; + + if (gs) + { + uint i; + for (i = 0; i < gs->info.num_outputs; i++) { + if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_POSITION && + gs->info.output_semantic_index[i] == 0) + gs->position_output = i; + } + } + + return gs; +} + +void draw_bind_geometry_shader(struct draw_context *draw, + struct draw_geometry_shader *dgs) +{ + draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE); + + if (dgs) { + draw->gs.geometry_shader = dgs; + draw->gs.num_gs_outputs = dgs->info.num_outputs; + draw->gs.position_output = dgs->position_output; + draw_geometry_shader_prepare(dgs, draw); + } + else { + draw->gs.geometry_shader = NULL; + draw->gs.num_gs_outputs = 0; + } +} + +void draw_delete_geometry_shader(struct draw_context *draw, + struct draw_geometry_shader *dgs) +{ + FREE(dgs); +} + +static INLINE int num_vertices_for_prim(int prim) +{ + switch(prim) { + case PIPE_PRIM_POINTS: + return 1; + case PIPE_PRIM_LINES: + return 2; + case PIPE_PRIM_LINE_LOOP: + return 2; + case PIPE_PRIM_LINE_STRIP: + return 2; + case PIPE_PRIM_TRIANGLES: + return 3; + case PIPE_PRIM_TRIANGLE_STRIP: + return 3; + case PIPE_PRIM_TRIANGLE_FAN: + return 3; + case PIPE_PRIM_LINES_ADJACENCY: + case PIPE_PRIM_LINE_STRIP_ADJACENCY: + return 4; + case PIPE_PRIM_TRIANGLES_ADJACENCY: + case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: + return 6; + default: + assert(!"Bad geometry shader input"); + return 0; + } +} + +static void draw_fetch_geometry_input(struct draw_geometry_shader *shader, + int start_primitive, + int num_primitives, + const float (*input_ptr)[4], + unsigned input_vertex_stride, + unsigned inputs_from_vs) +{ + struct tgsi_exec_machine *machine = shader->machine; + unsigned slot, vs_slot, k, j; + unsigned num_vertices = num_vertices_for_prim(shader->input_primitive); + int idx = 0; + + for (slot = 0, vs_slot = 0; slot < shader->info.num_inputs; slot++) { + /*debug_printf("Slot = %d (semantic = %d)\n", slot, + shader->info.input_semantic_name[slot]);*/ + if (shader->info.input_semantic_name[slot] == + TGSI_SEMANTIC_PRIMID) { + for (j = 0; j < num_primitives; ++j) { + machine->Inputs[idx].xyzw[0].f[j] = (float)start_primitive + j; + machine->Inputs[idx].xyzw[1].f[j] = (float)start_primitive + j; + machine->Inputs[idx].xyzw[2].f[j] = (float)start_primitive + j; + machine->Inputs[idx].xyzw[3].f[j] = (float)start_primitive + j; + } + ++idx; + } else { + for (j = 0; j < num_primitives; ++j) { + int vidx = idx; + const float (*prim_ptr)[4]; + /*debug_printf(" %d) Prim (num_verts = %d)\n", start_primitive + j, + num_vertices);*/ + prim_ptr = (const float (*)[4])( + (const char *)input_ptr + + (j * num_vertices * input_vertex_stride)); + + for (k = 0; k < num_vertices; ++k, ++vidx) { + const float (*input)[4]; + input = (const float (*)[4])( + (const char *)prim_ptr + (k * input_vertex_stride)); + vidx = k * TGSI_EXEC_MAX_INPUT_ATTRIBS + slot; + /*debug_printf("\t%d)(%d) Input vert:\n", vidx, k);*/ +#if 1 + assert(!util_is_inf_or_nan(input[vs_slot][0])); + assert(!util_is_inf_or_nan(input[vs_slot][1])); + assert(!util_is_inf_or_nan(input[vs_slot][2])); + assert(!util_is_inf_or_nan(input[vs_slot][3])); +#endif + machine->Inputs[vidx].xyzw[0].f[j] = input[vs_slot][0]; + machine->Inputs[vidx].xyzw[1].f[j] = input[vs_slot][1]; + machine->Inputs[vidx].xyzw[2].f[j] = input[vs_slot][2]; + machine->Inputs[vidx].xyzw[3].f[j] = input[vs_slot][3]; +#if 0 + debug_printf("\t\t%d %f %f %f %f\n", slot, + machine->Inputs[vidx].xyzw[0].f[j], + machine->Inputs[vidx].xyzw[1].f[j], + machine->Inputs[vidx].xyzw[2].f[j], + machine->Inputs[vidx].xyzw[3].f[j]); +#endif + } + } + ++vs_slot; + idx += num_vertices; + } + } +} + +static INLINE void +draw_geometry_fetch_outputs(struct draw_geometry_shader *shader, + int num_primitives, + float (*output)[4], + unsigned vertex_size) +{ + struct tgsi_exec_machine *machine = shader->machine; + unsigned prim_idx, j, slot; + + /* Unswizzle all output results. + */ + /* FIXME: handle all the primitives produced by the gs, not just + * the first one + unsigned prim_count = + mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0];*/ + for (prim_idx = 0; prim_idx < num_primitives; ++prim_idx) { + unsigned num_verts_per_prim = machine->Primitives[0]; + for (j = 0; j < num_verts_per_prim; j++) { + int idx = (prim_idx * num_verts_per_prim + j) * + shader->info.num_outputs; +#ifdef DEBUG_OUTPUTS + debug_printf("%d) Output vert:\n", idx); +#endif + for (slot = 0; slot < shader->info.num_outputs; slot++) { + output[slot][0] = machine->Outputs[idx + slot].xyzw[0].f[prim_idx]; + output[slot][1] = machine->Outputs[idx + slot].xyzw[1].f[prim_idx]; + output[slot][2] = machine->Outputs[idx + slot].xyzw[2].f[prim_idx]; + output[slot][3] = machine->Outputs[idx + slot].xyzw[3].f[prim_idx]; +#ifdef DEBUG_OUTPUTS + debug_printf("\t%d: %f %f %f %f\n", slot, + output[slot][0], + output[slot][1], + output[slot][2], + output[slot][3]); +#endif + debug_assert(!util_is_inf_or_nan(output[slot][0])); + } + output = (float (*)[4])((char *)output + vertex_size); + } + } +} + +void draw_geometry_shader_run(struct draw_geometry_shader *shader, + const float (*input)[4], + float (*output)[4], + const float (*constants)[4], + unsigned count, + unsigned input_stride, + unsigned vertex_size) +{ + struct tgsi_exec_machine *machine = shader->machine; + unsigned int i; + unsigned num_vertices = num_vertices_for_prim(shader->input_primitive); + unsigned num_primitives = count/num_vertices; + unsigned inputs_from_vs = 0; + + machine->Consts = constants; + + for (i = 0; i < shader->info.num_inputs; ++i) { + if (shader->info.input_semantic_name[i] != TGSI_SEMANTIC_PRIMID) + ++inputs_from_vs; + } + + for (i = 0; i < num_primitives; ++i) { + unsigned int max_primitives = 1; + + draw_fetch_geometry_input(shader, i, max_primitives, input, + input_stride, inputs_from_vs); + + tgsi_set_exec_mask(machine, + 1, + max_primitives > 1, + max_primitives > 2, + max_primitives > 3); + + /* run interpreter */ + tgsi_exec_machine_run(machine); + + draw_geometry_fetch_outputs(shader, max_primitives, + output, vertex_size); + } +} + +void draw_geometry_shader_delete(struct draw_geometry_shader *shader) +{ + FREE((void*) shader->state.tokens); + FREE(shader); +} + +void draw_geometry_shader_prepare(struct draw_geometry_shader *shader, + struct draw_context *draw) +{ + if (shader->machine->Tokens != shader->state.tokens) { + tgsi_exec_machine_bind_shader(shader->machine, + shader->state.tokens, + draw->gs.num_samplers, + draw->gs.samplers); + } +} diff --git a/src/gallium/auxiliary/draw/draw_gs.h b/src/gallium/auxiliary/draw/draw_gs.h new file mode 100644 index 00000000000..d6a97d9c4ef --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_gs.h @@ -0,0 +1,76 @@ +/************************************************************************** + * + * Copyright 2009 VMWare Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef DRAW_GS_H +#define DRAW_GS_H + +#include "draw_context.h" +#include "draw_private.h" + + +#define MAX_TGSI_PRIMITIVES 4 + +struct draw_context; + +/** + * Private version of the compiled geometry shader + */ +struct draw_geometry_shader { + struct draw_context *draw; + + struct tgsi_exec_machine *machine; + + /* This member will disappear shortly:*/ + struct pipe_shader_state state; + + struct tgsi_shader_info info; + unsigned position_output; + + unsigned max_output_vertices; + unsigned input_primitive; + unsigned output_primitive; + + /* Extracted from shader: + */ + const float (*immediates)[4]; +}; + +void draw_geometry_shader_run(struct draw_geometry_shader *shader, + const float (*input)[4], + float (*output)[4], + const float (*constants)[4], + unsigned count, + unsigned input_stride, + unsigned output_stride); + +void draw_geometry_shader_prepare(struct draw_geometry_shader *shader, + struct draw_context *draw); + +void draw_geometry_shader_delete(struct draw_geometry_shader *shader); + + +#endif diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index 9f956715a22..4585dcdb48a 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -35,6 +35,8 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" + +#include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -139,29 +141,29 @@ aa_transform_decl(struct tgsi_transform_context *ctx, struct aa_transform_context *aactx = (struct aa_transform_context *) ctx; if (decl->Declaration.File == TGSI_FILE_OUTPUT && - decl->Semantic.SemanticName == TGSI_SEMANTIC_COLOR && - decl->Semantic.SemanticIndex == 0) { - aactx->colorOutput = decl->DeclarationRange.First; + decl->Semantic.Name == TGSI_SEMANTIC_COLOR && + decl->Semantic.Index == 0) { + aactx->colorOutput = decl->Range.First; } else if (decl->Declaration.File == TGSI_FILE_SAMPLER) { uint i; - for (i = decl->DeclarationRange.First; - i <= decl->DeclarationRange.Last; i++) { + for (i = decl->Range.First; + i <= decl->Range.Last; i++) { aactx->samplersUsed |= 1 << i; } } else if (decl->Declaration.File == TGSI_FILE_INPUT) { - if ((int) decl->DeclarationRange.Last > aactx->maxInput) - aactx->maxInput = decl->DeclarationRange.Last; - if (decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC && - (int) decl->Semantic.SemanticIndex > aactx->maxGeneric) { - aactx->maxGeneric = decl->Semantic.SemanticIndex; + if ((int) decl->Range.Last > aactx->maxInput) + aactx->maxInput = decl->Range.Last; + if (decl->Semantic.Name == TGSI_SEMANTIC_GENERIC && + (int) decl->Semantic.Index > aactx->maxGeneric) { + aactx->maxGeneric = decl->Semantic.Index; } } else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { uint i; - for (i = decl->DeclarationRange.First; - i <= decl->DeclarationRange.Last; i++) { + for (i = decl->Range.First; + i <= decl->Range.Last; i++) { aactx->tempsUsed |= (1 << i); } } @@ -228,30 +230,30 @@ aa_transform_inst(struct tgsi_transform_context *ctx, /* XXX this could be linear... */ decl.Declaration.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE; decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; - decl.Semantic.SemanticIndex = aactx->maxGeneric + 1; - decl.DeclarationRange.First = - decl.DeclarationRange.Last = aactx->maxInput + 1; + decl.Semantic.Name = TGSI_SEMANTIC_GENERIC; + decl.Semantic.Index = aactx->maxGeneric + 1; + decl.Range.First = + decl.Range.Last = aactx->maxInput + 1; ctx->emit_declaration(ctx, &decl); /* declare new sampler */ decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_SAMPLER; - decl.DeclarationRange.First = - decl.DeclarationRange.Last = aactx->freeSampler; + decl.Range.First = + decl.Range.Last = aactx->freeSampler; ctx->emit_declaration(ctx, &decl); /* declare new temp regs */ decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_TEMPORARY; - decl.DeclarationRange.First = - decl.DeclarationRange.Last = aactx->texTemp; + decl.Range.First = + decl.Range.Last = aactx->texTemp; ctx->emit_declaration(ctx, &decl); decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_TEMPORARY; - decl.DeclarationRange.First = - decl.DeclarationRange.Last = aactx->colorTemp; + decl.Range.First = + decl.Range.Last = aactx->colorTemp; ctx->emit_declaration(ctx, &decl); aactx->firstInstruction = FALSE; @@ -265,14 +267,15 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_TEX; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = aactx->texTemp; + newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].Register.Index = aactx->texTemp; newInst.Instruction.NumSrcRegs = 2; - newInst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->maxInput + 1; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; - newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->freeSampler; + newInst.Instruction.Texture = TRUE; + newInst.Texture.Texture = TGSI_TEXTURE_2D; + newInst.Src[0].Register.File = TGSI_FILE_INPUT; + newInst.Src[0].Register.Index = aactx->maxInput + 1; + newInst.Src[1].Register.File = TGSI_FILE_SAMPLER; + newInst.Src[1].Register.Index = aactx->freeSampler; ctx->emit_instruction(ctx, &newInst); @@ -280,26 +283,26 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_MOV; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XYZ; + newInst.Dst[0].Register.File = TGSI_FILE_OUTPUT; + newInst.Dst[0].Register.Index = aactx->colorOutput; + newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZ; newInst.Instruction.NumSrcRegs = 1; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = aactx->colorTemp; ctx->emit_instruction(ctx, &newInst); /* MUL alpha */ newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_MUL; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; + newInst.Dst[0].Register.File = TGSI_FILE_OUTPUT; + newInst.Dst[0].Register.Index = aactx->colorOutput; + newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_W; newInst.Instruction.NumSrcRegs = 2; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->texTemp; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = aactx->colorTemp; + newInst.Src[1].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[1].Register.Index = aactx->texTemp; ctx->emit_instruction(ctx, &newInst); /* END */ @@ -316,11 +319,11 @@ aa_transform_inst(struct tgsi_transform_context *ctx, uint i; for (i = 0; i < inst->Instruction.NumDstRegs; i++) { - struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; - if (dst->DstRegister.File == TGSI_FILE_OUTPUT && - dst->DstRegister.Index == aactx->colorOutput) { - dst->DstRegister.File = TGSI_FILE_TEMPORARY; - dst->DstRegister.Index = aactx->colorTemp; + struct tgsi_full_dst_register *dst = &inst->Dst[i]; + if (dst->Register.File == TGSI_FILE_OUTPUT && + dst->Register.Index == aactx->colorOutput) { + dst->Register.File = TGSI_FILE_TEMPORARY; + dst->Register.Index = aactx->colorTemp; } } @@ -398,10 +401,9 @@ aaline_create_texture(struct aaline_stage *aaline) texTemp.target = PIPE_TEXTURE_2D; texTemp.format = PIPE_FORMAT_A8_UNORM; /* XXX verify supported by driver! */ texTemp.last_level = MAX_TEXTURE_LEVEL; - texTemp.width[0] = 1 << MAX_TEXTURE_LEVEL; - texTemp.height[0] = 1 << MAX_TEXTURE_LEVEL; - texTemp.depth[0] = 1; - pf_get_block(texTemp.format, &texTemp.block); + texTemp.width0 = 1 << MAX_TEXTURE_LEVEL; + texTemp.height0 = 1 << MAX_TEXTURE_LEVEL; + texTemp.depth0 = 1; aaline->texture = screen->texture_create(screen, &texTemp); if (!aaline->texture) @@ -413,11 +415,11 @@ aaline_create_texture(struct aaline_stage *aaline) */ for (level = 0; level <= MAX_TEXTURE_LEVEL; level++) { struct pipe_transfer *transfer; - const uint size = aaline->texture->width[level]; + const uint size = u_minify(aaline->texture->width0, level); ubyte *data; uint i, j; - assert(aaline->texture->width[level] == aaline->texture->height[level]); + assert(aaline->texture->width0 == aaline->texture->height0); /* This texture is new, no need to flush. */ @@ -658,13 +660,13 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header) } /* update vertex attrib info */ - aaline->tex_slot = draw->vs.num_vs_outputs; - aaline->pos_slot = draw->vs.position_output; + aaline->tex_slot = draw_current_shader_outputs(draw); + aaline->pos_slot = draw_current_shader_position_output(draw);; /* advertise the extra post-transformed vertex attribute */ - draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; - draw->extra_vp_outputs.semantic_index = aaline->fs->generic_attrib; - draw->extra_vp_outputs.slot = aaline->tex_slot; + draw->extra_shader_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; + draw->extra_shader_outputs.semantic_index = aaline->fs->generic_attrib; + draw->extra_shader_outputs.slot = aaline->tex_slot; /* how many samplers? */ /* we'll use sampler/texture[pstip->sampler_unit] for the stipple */ @@ -705,7 +707,7 @@ aaline_flush(struct draw_stage *stage, unsigned flags) aaline->state.texture); draw->suspend_flushing = FALSE; - draw->extra_vp_outputs.slot = 0; + draw->extra_shader_outputs.slot = 0; } @@ -896,16 +898,16 @@ draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe) aaline->driver_bind_fs_state = pipe->bind_fs_state; aaline->driver_delete_fs_state = pipe->delete_fs_state; - aaline->driver_bind_sampler_states = pipe->bind_sampler_states; - aaline->driver_set_sampler_textures = pipe->set_sampler_textures; + aaline->driver_bind_sampler_states = pipe->bind_fragment_sampler_states; + aaline->driver_set_sampler_textures = pipe->set_fragment_sampler_textures; /* override the driver's functions */ pipe->create_fs_state = aaline_create_fs_state; pipe->bind_fs_state = aaline_bind_fs_state; pipe->delete_fs_state = aaline_delete_fs_state; - pipe->bind_sampler_states = aaline_bind_sampler_states; - pipe->set_sampler_textures = aaline_set_sampler_textures; + pipe->bind_fragment_sampler_states = aaline_bind_sampler_states; + pipe->set_fragment_sampler_textures = aaline_set_sampler_textures; /* Install once everything is known to be OK: */ diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c index ae1712fe122..d86717e5182 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c @@ -131,22 +131,22 @@ aa_transform_decl(struct tgsi_transform_context *ctx, struct aa_transform_context *aactx = (struct aa_transform_context *) ctx; if (decl->Declaration.File == TGSI_FILE_OUTPUT && - decl->Semantic.SemanticName == TGSI_SEMANTIC_COLOR && - decl->Semantic.SemanticIndex == 0) { - aactx->colorOutput = decl->DeclarationRange.First; + decl->Semantic.Name == TGSI_SEMANTIC_COLOR && + decl->Semantic.Index == 0) { + aactx->colorOutput = decl->Range.First; } else if (decl->Declaration.File == TGSI_FILE_INPUT) { - if ((int) decl->DeclarationRange.Last > aactx->maxInput) - aactx->maxInput = decl->DeclarationRange.Last; - if (decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC && - (int) decl->Semantic.SemanticIndex > aactx->maxGeneric) { - aactx->maxGeneric = decl->Semantic.SemanticIndex; + if ((int) decl->Range.Last > aactx->maxInput) + aactx->maxInput = decl->Range.Last; + if (decl->Semantic.Name == TGSI_SEMANTIC_GENERIC && + (int) decl->Semantic.Index > aactx->maxGeneric) { + aactx->maxGeneric = decl->Semantic.Index; } } else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { uint i; - for (i = decl->DeclarationRange.First; - i <= decl->DeclarationRange.Last; i++) { + for (i = decl->Range.First; + i <= decl->Range.Last; i++) { aactx->tempsUsed |= (1 << i); } } @@ -198,23 +198,23 @@ aa_transform_inst(struct tgsi_transform_context *ctx, /* XXX this could be linear... */ decl.Declaration.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE; decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; - decl.Semantic.SemanticIndex = aactx->maxGeneric + 1; - decl.DeclarationRange.First = - decl.DeclarationRange.Last = texInput; + decl.Semantic.Name = TGSI_SEMANTIC_GENERIC; + decl.Semantic.Index = aactx->maxGeneric + 1; + decl.Range.First = + decl.Range.Last = texInput; ctx->emit_declaration(ctx, &decl); /* declare new temp regs */ decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_TEMPORARY; - decl.DeclarationRange.First = - decl.DeclarationRange.Last = tmp0; + decl.Range.First = + decl.Range.Last = tmp0; ctx->emit_declaration(ctx, &decl); decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_TEMPORARY; - decl.DeclarationRange.First = - decl.DeclarationRange.Last = aactx->colorTemp; + decl.Range.First = + decl.Range.Last = aactx->colorTemp; ctx->emit_declaration(ctx, &decl); aactx->firstInstruction = FALSE; @@ -234,30 +234,30 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_MUL; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XY; + newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].Register.Index = tmp0; + newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XY; newInst.Instruction.NumSrcRegs = 2; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[0].SrcRegister.Index = texInput; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; + newInst.Src[0].Register.File = TGSI_FILE_INPUT; + newInst.Src[0].Register.Index = texInput; + newInst.Src[1].Register.File = TGSI_FILE_INPUT; + newInst.Src[1].Register.Index = texInput; ctx->emit_instruction(ctx, &newInst); /* ADD t0.x, t0.x, t0.y; # x^2 + y^2 */ newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_ADD; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X; + newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].Register.Index = tmp0; + newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X; newInst.Instruction.NumSrcRegs = 2; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = tmp0; + newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X; + newInst.Src[1].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[1].Register.Index = tmp0; + newInst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_Y; ctx->emit_instruction(ctx, &newInst); #if NORMALIZE /* OPTIONAL normalization of length */ @@ -265,24 +265,24 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_RSQ; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X; + newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].Register.Index = tmp0; + newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X; newInst.Instruction.NumSrcRegs = 1; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = tmp0; ctx->emit_instruction(ctx, &newInst); /* RCP t0.x, t0.x; */ newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_RCP; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X; + newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].Register.Index = tmp0; + newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X; newInst.Instruction.NumSrcRegs = 1; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = tmp0; ctx->emit_instruction(ctx, &newInst); #endif @@ -290,16 +290,16 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_SGT; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y; + newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].Register.Index = tmp0; + newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Y; newInst.Instruction.NumSrcRegs = 2; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; - newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = tmp0; + newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X; + newInst.Src[1].Register.File = TGSI_FILE_INPUT; + newInst.Src[1].Register.Index = texInput; + newInst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_W; ctx->emit_instruction(ctx, &newInst); /* KIL -tmp0.yyyy; # if -tmp0.y < 0, KILL */ @@ -307,13 +307,13 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst.Instruction.Opcode = TGSI_OPCODE_KIL; newInst.Instruction.NumDstRegs = 0; newInst.Instruction.NumSrcRegs = 1; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - newInst.FullSrcRegisters[0].SrcRegister.Negate = 1; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = tmp0; + newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_Y; + newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_Y; + newInst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_Y; + newInst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_Y; + newInst.Src[0].Register.Negate = 1; ctx->emit_instruction(ctx, &newInst); @@ -323,77 +323,77 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_SUB; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z; + newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].Register.Index = tmp0; + newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Z; newInst.Instruction.NumSrcRegs = 2; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[0].SrcRegister.Index = texInput; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; - newInst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Z; + newInst.Src[0].Register.File = TGSI_FILE_INPUT; + newInst.Src[0].Register.Index = texInput; + newInst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_W; + newInst.Src[1].Register.File = TGSI_FILE_INPUT; + newInst.Src[1].Register.Index = texInput; + newInst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_Z; ctx->emit_instruction(ctx, &newInst); /* RCP t0.z, t0.z; # t0.z = 1 / m */ newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_RCP; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z; + newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].Register.Index = tmp0; + newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Z; newInst.Instruction.NumSrcRegs = 1; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Z; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = tmp0; + newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_Z; ctx->emit_instruction(ctx, &newInst); /* SUB t0.y, 1, t0.x; # d = 1 - d */ newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_SUB; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y; + newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].Register.Index = tmp0; + newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Y; newInst.Instruction.NumSrcRegs = 2; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[0].SrcRegister.Index = texInput; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + newInst.Src[0].Register.File = TGSI_FILE_INPUT; + newInst.Src[0].Register.Index = texInput; + newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_W; + newInst.Src[1].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[1].Register.Index = tmp0; + newInst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_X; ctx->emit_instruction(ctx, &newInst); /* MUL t0.w, t0.y, t0.z; # coverage = d * m */ newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_MUL; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; + newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].Register.Index = tmp0; + newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_W; newInst.Instruction.NumSrcRegs = 2; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Z; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = tmp0; + newInst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_Y; + newInst.Src[1].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[1].Register.Index = tmp0; + newInst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_Z; ctx->emit_instruction(ctx, &newInst); /* SLE t0.y, t0.x, tex.z; # bool b = distance <= k */ newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_SLE; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y; + newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].Register.Index = tmp0; + newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Y; newInst.Instruction.NumSrcRegs = 2; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; - newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Z; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = tmp0; + newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X; + newInst.Src[1].Register.File = TGSI_FILE_INPUT; + newInst.Src[1].Register.Index = texInput; + newInst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_Z; ctx->emit_instruction(ctx, &newInst); /* CMP t0.w, -t0.y, tex.w, t0.w; @@ -405,29 +405,29 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_CMP; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; + newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].Register.Index = tmp0; + newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_W; newInst.Instruction.NumSrcRegs = 3; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - newInst.FullSrcRegisters[0].SrcRegister.Negate = 1; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; - newInst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; - newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; - newInst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; - newInst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; - newInst.FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[2].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[2].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; - newInst.FullSrcRegisters[2].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; - newInst.FullSrcRegisters[2].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; - newInst.FullSrcRegisters[2].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = tmp0; + newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_Y; + newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_Y; + newInst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_Y; + newInst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_Y; + newInst.Src[0].Register.Negate = 1; + newInst.Src[1].Register.File = TGSI_FILE_INPUT; + newInst.Src[1].Register.Index = texInput; + newInst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_W; + newInst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_W; + newInst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_W; + newInst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W; + newInst.Src[2].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[2].Register.Index = tmp0; + newInst.Src[2].Register.SwizzleX = TGSI_SWIZZLE_W; + newInst.Src[2].Register.SwizzleY = TGSI_SWIZZLE_W; + newInst.Src[2].Register.SwizzleZ = TGSI_SWIZZLE_W; + newInst.Src[2].Register.SwizzleW = TGSI_SWIZZLE_W; ctx->emit_instruction(ctx, &newInst); } @@ -439,26 +439,26 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_MOV; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XYZ; + newInst.Dst[0].Register.File = TGSI_FILE_OUTPUT; + newInst.Dst[0].Register.Index = aactx->colorOutput; + newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZ; newInst.Instruction.NumSrcRegs = 1; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = aactx->colorTemp; ctx->emit_instruction(ctx, &newInst); /* MUL result.color.w, colorTemp, tmp0.w; */ newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_MUL; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; + newInst.Dst[0].Register.File = TGSI_FILE_OUTPUT; + newInst.Dst[0].Register.Index = aactx->colorOutput; + newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_W; newInst.Instruction.NumSrcRegs = 2; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->tmp0; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = aactx->colorTemp; + newInst.Src[1].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[1].Register.Index = aactx->tmp0; ctx->emit_instruction(ctx, &newInst); } else { @@ -468,11 +468,11 @@ aa_transform_inst(struct tgsi_transform_context *ctx, uint i; for (i = 0; i < inst->Instruction.NumDstRegs; i++) { - struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; - if (dst->DstRegister.File == TGSI_FILE_OUTPUT && - dst->DstRegister.Index == aactx->colorOutput) { - dst->DstRegister.File = TGSI_FILE_TEMPORARY; - dst->DstRegister.Index = aactx->colorTemp; + struct tgsi_full_dst_register *dst = &inst->Dst[i]; + if (dst->Register.File == TGSI_FILE_OUTPUT && + dst->Register.Index == aactx->colorOutput) { + dst->Register.File = TGSI_FILE_TEMPORARY; + dst->Register.Index = aactx->colorTemp; } } } @@ -687,14 +687,14 @@ aapoint_first_point(struct draw_stage *stage, struct prim_header *header) bind_aapoint_fragment_shader(aapoint); /* update vertex attrib info */ - aapoint->tex_slot = draw->vs.num_vs_outputs; + aapoint->tex_slot = draw_current_shader_outputs(draw); assert(aapoint->tex_slot > 0); /* output[0] is vertex pos */ - aapoint->pos_slot = draw->vs.position_output; + aapoint->pos_slot = draw_current_shader_position_output(draw); - draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; - draw->extra_vp_outputs.semantic_index = aapoint->fs->generic_attrib; - draw->extra_vp_outputs.slot = aapoint->tex_slot; + draw->extra_shader_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; + draw->extra_shader_outputs.semantic_index = aapoint->fs->generic_attrib; + draw->extra_shader_outputs.slot = aapoint->tex_slot; /* find psize slot in post-transform vertex */ aapoint->psize_slot = -1; @@ -731,7 +731,7 @@ aapoint_flush(struct draw_stage *stage, unsigned flags) aapoint->driver_bind_fs_state(pipe, aapoint->fs->driver_fs); draw->suspend_flushing = FALSE; - draw->extra_vp_outputs.slot = 0; + draw->extra_shader_outputs.slot = 0; } diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c index 0670268a196..205cda5eabe 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_clip.c +++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c @@ -114,8 +114,8 @@ static void interp( const struct clipper *clip, const struct vertex_header *out, const struct vertex_header *in ) { - const unsigned nr_attrs = clip->stage.draw->vs.num_vs_outputs; - const unsigned pos_attr = clip->stage.draw->vs.position_output; + const unsigned nr_attrs = draw_current_shader_outputs(clip->stage.draw); + const unsigned pos_attr = draw_current_shader_position_output(clip->stage.draw); unsigned j; /* Vertex header. diff --git a/src/gallium/auxiliary/draw/draw_pipe_cull.c b/src/gallium/auxiliary/draw/draw_pipe_cull.c index 0a70483858c..11b39db5990 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_cull.c +++ b/src/gallium/auxiliary/draw/draw_pipe_cull.c @@ -55,7 +55,7 @@ static INLINE struct cull_stage *cull_stage( struct draw_stage *stage ) static void cull_tri( struct draw_stage *stage, struct prim_header *header ) { - const unsigned pos = stage->draw->vs.position_output; + const unsigned pos = draw_current_shader_position_output(stage->draw); /* Window coords: */ const float *v0 = header->v[0]->data[pos]; diff --git a/src/gallium/auxiliary/draw/draw_pipe_offset.c b/src/gallium/auxiliary/draw/draw_pipe_offset.c index 40798a5d6e7..e829492423e 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_offset.c +++ b/src/gallium/auxiliary/draw/draw_pipe_offset.c @@ -63,7 +63,7 @@ static INLINE struct offset_stage *offset_stage( struct draw_stage *stage ) static void do_offset_tri( struct draw_stage *stage, struct prim_header *header ) { - const unsigned pos = stage->draw->vs.position_output; + const unsigned pos = draw_current_shader_position_output(stage->draw); struct offset_stage *offset = offset_stage(stage); float inv_det = 1.0f / header->det; diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index 283502cdf3e..0cc2b718641 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -38,6 +38,7 @@ #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" +#include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -133,20 +134,20 @@ pstip_transform_decl(struct tgsi_transform_context *ctx, if (decl->Declaration.File == TGSI_FILE_SAMPLER) { uint i; - for (i = decl->DeclarationRange.First; - i <= decl->DeclarationRange.Last; i++) { + for (i = decl->Range.First; + i <= decl->Range.Last; i++) { pctx->samplersUsed |= 1 << i; } } else if (decl->Declaration.File == TGSI_FILE_INPUT) { - pctx->maxInput = MAX2(pctx->maxInput, (int) decl->DeclarationRange.Last); - if (decl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) - pctx->wincoordInput = (int) decl->DeclarationRange.First; + pctx->maxInput = MAX2(pctx->maxInput, (int) decl->Range.Last); + if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) + pctx->wincoordInput = (int) decl->Range.First; } else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { uint i; - for (i = decl->DeclarationRange.First; - i <= decl->DeclarationRange.Last; i++) { + for (i = decl->Range.First; + i <= decl->Range.Last; i++) { pctx->tempsUsed |= (1 << i); } } @@ -226,25 +227,25 @@ pstip_transform_inst(struct tgsi_transform_context *ctx, decl.Declaration.File = TGSI_FILE_INPUT; decl.Declaration.Interpolate = TGSI_INTERPOLATE_LINEAR; /* XXX? */ decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_POSITION; - decl.Semantic.SemanticIndex = 0; - decl.DeclarationRange.First = - decl.DeclarationRange.Last = wincoordInput; + decl.Semantic.Name = TGSI_SEMANTIC_POSITION; + decl.Semantic.Index = 0; + decl.Range.First = + decl.Range.Last = wincoordInput; ctx->emit_declaration(ctx, &decl); } /* declare new sampler */ decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_SAMPLER; - decl.DeclarationRange.First = - decl.DeclarationRange.Last = pctx->freeSampler; + decl.Range.First = + decl.Range.Last = pctx->freeSampler; ctx->emit_declaration(ctx, &decl); /* declare new temp regs */ decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_TEMPORARY; - decl.DeclarationRange.First = - decl.DeclarationRange.Last = pctx->texTemp; + decl.Range.First = + decl.Range.Last = pctx->texTemp; ctx->emit_declaration(ctx, &decl); /* emit immediate = {1/32, 1/32, 1, 1} @@ -280,27 +281,28 @@ pstip_transform_inst(struct tgsi_transform_context *ctx, newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_MUL; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = pctx->texTemp; + newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].Register.Index = pctx->texTemp; newInst.Instruction.NumSrcRegs = 2; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[0].SrcRegister.Index = wincoordInput; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_IMMEDIATE; - newInst.FullSrcRegisters[1].SrcRegister.Index = pctx->numImmed; + newInst.Src[0].Register.File = TGSI_FILE_INPUT; + newInst.Src[0].Register.Index = wincoordInput; + newInst.Src[1].Register.File = TGSI_FILE_IMMEDIATE; + newInst.Src[1].Register.Index = pctx->numImmed; ctx->emit_instruction(ctx, &newInst); /* TEX texTemp, texTemp, sampler; */ newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_TEX; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = pctx->texTemp; + newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].Register.Index = pctx->texTemp; newInst.Instruction.NumSrcRegs = 2; - newInst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = pctx->texTemp; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; - newInst.FullSrcRegisters[1].SrcRegister.Index = pctx->freeSampler; + newInst.Instruction.Texture = TRUE; + newInst.Texture.Texture = TGSI_TEXTURE_2D; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = pctx->texTemp; + newInst.Src[1].Register.File = TGSI_FILE_SAMPLER; + newInst.Src[1].Register.Index = pctx->freeSampler; ctx->emit_instruction(ctx, &newInst); /* KIL -texTemp; # if -texTemp < 0, KILL fragment */ @@ -308,9 +310,9 @@ pstip_transform_inst(struct tgsi_transform_context *ctx, newInst.Instruction.Opcode = TGSI_OPCODE_KIL; newInst.Instruction.NumDstRegs = 0; newInst.Instruction.NumSrcRegs = 1; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = pctx->texTemp; - newInst.FullSrcRegisters[0].SrcRegister.Negate = 1; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = pctx->texTemp; + newInst.Src[0].Register.Negate = 1; ctx->emit_instruction(ctx, &newInst); } @@ -427,10 +429,9 @@ pstip_create_texture(struct pstip_stage *pstip) texTemp.target = PIPE_TEXTURE_2D; texTemp.format = PIPE_FORMAT_A8_UNORM; /* XXX verify supported by driver! */ texTemp.last_level = 0; - texTemp.width[0] = 32; - texTemp.height[0] = 32; - texTemp.depth[0] = 1; - pf_get_block(texTemp.format, &texTemp.block); + texTemp.width0 = 32; + texTemp.height0 = 32; + texTemp.depth0 = 1; pstip->texture = screen->texture_create(screen, &texTemp); if (pstip->texture == NULL) @@ -754,8 +755,8 @@ draw_install_pstipple_stage(struct draw_context *draw, pstip->driver_bind_fs_state = pipe->bind_fs_state; pstip->driver_delete_fs_state = pipe->delete_fs_state; - pstip->driver_bind_sampler_states = pipe->bind_sampler_states; - pstip->driver_set_sampler_textures = pipe->set_sampler_textures; + pstip->driver_bind_sampler_states = pipe->bind_fragment_sampler_states; + pstip->driver_set_sampler_textures = pipe->set_fragment_sampler_textures; pstip->driver_set_polygon_stipple = pipe->set_polygon_stipple; /* override the driver's functions */ @@ -763,8 +764,8 @@ draw_install_pstipple_stage(struct draw_context *draw, pipe->bind_fs_state = pstip_bind_fs_state; pipe->delete_fs_state = pstip_delete_fs_state; - pipe->bind_sampler_states = pstip_bind_sampler_states; - pipe->set_sampler_textures = pstip_set_sampler_textures; + pipe->bind_fragment_sampler_states = pstip_bind_sampler_states; + pipe->set_fragment_sampler_textures = pstip_set_sampler_textures; pipe->set_polygon_stipple = pstip_set_polygon_stipple; return TRUE; diff --git a/src/gallium/auxiliary/draw/draw_pipe_stipple.c b/src/gallium/auxiliary/draw/draw_pipe_stipple.c index 6e921bac278..70fbab9ea76 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_stipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_stipple.c @@ -73,7 +73,8 @@ screen_interp( struct draw_context *draw, const struct vertex_header *v1 ) { uint attr; - for (attr = 0; attr < draw->vs.num_vs_outputs; attr++) { + int num_outputs = draw_current_shader_outputs(draw); + for (attr = 0; attr < num_outputs; attr++) { const float *val0 = v0->data[attr]; const float *val1 = v1->data[attr]; float *newv = dst->data[attr]; @@ -121,7 +122,7 @@ stipple_line(struct draw_stage *stage, struct prim_header *header) struct stipple_stage *stipple = stipple_stage(stage); struct vertex_header *v0 = header->v[0]; struct vertex_header *v1 = header->v[1]; - const unsigned pos = stage->draw->vs.position_output; + const unsigned pos = draw_current_shader_position_output(stage->draw); const float *pos0 = v0->data[pos]; const float *pos1 = v1->data[pos]; float start = 0; diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c index f32cbef983d..3073c870825 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c @@ -59,7 +59,7 @@ static void wideline_line( struct draw_stage *stage, struct prim_header *header ) { /*const struct wideline_stage *wide = wideline_stage(stage);*/ - const unsigned pos = stage->draw->vs.position_output; + const unsigned pos = draw_current_shader_position_output(stage->draw); const float half_width = 0.5f * stage->draw->rasterizer->line_width; struct prim_header tri; diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c index 7d76a7dbf39..8dc50c0ab43 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c @@ -112,7 +112,7 @@ static void set_texcoords(const struct widepoint_stage *wide, if (wide->point_coord_fs_input >= 0) { /* put gl_PointCoord into the extra vertex slot */ - uint slot = wide->stage.draw->extra_vp_outputs.slot; + uint slot = wide->stage.draw->extra_shader_outputs.slot; v->data[slot][0] = tc[0]; v->data[slot][1] = tc[1]; v->data[slot][2] = 0.0F; @@ -130,7 +130,7 @@ static void widepoint_point( struct draw_stage *stage, struct prim_header *header ) { const struct widepoint_stage *wide = widepoint_stage(stage); - const unsigned pos = stage->draw->vs.position_output; + const unsigned pos = draw_current_shader_position_output(stage->draw); const boolean sprite = (boolean) stage->draw->rasterizer->point_sprite; float half_size; float left_adj, right_adj, bot_adj, top_adj; @@ -257,13 +257,13 @@ static void widepoint_first_point( struct draw_stage *stage, wide->point_coord_fs_input = find_pntc_input_attrib(draw); /* setup extra vp output (point coord implemented as a texcoord) */ - draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; - draw->extra_vp_outputs.semantic_index = 0; - draw->extra_vp_outputs.slot = draw->vs.num_vs_outputs; + draw->extra_shader_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; + draw->extra_shader_outputs.semantic_index = 0; + draw->extra_shader_outputs.slot = draw_current_shader_outputs(draw); } else { wide->point_coord_fs_input = -1; - draw->extra_vp_outputs.slot = 0; + draw->extra_shader_outputs.slot = 0; } wide->psize_slot = -1; @@ -287,7 +287,7 @@ static void widepoint_flush( struct draw_stage *stage, unsigned flags ) { stage->point = widepoint_first_point; stage->next->flush( stage->next, flags ); - stage->draw->extra_vp_outputs.slot = 0; + stage->draw->extra_shader_outputs.slot = 0; } diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 41fcb16a0a5..e49041556bd 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -142,8 +142,6 @@ struct draw_context /* user-space vertex data, buffers */ struct { - const unsigned *edgeflag; - /** vertex element/index buffer (ex: glDrawElements) */ const void *elts; /** bytes per index (0, 1, 2 or 4) */ @@ -154,8 +152,9 @@ struct draw_context /** vertex arrays */ const void *vbuffer[PIPE_MAX_ATTRIBS]; - /** constant buffer (for vertex shader) */ - const void *constants; + /** constant buffer (for vertex/geometry shader) */ + const void *vs_constants; + const void *gs_constants; } user; boolean test_fse; /* enable FSE even though its not correct (eg for softpipe) */ @@ -184,6 +183,7 @@ struct draw_context struct draw_vertex_shader *vertex_shader; uint num_vs_outputs; /**< convenience, from vertex_shader */ uint position_output; + uint edgeflag_output; /** TGSI program interpreter runtime state */ struct tgsi_exec_machine *machine; @@ -212,6 +212,18 @@ struct draw_context struct translate_cache *emit_cache; } vs; + struct { + struct draw_geometry_shader *geometry_shader; + uint num_gs_outputs; /**< convenience, from geometry_shader */ + uint position_output; + + /** TGSI program interpreter runtime state */ + struct tgsi_exec_machine *machine; + + uint num_samplers; + struct tgsi_sampler **samplers; + } gs; + /* Clip derived state: */ float plane[12][4]; @@ -223,7 +235,7 @@ struct draw_context uint semantic_name; uint semantic_index; int slot; - } extra_vp_outputs; + } extra_shader_outputs; unsigned reduced_prim; @@ -246,6 +258,19 @@ void draw_vs_set_constants( struct draw_context *, +/******************************************************************************* + * Geometry shading code: + */ +boolean draw_gs_init( struct draw_context *draw ); +void draw_gs_set_constants( struct draw_context *, + const float (*constants)[4], + unsigned size ); + +/******************************************************************************* + * Common shading code: + */ +int draw_current_shader_outputs(struct draw_context *draw); +int draw_current_shader_position_output(struct draw_context *draw); /******************************************************************************* * Vertex processing (was passthrough) code: diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 4865a2d8542..2801dbafe47 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -314,12 +314,3 @@ draw_arrays(struct draw_context *draw, unsigned prim, /* drawing done here: */ draw_pt_arrays(draw, prim, start, count); } - -boolean draw_pt_get_edgeflag( struct draw_context *draw, - unsigned idx ) -{ - if (draw->pt.user.edgeflag) - return (draw->pt.user.edgeflag[idx/32] & (1 << (idx%32))) != 0; - else - return 1; -} diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index 7a17a9fb6b2..20edf7a227e 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -149,11 +149,6 @@ struct draw_pt_middle_end *draw_pt_middle_fse( struct draw_context *draw ); struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit(struct draw_context *draw); -/* More helpers: - */ -boolean draw_pt_get_edgeflag( struct draw_context *draw, - unsigned idx ); - /******************************************************************************* * HW vertex emit: @@ -217,7 +212,8 @@ boolean draw_pt_post_vs_run( struct pt_post_vs *pvs, void draw_pt_post_vs_prepare( struct pt_post_vs *pvs, boolean bypass_clipping, boolean bypass_viewport, - boolean opengl ); + boolean opengl, + boolean need_edgeflags ); struct pt_post_vs *draw_pt_post_vs_create( struct draw_context *draw ); diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index 65c3a34c347..305bfef4352 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -42,11 +42,11 @@ struct pt_fetch { struct translate *translate; unsigned vertex_size; - boolean need_edgeflags; struct translate_cache *cache; }; + /* Perform the fetch from API vertex elements & vertex buffers, to a * contiguous set of float[4] attributes as required for the * vertex_shader->run_linear() method. @@ -120,7 +120,12 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, fetch->translate = translate_cache_find(fetch->cache, &key); { - static struct vertex_header vh = { 0, 1, 0, UNDEFINED_VERTEX_ID, { .0f, .0f, .0f, .0f } }; + static struct vertex_header vh = { 0, + 1, + 0, + UNDEFINED_VERTEX_ID, + { .0f, .0f, .0f, .0f } }; + fetch->translate->set_buffer(fetch->translate, draw->pt.nr_vertex_buffers, &vh, @@ -128,9 +133,6 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, } } - fetch->need_edgeflags = ((draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL || - draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL) && - draw->pt.user.edgeflag); } @@ -158,17 +160,6 @@ void draw_pt_fetch_run( struct pt_fetch *fetch, count, verts ); - /* Edgeflags are hard to fit into a translate program, populate - * them separately if required. In the setup above they are - * defaulted to one, so only need this if there is reason to change - * that default: - */ - if (fetch->need_edgeflags) { - for (i = 0; i < count; i++) { - struct vertex_header *vh = (struct vertex_header *)(verts + i * fetch->vertex_size); - vh->edgeflag = draw_pt_get_edgeflag( draw, elts[i] ); - } - } } @@ -193,18 +184,6 @@ void draw_pt_fetch_run_linear( struct pt_fetch *fetch, start, count, verts ); - - /* Edgeflags are hard to fit into a translate program, populate - * them separately if required. In the setup above they are - * defaulted to one, so only need this if there is reason to change - * that default: - */ - if (fetch->need_edgeflags) { - for (i = 0; i < count; i++) { - struct vertex_header *vh = (struct vertex_header *)(verts + i * fetch->vertex_size); - vh->edgeflag = draw_pt_get_edgeflag( draw, start + i ); - } - } } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index df6c265b7ec..1a9df4cac5d 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -32,6 +32,7 @@ #include "draw/draw_vertex.h" #include "draw/draw_pt.h" #include "draw/draw_vs.h" +#include "draw/draw_gs.h" #include "translate/translate.h" @@ -85,9 +86,9 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, draw_pt_post_vs_prepare( fpme->post_vs, (boolean)draw->bypass_clipping, (boolean)(draw->identity_viewport || - draw->rasterizer->bypass_vs_clip_and_viewport), - (boolean)draw->rasterizer->gl_rasterization_rules ); - + draw->rasterizer->bypass_vs_clip_and_viewport), + (boolean)draw->rasterizer->gl_rasterization_rules, + (draw->vs.edgeflag_output ? true : false) ); if (!(opt & PT_PIPELINE)) { draw_pt_emit_prepare( fpme->emit, @@ -119,7 +120,8 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, { struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; struct draw_context *draw = fpme->draw; - struct draw_vertex_shader *shader = draw->vs.vertex_shader; + struct draw_vertex_shader *vshader = draw->vs.vertex_shader; + struct draw_geometry_shader *gshader = draw->gs.geometry_shader; unsigned opt = fpme->opt; unsigned alloc_count = align( fetch_count, 4 ); @@ -147,13 +149,21 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, */ if (opt & PT_SHADE) { - shader->run_linear(shader, - (const float (*)[4])pipeline_verts->data, - ( float (*)[4])pipeline_verts->data, - (const float (*)[4])draw->pt.user.constants, - fetch_count, - fpme->vertex_size, - fpme->vertex_size); + vshader->run_linear(vshader, + (const float (*)[4])pipeline_verts->data, + ( float (*)[4])pipeline_verts->data, + (const float (*)[4])draw->pt.user.vs_constants, + fetch_count, + fpme->vertex_size, + fpme->vertex_size); + if (gshader) + draw_geometry_shader_run(gshader, + (const float (*)[4])pipeline_verts->data, + ( float (*)[4])pipeline_verts->data, + (const float (*)[4])draw->pt.user.gs_constants, + fetch_count, + fpme->vertex_size, + fpme->vertex_size); } if (draw_pt_post_vs_run( fpme->post_vs, @@ -196,6 +206,7 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle, struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; struct draw_context *draw = fpme->draw; struct draw_vertex_shader *shader = draw->vs.vertex_shader; + struct draw_geometry_shader *geometry_shader = draw->gs.geometry_shader; unsigned opt = fpme->opt; unsigned alloc_count = align( count, 4 ); @@ -226,10 +237,19 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle, shader->run_linear(shader, (const float (*)[4])pipeline_verts->data, ( float (*)[4])pipeline_verts->data, - (const float (*)[4])draw->pt.user.constants, + (const float (*)[4])draw->pt.user.vs_constants, count, fpme->vertex_size, fpme->vertex_size); + + if (geometry_shader) + draw_geometry_shader_run(geometry_shader, + (const float (*)[4])pipeline_verts->data, + ( float (*)[4])pipeline_verts->data, + (const float (*)[4])draw->pt.user.gs_constants, + count, + fpme->vertex_size, + fpme->vertex_size); } if (draw_pt_post_vs_run( fpme->post_vs, @@ -270,6 +290,7 @@ static boolean fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; struct draw_context *draw = fpme->draw; struct draw_vertex_shader *shader = draw->vs.vertex_shader; + struct draw_geometry_shader *geometry_shader = draw->gs.geometry_shader; unsigned opt = fpme->opt; unsigned alloc_count = align( count, 4 ); @@ -296,10 +317,19 @@ static boolean fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle shader->run_linear(shader, (const float (*)[4])pipeline_verts->data, ( float (*)[4])pipeline_verts->data, - (const float (*)[4])draw->pt.user.constants, + (const float (*)[4])draw->pt.user.vs_constants, count, fpme->vertex_size, fpme->vertex_size); + + if (geometry_shader) + draw_geometry_shader_run(geometry_shader, + (const float (*)[4])pipeline_verts->data, + ( float (*)[4])pipeline_verts->data, + (const float (*)[4])draw->pt.user.gs_constants, + count, + fpme->vertex_size, + fpme->vertex_size); } if (draw_pt_post_vs_run( fpme->post_vs, diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c index 6c1cb48e8b8..55151823a14 100644 --- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c +++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c @@ -100,7 +100,7 @@ static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs, struct vertex_header *out = vertices; const float *scale = pvs->draw->viewport.scale; const float *trans = pvs->draw->viewport.translate; - const unsigned pos = pvs->draw->vs.position_output; + const unsigned pos = draw_current_shader_position_output(pvs->draw); unsigned clipped = 0; unsigned j; @@ -147,6 +147,39 @@ static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs, +/* As above plus edgeflags + */ +static boolean +post_vs_cliptest_viewport_gl_edgeflag(struct pt_post_vs *pvs, + struct vertex_header *vertices, + unsigned count, + unsigned stride ) +{ + unsigned j; + boolean needpipe; + + needpipe = post_vs_cliptest_viewport_gl( pvs, vertices, count, stride); + + /* If present, copy edgeflag VS output into vertex header. + * Otherwise, leave header as is. + */ + if (pvs->draw->vs.edgeflag_output) { + struct vertex_header *out = vertices; + int ef = pvs->draw->vs.edgeflag_output; + + for (j = 0; j < count; j++) { + const float *edgeflag = out->data[ef]; + out->edgeflag = !(edgeflag[0] != 1.0f); + needpipe |= !out->edgeflag; + out = (struct vertex_header *)( (char *)out + stride ); + } + } + return needpipe; +} + + + + /* If bypass_clipping is set, skip cliptest and rhw divide. */ static boolean post_vs_viewport( struct pt_post_vs *pvs, @@ -157,7 +190,7 @@ static boolean post_vs_viewport( struct pt_post_vs *pvs, struct vertex_header *out = vertices; const float *scale = pvs->draw->viewport.scale; const float *trans = pvs->draw->viewport.translate; - const unsigned pos = pvs->draw->vs.position_output; + const unsigned pos = draw_current_shader_position_output(pvs->draw); unsigned j; if (0) debug_printf("%s\n", __FUNCTION__); @@ -201,17 +234,29 @@ boolean draw_pt_post_vs_run( struct pt_post_vs *pvs, void draw_pt_post_vs_prepare( struct pt_post_vs *pvs, boolean bypass_clipping, boolean bypass_viewport, - boolean opengl ) + boolean opengl, + boolean need_edgeflags ) { - if (bypass_clipping) { - if (bypass_viewport) - pvs->run = post_vs_none; - else - pvs->run = post_vs_viewport; + if (!need_edgeflags) { + if (bypass_clipping) { + if (bypass_viewport) + pvs->run = post_vs_none; + else + pvs->run = post_vs_viewport; + } + else { + /* if (opengl) */ + pvs->run = post_vs_cliptest_viewport_gl; + } } else { - /* if (opengl) */ - pvs->run = post_vs_cliptest_viewport_gl; + /* If we need to copy edgeflags to the vertex header, it should + * mean we're running the primitive pipeline. Hence the bypass + * flags should be false. + */ + assert(!bypass_clipping); + assert(!bypass_viewport); + pvs->run = post_vs_cliptest_viewport_gl_edgeflag; } } diff --git a/src/gallium/auxiliary/draw/draw_pt_util.c b/src/gallium/auxiliary/draw/draw_pt_util.c index b61fa291436..17c3b8cec26 100644 --- a/src/gallium/auxiliary/draw/draw_pt_util.c +++ b/src/gallium/auxiliary/draw/draw_pt_util.c @@ -50,16 +50,32 @@ void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr) *first = 2; *incr = 1; break; + case PIPE_PRIM_LINES_ADJACENCY: + *first = 4; + *incr = 2; + break; + case PIPE_PRIM_LINE_STRIP_ADJACENCY: + *first = 4; + *incr = 1; + break; case PIPE_PRIM_TRIANGLES: *first = 3; *incr = 3; break; + case PIPE_PRIM_TRIANGLES_ADJACENCY: + *first = 6; + *incr = 3; + break; case PIPE_PRIM_TRIANGLE_STRIP: case PIPE_PRIM_TRIANGLE_FAN: case PIPE_PRIM_POLYGON: *first = 3; *incr = 1; break; + case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: + *first = 6; + *incr = 1; + break; case PIPE_PRIM_QUADS: *first = 4; *incr = 4; diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h index 010c7a18a7c..f0aec5febab 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h +++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h @@ -36,6 +36,10 @@ static void FUNC(struct draw_pt_front_end *frontend, case PIPE_PRIM_TRIANGLE_STRIP: case PIPE_PRIM_QUADS: case PIPE_PRIM_QUAD_STRIP: + case PIPE_PRIM_LINES_ADJACENCY: + case PIPE_PRIM_LINE_STRIP_ADJACENCY: + case PIPE_PRIM_TRIANGLES_ADJACENCY: + case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: for (j = 0; j < count;) { unsigned remaining = count - j; unsigned nr = trim( MIN2(varray->driver_fetch_max, remaining), first, incr ); diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index d3f179ced18..757c4874545 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -346,7 +346,8 @@ vcache_check_run( struct draw_pt_front_end *frontend, vcache->fetch_max, draw_count); - if (max_index == 0xffffffff || + if (max_index >= DRAW_PIPE_MAX_VERTICES || + fetch_count >= UNDEFINED_VERTEX_ID || fetch_count > draw_count) { if (0) debug_printf("fail\n"); goto fail; diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c index 790e89ed820..35536895326 100644 --- a/src/gallium/auxiliary/draw/draw_vs.c +++ b/src/gallium/auxiliary/draw/draw_vs.c @@ -101,6 +101,9 @@ draw_create_vertex_shader(struct draw_context *draw, if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_POSITION && vs->info.output_semantic_index[i] == 0) vs->position_output = i; + else if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_EDGEFLAG && + vs->info.output_semantic_index[i] == 0) + vs->edgeflag_output = i; } } @@ -120,6 +123,7 @@ draw_bind_vertex_shader(struct draw_context *draw, draw->vs.vertex_shader = dvs; draw->vs.num_vs_outputs = dvs->info.num_outputs; draw->vs.position_output = dvs->position_output; + draw->vs.edgeflag_output = dvs->edgeflag_output; dvs->prepare( dvs, draw ); } else { diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index 89ae158751a..e3b807ebd0e 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -107,6 +107,7 @@ struct draw_vertex_shader { struct tgsi_shader_info info; unsigned position_output; + unsigned edgeflag_output; /* Extracted from shader: */ diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 88bc790b621..1aaae4ab7a4 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -361,8 +361,8 @@ static struct x86_reg aos_get_shader_reg_ptr( struct aos_compilation *cp, static struct x86_reg get_dst_ptr( struct aos_compilation *cp, const struct tgsi_full_dst_register *dst ) { - unsigned file = dst->DstRegister.File; - unsigned idx = dst->DstRegister.Index; + unsigned file = dst->Register.File; + unsigned idx = dst->Register.Index; unsigned i; @@ -529,8 +529,8 @@ static struct x86_reg fetch_src( struct aos_compilation *cp, const struct tgsi_full_src_register *src ) { struct x86_reg arg0 = aos_get_shader_reg(cp, - src->SrcRegister.File, - src->SrcRegister.Index); + src->Register.File, + src->Register.Index); unsigned i; ubyte swz = 0; unsigned negs = 0; @@ -620,8 +620,8 @@ static void x87_fld_src( struct aos_compilation *cp, unsigned channel ) { struct x86_reg arg0 = aos_get_shader_reg_ptr(cp, - src->SrcRegister.File, - src->SrcRegister.Index); + src->Register.File, + src->Register.Index); unsigned swizzle = tgsi_util_get_full_src_register_swizzle( src, channel ); unsigned neg = tgsi_util_get_full_src_register_sign_mode( src, channel ); @@ -669,15 +669,15 @@ static void store_dest( struct aos_compilation *cp, { struct x86_reg dst; - switch (reg->DstRegister.WriteMask) { + switch (reg->Register.WriteMask) { case 0: return; case TGSI_WRITEMASK_XYZW: aos_adopt_xmm_reg(cp, get_xmm_writable(cp, result), - reg->DstRegister.File, - reg->DstRegister.Index, + reg->Register.File, + reg->Register.Index, TRUE); return; default: @@ -685,10 +685,10 @@ static void store_dest( struct aos_compilation *cp, } dst = aos_get_shader_reg_xmm(cp, - reg->DstRegister.File, - reg->DstRegister.Index); + reg->Register.File, + reg->Register.Index); - switch (reg->DstRegister.WriteMask) { + switch (reg->Register.WriteMask) { case TGSI_WRITEMASK_X: sse_movss(cp->func, dst, get_xmm(cp, result)); break; @@ -710,14 +710,14 @@ static void store_dest( struct aos_compilation *cp, break; default: - mask_write(cp, dst, result, reg->DstRegister.WriteMask); + mask_write(cp, dst, result, reg->Register.WriteMask); break; } aos_adopt_xmm_reg(cp, dst, - reg->DstRegister.File, - reg->DstRegister.Index, + reg->Register.File, + reg->Register.Index, TRUE); } @@ -737,7 +737,7 @@ static void store_scalar_dest( struct aos_compilation *cp, const struct tgsi_full_dst_register *reg, struct x86_reg result ) { - unsigned writemask = reg->DstRegister.WriteMask; + unsigned writemask = reg->Register.WriteMask; struct x86_reg dst; if (writemask != TGSI_WRITEMASK_X && @@ -754,12 +754,12 @@ static void store_scalar_dest( struct aos_compilation *cp, result = get_xmm(cp, result); dst = aos_get_shader_reg_xmm(cp, - reg->DstRegister.File, - reg->DstRegister.Index); + reg->Register.File, + reg->Register.Index); - switch (reg->DstRegister.WriteMask) { + switch (reg->Register.WriteMask) { case TGSI_WRITEMASK_X: sse_movss(cp->func, dst, result); break; @@ -782,8 +782,8 @@ static void store_scalar_dest( struct aos_compilation *cp, aos_adopt_xmm_reg(cp, dst, - reg->DstRegister.File, - reg->DstRegister.Index, + reg->Register.File, + reg->Register.Index, TRUE); } @@ -819,7 +819,7 @@ static void x87_fstp_dest4( struct aos_compilation *cp, const struct tgsi_full_dst_register *dst ) { struct x86_reg ptr = get_dst_ptr(cp, dst); - unsigned writemask = dst->DstRegister.WriteMask; + unsigned writemask = dst->Register.WriteMask; x87_fst_or_nop(cp->func, writemask, 0, ptr); x87_fst_or_nop(cp->func, writemask, 1, ptr); @@ -956,7 +956,7 @@ static void emit_print( struct aos_compilation *cp, static boolean emit_ABS( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); struct x86_reg neg = aos_get_internal(cp, IMM_NEGS); struct x86_reg tmp = aos_get_xmm_reg(cp); @@ -964,27 +964,27 @@ static boolean emit_ABS( struct aos_compilation *cp, const struct tgsi_full_inst sse_mulps(cp->func, tmp, neg); sse_maxps(cp->func, tmp, arg0); - store_dest(cp, &op->FullDstRegisters[0], tmp); + store_dest(cp, &op->Dst[0], tmp); return TRUE; } static boolean emit_ADD( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); + struct x86_reg arg1 = fetch_src(cp, &op->Src[1]); struct x86_reg dst = get_xmm_writable(cp, arg0); sse_addps(cp->func, dst, arg1); - store_dest(cp, &op->FullDstRegisters[0], dst); + store_dest(cp, &op->Dst[0], dst); return TRUE; } static boolean emit_COS( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - x87_fld_src(cp, &op->FullSrcRegisters[0], 0); + x87_fld_src(cp, &op->Src[0], 0); x87_fcos(cp->func); - x87_fstp_dest4(cp, &op->FullDstRegisters[0]); + x87_fstp_dest4(cp, &op->Dst[0]); return TRUE; } @@ -993,8 +993,8 @@ static boolean emit_COS( struct aos_compilation *cp, const struct tgsi_full_inst */ static boolean emit_DP3( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); + struct x86_reg arg1 = fetch_src(cp, &op->Src[1]); struct x86_reg tmp = aos_get_xmm_reg(cp); struct x86_reg dst = get_xmm_writable(cp, arg0); @@ -1007,14 +1007,14 @@ static boolean emit_DP3( struct aos_compilation *cp, const struct tgsi_full_inst sse_addss(cp->func, dst, tmp); aos_release_xmm_reg(cp, tmp.idx); - store_scalar_dest(cp, &op->FullDstRegisters[0], dst); + store_scalar_dest(cp, &op->Dst[0], dst); return TRUE; } static boolean emit_DP4( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); + struct x86_reg arg1 = fetch_src(cp, &op->Src[1]); struct x86_reg tmp = aos_get_xmm_reg(cp); struct x86_reg dst = get_xmm_writable(cp, arg0); @@ -1028,14 +1028,14 @@ static boolean emit_DP4( struct aos_compilation *cp, const struct tgsi_full_inst sse_addss(cp->func, dst, tmp); aos_release_xmm_reg(cp, tmp.idx); - store_scalar_dest(cp, &op->FullDstRegisters[0], dst); + store_scalar_dest(cp, &op->Dst[0], dst); return TRUE; } static boolean emit_DPH( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); + struct x86_reg arg1 = fetch_src(cp, &op->Src[1]); struct x86_reg tmp = aos_get_xmm_reg(cp); struct x86_reg dst = get_xmm_writable(cp, arg0); @@ -1051,14 +1051,14 @@ static boolean emit_DPH( struct aos_compilation *cp, const struct tgsi_full_inst sse_addss(cp->func, dst, tmp); aos_release_xmm_reg(cp, tmp.idx); - store_scalar_dest(cp, &op->FullDstRegisters[0], dst); + store_scalar_dest(cp, &op->Dst[0], dst); return TRUE; } static boolean emit_DST( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); + struct x86_reg arg1 = fetch_src(cp, &op->Src[1]); struct x86_reg dst = aos_get_xmm_reg(cp); struct x86_reg tmp = aos_get_xmm_reg(cp); struct x86_reg ones = aos_get_internal(cp, IMM_ONES); @@ -1073,25 +1073,25 @@ static boolean emit_DST( struct aos_compilation *cp, const struct tgsi_full_inst sse_mulps(cp->func, dst, tmp); aos_release_xmm_reg(cp, tmp.idx); - store_dest(cp, &op->FullDstRegisters[0], dst); + store_dest(cp, &op->Dst[0], dst); return TRUE; } static boolean emit_LG2( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { x87_fld1(cp->func); /* 1 */ - x87_fld_src(cp, &op->FullSrcRegisters[0], 0); /* a0 1 */ + x87_fld_src(cp, &op->Src[0], 0); /* a0 1 */ x87_fyl2x(cp->func); /* log2(a0) */ - x87_fstp_dest4(cp, &op->FullDstRegisters[0]); + x87_fstp_dest4(cp, &op->Dst[0]); return TRUE; } #if 0 static boolean emit_EX2( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - x87_fld_src(cp, &op->FullSrcRegisters[0], 0); + x87_fld_src(cp, &op->Src[0], 0); x87_emit_ex2(cp); - x87_fstp_dest4(cp, &op->FullDstRegisters[0]); + x87_fstp_dest4(cp, &op->Dst[0]); return TRUE; } #endif @@ -1099,8 +1099,8 @@ static boolean emit_EX2( struct aos_compilation *cp, const struct tgsi_full_inst static boolean emit_FLR( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]); - unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask; + struct x86_reg dst = get_dst_ptr(cp, &op->Dst[0]); + unsigned writemask = op->Dst[0].Register.WriteMask; int i; set_fpu_round_neg_inf( cp ); @@ -1109,7 +1109,7 @@ static boolean emit_FLR( struct aos_compilation *cp, const struct tgsi_full_inst */ for (i = 3; i >= 0; i--) { if (writemask & (1<<i)) { - x87_fld_src(cp, &op->FullSrcRegisters[0], i); + x87_fld_src(cp, &op->Src[0], i); } } @@ -1126,8 +1126,8 @@ static boolean emit_FLR( struct aos_compilation *cp, const struct tgsi_full_inst static boolean emit_RND( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]); - unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask; + struct x86_reg dst = get_dst_ptr(cp, &op->Dst[0]); + unsigned writemask = op->Dst[0].Register.WriteMask; int i; set_fpu_round_nearest( cp ); @@ -1136,7 +1136,7 @@ static boolean emit_RND( struct aos_compilation *cp, const struct tgsi_full_inst */ for (i = 3; i >= 0; i--) { if (writemask & (1<<i)) { - x87_fld_src(cp, &op->FullSrcRegisters[0], i); + x87_fld_src(cp, &op->Src[0], i); } } @@ -1153,10 +1153,10 @@ static boolean emit_RND( struct aos_compilation *cp, const struct tgsi_full_inst static boolean emit_FRC( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]); + struct x86_reg dst = get_dst_ptr(cp, &op->Dst[0]); struct x86_reg st0 = x86_make_reg(file_x87, 0); struct x86_reg st1 = x86_make_reg(file_x87, 1); - unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask; + unsigned writemask = op->Dst[0].Register.WriteMask; int i; set_fpu_round_neg_inf( cp ); @@ -1166,7 +1166,7 @@ static boolean emit_FRC( struct aos_compilation *cp, const struct tgsi_full_inst */ for (i = 3; i >= 0; i--) { if (writemask & (1<<i)) { - x87_fld_src(cp, &op->FullSrcRegisters[0], i); + x87_fld_src(cp, &op->Src[0], i); } } @@ -1190,7 +1190,7 @@ static boolean emit_FRC( struct aos_compilation *cp, const struct tgsi_full_inst static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX ); - unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask; + unsigned writemask = op->Dst[0].Register.WriteMask; unsigned lit_count = cp->lit_count++; struct x86_reg result, arg0; unsigned i; @@ -1209,10 +1209,10 @@ static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_inst if (writemask != TGSI_WRITEMASK_XYZW) result = x86_make_disp(cp->machine_EDX, Offset(struct aos_machine, tmp[0])); else - result = get_dst_ptr(cp, &op->FullDstRegisters[0]); + result = get_dst_ptr(cp, &op->Dst[0]); - arg0 = fetch_src( cp, &op->FullSrcRegisters[0] ); + arg0 = fetch_src( cp, &op->Src[0] ); if (arg0.file == file_XMM) { struct x86_reg tmp = x86_make_disp(cp->machine_EDX, Offset(struct aos_machine, tmp[1])); @@ -1259,7 +1259,7 @@ static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_inst if (writemask != TGSI_WRITEMASK_XYZW) { store_dest( cp, - &op->FullDstRegisters[0], + &op->Dst[0], get_xmm_writable( cp, result ) ); } @@ -1269,8 +1269,8 @@ static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_inst #if 0 static boolean emit_inline_LIT( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]); - unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask; + struct x86_reg dst = get_dst_ptr(cp, &op->Dst[0]); + unsigned writemask = op->Dst[0].Register.WriteMask; if (writemask & TGSI_WRITEMASK_YZ) { struct x86_reg st1 = x86_make_reg(file_x87, 1); @@ -1286,13 +1286,13 @@ static boolean emit_inline_LIT( struct aos_compilation *cp, const struct tgsi_fu */ x87_fldz(cp->func); /* 1 0 */ #endif - x87_fld_src(cp, &op->FullSrcRegisters[0], 1); /* a1 1 0 */ + x87_fld_src(cp, &op->Src[0], 1); /* a1 1 0 */ x87_fcomi(cp->func, st2); /* a1 1 0 */ x87_fcmovb(cp->func, st1); /* a1' 1 0 */ x87_fstp(cp->func, st1); /* a1' 0 */ x87_fstp(cp->func, st1); /* a1' */ - x87_fld_src(cp, &op->FullSrcRegisters[0], 3); /* a3 a1' */ + x87_fld_src(cp, &op->Src[0], 3); /* a3 a1' */ x87_fxch(cp->func, st1); /* a1' a3 */ @@ -1305,7 +1305,7 @@ static boolean emit_inline_LIT( struct aos_compilation *cp, const struct tgsi_fu /* a0' = max2(a0, 0): */ x87_fldz(cp->func); /* 0 r2 */ - x87_fld_src(cp, &op->FullSrcRegisters[0], 0); /* a0 0 r2 */ + x87_fld_src(cp, &op->Src[0], 0); /* a0 0 r2 */ x87_fcomi(cp->func, st1); x87_fcmovb(cp->func, st1); /* a0' 0 r2 */ @@ -1333,58 +1333,58 @@ static boolean emit_inline_LIT( struct aos_compilation *cp, const struct tgsi_fu static boolean emit_MAX( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); + struct x86_reg arg1 = fetch_src(cp, &op->Src[1]); struct x86_reg dst = get_xmm_writable(cp, arg0); sse_maxps(cp->func, dst, arg1); - store_dest(cp, &op->FullDstRegisters[0], dst); + store_dest(cp, &op->Dst[0], dst); return TRUE; } static boolean emit_MIN( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); + struct x86_reg arg1 = fetch_src(cp, &op->Src[1]); struct x86_reg dst = get_xmm_writable(cp, arg0); sse_minps(cp->func, dst, arg1); - store_dest(cp, &op->FullDstRegisters[0], dst); + store_dest(cp, &op->Dst[0], dst); return TRUE; } static boolean emit_MOV( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); struct x86_reg dst = get_xmm_writable(cp, arg0); /* potentially nothing to do */ - store_dest(cp, &op->FullDstRegisters[0], dst); + store_dest(cp, &op->Dst[0], dst); return TRUE; } static boolean emit_MUL( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); + struct x86_reg arg1 = fetch_src(cp, &op->Src[1]); struct x86_reg dst = get_xmm_writable(cp, arg0); sse_mulps(cp->func, dst, arg1); - store_dest(cp, &op->FullDstRegisters[0], dst); + store_dest(cp, &op->Dst[0], dst); return TRUE; } static boolean emit_MAD( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); - struct x86_reg arg2 = fetch_src(cp, &op->FullSrcRegisters[2]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); + struct x86_reg arg1 = fetch_src(cp, &op->Src[1]); + struct x86_reg arg2 = fetch_src(cp, &op->Src[2]); /* If we can't clobber old contents of arg0, get a temporary & copy * it there, then clobber it... @@ -1393,7 +1393,7 @@ static boolean emit_MAD( struct aos_compilation *cp, const struct tgsi_full_inst sse_mulps(cp->func, arg0, arg1); sse_addps(cp->func, arg0, arg2); - store_dest(cp, &op->FullDstRegisters[0], arg0); + store_dest(cp, &op->Dst[0], arg0); return TRUE; } @@ -1425,13 +1425,13 @@ static float PIPE_CDECL _exp2(float x) static boolean emit_POW( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { #if 0 - x87_fld_src(cp, &op->FullSrcRegisters[1], 0); /* a1.x */ - x87_fld_src(cp, &op->FullSrcRegisters[0], 0); /* a0.x a1.x */ + x87_fld_src(cp, &op->Src[1], 0); /* a1.x */ + x87_fld_src(cp, &op->Src[0], 0); /* a0.x a1.x */ x87_fyl2x(cp->func); /* a1*log2(a0) */ x87_emit_ex2( cp ); /* 2^(a1*log2(a0)) */ - x87_fstp_dest4(cp, &op->FullDstRegisters[0]); + x87_fstp_dest4(cp, &op->Dst[0]); #else uint i; @@ -1450,9 +1450,9 @@ static boolean emit_POW( struct aos_compilation *cp, const struct tgsi_full_inst x86_lea( cp->func, cp->stack_ESP, x86_make_disp(cp->stack_ESP, -8) ); - x87_fld_src( cp, &op->FullSrcRegisters[1], 0 ); + x87_fld_src( cp, &op->Src[1], 0 ); x87_fstp( cp->func, x86_make_disp( cp->stack_ESP, 4 ) ); - x87_fld_src( cp, &op->FullSrcRegisters[0], 0 ); + x87_fld_src( cp, &op->Src[0], 0 ); x87_fstp( cp->func, x86_make_disp( cp->stack_ESP, 0 ) ); /* tmp_EAX has been pushed & will be restored below */ @@ -1467,7 +1467,7 @@ static boolean emit_POW( struct aos_compilation *cp, const struct tgsi_full_inst */ cp->func->x87_stack++; - x87_fstp_dest4( cp, &op->FullDstRegisters[0] ); + x87_fstp_dest4( cp, &op->Dst[0] ); #endif return TRUE; } @@ -1493,7 +1493,7 @@ static boolean emit_EXPBASE2( struct aos_compilation *cp, const struct tgsi_full x86_lea( cp->func, cp->stack_ESP, x86_make_disp(cp->stack_ESP, -4) ); - x87_fld_src( cp, &op->FullSrcRegisters[0], 0 ); + x87_fld_src( cp, &op->Src[0], 0 ); x87_fstp( cp->func, x86_make_disp( cp->stack_ESP, 0 ) ); /* tmp_EAX has been pushed & will be restored below */ @@ -1508,7 +1508,7 @@ static boolean emit_EXPBASE2( struct aos_compilation *cp, const struct tgsi_full */ cp->func->x87_stack++; - x87_fstp_dest4( cp, &op->FullDstRegisters[0] ); + x87_fstp_dest4( cp, &op->Dst[0] ); return TRUE; } @@ -1517,7 +1517,7 @@ static boolean emit_EXPBASE2( struct aos_compilation *cp, const struct tgsi_full static boolean emit_RCP( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); struct x86_reg dst = aos_get_xmm_reg(cp); if (cp->have_sse2) { @@ -1531,7 +1531,7 @@ static boolean emit_RCP( struct aos_compilation *cp, const struct tgsi_full_inst sse_divss(cp->func, dst, arg0); } - store_scalar_dest(cp, &op->FullDstRegisters[0], dst); + store_scalar_dest(cp, &op->Dst[0], dst); return TRUE; } @@ -1551,14 +1551,14 @@ static boolean emit_RCP( struct aos_compilation *cp, const struct tgsi_full_inst static boolean emit_RSQ( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { if (0) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); struct x86_reg r = aos_get_xmm_reg(cp); sse_rsqrtss(cp->func, r, arg0); - store_scalar_dest(cp, &op->FullDstRegisters[0], r); + store_scalar_dest(cp, &op->Dst[0], r); return TRUE; } else { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); struct x86_reg r = aos_get_xmm_reg(cp); struct x86_reg neg_half = get_reg_ptr( cp, AOS_FILE_INTERNAL, IMM_RSQ ); @@ -1578,7 +1578,7 @@ static boolean emit_RSQ( struct aos_compilation *cp, const struct tgsi_full_inst sse_addss( cp->func, tmp, one_point_five ); /* 1.5 - .5 * a * r * r */ sse_mulss( cp->func, r, tmp ); /* r * (1.5 - .5 * a * r * r) */ - store_scalar_dest(cp, &op->FullDstRegisters[0], r); + store_scalar_dest(cp, &op->Dst[0], r); aos_release_xmm_reg(cp, tmp.idx); @@ -1589,23 +1589,23 @@ static boolean emit_RSQ( struct aos_compilation *cp, const struct tgsi_full_inst static boolean emit_SGE( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); + struct x86_reg arg1 = fetch_src(cp, &op->Src[1]); struct x86_reg ones = aos_get_internal(cp, IMM_ONES); struct x86_reg dst = get_xmm_writable(cp, arg0); sse_cmpps(cp->func, dst, arg1, cc_NotLessThan); sse_andps(cp->func, dst, ones); - store_dest(cp, &op->FullDstRegisters[0], dst); + store_dest(cp, &op->Dst[0], dst); return TRUE; } static boolean emit_SIN( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - x87_fld_src(cp, &op->FullSrcRegisters[0], 0); + x87_fld_src(cp, &op->Src[0], 0); x87_fsin(cp->func); - x87_fstp_dest4(cp, &op->FullDstRegisters[0]); + x87_fstp_dest4(cp, &op->Dst[0]); return TRUE; } @@ -1613,46 +1613,46 @@ static boolean emit_SIN( struct aos_compilation *cp, const struct tgsi_full_inst static boolean emit_SLT( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); + struct x86_reg arg1 = fetch_src(cp, &op->Src[1]); struct x86_reg ones = aos_get_internal(cp, IMM_ONES); struct x86_reg dst = get_xmm_writable(cp, arg0); sse_cmpps(cp->func, dst, arg1, cc_LessThan); sse_andps(cp->func, dst, ones); - store_dest(cp, &op->FullDstRegisters[0], dst); + store_dest(cp, &op->Dst[0], dst); return TRUE; } static boolean emit_SUB( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); + struct x86_reg arg1 = fetch_src(cp, &op->Src[1]); struct x86_reg dst = get_xmm_writable(cp, arg0); sse_subps(cp->func, dst, arg1); - store_dest(cp, &op->FullDstRegisters[0], dst); + store_dest(cp, &op->Dst[0], dst); return TRUE; } static boolean emit_TRUNC( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); struct x86_reg tmp0 = aos_get_xmm_reg(cp); sse2_cvttps2dq(cp->func, tmp0, arg0); sse2_cvtdq2ps(cp->func, tmp0, tmp0); - store_dest(cp, &op->FullDstRegisters[0], tmp0); + store_dest(cp, &op->Dst[0], tmp0); return TRUE; } static boolean emit_XPD( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); + struct x86_reg arg1 = fetch_src(cp, &op->Src[1]); struct x86_reg tmp0 = aos_get_xmm_reg(cp); struct x86_reg tmp1 = aos_get_xmm_reg(cp); @@ -1670,7 +1670,7 @@ static boolean emit_XPD( struct aos_compilation *cp, const struct tgsi_full_inst aos_release_xmm_reg(cp, tmp0.idx); - store_dest(cp, &op->FullDstRegisters[0], tmp1); + store_dest(cp, &op->Dst[0], tmp1); return TRUE; } @@ -1897,10 +1897,10 @@ static void find_last_write_outputs( struct aos_compilation *cp ) continue; for (i = 0; i < TGSI_FULL_MAX_DST_REGISTERS; i++) { - if (parse.FullToken.FullInstruction.FullDstRegisters[i].DstRegister.File == + if (parse.FullToken.FullInstruction.Dst[i].Register.File == TGSI_FILE_OUTPUT) { - unsigned idx = parse.FullToken.FullInstruction.FullDstRegisters[i].DstRegister.Index; + unsigned idx = parse.FullToken.FullInstruction.Dst[i].Register.Index; cp->output_last_write[idx] = this_instruction; } } diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index 7ee567d4789..d16692584e5 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -147,11 +147,12 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient, vsvg->base.vs->run_linear( vsvg->base.vs, temp_buffer, temp_buffer, - (const float (*)[4])vsvg->base.vs->draw->pt.user.constants, + (const float (*)[4])vsvg->base.vs->draw->pt.user.vs_constants, count, temp_vertex_stride, temp_vertex_stride); + /* FIXME: geometry shading? */ if (vsvg->base.key.clip) { /* not really handling clipping, just do the rhw so we can @@ -207,7 +208,7 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient, vsvg->base.vs->run_linear( vsvg->base.vs, temp_buffer, temp_buffer, - (const float (*)[4])vsvg->base.vs->draw->pt.user.constants, + (const float (*)[4])vsvg->base.vs->draw->pt.user.vs_constants, count, temp_vertex_stride, temp_vertex_stride); diff --git a/src/gallium/auxiliary/gallivm/Makefile b/src/gallium/auxiliary/gallivm/Makefile deleted file mode 100644 index 5a96d94ec37..00000000000 --- a/src/gallium/auxiliary/gallivm/Makefile +++ /dev/null @@ -1,92 +0,0 @@ -# -*-makefile-*- -TOP = ../../../.. -include $(TOP)/configs/current - -LIBNAME = gallivm - - -GALLIVM_SOURCES = \ - gallivm.cpp \ - gallivm_cpu.cpp \ - instructions.cpp \ - loweringpass.cpp \ - tgsitollvm.cpp \ - storage.cpp \ - storagesoa.cpp \ - instructionssoa.cpp - -INC_SOURCES = gallivm_builtins.cpp gallivmsoabuiltins.cpp - -CPP_SOURCES = \ - $(GALLIVM_SOURCES) - -C_SOURCES = -ASM_SOURCES = - -OBJECTS = $(C_SOURCES:.c=.o) \ - $(CPP_SOURCES:.cpp=.o) \ - $(ASM_SOURCES:.S=.o) - -### Include directories -INCLUDES = \ - -I. \ - -I$(TOP)/src/gallium/drivers \ - -I$(TOP)/src/gallium/auxiliary \ - -I$(TOP)/src/gallium/include \ - -I$(TOP)/src/mesa \ - -I$(TOP)/include - - -##### RULES ##### - -.c.o: - $(CC) -c $(INCLUDES) $(LLVM_CFLAGS) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@ - -.cpp.o: - $(CXX) -c $(INCLUDES) $(LLVM_CXXFLAGS) $(CXXFLAGS) $(DRIVER_DEFINES) $< -o $@ - -.S.o: - $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@ - -##### TARGETS ##### - -default:: depend symlinks $(LIBNAME) - - -$(LIBNAME): $(OBJECTS) Makefile - $(TOP)/bin/mklib -o $@ -static $(OBJECTS) - - -depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(INC_SOURCES) - rm -f depend - touch depend - $(MKDEP) $(MKDEP_OPTIONS) $(DRIVER_DEFINES) $(INCLUDES) $(C_SOURCES) $(CPP_SOURCES) \ - $(ASM_SOURCES) $(INC_SOURCES) 2> /dev/null - - -gallivm_builtins.cpp: llvm_builtins.c - clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp1.bin - (echo "static const unsigned char llvm_builtins_data[] = {"; od -txC temp1.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/,0x00};/") >$@ - rm temp1.bin - -gallivmsoabuiltins.cpp: soabuiltins.c - clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp2.bin - (echo "static const unsigned char soabuiltins_data[] = {"; od -txC temp2.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/,0x00};/") >$@ - rm temp2.bin - -# Emacs tags -tags: - etags `find . -name \*.[ch]` `find ../include` - - -# Remove .o and backup files -clean: - -rm -f *.o */*.o *~ *.so *~ server/*.o - -rm -f depend depend.bak - -rm -f gallivm_builtins.cpp - -rm -f gallivmsoabuiltins.cpp - -symlinks: - - -include depend diff --git a/src/gallium/auxiliary/gallivm/SConscript b/src/gallium/auxiliary/gallivm/SConscript deleted file mode 100644 index c0aa51b90a9..00000000000 --- a/src/gallium/auxiliary/gallivm/SConscript +++ /dev/null @@ -1,16 +0,0 @@ -Import('*') - -gallivm = env.ConvenienceLibrary( - target = 'gallivm', - source = [ - 'gallivm.cpp', - 'gallivm_cpu.cpp', - 'instructions.cpp', - 'loweringpass.cpp', - 'tgsitollvm.cpp', - 'storage.cpp', - 'storagesoa.cpp', - 'instructionssoa.cpp', - ]) - -auxiliaries.insert(0, gallivm) diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp index bf84401e112..8f7d3b71004 100644 --- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp @@ -94,8 +94,8 @@ translate_declaration(struct gallivm_ir *prog, unsigned first, last, mask; uint interp_method; - first = decl->DeclarationRange.First; - last = decl->DeclarationRange.Last; + first = decl->Range.First; + last = decl->Range.Last; mask = decl->Declaration.UsageMask; /* Do not touch WPOS.xy */ @@ -149,7 +149,7 @@ translate_declarationir(struct gallivm_ir *, struct tgsi_full_declaration *) { if (decl->Declaration.File == TGSI_FILE_ADDRESS) { - int idx = decl->DeclarationRange.First; + int idx = decl->Range.First; storage->addAddress(idx); } } @@ -234,26 +234,26 @@ translate_instruction(llvm::Module *module, inputs[3] = 0; for (int i = 0; i < inst->Instruction.NumSrcRegs; ++i) { - struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; + struct tgsi_full_src_register *src = &inst->Src[i]; llvm::Value *val = 0; llvm::Value *indIdx = 0; - if (src->SrcRegister.Indirect) { - indIdx = storage->addrElement(src->SrcRegisterInd.Index); + if (src->Register.Indirect) { + indIdx = storage->addrElement(src->Indirect.Index); indIdx = storage->extractIndex(indIdx); } - if (src->SrcRegister.File == TGSI_FILE_CONSTANT) { - val = storage->constElement(src->SrcRegister.Index, indIdx); - } else if (src->SrcRegister.File == TGSI_FILE_INPUT) { - val = storage->inputElement(src->SrcRegister.Index, indIdx); - } else if (src->SrcRegister.File == TGSI_FILE_TEMPORARY) { - val = storage->tempElement(src->SrcRegister.Index); - } else if (src->SrcRegister.File == TGSI_FILE_OUTPUT) { - val = storage->outputElement(src->SrcRegister.Index, indIdx); - } else if (src->SrcRegister.File == TGSI_FILE_IMMEDIATE) { - val = storage->immediateElement(src->SrcRegister.Index); + if (src->Register.File == TGSI_FILE_CONSTANT) { + val = storage->constElement(src->Register.Index, indIdx); + } else if (src->Register.File == TGSI_FILE_INPUT) { + val = storage->inputElement(src->Register.Index, indIdx); + } else if (src->Register.File == TGSI_FILE_TEMPORARY) { + val = storage->tempElement(src->Register.Index); + } else if (src->Register.File == TGSI_FILE_OUTPUT) { + val = storage->outputElement(src->Register.Index, indIdx); + } else if (src->Register.File == TGSI_FILE_IMMEDIATE) { + val = storage->immediateElement(src->Register.Index); } else { - fprintf(stderr, "ERROR: not supported llvm source %d\n", src->SrcRegister.File); + fprintf(stderr, "ERROR: not supported llvm source %d\n", src->Register.File); return; } @@ -552,7 +552,7 @@ translate_instruction(llvm::Module *module, break; case TGSI_OPCODE_SHL: break; - case TGSI_OPCODE_SHR: + case TGSI_OPCODE_ISHR: break; case TGSI_OPCODE_AND: break; @@ -656,14 +656,14 @@ translate_instruction(llvm::Module *module, /* store results */ for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) { - struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; - - if (dst->DstRegister.File == TGSI_FILE_OUTPUT) { - storage->setOutputElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask); - } else if (dst->DstRegister.File == TGSI_FILE_TEMPORARY) { - storage->setTempElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask); - } else if (dst->DstRegister.File == TGSI_FILE_ADDRESS) { - storage->setAddrElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask); + struct tgsi_full_dst_register *dst = &inst->Dst[i]; + + if (dst->Register.File == TGSI_FILE_OUTPUT) { + storage->setOutputElement(dst->Register.Index, out, dst->Register.WriteMask); + } else if (dst->Register.File == TGSI_FILE_TEMPORARY) { + storage->setTempElement(dst->Register.Index, out, dst->Register.WriteMask); + } else if (dst->Register.File == TGSI_FILE_ADDRESS) { + storage->setAddrElement(dst->Register.Index, out, dst->Register.WriteMask); } else { fprintf(stderr, "ERROR: unsupported LLVM destination!"); assert(!"wrong destination"); @@ -683,16 +683,16 @@ translate_instructionir(llvm::Module *module, std::vector< std::vector<llvm::Value*> > inputs(inst->Instruction.NumSrcRegs); for (int i = 0; i < inst->Instruction.NumSrcRegs; ++i) { - struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; + struct tgsi_full_src_register *src = &inst->Src[i]; std::vector<llvm::Value*> val; llvm::Value *indIdx = 0; int swizzle = swizzleInt(src); - if (src->SrcRegister.Indirect) { - indIdx = storage->addrElement(src->SrcRegisterInd.Index); + if (src->Register.Indirect) { + indIdx = storage->addrElement(src->Indirect.Index); } - val = storage->load((enum tgsi_file_type)src->SrcRegister.File, - src->SrcRegister.Index, swizzle, instr->getIRBuilder(), indIdx); + val = storage->load((enum tgsi_file_type)src->Register.File, + src->Register.Index, swizzle, instr->getIRBuilder(), indIdx); inputs[i] = val; } @@ -919,7 +919,7 @@ translate_instructionir(llvm::Module *module, break; case TGSI_OPCODE_SHL: break; - case TGSI_OPCODE_SHR: + case TGSI_OPCODE_ISHR: break; case TGSI_OPCODE_AND: break; @@ -993,9 +993,9 @@ translate_instructionir(llvm::Module *module, /* store results */ for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) { - struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; - storage->store((enum tgsi_file_type)dst->DstRegister.File, - dst->DstRegister.Index, out, dst->DstRegister.WriteMask, + struct tgsi_full_dst_register *dst = &inst->Dst[i]; + storage->store((enum tgsi_file_type)dst->Register.File, + dst->Register.Index, out, dst->Register.WriteMask, instr->getIRBuilder() ); } } diff --git a/src/gallium/auxiliary/indices/Makefile b/src/gallium/auxiliary/indices/Makefile deleted file mode 100644 index f2ebc3f410a..00000000000 --- a/src/gallium/auxiliary/indices/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -TOP = ../../../.. -include $(TOP)/configs/current - -LIBNAME = indices - -C_SOURCES = \ - u_indices_gen.c \ - u_unfilled_gen.c - -include ../../Makefile.template - -u_indices_gen.c: u_indices_gen.py - python $< > $@ - -u_unfilled_gen.c: u_unfilled_gen.py - python $< > $@ diff --git a/src/gallium/auxiliary/indices/SConscript b/src/gallium/auxiliary/indices/SConscript deleted file mode 100644 index 712e215534f..00000000000 --- a/src/gallium/auxiliary/indices/SConscript +++ /dev/null @@ -1,28 +0,0 @@ -Import('*') - -from sys import executable as python_cmd - -env.CodeGenerate( - target = 'u_indices_gen.c', - script = 'u_indices_gen.py', - source = [], - command = python_cmd + ' $SCRIPT > $TARGET' -) - -env.CodeGenerate( - target = 'u_unfilled_gen.c', - script = 'u_unfilled_gen.py', - source = [], - command = python_cmd + ' $SCRIPT > $TARGET' -) - -indices = env.ConvenienceLibrary( - target = 'indices', - source = [ -# 'u_indices.c', -# 'u_unfilled_indices.c', - 'u_indices_gen.c', - 'u_unfilled_gen.c', - ]) - -auxiliaries.insert(0, indices) diff --git a/src/gallium/auxiliary/pipebuffer/Makefile b/src/gallium/auxiliary/pipebuffer/Makefile deleted file mode 100644 index 1c00ba8d986..00000000000 --- a/src/gallium/auxiliary/pipebuffer/Makefile +++ /dev/null @@ -1,19 +0,0 @@ -TOP = ../../../.. -include $(TOP)/configs/current - -LIBNAME = pipebuffer - -C_SOURCES = \ - pb_buffer_fenced.c \ - pb_buffer_malloc.c \ - pb_bufmgr_alt.c \ - pb_bufmgr_cache.c \ - pb_bufmgr_debug.c \ - pb_bufmgr_fenced.c \ - pb_bufmgr_mm.c \ - pb_bufmgr_ondemand.c \ - pb_bufmgr_pool.c \ - pb_bufmgr_slab.c \ - pb_validate.c - -include ../../Makefile.template diff --git a/src/gallium/auxiliary/pipebuffer/SConscript b/src/gallium/auxiliary/pipebuffer/SConscript deleted file mode 100644 index 8e9f06abe45..00000000000 --- a/src/gallium/auxiliary/pipebuffer/SConscript +++ /dev/null @@ -1,19 +0,0 @@ -Import('*') - -pipebuffer = env.ConvenienceLibrary( - target = 'pipebuffer', - source = [ - 'pb_buffer_fenced.c', - 'pb_buffer_malloc.c', - 'pb_bufmgr_alt.c', - 'pb_bufmgr_cache.c', - 'pb_bufmgr_debug.c', - 'pb_bufmgr_fenced.c', - 'pb_bufmgr_mm.c', - 'pb_bufmgr_ondemand.c', - 'pb_bufmgr_pool.c', - 'pb_bufmgr_slab.c', - 'pb_validate.c', - ]) - -auxiliaries.insert(0, pipebuffer) diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer.h b/src/gallium/auxiliary/pipebuffer/pb_buffer.h index 4ef372233f0..eb7e84be848 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer.h +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer.h @@ -237,8 +237,9 @@ pb_reference(struct pb_buffer **dst, { struct pb_buffer *old = *dst; - if (pipe_reference((struct pipe_reference**)dst, &src->base.reference)) + if (pipe_reference(&(*dst)->base.reference, &src->base.reference)) pb_destroy( old ); + *dst = src; } diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index 1ee2bf9b7c5..ba6f7b15f9e 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2007-2009 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -18,7 +18,7 @@ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @@ -246,6 +246,7 @@ fenced_buffer_list_check_free_locked(struct fenced_buffer_list *fenced_list, struct pb_fence_ops *ops = fenced_list->ops; struct list_head *curr, *next; struct fenced_buffer *fenced_buf; + struct pb_buffer *pb_buf; struct pipe_fence_handle *prev_fence = NULL; curr = fenced_list->delayed.next; @@ -274,7 +275,8 @@ fenced_buffer_list_check_free_locked(struct fenced_buffer_list *fenced_list, fenced_buffer_remove_locked(fenced_list, fenced_buf); pipe_mutex_unlock(fenced_buf->mutex); - pb_reference((struct pb_buffer **)&fenced_buf, NULL); + pb_buf = &fenced_buf->base; + pb_reference(&pb_buf, NULL); curr = next; next = curr->next; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c index 57d1ede45a4..7b34c8e3578 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c @@ -294,7 +294,7 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr, LIST_DEL(&buf->head); pipe_mutex_unlock(mgr->mutex); /* Increase refcount */ - pb_reference((struct pb_buffer**)&buf, &buf->base); + pipe_reference(NULL, &buf->base.base.reference); return &buf->base; } diff --git a/src/gallium/auxiliary/rbug/Makefile b/src/gallium/auxiliary/rbug/Makefile deleted file mode 100644 index cd12e8468fc..00000000000 --- a/src/gallium/auxiliary/rbug/Makefile +++ /dev/null @@ -1,14 +0,0 @@ -TOP = ../../../.. -include $(TOP)/configs/current - -LIBNAME = rbug - -C_SOURCES = \ - rbug_connection.c \ - rbug_core.c \ - rbug_texture.c \ - rbug_context.c \ - rbug_shader.c \ - rbug_demarshal.c - -include ../../Makefile.template diff --git a/src/gallium/auxiliary/rbug/SConscript b/src/gallium/auxiliary/rbug/SConscript deleted file mode 100644 index 4a9afb45d3c..00000000000 --- a/src/gallium/auxiliary/rbug/SConscript +++ /dev/null @@ -1,14 +0,0 @@ -Import('*') - -rbug = env.ConvenienceLibrary( - target = 'rbug', - source = [ - 'rbug_core.c', - 'rbug_shader.c', - 'rbug_context.c', - 'rbug_texture.c', - 'rbug_demarshal.c', - 'rbug_connection.c', - ]) - -auxiliaries.insert(0, rbug) diff --git a/src/gallium/auxiliary/rtasm/Makefile b/src/gallium/auxiliary/rtasm/Makefile deleted file mode 100644 index ab8ea464c6e..00000000000 --- a/src/gallium/auxiliary/rtasm/Makefile +++ /dev/null @@ -1,13 +0,0 @@ -TOP = ../../../.. -include $(TOP)/configs/current - -LIBNAME = rtasm - -C_SOURCES = \ - rtasm_cpu.c \ - rtasm_execmem.c \ - rtasm_x86sse.c \ - rtasm_ppc.c \ - rtasm_ppc_spe.c - -include ../../Makefile.template diff --git a/src/gallium/auxiliary/rtasm/SConscript b/src/gallium/auxiliary/rtasm/SConscript deleted file mode 100644 index eb48368accb..00000000000 --- a/src/gallium/auxiliary/rtasm/SConscript +++ /dev/null @@ -1,13 +0,0 @@ -Import('*') - -rtasm = env.ConvenienceLibrary( - target = 'rtasm', - source = [ - 'rtasm_cpu.c', - 'rtasm_execmem.c', - 'rtasm_x86sse.c', - 'rtasm_ppc.c', - 'rtasm_ppc_spe.c', - ]) - -auxiliaries.insert(0, rtasm) diff --git a/src/gallium/auxiliary/sct/Makefile b/src/gallium/auxiliary/sct/Makefile deleted file mode 100644 index a7d111b6891..00000000000 --- a/src/gallium/auxiliary/sct/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -TOP = ../../../.. -include $(TOP)/configs/current - -LIBNAME = sct - -C_SOURCES = \ - sct.c - -include ../../Makefile.template diff --git a/src/gallium/auxiliary/sct/SConscript b/src/gallium/auxiliary/sct/SConscript deleted file mode 100644 index 76927d973f8..00000000000 --- a/src/gallium/auxiliary/sct/SConscript +++ /dev/null @@ -1,9 +0,0 @@ -Import('*') - -sct = env.ConvenienceLibrary( - target = 'sct', - source = [ - 'sct.c' - ]) - -auxiliaries.insert(0, sct) diff --git a/src/gallium/auxiliary/sct/sct.c b/src/gallium/auxiliary/sct/sct.c deleted file mode 100644 index 722d2b7e66e..00000000000 --- a/src/gallium/auxiliary/sct/sct.c +++ /dev/null @@ -1,453 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include "util/u_memory.h" -#include "pipe/p_state.h" -#include "sct.h" - - -struct texture_list -{ - struct pipe_texture *texture; - struct texture_list *next; -}; - - - -#define MAX_SURFACES ((PIPE_MAX_COLOR_BUFS) + 1) - -struct sct_context -{ - const struct pipe_context *context; - - /** surfaces the context is drawing into */ - struct pipe_surface *surfaces[MAX_SURFACES]; - - /** currently bound textures */ - struct pipe_texture *textures[PIPE_MAX_SAMPLERS]; - - /** previously bound textures, used but not flushed */ - struct texture_list *textures_used; - - boolean needs_flush; - - struct sct_context *next; -}; - - - -struct sct_surface -{ - const struct pipe_surface *surface; - - /** list of contexts drawing to this surface */ - struct sct_context_list *contexts; - - struct sct_surface *next; -}; - - - -/** - * Find the surface_info for the given pipe_surface - */ -static struct sct_surface * -find_surface_info(struct surface_context_tracker *sct, - const struct pipe_surface *surface) -{ - struct sct_surface *si; - for (si = sct->surfaces; si; si = si->next) - if (si->surface == surface) - return si; - return NULL; -} - - -/** - * As above, but create new surface_info if surface is new. - */ -static struct sct_surface * -find_create_surface_info(struct surface_context_tracker *sct, - const struct pipe_surface *surface) -{ - struct sct_surface *si = find_surface_info(sct, surface); - if (si) - return si; - - /* alloc new */ - si = CALLOC_STRUCT(sct_surface); - if (si) { - si->surface = surface; - - /* insert at head */ - si->next = sct->surfaces; - sct->surfaces = si; - } - - return si; -} - - -/** - * Find a context_info for the given context. - */ -static struct sct_context * -find_context_info(struct surface_context_tracker *sct, - const struct pipe_context *context) -{ - struct sct_context *ci; - for (ci = sct->contexts; ci; ci = ci->next) - if (ci->context == context) - return ci; - return NULL; -} - - -/** - * As above, but create new context_info if context is new. - */ -static struct sct_context * -find_create_context_info(struct surface_context_tracker *sct, - const struct pipe_context *context) -{ - struct sct_context *ci = find_context_info(sct, context); - if (ci) - return ci; - - /* alloc new */ - ci = CALLOC_STRUCT(sct_context); - if (ci) { - ci->context = context; - - /* insert at head */ - ci->next = sct->contexts; - sct->contexts = ci; - } - - return ci; -} - - -/** - * Is the context already bound to the surface? - */ -static boolean -find_surface_context(const struct sct_surface *si, - const struct pipe_context *context) -{ - const struct sct_context_list *cl; - for (cl = si->contexts; cl; cl = cl->next) { - if (cl->context == context) { - return TRUE; - } - } - return FALSE; -} - - -/** - * Add a context to the list of contexts associated with a surface. - */ -static void -add_context_to_surface(struct sct_surface *si, - const struct pipe_context *context) -{ - struct sct_context_list *cl = CALLOC_STRUCT(sct_context_list); - if (cl) { - cl->context = context; - /* insert at head of list of contexts */ - cl->next = si->contexts; - si->contexts = cl; - } -} - - -/** - * Remove a context from the list of contexts associated with a surface. - */ -static void -remove_context_from_surface(struct sct_surface *si, - const struct pipe_context *context) -{ - struct sct_context_list *prev = NULL, *curr, *next; - - for (curr = si->contexts; curr; curr = next) { - if (curr->context == context) { - /* remove */ - if (prev) - prev->next = curr->next; - else - si->contexts = curr->next; - next = curr->next; - FREE(curr); - } - else { - prev = curr; - next = curr->next; - } - } -} - - -/** - * Unbind context from surface. - */ -static void -unbind_context_surface(struct surface_context_tracker *sct, - struct pipe_context *context, - struct pipe_surface *surface) -{ - struct sct_surface *si = find_surface_info(sct, surface); - if (si) { - remove_context_from_surface(si, context); - } -} - - -/** - * Bind context to a set of surfaces (color + Z). - * Like MakeCurrent(). - */ -void -sct_bind_surfaces(struct surface_context_tracker *sct, - struct pipe_context *context, - uint num_surf, - struct pipe_surface **surfaces) -{ - struct sct_context *ci = find_create_context_info(sct, context); - uint i; - - if (!ci) { - return; /* out of memory */ - } - - /* unbind currently bound surfaces */ - for (i = 0; i < MAX_SURFACES; i++) { - if (ci->surfaces[i]) { - unbind_context_surface(sct, context, ci->surfaces[i]); - } - } - - /* bind new surfaces */ - for (i = 0; i < num_surf; i++) { - struct sct_surface *si = find_create_surface_info(sct, surfaces[i]); - if (!find_surface_context(si, context)) { - add_context_to_surface(si, context); - } - } -} - - -/** - * Return list of contexts bound to a surface. - */ -const struct sct_context_list * -sct_get_surface_contexts(struct surface_context_tracker *sct, - const struct pipe_surface *surface) -{ - const struct sct_surface *si = find_surface_info(sct, surface); - return si->contexts; -} - - - -static boolean -find_texture(const struct sct_context *ci, - const struct pipe_texture *texture) -{ - const struct texture_list *tl; - - for (tl = ci->textures_used; tl; tl = tl->next) { - if (tl->texture == texture) { - return TRUE; - } - } - return FALSE; -} - - -/** - * Add the given texture to the context's list of used textures. - */ -static void -add_texture_used(struct sct_context *ci, - struct pipe_texture *texture) -{ - if (!find_texture(ci, texture)) { - /* add to list */ - struct texture_list *tl = CALLOC_STRUCT(texture_list); - if (tl) { - pipe_texture_reference(&tl->texture, texture); - /* insert at head */ - tl->next = ci->textures_used; - ci->textures_used = tl; - } - } -} - - -/** - * Bind a texture to a rendering context. - */ -void -sct_bind_texture(struct surface_context_tracker *sct, - struct pipe_context *context, - uint unit, - struct pipe_texture *tex) -{ - struct sct_context *ci = find_context_info(sct, context); - - if (ci->textures[unit] != tex) { - /* put texture on the 'used' list */ - add_texture_used(ci, tex); - /* bind new */ - pipe_texture_reference(&ci->textures[unit], tex); - } -} - - -/** - * Check if the given texture has been used by the rendering context - * since the last call to sct_flush_textures(). - */ -boolean -sct_is_texture_used(struct surface_context_tracker *sct, - const struct pipe_context *context, - const struct pipe_texture *texture) -{ - const struct sct_context *ci = find_context_info(sct, context); - return find_texture(ci, texture); -} - - -/** - * To be called when the image contents of a texture are changed, such - * as for gl[Copy]TexSubImage(). - * XXX this may not be needed - */ -void -sct_update_texture(struct pipe_texture *tex) -{ - -} - - -/** - * When a scene is flushed/rendered we can release the list of - * used textures. - */ -void -sct_flush_textures(struct surface_context_tracker *sct, - struct pipe_context *context) -{ - struct sct_context *ci = find_context_info(sct, context); - struct texture_list *tl, *next; - uint i; - - for (tl = ci->textures_used; tl; tl = next) { - next = tl->next; - pipe_texture_reference(&tl->texture, NULL); - FREE(tl); - } - ci->textures_used = NULL; - - /* put the currently bound textures on the 'used' list */ - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - add_texture_used(ci, ci->textures[i]); - } -} - - - -void -sct_destroy_context(struct surface_context_tracker *sct, - struct pipe_context *context) -{ - /* XXX should we require an unbinding first? */ - { - struct sct_surface *si; - for (si = sct->surfaces; si; si = si->next) { - remove_context_from_surface(si, context); - } - } - - /* remove context from context_info list */ - { - struct sct_context *ci, *next, *prev = NULL; - for (ci = sct->contexts; ci; ci = next) { - next = ci->next; - if (ci->context == context) { - if (prev) - prev->next = ci->next; - else - sct->contexts = ci->next; - FREE(ci); - } - else { - prev = ci; - } - } - } - -} - - -void -sct_destroy_surface(struct surface_context_tracker *sct, - struct pipe_surface *surface) -{ - if (1) { - /* debug/sanity: no context should be bound to surface */ - struct sct_context *ci; - uint i; - for (ci = sct->contexts; ci; ci = ci->next) { - for (i = 0; i < MAX_SURFACES; i++) { - assert(ci->surfaces[i] != surface); - } - } - } - - /* remove surface from sct_surface list */ - { - struct sct_surface *si, *next, *prev = NULL; - for (si = sct->surfaces; si; si = next) { - next = si->next; - if (si->surface == surface) { - /* unlink */ - if (prev) - prev->next = si->next; - else - sct->surfaces = si->next; - FREE(si); - } - else { - prev = si; - } - } - } -} diff --git a/src/gallium/auxiliary/sct/sct.h b/src/gallium/auxiliary/sct/sct.h deleted file mode 100644 index cf7c4d3bdfd..00000000000 --- a/src/gallium/auxiliary/sct/sct.h +++ /dev/null @@ -1,123 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * Surface/Context Tracking - * - * For some drivers, we need to monitor the binding between contexts and - * surfaces/textures. - * This code may evolve quite a bit... - */ - - -#ifndef SCT_H -#define SCT_H - - -#ifdef __cplusplus -extern "C" { -#endif - - -struct pipe_context; -struct pipe_surface; - -struct sct_context; -struct sct_surface; - - -/** - * Per-device info, basically - */ -struct surface_context_tracker -{ - struct sct_context *contexts; - struct sct_surface *surfaces; -}; - - - -/** - * Simple linked list of contexts - */ -struct sct_context_list -{ - const struct pipe_context *context; - struct sct_context_list *next; -}; - - - -extern void -sct_bind_surfaces(struct surface_context_tracker *sct, - struct pipe_context *context, - uint num_surf, - struct pipe_surface **surfaces); - - -extern void -sct_bind_texture(struct surface_context_tracker *sct, - struct pipe_context *context, - uint unit, - struct pipe_texture *texture); - - -extern void -sct_update_texture(struct pipe_texture *tex); - - -extern boolean -sct_is_texture_used(struct surface_context_tracker *sct, - const struct pipe_context *context, - const struct pipe_texture *texture); - -extern void -sct_flush_textures(struct surface_context_tracker *sct, - struct pipe_context *context); - - -extern const struct sct_context_list * -sct_get_surface_contexts(struct surface_context_tracker *sct, - const struct pipe_surface *surf); - - -extern void -sct_destroy_context(struct surface_context_tracker *sct, - struct pipe_context *context); - - -extern void -sct_destroy_surface(struct surface_context_tracker *sct, - struct pipe_surface *surface); - - - -#ifdef __cplusplus -} -#endif - -#endif /* SCT_H */ diff --git a/src/gallium/auxiliary/sct/usage.c b/src/gallium/auxiliary/sct/usage.c deleted file mode 100644 index 6227f199628..00000000000 --- a/src/gallium/auxiliary/sct/usage.c +++ /dev/null @@ -1,61 +0,0 @@ -/* surface / context tracking */ - - -/* - -context A: - render to texture T - -context B: - texture from T - ------------------------ - -flush surface: - which contexts are bound to the surface? - ------------------------ - -glTexSubImage(): - which contexts need to be flushed? - - */ - - -/* - -in MakeCurrent(): - - call sct_bind_surfaces(context, list of surfaces) to update the - dependencies between context and surfaces - - -in SurfaceFlush(), or whatever it is in D3D: - - call sct_get_surface_contexts(surface) to get a list of contexts - which are currently bound to the surface. - - - -in BindTexture(): - - call sct_bind_texture(context, texture) to indicate that the texture - is used in the scene. - - -in glTexSubImage() or RenderToTexture(): - - call sct_is_texture_used(context, texture) to determine if the texture - has been used in the scene, but the scene's not flushed. If TRUE is - returned it means the scene has to be rendered/flushed before the contents - of the texture can be changed. - - -in psb_scene_flush/terminate(): - - call sct_flush_textures(context) to tell the SCT that the textures which - were used in the scene can be released. - - - -*/ diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile deleted file mode 100644 index 5f0a580b096..00000000000 --- a/src/gallium/auxiliary/tgsi/Makefile +++ /dev/null @@ -1,22 +0,0 @@ -TOP = ../../../.. -include $(TOP)/configs/current - -LIBNAME = tgsi - -C_SOURCES = \ - tgsi_sanity.c \ - tgsi_build.c \ - tgsi_dump.c \ - tgsi_exec.c \ - tgsi_info.c \ - tgsi_iterate.c \ - tgsi_parse.c \ - tgsi_ppc.c \ - tgsi_scan.c \ - tgsi_sse2.c \ - tgsi_text.c \ - tgsi_transform.c \ - tgsi_ureg.c \ - tgsi_util.c - -include ../../Makefile.template diff --git a/src/gallium/auxiliary/tgsi/SConscript b/src/gallium/auxiliary/tgsi/SConscript deleted file mode 100644 index b6bc2924f06..00000000000 --- a/src/gallium/auxiliary/tgsi/SConscript +++ /dev/null @@ -1,23 +0,0 @@ -Import('*') - -tgsi = env.ConvenienceLibrary( - target = 'tgsi', - source = [ - 'tgsi_build.c', - 'tgsi_dump.c', - 'tgsi_dump_c.c', - 'tgsi_exec.c', - 'tgsi_info.c', - 'tgsi_iterate.c', - 'tgsi_parse.c', - 'tgsi_sanity.c', - 'tgsi_scan.c', - 'tgsi_ppc.c', - 'tgsi_sse2.c', - 'tgsi_text.c', - 'tgsi_transform.c', - 'tgsi_ureg.c', - 'tgsi_util.c', - ]) - -auxiliaries.insert(0, tgsi) diff --git a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt index eb492076b7d..080fd4c7310 100644 --- a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt +++ b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt @@ -1129,3 +1129,35 @@ TGSI Instruction Specification target Label of target instruction. + +3 Other tokens +=============== + + +3.1 Declaration Semantic +------------------------- + + + Follows Declaration token if Semantic bit is set. + + Since its purpose is to link a shader with other stages of the pipeline, + it is valid to follow only those Declaration tokens that declare a register + either in INPUT or OUTPUT file. + + SemanticName field contains the semantic name of the register being declared. + There is no default value. + + SemanticIndex is an optional subscript that can be used to distinguish + different register declarations with the same semantic name. The default value + is 0. + + The meanings of the individual semantic names are explained in the following + sections. + + +3.1.1 FACE + + Valid only in a fragment shader INPUT declaration. + + FACE.x is negative when the primitive is back facing. FACE.x is positive + when the primitive is front facing. diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c index 4fa10e2f7e3..de9cbc86305 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -30,21 +30,6 @@ #include "tgsi_build.h" #include "tgsi_parse.h" -/* - * version - */ - -struct tgsi_version -tgsi_build_version( void ) -{ - struct tgsi_version version; - - version.MajorVersion = 1; - version.MinorVersion = 1; - version.Padding = 0; - - return version; -} /* * header @@ -122,7 +107,6 @@ tgsi_default_declaration( void ) declaration.Centroid = 0; declaration.Invariant = 0; declaration.Padding = 0; - declaration.Extended = 0; return declaration; } @@ -173,7 +157,7 @@ tgsi_default_full_declaration( void ) struct tgsi_full_declaration full_declaration; full_declaration.Declaration = tgsi_default_declaration(); - full_declaration.DeclarationRange = tgsi_default_declaration_range(); + full_declaration.Range = tgsi_default_declaration_range(); full_declaration.Semantic = tgsi_default_declaration_semantic(); return full_declaration; @@ -210,8 +194,8 @@ tgsi_build_full_declaration( size++; *dr = tgsi_build_declaration_range( - full_decl->DeclarationRange.First, - full_decl->DeclarationRange.Last, + full_decl->Range.First, + full_decl->Range.Last, declaration, header ); @@ -224,8 +208,8 @@ tgsi_build_full_declaration( size++; *ds = tgsi_build_declaration_semantic( - full_decl->Semantic.SemanticName, - full_decl->Semantic.SemanticIndex, + full_decl->Semantic.Name, + full_decl->Semantic.Index, declaration, header ); } @@ -270,8 +254,8 @@ tgsi_default_declaration_semantic( void ) { struct tgsi_declaration_semantic ds; - ds.SemanticName = TGSI_SEMANTIC_POSITION; - ds.SemanticIndex = 0; + ds.Name = TGSI_SEMANTIC_POSITION; + ds.Index = 0; ds.Padding = 0; return ds; @@ -290,8 +274,8 @@ tgsi_build_declaration_semantic( assert( semantic_index <= 0xFFFF ); ds = tgsi_default_declaration_semantic(); - ds.SemanticName = semantic_name; - ds.SemanticIndex = semantic_index; + ds.Name = semantic_name; + ds.Index = semantic_index; declaration_grow( declaration, header ); @@ -311,7 +295,6 @@ tgsi_default_immediate( void ) immediate.NrTokens = 1; immediate.DataType = TGSI_IMM_FLOAT32; immediate.Padding = 0; - immediate.Extended = 0; return immediate; } @@ -416,24 +399,26 @@ tgsi_default_instruction( void ) struct tgsi_instruction instruction; instruction.Type = TGSI_TOKEN_TYPE_INSTRUCTION; - instruction.NrTokens = 1; + instruction.NrTokens = 0; instruction.Opcode = TGSI_OPCODE_MOV; instruction.Saturate = TGSI_SAT_NONE; + instruction.Predicate = 0; instruction.NumDstRegs = 1; instruction.NumSrcRegs = 1; + instruction.Label = 0; + instruction.Texture = 0; instruction.Padding = 0; - instruction.Extended = 0; return instruction; } struct tgsi_instruction -tgsi_build_instruction( - unsigned opcode, - unsigned saturate, - unsigned num_dst_regs, - unsigned num_src_regs, - struct tgsi_header *header ) +tgsi_build_instruction(unsigned opcode, + unsigned saturate, + unsigned predicate, + unsigned num_dst_regs, + unsigned num_src_regs, + struct tgsi_header *header) { struct tgsi_instruction instruction; @@ -445,6 +430,7 @@ tgsi_build_instruction( instruction = tgsi_default_instruction(); instruction.Opcode = opcode; instruction.Saturate = saturate; + instruction.Predicate = predicate; instruction.NumDstRegs = num_dst_regs; instruction.NumSrcRegs = num_src_regs; @@ -472,18 +458,16 @@ tgsi_default_full_instruction( void ) unsigned i; full_instruction.Instruction = tgsi_default_instruction(); - full_instruction.InstructionExtLabel = tgsi_default_instruction_ext_label(); - full_instruction.InstructionExtTexture = tgsi_default_instruction_ext_texture(); - full_instruction.InstructionExtPredicate = tgsi_default_instruction_ext_predicate(); + full_instruction.Predicate = tgsi_default_instruction_predicate(); + full_instruction.Label = tgsi_default_instruction_label(); + full_instruction.Texture = tgsi_default_instruction_texture(); for( i = 0; i < TGSI_FULL_MAX_DST_REGISTERS; i++ ) { - full_instruction.FullDstRegisters[i] = tgsi_default_full_dst_register(); + full_instruction.Dst[i] = tgsi_default_full_dst_register(); } for( i = 0; i < TGSI_FULL_MAX_SRC_REGISTERS; i++ ) { - full_instruction.FullSrcRegisters[i] = tgsi_default_full_src_register(); + full_instruction.Src[i] = tgsi_default_full_src_register(); } - full_instruction.Flags = 0x0; - return full_instruction; } @@ -504,77 +488,70 @@ tgsi_build_full_instruction( instruction = (struct tgsi_instruction *) &tokens[size]; size++; - *instruction = tgsi_build_instruction( - full_inst->Instruction.Opcode, - full_inst->Instruction.Saturate, - full_inst->Instruction.NumDstRegs, - full_inst->Instruction.NumSrcRegs, - header ); + *instruction = tgsi_build_instruction(full_inst->Instruction.Opcode, + full_inst->Instruction.Saturate, + full_inst->Instruction.Predicate, + full_inst->Instruction.NumDstRegs, + full_inst->Instruction.NumSrcRegs, + header); prev_token = (struct tgsi_token *) instruction; - if( tgsi_compare_instruction_ext_label( - full_inst->InstructionExtLabel, - tgsi_default_instruction_ext_label() ) ) { - struct tgsi_instruction_ext_label *instruction_ext_label; + if (full_inst->Instruction.Predicate) { + struct tgsi_instruction_predicate *instruction_predicate; - if( maxsize <= size ) + if (maxsize <= size) { return 0; - instruction_ext_label = - (struct tgsi_instruction_ext_label *) &tokens[size]; + } + instruction_predicate = (struct tgsi_instruction_predicate *)&tokens[size]; size++; - *instruction_ext_label = tgsi_build_instruction_ext_label( - full_inst->InstructionExtLabel.Label, - prev_token, - instruction, - header ); - prev_token = (struct tgsi_token *) instruction_ext_label; + *instruction_predicate = + tgsi_build_instruction_predicate(full_inst->Predicate.Index, + full_inst->Predicate.Negate, + full_inst->Predicate.SwizzleX, + full_inst->Predicate.SwizzleY, + full_inst->Predicate.SwizzleZ, + full_inst->Predicate.SwizzleW, + instruction, + header); } - if( tgsi_compare_instruction_ext_texture( - full_inst->InstructionExtTexture, - tgsi_default_instruction_ext_texture() ) ) { - struct tgsi_instruction_ext_texture *instruction_ext_texture; + if (full_inst->Instruction.Label) { + struct tgsi_instruction_label *instruction_label; if( maxsize <= size ) return 0; - instruction_ext_texture = - (struct tgsi_instruction_ext_texture *) &tokens[size]; + instruction_label = + (struct tgsi_instruction_label *) &tokens[size]; size++; - *instruction_ext_texture = tgsi_build_instruction_ext_texture( - full_inst->InstructionExtTexture.Texture, + *instruction_label = tgsi_build_instruction_label( + full_inst->Label.Label, prev_token, instruction, - header ); - prev_token = (struct tgsi_token *) instruction_ext_texture; + header ); + prev_token = (struct tgsi_token *) instruction_label; } - if (tgsi_compare_instruction_ext_predicate(full_inst->InstructionExtPredicate, - tgsi_default_instruction_ext_predicate())) { - struct tgsi_instruction_ext_predicate *instruction_ext_predicate; + if (full_inst->Instruction.Texture) { + struct tgsi_instruction_texture *instruction_texture; - if (maxsize <= size) { + if( maxsize <= size ) return 0; - } - instruction_ext_predicate = (struct tgsi_instruction_ext_predicate *)&tokens[size]; + instruction_texture = + (struct tgsi_instruction_texture *) &tokens[size]; size++; - *instruction_ext_predicate = - tgsi_build_instruction_ext_predicate(full_inst->InstructionExtPredicate.SrcIndex, - full_inst->InstructionExtPredicate.Negate, - full_inst->InstructionExtPredicate.SwizzleX, - full_inst->InstructionExtPredicate.SwizzleY, - full_inst->InstructionExtPredicate.SwizzleZ, - full_inst->InstructionExtPredicate.SwizzleW, - prev_token, - instruction, - header); - prev_token = (struct tgsi_token *)instruction_ext_predicate; + *instruction_texture = tgsi_build_instruction_texture( + full_inst->Texture.Texture, + prev_token, + instruction, + header ); + prev_token = (struct tgsi_token *) instruction_texture; } for( i = 0; i < full_inst->Instruction.NumDstRegs; i++ ) { - const struct tgsi_full_dst_register *reg = &full_inst->FullDstRegisters[i]; + const struct tgsi_full_dst_register *reg = &full_inst->Dst[i]; struct tgsi_dst_register *dst_register; struct tgsi_token *prev_token; @@ -584,34 +561,15 @@ tgsi_build_full_instruction( size++; *dst_register = tgsi_build_dst_register( - reg->DstRegister.File, - reg->DstRegister.WriteMask, - reg->DstRegister.Indirect, - reg->DstRegister.Index, + reg->Register.File, + reg->Register.WriteMask, + reg->Register.Indirect, + reg->Register.Index, instruction, header ); prev_token = (struct tgsi_token *) dst_register; - if( tgsi_compare_dst_register_ext_modulate( - reg->DstRegisterExtModulate, - tgsi_default_dst_register_ext_modulate() ) ) { - struct tgsi_dst_register_ext_modulate *dst_register_ext_modulate; - - if( maxsize <= size ) - return 0; - dst_register_ext_modulate = - (struct tgsi_dst_register_ext_modulate *) &tokens[size]; - size++; - - *dst_register_ext_modulate = tgsi_build_dst_register_ext_modulate( - reg->DstRegisterExtModulate.Modulate, - prev_token, - instruction, - header ); - prev_token = (struct tgsi_token *) dst_register_ext_modulate; - } - - if( reg->DstRegister.Indirect ) { + if( reg->Register.Indirect ) { struct tgsi_src_register *ind; if( maxsize <= size ) @@ -620,22 +578,23 @@ tgsi_build_full_instruction( size++; *ind = tgsi_build_src_register( - reg->DstRegisterInd.File, - reg->DstRegisterInd.SwizzleX, - reg->DstRegisterInd.SwizzleY, - reg->DstRegisterInd.SwizzleZ, - reg->DstRegisterInd.SwizzleW, - reg->DstRegisterInd.Negate, - reg->DstRegisterInd.Indirect, - reg->DstRegisterInd.Dimension, - reg->DstRegisterInd.Index, + reg->Indirect.File, + reg->Indirect.SwizzleX, + reg->Indirect.SwizzleY, + reg->Indirect.SwizzleZ, + reg->Indirect.SwizzleW, + reg->Indirect.Negate, + reg->Indirect.Absolute, + reg->Indirect.Indirect, + reg->Indirect.Dimension, + reg->Indirect.Index, instruction, header ); } } for( i = 0; i < full_inst->Instruction.NumSrcRegs; i++ ) { - const struct tgsi_full_src_register *reg = &full_inst->FullSrcRegisters[i]; + const struct tgsi_full_src_register *reg = &full_inst->Src[i]; struct tgsi_src_register *src_register; struct tgsi_token *prev_token; @@ -645,43 +604,21 @@ tgsi_build_full_instruction( size++; *src_register = tgsi_build_src_register( - reg->SrcRegister.File, - reg->SrcRegister.SwizzleX, - reg->SrcRegister.SwizzleY, - reg->SrcRegister.SwizzleZ, - reg->SrcRegister.SwizzleW, - reg->SrcRegister.Negate, - reg->SrcRegister.Indirect, - reg->SrcRegister.Dimension, - reg->SrcRegister.Index, + reg->Register.File, + reg->Register.SwizzleX, + reg->Register.SwizzleY, + reg->Register.SwizzleZ, + reg->Register.SwizzleW, + reg->Register.Negate, + reg->Register.Absolute, + reg->Register.Indirect, + reg->Register.Dimension, + reg->Register.Index, instruction, header ); prev_token = (struct tgsi_token *) src_register; - if( tgsi_compare_src_register_ext_mod( - reg->SrcRegisterExtMod, - tgsi_default_src_register_ext_mod() ) ) { - struct tgsi_src_register_ext_mod *src_register_ext_mod; - - if( maxsize <= size ) - return 0; - src_register_ext_mod = - (struct tgsi_src_register_ext_mod *) &tokens[size]; - size++; - - *src_register_ext_mod = tgsi_build_src_register_ext_mod( - reg->SrcRegisterExtMod.Complement, - reg->SrcRegisterExtMod.Bias, - reg->SrcRegisterExtMod.Scale2X, - reg->SrcRegisterExtMod.Absolute, - reg->SrcRegisterExtMod.Negate, - prev_token, - instruction, - header ); - prev_token = (struct tgsi_token *) src_register_ext_mod; - } - - if( reg->SrcRegister.Indirect ) { + if( reg->Register.Indirect ) { struct tgsi_src_register *ind; if( maxsize <= size ) @@ -690,23 +627,24 @@ tgsi_build_full_instruction( size++; *ind = tgsi_build_src_register( - reg->SrcRegisterInd.File, - reg->SrcRegisterInd.SwizzleX, - reg->SrcRegisterInd.SwizzleY, - reg->SrcRegisterInd.SwizzleZ, - reg->SrcRegisterInd.SwizzleW, - reg->SrcRegisterInd.Negate, - reg->SrcRegisterInd.Indirect, - reg->SrcRegisterInd.Dimension, - reg->SrcRegisterInd.Index, + reg->Indirect.File, + reg->Indirect.SwizzleX, + reg->Indirect.SwizzleY, + reg->Indirect.SwizzleZ, + reg->Indirect.SwizzleW, + reg->Indirect.Negate, + reg->Indirect.Absolute, + reg->Indirect.Indirect, + reg->Indirect.Dimension, + reg->Indirect.Index, instruction, header ); } - if( reg->SrcRegister.Dimension ) { + if( reg->Register.Dimension ) { struct tgsi_dimension *dim; - assert( !reg->SrcRegisterDim.Dimension ); + assert( !reg->Dimension.Dimension ); if( maxsize <= size ) return 0; @@ -714,12 +652,12 @@ tgsi_build_full_instruction( size++; *dim = tgsi_build_dimension( - reg->SrcRegisterDim.Indirect, - reg->SrcRegisterDim.Index, + reg->Dimension.Indirect, + reg->Dimension.Index, instruction, header ); - if( reg->SrcRegisterDim.Indirect ) { + if( reg->Dimension.Indirect ) { struct tgsi_src_register *ind; if( maxsize <= size ) @@ -728,15 +666,16 @@ tgsi_build_full_instruction( size++; *ind = tgsi_build_src_register( - reg->SrcRegisterDimInd.File, - reg->SrcRegisterDimInd.SwizzleX, - reg->SrcRegisterDimInd.SwizzleY, - reg->SrcRegisterDimInd.SwizzleZ, - reg->SrcRegisterDimInd.SwizzleW, - reg->SrcRegisterDimInd.Negate, - reg->SrcRegisterDimInd.Indirect, - reg->SrcRegisterDimInd.Dimension, - reg->SrcRegisterDimInd.Index, + reg->DimIndirect.File, + reg->DimIndirect.SwizzleX, + reg->DimIndirect.SwizzleY, + reg->DimIndirect.SwizzleZ, + reg->DimIndirect.SwizzleW, + reg->DimIndirect.Negate, + reg->DimIndirect.Absolute, + reg->DimIndirect.Indirect, + reg->DimIndirect.Dimension, + reg->DimIndirect.Index, instruction, header ); } @@ -746,147 +685,103 @@ tgsi_build_full_instruction( return size; } -/** test for inequality of 32-bit values pointed to by a and b */ -static INLINE boolean -compare32(const void *a, const void *b) +struct tgsi_instruction_predicate +tgsi_default_instruction_predicate(void) { - return *((uint32_t *) a) != *((uint32_t *) b); + struct tgsi_instruction_predicate instruction_predicate; + + instruction_predicate.SwizzleX = TGSI_SWIZZLE_X; + instruction_predicate.SwizzleY = TGSI_SWIZZLE_Y; + instruction_predicate.SwizzleZ = TGSI_SWIZZLE_Z; + instruction_predicate.SwizzleW = TGSI_SWIZZLE_W; + instruction_predicate.Negate = 0; + instruction_predicate.Index = 0; + instruction_predicate.Padding = 0; + + return instruction_predicate; } -struct tgsi_instruction_ext_label -tgsi_default_instruction_ext_label( void ) +struct tgsi_instruction_predicate +tgsi_build_instruction_predicate(int index, + unsigned negate, + unsigned swizzleX, + unsigned swizzleY, + unsigned swizzleZ, + unsigned swizzleW, + struct tgsi_instruction *instruction, + struct tgsi_header *header) { - struct tgsi_instruction_ext_label instruction_ext_label; + struct tgsi_instruction_predicate instruction_predicate; + + instruction_predicate = tgsi_default_instruction_predicate(); + instruction_predicate.SwizzleX = swizzleX; + instruction_predicate.SwizzleY = swizzleY; + instruction_predicate.SwizzleZ = swizzleZ; + instruction_predicate.SwizzleW = swizzleW; + instruction_predicate.Negate = negate; + instruction_predicate.Index = index; - instruction_ext_label.Type = TGSI_INSTRUCTION_EXT_TYPE_LABEL; - instruction_ext_label.Label = 0; - instruction_ext_label.Padding = 0; - instruction_ext_label.Extended = 0; + instruction_grow(instruction, header); - return instruction_ext_label; + return instruction_predicate; } -unsigned -tgsi_compare_instruction_ext_label( - struct tgsi_instruction_ext_label a, - struct tgsi_instruction_ext_label b ) +struct tgsi_instruction_label +tgsi_default_instruction_label( void ) { - a.Padding = b.Padding = 0; - a.Extended = b.Extended = 0; - return compare32(&a, &b); + struct tgsi_instruction_label instruction_label; + + instruction_label.Label = 0; + instruction_label.Padding = 0; + + return instruction_label; } -struct tgsi_instruction_ext_label -tgsi_build_instruction_ext_label( +struct tgsi_instruction_label +tgsi_build_instruction_label( unsigned label, struct tgsi_token *prev_token, struct tgsi_instruction *instruction, struct tgsi_header *header ) { - struct tgsi_instruction_ext_label instruction_ext_label; + struct tgsi_instruction_label instruction_label; - instruction_ext_label = tgsi_default_instruction_ext_label(); - instruction_ext_label.Label = label; + instruction_label = tgsi_default_instruction_label(); + instruction_label.Label = label; + instruction->Label = 1; - prev_token->Extended = 1; instruction_grow( instruction, header ); - return instruction_ext_label; + return instruction_label; } -struct tgsi_instruction_ext_texture -tgsi_default_instruction_ext_texture( void ) +struct tgsi_instruction_texture +tgsi_default_instruction_texture( void ) { - struct tgsi_instruction_ext_texture instruction_ext_texture; + struct tgsi_instruction_texture instruction_texture; - instruction_ext_texture.Type = TGSI_INSTRUCTION_EXT_TYPE_TEXTURE; - instruction_ext_texture.Texture = TGSI_TEXTURE_UNKNOWN; - instruction_ext_texture.Padding = 0; - instruction_ext_texture.Extended = 0; - - return instruction_ext_texture; -} + instruction_texture.Texture = TGSI_TEXTURE_UNKNOWN; + instruction_texture.Padding = 0; -unsigned -tgsi_compare_instruction_ext_texture( - struct tgsi_instruction_ext_texture a, - struct tgsi_instruction_ext_texture b ) -{ - a.Padding = b.Padding = 0; - a.Extended = b.Extended = 0; - return compare32(&a, &b); + return instruction_texture; } -struct tgsi_instruction_ext_texture -tgsi_build_instruction_ext_texture( +struct tgsi_instruction_texture +tgsi_build_instruction_texture( unsigned texture, struct tgsi_token *prev_token, struct tgsi_instruction *instruction, struct tgsi_header *header ) { - struct tgsi_instruction_ext_texture instruction_ext_texture; + struct tgsi_instruction_texture instruction_texture; - instruction_ext_texture = tgsi_default_instruction_ext_texture(); - instruction_ext_texture.Texture = texture; + instruction_texture = tgsi_default_instruction_texture(); + instruction_texture.Texture = texture; + instruction->Texture = 1; - prev_token->Extended = 1; instruction_grow( instruction, header ); - return instruction_ext_texture; -} - -struct tgsi_instruction_ext_predicate -tgsi_default_instruction_ext_predicate(void) -{ - struct tgsi_instruction_ext_predicate instruction_ext_predicate; - - instruction_ext_predicate.Type = TGSI_INSTRUCTION_EXT_TYPE_PREDICATE; - instruction_ext_predicate.SwizzleX = TGSI_SWIZZLE_X; - instruction_ext_predicate.SwizzleY = TGSI_SWIZZLE_Y; - instruction_ext_predicate.SwizzleZ = TGSI_SWIZZLE_Z; - instruction_ext_predicate.SwizzleW = TGSI_SWIZZLE_W; - instruction_ext_predicate.Negate = 0; - instruction_ext_predicate.SrcIndex = 0; - instruction_ext_predicate.Padding = 0; - instruction_ext_predicate.Extended = 0; - - return instruction_ext_predicate; -} - -unsigned -tgsi_compare_instruction_ext_predicate(struct tgsi_instruction_ext_predicate a, - struct tgsi_instruction_ext_predicate b) -{ - a.Padding = b.Padding = 0; - a.Extended = b.Extended = 0; - return compare32(&a, &b); -} - -struct tgsi_instruction_ext_predicate -tgsi_build_instruction_ext_predicate(unsigned index, - unsigned negate, - unsigned swizzleX, - unsigned swizzleY, - unsigned swizzleZ, - unsigned swizzleW, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header) -{ - struct tgsi_instruction_ext_predicate instruction_ext_predicate; - - instruction_ext_predicate = tgsi_default_instruction_ext_predicate(); - instruction_ext_predicate.SwizzleX = swizzleX; - instruction_ext_predicate.SwizzleY = swizzleY; - instruction_ext_predicate.SwizzleZ = swizzleZ; - instruction_ext_predicate.SwizzleW = swizzleW; - instruction_ext_predicate.Negate = negate; - instruction_ext_predicate.SrcIndex = index; - - prev_token->Extended = 1; - instruction_grow(instruction, header); - - return instruction_ext_predicate; + return instruction_texture; } struct tgsi_src_register @@ -900,10 +795,10 @@ tgsi_default_src_register( void ) src_register.SwizzleZ = TGSI_SWIZZLE_Z; src_register.SwizzleW = TGSI_SWIZZLE_W; src_register.Negate = 0; + src_register.Absolute = 0; src_register.Indirect = 0; src_register.Dimension = 0; src_register.Index = 0; - src_register.Extended = 0; return src_register; } @@ -916,6 +811,7 @@ tgsi_build_src_register( unsigned swizzle_z, unsigned swizzle_w, unsigned negate, + unsigned absolute, unsigned indirect, unsigned dimension, int index, @@ -939,6 +835,7 @@ tgsi_build_src_register( src_register.SwizzleZ = swizzle_z; src_register.SwizzleW = swizzle_w; src_register.Negate = negate; + src_register.Absolute = absolute; src_register.Indirect = indirect; src_register.Dimension = dimension; src_register.Index = index; @@ -953,75 +850,15 @@ tgsi_default_full_src_register( void ) { struct tgsi_full_src_register full_src_register; - full_src_register.SrcRegister = tgsi_default_src_register(); - full_src_register.SrcRegisterExtMod = tgsi_default_src_register_ext_mod(); - full_src_register.SrcRegisterInd = tgsi_default_src_register(); - full_src_register.SrcRegisterDim = tgsi_default_dimension(); - full_src_register.SrcRegisterDimInd = tgsi_default_src_register(); + full_src_register.Register = tgsi_default_src_register(); + full_src_register.Indirect = tgsi_default_src_register(); + full_src_register.Dimension = tgsi_default_dimension(); + full_src_register.DimIndirect = tgsi_default_src_register(); return full_src_register; } -struct tgsi_src_register_ext_mod -tgsi_default_src_register_ext_mod( void ) -{ - struct tgsi_src_register_ext_mod src_register_ext_mod; - - src_register_ext_mod.Type = TGSI_SRC_REGISTER_EXT_TYPE_MOD; - src_register_ext_mod.Complement = 0; - src_register_ext_mod.Bias = 0; - src_register_ext_mod.Scale2X = 0; - src_register_ext_mod.Absolute = 0; - src_register_ext_mod.Negate = 0; - src_register_ext_mod.Padding = 0; - src_register_ext_mod.Extended = 0; - - return src_register_ext_mod; -} - -unsigned -tgsi_compare_src_register_ext_mod( - struct tgsi_src_register_ext_mod a, - struct tgsi_src_register_ext_mod b ) -{ - a.Padding = b.Padding = 0; - a.Extended = b.Extended = 0; - return compare32(&a, &b); -} - -struct tgsi_src_register_ext_mod -tgsi_build_src_register_ext_mod( - unsigned complement, - unsigned bias, - unsigned scale_2x, - unsigned absolute, - unsigned negate, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ) -{ - struct tgsi_src_register_ext_mod src_register_ext_mod; - - assert( complement <= 1 ); - assert( bias <= 1 ); - assert( scale_2x <= 1 ); - assert( absolute <= 1 ); - assert( negate <= 1 ); - - src_register_ext_mod = tgsi_default_src_register_ext_mod(); - src_register_ext_mod.Complement = complement; - src_register_ext_mod.Bias = bias; - src_register_ext_mod.Scale2X = scale_2x; - src_register_ext_mod.Absolute = absolute; - src_register_ext_mod.Negate = negate; - - prev_token->Extended = 1; - instruction_grow( instruction, header ); - - return src_register_ext_mod; -} - struct tgsi_dimension tgsi_default_dimension( void ) { @@ -1031,7 +868,6 @@ tgsi_default_dimension( void ) dimension.Dimension = 0; dimension.Padding = 0; dimension.Index = 0; - dimension.Extended = 0; return dimension; } @@ -1065,7 +901,6 @@ tgsi_default_dst_register( void ) dst_register.Dimension = 0; dst_register.Index = 0; dst_register.Padding = 0; - dst_register.Extended = 0; return dst_register; } @@ -1101,53 +936,113 @@ tgsi_default_full_dst_register( void ) { struct tgsi_full_dst_register full_dst_register; - full_dst_register.DstRegister = tgsi_default_dst_register(); - full_dst_register.DstRegisterInd = tgsi_default_src_register(); - full_dst_register.DstRegisterExtModulate = - tgsi_default_dst_register_ext_modulate(); + full_dst_register.Register = tgsi_default_dst_register(); + full_dst_register.Indirect = tgsi_default_src_register(); return full_dst_register; } -struct tgsi_dst_register_ext_modulate -tgsi_default_dst_register_ext_modulate( void ) +struct tgsi_property +tgsi_default_property( void ) { - struct tgsi_dst_register_ext_modulate dst_register_ext_modulate; + struct tgsi_property property; - dst_register_ext_modulate.Type = TGSI_DST_REGISTER_EXT_TYPE_MODULATE; - dst_register_ext_modulate.Modulate = TGSI_MODULATE_1X; - dst_register_ext_modulate.Padding = 0; - dst_register_ext_modulate.Extended = 0; + property.Type = TGSI_TOKEN_TYPE_PROPERTY; + property.NrTokens = 1; + property.PropertyName = TGSI_PROPERTY_GS_INPUT_PRIM; + property.Padding = 0; - return dst_register_ext_modulate; + return property; } -unsigned -tgsi_compare_dst_register_ext_modulate( - struct tgsi_dst_register_ext_modulate a, - struct tgsi_dst_register_ext_modulate b ) +struct tgsi_property +tgsi_build_property(unsigned property_name, + struct tgsi_header *header) { - a.Padding = b.Padding = 0; - a.Extended = b.Extended = 0; - return compare32(&a, &b); + struct tgsi_property property; + + property = tgsi_default_property(); + property.PropertyName = property_name; + + header_bodysize_grow( header ); + + return property; } -struct tgsi_dst_register_ext_modulate -tgsi_build_dst_register_ext_modulate( - unsigned modulate, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, + +struct tgsi_full_property +tgsi_default_full_property( void ) +{ + struct tgsi_full_property full_property; + + full_property.Property = tgsi_default_property(); + memset(full_property.u, 0, + sizeof(struct tgsi_property_data) * 8); + + return full_property; +} + +static void +property_grow( + struct tgsi_property *property, struct tgsi_header *header ) { - struct tgsi_dst_register_ext_modulate dst_register_ext_modulate; + assert( property->NrTokens < 0xFF ); - assert( modulate <= TGSI_MODULATE_EIGHTH ); + property->NrTokens++; - dst_register_ext_modulate = tgsi_default_dst_register_ext_modulate(); - dst_register_ext_modulate.Modulate = modulate; + header_bodysize_grow( header ); +} - prev_token->Extended = 1; - instruction_grow( instruction, header ); +struct tgsi_property_data +tgsi_build_property_data( + unsigned value, + struct tgsi_property *property, + struct tgsi_header *header ) +{ + struct tgsi_property_data property_data; + + property_data.Data = value; + + property_grow( property, header ); + + return property_data; +} + +unsigned +tgsi_build_full_property( + const struct tgsi_full_property *full_prop, + struct tgsi_token *tokens, + struct tgsi_header *header, + unsigned maxsize ) +{ + unsigned size = 0, i; + struct tgsi_property *property; - return dst_register_ext_modulate; + if( maxsize <= size ) + return 0; + property = (struct tgsi_property *) &tokens[size]; + size++; + + *property = tgsi_build_property( + full_prop->Property.PropertyName, + header ); + + assert( full_prop->Property.NrTokens <= 8 + 1 ); + + for( i = 0; i < full_prop->Property.NrTokens - 1; i++ ) { + struct tgsi_property_data *data; + + if( maxsize <= size ) + return 0; + data = (struct tgsi_property_data *) &tokens[size]; + size++; + + *data = tgsi_build_property_data( + full_prop->u[i].Data, + property, + header ); + } + + return size; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.h b/src/gallium/auxiliary/tgsi/tgsi_build.h index 669712eb8f9..9de2757fe40 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.h +++ b/src/gallium/auxiliary/tgsi/tgsi_build.h @@ -36,12 +36,6 @@ struct tgsi_token; extern "C" { #endif -/* - * version - */ - -struct tgsi_version -tgsi_build_version( void ); /* * header @@ -133,6 +127,34 @@ tgsi_build_full_immediate( unsigned maxsize ); /* + * properties + */ + +struct tgsi_property +tgsi_default_property( void ); + +struct tgsi_property +tgsi_build_property( + unsigned property_name, + struct tgsi_header *header ); + +struct tgsi_full_property +tgsi_default_full_property( void ); + +struct tgsi_property_data +tgsi_build_property_data( + unsigned value, + struct tgsi_property *property, + struct tgsi_header *header ); + +unsigned +tgsi_build_full_property( + const struct tgsi_full_property *full_prop, + struct tgsi_token *tokens, + struct tgsi_header *header, + unsigned maxsize ); + +/* * instruction */ @@ -143,6 +165,7 @@ struct tgsi_instruction tgsi_build_instruction( unsigned opcode, unsigned saturate, + unsigned predicate, unsigned num_dst_regs, unsigned num_src_regs, struct tgsi_header *header ); @@ -157,54 +180,39 @@ tgsi_build_full_instruction( struct tgsi_header *header, unsigned maxsize ); -struct tgsi_instruction_ext_label -tgsi_default_instruction_ext_label( void ); +struct tgsi_instruction_predicate +tgsi_default_instruction_predicate(void); -unsigned -tgsi_compare_instruction_ext_label( - struct tgsi_instruction_ext_label a, - struct tgsi_instruction_ext_label b ); +struct tgsi_instruction_predicate +tgsi_build_instruction_predicate(int index, + unsigned negate, + unsigned swizzleX, + unsigned swizzleY, + unsigned swizzleZ, + unsigned swizzleW, + struct tgsi_instruction *instruction, + struct tgsi_header *header); + +struct tgsi_instruction_label +tgsi_default_instruction_label( void ); -struct tgsi_instruction_ext_label -tgsi_build_instruction_ext_label( +struct tgsi_instruction_label +tgsi_build_instruction_label( unsigned label, struct tgsi_token *prev_token, struct tgsi_instruction *instruction, struct tgsi_header *header ); -struct tgsi_instruction_ext_texture -tgsi_default_instruction_ext_texture( void ); +struct tgsi_instruction_texture +tgsi_default_instruction_texture( void ); -unsigned -tgsi_compare_instruction_ext_texture( - struct tgsi_instruction_ext_texture a, - struct tgsi_instruction_ext_texture b ); - -struct tgsi_instruction_ext_texture -tgsi_build_instruction_ext_texture( +struct tgsi_instruction_texture +tgsi_build_instruction_texture( unsigned texture, struct tgsi_token *prev_token, struct tgsi_instruction *instruction, struct tgsi_header *header ); -struct tgsi_instruction_ext_predicate -tgsi_default_instruction_ext_predicate(void); - -unsigned -tgsi_compare_instruction_ext_predicate(struct tgsi_instruction_ext_predicate a, - struct tgsi_instruction_ext_predicate b); - -struct tgsi_instruction_ext_predicate -tgsi_build_instruction_ext_predicate(unsigned index, - unsigned negate, - unsigned swizzleX, - unsigned swizzleY, - unsigned swizzleZ, - unsigned swizzleW, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header); - struct tgsi_src_register tgsi_default_src_register( void ); @@ -216,6 +224,7 @@ tgsi_build_src_register( unsigned swizzle_z, unsigned swizzle_w, unsigned negate, + unsigned absolute, unsigned indirect, unsigned dimension, int index, @@ -225,24 +234,6 @@ tgsi_build_src_register( struct tgsi_full_src_register tgsi_default_full_src_register( void ); -struct tgsi_src_register_ext_mod -tgsi_default_src_register_ext_mod( void ); - -unsigned -tgsi_compare_src_register_ext_mod( - struct tgsi_src_register_ext_mod a, - struct tgsi_src_register_ext_mod b ); - -struct tgsi_src_register_ext_mod -tgsi_build_src_register_ext_mod( - unsigned complement, - unsigned bias, - unsigned scale_2x, - unsigned absolute, - unsigned negate, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ); struct tgsi_dimension tgsi_default_dimension( void ); @@ -269,20 +260,6 @@ tgsi_build_dst_register( struct tgsi_full_dst_register tgsi_default_full_dst_register( void ); -struct tgsi_dst_register_ext_modulate -tgsi_default_dst_register_ext_modulate( void ); - -unsigned -tgsi_compare_dst_register_ext_modulate( - struct tgsi_dst_register_ext_modulate a, - struct tgsi_dst_register_ext_modulate b ); - -struct tgsi_dst_register_ext_modulate -tgsi_build_dst_register_ext_modulate( - unsigned modulate, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ); #if defined __cplusplus } diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index d16e64f9c59..e2e5394f86f 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -101,7 +101,8 @@ static const char *file_names[TGSI_FILE_COUNT] = "ADDR", "IMM", "LOOP", - "PRED" + "PRED", + "SV" }; static const char *interpolate_names[] = @@ -120,12 +121,16 @@ static const char *semantic_names[] = "PSIZE", "GENERIC", "NORMAL", - "FACE" + "FACE", + "EDGEFLAG", + "PRIM_ID" }; static const char *immediate_type_names[] = { - "FLT32" + "FLT32", + "UINT32", + "INT32" }; static const char *swizzle_names[] = @@ -149,26 +154,42 @@ static const char *texture_names[] = "SHADOWRECT" }; +static const char *property_names[] = +{ + "GS_INPUT_PRIMITIVE", + "GS_OUTPUT_PRIMITIVE", + "GS_MAX_OUTPUT_VERTICES" +}; -static const char *modulate_names[TGSI_MODULATE_COUNT] = +static const char *primitive_names[] = { - "", - "_2X", - "_4X", - "_8X", - "_D2", - "_D4", - "_D8" + "POINTS", + "LINES", + "LINE_LOOP", + "LINE_STRIP", + "TRIANGLES", + "TRIANGLE_STRIP", + "TRIANGLE_FAN", + "QUADS", + "QUAD_STRIP", + "POLYGON" }; + static void -_dump_register( +_dump_register_decl( struct dump_ctx *ctx, uint file, int first, int last ) { ENM( file, file_names ); + + /* all geometry shader inputs are two dimensional */ + if (file == TGSI_FILE_INPUT && + ctx->iter.processor.Processor == TGSI_PROCESSOR_GEOMETRY) + TXT("[]"); + CHR( '[' ); SID( first ); if (first != last) { @@ -179,6 +200,52 @@ _dump_register( } static void +_dump_register_dst( + struct dump_ctx *ctx, + uint file, + int index) +{ + ENM( file, file_names ); + + CHR( '[' ); + SID( index ); + CHR( ']' ); +} + + +static void +_dump_register_src( + struct dump_ctx *ctx, + const struct tgsi_full_src_register *src ) +{ + if (src->Register.Indirect) { + ENM( src->Register.File, file_names ); + CHR( '[' ); + ENM( src->Indirect.File, file_names ); + CHR( '[' ); + SID( src->Indirect.Index ); + TXT( "]." ); + ENM( src->Indirect.SwizzleX, swizzle_names ); + if (src->Register.Index != 0) { + if (src->Register.Index > 0) + CHR( '+' ); + SID( src->Register.Index ); + } + CHR( ']' ); + } else { + ENM( src->Register.File, file_names ); + CHR( '[' ); + SID( src->Register.Index ); + CHR( ']' ); + } + if (src->Register.Dimension) { + CHR( '[' ); + SID( src->Dimension.Index ); + CHR( ']' ); + } +} + +static void _dump_register_ind( struct dump_ctx *ctx, uint file, @@ -232,22 +299,22 @@ iter_declaration( TXT( "DCL " ); - _dump_register( + _dump_register_decl( ctx, decl->Declaration.File, - decl->DeclarationRange.First, - decl->DeclarationRange.Last ); + decl->Range.First, + decl->Range.Last ); _dump_writemask( ctx, decl->Declaration.UsageMask ); if (decl->Declaration.Semantic) { TXT( ", " ); - ENM( decl->Semantic.SemanticName, semantic_names ); - if (decl->Semantic.SemanticIndex != 0 || - decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC) { + ENM( decl->Semantic.Name, semantic_names ); + if (decl->Semantic.Index != 0 || + decl->Semantic.Name == TGSI_SEMANTIC_GENERIC) { CHR( '[' ); - UID( decl->Semantic.SemanticIndex ); + UID( decl->Semantic.Index ); CHR( ']' ); } } @@ -284,6 +351,50 @@ tgsi_dump_declaration( } static boolean +iter_property( + struct tgsi_iterate_context *iter, + struct tgsi_full_property *prop ) +{ + int i; + struct dump_ctx *ctx = (struct dump_ctx *)iter; + + assert(Elements(property_names) == TGSI_PROPERTY_COUNT); + + TXT( "PROPERTY " ); + ENM(prop->Property.PropertyName, property_names); + + if (prop->Property.NrTokens > 1) + TXT(" "); + + for (i = 0; i < prop->Property.NrTokens - 1; ++i) { + switch (prop->Property.PropertyName) { + case TGSI_PROPERTY_GS_INPUT_PRIM: + case TGSI_PROPERTY_GS_OUTPUT_PRIM: + ENM(prop->u[i].Data, primitive_names); + break; + default: + SID( prop->u[i].Data ); + break; + } + if (i < prop->Property.NrTokens - 2) + TXT( ", " ); + } + EOL(); + + return TRUE; +} + +void tgsi_dump_property( + const struct tgsi_full_property *prop ) +{ + struct dump_ctx ctx; + + ctx.printf = dump_ctx_printf; + + iter_property( &ctx.iter, (struct tgsi_full_property *)prop ); +} + +static boolean iter_immediate( struct tgsi_iterate_context *iter, struct tgsi_full_immediate *imm ) @@ -303,6 +414,12 @@ iter_immediate( case TGSI_IMM_FLOAT32: FLT( imm->u[i].Float ); break; + case TGSI_IMM_UINT32: + UID(imm->u[i].Uint); + break; + case TGSI_IMM_INT32: + SID(imm->u[i].Int); + break; default: assert( 0 ); } @@ -363,99 +480,66 @@ iter_instruction( } for (i = 0; i < inst->Instruction.NumDstRegs; i++) { - const struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; + const struct tgsi_full_dst_register *dst = &inst->Dst[i]; if (!first_reg) CHR( ',' ); CHR( ' ' ); - if (dst->DstRegister.Indirect) { + if (dst->Register.Indirect) { _dump_register_ind( ctx, - dst->DstRegister.File, - dst->DstRegister.Index, - dst->DstRegisterInd.File, - dst->DstRegisterInd.Index, - dst->DstRegisterInd.SwizzleX ); + dst->Register.File, + dst->Register.Index, + dst->Indirect.File, + dst->Indirect.Index, + dst->Indirect.SwizzleX ); } else { - _dump_register( + _dump_register_dst( ctx, - dst->DstRegister.File, - dst->DstRegister.Index, - dst->DstRegister.Index ); + dst->Register.File, + dst->Register.Index ); } - ENM( dst->DstRegisterExtModulate.Modulate, modulate_names ); - _dump_writemask( ctx, dst->DstRegister.WriteMask ); + _dump_writemask( ctx, dst->Register.WriteMask ); first_reg = FALSE; } for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; + const struct tgsi_full_src_register *src = &inst->Src[i]; if (!first_reg) CHR( ',' ); CHR( ' ' ); - if (src->SrcRegisterExtMod.Negate) - TXT( "-(" ); - if (src->SrcRegisterExtMod.Absolute) - CHR( '|' ); - if (src->SrcRegisterExtMod.Scale2X) - TXT( "2*(" ); - if (src->SrcRegisterExtMod.Bias) - CHR( '(' ); - if (src->SrcRegisterExtMod.Complement) - TXT( "1-(" ); - if (src->SrcRegister.Negate) + if (src->Register.Negate) CHR( '-' ); + if (src->Register.Absolute) + CHR( '|' ); - if (src->SrcRegister.Indirect) { - _dump_register_ind( - ctx, - src->SrcRegister.File, - src->SrcRegister.Index, - src->SrcRegisterInd.File, - src->SrcRegisterInd.Index, - src->SrcRegisterInd.SwizzleX ); - } - else { - _dump_register( - ctx, - src->SrcRegister.File, - src->SrcRegister.Index, - src->SrcRegister.Index ); - } + _dump_register_src(ctx, src); - if (src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X || - src->SrcRegister.SwizzleY != TGSI_SWIZZLE_Y || - src->SrcRegister.SwizzleZ != TGSI_SWIZZLE_Z || - src->SrcRegister.SwizzleW != TGSI_SWIZZLE_W) { + if (src->Register.SwizzleX != TGSI_SWIZZLE_X || + src->Register.SwizzleY != TGSI_SWIZZLE_Y || + src->Register.SwizzleZ != TGSI_SWIZZLE_Z || + src->Register.SwizzleW != TGSI_SWIZZLE_W) { CHR( '.' ); - ENM( src->SrcRegister.SwizzleX, swizzle_names ); - ENM( src->SrcRegister.SwizzleY, swizzle_names ); - ENM( src->SrcRegister.SwizzleZ, swizzle_names ); - ENM( src->SrcRegister.SwizzleW, swizzle_names ); + ENM( src->Register.SwizzleX, swizzle_names ); + ENM( src->Register.SwizzleY, swizzle_names ); + ENM( src->Register.SwizzleZ, swizzle_names ); + ENM( src->Register.SwizzleW, swizzle_names ); } - if (src->SrcRegisterExtMod.Complement) - CHR( ')' ); - if (src->SrcRegisterExtMod.Bias) - TXT( ")-.5" ); - if (src->SrcRegisterExtMod.Scale2X) - CHR( ')' ); - if (src->SrcRegisterExtMod.Absolute) + if (src->Register.Absolute) CHR( '|' ); - if (src->SrcRegisterExtMod.Negate) - CHR( ')' ); first_reg = FALSE; } - if (inst->InstructionExtTexture.Texture != TGSI_TEXTURE_UNKNOWN) { + if (inst->Instruction.Texture) { TXT( ", " ); - ENM( inst->InstructionExtTexture.Texture, texture_names ); + ENM( inst->Texture.Texture, texture_names ); } switch (inst->Instruction.Opcode) { @@ -465,7 +549,7 @@ iter_instruction( case TGSI_OPCODE_ENDLOOP: case TGSI_OPCODE_CAL: TXT( " :" ); - UID( inst->InstructionExtLabel.Label ); + UID( inst->Label.Label ); break; } @@ -503,9 +587,6 @@ prolog( { struct dump_ctx *ctx = (struct dump_ctx *) iter; ENM( iter->processor.Processor, processor_type_names ); - UID( iter->version.MajorVersion ); - CHR( '.' ); - UID( iter->version.MinorVersion ); EOL(); return TRUE; } @@ -521,6 +602,7 @@ tgsi_dump( ctx.iter.iterate_instruction = iter_instruction; ctx.iter.iterate_declaration = iter_declaration; ctx.iter.iterate_immediate = iter_immediate; + ctx.iter.iterate_property = iter_property; ctx.iter.epilog = NULL; ctx.instno = 0; @@ -575,6 +657,7 @@ tgsi_dump_str( ctx.base.iter.iterate_instruction = iter_instruction; ctx.base.iter.iterate_declaration = iter_declaration; ctx.base.iter.iterate_immediate = iter_immediate; + ctx.base.iter.iterate_property = iter_property; ctx.base.iter.epilog = NULL; ctx.base.instno = 0; diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.h b/src/gallium/auxiliary/tgsi/tgsi_dump.h index ad1e647ec90..4cd27317b36 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.h +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.h @@ -49,6 +49,7 @@ tgsi_dump( struct tgsi_full_immediate; struct tgsi_full_instruction; struct tgsi_full_declaration; +struct tgsi_full_property; void tgsi_dump_immediate( @@ -63,6 +64,10 @@ void tgsi_dump_declaration( const struct tgsi_full_declaration *decl ); +void +tgsi_dump_property( + const struct tgsi_full_property *prop ); + #if defined __cplusplus } #endif diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c index 4648051e29e..47fd1dd590e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c @@ -113,13 +113,6 @@ static const char *TGSI_SATS[] = "SAT_MINUS_PLUS_ONE" }; -static const char *TGSI_INSTRUCTION_EXTS[] = -{ - "", - "INSTRUCTION_EXT_TYPE_LABEL", - "INSTRUCTION_EXT_TYPE_TEXTURE" -}; - static const char *TGSI_SWIZZLES[] = { "SWIZZLE_X", @@ -141,12 +134,6 @@ static const char *TGSI_TEXTURES[] = "TEXTURE_SHADOWRECT" }; -static const char *TGSI_SRC_REGISTER_EXTS[] = -{ - "", - "SRC_REGISTER_EXT_TYPE_MOD" -}; - static const char *TGSI_WRITEMASKS[] = { "0", @@ -167,23 +154,6 @@ static const char *TGSI_WRITEMASKS[] = "WRITEMASK_XYZW" }; -static const char *TGSI_DST_REGISTER_EXTS[] = -{ - "", - "DST_REGISTER_EXT_TYPE_MODULATE" -}; - -static const char *TGSI_MODULATES[] = -{ - "MODULATE_1X", - "MODULATE_2X", - "MODULATE_4X", - "MODULATE_8X", - "MODULATE_HALF", - "MODULATE_QUARTER", - "MODULATE_EIGHTH" -}; - static void dump_declaration_verbose( struct tgsi_full_declaration *decl, @@ -216,6 +186,14 @@ dump_declaration_verbose( TXT( "\nSemantic : " ); UID( decl->Declaration.Semantic ); } + if (deflt || fd->Declaration.Centroid != decl->Declaration.Centroid) { + TXT("\nCentroid : "); + UID(decl->Declaration.Centroid); + } + if (deflt || fd->Declaration.Invariant != decl->Declaration.Invariant) { + TXT("\nInvariant : "); + UID(decl->Declaration.Invariant); + } if( ignored ) { TXT( "\nPadding : " ); UIX( decl->Declaration.Padding ); @@ -223,16 +201,16 @@ dump_declaration_verbose( EOL(); TXT( "\nFirst: " ); - UID( decl->DeclarationRange.First ); + UID( decl->Range.First ); TXT( "\nLast : " ); - UID( decl->DeclarationRange.Last ); + UID( decl->Range.Last ); if( decl->Declaration.Semantic ) { EOL(); - TXT( "\nSemanticName : " ); - ENM( decl->Semantic.SemanticName, TGSI_SEMANTICS ); - TXT( "\nSemanticIndex: " ); - UID( decl->Semantic.SemanticIndex ); + TXT( "\nName : " ); + ENM( decl->Semantic.Name, TGSI_SEMANTICS ); + TXT( "\nIndex: " ); + UID( decl->Semantic.Index ); if( ignored ) { TXT( "\nPadding : " ); UIX( decl->Semantic.Padding ); @@ -292,180 +270,122 @@ dump_instruction_verbose( TXT( "\nNumSrcRegs : " ); UID( inst->Instruction.NumSrcRegs ); } + if (deflt || fi->Instruction.Predicate != inst->Instruction.Predicate) { + TXT("\nPredicate : "); + UID(inst->Instruction.Predicate); + } + if (deflt || fi->Instruction.Label != inst->Instruction.Label) { + TXT("\nLabel : "); + UID(inst->Instruction.Label); + } + if (deflt || fi->Instruction.Texture != inst->Instruction.Texture) { + TXT("\nTexture : "); + UID(inst->Instruction.Texture); + } if( ignored ) { TXT( "\nPadding : " ); UIX( inst->Instruction.Padding ); } - if( deflt || tgsi_compare_instruction_ext_label( inst->InstructionExtLabel, fi->InstructionExtLabel ) ) { + if (deflt || inst->Instruction.Label) { EOL(); - TXT( "\nType : " ); - ENM( inst->InstructionExtLabel.Type, TGSI_INSTRUCTION_EXTS ); - if( deflt || fi->InstructionExtLabel.Label != inst->InstructionExtLabel.Label ) { + if (deflt || fi->Label.Label != inst->Label.Label) { TXT( "\nLabel : " ); - UID( inst->InstructionExtLabel.Label ); + UID(inst->Label.Label); } if( ignored ) { TXT( "\nPadding : " ); - UIX( inst->InstructionExtLabel.Padding ); - if( deflt || fi->InstructionExtLabel.Extended != inst->InstructionExtLabel.Extended ) { - TXT( "\nExtended: " ); - UID( inst->InstructionExtLabel.Extended ); - } + UIX(inst->Label.Padding); } } - if( deflt || tgsi_compare_instruction_ext_texture( inst->InstructionExtTexture, fi->InstructionExtTexture ) ) { + if (deflt || inst->Instruction.Texture) { EOL(); - TXT( "\nType : " ); - ENM( inst->InstructionExtTexture.Type, TGSI_INSTRUCTION_EXTS ); - if( deflt || fi->InstructionExtTexture.Texture != inst->InstructionExtTexture.Texture ) { + if (deflt || fi->Texture.Texture != inst->Texture.Texture) { TXT( "\nTexture : " ); - ENM( inst->InstructionExtTexture.Texture, TGSI_TEXTURES ); + ENM(inst->Texture.Texture, TGSI_TEXTURES); } if( ignored ) { TXT( "\nPadding : " ); - UIX( inst->InstructionExtTexture.Padding ); - if( deflt || fi->InstructionExtTexture.Extended != inst->InstructionExtTexture.Extended ) { - TXT( "\nExtended: " ); - UID( inst->InstructionExtTexture.Extended ); - } + UIX(inst->Texture.Padding); } } for( i = 0; i < inst->Instruction.NumDstRegs; i++ ) { - struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; - struct tgsi_full_dst_register *fd = &fi->FullDstRegisters[i]; + struct tgsi_full_dst_register *dst = &inst->Dst[i]; + struct tgsi_full_dst_register *fd = &fi->Dst[i]; EOL(); TXT( "\nFile : " ); - ENM( dst->DstRegister.File, TGSI_FILES ); - if( deflt || fd->DstRegister.WriteMask != dst->DstRegister.WriteMask ) { + ENM( dst->Register.File, TGSI_FILES ); + if( deflt || fd->Register.WriteMask != dst->Register.WriteMask ) { TXT( "\nWriteMask: " ); - ENM( dst->DstRegister.WriteMask, TGSI_WRITEMASKS ); + ENM( dst->Register.WriteMask, TGSI_WRITEMASKS ); } if( ignored ) { - if( deflt || fd->DstRegister.Indirect != dst->DstRegister.Indirect ) { + if( deflt || fd->Register.Indirect != dst->Register.Indirect ) { TXT( "\nIndirect : " ); - UID( dst->DstRegister.Indirect ); + UID( dst->Register.Indirect ); } - if( deflt || fd->DstRegister.Dimension != dst->DstRegister.Dimension ) { + if( deflt || fd->Register.Dimension != dst->Register.Dimension ) { TXT( "\nDimension: " ); - UID( dst->DstRegister.Dimension ); + UID( dst->Register.Dimension ); } } - if( deflt || fd->DstRegister.Index != dst->DstRegister.Index ) { + if( deflt || fd->Register.Index != dst->Register.Index ) { TXT( "\nIndex : " ); - SID( dst->DstRegister.Index ); + SID( dst->Register.Index ); } if( ignored ) { TXT( "\nPadding : " ); - UIX( dst->DstRegister.Padding ); - if( deflt || fd->DstRegister.Extended != dst->DstRegister.Extended ) { - TXT( "\nExtended : " ); - UID( dst->DstRegister.Extended ); - } - } - - if( deflt || tgsi_compare_dst_register_ext_modulate( dst->DstRegisterExtModulate, fd->DstRegisterExtModulate ) ) { - EOL(); - TXT( "\nType : " ); - ENM( dst->DstRegisterExtModulate.Type, TGSI_DST_REGISTER_EXTS ); - if( deflt || fd->DstRegisterExtModulate.Modulate != dst->DstRegisterExtModulate.Modulate ) { - TXT( "\nModulate: " ); - ENM( dst->DstRegisterExtModulate.Modulate, TGSI_MODULATES ); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX( dst->DstRegisterExtModulate.Padding ); - if( deflt || fd->DstRegisterExtModulate.Extended != dst->DstRegisterExtModulate.Extended ) { - TXT( "\nExtended: " ); - UID( dst->DstRegisterExtModulate.Extended ); - } - } + UIX( dst->Register.Padding ); } } for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) { - struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; - struct tgsi_full_src_register *fs = &fi->FullSrcRegisters[i]; + struct tgsi_full_src_register *src = &inst->Src[i]; + struct tgsi_full_src_register *fs = &fi->Src[i]; EOL(); TXT( "\nFile : "); - ENM( src->SrcRegister.File, TGSI_FILES ); - if( deflt || fs->SrcRegister.SwizzleX != src->SrcRegister.SwizzleX ) { + ENM( src->Register.File, TGSI_FILES ); + if( deflt || fs->Register.SwizzleX != src->Register.SwizzleX ) { TXT( "\nSwizzleX : " ); - ENM( src->SrcRegister.SwizzleX, TGSI_SWIZZLES ); + ENM( src->Register.SwizzleX, TGSI_SWIZZLES ); } - if( deflt || fs->SrcRegister.SwizzleY != src->SrcRegister.SwizzleY ) { + if( deflt || fs->Register.SwizzleY != src->Register.SwizzleY ) { TXT( "\nSwizzleY : " ); - ENM( src->SrcRegister.SwizzleY, TGSI_SWIZZLES ); + ENM( src->Register.SwizzleY, TGSI_SWIZZLES ); } - if( deflt || fs->SrcRegister.SwizzleZ != src->SrcRegister.SwizzleZ ) { + if( deflt || fs->Register.SwizzleZ != src->Register.SwizzleZ ) { TXT( "\nSwizzleZ : " ); - ENM( src->SrcRegister.SwizzleZ, TGSI_SWIZZLES ); + ENM( src->Register.SwizzleZ, TGSI_SWIZZLES ); } - if( deflt || fs->SrcRegister.SwizzleW != src->SrcRegister.SwizzleW ) { + if( deflt || fs->Register.SwizzleW != src->Register.SwizzleW ) { TXT( "\nSwizzleW : " ); - ENM( src->SrcRegister.SwizzleW, TGSI_SWIZZLES ); + ENM( src->Register.SwizzleW, TGSI_SWIZZLES ); + } + if (deflt || fs->Register.Absolute != src->Register.Absolute) { + TXT("\nAbsolute : "); + UID(src->Register.Absolute); } - if( deflt || fs->SrcRegister.Negate != src->SrcRegister.Negate ) { + if( deflt || fs->Register.Negate != src->Register.Negate ) { TXT( "\nNegate : " ); - UID( src->SrcRegister.Negate ); + UID( src->Register.Negate ); } if( ignored ) { - if( deflt || fs->SrcRegister.Indirect != src->SrcRegister.Indirect ) { + if( deflt || fs->Register.Indirect != src->Register.Indirect ) { TXT( "\nIndirect : " ); - UID( src->SrcRegister.Indirect ); + UID( src->Register.Indirect ); } - if( deflt || fs->SrcRegister.Dimension != src->SrcRegister.Dimension ) { + if( deflt || fs->Register.Dimension != src->Register.Dimension ) { TXT( "\nDimension: " ); - UID( src->SrcRegister.Dimension ); + UID( src->Register.Dimension ); } } - if( deflt || fs->SrcRegister.Index != src->SrcRegister.Index ) { + if( deflt || fs->Register.Index != src->Register.Index ) { TXT( "\nIndex : " ); - SID( src->SrcRegister.Index ); - } - if( ignored ) { - if( deflt || fs->SrcRegister.Extended != src->SrcRegister.Extended ) { - TXT( "\nExtended : " ); - UID( src->SrcRegister.Extended ); - } - } - - if( deflt || tgsi_compare_src_register_ext_mod( src->SrcRegisterExtMod, fs->SrcRegisterExtMod ) ) { - EOL(); - TXT( "\nType : " ); - ENM( src->SrcRegisterExtMod.Type, TGSI_SRC_REGISTER_EXTS ); - if( deflt || fs->SrcRegisterExtMod.Complement != src->SrcRegisterExtMod.Complement ) { - TXT( "\nComplement: " ); - UID( src->SrcRegisterExtMod.Complement ); - } - if( deflt || fs->SrcRegisterExtMod.Bias != src->SrcRegisterExtMod.Bias ) { - TXT( "\nBias : " ); - UID( src->SrcRegisterExtMod.Bias ); - } - if( deflt || fs->SrcRegisterExtMod.Scale2X != src->SrcRegisterExtMod.Scale2X ) { - TXT( "\nScale2X : " ); - UID( src->SrcRegisterExtMod.Scale2X ); - } - if( deflt || fs->SrcRegisterExtMod.Absolute != src->SrcRegisterExtMod.Absolute ) { - TXT( "\nAbsolute : " ); - UID( src->SrcRegisterExtMod.Absolute ); - } - if( deflt || fs->SrcRegisterExtMod.Negate != src->SrcRegisterExtMod.Negate ) { - TXT( "\nNegate : " ); - UID( src->SrcRegisterExtMod.Negate ); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX( src->SrcRegisterExtMod.Padding ); - if( deflt || fs->SrcRegisterExtMod.Extended != src->SrcRegisterExtMod.Extended ) { - TXT( "\nExtended : " ); - UID( src->SrcRegisterExtMod.Extended ); - } - } + SID( src->Register.Index ); } } } @@ -485,12 +405,6 @@ tgsi_dump_c( TXT( "tgsi-dump begin -----------------" ); - TXT( "\nMajorVersion: " ); - UID( parse.FullVersion.Version.MajorVersion ); - TXT( "\nMinorVersion: " ); - UID( parse.FullVersion.Version.MinorVersion ); - EOL(); - TXT( "\nHeaderSize: " ); UID( parse.FullHeader.Header.HeaderSize ); TXT( "\nBodySize : " ); @@ -510,10 +424,6 @@ tgsi_dump_c( if( ignored ) { TXT( "\nSize : " ); UID( parse.FullToken.Token.NrTokens ); - if( deflt || parse.FullToken.Token.Extended ) { - TXT( "\nExtended : " ); - UID( parse.FullToken.Token.Extended ); - } } switch( parse.FullToken.Token.Type ) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index b7569e74d4b..f43233bdb49 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -2,6 +2,7 @@ * * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. + * Copyright 2009-2010 VMware, Inc. All rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -60,21 +61,337 @@ #include "util/u_memory.h" #include "util/u_math.h" -#define FAST_MATH 1 -/** for tgsi_full_instruction::Flags */ -#define SOA_DEPENDENCY_FLAG 0x1 +#define FAST_MATH 1 #define TILE_TOP_LEFT 0 #define TILE_TOP_RIGHT 1 #define TILE_BOTTOM_LEFT 2 #define TILE_BOTTOM_RIGHT 3 +static void +micro_abs(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = fabsf(src->f[0]); + dst->f[1] = fabsf(src->f[1]); + dst->f[2] = fabsf(src->f[2]); + dst->f[3] = fabsf(src->f[3]); +} + +static void +micro_arl(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = (int)floorf(src->f[0]); + dst->i[1] = (int)floorf(src->f[1]); + dst->i[2] = (int)floorf(src->f[2]); + dst->i[3] = (int)floorf(src->f[3]); +} + +static void +micro_arr(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = (int)floorf(src->f[0] + 0.5f); + dst->i[1] = (int)floorf(src->f[1] + 0.5f); + dst->i[2] = (int)floorf(src->f[2] + 0.5f); + dst->i[3] = (int)floorf(src->f[3] + 0.5f); +} + +static void +micro_ceil(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = ceilf(src->f[0]); + dst->f[1] = ceilf(src->f[1]); + dst->f[2] = ceilf(src->f[2]); + dst->f[3] = ceilf(src->f[3]); +} + +static void +micro_cos(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = cosf(src->f[0]); + dst->f[1] = cosf(src->f[1]); + dst->f[2] = cosf(src->f[2]); + dst->f[3] = cosf(src->f[3]); +} + +static void +micro_ddx(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = + dst->f[1] = + dst->f[2] = + dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; +} + +static void +micro_ddy(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = + dst->f[1] = + dst->f[2] = + dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT]; +} + +static void +micro_exp2(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ +#if FAST_MATH + dst->f[0] = util_fast_exp2(src->f[0]); + dst->f[1] = util_fast_exp2(src->f[1]); + dst->f[2] = util_fast_exp2(src->f[2]); + dst->f[3] = util_fast_exp2(src->f[3]); +#else +#if DEBUG + /* Inf is okay for this instruction, so clamp it to silence assertions. */ + uint i; + union tgsi_exec_channel clamped; + + for (i = 0; i < 4; i++) { + if (src->f[i] > 127.99999f) { + clamped.f[i] = 127.99999f; + } else if (src->f[i] < -126.99999f) { + clamped.f[i] = -126.99999f; + } else { + clamped.f[i] = src->f[i]; + } + } + src = &clamped; +#endif /* DEBUG */ + + dst->f[0] = powf(2.0f, src->f[0]); + dst->f[1] = powf(2.0f, src->f[1]); + dst->f[2] = powf(2.0f, src->f[2]); + dst->f[3] = powf(2.0f, src->f[3]); +#endif /* FAST_MATH */ +} + +static void +micro_flr(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = floorf(src->f[0]); + dst->f[1] = floorf(src->f[1]); + dst->f[2] = floorf(src->f[2]); + dst->f[3] = floorf(src->f[3]); +} + +static void +micro_frc(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src->f[0] - floorf(src->f[0]); + dst->f[1] = src->f[1] - floorf(src->f[1]); + dst->f[2] = src->f[2] - floorf(src->f[2]); + dst->f[3] = src->f[3] - floorf(src->f[3]); +} + +static void +micro_iabs(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0]; + dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1]; + dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2]; + dst->i[3] = src->i[3] >= 0 ? src->i[3] : -src->i[3]; +} + +static void +micro_ineg(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = -src->i[0]; + dst->i[1] = -src->i[1]; + dst->i[2] = -src->i[2]; + dst->i[3] = -src->i[3]; +} + +static void +micro_lg2(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ +#if FAST_MATH + dst->f[0] = util_fast_log2(src->f[0]); + dst->f[1] = util_fast_log2(src->f[1]); + dst->f[2] = util_fast_log2(src->f[2]); + dst->f[3] = util_fast_log2(src->f[3]); +#else + dst->f[0] = logf(src->f[0]) * 1.442695f; + dst->f[1] = logf(src->f[1]) * 1.442695f; + dst->f[2] = logf(src->f[2]) * 1.442695f; + dst->f[3] = logf(src->f[3]) * 1.442695f; +#endif +} + +static void +micro_lrp(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] * (src[1].f[0] - src[2].f[0]) + src[2].f[0]; + dst->f[1] = src[0].f[1] * (src[1].f[1] - src[2].f[1]) + src[2].f[1]; + dst->f[2] = src[0].f[2] * (src[1].f[2] - src[2].f[2]) + src[2].f[2]; + dst->f[3] = src[0].f[3] * (src[1].f[3] - src[2].f[3]) + src[2].f[3]; +} + +static void +micro_mad(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] * src[1].f[0] + src[2].f[0]; + dst->f[1] = src[0].f[1] * src[1].f[1] + src[2].f[1]; + dst->f[2] = src[0].f[2] * src[1].f[2] + src[2].f[2]; + dst->f[3] = src[0].f[3] * src[1].f[3] + src[2].f[3]; +} + +static void +micro_mov(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src->u[0]; + dst->u[1] = src->u[1]; + dst->u[2] = src->u[2]; + dst->u[3] = src->u[3]; +} + +static void +micro_rcp(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = 1.0f / src->f[0]; + dst->f[1] = 1.0f / src->f[1]; + dst->f[2] = 1.0f / src->f[2]; + dst->f[3] = 1.0f / src->f[3]; +} + +static void +micro_rnd(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = floorf(src->f[0] + 0.5f); + dst->f[1] = floorf(src->f[1] + 0.5f); + dst->f[2] = floorf(src->f[2] + 0.5f); + dst->f[3] = floorf(src->f[3] + 0.5f); +} + +static void +micro_rsq(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = 1.0f / sqrtf(fabsf(src->f[0])); + dst->f[1] = 1.0f / sqrtf(fabsf(src->f[1])); + dst->f[2] = 1.0f / sqrtf(fabsf(src->f[2])); + dst->f[3] = 1.0f / sqrtf(fabsf(src->f[3])); +} + +static void +micro_seq(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] == src[1].f[0] ? 1.0f : 0.0f; + dst->f[1] = src[0].f[1] == src[1].f[1] ? 1.0f : 0.0f; + dst->f[2] = src[0].f[2] == src[1].f[2] ? 1.0f : 0.0f; + dst->f[3] = src[0].f[3] == src[1].f[3] ? 1.0f : 0.0f; +} + +static void +micro_sge(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] >= src[1].f[0] ? 1.0f : 0.0f; + dst->f[1] = src[0].f[1] >= src[1].f[1] ? 1.0f : 0.0f; + dst->f[2] = src[0].f[2] >= src[1].f[2] ? 1.0f : 0.0f; + dst->f[3] = src[0].f[3] >= src[1].f[3] ? 1.0f : 0.0f; +} + +static void +micro_sgn(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; + dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; + dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; + dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; +} + +static void +micro_sgt(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] > src[1].f[0] ? 1.0f : 0.0f; + dst->f[1] = src[0].f[1] > src[1].f[1] ? 1.0f : 0.0f; + dst->f[2] = src[0].f[2] > src[1].f[2] ? 1.0f : 0.0f; + dst->f[3] = src[0].f[3] > src[1].f[3] ? 1.0f : 0.0f; +} + +static void +micro_sin(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = sinf(src->f[0]); + dst->f[1] = sinf(src->f[1]); + dst->f[2] = sinf(src->f[2]); + dst->f[3] = sinf(src->f[3]); +} + +static void +micro_sle(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] <= src[1].f[0] ? 1.0f : 0.0f; + dst->f[1] = src[0].f[1] <= src[1].f[1] ? 1.0f : 0.0f; + dst->f[2] = src[0].f[2] <= src[1].f[2] ? 1.0f : 0.0f; + dst->f[3] = src[0].f[3] <= src[1].f[3] ? 1.0f : 0.0f; +} + +static void +micro_slt(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] < src[1].f[0] ? 1.0f : 0.0f; + dst->f[1] = src[0].f[1] < src[1].f[1] ? 1.0f : 0.0f; + dst->f[2] = src[0].f[2] < src[1].f[2] ? 1.0f : 0.0f; + dst->f[3] = src[0].f[3] < src[1].f[3] ? 1.0f : 0.0f; +} + +static void +micro_sne(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] != src[1].f[0] ? 1.0f : 0.0f; + dst->f[1] = src[0].f[1] != src[1].f[1] ? 1.0f : 0.0f; + dst->f[2] = src[0].f[2] != src[1].f[2] ? 1.0f : 0.0f; + dst->f[3] = src[0].f[3] != src[1].f[3] ? 1.0f : 0.0f; +} + +static void +micro_trunc(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = (float)(int)src->f[0]; + dst->f[1] = (float)(int)src->f[1]; + dst->f[2] = (float)(int)src->f[2]; + dst->f[3] = (float)(int)src->f[3]; +} + + #define CHAN_X 0 #define CHAN_Y 1 #define CHAN_Z 2 #define CHAN_W 3 +enum tgsi_exec_datatype { + TGSI_EXEC_DATA_FLOAT, + TGSI_EXEC_DATA_INT, + TGSI_EXEC_DATA_UINT +}; + /* * Shorthand locations of various utility registers (_I = Index, _C = Channel) */ @@ -110,10 +427,10 @@ #define TEMP_P0 TGSI_EXEC_TEMP_P0 #define IS_CHANNEL_ENABLED(INST, CHAN)\ - ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) + ((INST).Dst[0].Register.WriteMask & (1 << (CHAN))) #define IS_CHANNEL_ENABLED2(INST, CHAN)\ - ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN))) + ((INST).Dst[1].Register.WriteMask & (1 << (CHAN))) #define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ @@ -126,23 +443,19 @@ /** The execution mask depends on the conditional mask and the loop mask */ #define UPDATE_EXEC_MASK(MACH) \ - MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask + MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->Switch.mask & MACH->FuncMask static const union tgsi_exec_channel ZeroVec = { { 0.0, 0.0, 0.0, 0.0 } }; -#ifdef DEBUG -static void -check_inf_or_nan(const union tgsi_exec_channel *chan) -{ - assert(!util_is_inf_or_nan(chan->f[0])); - assert(!util_is_inf_or_nan(chan->f[1])); - assert(!util_is_inf_or_nan(chan->f[2])); - assert(!util_is_inf_or_nan(chan->f[3])); -} -#endif +#define CHECK_INF_OR_NAN(chan) do {\ + assert(!util_is_inf_or_nan((chan)->f[0]));\ + assert(!util_is_inf_or_nan((chan)->f[1]));\ + assert(!util_is_inf_or_nan((chan)->f[2]));\ + assert(!util_is_inf_or_nan((chan)->f[3]));\ + } while (0) #ifdef DEBUG @@ -191,7 +504,7 @@ tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst) { uint i, chan; - uint writemask = inst->FullDstRegisters[0].DstRegister.WriteMask; + uint writemask = inst->Dst[0].Register.WriteMask; if (writemask == TGSI_WRITEMASK_X || writemask == TGSI_WRITEMASK_Y || writemask == TGSI_WRITEMASK_Z || @@ -203,15 +516,15 @@ tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst) /* loop over src regs */ for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - if ((inst->FullSrcRegisters[i].SrcRegister.File == - inst->FullDstRegisters[0].DstRegister.File) && - (inst->FullSrcRegisters[i].SrcRegister.Index == - inst->FullDstRegisters[0].DstRegister.Index)) { + if ((inst->Src[i].Register.File == + inst->Dst[0].Register.File) && + (inst->Src[i].Register.Index == + inst->Dst[0].Register.Index)) { /* loop over dest channels */ uint channelsWritten = 0x0; FOR_EACH_ENABLED_CHANNEL(*inst, chan) { /* check if we're reading a channel that's been written */ - uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->FullSrcRegisters[i], chan); + uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->Src[i], chan); if (channelsWritten & (1 << swizzle)) { return TRUE; } @@ -295,6 +608,14 @@ tgsi_exec_machine_bind_shader( * sizeof(struct tgsi_full_declaration)); maxDeclarations += 10; } + if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT) { + unsigned reg; + for (reg = parse.FullToken.FullDeclaration.Range.First; + reg <= parse.FullToken.FullDeclaration.Range.Last; + ++reg) { + ++mach->NumOutputs; + } + } memcpy(declarations + numDeclarations, &parse.FullToken.FullDeclaration, sizeof(declarations[0])); @@ -332,20 +653,6 @@ tgsi_exec_machine_bind_shader( maxInstructions += 10; } - if (tgsi_check_soa_dependencies(&parse.FullToken.FullInstruction)) { - uint opcode = parse.FullToken.FullInstruction.Instruction.Opcode; - parse.FullToken.FullInstruction.Flags = SOA_DEPENDENCY_FLAG; - /* XXX we only handle SOA dependencies properly for MOV/SWZ - * at this time! - */ - if (opcode != TGSI_OPCODE_MOV) { - debug_printf("Warning: SOA dependency in instruction" - " is not handled:\n"); - tgsi_dump_instruction(&parse.FullToken.FullInstruction, - numInstructions); - } - } - memcpy(instructions + numInstructions, &parse.FullToken.FullInstruction, sizeof(instructions[0])); @@ -353,6 +660,9 @@ tgsi_exec_machine_bind_shader( numInstructions++; break; + case TGSI_TOKEN_TYPE_PROPERTY: + break; + default: assert( 0 ); } @@ -386,6 +696,8 @@ tgsi_exec_machine_create( void ) memset(mach, 0, sizeof(*mach)); mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; + mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES; + mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0]; /* Setup constants. */ for( i = 0; i < 4; i++ ) { @@ -426,18 +738,6 @@ tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach) align_free(mach); } - -static void -micro_abs( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = fabsf( src->f[0] ); - dst->f[1] = fabsf( src->f[1] ); - dst->f[2] = fabsf( src->f[2] ); - dst->f[3] = fabsf( src->f[3] ); -} - static void micro_add( union tgsi_exec_channel *dst, @@ -450,76 +750,6 @@ micro_add( dst->f[3] = src0->f[3] + src1->f[3]; } -#if 0 -static void -micro_iadd( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] + src1->i[0]; - dst->i[1] = src0->i[1] + src1->i[1]; - dst->i[2] = src0->i[2] + src1->i[2]; - dst->i[3] = src0->i[3] + src1->i[3]; -} -#endif - -static void -micro_and( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] & src1->u[0]; - dst->u[1] = src0->u[1] & src1->u[1]; - dst->u[2] = src0->u[2] & src1->u[2]; - dst->u[3] = src0->u[3] & src1->u[3]; -} - -static void -micro_ceil( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = ceilf( src->f[0] ); - dst->f[1] = ceilf( src->f[1] ); - dst->f[2] = ceilf( src->f[2] ); - dst->f[3] = ceilf( src->f[3] ); -} - -static void -micro_cos( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = cosf( src->f[0] ); - dst->f[1] = cosf( src->f[1] ); - dst->f[2] = cosf( src->f[2] ); - dst->f[3] = cosf( src->f[3] ); -} - -static void -micro_ddx( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = - dst->f[1] = - dst->f[2] = - dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; -} - -static void -micro_ddy( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = - dst->f[1] = - dst->f[2] = - dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT]; -} - static void micro_div( union tgsi_exec_channel *dst, @@ -540,81 +770,6 @@ micro_div( } } -#if 0 -static void -micro_udiv( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] / src1->u[0]; - dst->u[1] = src0->u[1] / src1->u[1]; - dst->u[2] = src0->u[2] / src1->u[2]; - dst->u[3] = src0->u[3] / src1->u[3]; -} -#endif - -static void -micro_eq( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0]; - dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1]; - dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2]; - dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3]; -} - -#if 0 -static void -micro_ieq( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0]; - dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1]; - dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2]; - dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3]; -} -#endif - -static void -micro_exp2( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src) -{ -#if FAST_MATH - dst->f[0] = util_fast_exp2( src->f[0] ); - dst->f[1] = util_fast_exp2( src->f[1] ); - dst->f[2] = util_fast_exp2( src->f[2] ); - dst->f[3] = util_fast_exp2( src->f[3] ); -#else - dst->f[0] = powf( 2.0f, src->f[0] ); - dst->f[1] = powf( 2.0f, src->f[1] ); - dst->f[2] = powf( 2.0f, src->f[2] ); - dst->f[3] = powf( 2.0f, src->f[3] ); -#endif -} - -#if 0 -static void -micro_f2ut( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->u[0] = (uint) src->f[0]; - dst->u[1] = (uint) src->f[1]; - dst->u[2] = (uint) src->f[2]; - dst->u[3] = (uint) src->f[3]; -} -#endif - static void micro_float_clamp(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src) @@ -642,71 +797,6 @@ micro_float_clamp(union tgsi_exec_channel *dst, } static void -micro_flr( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = floorf( src->f[0] ); - dst->f[1] = floorf( src->f[1] ); - dst->f[2] = floorf( src->f[2] ); - dst->f[3] = floorf( src->f[3] ); -} - -static void -micro_frc( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = src->f[0] - floorf( src->f[0] ); - dst->f[1] = src->f[1] - floorf( src->f[1] ); - dst->f[2] = src->f[2] - floorf( src->f[2] ); - dst->f[3] = src->f[3] - floorf( src->f[3] ); -} - -static void -micro_i2f( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = (float) src->i[0]; - dst->f[1] = (float) src->i[1]; - dst->f[2] = (float) src->i[2]; - dst->f[3] = (float) src->i[3]; -} - -static void -micro_lg2( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ -#if FAST_MATH - dst->f[0] = util_fast_log2( src->f[0] ); - dst->f[1] = util_fast_log2( src->f[1] ); - dst->f[2] = util_fast_log2( src->f[2] ); - dst->f[3] = util_fast_log2( src->f[3] ); -#else - dst->f[0] = logf( src->f[0] ) * 1.442695f; - dst->f[1] = logf( src->f[1] ) * 1.442695f; - dst->f[2] = logf( src->f[2] ) * 1.442695f; - dst->f[3] = logf( src->f[3] ) * 1.442695f; -#endif -} - -static void -micro_le( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0]; - dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1]; - dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2]; - dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3]; -} - -static void micro_lt( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, @@ -720,38 +810,6 @@ micro_lt( dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3]; } -#if 0 -static void -micro_ilt( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0]; - dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1]; - dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2]; - dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3]; -} -#endif - -#if 0 -static void -micro_ult( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0]; - dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1]; - dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2]; - dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3]; -} -#endif - static void micro_max( union tgsi_exec_channel *dst, @@ -764,34 +822,6 @@ micro_max( dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3]; } -#if 0 -static void -micro_imax( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0]; - dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; - dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; - dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; -} -#endif - -#if 0 -static void -micro_umax( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; - dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1]; - dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; - dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; -} -#endif - static void micro_min( union tgsi_exec_channel *dst, @@ -804,48 +834,6 @@ micro_min( dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; } -#if 0 -static void -micro_imin( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0]; - dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1]; - dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; - dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; -} -#endif - -#if 0 -static void -micro_umin( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; - dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; - dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; - dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3]; -} -#endif - -#if 0 -static void -micro_umod( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] % src1->u[0]; - dst->u[1] = src0->u[1] % src1->u[1]; - dst->u[2] = src0->u[2] % src1->u[2]; - dst->u[3] = src0->u[3] % src1->u[3]; -} -#endif - static void micro_mul( union tgsi_exec_channel *dst, @@ -860,20 +848,6 @@ micro_mul( #if 0 static void -micro_imul( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] * src1->i[0]; - dst->i[1] = src0->i[1] * src1->i[1]; - dst->i[2] = src0->i[2] * src1->i[2]; - dst->i[3] = src0->i[3] * src1->i[3]; -} -#endif - -#if 0 -static void micro_imul64( union tgsi_exec_channel *dst0, union tgsi_exec_channel *dst1, @@ -937,42 +911,6 @@ micro_neg( dst->f[3] = -src->f[3]; } -#if 0 -static void -micro_ineg( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->i[0] = -src->i[0]; - dst->i[1] = -src->i[1]; - dst->i[2] = -src->i[2]; - dst->i[3] = -src->i[3]; -} -#endif - -static void -micro_not( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->u[0] = ~src->u[0]; - dst->u[1] = ~src->u[1]; - dst->u[2] = ~src->u[2]; - dst->u[3] = ~src->u[3]; -} - -static void -micro_or( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] | src1->u[0]; - dst->u[1] = src0->u[1] | src1->u[1]; - dst->u[2] = src0->u[2] | src1->u[2]; - dst->u[3] = src0->u[3] | src1->u[3]; -} - static void micro_pow( union tgsi_exec_channel *dst, @@ -993,88 +931,6 @@ micro_pow( } static void -micro_rnd( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = floorf( src->f[0] + 0.5f ); - dst->f[1] = floorf( src->f[1] + 0.5f ); - dst->f[2] = floorf( src->f[2] + 0.5f ); - dst->f[3] = floorf( src->f[3] + 0.5f ); -} - -static void -micro_sgn( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; - dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; - dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; - dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; -} - -static void -micro_shl( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] << src1->i[0]; - dst->i[1] = src0->i[1] << src1->i[1]; - dst->i[2] = src0->i[2] << src1->i[2]; - dst->i[3] = src0->i[3] << src1->i[3]; -} - -static void -micro_ishr( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] >> src1->i[0]; - dst->i[1] = src0->i[1] >> src1->i[1]; - dst->i[2] = src0->i[2] >> src1->i[2]; - dst->i[3] = src0->i[3] >> src1->i[3]; -} - -static void -micro_trunc( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0 ) -{ - dst->f[0] = (float) (int) src0->f[0]; - dst->f[1] = (float) (int) src0->f[1]; - dst->f[2] = (float) (int) src0->f[2]; - dst->f[3] = (float) (int) src0->f[3]; -} - -#if 0 -static void -micro_ushr( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] >> src1->u[0]; - dst->u[1] = src0->u[1] >> src1->u[1]; - dst->u[2] = src0->u[2] >> src1->u[2]; - dst->u[3] = src0->u[3] >> src1->u[3]; -} -#endif - -static void -micro_sin( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = sinf( src->f[0] ); - dst->f[1] = sinf( src->f[1] ); - dst->f[2] = sinf( src->f[2] ); - dst->f[3] = sinf( src->f[3] ); -} - -static void micro_sqrt( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) { @@ -1096,31 +952,6 @@ micro_sub( dst->f[3] = src0->f[3] - src1->f[3]; } -#if 0 -static void -micro_u2f( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = (float) src->u[0]; - dst->f[1] = (float) src->u[1]; - dst->f[2] = (float) src->u[2]; - dst->f[3] = (float) src->u[3]; -} -#endif - -static void -micro_xor( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] ^ src1->u[0]; - dst->u[1] = src0->u[1] ^ src1->u[1]; - dst->u[2] = src0->u[2] ^ src1->u[2]; - dst->u[3] = src0->u[3] ^ src1->u[3]; -} - static void fetch_src_file_channel( const struct tgsi_exec_machine *mach, @@ -1156,6 +987,7 @@ fetch_src_file_channel( break; case TGSI_FILE_INPUT: + case TGSI_FILE_SYSTEM_VALUE: chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0]; chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1]; chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2]; @@ -1193,10 +1025,10 @@ fetch_src_file_channel( assert(index->i[1] < TGSI_EXEC_NUM_PREDS); assert(index->i[2] < TGSI_EXEC_NUM_PREDS); assert(index->i[3] < TGSI_EXEC_NUM_PREDS); - chan->u[0] = mach->Addrs[0].xyzw[swizzle].u[0]; - chan->u[1] = mach->Addrs[0].xyzw[swizzle].u[1]; - chan->u[2] = mach->Addrs[0].xyzw[swizzle].u[2]; - chan->u[3] = mach->Addrs[0].xyzw[swizzle].u[3]; + chan->u[0] = mach->Predicates[0].xyzw[swizzle].u[0]; + chan->u[1] = mach->Predicates[0].xyzw[swizzle].u[1]; + chan->u[2] = mach->Predicates[0].xyzw[swizzle].u[2]; + chan->u[3] = mach->Predicates[0].xyzw[swizzle].u[3]; break; case TGSI_FILE_OUTPUT: @@ -1218,11 +1050,11 @@ fetch_src_file_channel( } static void -fetch_source( - const struct tgsi_exec_machine *mach, - union tgsi_exec_channel *chan, - const struct tgsi_full_src_register *reg, - const uint chan_index ) +fetch_source(const struct tgsi_exec_machine *mach, + union tgsi_exec_channel *chan, + const struct tgsi_full_src_register *reg, + const uint chan_index, + enum tgsi_exec_datatype src_datatype) { union tgsi_exec_channel index; uint swizzle; @@ -1231,13 +1063,13 @@ fetch_source( * * file[1], * where: - * file = SrcRegister.File - * [1] = SrcRegister.Index + * file = Register.File + * [1] = Register.Index */ index.i[0] = index.i[1] = index.i[2] = - index.i[3] = reg->SrcRegister.Index; + index.i[3] = reg->Register.Index; /* There is an extra source register that indirectly subscripts * a register file. The direct index now becomes an offset @@ -1245,11 +1077,11 @@ fetch_source( * * file[ind[2].x+1], * where: - * ind = SrcRegisterInd.File - * [2] = SrcRegisterInd.Index - * .x = SrcRegisterInd.SwizzleX + * ind = Indirect.File + * [2] = Indirect.Index + * .x = Indirect.SwizzleX */ - if (reg->SrcRegister.Indirect) { + if (reg->Register.Indirect) { union tgsi_exec_channel index2; union tgsi_exec_channel indir_index; const uint execmask = mach->ExecMask; @@ -1259,22 +1091,22 @@ fetch_source( index2.i[0] = index2.i[1] = index2.i[2] = - index2.i[3] = reg->SrcRegisterInd.Index; + index2.i[3] = reg->Indirect.Index; /* get current value of address register[swizzle] */ - swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterInd, CHAN_X ); + swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, CHAN_X ); fetch_src_file_channel( mach, - reg->SrcRegisterInd.File, + reg->Indirect.File, swizzle, &index2, &indir_index ); /* add value of address register to the offset */ - index.i[0] += (int) indir_index.f[0]; - index.i[1] += (int) indir_index.f[1]; - index.i[2] += (int) indir_index.f[2]; - index.i[3] += (int) indir_index.f[3]; + index.i[0] += indir_index.i[0]; + index.i[1] += indir_index.i[1]; + index.i[2] += indir_index.i[2]; + index.i[3] += indir_index.i[3]; /* for disabled execution channels, zero-out the index to * avoid using a potential garbage value. @@ -1291,15 +1123,16 @@ fetch_source( * * file[1][3] == file[1*sizeof(file[1])+3], * where: - * [3] = SrcRegisterDim.Index + * [3] = Dimension.Index */ - if (reg->SrcRegister.Dimension) { + if (reg->Register.Dimension) { /* The size of the first-order array depends on the register file type. * We need to multiply the index to the first array to get an effective, * "flat" index that points to the beginning of the second-order array. */ - switch (reg->SrcRegister.File) { + switch (reg->Register.File) { case TGSI_FILE_INPUT: + case TGSI_FILE_SYSTEM_VALUE: index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; @@ -1315,10 +1148,10 @@ fetch_source( assert( 0 ); } - index.i[0] += reg->SrcRegisterDim.Index; - index.i[1] += reg->SrcRegisterDim.Index; - index.i[2] += reg->SrcRegisterDim.Index; - index.i[3] += reg->SrcRegisterDim.Index; + index.i[0] += reg->Dimension.Index; + index.i[1] += reg->Dimension.Index; + index.i[2] += reg->Dimension.Index; + index.i[3] += reg->Dimension.Index; /* Again, the second subscript index can be addressed indirectly * identically to the first one. @@ -1327,11 +1160,11 @@ fetch_source( * * file[1][ind[4].y+3], * where: - * ind = SrcRegisterDimInd.File - * [4] = SrcRegisterDimInd.Index - * .y = SrcRegisterDimInd.SwizzleX + * ind = DimIndirect.File + * [4] = DimIndirect.Index + * .y = DimIndirect.SwizzleX */ - if (reg->SrcRegisterDim.Indirect) { + if (reg->Dimension.Indirect) { union tgsi_exec_channel index2; union tgsi_exec_channel indir_index; const uint execmask = mach->ExecMask; @@ -1340,20 +1173,20 @@ fetch_source( index2.i[0] = index2.i[1] = index2.i[2] = - index2.i[3] = reg->SrcRegisterDimInd.Index; + index2.i[3] = reg->DimIndirect.Index; - swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterDimInd, CHAN_X ); + swizzle = tgsi_util_get_src_register_swizzle( ®->DimIndirect, CHAN_X ); fetch_src_file_channel( mach, - reg->SrcRegisterDimInd.File, + reg->DimIndirect.File, swizzle, &index2, &indir_index ); - index.i[0] += (int) indir_index.f[0]; - index.i[1] += (int) indir_index.f[1]; - index.i[2] += (int) indir_index.f[2]; - index.i[3] += (int) indir_index.f[3]; + index.i[0] += indir_index.i[0]; + index.i[1] += indir_index.i[1]; + index.i[2] += indir_index.i[2]; + index.i[3] += indir_index.i[3]; /* for disabled execution channels, zero-out the index to * avoid using a potential garbage value. @@ -1365,7 +1198,7 @@ fetch_source( } /* If by any chance there was a need for a 3D array of register - * files, we would have to check whether SrcRegisterDim is followed + * files, we would have to check whether Dimension is followed * by a dimension register and continue the saga. */ } @@ -1373,41 +1206,35 @@ fetch_source( swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); fetch_src_file_channel( mach, - reg->SrcRegister.File, + reg->Register.File, swizzle, &index, chan ); - switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { - case TGSI_UTIL_SIGN_CLEAR: - micro_abs( chan, chan ); - break; - - case TGSI_UTIL_SIGN_SET: - micro_abs( chan, chan ); - micro_neg( chan, chan ); - break; - - case TGSI_UTIL_SIGN_TOGGLE: - micro_neg( chan, chan ); - break; - - case TGSI_UTIL_SIGN_KEEP: - break; + if (reg->Register.Absolute) { + if (src_datatype == TGSI_EXEC_DATA_FLOAT) { + micro_abs(chan, chan); + } else { + micro_iabs(chan, chan); + } } - if (reg->SrcRegisterExtMod.Complement) { - micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan ); + if (reg->Register.Negate) { + if (src_datatype == TGSI_EXEC_DATA_FLOAT) { + micro_neg(chan, chan); + } else { + micro_ineg(chan, chan); + } } } static void -store_dest( - struct tgsi_exec_machine *mach, - const union tgsi_exec_channel *chan, - const struct tgsi_full_dst_register *reg, - const struct tgsi_full_instruction *inst, - uint chan_index ) +store_dest(struct tgsi_exec_machine *mach, + const union tgsi_exec_channel *chan, + const struct tgsi_full_dst_register *reg, + const struct tgsi_full_instruction *inst, + uint chan_index, + enum tgsi_exec_datatype dst_datatype) { uint i; union tgsi_exec_channel null; @@ -1416,9 +1243,9 @@ store_dest( int offset = 0; /* indirection offset */ int index; -#ifdef DEBUG - check_inf_or_nan(chan); -#endif + if (dst_datatype == TGSI_EXEC_DATA_FLOAT) { + CHECK_INF_OR_NAN(chan); + } /* There is an extra source register that indirectly subscripts * a register file. The direct index now becomes an offset @@ -1426,11 +1253,11 @@ store_dest( * * file[ind[2].x+1], * where: - * ind = DstRegisterInd.File - * [2] = DstRegisterInd.Index - * .x = DstRegisterInd.SwizzleX + * ind = Indirect.File + * [2] = Indirect.Index + * .x = Indirect.SwizzleX */ - if (reg->DstRegister.Indirect) { + if (reg->Register.Indirect) { union tgsi_exec_channel index; union tgsi_exec_channel indir_index; uint swizzle; @@ -1439,49 +1266,65 @@ store_dest( index.i[0] = index.i[1] = index.i[2] = - index.i[3] = reg->DstRegisterInd.Index; + index.i[3] = reg->Indirect.Index; /* get current value of address register[swizzle] */ - swizzle = tgsi_util_get_src_register_swizzle( ®->DstRegisterInd, CHAN_X ); + swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, CHAN_X ); /* fetch values from the address/indirection register */ fetch_src_file_channel( mach, - reg->DstRegisterInd.File, + reg->Indirect.File, swizzle, &index, &indir_index ); /* save indirection offset */ - offset = (int) indir_index.f[0]; + offset = indir_index.i[0]; } - switch (reg->DstRegister.File) { + switch (reg->Register.File) { case TGSI_FILE_NULL: dst = &null; break; case TGSI_FILE_OUTPUT: index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] - + reg->DstRegister.Index; + + reg->Register.Index; dst = &mach->Outputs[offset + index].xyzw[chan_index]; +#if 0 + if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) { + fprintf(stderr, "STORING OUT[%d] mask(%d), = (", offset + index, execmask); + for (i = 0; i < QUAD_SIZE; i++) + if (execmask & (1 << i)) + fprintf(stderr, "%f, ", chan->f[i]); + fprintf(stderr, ")\n"); + } +#endif break; case TGSI_FILE_TEMPORARY: - index = reg->DstRegister.Index; + index = reg->Register.Index; assert( index < TGSI_EXEC_NUM_TEMPS ); dst = &mach->Temps[offset + index].xyzw[chan_index]; break; case TGSI_FILE_ADDRESS: - index = reg->DstRegister.Index; + index = reg->Register.Index; dst = &mach->Addrs[index].xyzw[chan_index]; break; + case TGSI_FILE_LOOP: + assert(reg->Register.Index == 0); + assert(mach->LoopCounterStackTop > 0); + assert(chan_index == CHAN_X); + dst = &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[chan_index]; + break; + case TGSI_FILE_PREDICATE: - index = reg->DstRegister.Index; + index = reg->Register.Index; assert(index < TGSI_EXEC_NUM_PREDS); - dst = &mach->Addrs[index].xyzw[chan_index]; + dst = &mach->Predicates[index].xyzw[chan_index]; break; default: @@ -1489,6 +1332,47 @@ store_dest( return; } + if (inst->Instruction.Predicate) { + uint swizzle; + union tgsi_exec_channel *pred; + + switch (chan_index) { + case CHAN_X: + swizzle = inst->Predicate.SwizzleX; + break; + case CHAN_Y: + swizzle = inst->Predicate.SwizzleY; + break; + case CHAN_Z: + swizzle = inst->Predicate.SwizzleZ; + break; + case CHAN_W: + swizzle = inst->Predicate.SwizzleW; + break; + default: + assert(0); + return; + } + + assert(inst->Predicate.Index == 0); + + pred = &mach->Predicates[inst->Predicate.Index].xyzw[swizzle]; + + if (inst->Predicate.Negate) { + for (i = 0; i < QUAD_SIZE; i++) { + if (pred->u[i]) { + execmask &= ~(1 << i); + } + } + } else { + for (i = 0; i < QUAD_SIZE; i++) { + if (!pred->u[i]) { + execmask &= ~(1 << i); + } + } + } + } + switch (inst->Instruction.Saturate) { case TGSI_SAT_NONE: for (i = 0; i < QUAD_SIZE; i++) @@ -1526,10 +1410,10 @@ store_dest( } #define FETCH(VAL,INDEX,CHAN)\ - fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN) + fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT) #define STORE(VAL,INDEX,CHAN)\ - store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN ) + store_dest(mach, VAL, &inst->Dst[INDEX], inst, CHAN, TGSI_EXEC_DATA_FLOAT) /** @@ -1555,7 +1439,7 @@ exec_kil(struct tgsi_exec_machine *mach, /* unswizzle channel */ swizzle = tgsi_util_get_full_src_register_swizzle ( - &inst->FullSrcRegisters[0], + &inst->Src[0], chan_index); /* check if the component has not been already tested */ @@ -1587,6 +1471,35 @@ exec_kilp(struct tgsi_exec_machine *mach, mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; } +static void +emit_vertex(struct tgsi_exec_machine *mach) +{ + /* FIXME: check for exec mask correctly + unsigned i; + for (i = 0; i < QUAD_SIZE; ++i) { + if ((mach->ExecMask & (1 << i))) + */ + if (mach->ExecMask) { + mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += mach->NumOutputs; + mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; + } +} + +static void +emit_primitive(struct tgsi_exec_machine *mach) +{ + unsigned *prim_count = &mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]; + /* FIXME: check for exec mask correctly + unsigned i; + for (i = 0; i < QUAD_SIZE; ++i) { + if ((mach->ExecMask & (1 << i))) + */ + if (mach->ExecMask) { + ++(*prim_count); + debug_assert((*prim_count * mach->NumOutputs) < mach->MaxGeometryShaderOutputs); + mach->Primitives[*prim_count] = 0; + } +} /* * Fetch a four texture samples using STR texture coordinates. @@ -1622,14 +1535,14 @@ exec_tex(struct tgsi_exec_machine *mach, boolean biasLod, boolean projected) { - const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; + const uint unit = inst->Src[1].Register.Index; union tgsi_exec_channel r[4]; uint chan_index; float lodBias; /* debug_printf("Sampler %u unit %u\n", sampler, unit); */ - switch (inst->InstructionExtTexture.Texture) { + switch (inst->Texture.Texture) { case TGSI_TEXTURE_1D: case TGSI_TEXTURE_SHADOW1D: @@ -1715,6 +1628,64 @@ exec_tex(struct tgsi_exec_machine *mach, } } +static void +exec_txd(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + const uint unit = inst->Src[3].Register.Index; + union tgsi_exec_channel r[4]; + uint chan_index; + + /* + * XXX: This is fake TXD -- the derivatives are not taken into account, yet. + */ + + switch (inst->Texture.Texture) { + case TGSI_TEXTURE_1D: + case TGSI_TEXTURE_SHADOW1D: + + FETCH(&r[0], 0, CHAN_X); + + fetch_texel(mach->Samplers[unit], + &r[0], &ZeroVec, &ZeroVec, 0.0f, /* S, T, P, BIAS */ + &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ + break; + + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: + + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + fetch_texel(mach->Samplers[unit], + &r[0], &r[1], &r[2], 0.0f, /* inputs */ + &r[0], &r[1], &r[2], &r[3]); /* outputs */ + break; + + case TGSI_TEXTURE_3D: + case TGSI_TEXTURE_CUBE: + + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + fetch_texel(mach->Samplers[unit], + &r[0], &r[1], &r[2], 0.0f, + &r[0], &r[1], &r[2], &r[3]); + break; + + default: + assert(0); + } + + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&r[chan_index], 0, chan_index); + } +} + /** * Evaluate a constant-valued coefficient at the position of the @@ -1784,53 +1755,59 @@ typedef void (* eval_coef_func)( unsigned chan ); static void -exec_declaration( - struct tgsi_exec_machine *mach, - const struct tgsi_full_declaration *decl ) +exec_declaration(struct tgsi_exec_machine *mach, + const struct tgsi_full_declaration *decl) { - if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { - if( decl->Declaration.File == TGSI_FILE_INPUT ) { - unsigned first, last, mask; - eval_coef_func eval; + if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { + if (decl->Declaration.File == TGSI_FILE_INPUT || + decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) { + uint first, last, mask; - first = decl->DeclarationRange.First; - last = decl->DeclarationRange.Last; + first = decl->Range.First; + last = decl->Range.Last; mask = decl->Declaration.UsageMask; - switch( decl->Declaration.Interpolate ) { - case TGSI_INTERPOLATE_CONSTANT: - eval = eval_constant_coef; - break; + if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) { + assert(decl->Semantic.Index == 0); + assert(first == last); + assert(mask == TGSI_WRITEMASK_XYZW); - case TGSI_INTERPOLATE_LINEAR: - eval = eval_linear_coef; - break; + mach->Inputs[first] = mach->QuadPos; + } else if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) { + uint i; - case TGSI_INTERPOLATE_PERSPECTIVE: - eval = eval_perspective_coef; - break; + assert(decl->Semantic.Index == 0); + assert(first == last); - default: - assert( 0 ); - return; - } - - if( mask == TGSI_WRITEMASK_XYZW ) { - unsigned i, j; - - for( i = first; i <= last; i++ ) { - for( j = 0; j < NUM_CHANNELS; j++ ) { - eval( mach, i, j ); - } + for (i = 0; i < QUAD_SIZE; i++) { + mach->Inputs[first].xyzw[0].f[i] = mach->Face; + } + } else { + eval_coef_func eval; + uint i, j; + + switch (decl->Declaration.Interpolate) { + case TGSI_INTERPOLATE_CONSTANT: + eval = eval_constant_coef; + break; + + case TGSI_INTERPOLATE_LINEAR: + eval = eval_linear_coef; + break; + + case TGSI_INTERPOLATE_PERSPECTIVE: + eval = eval_perspective_coef; + break; + + default: + assert(0); + return; } - } - else { - unsigned i, j; - for( j = 0; j < NUM_CHANNELS; j++ ) { - if( mask & (1 << j) ) { - for( i = first; i <= last; i++ ) { - eval( mach, i, j ); + for (j = 0; j < NUM_CHANNELS; j++) { + if (mask & (1 << j)) { + for (i = first; i <= last; i++) { + eval(mach, i, j); } } } @@ -1839,6 +1816,461 @@ exec_declaration( } } +typedef void (* micro_op)(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src); + +static void +exec_scalar_unary(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + micro_op op, + enum tgsi_exec_datatype dst_datatype, + enum tgsi_exec_datatype src_datatype) +{ + unsigned int chan; + union tgsi_exec_channel src; + union tgsi_exec_channel dst; + + fetch_source(mach, &src, &inst->Src[0], CHAN_X, src_datatype); + op(&dst, &src); + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype); + } + } +} + +static void +exec_vector_unary(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + micro_op op, + enum tgsi_exec_datatype dst_datatype, + enum tgsi_exec_datatype src_datatype) +{ + unsigned int chan; + struct tgsi_exec_vector dst; + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + union tgsi_exec_channel src; + + fetch_source(mach, &src, &inst->Src[0], chan, src_datatype); + op(&dst.xyzw[chan], &src); + } + } + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); + } + } +} + +static void +exec_vector_binary(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + micro_op op, + enum tgsi_exec_datatype dst_datatype, + enum tgsi_exec_datatype src_datatype) +{ + unsigned int chan; + struct tgsi_exec_vector dst; + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + union tgsi_exec_channel src[2]; + + fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); + fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); + op(&dst.xyzw[chan], src); + } + } + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); + } + } +} + +static void +exec_vector_trinary(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + micro_op op, + enum tgsi_exec_datatype dst_datatype, + enum tgsi_exec_datatype src_datatype) +{ + unsigned int chan; + struct tgsi_exec_vector dst; + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + union tgsi_exec_channel src[3]; + + fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); + fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); + fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype); + op(&dst.xyzw[chan], src); + } + } + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); + } + } +} + +static void +exec_break(struct tgsi_exec_machine *mach) +{ + if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) { + /* turn off loop channels for each enabled exec channel */ + mach->LoopMask &= ~mach->ExecMask; + /* Todo: if mach->LoopMask == 0, jump to end of loop */ + UPDATE_EXEC_MASK(mach); + } else { + assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH); + + mach->Switch.mask = 0x0; + + UPDATE_EXEC_MASK(mach); + } +} + +static void +exec_switch(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING); + assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); + + mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch; + fetch_source(mach, &mach->Switch.selector, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_UINT); + mach->Switch.mask = 0x0; + mach->Switch.defaultMask = 0x0; + + mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; + mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH; + + UPDATE_EXEC_MASK(mach); +} + +static void +exec_case(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask; + union tgsi_exec_channel src; + uint mask = 0; + + fetch_source(mach, &src, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_UINT); + + if (mach->Switch.selector.u[0] == src.u[0]) { + mask |= 0x1; + } + if (mach->Switch.selector.u[1] == src.u[1]) { + mask |= 0x2; + } + if (mach->Switch.selector.u[2] == src.u[2]) { + mask |= 0x4; + } + if (mach->Switch.selector.u[3] == src.u[3]) { + mask |= 0x8; + } + + mach->Switch.defaultMask |= mask; + + mach->Switch.mask |= mask & prevMask; + + UPDATE_EXEC_MASK(mach); +} + +static void +exec_default(struct tgsi_exec_machine *mach) +{ + uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask; + + mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask; + + UPDATE_EXEC_MASK(mach); +} + +static void +exec_endswitch(struct tgsi_exec_machine *mach) +{ + mach->Switch = mach->SwitchStack[--mach->SwitchStackTop]; + mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; + + UPDATE_EXEC_MASK(mach); +} + +static void +micro_i2f(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = (float)src->i[0]; + dst->f[1] = (float)src->i[1]; + dst->f[2] = (float)src->i[2]; + dst->f[3] = (float)src->i[3]; +} + +static void +micro_not(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = ~src->u[0]; + dst->u[1] = ~src->u[1]; + dst->u[2] = ~src->u[2]; + dst->u[3] = ~src->u[3]; +} + +static void +micro_shl(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] << src[1].u[0]; + dst->u[1] = src[0].u[1] << src[1].u[1]; + dst->u[2] = src[0].u[2] << src[1].u[2]; + dst->u[3] = src[0].u[3] << src[1].u[3]; +} + +static void +micro_and(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] & src[1].u[0]; + dst->u[1] = src[0].u[1] & src[1].u[1]; + dst->u[2] = src[0].u[2] & src[1].u[2]; + dst->u[3] = src[0].u[3] & src[1].u[3]; +} + +static void +micro_or(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] | src[1].u[0]; + dst->u[1] = src[0].u[1] | src[1].u[1]; + dst->u[2] = src[0].u[2] | src[1].u[2]; + dst->u[3] = src[0].u[3] | src[1].u[3]; +} + +static void +micro_xor(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] ^ src[1].u[0]; + dst->u[1] = src[0].u[1] ^ src[1].u[1]; + dst->u[2] = src[0].u[2] ^ src[1].u[2]; + dst->u[3] = src[0].u[3] ^ src[1].u[3]; +} + +static void +micro_f2i(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = (int)src->f[0]; + dst->i[1] = (int)src->f[1]; + dst->i[2] = (int)src->f[2]; + dst->i[3] = (int)src->f[3]; +} + +static void +micro_idiv(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = src[0].i[0] / src[1].i[0]; + dst->i[1] = src[0].i[1] / src[1].i[1]; + dst->i[2] = src[0].i[2] / src[1].i[2]; + dst->i[3] = src[0].i[3] / src[1].i[3]; +} + +static void +micro_imax(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = src[0].i[0] > src[1].i[0] ? src[0].i[0] : src[1].i[0]; + dst->i[1] = src[0].i[1] > src[1].i[1] ? src[0].i[1] : src[1].i[1]; + dst->i[2] = src[0].i[2] > src[1].i[2] ? src[0].i[2] : src[1].i[2]; + dst->i[3] = src[0].i[3] > src[1].i[3] ? src[0].i[3] : src[1].i[3]; +} + +static void +micro_imin(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = src[0].i[0] < src[1].i[0] ? src[0].i[0] : src[1].i[0]; + dst->i[1] = src[0].i[1] < src[1].i[1] ? src[0].i[1] : src[1].i[1]; + dst->i[2] = src[0].i[2] < src[1].i[2] ? src[0].i[2] : src[1].i[2]; + dst->i[3] = src[0].i[3] < src[1].i[3] ? src[0].i[3] : src[1].i[3]; +} + +static void +micro_isge(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = src[0].i[0] >= src[1].i[0] ? -1 : 0; + dst->i[1] = src[0].i[1] >= src[1].i[1] ? -1 : 0; + dst->i[2] = src[0].i[2] >= src[1].i[2] ? -1 : 0; + dst->i[3] = src[0].i[3] >= src[1].i[3] ? -1 : 0; +} + +static void +micro_ishr(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = src[0].i[0] >> src[1].i[0]; + dst->i[1] = src[0].i[1] >> src[1].i[1]; + dst->i[2] = src[0].i[2] >> src[1].i[2]; + dst->i[3] = src[0].i[3] >> src[1].i[3]; +} + +static void +micro_islt(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = src[0].i[0] < src[1].i[0] ? -1 : 0; + dst->i[1] = src[0].i[1] < src[1].i[1] ? -1 : 0; + dst->i[2] = src[0].i[2] < src[1].i[2] ? -1 : 0; + dst->i[3] = src[0].i[3] < src[1].i[3] ? -1 : 0; +} + +static void +micro_f2u(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = (uint)src->f[0]; + dst->u[1] = (uint)src->f[1]; + dst->u[2] = (uint)src->f[2]; + dst->u[3] = (uint)src->f[3]; +} + +static void +micro_u2f(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = (float)src->u[0]; + dst->f[1] = (float)src->u[1]; + dst->f[2] = (float)src->u[2]; + dst->f[3] = (float)src->u[3]; +} + +static void +micro_uadd(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] + src[1].u[0]; + dst->u[1] = src[0].u[1] + src[1].u[1]; + dst->u[2] = src[0].u[2] + src[1].u[2]; + dst->u[3] = src[0].u[3] + src[1].u[3]; +} + +static void +micro_udiv(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] / src[1].u[0]; + dst->u[1] = src[0].u[1] / src[1].u[1]; + dst->u[2] = src[0].u[2] / src[1].u[2]; + dst->u[3] = src[0].u[3] / src[1].u[3]; +} + +static void +micro_umad(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] * src[1].u[0] + src[2].u[0]; + dst->u[1] = src[0].u[1] * src[1].u[1] + src[2].u[1]; + dst->u[2] = src[0].u[2] * src[1].u[2] + src[2].u[2]; + dst->u[3] = src[0].u[3] * src[1].u[3] + src[2].u[3]; +} + +static void +micro_umax(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] > src[1].u[0] ? src[0].u[0] : src[1].u[0]; + dst->u[1] = src[0].u[1] > src[1].u[1] ? src[0].u[1] : src[1].u[1]; + dst->u[2] = src[0].u[2] > src[1].u[2] ? src[0].u[2] : src[1].u[2]; + dst->u[3] = src[0].u[3] > src[1].u[3] ? src[0].u[3] : src[1].u[3]; +} + +static void +micro_umin(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] < src[1].u[0] ? src[0].u[0] : src[1].u[0]; + dst->u[1] = src[0].u[1] < src[1].u[1] ? src[0].u[1] : src[1].u[1]; + dst->u[2] = src[0].u[2] < src[1].u[2] ? src[0].u[2] : src[1].u[2]; + dst->u[3] = src[0].u[3] < src[1].u[3] ? src[0].u[3] : src[1].u[3]; +} + +static void +micro_umod(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] % src[1].u[0]; + dst->u[1] = src[0].u[1] % src[1].u[1]; + dst->u[2] = src[0].u[2] % src[1].u[2]; + dst->u[3] = src[0].u[3] % src[1].u[3]; +} + +static void +micro_umul(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] * src[1].u[0]; + dst->u[1] = src[0].u[1] * src[1].u[1]; + dst->u[2] = src[0].u[2] * src[1].u[2]; + dst->u[3] = src[0].u[3] * src[1].u[3]; +} + +static void +micro_useq(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] == src[1].u[0] ? ~0 : 0; + dst->u[1] = src[0].u[1] == src[1].u[1] ? ~0 : 0; + dst->u[2] = src[0].u[2] == src[1].u[2] ? ~0 : 0; + dst->u[3] = src[0].u[3] == src[1].u[3] ? ~0 : 0; +} + +static void +micro_usge(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] >= src[1].u[0] ? ~0 : 0; + dst->u[1] = src[0].u[1] >= src[1].u[1] ? ~0 : 0; + dst->u[2] = src[0].u[2] >= src[1].u[2] ? ~0 : 0; + dst->u[3] = src[0].u[3] >= src[1].u[3] ? ~0 : 0; +} + +static void +micro_ushr(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] >> src[1].u[0]; + dst->u[1] = src[0].u[1] >> src[1].u[1]; + dst->u[2] = src[0].u[2] >> src[1].u[2]; + dst->u[3] = src[0].u[3] >> src[1].u[3]; +} + +static void +micro_uslt(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] < src[1].u[0] ? ~0 : 0; + dst->u[1] = src[0].u[1] < src[1].u[1] ? ~0 : 0; + dst->u[2] = src[0].u[2] < src[1].u[2] ? ~0 : 0; + dst->u[3] = src[0].u[3] < src[1].u[3] ? ~0 : 0; +} + +static void +micro_usne(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] != src[1].u[0] ? ~0 : 0; + dst->u[1] = src[0].u[1] != src[1].u[1] ? ~0 : 0; + dst->u[2] = src[0].u[2] != src[1].u[2] ? ~0 : 0; + dst->u[3] = src[0].u[3] != src[1].u[3] ? ~0 : 0; +} + static void exec_instruction( struct tgsi_exec_machine *mach, @@ -1847,50 +2279,24 @@ exec_instruction( { uint chan_index; union tgsi_exec_channel r[10]; + union tgsi_exec_channel d[8]; (*pc)++; switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ARL: - case TGSI_OPCODE_FLR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_flr( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); - } + exec_vector_unary(mach, inst, micro_arl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_MOV: - if (inst->Flags & SOA_DEPENDENCY_FLAG) { - /* Do all fetches into temp regs, then do all stores to avoid - * intermediate/accidental clobbering. This could be done all the - * time for MOV but for other instructions we'll need more temps... - */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[chan_index], 0, chan_index ); - } - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[chan_index], 0, chan_index ); - } - } - else { - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - STORE( &r[0], 0, chan_index ); - } - } + exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_LIT: - if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); - } - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { FETCH( &r[0], 0, CHAN_X ); if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { - micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - STORE( &r[0], 0, CHAN_Y ); + micro_max(&d[CHAN_Y], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); } if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { @@ -1901,34 +2307,30 @@ exec_instruction( micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] ); micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] ); micro_pow( &r[1], &r[1], &r[2] ); - micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - STORE( &r[0], 0, CHAN_Z ); + micro_lt(&d[CHAN_Z], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); } - } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { + STORE(&d[CHAN_Y], 0, CHAN_Y); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { + STORE(&d[CHAN_Z], 0, CHAN_Z); + } + } + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); + } if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); } break; case TGSI_OPCODE_RCP: - /* TGSI_OPCODE_RECIP */ - FETCH( &r[0], 0, CHAN_X ); - micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_scalar_unary(mach, inst, micro_rcp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_RSQ: - /* TGSI_OPCODE_RECIPSQRT */ - FETCH( &r[0], 0, CHAN_X ); - micro_abs( &r[0], &r[0] ); - micro_sqrt( &r[0], &r[0] ); - micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_scalar_unary(mach, inst, micro_rsq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_EXP: @@ -1973,14 +2375,13 @@ exec_instruction( break; case TGSI_OPCODE_MUL: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) - { + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { FETCH(&r[0], 0, chan_index); FETCH(&r[1], 1, chan_index); - - micro_mul( &r[0], &r[0], &r[1] ); - - STORE(&r[0], 0, chan_index); + micro_mul(&d[chan_index], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -1988,8 +2389,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_add( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); + micro_add(&d[chan_index], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2045,25 +2448,29 @@ exec_instruction( break; case TGSI_OPCODE_DST: - if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); - } - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { FETCH( &r[0], 0, CHAN_Y ); FETCH( &r[1], 1, CHAN_Y); - micro_mul( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, CHAN_Y ); + micro_mul(&d[CHAN_Y], &r[0], &r[1]); } - if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - FETCH( &r[0], 0, CHAN_Z ); - STORE( &r[0], 0, CHAN_Z ); + FETCH(&d[CHAN_Z], 0, CHAN_Z); } - if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { - FETCH( &r[0], 1, CHAN_W ); - STORE( &r[0], 0, CHAN_W ); + FETCH(&d[CHAN_W], 1, CHAN_W); + } + + if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { + STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { + STORE(&d[CHAN_Y], 0, CHAN_Y); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { + STORE(&d[CHAN_Z], 0, CHAN_Z); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { + STORE(&d[CHAN_W], 0, CHAN_W); } break; @@ -2073,9 +2480,10 @@ exec_instruction( FETCH(&r[1], 1, chan_index); /* XXX use micro_min()?? */ - micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] ); - - STORE(&r[0], 0, chan_index); + micro_lt(&d[chan_index], &r[0], &r[1], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2085,67 +2493,38 @@ exec_instruction( FETCH(&r[1], 1, chan_index); /* XXX use micro_max()?? */ - micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] ); - - STORE(&r[0], 0, chan_index ); + micro_lt(&d[chan_index], &r[0], &r[1], &r[1], &r[0] ); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; case TGSI_OPCODE_SLT: - /* TGSI_OPCODE_SETLT */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - STORE( &r[0], 0, chan_index ); - } + exec_vector_binary(mach, inst, micro_slt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SGE: - /* TGSI_OPCODE_SETGE */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_le( &r[0], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - STORE( &r[0], 0, chan_index ); - } + exec_vector_binary(mach, inst, micro_sge, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_MAD: - /* TGSI_OPCODE_MADD */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_mul( &r[0], &r[0], &r[1] ); - FETCH( &r[1], 2, chan_index ); - micro_add( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); - } + exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SUB: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH(&r[0], 0, chan_index); FETCH(&r[1], 1, chan_index); - - micro_sub( &r[0], &r[0], &r[1] ); - - STORE(&r[0], 0, chan_index); + micro_sub(&d[chan_index], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; case TGSI_OPCODE_LRP: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - FETCH(&r[2], 2, chan_index); - - micro_sub( &r[1], &r[1], &r[2] ); - micro_mul( &r[0], &r[0], &r[1] ); - micro_add( &r[0], &r[0], &r[2] ); - - STORE(&r[0], 0, chan_index); - } + exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_CND: @@ -2153,8 +2532,10 @@ exec_instruction( FETCH(&r[0], 0, chan_index); FETCH(&r[1], 1, chan_index); FETCH(&r[2], 2, chan_index); - micro_lt(&r[0], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]); - STORE(&r[0], 0, chan_index); + micro_lt(&d[chan_index], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2177,11 +2558,7 @@ exec_instruction( break; case TGSI_OPCODE_FRC: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_frc( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); - } + exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_CLAMP: @@ -2190,40 +2567,27 @@ exec_instruction( FETCH(&r[1], 1, chan_index); micro_max(&r[0], &r[0], &r[1]); FETCH(&r[1], 2, chan_index); - micro_min(&r[0], &r[0], &r[1]); - STORE(&r[0], 0, chan_index); + micro_min(&d[chan_index], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; + case TGSI_OPCODE_FLR: + exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); + break; + case TGSI_OPCODE_ROUND: - case TGSI_OPCODE_ARR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_rnd( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); - } + exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_EX2: - FETCH(&r[0], 0, CHAN_X); - -#if FAST_MATH - micro_exp2( &r[0], &r[0] ); -#else - micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] ); -#endif - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_scalar_unary(mach, inst, micro_exp2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_LG2: - FETCH( &r[0], 0, CHAN_X ); - micro_lg2( &r[0], &r[0] ); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_scalar_unary(mach, inst, micro_lg2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_POW: @@ -2247,11 +2611,7 @@ exec_instruction( FETCH(&r[4], 1, CHAN_Y); micro_mul( &r[5], &r[3], &r[4] ); - micro_sub( &r[2], &r[2], &r[5] ); - - if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - STORE( &r[2], 0, CHAN_X ); - } + micro_sub(&d[CHAN_X], &r[2], &r[5]); FETCH(&r[2], 1, CHAN_X); @@ -2260,34 +2620,29 @@ exec_instruction( FETCH(&r[5], 0, CHAN_X); micro_mul( &r[1], &r[1], &r[5] ); - micro_sub( &r[3], &r[3], &r[1] ); - - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { - STORE( &r[3], 0, CHAN_Y ); - } + micro_sub(&d[CHAN_Y], &r[3], &r[1]); micro_mul( &r[5], &r[5], &r[4] ); micro_mul( &r[0], &r[0], &r[2] ); - micro_sub( &r[5], &r[5], &r[0] ); + micro_sub(&d[CHAN_Z], &r[5], &r[0]); - if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - STORE( &r[5], 0, CHAN_Z ); + if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { + STORE(&d[CHAN_X], 0, CHAN_X); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { + STORE(&d[CHAN_Y], 0, CHAN_Y); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { + STORE(&d[CHAN_Z], 0, CHAN_Z); } - if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); } break; - case TGSI_OPCODE_ABS: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - - micro_abs( &r[0], &r[0] ); - - STORE(&r[0], 0, chan_index); - } - break; + case TGSI_OPCODE_ABS: + exec_vector_unary(mach, inst, micro_abs, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); + break; case TGSI_OPCODE_RCC: FETCH(&r[0], 0, CHAN_X); @@ -2326,29 +2681,15 @@ exec_instruction( break; case TGSI_OPCODE_COS: - FETCH(&r[0], 0, CHAN_X); - - micro_cos( &r[0], &r[0] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_DDX: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_ddx( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); - } + exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_DDY: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_ddy( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); - } + exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_KILP: @@ -2425,14 +2766,7 @@ exec_instruction( break; case TGSI_OPCODE_SEQ: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_eq( &r[0], &r[0], &r[1], - &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], - &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - STORE( &r[0], 0, chan_index ); - } + exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SFL: @@ -2442,38 +2776,19 @@ exec_instruction( break; case TGSI_OPCODE_SGT: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); - STORE( &r[0], 0, chan_index ); - } + exec_vector_binary(mach, inst, micro_sgt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SIN: - FETCH( &r[0], 0, CHAN_X ); - micro_sin( &r[0], &r[0] ); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_scalar_unary(mach, inst, micro_sin, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SLE: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - STORE( &r[0], 0, chan_index ); - } + exec_vector_binary(mach, inst, micro_sle, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SNE: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); - STORE( &r[0], 0, chan_index ); - } + exec_vector_binary(mach, inst, micro_sne, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_STR: @@ -2502,7 +2817,7 @@ exec_instruction( /* src[1] = d[strq]/dx */ /* src[2] = d[strq]/dy */ /* src[3] = sampler unit */ - assert (0); + exec_txd(mach, inst); break; case TGSI_OPCODE_TXL: @@ -2546,13 +2861,8 @@ exec_instruction( micro_mul(&r[3], &r[3], &r[1]); micro_add(&r[2], &r[2], &r[3]); FETCH(&r[3], 0, CHAN_X); - micro_add(&r[2], &r[2], &r[3]); - if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { - STORE(&r[2], 0, CHAN_X); - } - if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { - STORE(&r[2], 0, CHAN_Z); - } + micro_add(&d[CHAN_X], &r[2], &r[3]); + } if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) || IS_CHANNEL_ENABLED(*inst, CHAN_W)) { @@ -2562,13 +2872,20 @@ exec_instruction( micro_mul(&r[3], &r[3], &r[1]); micro_add(&r[2], &r[2], &r[3]); FETCH(&r[3], 0, CHAN_Y); - micro_add(&r[2], &r[2], &r[3]); - if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { - STORE(&r[2], 0, CHAN_Y); - } - if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { - STORE(&r[2], 0, CHAN_W); - } + micro_add(&d[CHAN_Y], &r[2], &r[3]); + + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { + STORE(&d[CHAN_X], 0, CHAN_X); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { + STORE(&d[CHAN_Y], 0, CHAN_Y); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { + STORE(&d[CHAN_X], 0, CHAN_Z); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { + STORE(&d[CHAN_Y], 0, CHAN_W); } break; @@ -2576,6 +2893,10 @@ exec_instruction( assert (0); break; + case TGSI_OPCODE_ARR: + exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); + break; + case TGSI_OPCODE_BRA: assert (0); break; @@ -2595,6 +2916,8 @@ exec_instruction( mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop; mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop; mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop; + mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop; + mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop; /* note that PC was already incremented above */ mach->CallStack[mach->CallStackTop].ReturnAddr = *pc; @@ -2602,16 +2925,21 @@ exec_instruction( /* Second, push the Cond, Loop, Cont, Func stacks */ assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); - mach->CondStack[mach->CondStackTop++] = mach->CondMask; assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->ContStack[mach->ContStackTop++] = mach->ContMask; + assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING); + assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); + + mach->CondStack[mach->CondStackTop++] = mach->CondMask; + mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; + mach->ContStack[mach->ContStackTop++] = mach->ContMask; + mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch; + mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; /* Finally, jump to the subroutine */ - *pc = inst->InstructionExtLabel.Label; + *pc = inst->Label.Label; } break; @@ -2640,6 +2968,12 @@ exec_instruction( mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; mach->ContMask = mach->ContStack[mach->ContStackTop]; + mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop; + mach->Switch = mach->SwitchStack[mach->SwitchStackTop]; + + mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop; + mach->BreakType = mach->BreakStack[mach->BreakStackTop]; + assert(mach->FuncStackTop > 0); mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; @@ -2650,12 +2984,7 @@ exec_instruction( break; case TGSI_OPCODE_SSG: - /* TGSI_OPCODE_SGN */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_sgn( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); - } + exec_vector_unary(mach, inst, micro_sgn, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_CMP: @@ -2663,10 +2992,10 @@ exec_instruction( FETCH(&r[0], 0, chan_index); FETCH(&r[1], 1, chan_index); FETCH(&r[2], 2, chan_index); - - micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] ); - - STORE(&r[0], 0, chan_index); + micro_lt(&d[chan_index], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2839,71 +3168,31 @@ exec_instruction( break; case TGSI_OPCODE_CEIL: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_ceil( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); - } + exec_vector_unary(mach, inst, micro_ceil, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_I2F: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_i2f( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); - } + exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_INT); break; case TGSI_OPCODE_NOT: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_not( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); - } + exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_TRUNC: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_trunc( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); - } + exec_vector_unary(mach, inst, micro_trunc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SHL: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_shl( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SHR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_ishr( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); - } + exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_AND: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_and( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); - } + exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_OR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_or( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); - } + exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_MOD: @@ -2911,12 +3200,7 @@ exec_instruction( break; case TGSI_OPCODE_XOR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_xor( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); - } + exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_SAD: @@ -2932,13 +3216,11 @@ exec_instruction( break; case TGSI_OPCODE_EMIT: - mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16; - mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; + emit_vertex(mach); break; case TGSI_OPCODE_ENDPRIM: - mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++; - mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; + emit_primitive(mach); break; case TGSI_OPCODE_BGNFOR: @@ -2946,43 +3228,62 @@ exec_instruction( for (chan_index = 0; chan_index < 3; chan_index++) { FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index ); } - STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X ); ++mach->LoopCounterStackTop; + STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 0, CHAN_X); + /* update LoopMask */ + if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) { + mach->LoopMask &= ~0x1; + } + if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) { + mach->LoopMask &= ~0x2; + } + if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) { + mach->LoopMask &= ~0x4; + } + if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) { + mach->LoopMask &= ~0x8; + } + /* TODO: if mach->LoopMask == 0, jump to end of loop */ + UPDATE_EXEC_MASK(mach); /* fall-through (for now) */ case TGSI_OPCODE_BGNLOOP: /* push LoopMask and ContMasks */ assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->ContStack[mach->ContStackTop++] = mach->ContMask; assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); + + mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; + mach->ContStack[mach->ContStackTop++] = mach->ContMask; mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1; + mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; + mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP; break; case TGSI_OPCODE_ENDFOR: assert(mach->LoopCounterStackTop > 0); - micro_sub( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], - &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], - &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); + micro_sub(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], + &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], + &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); /* update LoopMask */ - if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[0] <= 0) { + if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) { mach->LoopMask &= ~0x1; } - if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[1] <= 0 ) { + if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) { mach->LoopMask &= ~0x2; } - if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[2] <= 0 ) { + if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) { mach->LoopMask &= ~0x4; } - if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[3] <= 0 ) { + if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) { mach->LoopMask &= ~0x8; } - micro_add( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], - &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], - &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]); + micro_add(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], + &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], + &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]); assert(mach->LoopLabelStackTop > 0); inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1]; - STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X ); + STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_X], 0, CHAN_X); /* Restore ContMask, but don't pop */ assert(mach->ContStackTop > 0); mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; @@ -3003,6 +3304,8 @@ exec_instruction( --mach->LoopLabelStackTop; assert(mach->LoopCounterStackTop > 0); --mach->LoopCounterStackTop; + + mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; } UPDATE_EXEC_MASK(mach); break; @@ -3026,15 +3329,14 @@ exec_instruction( mach->ContMask = mach->ContStack[--mach->ContStackTop]; assert(mach->LoopLabelStackTop > 0); --mach->LoopLabelStackTop; + + mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; } UPDATE_EXEC_MASK(mach); break; case TGSI_OPCODE_BRK: - /* turn off loop channels for each enabled exec channel */ - mach->LoopMask &= ~mach->ExecMask; - /* Todo: if mach->LoopMask == 0, jump to end of loop */ - UPDATE_EXEC_MASK(mach); + exec_break(mach); break; case TGSI_OPCODE_CONT: @@ -3049,18 +3351,171 @@ exec_instruction( break; case TGSI_OPCODE_ENDSUB: - /* no-op */ + /* + * XXX: This really should be a no-op. We should never reach this opcode. + */ + + assert(mach->CallStackTop > 0); + mach->CallStackTop--; + + mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; + mach->CondMask = mach->CondStack[mach->CondStackTop]; + + mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; + mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; + + mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; + mach->ContMask = mach->ContStack[mach->ContStackTop]; + + mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop; + mach->Switch = mach->SwitchStack[mach->SwitchStackTop]; + + mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop; + mach->BreakType = mach->BreakStack[mach->BreakStackTop]; + + assert(mach->FuncStackTop > 0); + mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; + + *pc = mach->CallStack[mach->CallStackTop].ReturnAddr; + + UPDATE_EXEC_MASK(mach); break; case TGSI_OPCODE_NOP: break; + case TGSI_OPCODE_BREAKC: + FETCH(&r[0], 0, CHAN_X); + /* update CondMask */ + if (r[0].u[0] && (mach->ExecMask & 0x1)) { + mach->LoopMask &= ~0x1; + } + if (r[0].u[1] && (mach->ExecMask & 0x2)) { + mach->LoopMask &= ~0x2; + } + if (r[0].u[2] && (mach->ExecMask & 0x4)) { + mach->LoopMask &= ~0x4; + } + if (r[0].u[3] && (mach->ExecMask & 0x8)) { + mach->LoopMask &= ~0x8; + } + /* Todo: if mach->LoopMask == 0, jump to end of loop */ + UPDATE_EXEC_MASK(mach); + break; + + case TGSI_OPCODE_F2I: + exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); + break; + + case TGSI_OPCODE_IDIV: + exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_IMAX: + exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_IMIN: + exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_INEG: + exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_ISGE: + exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_ISHR: + exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_ISLT: + exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_F2U: + exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); + break; + + case TGSI_OPCODE_U2F: + exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UADD: + exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UDIV: + exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UMAD: + exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UMAX: + exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UMIN: + exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UMOD: + exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UMUL: + exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_USEQ: + exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_USGE: + exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_USHR: + exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_USLT: + exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_USNE: + exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_SWITCH: + exec_switch(mach, inst); + break; + + case TGSI_OPCODE_CASE: + exec_case(mach, inst); + break; + + case TGSI_OPCODE_DEFAULT: + exec_default(mach); + break; + + case TGSI_OPCODE_ENDSWITCH: + exec_endswitch(mach); + break; + default: assert( 0 ); } } +#define DEBUG_EXECUTION 0 + + /** * Run TGSI interpreter. * \return bitmask of "alive" quad components @@ -3077,9 +3532,13 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) mach->FuncMask = 0xf; mach->ExecMask = 0xf; + mach->Switch.mask = 0xf; + assert(mach->CondStackTop == 0); assert(mach->LoopStackTop == 0); assert(mach->ContStackTop == 0); + assert(mach->SwitchStackTop == 0); + assert(mach->BreakStackTop == 0); assert(mach->CallStackTop == 0); mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; @@ -3103,10 +3562,67 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) exec_declaration( mach, mach->Declarations+i ); } - /* execute instructions, until pc is set to -1 */ - while (pc != -1) { - assert(pc < (int) mach->NumInstructions); - exec_instruction( mach, mach->Instructions + pc, &pc ); + { +#if DEBUG_EXECUTION + struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS]; + struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS]; + uint inst = 1; + + memcpy(temps, mach->Temps, sizeof(temps)); + memcpy(outputs, mach->Outputs, sizeof(outputs)); +#endif + + /* execute instructions, until pc is set to -1 */ + while (pc != -1) { + +#if DEBUG_EXECUTION + uint i; + + tgsi_dump_instruction(&mach->Instructions[pc], inst++); +#endif + + assert(pc < (int) mach->NumInstructions); + exec_instruction(mach, mach->Instructions + pc, &pc); + +#if DEBUG_EXECUTION + for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) { + if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) { + uint j; + + memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i])); + debug_printf("TEMP[%2u] = ", i); + for (j = 0; j < 4; j++) { + if (j > 0) { + debug_printf(" "); + } + debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", + temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j], + temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j], + temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j], + temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]); + } + } + } + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { + if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) { + uint j; + + memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i])); + debug_printf("OUT[%2u] = ", i); + for (j = 0; j < 4; j++) { + if (j > 0) { + debug_printf(" "); + } + debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", + outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j], + outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j], + outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j], + outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]); + } + } + } +#endif + } } #if 0 @@ -3120,5 +3636,12 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) } #endif + assert(mach->CondStackTop == 0); + assert(mach->LoopStackTop == 0); + assert(mach->ContStackTop == 0); + assert(mach->SwitchStackTop == 0); + assert(mach->BreakStackTop == 0); + assert(mach->CallStackTop == 0); + return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index 471f591dd6e..aa3a98d7f18 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -179,6 +179,7 @@ struct tgsi_exec_labels #define TGSI_EXEC_MAX_COND_NESTING 32 #define TGSI_EXEC_MAX_LOOP_NESTING 32 +#define TGSI_EXEC_MAX_SWITCH_NESTING 32 #define TGSI_EXEC_MAX_CALL_NESTING 32 /* The maximum number of input attributes per vertex. For 2D @@ -191,6 +192,14 @@ struct tgsi_exec_labels */ #define TGSI_EXEC_MAX_CONST_BUFFER 4096 +/* The maximum number of vertices per primitive */ +#define TGSI_MAX_PRIM_VERTICES 6 + +/* The maximum number of primitives to be generated */ +#define TGSI_MAX_PRIMITIVES 64 + +/* The maximum total number of vertices */ +#define TGSI_MAX_TOTAL_VERTICES (TGSI_MAX_PRIM_VERTICES * TGSI_MAX_PRIMITIVES * PIPE_MAX_ATTRIBS) /** function call/activation record */ struct tgsi_call_record @@ -198,10 +207,29 @@ struct tgsi_call_record uint CondStackTop; uint LoopStackTop; uint ContStackTop; + int SwitchStackTop; + int BreakStackTop; uint ReturnAddr; }; +/* Switch-case block state. */ +struct tgsi_switch_record { + uint mask; /**< execution mask */ + union tgsi_exec_channel selector; /**< a value case statements are compared to */ + uint defaultMask; /**< non-execute mask for default case */ +}; + + +enum tgsi_break_type { + TGSI_EXEC_BREAK_INSIDE_LOOP, + TGSI_EXEC_BREAK_INSIDE_SWITCH +}; + + +#define TGSI_EXEC_MAX_BREAK_STACK (TGSI_EXEC_MAX_LOOP_NESTING + TGSI_EXEC_MAX_SWITCH_NESTING) + + /** * Run-time virtual machine state for executing TGSI shader. */ @@ -214,10 +242,11 @@ struct tgsi_exec_machine float Imms[TGSI_EXEC_NUM_IMMEDIATES][4]; - struct tgsi_exec_vector Inputs[PIPE_MAX_ATTRIBS]; - struct tgsi_exec_vector Outputs[PIPE_MAX_ATTRIBS]; + struct tgsi_exec_vector Inputs[TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS]; + struct tgsi_exec_vector Outputs[TGSI_MAX_TOTAL_VERTICES]; struct tgsi_exec_vector *Addrs; + struct tgsi_exec_vector *Predicates; struct tgsi_sampler **Samplers; @@ -228,10 +257,13 @@ struct tgsi_exec_machine /* GEOMETRY processor only. */ unsigned *Primitives; + unsigned NumOutputs; + unsigned MaxGeometryShaderOutputs; /* FRAGMENT processor only. */ const struct tgsi_interp_coef *InterpCoefs; struct tgsi_exec_vector QuadPos; + float Face; /**< +1 if front facing, -1 if back facing */ /* Conditional execution masks */ uint CondMask; /**< For IF/ELSE/ENDIF */ @@ -240,6 +272,12 @@ struct tgsi_exec_machine uint FuncMask; /**< For function calls */ uint ExecMask; /**< = CondMask & LoopMask */ + /* Current switch-case state. */ + struct tgsi_switch_record Switch; + + /* Current break type. */ + enum tgsi_break_type BreakType; + /** Condition mask stack (for nested conditionals) */ uint CondStack[TGSI_EXEC_MAX_COND_NESTING]; int CondStackTop; @@ -252,7 +290,7 @@ struct tgsi_exec_machine uint LoopLabelStack[TGSI_EXEC_MAX_LOOP_NESTING]; int LoopLabelStackTop; - /** Loop counter stack (x = count, y = current, z = step) */ + /** Loop counter stack (x = index, y = counter, z = step) */ struct tgsi_exec_vector LoopCounterStack[TGSI_EXEC_MAX_LOOP_NESTING]; int LoopCounterStackTop; @@ -260,6 +298,13 @@ struct tgsi_exec_machine uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING]; int ContStackTop; + /** Switch case stack */ + struct tgsi_switch_record SwitchStack[TGSI_EXEC_MAX_SWITCH_NESTING]; + int SwitchStackTop; + + enum tgsi_break_type BreakStack[TGSI_EXEC_MAX_BREAK_STACK]; + int BreakStackTop; + /** Function execution mask stack (for executing subroutine code) */ uint FuncStack[TGSI_EXEC_MAX_CALL_NESTING]; int FuncStackTop; diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index be375cabb8b..de0e09cdbae 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -119,7 +119,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 1, 1, 0, 0, 0, 0, "NOT", TGSI_OPCODE_NOT }, { 1, 1, 0, 0, 0, 0, "TRUNC", TGSI_OPCODE_TRUNC }, { 1, 2, 0, 0, 0, 0, "SHL", TGSI_OPCODE_SHL }, - { 1, 2, 0, 0, 0, 0, "SHR", TGSI_OPCODE_SHR }, + { 0, 0, 0, 0, 0, 0, "", 88 }, /* removed */ { 1, 2, 0, 0, 0, 0, "AND", TGSI_OPCODE_AND }, { 1, 2, 0, 0, 0, 0, "OR", TGSI_OPCODE_OR }, { 1, 2, 0, 0, 0, 0, "MOD", TGSI_OPCODE_MOD }, @@ -149,7 +149,33 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 0, 1, 0, 0, 0, 0, "BREAKC", TGSI_OPCODE_BREAKC }, { 0, 1, 0, 0, 0, 0, "KIL", TGSI_OPCODE_KIL }, { 0, 0, 0, 0, 0, 0, "END", TGSI_OPCODE_END }, - { 0, 0, 0, 0, 0, 0, "", 118 } /* removed */ + { 0, 0, 0, 0, 0, 0, "", 118 }, /* removed */ + { 1, 1, 0, 0, 0, 0, "F2I", TGSI_OPCODE_F2I }, + { 1, 2, 0, 0, 0, 0, "IDIV", TGSI_OPCODE_IDIV }, + { 1, 2, 0, 0, 0, 0, "IMAX", TGSI_OPCODE_IMAX }, + { 1, 2, 0, 0, 0, 0, "IMIN", TGSI_OPCODE_IMIN }, + { 1, 1, 0, 0, 0, 0, "INEG", TGSI_OPCODE_INEG }, + { 1, 2, 0, 0, 0, 0, "ISGE", TGSI_OPCODE_ISGE }, + { 1, 2, 0, 0, 0, 0, "ISHR", TGSI_OPCODE_ISHR }, + { 1, 2, 0, 0, 0, 0, "ISLT", TGSI_OPCODE_ISLT }, + { 1, 1, 0, 0, 0, 0, "F2U", TGSI_OPCODE_F2U }, + { 1, 1, 0, 0, 0, 0, "U2F", TGSI_OPCODE_U2F }, + { 1, 2, 0, 0, 0, 0, "UADD", TGSI_OPCODE_UADD }, + { 1, 2, 0, 0, 0, 0, "UDIV", TGSI_OPCODE_UDIV }, + { 1, 3, 0, 0, 0, 0, "UMAD", TGSI_OPCODE_UMAD }, + { 1, 2, 0, 0, 0, 0, "UMAX", TGSI_OPCODE_UMAX }, + { 1, 2, 0, 0, 0, 0, "UMIN", TGSI_OPCODE_UMIN }, + { 1, 2, 0, 0, 0, 0, "UMOD", TGSI_OPCODE_UMOD }, + { 1, 2, 0, 0, 0, 0, "UMUL", TGSI_OPCODE_UMUL }, + { 1, 2, 0, 0, 0, 0, "USEQ", TGSI_OPCODE_USEQ }, + { 1, 2, 0, 0, 0, 0, "USGE", TGSI_OPCODE_USGE }, + { 1, 2, 0, 0, 0, 0, "USHR", TGSI_OPCODE_USHR }, + { 1, 2, 0, 0, 0, 0, "USLT", TGSI_OPCODE_USLT }, + { 1, 2, 0, 0, 0, 0, "USNE", TGSI_OPCODE_USNE }, + { 0, 1, 0, 0, 0, 0, "SWITCH", TGSI_OPCODE_SWITCH }, + { 0, 1, 0, 0, 0, 0, "CASE", TGSI_OPCODE_CASE }, + { 0, 0, 0, 0, 0, 0, "DEFAULT", TGSI_OPCODE_DEFAULT }, + { 0, 0, 0, 0, 0, 0, "ENDSWITCH", TGSI_OPCODE_ENDSWITCH } }; const struct tgsi_opcode_info * diff --git a/src/gallium/auxiliary/tgsi/tgsi_iterate.c b/src/gallium/auxiliary/tgsi/tgsi_iterate.c index d88c2558d81..0ba5fe48419 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_iterate.c +++ b/src/gallium/auxiliary/tgsi/tgsi_iterate.c @@ -39,7 +39,6 @@ tgsi_iterate_shader( return FALSE; ctx->processor = parse.FullHeader.Processor; - ctx->version = parse.FullVersion.Version; if (ctx->prolog) if (!ctx->prolog( ctx )) @@ -67,6 +66,12 @@ tgsi_iterate_shader( goto fail; break; + case TGSI_TOKEN_TYPE_PROPERTY: + if (ctx->iterate_property) + if (!ctx->iterate_property( ctx, &parse.FullToken.FullProperty )) + goto fail; + break; + default: assert( 0 ); } diff --git a/src/gallium/auxiliary/tgsi/tgsi_iterate.h b/src/gallium/auxiliary/tgsi/tgsi_iterate.h index ec7b85bf63d..8d67f22c429 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_iterate.h +++ b/src/gallium/auxiliary/tgsi/tgsi_iterate.h @@ -57,11 +57,15 @@ struct tgsi_iterate_context struct tgsi_full_immediate *imm ); boolean + (* iterate_property)( + struct tgsi_iterate_context *ctx, + struct tgsi_full_property *prop ); + + boolean (* epilog)( struct tgsi_iterate_context *ctx ); struct tgsi_processor processor; - struct tgsi_version version; }; boolean diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h index b34263da489..e4af15c156f 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h +++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h @@ -124,7 +124,6 @@ OP11(I2F) OP11(NOT) OP11(TRUNC) OP12(SHL) -OP12(SHR) OP12(AND) OP12(OR) OP12(MOD) @@ -146,6 +145,28 @@ OP01(IFC) OP01(BREAKC) OP01(KIL) OP00(END) +OP11(F2I) +OP12(IDIV) +OP12(IMAX) +OP12(IMIN) +OP11(INEG) +OP12(ISGE) +OP12(ISHR) +OP12(ISLT) +OP11(F2U) +OP11(U2F) +OP12(UADD) +OP12(UDIV) +OP13(UMAD) +OP12(UMAX) +OP12(UMIN) +OP12(UMOD) +OP12(UMUL) +OP12(USEQ) +OP12(USGE) +OP12(USHR) +OP12(USLT) +OP12(USNE) #undef OP00 diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c index 83f9df1183e..8c7062d850c 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.c +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c @@ -28,44 +28,23 @@ #include "util/u_debug.h" #include "pipe/p_shader_tokens.h" #include "tgsi_parse.h" -#include "tgsi_build.h" #include "util/u_memory.h" -void -tgsi_full_token_init( - union tgsi_full_token *full_token ) -{ - full_token->Token.Type = TGSI_TOKEN_TYPE_DECLARATION; -} - -void -tgsi_full_token_free( - union tgsi_full_token *full_token ) -{ -} - unsigned tgsi_parse_init( struct tgsi_parse_context *ctx, const struct tgsi_token *tokens ) { - ctx->FullVersion.Version = *(struct tgsi_version *) &tokens[0]; - if( ctx->FullVersion.Version.MajorVersion > 1 ) { - return TGSI_PARSE_ERROR; - } - - ctx->FullHeader.Header = *(struct tgsi_header *) &tokens[1]; + ctx->FullHeader.Header = *(struct tgsi_header *) &tokens[0]; if( ctx->FullHeader.Header.HeaderSize >= 2 ) { - ctx->FullHeader.Processor = *(struct tgsi_processor *) &tokens[2]; + ctx->FullHeader.Processor = *(struct tgsi_processor *) &tokens[1]; } else { - ctx->FullHeader.Processor = tgsi_default_processor(); + return TGSI_PARSE_ERROR; } ctx->Tokens = tokens; - ctx->Position = 1 + ctx->FullHeader.Header.HeaderSize; - - tgsi_full_token_init( &ctx->FullToken ); + ctx->Position = ctx->FullHeader.Header.HeaderSize; return TGSI_PARSE_OK; } @@ -74,7 +53,6 @@ void tgsi_parse_free( struct tgsi_parse_context *ctx ) { - tgsi_full_token_free( &ctx->FullToken ); } boolean @@ -82,7 +60,7 @@ tgsi_parse_end_of_tokens( struct tgsi_parse_context *ctx ) { return ctx->Position >= - 1 + ctx->FullHeader.Header.HeaderSize + ctx->FullHeader.Header.BodySize; + ctx->FullHeader.Header.HeaderSize + ctx->FullHeader.Header.BodySize; } @@ -119,9 +97,6 @@ tgsi_parse_token( struct tgsi_token token; unsigned i; - tgsi_full_token_free( &ctx->FullToken ); - tgsi_full_token_init( &ctx->FullToken ); - next_token( ctx, &token ); switch( token.Type ) { @@ -129,10 +104,10 @@ tgsi_parse_token( { struct tgsi_full_declaration *decl = &ctx->FullToken.FullDeclaration; - *decl = tgsi_default_full_declaration(); + memset(decl, 0, sizeof *decl); copy_token(&decl->Declaration, &token); - next_token( ctx, &decl->DeclarationRange ); + next_token( ctx, &decl->Range ); if( decl->Declaration.Semantic ) { next_token( ctx, &decl->Semantic ); @@ -144,18 +119,29 @@ tgsi_parse_token( case TGSI_TOKEN_TYPE_IMMEDIATE: { struct tgsi_full_immediate *imm = &ctx->FullToken.FullImmediate; + uint imm_count; - *imm = tgsi_default_full_immediate(); + memset(imm, 0, sizeof *imm); copy_token(&imm->Immediate, &token); - assert( !imm->Immediate.Extended ); + + imm_count = imm->Immediate.NrTokens - 1; switch (imm->Immediate.DataType) { case TGSI_IMM_FLOAT32: - { - uint imm_count = imm->Immediate.NrTokens - 1; - for (i = 0; i < imm_count; i++) { - next_token(ctx, &imm->u[i]); - } + for (i = 0; i < imm_count; i++) { + next_token(ctx, &imm->u[i].Float); + } + break; + + case TGSI_IMM_UINT32: + for (i = 0; i < imm_count; i++) { + next_token(ctx, &imm->u[i].Uint); + } + break; + + case TGSI_IMM_INT32: + for (i = 0; i < imm_count; i++) { + next_token(ctx, &imm->u[i].Int); } break; @@ -169,137 +155,76 @@ tgsi_parse_token( case TGSI_TOKEN_TYPE_INSTRUCTION: { struct tgsi_full_instruction *inst = &ctx->FullToken.FullInstruction; - unsigned extended; - *inst = tgsi_default_full_instruction(); + memset(inst, 0, sizeof *inst); copy_token(&inst->Instruction, &token); - extended = inst->Instruction.Extended; - - while( extended ) { - struct tgsi_src_register_ext token; - - next_token( ctx, &token ); - switch( token.Type ) { - case TGSI_INSTRUCTION_EXT_TYPE_LABEL: - copy_token(&inst->InstructionExtLabel, &token); - break; - - case TGSI_INSTRUCTION_EXT_TYPE_TEXTURE: - copy_token(&inst->InstructionExtTexture, &token); - break; - - case TGSI_INSTRUCTION_EXT_TYPE_PREDICATE: - copy_token(&inst->InstructionExtPredicate, &token); - break; + if (inst->Instruction.Predicate) { + next_token(ctx, &inst->Predicate); + } - default: - assert( 0 ); - } + if (inst->Instruction.Label) { + next_token( ctx, &inst->Label); + } - extended = token.Extended; + if (inst->Instruction.Texture) { + next_token( ctx, &inst->Texture); } assert( inst->Instruction.NumDstRegs <= TGSI_FULL_MAX_DST_REGISTERS ); for( i = 0; i < inst->Instruction.NumDstRegs; i++ ) { - unsigned extended; - next_token( ctx, &inst->FullDstRegisters[i].DstRegister ); + next_token( ctx, &inst->Dst[i].Register ); /* * No support for indirect or multi-dimensional addressing. */ - assert( !inst->FullDstRegisters[i].DstRegister.Dimension ); - - extended = inst->FullDstRegisters[i].DstRegister.Extended; - - while( extended ) { - struct tgsi_src_register_ext token; + assert( !inst->Dst[i].Register.Dimension ); - next_token( ctx, &token ); - - switch( token.Type ) { - case TGSI_DST_REGISTER_EXT_TYPE_MODULATE: - copy_token(&inst->FullDstRegisters[i].DstRegisterExtModulate, - &token); - break; - - default: - assert( 0 ); - } - - extended = token.Extended; - } - - if( inst->FullDstRegisters[i].DstRegister.Indirect ) { - next_token( ctx, &inst->FullDstRegisters[i].DstRegisterInd ); + if( inst->Dst[i].Register.Indirect ) { + next_token( ctx, &inst->Dst[i].Indirect ); /* * No support for indirect or multi-dimensional addressing. */ - assert( !inst->FullDstRegisters[i].DstRegisterInd.Indirect ); - assert( !inst->FullDstRegisters[i].DstRegisterInd.Dimension ); - assert( !inst->FullDstRegisters[i].DstRegisterInd.Extended ); + assert( !inst->Dst[i].Indirect.Dimension ); + assert( !inst->Dst[i].Indirect.Indirect ); } } assert( inst->Instruction.NumSrcRegs <= TGSI_FULL_MAX_SRC_REGISTERS ); for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) { - unsigned extended; - - next_token( ctx, &inst->FullSrcRegisters[i].SrcRegister ); - - extended = inst->FullSrcRegisters[i].SrcRegister.Extended; - - while( extended ) { - struct tgsi_src_register_ext token; - next_token( ctx, &token ); + next_token( ctx, &inst->Src[i].Register ); - switch( token.Type ) { - case TGSI_SRC_REGISTER_EXT_TYPE_MOD: - copy_token(&inst->FullSrcRegisters[i].SrcRegisterExtMod, - &token); - break; - - default: - assert( 0 ); - } - - extended = token.Extended; - } - - if( inst->FullSrcRegisters[i].SrcRegister.Indirect ) { - next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterInd ); + if( inst->Src[i].Register.Indirect ) { + next_token( ctx, &inst->Src[i].Indirect ); /* * No support for indirect or multi-dimensional addressing. */ - assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Indirect ); - assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Dimension ); - assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Extended ); + assert( !inst->Src[i].Indirect.Indirect ); + assert( !inst->Src[i].Indirect.Dimension ); } - if( inst->FullSrcRegisters[i].SrcRegister.Dimension ) { - next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterDim ); + if( inst->Src[i].Register.Dimension ) { + next_token( ctx, &inst->Src[i].Dimension ); /* * No support for multi-dimensional addressing. */ - assert( !inst->FullSrcRegisters[i].SrcRegisterDim.Dimension ); - assert( !inst->FullSrcRegisters[i].SrcRegisterDim.Extended ); + assert( !inst->Src[i].Dimension.Dimension ); - if( inst->FullSrcRegisters[i].SrcRegisterDim.Indirect ) { - next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterDimInd ); + if( inst->Src[i].Dimension.Indirect ) { + next_token( ctx, &inst->Src[i].DimIndirect ); /* * No support for indirect or multi-dimensional addressing. */ - assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Indirect ); - assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Dimension ); - assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Extended ); + assert( !inst->Src[i].Indirect.Indirect ); + assert( !inst->Src[i].Indirect.Dimension ); } } } @@ -307,6 +232,22 @@ tgsi_parse_token( break; } + case TGSI_TOKEN_TYPE_PROPERTY: + { + struct tgsi_full_property *prop = &ctx->FullToken.FullProperty; + uint prop_count; + + memset(prop, 0, sizeof *prop); + copy_token(&prop->Property, &token); + + prop_count = prop->Property.NrTokens - 1; + for (i = 0; i < prop_count; i++) { + next_token(ctx, &prop->u[i]); + } + + break; + } + default: assert( 0 ); } @@ -319,8 +260,7 @@ tgsi_num_tokens(const struct tgsi_token *tokens) struct tgsi_parse_context ctx; if (tgsi_parse_init(&ctx, tokens) == TGSI_PARSE_OK) { unsigned len = (ctx.FullHeader.Header.HeaderSize + - ctx.FullHeader.Header.BodySize + - 1); + ctx.FullHeader.Header.BodySize); return len; } return 0; diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h index 76f1676d85d..439a57269b7 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.h +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h @@ -34,11 +34,6 @@ extern "C" { #endif -struct tgsi_full_version -{ - struct tgsi_version Version; -}; - struct tgsi_full_header { struct tgsi_header Header; @@ -47,24 +42,22 @@ struct tgsi_full_header struct tgsi_full_dst_register { - struct tgsi_dst_register DstRegister; - struct tgsi_src_register DstRegisterInd; - struct tgsi_dst_register_ext_modulate DstRegisterExtModulate; + struct tgsi_dst_register Register; + struct tgsi_src_register Indirect; }; struct tgsi_full_src_register { - struct tgsi_src_register SrcRegister; - struct tgsi_src_register_ext_mod SrcRegisterExtMod; - struct tgsi_src_register SrcRegisterInd; - struct tgsi_dimension SrcRegisterDim; - struct tgsi_src_register SrcRegisterDimInd; + struct tgsi_src_register Register; + struct tgsi_src_register Indirect; + struct tgsi_dimension Dimension; + struct tgsi_src_register DimIndirect; }; struct tgsi_full_declaration { struct tgsi_declaration Declaration; - struct tgsi_declaration_range DeclarationRange; + struct tgsi_declaration_range Range; struct tgsi_declaration_semantic Semantic; }; @@ -74,18 +67,23 @@ struct tgsi_full_immediate union tgsi_immediate_data u[4]; }; +struct tgsi_full_property +{ + struct tgsi_property Property; + struct tgsi_property_data u[8]; +}; + #define TGSI_FULL_MAX_DST_REGISTERS 2 #define TGSI_FULL_MAX_SRC_REGISTERS 4 /* TXD has 4 */ struct tgsi_full_instruction { struct tgsi_instruction Instruction; - struct tgsi_instruction_ext_label InstructionExtLabel; - struct tgsi_instruction_ext_texture InstructionExtTexture; - struct tgsi_instruction_ext_predicate InstructionExtPredicate; - struct tgsi_full_dst_register FullDstRegisters[TGSI_FULL_MAX_DST_REGISTERS]; - struct tgsi_full_src_register FullSrcRegisters[TGSI_FULL_MAX_SRC_REGISTERS]; - uint Flags; /**< user-defined usage */ + struct tgsi_instruction_predicate Predicate; + struct tgsi_instruction_label Label; + struct tgsi_instruction_texture Texture; + struct tgsi_full_dst_register Dst[TGSI_FULL_MAX_DST_REGISTERS]; + struct tgsi_full_src_register Src[TGSI_FULL_MAX_SRC_REGISTERS]; }; union tgsi_full_token @@ -94,21 +92,13 @@ union tgsi_full_token struct tgsi_full_declaration FullDeclaration; struct tgsi_full_immediate FullImmediate; struct tgsi_full_instruction FullInstruction; + struct tgsi_full_property FullProperty; }; -void -tgsi_full_token_init( - union tgsi_full_token *full_token ); - -void -tgsi_full_token_free( - union tgsi_full_token *full_token ); - struct tgsi_parse_context { const struct tgsi_token *Tokens; unsigned Position; - struct tgsi_full_version FullVersion; struct tgsi_full_header FullHeader; union tgsi_full_token FullToken; }; diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index 617fd7f6be1..138d2d095bb 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -60,7 +60,7 @@ const float ppc_builtin_constants[] ALIGN16_ATTRIB = { for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) #define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ - ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) + ((INST).Dst[0].Register.WriteMask & (1 << (CHAN))) #define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ if (IS_DST0_CHANNEL_ENABLED( INST, CHAN )) @@ -156,8 +156,8 @@ init_gen_context(struct gen_context *gen, struct ppc_function *func) static boolean is_ppc_vec_temporary(const struct tgsi_full_src_register *reg) { - return (reg->SrcRegister.File == TGSI_FILE_TEMPORARY && - reg->SrcRegister.Index < MAX_PPC_TEMPS); + return (reg->Register.File == TGSI_FILE_TEMPORARY && + reg->Register.Index < MAX_PPC_TEMPS); } @@ -167,8 +167,8 @@ is_ppc_vec_temporary(const struct tgsi_full_src_register *reg) static boolean is_ppc_vec_temporary_dst(const struct tgsi_full_dst_register *reg) { - return (reg->DstRegister.File == TGSI_FILE_TEMPORARY && - reg->DstRegister.Index < MAX_PPC_TEMPS); + return (reg->Register.File == TGSI_FILE_TEMPORARY && + reg->Register.Index < MAX_PPC_TEMPS); } @@ -291,10 +291,11 @@ emit_fetch(struct gen_context *gen, case TGSI_SWIZZLE_Y: case TGSI_SWIZZLE_Z: case TGSI_SWIZZLE_W: - switch (reg->SrcRegister.File) { + switch (reg->Register.File) { case TGSI_FILE_INPUT: + case TGSI_FILE_SYSTEM_VALUE: { - int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16; + int offset = (reg->Register.Index * 4 + swizzle) * 16; int offset_reg = emit_li_offset(gen, offset); dst_vec = ppc_allocate_vec_register(gen->f); ppc_lvx(gen->f, dst_vec, gen->inputs_reg, offset_reg); @@ -303,11 +304,11 @@ emit_fetch(struct gen_context *gen, case TGSI_FILE_TEMPORARY: if (is_ppc_vec_temporary(reg)) { /* use PPC vec register */ - dst_vec = gen->temps_map[reg->SrcRegister.Index][swizzle]; + dst_vec = gen->temps_map[reg->Register.Index][swizzle]; } else { /* use memory-based temp register "file" */ - int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16; + int offset = (reg->Register.Index * 4 + swizzle) * 16; int offset_reg = emit_li_offset(gen, offset); dst_vec = ppc_allocate_vec_register(gen->f); ppc_lvx(gen->f, dst_vec, gen->temps_reg, offset_reg); @@ -315,7 +316,7 @@ emit_fetch(struct gen_context *gen, break; case TGSI_FILE_IMMEDIATE: { - int offset = (reg->SrcRegister.Index * 4 + swizzle) * 4; + int offset = (reg->Register.Index * 4 + swizzle) * 4; int offset_reg = emit_li_offset(gen, offset); dst_vec = ppc_allocate_vec_register(gen->f); /* Load 4-byte word into vector register. @@ -331,7 +332,7 @@ emit_fetch(struct gen_context *gen, break; case TGSI_FILE_CONSTANT: { - int offset = (reg->SrcRegister.Index * 4 + swizzle) * 4; + int offset = (reg->Register.Index * 4 + swizzle) * 4; int offset_reg = emit_li_offset(gen, offset); dst_vec = ppc_allocate_vec_register(gen->f); /* Load 4-byte word into vector register. @@ -404,9 +405,9 @@ equal_src_locs(const struct tgsi_full_src_register *a, uint chan_a, { int swz_a, swz_b; int sign_a, sign_b; - if (a->SrcRegister.File != b->SrcRegister.File) + if (a->Register.File != b->Register.File) return FALSE; - if (a->SrcRegister.Index != b->SrcRegister.Index) + if (a->Register.Index != b->Register.Index) return FALSE; swz_a = tgsi_util_get_full_src_register_swizzle(a, chan_a); swz_b = tgsi_util_get_full_src_register_swizzle(b, chan_b); @@ -431,7 +432,7 @@ get_src_vec(struct gen_context *gen, struct tgsi_full_instruction *inst, int src_reg, uint chan) { const const struct tgsi_full_src_register *src = - &inst->FullSrcRegisters[src_reg]; + &inst->Src[src_reg]; int vec; uint i; @@ -482,10 +483,10 @@ get_dst_vec(struct gen_context *gen, const struct tgsi_full_instruction *inst, unsigned chan_index) { - const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[0]; + const struct tgsi_full_dst_register *reg = &inst->Dst[0]; if (is_ppc_vec_temporary_dst(reg)) { - int vec = gen->temps_map[reg->DstRegister.Index][chan_index]; + int vec = gen->temps_map[reg->Register.Index][chan_index]; return vec; } else { @@ -505,12 +506,12 @@ emit_store(struct gen_context *gen, unsigned chan_index, boolean free_vec) { - const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[0]; + const struct tgsi_full_dst_register *reg = &inst->Dst[0]; - switch (reg->DstRegister.File) { + switch (reg->Register.File) { case TGSI_FILE_OUTPUT: { - int offset = (reg->DstRegister.Index * 4 + chan_index) * 16; + int offset = (reg->Register.Index * 4 + chan_index) * 16; int offset_reg = emit_li_offset(gen, offset); ppc_stvx(gen->f, src_vec, gen->outputs_reg, offset_reg); } @@ -518,14 +519,14 @@ emit_store(struct gen_context *gen, case TGSI_FILE_TEMPORARY: if (is_ppc_vec_temporary_dst(reg)) { if (!free_vec) { - int dst_vec = gen->temps_map[reg->DstRegister.Index][chan_index]; + int dst_vec = gen->temps_map[reg->Register.Index][chan_index]; if (dst_vec != src_vec) ppc_vmove(gen->f, dst_vec, src_vec); } free_vec = FALSE; } else { - int offset = (reg->DstRegister.Index * 4 + chan_index) * 16; + int offset = (reg->Register.Index * 4 + chan_index) * 16; int offset_reg = emit_li_offset(gen, offset); ppc_stvx(gen->f, src_vec, gen->temps_reg, offset_reg); } @@ -535,7 +536,7 @@ emit_store(struct gen_context *gen, emit_addrs( func, xmm, - reg->DstRegister.Index, + reg->Register.Index, chan_index ); break; #endif @@ -1173,13 +1174,14 @@ emit_declaration( struct ppc_function *func, struct tgsi_full_declaration *decl ) { - if( decl->Declaration.File == TGSI_FILE_INPUT ) { + if( decl->Declaration.File == TGSI_FILE_INPUT || + decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE ) { #if 0 unsigned first, last, mask; unsigned i, j; - first = decl->DeclarationRange.First; - last = decl->DeclarationRange.Last; + first = decl->Range.First; + last = decl->Range.Last; mask = decl->Declaration.UsageMask; for( i = first; i <= last; i++ ) { @@ -1339,6 +1341,9 @@ tgsi_emit_ppc(const struct tgsi_token *tokens, } break; + case TGSI_TOKEN_TYPE_PROPERTY: + break; + default: ok = 0; assert( 0 ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index 36e27ea52f4..9b0644465af 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -26,32 +26,112 @@ **************************************************************************/ #include "util/u_debug.h" +#include "util/u_memory.h" +#include "util/u_prim.h" +#include "cso_cache/cso_hash.h" #include "tgsi_sanity.h" #include "tgsi_info.h" #include "tgsi_iterate.h" -typedef uint reg_flag; - -#define BITS_IN_REG_FLAG (sizeof( reg_flag ) * 8) - -#define MAX_REGISTERS 256 -#define MAX_REG_FLAGS ((MAX_REGISTERS + BITS_IN_REG_FLAG - 1) / BITS_IN_REG_FLAG) +typedef struct { + uint file : 28; + /* max 2 dimensions */ + uint dimensions : 4; + uint indices[2]; +} scan_register; struct sanity_check_ctx { struct tgsi_iterate_context iter; + struct cso_hash *regs_decl; + struct cso_hash *regs_used; + struct cso_hash *regs_ind_used; - reg_flag regs_decl[TGSI_FILE_COUNT][MAX_REG_FLAGS]; - reg_flag regs_used[TGSI_FILE_COUNT][MAX_REG_FLAGS]; - boolean regs_ind_used[TGSI_FILE_COUNT]; uint num_imms; uint num_instructions; uint index_of_END; uint errors; uint warnings; + uint implied_array_size; }; +static INLINE unsigned +scan_register_key(const scan_register *reg) +{ + unsigned key = reg->file; + key |= (reg->indices[0] << 4); + key |= (reg->indices[1] << 18); + + return key; +} + +static void +fill_scan_register1d(scan_register *reg, + uint file, uint index) +{ + reg->file = file; + reg->dimensions = 1; + reg->indices[0] = index; + reg->indices[1] = 0; +} + +static void +fill_scan_register2d(scan_register *reg, + uint file, uint index1, uint index2) +{ + reg->file = file; + reg->dimensions = 2; + reg->indices[0] = index1; + reg->indices[1] = index2; +} + +static void +scan_register_dst(scan_register *reg, + struct tgsi_full_dst_register *dst) +{ + fill_scan_register1d(reg, + dst->Register.File, + dst->Register.Index); +} + +static void +scan_register_src(scan_register *reg, + struct tgsi_full_src_register *src) +{ + if (src->Register.Dimension) { + /*FIXME: right now we don't support indirect + * multidimensional addressing */ + debug_assert(!src->Dimension.Indirect); + fill_scan_register2d(reg, + src->Register.File, + src->Register.Index, + src->Dimension.Index); + } else { + fill_scan_register1d(reg, + src->Register.File, + src->Register.Index); + } +} + +static scan_register * +create_scan_register_src(struct tgsi_full_src_register *src) +{ + scan_register *reg = MALLOC(sizeof(scan_register)); + scan_register_src(reg, src); + + return reg; +} + +static scan_register * +create_scan_register_dst(struct tgsi_full_dst_register *dst) +{ + scan_register *reg = MALLOC(sizeof(scan_register)); + scan_register_dst(reg, dst); + + return reg; +} + static void report_error( struct sanity_check_ctx *ctx, @@ -99,12 +179,12 @@ check_file_name( static boolean is_register_declared( struct sanity_check_ctx *ctx, - uint file, - int index ) + const scan_register *reg) { - assert( index >= 0 && index < MAX_REGISTERS ); - - return (ctx->regs_decl[file][index / BITS_IN_REG_FLAG] & (1 << (index % BITS_IN_REG_FLAG))) ? TRUE : FALSE; + void *data = cso_hash_find_data_from_template( + ctx->regs_decl, scan_register_key(reg), + (void*)reg, sizeof(scan_register)); + return data ? TRUE : FALSE; } static boolean @@ -112,23 +192,37 @@ is_any_register_declared( struct sanity_check_ctx *ctx, uint file ) { - uint i; + struct cso_hash_iter iter = + cso_hash_first_node(ctx->regs_decl); - for (i = 0; i < MAX_REG_FLAGS; i++) - if (ctx->regs_decl[file][i]) + while (!cso_hash_iter_is_null(iter)) { + scan_register *reg = (scan_register *)cso_hash_iter_data(iter); + if (reg->file == file) return TRUE; + iter = cso_hash_iter_next(iter); + } + return FALSE; } static boolean is_register_used( struct sanity_check_ctx *ctx, - uint file, - int index ) + scan_register *reg) { - assert( index < MAX_REGISTERS ); + void *data = cso_hash_find_data_from_template( + ctx->regs_used, scan_register_key(reg), + reg, sizeof(scan_register)); + return data ? TRUE : FALSE; +} - return (ctx->regs_used[file][index / BITS_IN_REG_FLAG] & (1 << (index % BITS_IN_REG_FLAG))) ? TRUE : FALSE; + +static boolean +is_ind_register_used( + struct sanity_check_ctx *ctx, + scan_register *reg) +{ + return cso_hash_contains(ctx->regs_ind_used, reg->file); } static const char *file_names[TGSI_FILE_COUNT] = @@ -148,31 +242,40 @@ static const char *file_names[TGSI_FILE_COUNT] = static boolean check_register_usage( struct sanity_check_ctx *ctx, - uint file, - int index, + scan_register *reg, const char *name, boolean indirect_access ) { - if (!check_file_name( ctx, file )) + if (!check_file_name( ctx, reg->file )) { + FREE(reg); return FALSE; + } if (indirect_access) { /* Note that 'index' is an offset relative to the value of the - * address register. No range checking done here. - */ - if (!is_any_register_declared( ctx, file )) - report_error( ctx, "%s: Undeclared %s register", file_names[file], name ); - ctx->regs_ind_used[file] = TRUE; + * address register. No range checking done here.*/ + reg->indices[0] = 0; + reg->indices[1] = 0; + if (!is_any_register_declared( ctx, reg->file )) + report_error( ctx, "%s: Undeclared %s register", file_names[reg->file], name ); + if (!is_ind_register_used(ctx, reg)) + cso_hash_insert(ctx->regs_ind_used, reg->file, reg); + else + FREE(reg); } else { - if (index < 0 || index >= MAX_REGISTERS) { - report_error( ctx, "%s[%d]: Invalid %s index", file_names[file], index, name ); - return FALSE; - } - - if (!is_register_declared( ctx, file, index )) - report_error( ctx, "%s[%d]: Undeclared %s register", file_names[file], index, name ); - ctx->regs_used[file][index / BITS_IN_REG_FLAG] |= (1 << (index % BITS_IN_REG_FLAG)); + if (!is_register_declared( ctx, reg )) { + if (reg->dimensions == 2) + report_error( ctx, "%s[%d][%d]: Undeclared %s register", file_names[reg->file], + reg->indices[0], reg->indices[1], name ); + else + report_error( ctx, "%s[%d]: Undeclared %s register", file_names[reg->file], + reg->indices[0], name ); + } + if (!is_register_used( ctx, reg )) + cso_hash_insert(ctx->regs_used, scan_register_key(reg), reg); + else + FREE(reg); } return TRUE; } @@ -210,33 +313,33 @@ iter_instruction( * Mark the registers as used. */ for (i = 0; i < inst->Instruction.NumDstRegs; i++) { + scan_register *reg = create_scan_register_dst(&inst->Dst[i]); check_register_usage( ctx, - inst->FullDstRegisters[i].DstRegister.File, - inst->FullDstRegisters[i].DstRegister.Index, + reg, "destination", FALSE ); } for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { + scan_register *reg = create_scan_register_src(&inst->Src[i]); check_register_usage( ctx, - inst->FullSrcRegisters[i].SrcRegister.File, - inst->FullSrcRegisters[i].SrcRegister.Index, + reg, "source", - (boolean)inst->FullSrcRegisters[i].SrcRegister.Indirect ); - if (inst->FullSrcRegisters[i].SrcRegister.Indirect) { - uint file; - int index; + (boolean)inst->Src[i].Register.Indirect ); + if (inst->Src[i].Register.Indirect) { + scan_register *ind_reg = MALLOC(sizeof(scan_register)); - file = inst->FullSrcRegisters[i].SrcRegisterInd.File; - index = inst->FullSrcRegisters[i].SrcRegisterInd.Index; + fill_scan_register1d(ind_reg, + inst->Src[i].Indirect.File, + inst->Src[i].Indirect.Index); check_register_usage( ctx, - file, - index, + reg, "indirect", FALSE ); - if (!(file == TGSI_FILE_ADDRESS || file == TGSI_FILE_LOOP) || index != 0) { + if (!(reg->file == TGSI_FILE_ADDRESS || reg->file == TGSI_FILE_LOOP) || + reg->indices[0] != 0) { report_warning(ctx, "Indirect register neither ADDR[0] nor LOOP[0]"); } } @@ -245,8 +348,8 @@ iter_instruction( switch (inst->Instruction.Opcode) { case TGSI_OPCODE_BGNFOR: case TGSI_OPCODE_ENDFOR: - if (inst->FullDstRegisters[0].DstRegister.File != TGSI_FILE_LOOP || - inst->FullDstRegisters[0].DstRegister.Index != 0) { + if (inst->Dst[0].Register.File != TGSI_FILE_LOOP || + inst->Dst[0].Register.Index != 0) { report_error(ctx, "Destination register must be LOOP[0]"); } break; @@ -254,8 +357,8 @@ iter_instruction( switch (inst->Instruction.Opcode) { case TGSI_OPCODE_BGNFOR: - if (inst->FullSrcRegisters[0].SrcRegister.File != TGSI_FILE_CONSTANT && - inst->FullSrcRegisters[0].SrcRegister.File != TGSI_FILE_IMMEDIATE) { + if (inst->Src[0].Register.File != TGSI_FILE_CONSTANT && + inst->Src[0].Register.File != TGSI_FILE_IMMEDIATE) { report_error(ctx, "Source register file must be either CONST or IMM"); } break; @@ -266,6 +369,19 @@ iter_instruction( return TRUE; } +static void +check_and_declare(struct sanity_check_ctx *ctx, + scan_register *reg) +{ + if (is_register_declared( ctx, reg)) + report_error( ctx, "%s[%u]: The same register declared more than once", + file_names[reg->file], reg->indices[0] ); + cso_hash_insert(ctx->regs_decl, + scan_register_key(reg), + reg); +} + + static boolean iter_declaration( struct tgsi_iterate_context *iter, @@ -286,10 +402,22 @@ iter_declaration( file = decl->Declaration.File; if (!check_file_name( ctx, file )) return TRUE; - for (i = decl->DeclarationRange.First; i <= decl->DeclarationRange.Last; i++) { - if (is_register_declared( ctx, file, i )) - report_error( ctx, "%s[%u]: The same register declared more than once", file_names[file], i ); - ctx->regs_decl[file][i / BITS_IN_REG_FLAG] |= (1 << (i % BITS_IN_REG_FLAG)); + for (i = decl->Range.First; i <= decl->Range.Last; i++) { + /* declared TGSI_FILE_INPUT's for geometry processor + * have an implied second dimension */ + if (file == TGSI_FILE_INPUT && + ctx->iter.processor.Processor == TGSI_PROCESSOR_GEOMETRY) { + uint vert; + for (vert = 0; vert < ctx->implied_array_size; ++vert) { + scan_register *reg = MALLOC(sizeof(scan_register)); + fill_scan_register2d(reg, file, vert, i); + check_and_declare(ctx, reg); + } + } else { + scan_register *reg = MALLOC(sizeof(scan_register)); + fill_scan_register1d(reg, file, i); + check_and_declare(ctx, reg); + } } return TRUE; @@ -301,8 +429,7 @@ iter_immediate( struct tgsi_full_immediate *imm ) { struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; - - assert( ctx->num_imms < MAX_REGISTERS ); + scan_register *reg; /* No immediates allowed after the first instruction. */ @@ -311,12 +438,16 @@ iter_immediate( /* Mark the register as declared. */ - ctx->regs_decl[TGSI_FILE_IMMEDIATE][ctx->num_imms / BITS_IN_REG_FLAG] |= (1 << (ctx->num_imms % BITS_IN_REG_FLAG)); + reg = MALLOC(sizeof(scan_register)); + fill_scan_register1d(reg, TGSI_FILE_IMMEDIATE, ctx->num_imms); + cso_hash_insert(ctx->regs_decl, scan_register_key(reg), reg); ctx->num_imms++; /* Check data type validity. */ - if (imm->Immediate.DataType != TGSI_IMM_FLOAT32) { + if (imm->Immediate.DataType != TGSI_IMM_FLOAT32 && + imm->Immediate.DataType != TGSI_IMM_UINT32 && + imm->Immediate.DataType != TGSI_IMM_INT32) { report_error( ctx, "(%u): Invalid immediate data type", imm->Immediate.DataType ); return TRUE; } @@ -324,12 +455,26 @@ iter_immediate( return TRUE; } + +static boolean +iter_property( + struct tgsi_iterate_context *iter, + struct tgsi_full_property *prop ) +{ + struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; + + if (iter->processor.Processor == TGSI_PROCESSOR_GEOMETRY && + prop->Property.PropertyName == TGSI_PROPERTY_GS_INPUT_PRIM) { + ctx->implied_array_size = u_vertices_per_prim(prop->u[0].Data); + } + return TRUE; +} + static boolean epilog( struct tgsi_iterate_context *iter ) { struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; - uint file; /* There must be an END instruction somewhere. */ @@ -339,13 +484,17 @@ epilog( /* Check if all declared registers were used. */ - for (file = TGSI_FILE_NULL; file < TGSI_FILE_COUNT; file++) { - uint i; - - for (i = 0; i < MAX_REGISTERS; i++) { - if (is_register_declared( ctx, file, i ) && !is_register_used( ctx, file, i ) && !ctx->regs_ind_used[file]) { - report_warning( ctx, "%s[%u]: Register never used", file_names[file], i ); + { + struct cso_hash_iter iter = + cso_hash_first_node(ctx->regs_decl); + + while (!cso_hash_iter_is_null(iter)) { + scan_register *reg = (scan_register *)cso_hash_iter_data(iter); + if (!is_register_used(ctx, reg) && !is_ind_register_used(ctx, reg)) { + report_warning( ctx, "%s[%u]: Register never used", + file_names[reg->file], reg->indices[0] ); } + iter = cso_hash_iter_next(iter); } } @@ -357,6 +506,18 @@ epilog( return TRUE; } +static void +regs_hash_destroy(struct cso_hash *hash) +{ + struct cso_hash_iter iter = cso_hash_first_node(hash); + while (!cso_hash_iter_is_null(iter)) { + scan_register *reg = (scan_register *)cso_hash_iter_data(iter); + iter = cso_hash_erase(hash, iter); + FREE(reg); + } + cso_hash_delete(hash); +} + boolean tgsi_sanity_check( const struct tgsi_token *tokens ) @@ -367,20 +528,26 @@ tgsi_sanity_check( ctx.iter.iterate_instruction = iter_instruction; ctx.iter.iterate_declaration = iter_declaration; ctx.iter.iterate_immediate = iter_immediate; + ctx.iter.iterate_property = iter_property; ctx.iter.epilog = epilog; - memset( ctx.regs_decl, 0, sizeof( ctx.regs_decl ) ); - memset( ctx.regs_used, 0, sizeof( ctx.regs_used ) ); - memset( ctx.regs_ind_used, 0, sizeof( ctx.regs_ind_used ) ); + ctx.regs_decl = cso_hash_create(); + ctx.regs_used = cso_hash_create(); + ctx.regs_ind_used = cso_hash_create(); + ctx.num_imms = 0; ctx.num_instructions = 0; ctx.index_of_END = ~0; ctx.errors = 0; ctx.warnings = 0; + ctx.implied_array_size = 0; if (!tgsi_iterate_shader( tokens, &ctx.iter )) return FALSE; + regs_hash_destroy(ctx.regs_decl); + regs_hash_destroy(ctx.regs_used); + regs_hash_destroy(ctx.regs_ind_used); return ctx.errors == 0; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index f9c16f1b6cb..a6cc773003a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -35,7 +35,6 @@ #include "util/u_math.h" -#include "tgsi/tgsi_build.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_scan.h" @@ -97,14 +96,15 @@ tgsi_scan_shader(const struct tgsi_token *tokens, uint i; for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *src = - &fullinst->FullSrcRegisters[i]; - if (src->SrcRegister.File == TGSI_FILE_INPUT) { - const int ind = src->SrcRegister.Index; + &fullinst->Src[i]; + if (src->Register.File == TGSI_FILE_INPUT || + src->Register.File == TGSI_FILE_SYSTEM_VALUE) { + const int ind = src->Register.Index; if (info->input_semantic_name[ind] == TGSI_SEMANTIC_FOG) { - if (src->SrcRegister.SwizzleX == TGSI_SWIZZLE_X) { + if (src->Register.SwizzleX == TGSI_SWIZZLE_X) { info->uses_fogcoord = TRUE; } - else if (src->SrcRegister.SwizzleX == TGSI_SWIZZLE_Y) { + else if (src->Register.SwizzleX == TGSI_SWIZZLE_Y) { info->uses_frontfacing = TRUE; } } @@ -120,8 +120,8 @@ tgsi_scan_shader(const struct tgsi_token *tokens, = &parse.FullToken.FullDeclaration; const uint file = fulldecl->Declaration.File; uint reg; - for (reg = fulldecl->DeclarationRange.First; - reg <= fulldecl->DeclarationRange.Last; + for (reg = fulldecl->Range.First; + reg <= fulldecl->Range.Last; reg++) { /* only first 32 regs will appear in this bitfield */ @@ -129,25 +129,29 @@ tgsi_scan_shader(const struct tgsi_token *tokens, info->file_count[file]++; info->file_max[file] = MAX2(info->file_max[file], (int)reg); - if (file == TGSI_FILE_INPUT) { - info->input_semantic_name[reg] = (ubyte)fulldecl->Semantic.SemanticName; - info->input_semantic_index[reg] = (ubyte)fulldecl->Semantic.SemanticIndex; + if (file == TGSI_FILE_INPUT || file == TGSI_FILE_SYSTEM_VALUE) { + info->input_semantic_name[reg] = (ubyte)fulldecl->Semantic.Name; + info->input_semantic_index[reg] = (ubyte)fulldecl->Semantic.Index; info->input_interpolate[reg] = (ubyte)fulldecl->Declaration.Interpolate; info->num_inputs++; } else if (file == TGSI_FILE_OUTPUT) { - info->output_semantic_name[reg] = (ubyte)fulldecl->Semantic.SemanticName; - info->output_semantic_index[reg] = (ubyte)fulldecl->Semantic.SemanticIndex; + info->output_semantic_name[reg] = (ubyte)fulldecl->Semantic.Name; + info->output_semantic_index[reg] = (ubyte)fulldecl->Semantic.Index; info->num_outputs++; - } - /* special case */ - if (procType == TGSI_PROCESSOR_FRAGMENT && - file == TGSI_FILE_OUTPUT && - fulldecl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) { - info->writes_z = TRUE; + /* extra info for special outputs */ + if (procType == TGSI_PROCESSOR_FRAGMENT && + fulldecl->Semantic.Name == TGSI_SEMANTIC_POSITION) { + info->writes_z = TRUE; + } + if (procType == TGSI_PROCESSOR_VERTEX && + fulldecl->Semantic.Name == TGSI_SEMANTIC_EDGEFLAG) { + info->writes_edgeflag = TRUE; + } } - } + + } } break; @@ -161,6 +165,19 @@ tgsi_scan_shader(const struct tgsi_token *tokens, info->file_max[file] = MAX2(info->file_max[file], (int)reg); } break; + case TGSI_TOKEN_TYPE_PROPERTY: + { + const struct tgsi_full_property *fullprop + = &parse.FullToken.FullProperty; + + info->properties[info->num_properties].name = + fullprop->Property.PropertyName; + memcpy(info->properties[info->num_properties].data, + fullprop->u, 8 * sizeof(unsigned));; + + ++info->num_properties; + } + break; default: assert( 0 ); @@ -206,29 +223,26 @@ tgsi_is_passthrough_shader(const struct tgsi_token *tokens) struct tgsi_full_instruction *fullinst = &parse.FullToken.FullInstruction; const struct tgsi_full_src_register *src = - &fullinst->FullSrcRegisters[0]; + &fullinst->Src[0]; const struct tgsi_full_dst_register *dst = - &fullinst->FullDstRegisters[0]; + &fullinst->Dst[0]; /* Do a whole bunch of checks for a simple move */ if (fullinst->Instruction.Opcode != TGSI_OPCODE_MOV || - src->SrcRegister.File != TGSI_FILE_INPUT || - dst->DstRegister.File != TGSI_FILE_OUTPUT || - src->SrcRegister.Index != dst->DstRegister.Index || - - src->SrcRegister.Negate || - src->SrcRegisterExtMod.Negate || - src->SrcRegisterExtMod.Absolute || - src->SrcRegisterExtMod.Scale2X || - src->SrcRegisterExtMod.Bias || - src->SrcRegisterExtMod.Complement || - - src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X || - src->SrcRegister.SwizzleY != TGSI_SWIZZLE_Y || - src->SrcRegister.SwizzleZ != TGSI_SWIZZLE_Z || - src->SrcRegister.SwizzleW != TGSI_SWIZZLE_W || - - dst->DstRegister.WriteMask != TGSI_WRITEMASK_XYZW) + (src->Register.File != TGSI_FILE_INPUT && + src->Register.File != TGSI_FILE_SYSTEM_VALUE) || + dst->Register.File != TGSI_FILE_OUTPUT || + src->Register.Index != dst->Register.Index || + + src->Register.Negate || + src->Register.Absolute || + + src->Register.SwizzleX != TGSI_SWIZZLE_X || + src->Register.SwizzleY != TGSI_SWIZZLE_Y || + src->Register.SwizzleZ != TGSI_SWIZZLE_Z || + src->Register.SwizzleW != TGSI_SWIZZLE_W || + + dst->Register.WriteMask != TGSI_WRITEMASK_XYZW) { tgsi_parse_free(&parse); return FALSE; @@ -240,6 +254,8 @@ tgsi_is_passthrough_shader(const struct tgsi_token *tokens) /* fall-through */ case TGSI_TOKEN_TYPE_IMMEDIATE: /* fall-through */ + case TGSI_TOKEN_TYPE_PROPERTY: + /* fall-through */ default: ; /* no-op */ } diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h index 8a7ee0c7e4f..dae5376c24a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.h +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h @@ -33,7 +33,6 @@ #include "pipe/p_state.h" #include "pipe/p_shader_tokens.h" - /** * Shader summary info */ @@ -58,11 +57,17 @@ struct tgsi_shader_info uint opcode_count[TGSI_OPCODE_LAST]; /**< opcode histogram */ boolean writes_z; /**< does fragment shader write Z value? */ + boolean writes_edgeflag; /**< vertex shader outputs edgeflag */ boolean uses_kill; /**< KIL or KILP instruction used? */ boolean uses_fogcoord; /**< fragment shader uses fog coord? */ boolean uses_frontfacing; /**< fragment shader uses front/back-face flag? */ -}; + struct { + unsigned name; + unsigned data[8]; + } properties[TGSI_PROPERTY_COUNT]; + uint num_properties; +}; extern void tgsi_scan_shader(const struct tgsi_token *tokens, diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index a96fc94c7ad..118059ace9c 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -58,7 +58,7 @@ for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) #define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ - ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) + ((INST).Dst[0].Register.WriteMask & (1 << (CHAN))) #define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ if (IS_DST0_CHANNEL_ENABLED( INST, CHAN )) @@ -1267,31 +1267,32 @@ emit_fetch( case TGSI_SWIZZLE_Y: case TGSI_SWIZZLE_Z: case TGSI_SWIZZLE_W: - switch (reg->SrcRegister.File) { + switch (reg->Register.File) { case TGSI_FILE_CONSTANT: emit_const( func, xmm, - reg->SrcRegister.Index, + reg->Register.Index, swizzle, - reg->SrcRegister.Indirect, - reg->SrcRegisterInd.File, - reg->SrcRegisterInd.Index ); + reg->Register.Indirect, + reg->Indirect.File, + reg->Indirect.Index ); break; case TGSI_FILE_IMMEDIATE: emit_immediate( func, xmm, - reg->SrcRegister.Index, + reg->Register.Index, swizzle ); break; case TGSI_FILE_INPUT: + case TGSI_FILE_SYSTEM_VALUE: emit_inputf( func, xmm, - reg->SrcRegister.Index, + reg->Register.Index, swizzle ); break; @@ -1299,7 +1300,7 @@ emit_fetch( emit_tempf( func, xmm, - reg->SrcRegister.Index, + reg->Register.Index, swizzle ); break; @@ -1331,7 +1332,7 @@ emit_fetch( } #define FETCH( FUNC, INST, XMM, INDEX, CHAN )\ - emit_fetch( FUNC, XMM, &(INST).FullSrcRegisters[INDEX], CHAN ) + emit_fetch( FUNC, XMM, &(INST).Src[INDEX], CHAN ) /** * Register store. @@ -1371,12 +1372,12 @@ emit_store( } - switch( reg->DstRegister.File ) { + switch( reg->Register.File ) { case TGSI_FILE_OUTPUT: emit_output( func, xmm, - reg->DstRegister.Index, + reg->Register.Index, chan_index ); break; @@ -1384,7 +1385,7 @@ emit_store( emit_temps( func, xmm, - reg->DstRegister.Index, + reg->Register.Index, chan_index ); break; @@ -1392,7 +1393,7 @@ emit_store( emit_addrs( func, xmm, - reg->DstRegister.Index, + reg->Register.Index, chan_index ); break; @@ -1402,7 +1403,7 @@ emit_store( } #define STORE( FUNC, INST, XMM, INDEX, CHAN )\ - emit_store( FUNC, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN ) + emit_store( FUNC, XMM, &(INST).Dst[INDEX], &(INST), CHAN ) static void PIPE_CDECL @@ -1459,12 +1460,13 @@ emit_tex( struct x86_function *func, boolean lodbias, boolean projected) { - const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; + const uint unit = inst->Src[1].Register.Index; struct x86_reg args[2]; unsigned count; unsigned i; - switch (inst->InstructionExtTexture.Texture) { + assert(inst->Instruction.Texture); + switch (inst->Texture.Texture) { case TGSI_TEXTURE_1D: count = 1; break; @@ -1719,15 +1721,15 @@ indirect_temp_reference(const struct tgsi_full_instruction *inst) { uint i; for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[i]; - if (reg->SrcRegister.File == TGSI_FILE_TEMPORARY && - reg->SrcRegister.Indirect) + const struct tgsi_full_src_register *reg = &inst->Src[i]; + if (reg->Register.File == TGSI_FILE_TEMPORARY && + reg->Register.Indirect) return TRUE; } for (i = 0; i < inst->Instruction.NumDstRegs; i++) { - const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[i]; - if (reg->DstRegister.File == TGSI_FILE_TEMPORARY && - reg->DstRegister.Indirect) + const struct tgsi_full_dst_register *reg = &inst->Dst[i]; + if (reg->Register.File == TGSI_FILE_TEMPORARY && + reg->Register.Indirect) return TRUE; } return FALSE; @@ -2243,7 +2245,7 @@ emit_instruction( case TGSI_OPCODE_KIL: /* conditional kill */ - emit_kil( func, &inst->FullSrcRegisters[0] ); + emit_kil( func, &inst->Src[0] ); break; case TGSI_OPCODE_PK2H: @@ -2576,7 +2578,7 @@ emit_instruction( return 0; break; - case TGSI_OPCODE_SHR: + case TGSI_OPCODE_ISHR: return 0; break; @@ -2632,12 +2634,13 @@ emit_declaration( struct x86_function *func, struct tgsi_full_declaration *decl ) { - if( decl->Declaration.File == TGSI_FILE_INPUT ) { + if( decl->Declaration.File == TGSI_FILE_INPUT || + decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE ) { unsigned first, last, mask; unsigned i, j; - first = decl->DeclarationRange.First; - last = decl->DeclarationRange.Last; + first = decl->Range.First; + last = decl->Range.Last; mask = decl->Declaration.UsageMask; for( i = first; i <= last; i++ ) { @@ -2951,6 +2954,9 @@ tgsi_emit_sse2( num_immediates++; } break; + case TGSI_TOKEN_TYPE_PROPERTY: + /* we just ignore them for now */ + break; default: ok = 0; diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c index d2b03ffb2fc..9fcffeda368 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -27,6 +27,9 @@ #include "util/u_debug.h" #include "util/u_memory.h" +#include "util/u_prim.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" #include "tgsi_text.h" #include "tgsi_build.h" #include "tgsi_info.h" @@ -59,6 +62,23 @@ static boolean uprcase( char c ) return c; } +/* + * Ignore case of str1 and assume str1 is already uppercase. + * Return TRUE iff str1 and str2 are equal. + */ +static int +streq_nocase_uprcase(const char *str1, + const char *str2) +{ + while (*str1 && *str2) { + if (*str1 != uprcase(*str2)) + return FALSE; + str1++; + str2++; + } + return TRUE; +} + static boolean str_match_no_case( const char **pcur, const char *str ) { const char *cur = *pcur; @@ -110,6 +130,20 @@ static boolean parse_uint( const char **pcur, uint *val ) return FALSE; } +static boolean parse_identifier( const char **pcur, char *ret ) +{ + const char *cur = *pcur; + int i = 0; + if (is_alpha_underscore( cur )) { + ret[i++] = *cur++; + while (is_alpha_underscore( cur )) + ret[i++] = *cur++; + *pcur = cur; + return TRUE; + } + return FALSE; +} + /* Parse floating point. */ static boolean parse_float( const char **pcur, float *val ) @@ -163,28 +197,43 @@ struct translate_ctx struct tgsi_token *tokens_cur; struct tgsi_token *tokens_end; struct tgsi_header *header; + unsigned processor : 4; + int implied_array_size : 5; }; static void report_error( struct translate_ctx *ctx, const char *msg ) { - debug_printf( "\nError: %s", msg ); + int line = 1; + int column = 1; + const char *itr = ctx->text; + + while (itr != ctx->cur) { + if (*itr == '\n') { + column = 1; + ++line; + } + ++column; + ++itr; + } + + debug_printf( "\nTGSI asm error: %s [%d : %d] \n", msg, line, column ); } /* Parse shader header. * Return TRUE for one of the following headers. - * FRAG1.1 - * GEOM1.1 - * VERT1.1 + * FRAG + * GEOM + * VERT */ static boolean parse_header( struct translate_ctx *ctx ) { uint processor; - if (str_match_no_case( &ctx->cur, "FRAG1.1" )) + if (str_match_no_case( &ctx->cur, "FRAG" )) processor = TGSI_PROCESSOR_FRAGMENT; - else if (str_match_no_case( &ctx->cur, "VERT1.1" )) + else if (str_match_no_case( &ctx->cur, "VERT" )) processor = TGSI_PROCESSOR_VERTEX; - else if (str_match_no_case( &ctx->cur, "GEOM1.1" )) + else if (str_match_no_case( &ctx->cur, "GEOM" )) processor = TGSI_PROCESSOR_GEOMETRY; else { report_error( ctx, "Unknown header" ); @@ -193,16 +242,13 @@ static boolean parse_header( struct translate_ctx *ctx ) if (ctx->tokens_cur >= ctx->tokens_end) return FALSE; - *(struct tgsi_version *) ctx->tokens_cur++ = tgsi_build_version(); - - if (ctx->tokens_cur >= ctx->tokens_end) - return FALSE; ctx->header = (struct tgsi_header *) ctx->tokens_cur++; *ctx->header = tgsi_build_header(); if (ctx->tokens_cur >= ctx->tokens_end) return FALSE; *(struct tgsi_processor *) ctx->tokens_cur++ = tgsi_build_processor( processor, ctx->header ); + ctx->processor = processor; return TRUE; } @@ -233,7 +279,8 @@ static const char *file_names[TGSI_FILE_COUNT] = "ADDR", "IMM", "LOOP", - "PRED" + "PRED", + "SV" }; static boolean @@ -298,92 +345,36 @@ parse_opt_writemask( return TRUE; } -/* <register_file_bracket> ::= <file> `[' - */ static boolean -parse_register_file_bracket( - struct translate_ctx *ctx, - uint *file ) -{ - if (!parse_file( &ctx->cur, file )) { - report_error( ctx, "Unknown register file" ); - return FALSE; - } - eat_opt_white( &ctx->cur ); - if (*ctx->cur != '[') { - report_error( ctx, "Expected `['" ); - return FALSE; - } - ctx->cur++; - return TRUE; -} +parse_register_dst( struct translate_ctx *ctx, + uint *file, + int *index ); -/* <register_file_bracket_index> ::= <register_file_bracket> <uint> - */ -static boolean -parse_register_file_bracket_index( - struct translate_ctx *ctx, - uint *file, - int *index ) -{ - uint uindex; +struct parsed_src_bracket { + int index; - if (!parse_register_file_bracket( ctx, file )) - return FALSE; - eat_opt_white( &ctx->cur ); - if (!parse_uint( &ctx->cur, &uindex )) { - report_error( ctx, "Expected literal unsigned integer" ); - return FALSE; - } - *index = (int) uindex; - return TRUE; -} + uint ind_file; + int ind_index; + uint ind_comp; +}; -/* Parse destination register operand. - * <register_dst> ::= <register_file_bracket_index> `]' - */ -static boolean -parse_register_dst( - struct translate_ctx *ctx, - uint *file, - int *index ) -{ - if (!parse_register_file_bracket_index( ctx, file, index )) - return FALSE; - eat_opt_white( &ctx->cur ); - if (*ctx->cur != ']') { - report_error( ctx, "Expected `]'" ); - return FALSE; - } - ctx->cur++; - return TRUE; -} -/* Parse source register operand. - * <register_src> ::= <register_file_bracket_index> `]' | - * <register_file_bracket> <register_dst> [`.' (`x' | `y' | `z' | `w')] `]' | - * <register_file_bracket> <register_dst> [`.' (`x' | `y' | `z' | `w')] `+' <uint> `]' | - * <register_file_bracket> <register_dst> [`.' (`x' | `y' | `z' | `w')] `-' <uint> `]' - */ static boolean -parse_register_src( +parse_register_src_bracket( struct translate_ctx *ctx, - uint *file, - int *index, - uint *ind_file, - int *ind_index, - uint *ind_comp) + struct parsed_src_bracket *brackets) { const char *cur; uint uindex; - *ind_comp = TGSI_SWIZZLE_X; - if (!parse_register_file_bracket( ctx, file )) - return FALSE; + memset(brackets, 0, sizeof(struct parsed_src_bracket)); + eat_opt_white( &ctx->cur ); + cur = ctx->cur; - if (parse_file( &cur, ind_file )) { - if (!parse_register_dst( ctx, ind_file, ind_index )) + if (parse_file( &cur, &brackets->ind_file )) { + if (!parse_register_dst( ctx, &brackets->ind_file, + &brackets->ind_index )) return FALSE; eat_opt_white( &ctx->cur ); @@ -393,16 +384,16 @@ parse_register_src( switch (uprcase(*ctx->cur)) { case 'X': - *ind_comp = TGSI_SWIZZLE_X; + brackets->ind_comp = TGSI_SWIZZLE_X; break; case 'Y': - *ind_comp = TGSI_SWIZZLE_Y; + brackets->ind_comp = TGSI_SWIZZLE_Y; break; case 'Z': - *ind_comp = TGSI_SWIZZLE_Z; + brackets->ind_comp = TGSI_SWIZZLE_Z; break; case 'W': - *ind_comp = TGSI_SWIZZLE_W; + brackets->ind_comp = TGSI_SWIZZLE_W; break; default: report_error(ctx, "Expected indirect register swizzle component `x', `y', `z' or `w'"); @@ -423,12 +414,12 @@ parse_register_src( return FALSE; } if (negate) - *index = -(int) uindex; + brackets->index = -(int) uindex; else - *index = (int) uindex; + brackets->index = (int) uindex; } else { - *index = 0; + brackets->index = 0; } } else { @@ -436,9 +427,9 @@ parse_register_src( report_error( ctx, "Expected literal unsigned integer" ); return FALSE; } - *index = (int) uindex; - *ind_file = TGSI_FILE_NULL; - *ind_index = 0; + brackets->index = (int) uindex; + brackets->ind_file = TGSI_FILE_NULL; + brackets->ind_index = 0; } eat_opt_white( &ctx->cur ); if (*ctx->cur != ']') { @@ -449,20 +440,123 @@ parse_register_src( return TRUE; } -/* Parse register declaration. - * <register_dcl> ::= <register_file_bracket_index> `]' | - * <register_file_bracket_index> `..' <index> `]' +static boolean +parse_opt_register_src_bracket( + struct translate_ctx *ctx, + struct parsed_src_bracket *brackets, + int *parsed_brackets) +{ + const char *cur = ctx->cur; + + *parsed_brackets = 0; + + eat_opt_white( &cur ); + if (cur[0] == '[') { + ++cur; + ctx->cur = cur; + + if (!parse_register_src_bracket(ctx, brackets)) + return FALSE; + + *parsed_brackets = 1; + } + + return TRUE; +} + +/* <register_file_bracket> ::= <file> `[' */ static boolean -parse_register_dcl( +parse_register_file_bracket( + struct translate_ctx *ctx, + uint *file ) +{ + if (!parse_file( &ctx->cur, file )) { + report_error( ctx, "Unknown register file" ); + return FALSE; + } + eat_opt_white( &ctx->cur ); + if (*ctx->cur != '[') { + report_error( ctx, "Expected `['" ); + return FALSE; + } + ctx->cur++; + return TRUE; +} + +/* <register_file_bracket_index> ::= <register_file_bracket> <uint> + */ +static boolean +parse_register_file_bracket_index( + struct translate_ctx *ctx, + uint *file, + int *index ) +{ + uint uindex; + + if (!parse_register_file_bracket( ctx, file )) + return FALSE; + eat_opt_white( &ctx->cur ); + if (!parse_uint( &ctx->cur, &uindex )) { + report_error( ctx, "Expected literal unsigned integer" ); + return FALSE; + } + *index = (int) uindex; + return TRUE; +} + +/* Parse source register operand. + * <register_src> ::= <register_file_bracket_index> `]' | + * <register_file_bracket> <register_dst> [`.' (`x' | `y' | `z' | `w')] `]' | + * <register_file_bracket> <register_dst> [`.' (`x' | `y' | `z' | `w')] `+' <uint> `]' | + * <register_file_bracket> <register_dst> [`.' (`x' | `y' | `z' | `w')] `-' <uint> `]' + */ +static boolean +parse_register_src( struct translate_ctx *ctx, uint *file, - int *first, - int *last ) + struct parsed_src_bracket *brackets) { - if (!parse_register_file_bracket_index( ctx, file, first )) + + brackets->ind_comp = TGSI_SWIZZLE_X; + if (!parse_register_file_bracket( ctx, file )) return FALSE; + if (!parse_register_src_bracket( ctx, brackets )) + return FALSE; + + return TRUE; +} + +struct parsed_dcl_bracket { + uint first; + uint last; +}; + +static boolean +parse_register_dcl_bracket( + struct translate_ctx *ctx, + struct parsed_dcl_bracket *bracket) +{ + uint uindex; + memset(bracket, 0, sizeof(struct parsed_dcl_bracket)); + eat_opt_white( &ctx->cur ); + + if (!parse_uint( &ctx->cur, &uindex )) { + /* it can be an empty bracket [] which means its range + * is from 0 to some implied size */ + if (ctx->cur[0] == ']' && ctx->implied_array_size != 0) { + bracket->first = 0; + bracket->last = ctx->implied_array_size - 1; + goto cleanup; + } + report_error( ctx, "Expected literal unsigned integer" ); + return FALSE; + } + bracket->first = (int) uindex; + + eat_opt_white( &ctx->cur ); + if (ctx->cur[0] == '.' && ctx->cur[1] == '.') { uint uindex; @@ -472,12 +566,14 @@ parse_register_dcl( report_error( ctx, "Expected literal integer" ); return FALSE; } - *last = (int) uindex; + bracket->last = (int) uindex; eat_opt_white( &ctx->cur ); } else { - *last = *first; + bracket->last = bracket->first; } + +cleanup: if (*ctx->cur != ']') { report_error( ctx, "Expected `]' or `..'" ); return FALSE; @@ -486,16 +582,70 @@ parse_register_dcl( return TRUE; } -static const char *modulate_names[TGSI_MODULATE_COUNT] = +/* Parse register declaration. + * <register_dcl> ::= <register_file_bracket_index> `]' | + * <register_file_bracket_index> `..' <index> `]' + */ +static boolean +parse_register_dcl( + struct translate_ctx *ctx, + uint *file, + struct parsed_dcl_bracket *brackets, + int *num_brackets) { - "_1X", - "_2X", - "_4X", - "_8X", - "_D2", - "_D4", - "_D8" -}; + const char *cur; + + *num_brackets = 0; + + if (!parse_register_file_bracket( ctx, file )) + return FALSE; + if (!parse_register_dcl_bracket( ctx, &brackets[0] )) + return FALSE; + + *num_brackets = 1; + + cur = ctx->cur; + eat_opt_white( &cur ); + + if (cur[0] == '[') { + ++cur; + ctx->cur = cur; + if (!parse_register_dcl_bracket( ctx, &brackets[1] )) + return FALSE; + /* for geometry shader we don't really care about + * the first brackets it's always the size of the + * input primitive. so we want to declare just + * the index relevant to the semantics which is in + * the second bracket */ + if (ctx->processor == TGSI_PROCESSOR_GEOMETRY) { + brackets[0] = brackets[1]; + } + *num_brackets = 2; + } + + return TRUE; +} + + +/* Parse destination register operand. + * <register_dst> ::= <register_file_bracket_index> `]' + */ +static boolean +parse_register_dst( + struct translate_ctx *ctx, + uint *file, + int *index ) +{ + if (!parse_register_file_bracket_index( ctx, file, index )) + return FALSE; + eat_opt_white( &ctx->cur ); + if (*ctx->cur != ']') { + report_error( ctx, "Expected `]'" ); + return FALSE; + } + ctx->cur++; + return TRUE; +} static boolean parse_dst_operand( @@ -512,26 +662,13 @@ parse_dst_operand( cur = ctx->cur; eat_opt_white( &cur ); - if (*cur == '_') { - uint i; - - for (i = 0; i < TGSI_MODULATE_COUNT; i++) { - if (str_match_no_case( &cur, modulate_names[i] )) { - if (!is_digit_alpha_underscore( cur )) { - dst->DstRegisterExtModulate.Modulate = i; - ctx->cur = cur; - break; - } - } - } - } if (!parse_opt_writemask( ctx, &writemask )) return FALSE; - dst->DstRegister.File = file; - dst->DstRegister.Index = index; - dst->DstRegister.WriteMask = writemask; + dst->Register.File = file; + dst->Register.Index = index; + dst->Register.WriteMask = writemask; return TRUE; } @@ -577,162 +714,59 @@ parse_src_operand( struct translate_ctx *ctx, struct tgsi_full_src_register *src ) { - const char *cur; - float value; uint file; - int index; - uint ind_file; - int ind_index; - uint ind_comp; uint swizzle[4]; - boolean parsed_ext_negate_paren = FALSE; boolean parsed_swizzle; + struct parsed_src_bracket bracket[2]; + int parsed_opt_brackets; if (*ctx->cur == '-') { - cur = ctx->cur; - cur++; - eat_opt_white( &cur ); - if (*cur == '(') { - cur++; - src->SrcRegisterExtMod.Negate = 1; - eat_opt_white( &cur ); - ctx->cur = cur; - parsed_ext_negate_paren = TRUE; - } - else if (*cur == '|') { - cur++; - src->SrcRegisterExtMod.Negate = 1; - src->SrcRegisterExtMod.Absolute = 1; - eat_opt_white(&cur); - ctx->cur = cur; - } - } - else if (*ctx->cur == '|') { ctx->cur++; eat_opt_white( &ctx->cur ); - src->SrcRegisterExtMod.Absolute = 1; + src->Register.Negate = 1; } - if (*ctx->cur == '-') { + if (*ctx->cur == '|') { ctx->cur++; eat_opt_white( &ctx->cur ); - src->SrcRegister.Negate = 1; + src->Register.Absolute = 1; } - cur = ctx->cur; - if (parse_float( &cur, &value )) { - if (value == 2.0f) { - eat_opt_white( &cur ); - if (*cur != '*') { - report_error( ctx, "Expected `*'" ); - return FALSE; - } - cur++; - if (*cur != '(') { - report_error( ctx, "Expected `('" ); - return FALSE; - } - cur++; - src->SrcRegisterExtMod.Scale2X = 1; - eat_opt_white( &cur ); - ctx->cur = cur; - } - } + if (!parse_register_src(ctx, &file, &bracket[0])) + return FALSE; + if (!parse_opt_register_src_bracket(ctx, &bracket[1], &parsed_opt_brackets)) + return FALSE; - if (*ctx->cur == '(') { - ctx->cur++; - eat_opt_white( &ctx->cur ); - src->SrcRegisterExtMod.Bias = 1; + src->Register.File = file; + src->Register.Index = bracket[0].index; + if (bracket[0].ind_file != TGSI_FILE_NULL) { + src->Register.Indirect = 1; + src->Indirect.File = bracket[0].ind_file; + src->Indirect.Index = bracket[0].ind_index; + src->Indirect.SwizzleX = bracket[0].ind_comp; + src->Indirect.SwizzleY = bracket[0].ind_comp; + src->Indirect.SwizzleZ = bracket[0].ind_comp; + src->Indirect.SwizzleW = bracket[0].ind_comp; } - - cur = ctx->cur; - if (parse_float( &cur, &value )) { - if (value == 1.0f) { - eat_opt_white( &cur ); - if (*cur != '-') { - report_error( ctx, "Expected `-'" ); - return FALSE; - } - cur++; - if (*cur != '(') { - report_error( ctx, "Expected `('" ); - return FALSE; - } - cur++; - src->SrcRegisterExtMod.Complement = 1; - eat_opt_white( &cur ); - ctx->cur = cur; - } - } - - if (!parse_register_src(ctx, &file, &index, &ind_file, &ind_index, &ind_comp)) - return FALSE; - src->SrcRegister.File = file; - src->SrcRegister.Index = index; - if (ind_file != TGSI_FILE_NULL) { - src->SrcRegister.Indirect = 1; - src->SrcRegisterInd.File = ind_file; - src->SrcRegisterInd.Index = ind_index; - src->SrcRegisterInd.SwizzleX = ind_comp; - src->SrcRegisterInd.SwizzleY = ind_comp; - src->SrcRegisterInd.SwizzleZ = ind_comp; - src->SrcRegisterInd.SwizzleW = ind_comp; + if (parsed_opt_brackets) { + src->Register.Dimension = 1; + src->Dimension.Indirect = 0; + src->Dimension.Dimension = 0; + src->Dimension.Index = bracket[1].index; } /* Parse optional swizzle. */ if (parse_optional_swizzle( ctx, swizzle, &parsed_swizzle )) { if (parsed_swizzle) { - src->SrcRegister.SwizzleX = swizzle[0]; - src->SrcRegister.SwizzleY = swizzle[1]; - src->SrcRegister.SwizzleZ = swizzle[2]; - src->SrcRegister.SwizzleW = swizzle[3]; - } - } - - if (src->SrcRegisterExtMod.Complement) { - eat_opt_white( &ctx->cur ); - if (*ctx->cur != ')') { - report_error( ctx, "Expected `)'" ); - return FALSE; - } - ctx->cur++; - } - - if (src->SrcRegisterExtMod.Bias) { - eat_opt_white( &ctx->cur ); - if (*ctx->cur != ')') { - report_error( ctx, "Expected `)'" ); - return FALSE; - } - ctx->cur++; - eat_opt_white( &ctx->cur ); - if (*ctx->cur != '-') { - report_error( ctx, "Expected `-'" ); - return FALSE; - } - ctx->cur++; - eat_opt_white( &ctx->cur ); - if (!parse_float( &ctx->cur, &value )) { - report_error( ctx, "Expected literal floating point" ); - return FALSE; - } - if (value != 0.5f) { - report_error( ctx, "Expected 0.5" ); - return FALSE; + src->Register.SwizzleX = swizzle[0]; + src->Register.SwizzleY = swizzle[1]; + src->Register.SwizzleZ = swizzle[2]; + src->Register.SwizzleW = swizzle[3]; } } - if (src->SrcRegisterExtMod.Scale2X) { - eat_opt_white( &ctx->cur ); - if (*ctx->cur != ')') { - report_error( ctx, "Expected `)'" ); - return FALSE; - } - ctx->cur++; - } - - if (src->SrcRegisterExtMod.Absolute) { + if (src->Register.Absolute) { eat_opt_white( &ctx->cur ); if (*ctx->cur != '|') { report_error( ctx, "Expected `|'" ); @@ -741,14 +775,6 @@ parse_src_operand( ctx->cur++; } - if (parsed_ext_negate_paren) { - eat_opt_white( &ctx->cur ); - if (*ctx->cur != ')') { - report_error( ctx, "Expected `)'" ); - return FALSE; - } - ctx->cur++; - } return TRUE; } @@ -840,11 +866,11 @@ parse_instruction( } if (i < info->num_dst) { - if (!parse_dst_operand( ctx, &inst.FullDstRegisters[i] )) + if (!parse_dst_operand( ctx, &inst.Dst[i] )) return FALSE; } else if (i < info->num_dst + info->num_src) { - if (!parse_src_operand( ctx, &inst.FullSrcRegisters[i - info->num_dst] )) + if (!parse_src_operand( ctx, &inst.Src[i - info->num_dst] )) return FALSE; } else { @@ -853,7 +879,8 @@ parse_instruction( for (j = 0; j < TGSI_TEXTURE_COUNT; j++) { if (str_match_no_case( &ctx->cur, texture_names[j] )) { if (!is_digit_alpha_underscore( ctx->cur )) { - inst.InstructionExtTexture.Texture = j; + inst.Instruction.Texture = 1; + inst.Texture.Texture = j; break; } } @@ -879,7 +906,8 @@ parse_instruction( report_error( ctx, "Expected a label" ); return FALSE; } - inst.InstructionExtLabel.Label = target; + inst.Instruction.Label = 1; + inst.Label.Label = target; } advance = tgsi_build_full_instruction( @@ -903,7 +931,9 @@ static const char *semantic_names[TGSI_SEMANTIC_COUNT] = "PSIZE", "GENERIC", "NORMAL", - "FACE" + "FACE", + "EDGEFLAG", + "PRIM_ID" }; static const char *interpolate_names[TGSI_INTERPOLATE_COUNT] = @@ -917,8 +947,8 @@ static boolean parse_declaration( struct translate_ctx *ctx ) { struct tgsi_full_declaration decl; uint file; - int first; - int last; + struct parsed_dcl_bracket brackets[2]; + int num_brackets; uint writemask; const char *cur; uint advance; @@ -930,7 +960,7 @@ static boolean parse_declaration( struct translate_ctx *ctx ) report_error( ctx, "Syntax error" ); return FALSE; } - if (!parse_register_dcl( ctx, &file, &first, &last )) + if (!parse_register_dcl( ctx, &file, brackets, &num_brackets)) return FALSE; if (!parse_opt_writemask( ctx, &writemask )) return FALSE; @@ -938,8 +968,8 @@ static boolean parse_declaration( struct translate_ctx *ctx ) decl = tgsi_default_full_declaration(); decl.Declaration.File = file; decl.Declaration.UsageMask = writemask; - decl.DeclarationRange.First = first; - decl.DeclarationRange.Last = last; + decl.Range.First = brackets[0].first; + decl.Range.Last = brackets[0].last; cur = ctx->cur; eat_opt_white( &cur ); @@ -970,13 +1000,13 @@ static boolean parse_declaration( struct translate_ctx *ctx ) } cur2++; - decl.Semantic.SemanticIndex = index; + decl.Semantic.Index = index; cur = cur2; } decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = i; + decl.Semantic.Name = i; ctx->cur = cur; break; @@ -1082,6 +1112,110 @@ static boolean parse_immediate( struct translate_ctx *ctx ) return TRUE; } +static const char *property_names[] = +{ + "GS_INPUT_PRIMITIVE", + "GS_OUTPUT_PRIMITIVE", + "GS_MAX_OUTPUT_VERTICES" +}; + +static const char *primitive_names[] = +{ + "POINTS", + "LINES", + "LINE_LOOP", + "LINE_STRIP", + "TRIANGLES", + "TRIANGLE_STRIP", + "TRIANGLE_FAN", + "QUADS", + "QUAD_STRIP", + "POLYGON" +}; + +static boolean +parse_primitive( const char **pcur, uint *primitive ) +{ + uint i; + + for (i = 0; i < PIPE_PRIM_MAX; i++) { + const char *cur = *pcur; + + if (str_match_no_case( &cur, primitive_names[i])) { + *primitive = i; + *pcur = cur; + return TRUE; + } + } + return FALSE; +} + + +static boolean parse_property( struct translate_ctx *ctx ) +{ + struct tgsi_full_property prop; + uint property_name; + uint values[8]; + uint advance; + char id[64]; + + if (!eat_white( &ctx->cur )) { + report_error( ctx, "Syntax error" ); + return FALSE; + } + if (!parse_identifier( &ctx->cur, id )) { + report_error( ctx, "Syntax error" ); + return FALSE; + } + for (property_name = 0; property_name < TGSI_PROPERTY_COUNT; + ++property_name) { + if (streq_nocase_uprcase(property_names[property_name], id)) { + break; + } + } + if (property_name >= TGSI_PROPERTY_COUNT) { + debug_printf( "\nError: Unknown property : '%s'", id ); + return FALSE; + } + + eat_opt_white( &ctx->cur ); + switch(property_name) { + case TGSI_PROPERTY_GS_INPUT_PRIM: + case TGSI_PROPERTY_GS_OUTPUT_PRIM: + if (!parse_primitive(&ctx->cur, &values[0] )) { + report_error( ctx, "Unknown primitive name as property!" ); + return FALSE; + } + if (property_name == TGSI_PROPERTY_GS_INPUT_PRIM && + ctx->processor == TGSI_PROCESSOR_GEOMETRY) { + ctx->implied_array_size = u_vertices_per_prim(values[0]); + } + break; + default: + if (!parse_uint(&ctx->cur, &values[0] )) { + report_error( ctx, "Expected unsigned integer as property!" ); + return FALSE; + } + } + + prop = tgsi_default_full_property(); + prop.Property.PropertyName = property_name; + prop.Property.NrTokens += 1; + prop.u[0].Data = values[0]; + + advance = tgsi_build_full_property( + &prop, + ctx->tokens_cur, + ctx->header, + (uint) (ctx->tokens_end - ctx->tokens_cur) ); + if (advance == 0) + return FALSE; + ctx->tokens_cur += advance; + + return TRUE; +} + + static boolean translate( struct translate_ctx *ctx ) { eat_opt_white( &ctx->cur ); @@ -1090,7 +1224,6 @@ static boolean translate( struct translate_ctx *ctx ) while (*ctx->cur != '\0') { uint label_val = 0; - if (!eat_white( &ctx->cur )) { report_error( ctx, "Syntax error" ); return FALSE; @@ -1098,7 +1231,6 @@ static boolean translate( struct translate_ctx *ctx ) if (*ctx->cur == '\0') break; - if (parse_label( ctx, &label_val )) { if (!parse_instruction( ctx, TRUE )) return FALSE; @@ -1111,6 +1243,10 @@ static boolean translate( struct translate_ctx *ctx ) if (!parse_immediate( ctx )) return FALSE; } + else if (str_match_no_case( &ctx->cur, "PROPERTY" )) { + if (!parse_property( ctx )) + return FALSE; + } else if (!parse_instruction( ctx, FALSE )) { return FALSE; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.c b/src/gallium/auxiliary/tgsi/tgsi_transform.c index bc9c18fd4a7..ae875f29abf 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_transform.c +++ b/src/gallium/auxiliary/tgsi/tgsi_transform.c @@ -79,6 +79,19 @@ emit_immediate(struct tgsi_transform_context *ctx, } +static void +emit_property(struct tgsi_transform_context *ctx, + const struct tgsi_full_property *prop) +{ + uint ti = ctx->ti; + + ti += tgsi_build_full_property(prop, + ctx->tokens_out + ti, + ctx->header, + ctx->max_tokens_out - ti); + ctx->ti = ti; +} + /** * Apply user-defined transformations to the input shader to produce @@ -110,6 +123,7 @@ tgsi_transform_shader(const struct tgsi_token *tokens_in, ctx->emit_instruction = emit_instruction; ctx->emit_declaration = emit_declaration; ctx->emit_immediate = emit_immediate; + ctx->emit_property = emit_property; ctx->tokens_out = tokens_out; ctx->max_tokens_out = max_tokens_out; @@ -130,15 +144,13 @@ tgsi_transform_shader(const struct tgsi_token *tokens_in, /** ** Setup output shader **/ - *(struct tgsi_version *) &tokens_out[0] = tgsi_build_version(); - - ctx->header = (struct tgsi_header *) (tokens_out + 1); + ctx->header = (struct tgsi_header *)tokens_out; *ctx->header = tgsi_build_header(); - processor = (struct tgsi_processor *) (tokens_out + 2); + processor = (struct tgsi_processor *) (tokens_out + 1); *processor = tgsi_build_processor( procType, ctx->header ); - ctx->ti = 3; + ctx->ti = 2; /** @@ -184,6 +196,17 @@ tgsi_transform_shader(const struct tgsi_token *tokens_in, ctx->emit_immediate(ctx, fullimm); } break; + case TGSI_TOKEN_TYPE_PROPERTY: + { + struct tgsi_full_property *fullprop + = &parse.FullToken.FullProperty; + + if (ctx->transform_property) + ctx->transform_property(ctx, fullprop); + else + ctx->emit_property(ctx, fullprop); + } + break; default: assert( 0 ); @@ -215,7 +238,7 @@ tgsi_transform_foo( struct tgsi_token *tokens_out, uint max_tokens_out ) { const char *text = - "FRAG1.1\n" + "FRAG\n" "DCL IN[0], COLOR, CONSTANT\n" "DCL OUT[0], COLOR\n" " 0: MOV OUT[0], IN[0]\n" diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.h b/src/gallium/auxiliary/tgsi/tgsi_transform.h index a121adbaef4..818478e277a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_transform.h +++ b/src/gallium/auxiliary/tgsi/tgsi_transform.h @@ -53,6 +53,8 @@ struct tgsi_transform_context void (*transform_immediate)(struct tgsi_transform_context *ctx, struct tgsi_full_immediate *imm); + void (*transform_property)(struct tgsi_transform_context *ctx, + struct tgsi_full_property *prop); /** * Called at end of input program to allow caller to append extra @@ -73,6 +75,8 @@ struct tgsi_transform_context const struct tgsi_full_declaration *decl); void (*emit_immediate)(struct tgsi_transform_context *ctx, const struct tgsi_full_immediate *imm); + void (*emit_property)(struct tgsi_transform_context *ctx, + const struct tgsi_full_property *prop); struct tgsi_header *header; uint max_tokens_out; diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index bf39cf54094..e64e2b731df 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -37,7 +37,6 @@ #include "util/u_math.h" union tgsi_any_token { - struct tgsi_version version; struct tgsi_header header; struct tgsi_processor processor; struct tgsi_token token; @@ -47,14 +46,12 @@ union tgsi_any_token { struct tgsi_immediate imm; union tgsi_immediate_data imm_data; struct tgsi_instruction insn; - struct tgsi_instruction_ext_label insn_ext_label; - struct tgsi_instruction_ext_texture insn_ext_texture; - struct tgsi_instruction_ext_predicate insn_ext_predicate; + struct tgsi_instruction_predicate insn_predicate; + struct tgsi_instruction_label insn_label; + struct tgsi_instruction_texture insn_texture; struct tgsi_src_register src; - struct tgsi_src_register_ext_mod src_ext_mod; struct tgsi_dimension dim; struct tgsi_dst_register dst; - struct tgsi_dst_register_ext_modulate dst_ext_mod; unsigned value; }; @@ -72,6 +69,7 @@ struct ureg_tokens { #define UREG_MAX_IMMEDIATE 32 #define UREG_MAX_TEMP 256 #define UREG_MAX_ADDR 2 +#define UREG_MAX_LOOP 1 #define UREG_MAX_PRED 1 #define DOMAIN_DECL 0 @@ -92,14 +90,24 @@ struct ureg_program unsigned vs_inputs[UREG_MAX_INPUT/32]; struct { + unsigned index; + } gs_input[UREG_MAX_INPUT]; + unsigned nr_gs_inputs; + + struct { unsigned semantic_name; unsigned semantic_index; } output[UREG_MAX_OUTPUT]; unsigned nr_outputs; struct { - float v[4]; + union { + float f[4]; + unsigned u[4]; + int i[4]; + } value; unsigned nr; + unsigned type; } immediate[UREG_MAX_IMMEDIATE]; unsigned nr_immediates; @@ -117,6 +125,7 @@ struct ureg_program unsigned nr_addrs; unsigned nr_preds; + unsigned nr_loops; unsigned nr_instructions; struct ureg_tokens domain[2]; @@ -279,6 +288,22 @@ ureg_DECL_vs_input( struct ureg_program *ureg, } +struct ureg_src +ureg_DECL_gs_input(struct ureg_program *ureg, + unsigned index) +{ + if (ureg->nr_gs_inputs < UREG_MAX_INPUT) { + ureg->gs_input[ureg->nr_gs_inputs].index = index; + ureg->nr_gs_inputs++; + } else { + set_bad(ureg); + } + + /* XXX: Add suport for true 2D input registers. */ + return ureg_src_register(TGSI_FILE_INPUT, index); +} + + struct ureg_dst ureg_DECL_output( struct ureg_program *ureg, unsigned name, @@ -350,6 +375,7 @@ struct ureg_src ureg_DECL_constant(struct ureg_program *ureg, i = ureg->nr_constant_ranges++; ureg->constant_range[i].first = index; ureg->constant_range[i].last = index; + goto out; } /* Collapse all ranges down to one: @@ -417,6 +443,19 @@ struct ureg_dst ureg_DECL_address( struct ureg_program *ureg ) return ureg_dst_register( TGSI_FILE_ADDRESS, 0 ); } +/* Allocate a new loop register. + */ +struct ureg_dst +ureg_DECL_loop(struct ureg_program *ureg) +{ + if (ureg->nr_loops < UREG_MAX_LOOP) { + return ureg_dst_register(TGSI_FILE_LOOP, ureg->nr_loops++); + } + + assert(0); + return ureg_dst_register(TGSI_FILE_LOOP, 0); +} + /* Allocate a new predicate register. */ struct ureg_dst @@ -452,22 +491,22 @@ struct ureg_src ureg_DECL_sampler( struct ureg_program *ureg, } - - -static int match_or_expand_immediate( const float *v, - unsigned nr, - float *v2, - unsigned *nr2, - unsigned *swizzle ) +static int +match_or_expand_immediate( const unsigned *v, + unsigned nr, + unsigned *v2, + unsigned *pnr2, + unsigned *swizzle ) { + unsigned nr2 = *pnr2; unsigned i, j; - + *swizzle = 0; for (i = 0; i < nr; i++) { boolean found = FALSE; - for (j = 0; j < *nr2 && !found; j++) { + for (j = 0; j < nr2 && !found; j++) { if (v[i] == v2[j]) { *swizzle |= j << (i * 2); found = TRUE; @@ -475,24 +514,28 @@ static int match_or_expand_immediate( const float *v, } if (!found) { - if (*nr2 >= 4) + if (nr2 >= 4) { return FALSE; + } - v2[*nr2] = v[i]; - *swizzle |= *nr2 << (i * 2); - (*nr2)++; + v2[nr2] = v[i]; + *swizzle |= nr2 << (i * 2); + nr2++; } } + /* Actually expand immediate only when fully succeeded. + */ + *pnr2 = nr2; return TRUE; } - - -struct ureg_src ureg_DECL_immediate( struct ureg_program *ureg, - const float *v, - unsigned nr ) +static struct ureg_src +decl_immediate( struct ureg_program *ureg, + const unsigned *v, + unsigned nr, + unsigned type ) { unsigned i, j; unsigned swizzle; @@ -502,38 +545,82 @@ struct ureg_src ureg_DECL_immediate( struct ureg_program *ureg, */ for (i = 0; i < ureg->nr_immediates; i++) { - if (match_or_expand_immediate( v, - nr, - ureg->immediate[i].v, - &ureg->immediate[i].nr, - &swizzle )) + if (ureg->immediate[i].type != type) { + continue; + } + if (match_or_expand_immediate(v, + nr, + ureg->immediate[i].value.u, + &ureg->immediate[i].nr, + &swizzle)) { goto out; + } } if (ureg->nr_immediates < UREG_MAX_IMMEDIATE) { i = ureg->nr_immediates++; - if (match_or_expand_immediate( v, - nr, - ureg->immediate[i].v, - &ureg->immediate[i].nr, - &swizzle )) + ureg->immediate[i].type = type; + if (match_or_expand_immediate(v, + nr, + ureg->immediate[i].value.u, + &ureg->immediate[i].nr, + &swizzle)) { goto out; + } } - set_bad( ureg ); + set_bad(ureg); out: /* Make sure that all referenced elements are from this immediate. * Has the effect of making size-one immediates into scalars. */ - for (j = nr; j < 4; j++) + for (j = nr; j < 4; j++) { swizzle |= (swizzle & 0x3) << (j * 2); + } + + return ureg_swizzle(ureg_src_register(TGSI_FILE_IMMEDIATE, i), + (swizzle >> 0) & 0x3, + (swizzle >> 2) & 0x3, + (swizzle >> 4) & 0x3, + (swizzle >> 6) & 0x3); +} + + +struct ureg_src +ureg_DECL_immediate( struct ureg_program *ureg, + const float *v, + unsigned nr ) +{ + union { + float f[4]; + unsigned u[4]; + } fu; + unsigned int i; + + for (i = 0; i < nr; i++) { + fu.f[i] = v[i]; + } + + return decl_immediate(ureg, fu.u, nr, TGSI_IMM_FLOAT32); +} - return ureg_swizzle( ureg_src_register( TGSI_FILE_IMMEDIATE, i ), - (swizzle >> 0) & 0x3, - (swizzle >> 2) & 0x3, - (swizzle >> 4) & 0x3, - (swizzle >> 6) & 0x3); + +struct ureg_src +ureg_DECL_immediate_uint( struct ureg_program *ureg, + const unsigned *v, + unsigned nr ) +{ + return decl_immediate(ureg, v, nr, TGSI_IMM_UINT32); +} + + +struct ureg_src +ureg_DECL_immediate_int( struct ureg_program *ureg, + const int *v, + unsigned nr ) +{ + return decl_immediate(ureg, (const unsigned *)v, nr, TGSI_IMM_INT32); } @@ -541,9 +628,7 @@ void ureg_emit_src( struct ureg_program *ureg, struct ureg_src src ) { - unsigned size = (1 + - (src.Absolute ? 1 : 0) + - (src.Indirect ? 1 : 0)); + unsigned size = 1 + (src.Indirect ? 1 : 0); union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size ); unsigned n = 0; @@ -560,17 +645,8 @@ ureg_emit_src( struct ureg_program *ureg, out[n].src.SwizzleW = src.SwizzleW; out[n].src.Index = src.Index; out[n].src.Negate = src.Negate; + out[0].src.Absolute = src.Absolute; n++; - - if (src.Absolute) { - out[0].src.Extended = 1; - out[0].src.Negate = 0; - out[n].value = 0; - out[n].src_ext_mod.Type = TGSI_SRC_REGISTER_EXT_TYPE_MOD; - out[n].src_ext_mod.Absolute = 1; - out[n].src_ext_mod.Negate = src.Negate; - n++; - } if (src.Indirect) { out[0].src.Indirect = 1; @@ -661,35 +737,27 @@ ureg_emit_insn(struct ureg_program *ureg, validate( opcode, num_dst, num_src ); out = get_tokens( ureg, DOMAIN_INSN, count ); - out[0].value = 0; - out[0].insn.Type = TGSI_TOKEN_TYPE_INSTRUCTION; - out[0].insn.NrTokens = 0; + out[0].insn = tgsi_default_instruction(); out[0].insn.Opcode = opcode; out[0].insn.Saturate = saturate; out[0].insn.NumDstRegs = num_dst; out[0].insn.NumSrcRegs = num_src; - out[0].insn.Padding = 0; result.insn_token = ureg->domain[DOMAIN_INSN].count - count; + result.extended_token = result.insn_token; if (predicate) { - out[0].insn.Extended = 1; - out[1].insn_ext_predicate = tgsi_default_instruction_ext_predicate(); - out[1].insn_ext_predicate.Negate = pred_negate; - out[1].insn_ext_predicate.SwizzleX = pred_swizzle_x; - out[1].insn_ext_predicate.SwizzleY = pred_swizzle_y; - out[1].insn_ext_predicate.SwizzleZ = pred_swizzle_z; - out[1].insn_ext_predicate.SwizzleW = pred_swizzle_w; - - result.extended_token = result.insn_token + 1; - } else { - out[0].insn.Extended = 0; - - result.extended_token = result.insn_token; + out[0].insn.Predicate = 1; + out[1].insn_predicate = tgsi_default_instruction_predicate(); + out[1].insn_predicate.Negate = pred_negate; + out[1].insn_predicate.SwizzleX = pred_swizzle_x; + out[1].insn_predicate.SwizzleY = pred_swizzle_y; + out[1].insn_predicate.SwizzleZ = pred_swizzle_z; + out[1].insn_predicate.SwizzleW = pred_swizzle_w; } ureg->nr_instructions++; - + return result; } @@ -705,13 +773,11 @@ ureg_emit_label(struct ureg_program *ureg, return; out = get_tokens( ureg, DOMAIN_INSN, 1 ); - insn = retrieve_token( ureg, DOMAIN_INSN, extended_token ); + out[0].value = 0; - insn->token.Extended = 1; + insn = retrieve_token( ureg, DOMAIN_INSN, extended_token ); + insn->insn.Label = 1; - out[0].value = 0; - out[0].insn_ext_label.Type = TGSI_INSTRUCTION_EXT_TYPE_LABEL; - *label_token = ureg->domain[DOMAIN_INSN].count - 1; } @@ -734,8 +800,7 @@ ureg_fixup_label(struct ureg_program *ureg, { union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_INSN, label_token ); - assert(out->insn_ext_label.Type == TGSI_INSTRUCTION_EXT_TYPE_LABEL); - out->insn_ext_label.Label = instruction_number; + out->insn_label.Label = instruction_number; } @@ -749,11 +814,10 @@ ureg_emit_texture(struct ureg_program *ureg, out = get_tokens( ureg, DOMAIN_INSN, 1 ); insn = retrieve_token( ureg, DOMAIN_INSN, extended_token ); - insn->token.Extended = 1; + insn->insn.Texture = 1; out[0].value = 0; - out[0].insn_ext_texture.Type = TGSI_INSTRUCTION_EXT_TYPE_TEXTURE; - out[0].insn_ext_texture.Texture = target; + out[0].insn_texture.Texture = target; } @@ -918,8 +982,8 @@ static void emit_decl( struct ureg_program *ureg, out[1].decl_range.Last = index; out[2].value = 0; - out[2].decl_semantic.SemanticName = semantic_name; - out[2].decl_semantic.SemanticIndex = semantic_index; + out[2].decl_semantic.Name = semantic_name; + out[2].decl_semantic.Index = semantic_index; } @@ -944,22 +1008,23 @@ static void emit_decl_range( struct ureg_program *ureg, out[1].decl_range.Last = first + count - 1; } -static void emit_immediate( struct ureg_program *ureg, - const float *v ) +static void +emit_immediate( struct ureg_program *ureg, + const unsigned *v, + unsigned type ) { union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 5 ); out[0].value = 0; out[0].imm.Type = TGSI_TOKEN_TYPE_IMMEDIATE; out[0].imm.NrTokens = 5; - out[0].imm.DataType = TGSI_IMM_FLOAT32; + out[0].imm.DataType = type; out[0].imm.Padding = 0; - out[0].imm.Extended = 0; - out[1].imm_data.Float = v[0]; - out[2].imm_data.Float = v[1]; - out[3].imm_data.Float = v[2]; - out[4].imm_data.Float = v[3]; + out[1].imm_data.Uint = v[0]; + out[2].imm_data.Uint = v[1]; + out[3].imm_data.Uint = v[2]; + out[4].imm_data.Uint = v[3]; } @@ -975,8 +1040,7 @@ static void emit_decls( struct ureg_program *ureg ) emit_decl_range( ureg, TGSI_FILE_INPUT, i, 1 ); } } - } - else { + } else if (ureg->processor == TGSI_PROCESSOR_FRAGMENT) { for (i = 0; i < ureg->nr_fs_inputs; i++) { emit_decl( ureg, TGSI_FILE_INPUT, @@ -985,6 +1049,13 @@ static void emit_decls( struct ureg_program *ureg ) ureg->fs_input[i].semantic_index, ureg->fs_input[i].interp ); } + } else { + for (i = 0; i < ureg->nr_gs_inputs; i++) { + emit_decl_range(ureg, + TGSI_FILE_INPUT, + ureg->gs_input[i].index, + 1); + } } for (i = 0; i < ureg->nr_outputs; i++) { @@ -1023,6 +1094,13 @@ static void emit_decls( struct ureg_program *ureg ) 0, ureg->nr_addrs ); } + if (ureg->nr_loops) { + emit_decl_range(ureg, + TGSI_FILE_LOOP, + 0, + ureg->nr_loops); + } + if (ureg->nr_preds) { emit_decl_range(ureg, TGSI_FILE_PREDICATE, @@ -1032,7 +1110,8 @@ static void emit_decls( struct ureg_program *ureg ) for (i = 0; i < ureg->nr_immediates; i++) { emit_immediate( ureg, - ureg->immediate[i].v ); + ureg->immediate[i].value.u, + ureg->immediate[i].type ); } } @@ -1055,26 +1134,22 @@ static void copy_instructions( struct ureg_program *ureg ) static void fixup_header_size(struct ureg_program *ureg) { - union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_DECL, 1 ); + union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_DECL, 0 ); - out->header.BodySize = ureg->domain[DOMAIN_DECL].count - 3; + out->header.BodySize = ureg->domain[DOMAIN_DECL].count - 2; } static void emit_header( struct ureg_program *ureg ) { - union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 3 ); - - out[0].version.MajorVersion = 1; - out[0].version.MinorVersion = 1; - out[0].version.Padding = 0; + union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 2 ); - out[1].header.HeaderSize = 2; - out[1].header.BodySize = 0; + out[0].header.HeaderSize = 2; + out[0].header.BodySize = 0; - out[2].processor.Processor = ureg->processor; - out[2].processor.Padding = 0; + out[1].processor.Processor = ureg->processor; + out[1].processor.Padding = 0; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h index dae42911947..6f11273320a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -133,6 +133,10 @@ struct ureg_src ureg_DECL_vs_input( struct ureg_program *, unsigned index ); +struct ureg_src +ureg_DECL_gs_input(struct ureg_program *, + unsigned index); + struct ureg_dst ureg_DECL_output( struct ureg_program *, unsigned semantic_name, @@ -144,6 +148,16 @@ ureg_DECL_immediate( struct ureg_program *, unsigned nr ); struct ureg_src +ureg_DECL_immediate_uint( struct ureg_program *, + const unsigned *v, + unsigned nr ); + +struct ureg_src +ureg_DECL_immediate_int( struct ureg_program *, + const int *v, + unsigned nr ); + +struct ureg_src ureg_DECL_constant( struct ureg_program *, unsigned index ); @@ -158,6 +172,9 @@ struct ureg_dst ureg_DECL_address( struct ureg_program * ); struct ureg_dst +ureg_DECL_loop( struct ureg_program * ); + +struct ureg_dst ureg_DECL_predicate(struct ureg_program *); /* Supply an index to the sampler declaration as this is the hook to @@ -214,6 +231,90 @@ ureg_imm1f( struct ureg_program *ureg, return ureg_DECL_immediate( ureg, v, 1 ); } +static INLINE struct ureg_src +ureg_imm4u( struct ureg_program *ureg, + unsigned a, unsigned b, + unsigned c, unsigned d) +{ + unsigned v[4]; + v[0] = a; + v[1] = b; + v[2] = c; + v[3] = d; + return ureg_DECL_immediate_uint( ureg, v, 4 ); +} + +static INLINE struct ureg_src +ureg_imm3u( struct ureg_program *ureg, + unsigned a, unsigned b, + unsigned c) +{ + unsigned v[3]; + v[0] = a; + v[1] = b; + v[2] = c; + return ureg_DECL_immediate_uint( ureg, v, 3 ); +} + +static INLINE struct ureg_src +ureg_imm2u( struct ureg_program *ureg, + unsigned a, unsigned b) +{ + unsigned v[2]; + v[0] = a; + v[1] = b; + return ureg_DECL_immediate_uint( ureg, v, 2 ); +} + +static INLINE struct ureg_src +ureg_imm1u( struct ureg_program *ureg, + unsigned a) +{ + return ureg_DECL_immediate_uint( ureg, &a, 1 ); +} + +static INLINE struct ureg_src +ureg_imm4i( struct ureg_program *ureg, + int a, int b, + int c, int d) +{ + int v[4]; + v[0] = a; + v[1] = b; + v[2] = c; + v[3] = d; + return ureg_DECL_immediate_int( ureg, v, 4 ); +} + +static INLINE struct ureg_src +ureg_imm3i( struct ureg_program *ureg, + int a, int b, + int c) +{ + int v[3]; + v[0] = a; + v[1] = b; + v[2] = c; + return ureg_DECL_immediate_int( ureg, v, 3 ); +} + +static INLINE struct ureg_src +ureg_imm2i( struct ureg_program *ureg, + int a, int b) +{ + int v[2]; + v[0] = a; + v[1] = b; + return ureg_DECL_immediate_int( ureg, v, 2 ); +} + +static INLINE struct ureg_src +ureg_imm1i( struct ureg_program *ureg, + int a) +{ + return ureg_DECL_immediate_int( ureg, &a, 1 ); +} + /*********************************************************************** * Functions for patching up labels */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c index 4dee1be9e8c..f4ca9e21ed9 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_util.c +++ b/src/gallium/auxiliary/tgsi/tgsi_util.c @@ -76,7 +76,7 @@ tgsi_util_get_full_src_register_swizzle( unsigned component ) { return tgsi_util_get_src_register_swizzle( - ®->SrcRegister, + ®->Register, component ); } @@ -111,10 +111,10 @@ tgsi_util_get_full_src_register_sign_mode( { unsigned sign_mode; - if( reg->SrcRegisterExtMod.Absolute ) { + if( reg->Register.Absolute ) { /* Consider only the post-abs negation. */ - if( reg->SrcRegisterExtMod.Negate ) { + if( reg->Register.Negate ) { sign_mode = TGSI_UTIL_SIGN_SET; } else { @@ -122,17 +122,7 @@ tgsi_util_get_full_src_register_sign_mode( } } else { - /* Accumulate the three negations. */ - - unsigned negate; - - negate = reg->SrcRegister.Negate; - - if( reg->SrcRegisterExtMod.Negate ) { - negate = !negate; - } - - if( negate ) { + if( reg->Register.Negate ) { sign_mode = TGSI_UTIL_SIGN_TOGGLE; } else { @@ -151,27 +141,23 @@ tgsi_util_set_full_src_register_sign_mode( switch (sign_mode) { case TGSI_UTIL_SIGN_CLEAR: - reg->SrcRegister.Negate = 0; - reg->SrcRegisterExtMod.Absolute = 1; - reg->SrcRegisterExtMod.Negate = 0; + reg->Register.Negate = 0; + reg->Register.Absolute = 1; break; case TGSI_UTIL_SIGN_SET: - reg->SrcRegister.Negate = 0; - reg->SrcRegisterExtMod.Absolute = 1; - reg->SrcRegisterExtMod.Negate = 1; + reg->Register.Absolute = 1; + reg->Register.Negate = 1; break; case TGSI_UTIL_SIGN_TOGGLE: - reg->SrcRegister.Negate = 1; - reg->SrcRegisterExtMod.Absolute = 0; - reg->SrcRegisterExtMod.Negate = 0; + reg->Register.Negate = 1; + reg->Register.Absolute = 0; break; case TGSI_UTIL_SIGN_KEEP: - reg->SrcRegister.Negate = 0; - reg->SrcRegisterExtMod.Absolute = 0; - reg->SrcRegisterExtMod.Negate = 0; + reg->Register.Negate = 0; + reg->Register.Absolute = 0; break; default: diff --git a/src/gallium/auxiliary/translate/Makefile b/src/gallium/auxiliary/translate/Makefile deleted file mode 100644 index 3c82f8ae037..00000000000 --- a/src/gallium/auxiliary/translate/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -TOP = ../../../.. -include $(TOP)/configs/current - -LIBNAME = translate - -C_SOURCES = \ - translate_generic.c \ - translate_sse.c \ - translate.c \ - translate_cache.c - -include ../../Makefile.template diff --git a/src/gallium/auxiliary/translate/SConscript b/src/gallium/auxiliary/translate/SConscript deleted file mode 100644 index 9553a675372..00000000000 --- a/src/gallium/auxiliary/translate/SConscript +++ /dev/null @@ -1,12 +0,0 @@ -Import('*') - -translate = env.ConvenienceLibrary( - target = 'translate', - source = [ - 'translate_generic.c', - 'translate_sse.c', - 'translate.c', - 'translate_cache.c', - ]) - -auxiliaries.insert(0, translate) diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile deleted file mode 100644 index 1d8bb55bbd6..00000000000 --- a/src/gallium/auxiliary/util/Makefile +++ /dev/null @@ -1,45 +0,0 @@ -TOP = ../../../.. -include $(TOP)/configs/current - -LIBNAME = util - -C_SOURCES = \ - u_debug.c \ - u_debug_dump.c \ - u_debug_symbol.c \ - u_debug_stack.c \ - u_blit.c \ - u_cache.c \ - u_cpu_detect.c \ - u_draw_quad.c \ - u_format.c \ - u_format_access.c \ - u_format_table.c \ - u_gen_mipmap.c \ - u_handle_table.c \ - u_hash_table.c \ - u_hash.c \ - u_keymap.c \ - u_linear.c \ - u_network.c \ - u_math.c \ - u_mm.c \ - u_rect.c \ - u_simple_shaders.c \ - u_snprintf.c \ - u_stream_stdc.c \ - u_stream_wd.c \ - u_surface.c \ - u_tile.c \ - u_time.c \ - u_timed_winsys.c \ - u_upload_mgr.c \ - u_simple_screen.c - -include ../../Makefile.template - -u_format_table.c: u_format_table.py u_format_parse.py u_format.csv - python u_format_table.py u_format.csv > $@ - -u_format_access.c: u_format_access.py u_format_parse.py u_format.csv - python u_format_access.py u_format.csv > $@ diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript deleted file mode 100644 index 8d99106d0b8..00000000000 --- a/src/gallium/auxiliary/util/SConscript +++ /dev/null @@ -1,58 +0,0 @@ -Import('*') - -env.Clone() - -env.Append(CPPPATH = ['.']) - -env.CodeGenerate( - target = 'u_format_table.c', - script = 'u_format_table.py', - source = ['u_format.csv'], - command = 'python $SCRIPT $SOURCE > $TARGET' -) - -env.CodeGenerate( - target = 'u_format_access.c', - script = 'u_format_access.py', - source = ['u_format.csv'], - command = 'python $SCRIPT $SOURCE > $TARGET' -) - -util = env.ConvenienceLibrary( - target = 'util', - source = [ - 'u_bitmask.c', - 'u_blit.c', - 'u_cache.c', - 'u_cpu_detect.c', - 'u_debug.c', - 'u_debug_dump.c', - 'u_debug_memory.c', - 'u_debug_stack.c', - 'u_debug_symbol.c', - 'u_draw_quad.c', - 'u_format.c', - 'u_format_access.c', - 'u_format_table.c', - 'u_gen_mipmap.c', - 'u_handle_table.c', - 'u_hash.c', - 'u_hash_table.c', - 'u_keymap.c', - 'u_network.c', - 'u_math.c', - 'u_mm.c', - 'u_rect.c', - 'u_simple_shaders.c', - 'u_snprintf.c', - 'u_stream_stdc.c', - 'u_stream_wd.c', - 'u_surface.c', - 'u_tile.c', - 'u_time.c', - 'u_timed_winsys.c', - 'u_upload_mgr.c', - 'u_simple_screen.c', - ]) - -auxiliaries.insert(0, util) diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index 50386425995..3f74e2aa8b8 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -42,6 +42,7 @@ #include "util/u_blit.h" #include "util/u_draw_quad.h" +#include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" #include "util/u_simple_shaders.h" @@ -126,7 +127,8 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso) } /* fragment shader */ - ctx->fs[TGSI_WRITEMASK_XYZW] = util_make_fragment_tex_shader(pipe); + ctx->fs[TGSI_WRITEMASK_XYZW] = + util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_2D); ctx->vbuf = NULL; /* init vertex data that doesn't change */ @@ -354,10 +356,9 @@ util_blit_pixels_writemask(struct blit_state *ctx, texTemp.target = PIPE_TEXTURE_2D; texTemp.format = src->format; texTemp.last_level = 0; - texTemp.width[0] = srcW; - texTemp.height[0] = srcH; - texTemp.depth[0] = 1; - pf_get_block(src->format, &texTemp.block); + texTemp.width0 = srcW; + texTemp.height0 = srcH; + texTemp.depth0 = 1; tex = screen->texture_create(screen, &texTemp); if (!tex) @@ -389,10 +390,10 @@ util_blit_pixels_writemask(struct blit_state *ctx, } else { pipe_texture_reference(&tex, src->texture); - s0 = srcX0 / (float)tex->width[0]; - s1 = srcX1 / (float)tex->width[0]; - t0 = srcY0 / (float)tex->height[0]; - t1 = srcY1 / (float)tex->height[0]; + s0 = srcX0 / (float)tex->width0; + s1 = srcX1 / (float)tex->width0; + t0 = srcY0 / (float)tex->height0; + t1 = srcY1 / (float)tex->height0; } @@ -421,7 +422,9 @@ util_blit_pixels_writemask(struct blit_state *ctx, cso_set_sampler_textures(ctx->cso, 1, &tex); if (ctx->fs[writemask] == NULL) - ctx->fs[writemask] = util_make_fragment_tex_shader_writemask(pipe, writemask); + ctx->fs[writemask] = + util_make_fragment_tex_shader_writemask(pipe, TGSI_TEXTURE_2D, + writemask); /* shaders */ cso_set_fragment_shader_handle(ctx->cso, ctx->fs[writemask]); @@ -518,13 +521,13 @@ util_blit_pixels_tex(struct blit_state *ctx, assert(filter == PIPE_TEX_MIPFILTER_NEAREST || filter == PIPE_TEX_MIPFILTER_LINEAR); - assert(tex->width[0] != 0); - assert(tex->height[0] != 0); + assert(tex->width0 != 0); + assert(tex->height0 != 0); - s0 = srcX0 / (float)tex->width[0]; - s1 = srcX1 / (float)tex->width[0]; - t0 = srcY0 / (float)tex->height[0]; - t1 = srcY1 / (float)tex->height[0]; + s0 = srcX0 / (float)tex->width0; + s1 = srcX1 / (float)tex->width0; + t0 = srcY0 / (float)tex->height0; + t1 = srcY1 / (float)tex->height0; assert(ctx->pipe->screen->is_format_supported(ctx->pipe->screen, dst->format, PIPE_TEXTURE_2D, diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c new file mode 100644 index 00000000000..1f794d39a17 --- /dev/null +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -0,0 +1,719 @@ +/************************************************************************** + * + * Copyright 2009 Marek Olšák <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Blitter utility to facilitate acceleration of the clear, surface_copy, + * and surface_fill functions. + * + * @author Marek Olšák + */ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/p_state.h" + +#include "util/u_format.h" +#include "util/u_memory.h" +#include "util/u_math.h" +#include "util/u_blitter.h" +#include "util/u_draw_quad.h" +#include "util/u_pack_color.h" +#include "util/u_rect.h" +#include "util/u_simple_shaders.h" +#include "util/u_texture.h" + +struct blitter_context_priv +{ + struct blitter_context blitter; + + struct pipe_context *pipe; /**< pipe context */ + struct pipe_buffer *vbuf; /**< quad */ + + float vertices[4][2][4]; /**< {pos, color} or {pos, texcoord} */ + + /* Templates for various state objects. */ + struct pipe_depth_stencil_alpha_state template_dsa; + struct pipe_sampler_state template_sampler_state; + + /* Constant state objects. */ + /* Vertex shaders. */ + void *vs_col; /**< Vertex shader which passes {pos, color} to the output */ + void *vs_tex; /**<Vertex shader which passes {pos, texcoord} to the output.*/ + + /* Fragment shaders. */ + /* FS which outputs a color to multiple color buffers. */ + void *fs_col[PIPE_MAX_COLOR_BUFS]; + + /* FS which outputs a color from a texture, + where the index is PIPE_TEXTURE_* to be sampled. */ + void *fs_texfetch_col[PIPE_MAX_TEXTURE_TYPES]; + + /* FS which outputs a depth from a texture, + where the index is PIPE_TEXTURE_* to be sampled. */ + void *fs_texfetch_depth[PIPE_MAX_TEXTURE_TYPES]; + + /* Blend state. */ + void *blend_write_color; /**< blend state with writemask of RGBA */ + void *blend_keep_color; /**< blend state with writemask of 0 */ + + /* Depth stencil alpha state. */ + void *dsa_write_depth_stencil[0xff]; /**< indices are stencil clear values */ + void *dsa_write_depth_keep_stencil; + void *dsa_keep_depth_stencil; + + /* Sampler state for clamping to a miplevel. */ + void *sampler_state[PIPE_MAX_TEXTURE_LEVELS]; + + /* Rasterizer state. */ + void *rs_state; +}; + +struct blitter_context *util_blitter_create(struct pipe_context *pipe) +{ + struct blitter_context_priv *ctx; + struct pipe_blend_state blend; + struct pipe_depth_stencil_alpha_state *dsa; + struct pipe_rasterizer_state rs_state; + struct pipe_sampler_state *sampler_state; + unsigned i; + + ctx = CALLOC_STRUCT(blitter_context_priv); + if (!ctx) + return NULL; + + ctx->pipe = pipe; + + /* init state objects for them to be considered invalid */ + ctx->blitter.saved_fb_state.nr_cbufs = ~0; + ctx->blitter.saved_num_textures = ~0; + ctx->blitter.saved_num_sampler_states = ~0; + + /* blend state objects */ + memset(&blend, 0, sizeof(blend)); + ctx->blend_keep_color = pipe->create_blend_state(pipe, &blend); + + blend.colormask = PIPE_MASK_RGBA; + ctx->blend_write_color = pipe->create_blend_state(pipe, &blend); + + /* depth stencil alpha state objects */ + dsa = &ctx->template_dsa; + ctx->dsa_keep_depth_stencil = + pipe->create_depth_stencil_alpha_state(pipe, dsa); + + dsa->depth.enabled = 1; + dsa->depth.writemask = 1; + dsa->depth.func = PIPE_FUNC_ALWAYS; + ctx->dsa_write_depth_keep_stencil = + pipe->create_depth_stencil_alpha_state(pipe, dsa); + + dsa->stencil[0].enabled = 1; + dsa->stencil[0].func = PIPE_FUNC_ALWAYS; + dsa->stencil[0].fail_op = PIPE_STENCIL_OP_REPLACE; + dsa->stencil[0].zpass_op = PIPE_STENCIL_OP_REPLACE; + dsa->stencil[0].zfail_op = PIPE_STENCIL_OP_REPLACE; + dsa->stencil[0].valuemask = 0xff; + dsa->stencil[0].writemask = 0xff; + /* The DSA state objects which write depth and stencil are created + * on-demand. */ + + /* sampler state */ + sampler_state = &ctx->template_sampler_state; + sampler_state->wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler_state->wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler_state->wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + /* The sampler state objects which sample from a specified mipmap level + * are created on-demand. */ + + /* rasterizer state */ + memset(&rs_state, 0, sizeof(rs_state)); + rs_state.front_winding = PIPE_WINDING_CW; + rs_state.cull_mode = PIPE_WINDING_NONE; + rs_state.bypass_vs_clip_and_viewport = 1; + rs_state.gl_rasterization_rules = 1; + ctx->rs_state = pipe->create_rasterizer_state(pipe, &rs_state); + + /* fragment shaders are created on-demand */ + + /* vertex shaders */ + { + const uint semantic_names[] = { TGSI_SEMANTIC_POSITION, + TGSI_SEMANTIC_COLOR }; + const uint semantic_indices[] = { 0, 0 }; + ctx->vs_col = + util_make_vertex_passthrough_shader(pipe, 2, semantic_names, + semantic_indices); + } + { + const uint semantic_names[] = { TGSI_SEMANTIC_POSITION, + TGSI_SEMANTIC_GENERIC }; + const uint semantic_indices[] = { 0, 0 }; + ctx->vs_tex = + util_make_vertex_passthrough_shader(pipe, 2, semantic_names, + semantic_indices); + } + + /* set invariant vertex coordinates */ + for (i = 0; i < 4; i++) + ctx->vertices[i][0][3] = 1; /*v.w*/ + + /* create the vertex buffer */ + ctx->vbuf = pipe_buffer_create(ctx->pipe->screen, + 32, + PIPE_BUFFER_USAGE_VERTEX, + sizeof(ctx->vertices)); + + return &ctx->blitter; +} + +void util_blitter_destroy(struct blitter_context *blitter) +{ + struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; + struct pipe_context *pipe = ctx->pipe; + int i; + + pipe->delete_blend_state(pipe, ctx->blend_write_color); + pipe->delete_blend_state(pipe, ctx->blend_keep_color); + pipe->delete_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil); + pipe->delete_depth_stencil_alpha_state(pipe, + ctx->dsa_write_depth_keep_stencil); + + for (i = 0; i < 0xff; i++) + if (ctx->dsa_write_depth_stencil[i]) + pipe->delete_depth_stencil_alpha_state(pipe, + ctx->dsa_write_depth_stencil[i]); + + pipe->delete_rasterizer_state(pipe, ctx->rs_state); + pipe->delete_vs_state(pipe, ctx->vs_col); + pipe->delete_vs_state(pipe, ctx->vs_tex); + + for (i = 0; i < PIPE_MAX_TEXTURE_TYPES; i++) { + if (ctx->fs_texfetch_col[i]) + pipe->delete_fs_state(pipe, ctx->fs_texfetch_col[i]); + if (ctx->fs_texfetch_depth[i]) + pipe->delete_fs_state(pipe, ctx->fs_texfetch_depth[i]); + } + + for (i = 0; i < PIPE_MAX_COLOR_BUFS && ctx->fs_col[i]; i++) + if (ctx->fs_col[i]) + pipe->delete_fs_state(pipe, ctx->fs_col[i]); + + for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++) + if (ctx->sampler_state[i]) + pipe->delete_sampler_state(pipe, ctx->sampler_state[i]); + + pipe_buffer_reference(&ctx->vbuf, NULL); + FREE(ctx); +} + +static void blitter_check_saved_CSOs(struct blitter_context_priv *ctx) +{ + /* make sure these CSOs have been saved */ + assert(ctx->blitter.saved_blend_state && + ctx->blitter.saved_dsa_state && + ctx->blitter.saved_rs_state && + ctx->blitter.saved_fs && + ctx->blitter.saved_vs); +} + +static void blitter_restore_CSOs(struct blitter_context_priv *ctx) +{ + struct pipe_context *pipe = ctx->pipe; + + /* restore the state objects which are always required to be saved */ + pipe->bind_blend_state(pipe, ctx->blitter.saved_blend_state); + pipe->bind_depth_stencil_alpha_state(pipe, ctx->blitter.saved_dsa_state); + pipe->bind_rasterizer_state(pipe, ctx->blitter.saved_rs_state); + pipe->bind_fs_state(pipe, ctx->blitter.saved_fs); + pipe->bind_vs_state(pipe, ctx->blitter.saved_vs); + + ctx->blitter.saved_blend_state = 0; + ctx->blitter.saved_dsa_state = 0; + ctx->blitter.saved_rs_state = 0; + ctx->blitter.saved_fs = 0; + ctx->blitter.saved_vs = 0; + + /* restore the state objects which are required to be saved before copy/fill + */ + if (ctx->blitter.saved_fb_state.nr_cbufs != ~0) { + pipe->set_framebuffer_state(pipe, &ctx->blitter.saved_fb_state); + ctx->blitter.saved_fb_state.nr_cbufs = ~0; + } + + if (ctx->blitter.saved_num_sampler_states != ~0) { + pipe->bind_fragment_sampler_states(pipe, + ctx->blitter.saved_num_sampler_states, + ctx->blitter.saved_sampler_states); + ctx->blitter.saved_num_sampler_states = ~0; + } + + if (ctx->blitter.saved_num_textures != ~0) { + pipe->set_fragment_sampler_textures(pipe, + ctx->blitter.saved_num_textures, + ctx->blitter.saved_textures); + ctx->blitter.saved_num_textures = ~0; + } +} + +static void blitter_set_rectangle(struct blitter_context_priv *ctx, + unsigned x1, unsigned y1, + unsigned x2, unsigned y2, + float depth) +{ + int i; + + /* set vertex positions */ + ctx->vertices[0][0][0] = x1; /*v0.x*/ + ctx->vertices[0][0][1] = y1; /*v0.y*/ + + ctx->vertices[1][0][0] = x2; /*v1.x*/ + ctx->vertices[1][0][1] = y1; /*v1.y*/ + + ctx->vertices[2][0][0] = x2; /*v2.x*/ + ctx->vertices[2][0][1] = y2; /*v2.y*/ + + ctx->vertices[3][0][0] = x1; /*v3.x*/ + ctx->vertices[3][0][1] = y2; /*v3.y*/ + + for (i = 0; i < 4; i++) + ctx->vertices[i][0][2] = depth; /*z*/ +} + +static void blitter_set_clear_color(struct blitter_context_priv *ctx, + const float *rgba) +{ + int i; + + for (i = 0; i < 4; i++) { + ctx->vertices[i][1][0] = rgba[0]; + ctx->vertices[i][1][1] = rgba[1]; + ctx->vertices[i][1][2] = rgba[2]; + ctx->vertices[i][1][3] = rgba[3]; + } +} + +static void blitter_set_texcoords_2d(struct blitter_context_priv *ctx, + struct pipe_surface *surf, + unsigned x1, unsigned y1, + unsigned x2, unsigned y2) +{ + int i; + float s1 = x1 / (float)surf->width; + float t1 = y1 / (float)surf->height; + float s2 = x2 / (float)surf->width; + float t2 = y2 / (float)surf->height; + + ctx->vertices[0][1][0] = s1; /*t0.s*/ + ctx->vertices[0][1][1] = t1; /*t0.t*/ + + ctx->vertices[1][1][0] = s2; /*t1.s*/ + ctx->vertices[1][1][1] = t1; /*t1.t*/ + + ctx->vertices[2][1][0] = s2; /*t2.s*/ + ctx->vertices[2][1][1] = t2; /*t2.t*/ + + ctx->vertices[3][1][0] = s1; /*t3.s*/ + ctx->vertices[3][1][1] = t2; /*t3.t*/ + + for (i = 0; i < 4; i++) { + ctx->vertices[i][1][2] = 0; /*r*/ + ctx->vertices[i][1][3] = 1; /*q*/ + } +} + +static void blitter_set_texcoords_3d(struct blitter_context_priv *ctx, + struct pipe_surface *surf, + unsigned x1, unsigned y1, + unsigned x2, unsigned y2) +{ + int i; + float depth = u_minify(surf->texture->depth0, surf->level); + float r = surf->zslice / depth; + + blitter_set_texcoords_2d(ctx, surf, x1, y1, x2, y2); + + for (i = 0; i < 4; i++) + ctx->vertices[i][1][2] = r; /*r*/ +} + +static void blitter_set_texcoords_cube(struct blitter_context_priv *ctx, + struct pipe_surface *surf, + unsigned x1, unsigned y1, + unsigned x2, unsigned y2) +{ + int i; + float s1 = x1 / (float)surf->width; + float t1 = y1 / (float)surf->height; + float s2 = x2 / (float)surf->width; + float t2 = y2 / (float)surf->height; + const float st[4][2] = { + {s1, t1}, {s2, t1}, {s2, t2}, {s1, t2} + }; + + util_map_texcoords2d_onto_cubemap(surf->face, + /* pointer, stride in floats */ + &st[0][0], 2, + &ctx->vertices[0][1][0], 8); + + for (i = 0; i < 4; i++) + ctx->vertices[i][1][3] = 1; /*q*/ +} + +static void blitter_draw_quad(struct blitter_context_priv *ctx) +{ + struct pipe_context *pipe = ctx->pipe; + + /* write vertices and draw them */ + pipe_buffer_write(pipe->screen, ctx->vbuf, + 0, sizeof(ctx->vertices), ctx->vertices); + + util_draw_vertex_buffer(pipe, ctx->vbuf, 0, PIPE_PRIM_TRIANGLE_FAN, + 4, /* verts */ + 2); /* attribs/vert */ +} + +static INLINE +void *blitter_get_state_write_depth_stencil( + struct blitter_context_priv *ctx, + unsigned stencil) +{ + struct pipe_context *pipe = ctx->pipe; + + stencil &= 0xff; + + /* Create the DSA state on-demand. */ + if (!ctx->dsa_write_depth_stencil[stencil]) { + ctx->template_dsa.stencil[0].ref_value = stencil; + + ctx->dsa_write_depth_stencil[stencil] = + pipe->create_depth_stencil_alpha_state(pipe, &ctx->template_dsa); + } + + return ctx->dsa_write_depth_stencil[stencil]; +} + +static INLINE +void **blitter_get_sampler_state(struct blitter_context_priv *ctx, + int miplevel) +{ + struct pipe_context *pipe = ctx->pipe; + struct pipe_sampler_state *sampler_state = &ctx->template_sampler_state; + + assert(miplevel < PIPE_MAX_TEXTURE_LEVELS); + + /* Create the sampler state on-demand. */ + if (!ctx->sampler_state[miplevel]) { + sampler_state->lod_bias = miplevel; + sampler_state->min_lod = miplevel; + sampler_state->max_lod = miplevel; + + ctx->sampler_state[miplevel] = pipe->create_sampler_state(pipe, + sampler_state); + } + + /* Return void** so that it can be passed to bind_fragment_sampler_states + * directly. */ + return &ctx->sampler_state[miplevel]; +} + +static INLINE +void *blitter_get_fs_col(struct blitter_context_priv *ctx, unsigned num_cbufs) +{ + struct pipe_context *pipe = ctx->pipe; + unsigned index = num_cbufs ? num_cbufs - 1 : 0; + + assert(num_cbufs <= PIPE_MAX_COLOR_BUFS); + + if (!ctx->fs_col[index]) + ctx->fs_col[index] = + util_make_fragment_clonecolor_shader(pipe, num_cbufs); + + return ctx->fs_col[index]; +} + +static INLINE +void *blitter_get_fs_texfetch_col(struct blitter_context_priv *ctx, + unsigned tex_target) +{ + struct pipe_context *pipe = ctx->pipe; + + assert(tex_target < PIPE_MAX_TEXTURE_TYPES); + + /* Create the fragment shader on-demand. */ + if (!ctx->fs_texfetch_col[tex_target]) { + switch (tex_target) { + case PIPE_TEXTURE_1D: + ctx->fs_texfetch_col[PIPE_TEXTURE_1D] = + util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_1D); + break; + case PIPE_TEXTURE_2D: + ctx->fs_texfetch_col[PIPE_TEXTURE_2D] = + util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_2D); + break; + case PIPE_TEXTURE_3D: + ctx->fs_texfetch_col[PIPE_TEXTURE_3D] = + util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_3D); + break; + case PIPE_TEXTURE_CUBE: + ctx->fs_texfetch_col[PIPE_TEXTURE_CUBE] = + util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_CUBE); + break; + default:; + } + } + + return ctx->fs_texfetch_col[tex_target]; +} + +static INLINE +void *blitter_get_fs_texfetch_depth(struct blitter_context_priv *ctx, + unsigned tex_target) +{ + struct pipe_context *pipe = ctx->pipe; + + assert(tex_target < PIPE_MAX_TEXTURE_TYPES); + + /* Create the fragment shader on-demand. */ + if (!ctx->fs_texfetch_depth[tex_target]) { + switch (tex_target) { + case PIPE_TEXTURE_1D: + ctx->fs_texfetch_depth[PIPE_TEXTURE_1D] = + util_make_fragment_tex_shader_writedepth(pipe, TGSI_TEXTURE_1D); + break; + case PIPE_TEXTURE_2D: + ctx->fs_texfetch_depth[PIPE_TEXTURE_2D] = + util_make_fragment_tex_shader_writedepth(pipe, TGSI_TEXTURE_2D); + break; + case PIPE_TEXTURE_3D: + ctx->fs_texfetch_depth[PIPE_TEXTURE_3D] = + util_make_fragment_tex_shader_writedepth(pipe, TGSI_TEXTURE_3D); + break; + case PIPE_TEXTURE_CUBE: + ctx->fs_texfetch_depth[PIPE_TEXTURE_CUBE] = + util_make_fragment_tex_shader_writedepth(pipe,TGSI_TEXTURE_CUBE); + break; + default:; + } + } + + return ctx->fs_texfetch_depth[tex_target]; +} + +void util_blitter_clear(struct blitter_context *blitter, + unsigned width, unsigned height, + unsigned num_cbufs, + unsigned clear_buffers, + const float *rgba, + double depth, unsigned stencil) +{ + struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; + struct pipe_context *pipe = ctx->pipe; + + assert(num_cbufs <= PIPE_MAX_COLOR_BUFS); + + blitter_check_saved_CSOs(ctx); + + /* bind CSOs */ + if (clear_buffers & PIPE_CLEAR_COLOR) + pipe->bind_blend_state(pipe, ctx->blend_write_color); + else + pipe->bind_blend_state(pipe, ctx->blend_keep_color); + + if (clear_buffers & PIPE_CLEAR_DEPTHSTENCIL) + pipe->bind_depth_stencil_alpha_state(pipe, + blitter_get_state_write_depth_stencil(ctx, stencil)); + else + pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil); + + pipe->bind_rasterizer_state(pipe, ctx->rs_state); + pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, num_cbufs)); + pipe->bind_vs_state(pipe, ctx->vs_col); + + blitter_set_clear_color(ctx, rgba); + blitter_set_rectangle(ctx, 0, 0, width, height, depth); + blitter_draw_quad(ctx); + blitter_restore_CSOs(ctx); +} + +void util_blitter_copy(struct blitter_context *blitter, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + struct pipe_surface *src, + unsigned srcx, unsigned srcy, + unsigned width, unsigned height, + boolean ignore_stencil) +{ + struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; + struct pipe_context *pipe = ctx->pipe; + struct pipe_screen *screen = pipe->screen; + struct pipe_framebuffer_state fb_state; + boolean is_stencil, is_depth; + unsigned dst_tex_usage; + + /* give up if textures are not set */ + assert(dst->texture && src->texture); + if (!dst->texture || !src->texture) + return; + + is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0; + is_stencil = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 1) != 0; + dst_tex_usage = is_depth || is_stencil ? PIPE_TEXTURE_USAGE_DEPTH_STENCIL : + PIPE_TEXTURE_USAGE_RENDER_TARGET; + + /* check if we can sample from and render to the surfaces */ + /* (assuming copying a stencil buffer is not possible) */ + if ((!ignore_stencil && is_stencil) || + !screen->is_format_supported(screen, dst->format, dst->texture->target, + dst_tex_usage, 0) || + !screen->is_format_supported(screen, src->format, src->texture->target, + PIPE_TEXTURE_USAGE_SAMPLER, 0)) { + util_surface_copy(pipe, FALSE, dst, dstx, dsty, src, srcx, srcy, + width, height); + return; + } + + /* check whether the states are properly saved */ + blitter_check_saved_CSOs(ctx); + assert(blitter->saved_fb_state.nr_cbufs != ~0); + assert(blitter->saved_num_textures != ~0); + assert(blitter->saved_num_sampler_states != ~0); + assert(src->texture->target < PIPE_MAX_TEXTURE_TYPES); + + /* bind CSOs */ + fb_state.width = dst->width; + fb_state.height = dst->height; + + if (is_depth) { + pipe->bind_blend_state(pipe, ctx->blend_keep_color); + pipe->bind_depth_stencil_alpha_state(pipe, + ctx->dsa_write_depth_keep_stencil); + pipe->bind_fs_state(pipe, + blitter_get_fs_texfetch_depth(ctx, src->texture->target)); + + fb_state.nr_cbufs = 0; + fb_state.zsbuf = dst; + } else { + pipe->bind_blend_state(pipe, ctx->blend_write_color); + pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil); + pipe->bind_fs_state(pipe, + blitter_get_fs_texfetch_col(ctx, src->texture->target)); + + fb_state.nr_cbufs = 1; + fb_state.cbufs[0] = dst; + fb_state.zsbuf = 0; + } + + pipe->bind_rasterizer_state(pipe, ctx->rs_state); + pipe->bind_vs_state(pipe, ctx->vs_tex); + pipe->bind_fragment_sampler_states(pipe, 1, + blitter_get_sampler_state(ctx, src->level)); + pipe->set_fragment_sampler_textures(pipe, 1, &src->texture); + pipe->set_framebuffer_state(pipe, &fb_state); + + /* set texture coordinates */ + switch (src->texture->target) { + case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_2D: + blitter_set_texcoords_2d(ctx, src, srcx, srcy, + srcx+width, srcy+height); + break; + case PIPE_TEXTURE_3D: + blitter_set_texcoords_3d(ctx, src, srcx, srcy, + srcx+width, srcy+height); + break; + case PIPE_TEXTURE_CUBE: + blitter_set_texcoords_cube(ctx, src, srcx, srcy, + srcx+width, srcy+height); + break; + default: + assert(0); + } + + blitter_set_rectangle(ctx, dstx, dsty, dstx+width, dsty+height, 0); + blitter_draw_quad(ctx); + blitter_restore_CSOs(ctx); +} + +void util_blitter_fill(struct blitter_context *blitter, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height, + unsigned value) +{ + struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; + struct pipe_context *pipe = ctx->pipe; + struct pipe_screen *screen = pipe->screen; + struct pipe_framebuffer_state fb_state; + float rgba[4]; + ubyte ub_rgba[4] = {0}; + union util_color color; + int i; + + assert(dst->texture); + if (!dst->texture) + return; + + /* check if we can render to the surface */ + if (util_format_is_depth_or_stencil(dst->format) || /* unlikely, but you never know */ + !screen->is_format_supported(screen, dst->format, dst->texture->target, + PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)) { + util_surface_fill(pipe, dst, dstx, dsty, width, height, value); + return; + } + + /* unpack the color */ + color.ui = value; + util_unpack_color_ub(dst->format, &color, + ub_rgba, ub_rgba+1, ub_rgba+2, ub_rgba+3); + for (i = 0; i < 4; i++) + rgba[i] = ubyte_to_float(ub_rgba[i]); + + /* check the saved state */ + blitter_check_saved_CSOs(ctx); + assert(blitter->saved_fb_state.nr_cbufs != ~0); + + /* bind CSOs */ + pipe->bind_blend_state(pipe, ctx->blend_write_color); + pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil); + pipe->bind_rasterizer_state(pipe, ctx->rs_state); + pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, 1)); + pipe->bind_vs_state(pipe, ctx->vs_col); + + /* set a framebuffer state */ + fb_state.width = dst->width; + fb_state.height = dst->height; + fb_state.nr_cbufs = 1; + fb_state.cbufs[0] = dst; + fb_state.zsbuf = 0; + pipe->set_framebuffer_state(pipe, &fb_state); + + blitter_set_clear_color(ctx, rgba); + blitter_set_rectangle(ctx, 0, 0, width, height, 0); + blitter_draw_quad(ctx); + blitter_restore_CSOs(ctx); +} diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h new file mode 100644 index 00000000000..3da5a6ca525 --- /dev/null +++ b/src/gallium/auxiliary/util/u_blitter.h @@ -0,0 +1,230 @@ +/************************************************************************** + * + * Copyright 2009 Marek Olšák <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef U_BLITTER_H +#define U_BLITTER_H + +#include "util/u_memory.h" + +#include "pipe/p_state.h" + + +#ifdef __cplusplus +extern "C" { +#endif + +struct pipe_context; + +struct blitter_context +{ + /* Private members, really. */ + void *saved_blend_state; /**< blend state */ + void *saved_dsa_state; /**< depth stencil alpha state */ + void *saved_rs_state; /**< rasterizer state */ + void *saved_fs, *saved_vs; /**< fragment shader, vertex shader */ + + struct pipe_framebuffer_state saved_fb_state; /**< framebuffer state */ + + int saved_num_sampler_states; + void *saved_sampler_states[32]; + + int saved_num_textures; + struct pipe_texture *saved_textures[32]; /* is 32 enough? */ +}; + +/** + * Create a blitter context. + */ +struct blitter_context *util_blitter_create(struct pipe_context *pipe); + +/** + * Destroy a blitter context. + */ +void util_blitter_destroy(struct blitter_context *blitter); + +/* + * These CSOs must be saved before any of the following functions is called: + * - blend state + * - depth stencil alpha state + * - rasterizer state + * - vertex shader + * - fragment shader + */ + +/** + * Clear a specified set of currently bound buffers to specified values. + */ +void util_blitter_clear(struct blitter_context *blitter, + unsigned width, unsigned height, + unsigned num_cbufs, + unsigned clear_buffers, + const float *rgba, + double depth, unsigned stencil); + +/** + * Copy a block of pixels from one surface to another. + * + * You can copy from any color format to any other color format provided + * the former can be sampled and the latter can be rendered to. Otherwise, + * a software fallback path is taken and both surfaces must be of the same + * format. + * + * The same holds for depth-stencil formats with the exception that stencil + * cannot be copied unless you set ignore_stencil to FALSE. In that case, + * a software fallback path is taken and both surfaces must be of the same + * format. + * + * Use pipe_screen->is_format_supported to know your options. + * + * These states must be saved in the blitter in addition to the state objects + * already required to be saved: + * - framebuffer state + * - fragment sampler states + * - fragment sampler textures + */ +void util_blitter_copy(struct blitter_context *blitter, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + struct pipe_surface *src, + unsigned srcx, unsigned srcy, + unsigned width, unsigned height, + boolean ignore_stencil); + +/** + * Fill a region of a surface with a constant value. + * + * If the surface cannot be rendered to or it's a depth-stencil format, + * a software fallback path is taken. + * + * These states must be saved in the blitter in addition to the state objects + * already required to be saved: + * - framebuffer state + */ +void util_blitter_fill(struct blitter_context *blitter, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height, + unsigned value); + +/** + * Copy all pixels from one surface to another. + * + * The rules are the same as in util_blitter_copy with the addition that + * surfaces must have the same size. + */ +static INLINE +void util_blitter_copy_surface(struct blitter_context *blitter, + struct pipe_surface *dst, + struct pipe_surface *src, + boolean ignore_stencil) +{ + assert(dst->width == src->width && dst->height == src->height); + + util_blitter_copy(blitter, dst, 0, 0, src, 0, 0, src->width, src->height, + ignore_stencil); +} + + +/* The functions below should be used to save currently bound constant state + * objects inside a driver. The objects are automatically restored at the end + * of the util_blitter_{clear, fill, copy, copy_surface} functions and then + * forgotten. + * + * CSOs not listed here are not affected by util_blitter. */ + +static INLINE +void util_blitter_save_blend(struct blitter_context *blitter, + void *state) +{ + blitter->saved_blend_state = state; +} + +static INLINE +void util_blitter_save_depth_stencil_alpha(struct blitter_context *blitter, + void *state) +{ + blitter->saved_dsa_state = state; +} + +static INLINE +void util_blitter_save_rasterizer(struct blitter_context *blitter, + void *state) +{ + blitter->saved_rs_state = state; +} + +static INLINE +void util_blitter_save_fragment_shader(struct blitter_context *blitter, + void *fs) +{ + blitter->saved_fs = fs; +} + +static INLINE +void util_blitter_save_vertex_shader(struct blitter_context *blitter, + void *vs) +{ + blitter->saved_vs = vs; +} + +static INLINE +void util_blitter_save_framebuffer(struct blitter_context *blitter, + struct pipe_framebuffer_state *state) +{ + blitter->saved_fb_state = *state; +} + +static INLINE +void util_blitter_save_fragment_sampler_states( + struct blitter_context *blitter, + int num_sampler_states, + void **sampler_states) +{ + assert(num_sampler_states <= Elements(blitter->saved_sampler_states)); + + blitter->saved_num_sampler_states = num_sampler_states; + memcpy(blitter->saved_sampler_states, sampler_states, + num_sampler_states * sizeof(void *)); +} + +static INLINE +void util_blitter_save_fragment_sampler_textures( + struct blitter_context *blitter, + int num_textures, + struct pipe_texture **textures) +{ + assert(num_textures <= Elements(blitter->saved_textures)); + + blitter->saved_num_textures = num_textures; + memcpy(blitter->saved_textures, textures, + num_textures * sizeof(struct pipe_texture *)); +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/gallium/auxiliary/util/u_clear.h b/src/gallium/auxiliary/util/u_clear.h index 6be5ca2297d..2c32db61756 100644 --- a/src/gallium/auxiliary/util/u_clear.h +++ b/src/gallium/auxiliary/util/u_clear.h @@ -46,13 +46,13 @@ util_clear(struct pipe_context *pipe, { if (buffers & PIPE_CLEAR_COLOR) { struct pipe_surface *ps = framebuffer->cbufs[0]; - unsigned color = 0; + union util_color uc; - util_pack_color(rgba, ps->format, &color); + util_pack_color(rgba, ps->format, &uc); if (pipe->surface_fill) { - pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, color); + pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, uc.ui); } else { - util_surface_fill(pipe, ps, 0, 0, ps->width, ps->height, color); + util_surface_fill(pipe, ps, 0, 0, ps->width, ps->height, uc.ui); } } diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c index be5eb87e474..9b4e6ca2a73 100644 --- a/src/gallium/auxiliary/util/u_debug.c +++ b/src/gallium/auxiliary/util/u_debug.c @@ -64,11 +64,13 @@ #include "pipe/p_format.h" #include "pipe/p_state.h" #include "pipe/p_inlines.h" +#include "util/u_format.h" #include "util/u_memory.h" #include "util/u_string.h" #include "util/u_stream.h" #include "util/u_math.h" #include "util/u_tile.h" +#include "util/u_prim.h" #ifdef PIPE_SUBSYSTEM_WINDOWS_DISPLAY @@ -602,6 +604,32 @@ const char *pf_name( enum pipe_format format ) } + +static const struct debug_named_value pipe_prim_names[] = { +#ifdef DEBUG + DEBUG_NAMED_VALUE(PIPE_PRIM_POINTS), + DEBUG_NAMED_VALUE(PIPE_PRIM_LINES), + DEBUG_NAMED_VALUE(PIPE_PRIM_LINE_LOOP), + DEBUG_NAMED_VALUE(PIPE_PRIM_LINE_STRIP), + DEBUG_NAMED_VALUE(PIPE_PRIM_TRIANGLES), + DEBUG_NAMED_VALUE(PIPE_PRIM_TRIANGLE_STRIP), + DEBUG_NAMED_VALUE(PIPE_PRIM_TRIANGLE_FAN), + DEBUG_NAMED_VALUE(PIPE_PRIM_QUADS), + DEBUG_NAMED_VALUE(PIPE_PRIM_QUAD_STRIP), + DEBUG_NAMED_VALUE(PIPE_PRIM_POLYGON), +#endif + DEBUG_NAMED_VALUE_END +}; + + +const char *u_prim_name( unsigned prim ) +{ + return debug_dump_enum(pipe_prim_names, prim); +} + + + + #ifdef DEBUG void debug_dump_image(const char *prefix, unsigned format, unsigned cpp, @@ -671,10 +699,10 @@ void debug_dump_surface(const char *prefix, goto error; debug_dump_image(prefix, - transfer->format, - transfer->block.size, - transfer->nblocksx, - transfer->nblocksy, + texture->format, + util_format_get_blocksize(texture->format), + util_format_get_nblocksx(texture->format, transfer->width), + util_format_get_nblocksy(texture->format, transfer->height), transfer->stride, data); diff --git a/src/gallium/auxiliary/util/u_debug_dump.c b/src/gallium/auxiliary/util/u_debug_dump.c index 09866880aea..61624d05c0a 100644 --- a/src/gallium/auxiliary/util/u_debug_dump.c +++ b/src/gallium/auxiliary/util/u_debug_dump.c @@ -255,15 +255,13 @@ DEFINE_DEBUG_DUMP_CONTINUOUS(tex_mipfilter) static const char * debug_dump_tex_filter_names[] = { "PIPE_TEX_FILTER_NEAREST", - "PIPE_TEX_FILTER_LINEAR", - "PIPE_TEX_FILTER_ANISO" + "PIPE_TEX_FILTER_LINEAR" }; static const char * debug_dump_tex_filter_short_names[] = { "nearest", - "linear", - "aniso" + "linear" }; DEFINE_DEBUG_DUMP_CONTINUOUS(tex_filter) diff --git a/src/gallium/auxiliary/util/u_dl.c b/src/gallium/auxiliary/util/u_dl.c new file mode 100644 index 00000000000..b42b429d4d7 --- /dev/null +++ b/src/gallium/auxiliary/util/u_dl.c @@ -0,0 +1,79 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * Copyright 1999-2008 Brian Paul + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + + +#include "pipe/p_config.h" + +#if defined(PIPE_OS_UNIX) +#include <dlfcn.h> +#endif +#if defined(PIPE_OS_WINDOWS) +#include <windows.h> +#endif + +#include "u_dl.h" + + +struct util_dl_library * +util_dl_open(const char *filename) +{ +#if defined(PIPE_OS_UNIX) + return (struct util_dl_library *)dlopen(filename, RTLD_LAZY | RTLD_GLOBAL); +#elif defined(PIPE_OS_WINDOWS) + return (struct util_dl_library *)LoadLibraryA(filename); +#else + return NULL; +#endif +} + + +util_dl_proc +util_dl_get_proc_address(struct util_dl_library *library, + const char *procname) +{ +#if defined(PIPE_OS_UNIX) + return (util_dl_proc)dlsym((void *)library, procname); +#elif defined(PIPE_OS_WINDOWS) + return (util_dl_proc)GetProcAddress((HMODULE)library, procname); +#else + return (util_dl_proc)NULL; +#endif +} + + +void +util_dl_close(struct util_dl_library *library) +{ +#if defined(PIPE_OS_UNIX) + dlclose((void *)library); +#elif defined(PIPE_OS_WINDOWS) + FreeLibrary((HMODULE)library); +#else + (void)library; +#endif +} diff --git a/src/gallium/winsys/drm/radeon/core/radeon_winsys_softpipe.c b/src/gallium/auxiliary/util/u_dl.h index f038bfa40ef..85296c58af6 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_winsys_softpipe.c +++ b/src/gallium/auxiliary/util/u_dl.h @@ -1,8 +1,8 @@ /************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA + * + * Copyright 2009 VMware, Inc. * All Rights Reserved. - * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including @@ -10,32 +10,64 @@ * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. - * - * + * **************************************************************************/ -/* - * Authors: Keith Whitwell <keithw-at-tungstengraphics-dot-com> + + +#ifndef U_DL_H_ +#define U_DL_H_ + + +#include "pipe/p_config.h" + + +#if defined(PIPE_OS_WINDOWS) +# define UTIL_DL_EXT ".dll" +#elif defined(PIPE_OS_APPLE) +# define UTIL_DL_EXT ".dylib" +#else +# define UTIL_DL_EXT ".so" +#endif + + +struct util_dl_library; + + +typedef void (*util_dl_proc)(void); + + +/** + * Open a library dynamically. */ +struct util_dl_library * +util_dl_open(const char *filename); + -#include "radeon_winsys_softpipe.h" +/** + * Lookup a function in a library. + */ +util_dl_proc +util_dl_get_proc_address(struct util_dl_library *library, + const char *procname); -struct pipe_context *radeon_create_softpipe(struct pipe_winsys* winsys) -{ - struct pipe_screen *pipe_screen; - pipe_screen = softpipe_create_screen(winsys); +/** + * Close a library. + */ +void +util_dl_close(struct util_dl_library *library); + - return softpipe_create(pipe_screen); -} +#endif /* U_DL_H_ */ diff --git a/src/gallium/auxiliary/util/u_format.c b/src/gallium/auxiliary/util/u_format.c index 98ea13b60b5..e0724a1a8be 100644 --- a/src/gallium/auxiliary/util/u_format.c +++ b/src/gallium/auxiliary/util/u_format.c @@ -32,15 +32,14 @@ const struct util_format_description * util_format_description(enum pipe_format format) { - const struct util_format_description *desc = util_format_description_table; + const struct util_format_description *desc; - while(TRUE) { - if(desc->format == format) - return desc; + if (format >= PIPE_FORMAT_COUNT) { + return NULL; + } - if(desc->format == PIPE_FORMAT_NONE) - return NULL; + desc = &util_format_description_table[format]; + assert(desc->format == format); - ++desc; - }; + return desc; } diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv index f1bf94f17dd..866b18ff160 100644 --- a/src/gallium/auxiliary/util/u_format.csv +++ b/src/gallium/auxiliary/util/u_format.csv @@ -11,6 +11,8 @@ PIPE_FORMAT_A8_UNORM , arith , 1, 1, un8 , , , , 000x, PIPE_FORMAT_I8_UNORM , arith , 1, 1, un8 , , , , xxxx, rgb PIPE_FORMAT_A8L8_UNORM , arith , 1, 1, un8 , un8 , , , xxxy, rgb PIPE_FORMAT_L16_UNORM , arith , 1, 1, un16, , , , xxx1, rgb +PIPE_FORMAT_YCBCR , yuv , 2, 1, x32 , , , , xyz1, yuv +PIPE_FORMAT_YCBCR_REV , yuv , 2, 1, x32 , , , , xyz1, yuv PIPE_FORMAT_Z16_UNORM , array , 1, 1, un16, , , , x___, zs PIPE_FORMAT_Z32_UNORM , array , 1, 1, un32, , , , x___, zs PIPE_FORMAT_Z32_FLOAT , array , 1, 1, f32 , , , , x___, zs @@ -97,3 +99,11 @@ PIPE_FORMAT_B8G8R8A8_SRGB , arith , 1, 1, u8 , u8 , u8 , u8 , zyxw, PIPE_FORMAT_B8G8R8X8_SRGB , arith , 1, 1, u8 , u8 , u8 , u8 , zyx1, srgb PIPE_FORMAT_X8UB8UG8SR8S_NORM , arith , 1, 1, sn8 , sn8 , un8 , x8 , 1zyx, rgb PIPE_FORMAT_B6UG5SR5S_NORM , arith , 1, 1, sn5 , sn5 , un6 , , xyz1, rgb +PIPE_FORMAT_DXT1_RGB , dxt , 4, 4, x64 , , , , xyz1, rgb +PIPE_FORMAT_DXT1_RGBA , dxt , 4, 4, x64 , , , , xyzw, rgb +PIPE_FORMAT_DXT3_RGBA , dxt , 4, 4, x128, , , , xyzw, rgb +PIPE_FORMAT_DXT5_RGBA , dxt , 4, 4, x128, , , , xyzw, rgb +PIPE_FORMAT_DXT1_SRGB , dxt , 4, 4, x64 , , , , xyz1, srgb +PIPE_FORMAT_DXT1_SRGBA , dxt , 4, 4, x64 , , , , xyzw, srgb +PIPE_FORMAT_DXT3_SRGBA , dxt , 4, 4, x128, , , , xyzw, srgb +PIPE_FORMAT_DXT5_SRGBA , dxt , 4, 4, x128, , , , xyzw, srgb diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h index bd27f346924..a558923b2ed 100644 --- a/src/gallium/auxiliary/util/u_format.h +++ b/src/gallium/auxiliary/util/u_format.h @@ -32,11 +32,51 @@ #include "pipe/p_format.h" +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Describe how to best pack/unpack pixels into/from the prescribed format. + * + * These are used for automatic code generation of pixel packing and unpacking + * routines (in compile time, e.g., u_format_access.py, or in runtime, like + * llvmpipe does). + * + * Thumb rule is: if you're not code generating pixel packing/unpacking then + * these are irrelevant for you. + * + * Note that this can be deduced from other values in util_format_description + * structure. This is by design, to make code generation of pixel + * packing/unpacking/sampling routines simple and efficient. + * + * XXX: This should be renamed to something like util_format_pack. + */ enum util_format_layout { + /** + * Single scalar component. + */ UTIL_FORMAT_LAYOUT_SCALAR = 0, + + /** + * One or more components of mixed integer formats, arithmetically encoded + * in a word up to 32bits. + */ UTIL_FORMAT_LAYOUT_ARITH = 1, + + /** + * One or more components, no mixed formats, each with equal power of two + * number of bytes. + */ UTIL_FORMAT_LAYOUT_ARRAY = 2, + + /** + * XXX: Not used yet. These might go away and be replaced by a single entry, + * for formats where multiple pixels have to be + * read in order to determine a single pixel value (i.e., block.width > 1 + * || block.height > 1) + */ UTIL_FORMAT_LAYOUT_YUV = 3, UTIL_FORMAT_LAYOUT_DXT = 4 }; @@ -50,7 +90,7 @@ struct util_format_block /** Block height in pixels */ unsigned height; - /** Block size in bytes */ + /** Block size in bits */ unsigned bits; }; @@ -111,6 +151,250 @@ const struct util_format_description * util_format_description(enum pipe_format format); +/* + * Format query functions. + */ + +static INLINE boolean +util_format_is_compressed(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + + assert(format); + if (!format) { + return FALSE; + } + + return desc->layout == UTIL_FORMAT_LAYOUT_DXT ? TRUE : FALSE; +} + +static INLINE boolean +util_format_is_depth_or_stencil(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + + assert(format); + if (!format) { + return FALSE; + } + + return desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS ? TRUE : FALSE; +} + +static INLINE boolean +util_format_is_depth_and_stencil(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + + assert(format); + if (!format) { + return FALSE; + } + + if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) { + return FALSE; + } + + return (desc->swizzle[0] != UTIL_FORMAT_SWIZZLE_NONE && + desc->swizzle[1] != UTIL_FORMAT_SWIZZLE_NONE) ? TRUE : FALSE; +} + + +/** + * Return total bits needed for the pixel format per block. + */ +static INLINE uint +util_format_get_blocksizebits(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + + assert(format); + if (!format) { + return 0; + } + + return desc->block.bits; +} + +/** + * Return bytes per block (not pixel) for the given format. + */ +static INLINE uint +util_format_get_blocksize(enum pipe_format format) +{ + uint bits = util_format_get_blocksizebits(format); + + assert(bits % 8 == 0); + + return bits / 8; +} + +static INLINE uint +util_format_get_blockwidth(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + + assert(format); + if (!format) { + return 1; + } + + switch (desc->layout) { + case UTIL_FORMAT_LAYOUT_YUV: + return 2; + case UTIL_FORMAT_LAYOUT_DXT: + return 4; + default: + return 1; + } +} + +static INLINE uint +util_format_get_blockheight(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + + assert(format); + if (!format) { + return 1; + } + + switch (desc->layout) { + case UTIL_FORMAT_LAYOUT_DXT: + return 4; + default: + return 1; + } +} + +static INLINE unsigned +util_format_get_nblocksx(enum pipe_format format, + unsigned x) +{ + unsigned blockwidth = util_format_get_blockwidth(format); + return (x + blockwidth - 1) / blockwidth; +} + +static INLINE unsigned +util_format_get_nblocksy(enum pipe_format format, + unsigned y) +{ + unsigned blockheight = util_format_get_blockheight(format); + return (y + blockheight - 1) / blockheight; +} + +static INLINE unsigned +util_format_get_nblocks(enum pipe_format format, + unsigned width, + unsigned height) +{ + return util_format_get_nblocksx(format, width) * util_format_get_nblocksy(format, height); +} + +static INLINE size_t +util_format_get_stride(enum pipe_format format, + unsigned width) +{ + return util_format_get_nblocksx(format, width) * util_format_get_blocksize(format); +} + +static INLINE size_t +util_format_get_2d_size(enum pipe_format format, + size_t stride, + unsigned height) +{ + return util_format_get_nblocksy(format, height) * stride; +} + +static INLINE uint +util_format_get_component_bits(enum pipe_format format, + enum util_format_colorspace colorspace, + uint component) +{ + const struct util_format_description *desc = util_format_description(format); + enum util_format_colorspace desc_colorspace; + + assert(format); + if (!format) { + return 0; + } + + assert(component < 4); + + /* Treat RGB and SRGB as equivalent. */ + if (colorspace == UTIL_FORMAT_COLORSPACE_SRGB) { + colorspace = UTIL_FORMAT_COLORSPACE_RGB; + } + if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) { + desc_colorspace = UTIL_FORMAT_COLORSPACE_RGB; + } else { + desc_colorspace = desc->colorspace; + } + + if (desc_colorspace != colorspace) { + return 0; + } + + switch (desc->swizzle[component]) { + case UTIL_FORMAT_SWIZZLE_X: + return desc->channel[0].size; + case UTIL_FORMAT_SWIZZLE_Y: + return desc->channel[1].size; + case UTIL_FORMAT_SWIZZLE_Z: + return desc->channel[2].size; + case UTIL_FORMAT_SWIZZLE_W: + return desc->channel[3].size; + default: + return 0; + } +} + +static INLINE boolean +util_format_has_alpha(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + + assert(format); + if (!format) { + return FALSE; + } + + switch (desc->layout) { + case UTIL_FORMAT_LAYOUT_SCALAR: + case UTIL_FORMAT_LAYOUT_ARITH: + case UTIL_FORMAT_LAYOUT_ARRAY: + /* FIXME: pf_get_component_bits( PIPE_FORMAT_A8L8_UNORM, PIPE_FORMAT_COMP_A ) should not return 0 right? */ + if (format == PIPE_FORMAT_A8_UNORM || + format == PIPE_FORMAT_A8L8_UNORM || + format == PIPE_FORMAT_A8L8_SRGB) { + return TRUE; + } + return util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 3) != 0; + case UTIL_FORMAT_LAYOUT_YUV: + return FALSE; + case UTIL_FORMAT_LAYOUT_DXT: + switch (format) { + case PIPE_FORMAT_DXT1_RGBA: + case PIPE_FORMAT_DXT3_RGBA: + case PIPE_FORMAT_DXT5_RGBA: + case PIPE_FORMAT_DXT1_SRGBA: + case PIPE_FORMAT_DXT3_SRGBA: + case PIPE_FORMAT_DXT5_SRGBA: + return TRUE; + default: + return FALSE; + } + default: + assert(0); + return FALSE; + } +} + + +/* + * Format access functions. + */ + void util_format_read_4f(enum pipe_format format, float *dst, unsigned dst_stride, @@ -135,4 +419,8 @@ util_format_write_4ub(enum pipe_format format, void *dst, unsigned dst_stride, unsigned x, unsigned y, unsigned w, unsigned h); +#ifdef __cplusplus +} // extern "C" { +#endif + #endif /* ! U_FORMAT_H */ diff --git a/src/gallium/auxiliary/util/u_format_access.py b/src/gallium/auxiliary/util/u_format_access.py index eeb1a9657fd..0b05ddb9312 100644 --- a/src/gallium/auxiliary/util/u_format_access.py +++ b/src/gallium/auxiliary/util/u_format_access.py @@ -325,14 +325,14 @@ def generate_format_read(format, dst_type, dst_native_type, dst_suffix): elif swizzle == SWIZZLE_0: value = '0' elif swizzle == SWIZZLE_1: - value = '1' + value = get_one(dst_type) else: assert False elif format.colorspace == 'zs': if i < 3: value = 'z' else: - value = '1' + value = get_one(dst_type) else: assert False print ' *dst_pixel++ = %s; /* %s */' % (value, 'rgba'[i]) diff --git a/src/gallium/auxiliary/util/u_format_table.py b/src/gallium/auxiliary/util/u_format_table.py index 8834568e8ee..571cab55dc8 100755 --- a/src/gallium/auxiliary/util/u_format_table.py +++ b/src/gallium/auxiliary/util/u_format_table.py @@ -44,11 +44,10 @@ def colorspace_map(colorspace): colorspace_channels_map = { - 'rgb': 'rgba', - 'rgba': 'rgba', - 'zs': 'zs', - 'yuv': ['y1', 'y2', 'u', 'v'], - 'dxt': [] + 'rgb': ['r', 'g', 'b', 'a'], + 'srgb': ['sr', 'sg', 'sb', 'a'], + 'zs': ['z', 's'], + 'yuv': ['y', 'u', 'v'], } @@ -90,11 +89,20 @@ def write_format_table(formats): print 'const struct util_format_description' print 'util_format_description_table[] = ' print "{" + print " {" + print " PIPE_FORMAT_NONE," + print " \"PIPE_FORMAT_NONE\"," + print " {0, 0, 0}," + print " 0," + print " {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}, {0, 0, 0}}," + print " {0, 0, 0, 0}," + print " 0" + print " }," for format in formats: print " {" print " %s," % (format.name,) print " \"%s\"," % (format.name,) - print " {%u, %u, %u}, /* block */" % (format.block_width, format.block_height, format.block_size()) + print " {%u, %u, %u},\t/* block */" % (format.block_width, format.block_height, format.block_size()) print " %s," % (layout_map(format.layout),) print " {" for i in range(4): @@ -103,7 +111,7 @@ def write_format_table(formats): sep = "," else: sep = "" - print " {%s, %s, %u}%s /* %s */" % (kind_map[type.kind], bool_map(type.norm), type.size, sep, "xyzw"[i]) + print " {%s, %s, %u}%s\t/* %s */" % (kind_map[type.kind], bool_map(type.norm), type.size, sep, "xyzw"[i]) print " }," print " {" for i in range(4): @@ -113,22 +121,13 @@ def write_format_table(formats): else: sep = "" try: - comment = layout_channels_map[format.layout][i] - except: + comment = colorspace_channels_map[format.colorspace][i] + except (KeyError, IndexError): comment = 'ignored' - print " %s%s /* %s */" % (swizzle_map[swizzle], sep, comment) + print " %s%s\t/* %s */" % (swizzle_map[swizzle], sep, comment) print " }," print " %s," % (colorspace_map(format.colorspace),) print " }," - print " {" - print " PIPE_FORMAT_NONE," - print " \"PIPE_FORMAT_NONE\"," - print " {0, 0, 0}," - print " 0," - print " {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}, {0, 0, 0}}," - print " {0, 0, 0, 0}," - print " 0" - print " }," print "};" diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index aa823aa218b..76023794dcd 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -41,10 +41,13 @@ #include "pipe/p_shader_tokens.h" #include "pipe/p_state.h" +#include "util/u_format.h" #include "util/u_memory.h" #include "util/u_draw_quad.h" #include "util/u_gen_mipmap.h" #include "util/u_simple_shaders.h" +#include "util/u_math.h" +#include "util/u_texture.h" #include "cso_cache/cso_context.h" @@ -60,7 +63,7 @@ struct gen_mipmap_state struct pipe_sampler_state sampler; void *vs; - void *fs; + void *fs2d, *fsCube; struct pipe_buffer *vbuf; /**< quad vertices */ unsigned vbuf_slot; @@ -995,7 +998,7 @@ reduce_2d(enum pipe_format pformat, { enum dtype datatype; uint comps; - const int bpt = pf_get_size(pformat); + const int bpt = util_format_get_blocksize(pformat); const ubyte *srcA, *srcB; ubyte *dst; int row; @@ -1034,7 +1037,7 @@ reduce_3d(enum pipe_format pformat, int dstWidth, int dstHeight, int dstDepth, int dstRowStride, ubyte *dstPtr) { - const int bpt = pf_get_size(pformat); + const int bpt = util_format_get_blocksize(pformat); const int border = 0; int img, row; int bytesPerSrcImage, bytesPerDstImage; @@ -1125,12 +1128,12 @@ make_1d_mipmap(struct gen_mipmap_state *ctx, srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice, PIPE_TRANSFER_READ, 0, 0, - pt->width[srcLevel], - pt->height[srcLevel]); + u_minify(pt->width0, srcLevel), + u_minify(pt->height0, srcLevel)); dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice, PIPE_TRANSFER_WRITE, 0, 0, - pt->width[dstLevel], - pt->height[dstLevel]); + u_minify(pt->width0, dstLevel), + u_minify(pt->height0, dstLevel)); srcMap = (ubyte *) screen->transfer_map(screen, srcTrans); dstMap = (ubyte *) screen->transfer_map(screen, dstTrans); @@ -1158,8 +1161,8 @@ make_2d_mipmap(struct gen_mipmap_state *ctx, const uint zslice = 0; uint dstLevel; - assert(pt->block.width == 1); - assert(pt->block.height == 1); + assert(util_format_get_blockwidth(pt->format) == 1); + assert(util_format_get_blockheight(pt->format) == 1); for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) { const uint srcLevel = dstLevel - 1; @@ -1168,12 +1171,12 @@ make_2d_mipmap(struct gen_mipmap_state *ctx, srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice, PIPE_TRANSFER_READ, 0, 0, - pt->width[srcLevel], - pt->height[srcLevel]); + u_minify(pt->width0, srcLevel), + u_minify(pt->height0, srcLevel)); dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice, PIPE_TRANSFER_WRITE, 0, 0, - pt->width[dstLevel], - pt->height[dstLevel]); + u_minify(pt->width0, dstLevel), + u_minify(pt->height0, dstLevel)); srcMap = (ubyte *) screen->transfer_map(screen, srcTrans); dstMap = (ubyte *) screen->transfer_map(screen, dstTrans); @@ -1203,8 +1206,8 @@ make_3d_mipmap(struct gen_mipmap_state *ctx, struct pipe_screen *screen = pipe->screen; uint dstLevel, zslice = 0; - assert(pt->block.width == 1); - assert(pt->block.height == 1); + assert(util_format_get_blockwidth(pt->format) == 1); + assert(util_format_get_blockheight(pt->format) == 1); for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) { const uint srcLevel = dstLevel - 1; @@ -1213,12 +1216,12 @@ make_3d_mipmap(struct gen_mipmap_state *ctx, srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice, PIPE_TRANSFER_READ, 0, 0, - pt->width[srcLevel], - pt->height[srcLevel]); + u_minify(pt->width0, srcLevel), + u_minify(pt->height0, srcLevel)); dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice, PIPE_TRANSFER_WRITE, 0, 0, - pt->width[dstLevel], - pt->height[dstLevel]); + u_minify(pt->width0, dstLevel), + u_minify(pt->height0, dstLevel)); srcMap = (ubyte *) screen->transfer_map(screen, srcTrans); dstMap = (ubyte *) screen->transfer_map(screen, dstTrans); @@ -1316,7 +1319,8 @@ util_create_gen_mipmap(struct pipe_context *pipe, } /* fragment shader */ - ctx->fs = util_make_fragment_tex_shader(pipe); + ctx->fs2d = util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_2D); + ctx->fsCube = util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_CUBE); /* vertex data that doesn't change */ for (i = 0; i < 4; i++) { @@ -1382,59 +1386,9 @@ set_vertex_data(struct gen_mipmap_state *ctx, static const float st[4][2] = { {0.0f, 0.0f}, {1.0f, 0.0f}, {1.0f, 1.0f}, {0.0f, 1.0f} }; - float rx, ry, rz; - uint i; - - /* loop over quad verts */ - for (i = 0; i < 4; i++) { - /* Compute sc = +/-scale and tc = +/-scale. - * Not +/-1 to avoid cube face selection ambiguity near the edges, - * though that can still sometimes happen with this scale factor... - */ - const float scale = 0.9999f; - const float sc = (2.0f * st[i][0] - 1.0f) * scale; - const float tc = (2.0f * st[i][1] - 1.0f) * scale; - - switch (face) { - case PIPE_TEX_FACE_POS_X: - rx = 1.0f; - ry = -tc; - rz = -sc; - break; - case PIPE_TEX_FACE_NEG_X: - rx = -1.0f; - ry = -tc; - rz = sc; - break; - case PIPE_TEX_FACE_POS_Y: - rx = sc; - ry = 1.0f; - rz = tc; - break; - case PIPE_TEX_FACE_NEG_Y: - rx = sc; - ry = -1.0f; - rz = -tc; - break; - case PIPE_TEX_FACE_POS_Z: - rx = sc; - ry = -tc; - rz = 1.0f; - break; - case PIPE_TEX_FACE_NEG_Z: - rx = -sc; - ry = -tc; - rz = -1.0f; - break; - default: - rx = ry = rz = 0.0f; - assert(0); - } - ctx->vertices[i][1][0] = rx; /*s*/ - ctx->vertices[i][1][1] = ry; /*t*/ - ctx->vertices[i][1][2] = rz; /*r*/ - } + util_map_texcoords2d_onto_cubemap(face, &st[0][0], 2, + &ctx->vertices[0][1][0], 8); } else { /* 1D/2D */ @@ -1474,7 +1428,8 @@ util_destroy_gen_mipmap(struct gen_mipmap_state *ctx) struct pipe_context *pipe = ctx->pipe; pipe->delete_vs_state(pipe, ctx->vs); - pipe->delete_fs_state(pipe, ctx->fs); + pipe->delete_fs_state(pipe, ctx->fs2d); + pipe->delete_fs_state(pipe, ctx->fsCube); pipe_buffer_reference(&ctx->vbuf, NULL); @@ -1512,6 +1467,7 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, struct pipe_context *pipe = ctx->pipe; struct pipe_screen *screen = pipe->screen; struct pipe_framebuffer_state fb; + void *fs = (pt->target == PIPE_TEXTURE_CUBE) ? ctx->fsCube : ctx->fs2d; uint dstLevel; uint zslice = 0; uint offset; @@ -1549,7 +1505,7 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil); cso_set_rasterizer(ctx->cso, &ctx->rasterizer); - cso_set_fragment_shader_handle(ctx->cso, ctx->fs); + cso_set_fragment_shader_handle(ctx->cso, fs); cso_set_vertex_shader_handle(ctx->cso, ctx->vs); /* init framebuffer state */ @@ -1575,8 +1531,8 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, * Setup framebuffer / dest surface */ fb.cbufs[0] = surf; - fb.width = pt->width[dstLevel]; - fb.height = pt->height[dstLevel]; + fb.width = u_minify(pt->width0, dstLevel); + fb.height = u_minify(pt->height0, dstLevel); cso_set_framebuffer(ctx->cso, &fb); /* @@ -1597,8 +1553,8 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, offset = set_vertex_data(ctx, pt->target, face, - (float) pt->width[dstLevel], - (float) pt->height[dstLevel]); + (float) u_minify(pt->width0, dstLevel), + (float) u_minify(pt->height0, dstLevel)); util_draw_vertex_buffer(ctx->pipe, ctx->vbuf, diff --git a/src/gallium/auxiliary/util/u_linear.c b/src/gallium/auxiliary/util/u_linear.c index a1dce3f5cf4..f1aef216771 100644 --- a/src/gallium/auxiliary/util/u_linear.c +++ b/src/gallium/auxiliary/util/u_linear.c @@ -82,7 +82,7 @@ void pipe_linear_from_tile(struct pipe_tile_info *t, const void *src_ptr, void pipe_linear_fill_info(struct pipe_tile_info *t, - const struct pipe_format_block *block, + const struct u_linear_format_block *block, unsigned tile_width, unsigned tile_height, unsigned tiles_x, unsigned tiles_y) { diff --git a/src/gallium/auxiliary/util/u_linear.h b/src/gallium/auxiliary/util/u_linear.h index b74308ffa3d..42c40b2aa75 100644 --- a/src/gallium/auxiliary/util/u_linear.h +++ b/src/gallium/auxiliary/util/u_linear.h @@ -35,6 +35,19 @@ #include "pipe/p_format.h" +struct u_linear_format_block +{ + /** Block size in bytes */ + unsigned size; + + /** Block width in pixels */ + unsigned width; + + /** Block height in pixels */ + unsigned height; +}; + + struct pipe_tile_info { unsigned size; @@ -49,10 +62,10 @@ struct pipe_tile_info unsigned rows; /* Describe the tile in pixels */ - struct pipe_format_block tile; + struct u_linear_format_block tile; /* Describe each block within the tile */ - struct pipe_format_block block; + struct u_linear_format_block block; }; void pipe_linear_to_tile(size_t src_stride, const void *src_ptr, @@ -71,7 +84,7 @@ void pipe_linear_from_tile(struct pipe_tile_info *t, const void *src_ptr, * @tiles_y number of tiles in y axis */ void pipe_linear_fill_info(struct pipe_tile_info *t, - const struct pipe_format_block *block, + const struct u_linear_format_block *block, unsigned tile_width, unsigned tile_height, unsigned tiles_x, unsigned tiles_y); diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index 75b075f160d..b2969a210a7 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -491,6 +491,47 @@ util_next_power_of_two(unsigned x) /** + * Return number of bits set in n. + */ +static INLINE unsigned +util_bitcount(unsigned n) +{ +#if defined(PIPE_CC_GCC) + return __builtin_popcount(n); +#else + /* K&R classic bitcount. + * + * For each iteration, clear the LSB from the bitfield. + * Requires only one iteration per set bit, instead of + * one iteration per bit less than highest set bit. + */ + unsigned bits = 0; + for (bits; n; bits++) { + n &= n - 1; + } + return bits; +#endif +} + + +/** + * Reverse byte order of a 32 bit word. + */ +static INLINE uint32_t +util_bswap32(uint32_t n) +{ +#if defined(PIPE_CC_GCC) && (PIPE_CC_GCC_VERSION >= 403) + return __builtin_bswap32(n); +#else + return (n >> 24) | + ((n >> 8) & 0x0000ff00) | + ((n << 8) & 0x00ff0000) | + (n << 24); +#endif +} + + +/** * Clamp X to [MIN, MAX]. * This is a macro to allow float, int, uint, etc. types. */ @@ -499,6 +540,9 @@ util_next_power_of_two(unsigned x) #define MIN2( A, B ) ( (A)<(B) ? (A) : (B) ) #define MAX2( A, B ) ( (A)>(B) ? (A) : (B) ) +#define MIN3( A, B, C ) MIN2( MIN2( A, B ), C ) +#define MAX3( A, B, C ) MAX2( MAX2( A, B ), C ) + static INLINE int align(int value, int alignment) @@ -507,9 +551,9 @@ align(int value, int alignment) } static INLINE unsigned -minify(unsigned value) +u_minify(unsigned value, unsigned levels) { - return MAX2(1, value >> 1); + return MAX2(1, value >> levels); } #ifndef COPY_4V @@ -539,6 +583,18 @@ do { \ #endif +static INLINE uint32_t util_unsigned_fixed(float value, unsigned frac_bits) +{ + return value < 0 ? 0 : (uint32_t)(value * (1<<frac_bits)); +} + +static INLINE int32_t util_signed_fixed(float value, unsigned frac_bits) +{ + return (int32_t)(value * (1<<frac_bits)); +} + + + #ifdef __cplusplus } #endif diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h index 9dacc6d83db..43eb0153ee7 100644 --- a/src/gallium/auxiliary/util/u_pack_color.h +++ b/src/gallium/auxiliary/util/u_pack_color.h @@ -37,106 +37,110 @@ #include "pipe/p_compiler.h" #include "pipe/p_format.h" +#include "util/u_format.h" #include "util/u_math.h" + +union util_color { + ubyte ub; + ushort us; + uint ui; + float f[4]; +}; + /** * Pack ubyte R,G,B,A into dest pixel. */ static INLINE void util_pack_color_ub(ubyte r, ubyte g, ubyte b, ubyte a, - enum pipe_format format, void *dest) + enum pipe_format format, union util_color *uc) { switch (format) { case PIPE_FORMAT_R8G8B8A8_UNORM: { - uint *d = (uint *) dest; - *d = (r << 24) | (g << 16) | (b << 8) | a; + uc->ui = (r << 24) | (g << 16) | (b << 8) | a; } return; case PIPE_FORMAT_R8G8B8X8_UNORM: { - uint *d = (uint *) dest; - *d = (r << 24) | (g << 16) | (b << 8) | 0xff; + uc->ui = (r << 24) | (g << 16) | (b << 8) | 0xff; } return; case PIPE_FORMAT_A8R8G8B8_UNORM: { - uint *d = (uint *) dest; - *d = (a << 24) | (r << 16) | (g << 8) | b; + uc->ui = (a << 24) | (r << 16) | (g << 8) | b; } return; case PIPE_FORMAT_X8R8G8B8_UNORM: { - uint *d = (uint *) dest; - *d = (0xff << 24) | (r << 16) | (g << 8) | b; + uc->ui = (0xff << 24) | (r << 16) | (g << 8) | b; } return; case PIPE_FORMAT_B8G8R8A8_UNORM: { - uint *d = (uint *) dest; - *d = (b << 24) | (g << 16) | (r << 8) | a; + uc->ui = (b << 24) | (g << 16) | (r << 8) | a; } return; case PIPE_FORMAT_B8G8R8X8_UNORM: { - uint *d = (uint *) dest; - *d = (b << 24) | (g << 16) | (r << 8) | 0xff; + uc->ui = (b << 24) | (g << 16) | (r << 8) | 0xff; } return; case PIPE_FORMAT_R5G6B5_UNORM: { - ushort *d = (ushort *) dest; - *d = ((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3); + uc->us = ((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3); } return; case PIPE_FORMAT_A1R5G5B5_UNORM: { - ushort *d = (ushort *) dest; - *d = ((a & 0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 2) | (b >> 3); + uc->us = ((a & 0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 2) | (b >> 3); } return; case PIPE_FORMAT_A4R4G4B4_UNORM: { - ushort *d = (ushort *) dest; - *d = ((a & 0xf0) << 8) | ((r & 0xf0) << 4) | ((g & 0xf0) << 0) | (b >> 4); + uc->us = ((a & 0xf0) << 8) | ((r & 0xf0) << 4) | ((g & 0xf0) << 0) | (b >> 4); } return; case PIPE_FORMAT_A8_UNORM: { - ubyte *d = (ubyte *) dest; - *d = a; + uc->ub = a; } return; case PIPE_FORMAT_L8_UNORM: case PIPE_FORMAT_I8_UNORM: { - ubyte *d = (ubyte *) dest; - *d = r; + uc->ub = a; } return; case PIPE_FORMAT_R32G32B32A32_FLOAT: { - float *d = (float *) dest; - d[0] = (float)r / 255.0f; - d[1] = (float)g / 255.0f; - d[2] = (float)b / 255.0f; - d[3] = (float)a / 255.0f; + uc->f[0] = (float)r / 255.0f; + uc->f[1] = (float)g / 255.0f; + uc->f[2] = (float)b / 255.0f; + uc->f[3] = (float)a / 255.0f; } return; case PIPE_FORMAT_R32G32B32_FLOAT: { - float *d = (float *) dest; - d[0] = (float)r / 255.0f; - d[1] = (float)g / 255.0f; - d[2] = (float)b / 255.0f; + uc->f[0] = (float)r / 255.0f; + uc->f[1] = (float)g / 255.0f; + uc->f[2] = (float)b / 255.0f; } return; - /* XXX lots more cases to add */ + /* Handle other cases with a generic function. + */ default: - debug_print_format("gallium: unhandled format in util_pack_color_ub()", format); - assert(0); + { + ubyte src[4]; + + src[0] = r; + src[1] = g; + src[2] = b; + src[3] = a; + util_format_write_4ub(format, src, 0, uc, 0, 0, 0, 1, 1); + } } } @@ -145,13 +149,13 @@ util_pack_color_ub(ubyte r, ubyte g, ubyte b, ubyte a, * Unpack RGBA from a packed pixel, returning values as ubytes in [0,255]. */ static INLINE void -util_unpack_color_ub(enum pipe_format format, const void *src, +util_unpack_color_ub(enum pipe_format format, union util_color *uc, ubyte *r, ubyte *g, ubyte *b, ubyte *a) { switch (format) { case PIPE_FORMAT_R8G8B8A8_UNORM: { - uint p = ((const uint *) src)[0]; + uint p = uc->ui; *r = (ubyte) ((p >> 24) & 0xff); *g = (ubyte) ((p >> 16) & 0xff); *b = (ubyte) ((p >> 8) & 0xff); @@ -160,7 +164,7 @@ util_unpack_color_ub(enum pipe_format format, const void *src, return; case PIPE_FORMAT_R8G8B8X8_UNORM: { - uint p = ((const uint *) src)[0]; + uint p = uc->ui; *r = (ubyte) ((p >> 24) & 0xff); *g = (ubyte) ((p >> 16) & 0xff); *b = (ubyte) ((p >> 8) & 0xff); @@ -169,7 +173,7 @@ util_unpack_color_ub(enum pipe_format format, const void *src, return; case PIPE_FORMAT_A8R8G8B8_UNORM: { - uint p = ((const uint *) src)[0]; + uint p = uc->ui; *r = (ubyte) ((p >> 16) & 0xff); *g = (ubyte) ((p >> 8) & 0xff); *b = (ubyte) ((p >> 0) & 0xff); @@ -178,7 +182,7 @@ util_unpack_color_ub(enum pipe_format format, const void *src, return; case PIPE_FORMAT_X8R8G8B8_UNORM: { - uint p = ((const uint *) src)[0]; + uint p = uc->ui; *r = (ubyte) ((p >> 16) & 0xff); *g = (ubyte) ((p >> 8) & 0xff); *b = (ubyte) ((p >> 0) & 0xff); @@ -187,7 +191,7 @@ util_unpack_color_ub(enum pipe_format format, const void *src, return; case PIPE_FORMAT_B8G8R8A8_UNORM: { - uint p = ((const uint *) src)[0]; + uint p = uc->ui; *r = (ubyte) ((p >> 8) & 0xff); *g = (ubyte) ((p >> 16) & 0xff); *b = (ubyte) ((p >> 24) & 0xff); @@ -196,7 +200,7 @@ util_unpack_color_ub(enum pipe_format format, const void *src, return; case PIPE_FORMAT_B8G8R8X8_UNORM: { - uint p = ((const uint *) src)[0]; + uint p = uc->ui; *r = (ubyte) ((p >> 8) & 0xff); *g = (ubyte) ((p >> 16) & 0xff); *b = (ubyte) ((p >> 24) & 0xff); @@ -205,7 +209,7 @@ util_unpack_color_ub(enum pipe_format format, const void *src, return; case PIPE_FORMAT_R5G6B5_UNORM: { - ushort p = ((const ushort *) src)[0]; + ushort p = uc->us; *r = (ubyte) (((p >> 8) & 0xf8) | ((p >> 13) & 0x7)); *g = (ubyte) (((p >> 3) & 0xfc) | ((p >> 9) & 0x3)); *b = (ubyte) (((p << 3) & 0xf8) | ((p >> 2) & 0x7)); @@ -214,7 +218,7 @@ util_unpack_color_ub(enum pipe_format format, const void *src, return; case PIPE_FORMAT_A1R5G5B5_UNORM: { - ushort p = ((const ushort *) src)[0]; + ushort p = uc->us; *r = (ubyte) (((p >> 7) & 0xf8) | ((p >> 12) & 0x7)); *g = (ubyte) (((p >> 2) & 0xf8) | ((p >> 7) & 0x7)); *b = (ubyte) (((p << 3) & 0xf8) | ((p >> 2) & 0x7)); @@ -223,7 +227,7 @@ util_unpack_color_ub(enum pipe_format format, const void *src, return; case PIPE_FORMAT_A4R4G4B4_UNORM: { - ushort p = ((const ushort *) src)[0]; + ushort p = uc->us; *r = (ubyte) (((p >> 4) & 0xf0) | ((p >> 8) & 0xf)); *g = (ubyte) (((p >> 0) & 0xf0) | ((p >> 4) & 0xf)); *b = (ubyte) (((p << 4) & 0xf0) | ((p >> 0) & 0xf)); @@ -232,27 +236,27 @@ util_unpack_color_ub(enum pipe_format format, const void *src, return; case PIPE_FORMAT_A8_UNORM: { - ubyte p = ((const ubyte *) src)[0]; + ubyte p = uc->ub; *r = *g = *b = (ubyte) 0xff; *a = p; } return; case PIPE_FORMAT_L8_UNORM: { - ubyte p = ((const ubyte *) src)[0]; + ubyte p = uc->ub; *r = *g = *b = p; *a = (ubyte) 0xff; } return; case PIPE_FORMAT_I8_UNORM: { - ubyte p = ((const ubyte *) src)[0]; + ubyte p = uc->ub; *r = *g = *b = *a = p; } return; case PIPE_FORMAT_R32G32B32A32_FLOAT: { - const float *p = (const float *) src; + const float *p = &uc->f[0]; *r = float_to_ubyte(p[0]); *g = float_to_ubyte(p[1]); *b = float_to_ubyte(p[2]); @@ -261,7 +265,7 @@ util_unpack_color_ub(enum pipe_format format, const void *src, return; case PIPE_FORMAT_R32G32B32_FLOAT: { - const float *p = (const float *) src; + const float *p = &uc->f[0]; *r = float_to_ubyte(p[0]); *g = float_to_ubyte(p[1]); *b = float_to_ubyte(p[2]); @@ -271,7 +275,7 @@ util_unpack_color_ub(enum pipe_format format, const void *src, case PIPE_FORMAT_R32G32_FLOAT: { - const float *p = (const float *) src; + const float *p = &uc->f[0]; *r = float_to_ubyte(p[0]); *g = float_to_ubyte(p[1]); *b = *a = (ubyte) 0xff; @@ -280,34 +284,40 @@ util_unpack_color_ub(enum pipe_format format, const void *src, case PIPE_FORMAT_R32_FLOAT: { - const float *p = (const float *) src; + const float *p = &uc->f[0]; *r = float_to_ubyte(p[0]); *g = *b = *a = (ubyte) 0xff; } return; - /* XXX lots more cases to add */ + /* Handle other cases with a generic function. + */ default: - debug_print_format("gallium: unhandled format in util_unpack_color_ub()", - format); - assert(0); + { + ubyte dst[4]; + + util_format_read_4ub(format, dst, 0, uc, 0, 0, 0, 1, 1); + *r = dst[0]; + *g = dst[1]; + *b = dst[2]; + *a = dst[3]; + } } } - /** * Note rgba outside [0,1] will be clamped for int pixel formats. */ static INLINE void -util_pack_color(const float rgba[4], enum pipe_format format, void *dest) +util_pack_color(const float rgba[4], enum pipe_format format, union util_color *uc) { ubyte r = 0; ubyte g = 0; ubyte b = 0; ubyte a = 0; - if (pf_size_x(format) <= 8) { + if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0) <= 8) { /* format uses 8-bit components or less */ r = float_to_ubyte(rgba[0]); g = float_to_ubyte(rgba[1]); @@ -318,92 +328,80 @@ util_pack_color(const float rgba[4], enum pipe_format format, void *dest) switch (format) { case PIPE_FORMAT_R8G8B8A8_UNORM: { - uint *d = (uint *) dest; - *d = (r << 24) | (g << 16) | (b << 8) | a; + uc->ui = (r << 24) | (g << 16) | (b << 8) | a; } return; case PIPE_FORMAT_R8G8B8X8_UNORM: { - uint *d = (uint *) dest; - *d = (r << 24) | (g << 16) | (b << 8) | 0xff; + uc->ui = (r << 24) | (g << 16) | (b << 8) | 0xff; } return; case PIPE_FORMAT_A8R8G8B8_UNORM: { - uint *d = (uint *) dest; - *d = (a << 24) | (r << 16) | (g << 8) | b; + uc->ui = (a << 24) | (r << 16) | (g << 8) | b; } return; case PIPE_FORMAT_X8R8G8B8_UNORM: { - uint *d = (uint *) dest; - *d = (0xff << 24) | (r << 16) | (g << 8) | b; + uc->ui = (0xff << 24) | (r << 16) | (g << 8) | b; } return; case PIPE_FORMAT_B8G8R8A8_UNORM: { - uint *d = (uint *) dest; - *d = (b << 24) | (g << 16) | (r << 8) | a; + uc->ui = (b << 24) | (g << 16) | (r << 8) | a; } return; case PIPE_FORMAT_B8G8R8X8_UNORM: { - uint *d = (uint *) dest; - *d = (b << 24) | (g << 16) | (r << 8) | 0xff; + uc->ui = (b << 24) | (g << 16) | (r << 8) | 0xff; } return; case PIPE_FORMAT_R5G6B5_UNORM: { - ushort *d = (ushort *) dest; - *d = ((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3); + uc->us = ((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3); } return; case PIPE_FORMAT_A1R5G5B5_UNORM: { - ushort *d = (ushort *) dest; - *d = ((a & 0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 2) | (b >> 3); + uc->us = ((a & 0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 2) | (b >> 3); } return; case PIPE_FORMAT_A4R4G4B4_UNORM: { - ushort *d = (ushort *) dest; - *d = ((a & 0xf0) << 8) | ((r & 0xf0) << 4) | ((g & 0xf0) << 0) | (b >> 4); + uc->ub = ((a & 0xf0) << 8) | ((r & 0xf0) << 4) | ((g & 0xf0) << 0) | (b >> 4); } return; case PIPE_FORMAT_A8_UNORM: { - ubyte *d = (ubyte *) dest; - *d = a; + uc->ub = a; } return; case PIPE_FORMAT_L8_UNORM: case PIPE_FORMAT_I8_UNORM: { - ubyte *d = (ubyte *) dest; - *d = r; + uc->ub = r; } return; case PIPE_FORMAT_R32G32B32A32_FLOAT: { - float *d = (float *) dest; - d[0] = rgba[0]; - d[1] = rgba[1]; - d[2] = rgba[2]; - d[3] = rgba[3]; + uc->f[0] = rgba[0]; + uc->f[1] = rgba[1]; + uc->f[2] = rgba[2]; + uc->f[3] = rgba[3]; } return; case PIPE_FORMAT_R32G32B32_FLOAT: { - float *d = (float *) dest; - d[0] = rgba[0]; - d[1] = rgba[1]; - d[2] = rgba[2]; + uc->f[0] = rgba[0]; + uc->f[1] = rgba[1]; + uc->f[2] = rgba[2]; } return; - /* XXX lots more cases to add */ + + /* Handle other cases with a generic function. + */ default: - debug_print_format("gallium: unhandled format in util_pack_color()", format); - assert(0); + util_format_write_4f(format, rgba, 0, uc, 0, 0, 0, 1, 1); } } diff --git a/src/gallium/auxiliary/util/u_prim.h b/src/gallium/auxiliary/util/u_prim.h index a9b533eea70..10a874f3416 100644 --- a/src/gallium/auxiliary/util/u_prim.h +++ b/src/gallium/auxiliary/util/u_prim.h @@ -135,4 +135,39 @@ static INLINE unsigned u_reduced_prim( unsigned pipe_prim ) } } +static INLINE unsigned +u_vertices_per_prim(int primitive) +{ + switch(primitive) { + case PIPE_PRIM_POINTS: + return 1; + case PIPE_PRIM_LINES: + case PIPE_PRIM_LINE_LOOP: + case PIPE_PRIM_LINE_STRIP: + return 2; + case PIPE_PRIM_TRIANGLES: + case PIPE_PRIM_TRIANGLE_STRIP: + case PIPE_PRIM_TRIANGLE_FAN: + return 3; + case PIPE_PRIM_LINES_ADJACENCY: + case PIPE_PRIM_LINE_STRIP_ADJACENCY: + return 4; + case PIPE_PRIM_TRIANGLES_ADJACENCY: + case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: + return 6; + + /* following primitives should never be used + * with geometry shaders abd their size is + * undefined */ + case PIPE_PRIM_POLYGON: + case PIPE_PRIM_QUADS: + case PIPE_PRIM_QUAD_STRIP: + default: + debug_printf("Unrecognized geometry shader primitive"); + return 3; + } +} + +const char *u_prim_name( unsigned pipe_prim ); + #endif diff --git a/src/gallium/auxiliary/util/u_rect.c b/src/gallium/auxiliary/util/u_rect.c index 9866b6fc8a0..298fbacecba 100644 --- a/src/gallium/auxiliary/util/u_rect.c +++ b/src/gallium/auxiliary/util/u_rect.c @@ -34,6 +34,7 @@ #include "pipe/p_format.h" #include "pipe/p_context.h" #include "pipe/p_screen.h" +#include "util/u_format.h" #include "util/u_rect.h" @@ -44,7 +45,7 @@ */ void util_copy_rect(ubyte * dst, - const struct pipe_format_block *block, + enum pipe_format format, unsigned dst_stride, unsigned dst_x, unsigned dst_y, @@ -57,27 +58,30 @@ util_copy_rect(ubyte * dst, { unsigned i; int src_stride_pos = src_stride < 0 ? -src_stride : src_stride; + int blocksize = util_format_get_blocksize(format); + int blockwidth = util_format_get_blockwidth(format); + int blockheight = util_format_get_blockheight(format); - assert(block->size > 0); - assert(block->width > 0); - assert(block->height > 0); + assert(blocksize > 0); + assert(blockwidth > 0); + assert(blockheight > 0); assert(src_x >= 0); assert(src_y >= 0); assert(dst_x >= 0); assert(dst_y >= 0); - dst_x /= block->width; - dst_y /= block->height; - width = (width + block->width - 1)/block->width; - height = (height + block->height - 1)/block->height; - src_x /= block->width; - src_y /= block->height; + dst_x /= blockwidth; + dst_y /= blockheight; + width = (width + blockwidth - 1)/blockwidth; + height = (height + blockheight - 1)/blockheight; + src_x /= blockwidth; + src_y /= blockheight; - dst += dst_x * block->size; - src += src_x * block->size; + dst += dst_x * blocksize; + src += src_x * blocksize; dst += dst_y * dst_stride; src += src_y * src_stride_pos; - width *= block->size; + width *= blocksize; if (width == dst_stride && width == src_stride) memcpy(dst, src, height * width); @@ -92,7 +96,7 @@ util_copy_rect(ubyte * dst, void util_fill_rect(ubyte * dst, - const struct pipe_format_block *block, + enum pipe_format format, unsigned dst_stride, unsigned dst_x, unsigned dst_y, @@ -102,23 +106,26 @@ util_fill_rect(ubyte * dst, { unsigned i, j; unsigned width_size; + int blocksize = util_format_get_blocksize(format); + int blockwidth = util_format_get_blockwidth(format); + int blockheight = util_format_get_blockheight(format); - assert(block->size > 0); - assert(block->width > 0); - assert(block->height > 0); + assert(blocksize > 0); + assert(blockwidth > 0); + assert(blockheight > 0); assert(dst_x >= 0); assert(dst_y >= 0); - dst_x /= block->width; - dst_y /= block->height; - width = (width + block->width - 1)/block->width; - height = (height + block->height - 1)/block->height; + dst_x /= blockwidth; + dst_y /= blockheight; + width = (width + blockwidth - 1)/blockwidth; + height = (height + blockheight - 1)/blockheight; - dst += dst_x * block->size; + dst += dst_x * blocksize; dst += dst_y * dst_stride; - width_size = width * block->size; + width_size = width * blocksize; - switch (block->size) { + switch (blocksize) { case 1: if(dst_stride == width_size) memset(dst, (ubyte) value, height * width_size); @@ -172,10 +179,15 @@ util_surface_copy(struct pipe_context *pipe, struct pipe_transfer *src_trans, *dst_trans; void *dst_map; const void *src_map; + enum pipe_format src_format, dst_format; assert(src->texture && dst->texture); if (!src->texture || !dst->texture) return; + + src_format = src->texture->format; + dst_format = dst->texture->format; + src_trans = screen->get_tex_transfer(screen, src->texture, src->face, @@ -192,9 +204,9 @@ util_surface_copy(struct pipe_context *pipe, PIPE_TRANSFER_WRITE, dst_x, dst_y, w, h); - assert(dst_trans->block.size == src_trans->block.size); - assert(dst_trans->block.width == src_trans->block.width); - assert(dst_trans->block.height == src_trans->block.height); + assert(util_format_get_blocksize(dst_format) == util_format_get_blocksize(src_format)); + assert(util_format_get_blockwidth(dst_format) == util_format_get_blockwidth(src_format)); + assert(util_format_get_blockheight(dst_format) == util_format_get_blockheight(src_format)); src_map = pipe->screen->transfer_map(screen, src_trans); dst_map = pipe->screen->transfer_map(screen, dst_trans); @@ -205,7 +217,7 @@ util_surface_copy(struct pipe_context *pipe, if (src_map && dst_map) { /* If do_flip, invert src_y position and pass negative src stride */ util_copy_rect(dst_map, - &dst_trans->block, + dst_format, dst_trans->stride, 0, 0, w, h, @@ -259,11 +271,11 @@ util_surface_fill(struct pipe_context *pipe, if (dst_map) { assert(dst_trans->stride > 0); - switch (dst_trans->block.size) { + switch (util_format_get_blocksize(dst_trans->texture->format)) { case 1: case 2: case 4: - util_fill_rect(dst_map, &dst_trans->block, dst_trans->stride, + util_fill_rect(dst_map, dst_trans->texture->format, dst_trans->stride, 0, 0, width, height, value); break; case 8: diff --git a/src/gallium/auxiliary/util/u_rect.h b/src/gallium/auxiliary/util/u_rect.h index daa50834d36..5e444ffae21 100644 --- a/src/gallium/auxiliary/util/u_rect.h +++ b/src/gallium/auxiliary/util/u_rect.h @@ -42,13 +42,13 @@ struct pipe_surface; extern void -util_copy_rect(ubyte * dst, const struct pipe_format_block *block, +util_copy_rect(ubyte * dst, enum pipe_format format, unsigned dst_stride, unsigned dst_x, unsigned dst_y, unsigned width, unsigned height, const ubyte * src, int src_stride, unsigned src_x, int src_y); extern void -util_fill_rect(ubyte * dst, const struct pipe_format_block *block, +util_fill_rect(ubyte * dst, enum pipe_format format, unsigned dst_stride, unsigned dst_x, unsigned dst_y, unsigned width, unsigned height, uint32_t value); diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c index 1c8b157d91f..b751e29ab62 100644 --- a/src/gallium/auxiliary/util/u_simple_shaders.c +++ b/src/gallium/auxiliary/util/u_simple_shaders.c @@ -2,6 +2,7 @@ * * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. + * Copyright 2009 Marek Olšák <[email protected]> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -30,6 +31,7 @@ * Simple vertex/fragment shader generators. * * @author Brian Paul + Marek Olšák */ @@ -42,13 +44,15 @@ /** * Make simple vertex pass-through shader. + * \param num_attribs number of attributes to pass through + * \param semantic_names array of semantic names for each attribute + * \param semantic_indexes array of semantic indexes for each attribute */ void * util_make_vertex_passthrough_shader(struct pipe_context *pipe, uint num_attribs, const uint *semantic_names, const uint *semantic_indexes) - { struct ureg_program *ureg; uint i; @@ -76,8 +80,6 @@ util_make_vertex_passthrough_shader(struct pipe_context *pipe, } - - /** * Make simple fragment texture shader: * IMM {0,0,0,1} // (if writemask != 0xf) @@ -87,6 +89,7 @@ util_make_vertex_passthrough_shader(struct pipe_context *pipe, */ void * util_make_fragment_tex_shader_writemask(struct pipe_context *pipe, + unsigned tex_target, unsigned writemask ) { struct ureg_program *ureg; @@ -116,20 +119,71 @@ util_make_fragment_tex_shader_writemask(struct pipe_context *pipe, ureg_TEX( ureg, ureg_writemask(out, writemask), - TGSI_TEXTURE_2D, tex, sampler ); + tex_target, tex, sampler ); ureg_END( ureg ); return ureg_create_shader_and_destroy( ureg, pipe ); } + +/** + * Make a simple fragment shader that sets the output color to a color + * taken from a texture. + * \param tex_target one of PIPE_TEXTURE_x + */ void * -util_make_fragment_tex_shader(struct pipe_context *pipe ) +util_make_fragment_tex_shader(struct pipe_context *pipe, unsigned tex_target ) { return util_make_fragment_tex_shader_writemask( pipe, + tex_target, TGSI_WRITEMASK_XYZW ); } +/** + * Make a simple fragment texture shader which reads an X component from + * a texture and writes it as depth. + */ +void * +util_make_fragment_tex_shader_writedepth(struct pipe_context *pipe, + unsigned tex_target) +{ + struct ureg_program *ureg; + struct ureg_src sampler; + struct ureg_src tex; + struct ureg_dst out, depth; + struct ureg_src imm; + + ureg = ureg_create( TGSI_PROCESSOR_FRAGMENT ); + if (ureg == NULL) + return NULL; + + sampler = ureg_DECL_sampler( ureg, 0 ); + + tex = ureg_DECL_fs_input( ureg, + TGSI_SEMANTIC_GENERIC, 0, + TGSI_INTERPOLATE_PERSPECTIVE ); + + out = ureg_DECL_output( ureg, + TGSI_SEMANTIC_COLOR, + 0 ); + + depth = ureg_DECL_output( ureg, + TGSI_SEMANTIC_POSITION, + 0 ); + + imm = ureg_imm4f( ureg, 0, 0, 0, 1 ); + + ureg_MOV( ureg, out, imm ); + + ureg_TEX( ureg, + ureg_writemask(depth, TGSI_WRITEMASK_Z), + tex_target, tex, sampler ); + ureg_END( ureg ); + + return ureg_create_shader_and_destroy( ureg, pipe ); +} + /** * Make simple fragment color pass-through shader. @@ -137,9 +191,22 @@ util_make_fragment_tex_shader(struct pipe_context *pipe ) void * util_make_fragment_passthrough_shader(struct pipe_context *pipe) { + return util_make_fragment_clonecolor_shader(pipe, 1); +} + + +/** + * Make a fragment shader that copies the input color to N output colors. + */ +void * +util_make_fragment_clonecolor_shader(struct pipe_context *pipe, int num_cbufs) +{ struct ureg_program *ureg; struct ureg_src src; - struct ureg_dst dst; + struct ureg_dst dst[PIPE_MAX_COLOR_BUFS]; + int i; + + assert(num_cbufs <= PIPE_MAX_COLOR_BUFS); ureg = ureg_create( TGSI_PROCESSOR_FRAGMENT ); if (ureg == NULL) @@ -148,12 +215,13 @@ util_make_fragment_passthrough_shader(struct pipe_context *pipe) src = ureg_DECL_fs_input( ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_PERSPECTIVE ); - dst = ureg_DECL_output( ureg, TGSI_SEMANTIC_COLOR, 0 ); + for (i = 0; i < num_cbufs; i++) + dst[i] = ureg_DECL_output( ureg, TGSI_SEMANTIC_COLOR, i ); + + for (i = 0; i < num_cbufs; i++) + ureg_MOV( ureg, dst[i], src ); - ureg_MOV( ureg, dst, src ); ureg_END( ureg ); return ureg_create_shader_and_destroy( ureg, pipe ); } - - diff --git a/src/gallium/auxiliary/util/u_simple_shaders.h b/src/gallium/auxiliary/util/u_simple_shaders.h index d2e80d6eb4d..6e760942e25 100644 --- a/src/gallium/auxiliary/util/u_simple_shaders.h +++ b/src/gallium/auxiliary/util/u_simple_shaders.h @@ -51,16 +51,25 @@ util_make_vertex_passthrough_shader(struct pipe_context *pipe, extern void * util_make_fragment_tex_shader_writemask(struct pipe_context *pipe, - unsigned writemask ); + unsigned tex_target, + unsigned writemask); extern void * -util_make_fragment_tex_shader(struct pipe_context *pipe); +util_make_fragment_tex_shader(struct pipe_context *pipe, unsigned tex_target); + + +extern void * +util_make_fragment_tex_shader_writedepth(struct pipe_context *pipe, + unsigned tex_target); extern void * util_make_fragment_passthrough_shader(struct pipe_context *pipe); +extern void * +util_make_fragment_clonecolor_shader(struct pipe_context *pipe, int num_cbufs); + #ifdef __cplusplus } #endif diff --git a/src/gallium/auxiliary/util/u_surface.c b/src/gallium/auxiliary/util/u_surface.c index 85e443204e3..35c49782043 100644 --- a/src/gallium/auxiliary/util/u_surface.c +++ b/src/gallium/auxiliary/util/u_surface.c @@ -36,6 +36,7 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" +#include "util/u_format.h" #include "util/u_surface.h" @@ -79,10 +80,9 @@ util_create_rgba_surface(struct pipe_screen *screen, templ.target = target; templ.format = format; templ.last_level = 0; - templ.width[0] = width; - templ.height[0] = height; - templ.depth[0] = 1; - pf_get_block(format, &templ.block); + templ.width0 = width; + templ.height0 = height; + templ.depth0 = 1; templ.tex_usage = usage; *textureOut = screen->texture_create(screen, &templ); diff --git a/src/gallium/auxiliary/util/u_texture.c b/src/gallium/auxiliary/util/u_texture.c new file mode 100644 index 00000000000..cd477ab640f --- /dev/null +++ b/src/gallium/auxiliary/util/u_texture.c @@ -0,0 +1,102 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * Copyright 2008 VMware, Inc. All rights reserved. + * Copyright 2009 Marek Olšák <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Texture mapping utility functions. + * + * @author Brian Paul + * Marek Olšák + */ + +#include "pipe/p_defines.h" + +#include "util/u_texture.h" + +void util_map_texcoords2d_onto_cubemap(unsigned face, + const float *in_st, unsigned in_stride, + float *out_str, unsigned out_stride) +{ + int i; + float rx, ry, rz; + + /* loop over quad verts */ + for (i = 0; i < 4; i++) { + /* Compute sc = +/-scale and tc = +/-scale. + * Not +/-1 to avoid cube face selection ambiguity near the edges, + * though that can still sometimes happen with this scale factor... + */ + const float scale = 0.9999f; + const float sc = (2 * in_st[0] - 1) * scale; + const float tc = (2 * in_st[1] - 1) * scale; + + switch (face) { + case PIPE_TEX_FACE_POS_X: + rx = 1; + ry = -tc; + rz = -sc; + break; + case PIPE_TEX_FACE_NEG_X: + rx = -1; + ry = -tc; + rz = sc; + break; + case PIPE_TEX_FACE_POS_Y: + rx = sc; + ry = 1; + rz = tc; + break; + case PIPE_TEX_FACE_NEG_Y: + rx = sc; + ry = -1; + rz = -tc; + break; + case PIPE_TEX_FACE_POS_Z: + rx = sc; + ry = -tc; + rz = 1; + break; + case PIPE_TEX_FACE_NEG_Z: + rx = -sc; + ry = -tc; + rz = -1; + break; + default: + rx = ry = rz = 0; + assert(0); + } + + out_str[0] = rx; /*s*/ + out_str[1] = ry; /*t*/ + out_str[2] = rz; /*r*/ + + in_st += in_stride; + out_str += out_stride; + } +} diff --git a/src/gallium/auxiliary/util/u_texture.h b/src/gallium/auxiliary/util/u_texture.h new file mode 100644 index 00000000000..93b2f1e4c97 --- /dev/null +++ b/src/gallium/auxiliary/util/u_texture.h @@ -0,0 +1,54 @@ +/************************************************************************** + * + * Copyright 2009 Marek Olšák <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef U_TEXTURE_H +#define U_TEXTURE_H + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Convert 2D texture coordinates of 4 vertices into cubemap coordinates + * in the given face. + * Coordinates must be in the range [0,1]. + * + * \param face Cubemap face. + * \param in_st 4 pairs of 2D texture coordinates to convert. + * \param in_stride Stride of in_st in floats. + * \param out_str STR cubemap texture coordinates to compute. + * \param out_stride Stride of out_str in floats. + */ +void util_map_texcoords2d_onto_cubemap(unsigned face, + const float *in_st, unsigned in_stride, + float *out_str, unsigned out_stride); + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c index 8a22f584bee..5b8dd1abb94 100644 --- a/src/gallium/auxiliary/util/u_tile.c +++ b/src/gallium/auxiliary/util/u_tile.c @@ -34,6 +34,7 @@ #include "pipe/p_defines.h" #include "pipe/p_inlines.h" +#include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" #include "util/u_rect.h" @@ -52,7 +53,7 @@ pipe_get_tile_raw(struct pipe_transfer *pt, const void *src; if (dst_stride == 0) - dst_stride = pf_get_nblocksx(&pt->block, w) * pt->block.size; + dst_stride = util_format_get_stride(pt->texture->format, w); if (pipe_clip_tile(x, y, &w, &h, pt)) return; @@ -62,7 +63,7 @@ pipe_get_tile_raw(struct pipe_transfer *pt, if(!src) return; - util_copy_rect(dst, &pt->block, dst_stride, 0, 0, w, h, src, pt->stride, x, y); + util_copy_rect(dst, pt->texture->format, dst_stride, 0, 0, w, h, src, pt->stride, x, y); screen->transfer_unmap(screen, pt); } @@ -78,9 +79,10 @@ pipe_put_tile_raw(struct pipe_transfer *pt, { struct pipe_screen *screen = pt->texture->screen; void *dst; + enum pipe_format format = pt->texture->format; if (src_stride == 0) - src_stride = pf_get_nblocksx(&pt->block, w) * pt->block.size; + src_stride = util_format_get_stride(format, w); if (pipe_clip_tile(x, y, &w, &h, pt)) return; @@ -90,7 +92,7 @@ pipe_put_tile_raw(struct pipe_transfer *pt, if(!dst) return; - util_copy_rect(dst, &pt->block, pt->stride, x, y, w, h, src, src_stride, 0, 0); + util_copy_rect(dst, format, pt->stride, x, y, w, h, src, src_stride, 0, 0); screen->transfer_unmap(screen, pt); } @@ -246,6 +248,53 @@ b8g8r8a8_put_tile_rgba(unsigned *dst, } +/*** PIPE_FORMAT_R8G8B8A8_UNORM ***/ + +static void +r8g8b8a8_get_tile_rgba(const unsigned *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + const unsigned pixel = *src++; + pRow[0] = ubyte_to_float((pixel >> 24) & 0xff); + pRow[1] = ubyte_to_float((pixel >> 16) & 0xff); + pRow[2] = ubyte_to_float((pixel >> 8) & 0xff); + pRow[3] = ubyte_to_float((pixel >> 0) & 0xff); + } + p += dst_stride; + } +} + + +static void +r8g8b8a8_put_tile_rgba(unsigned *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned r, g, b, a; + r = float_to_ubyte(pRow[0]); + g = float_to_ubyte(pRow[1]); + b = float_to_ubyte(pRow[2]); + a = float_to_ubyte(pRow[3]); + *dst++ = (r << 24) | (g << 16) | (b << 8) | a; + } + p += src_stride; + } +} + + /*** PIPE_FORMAT_A1R5G5B5_UNORM ***/ static void @@ -1143,6 +1192,9 @@ pipe_tile_raw_to_rgba(enum pipe_format format, case PIPE_FORMAT_B8G8R8A8_UNORM: b8g8r8a8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); break; + case PIPE_FORMAT_R8G8B8A8_UNORM: + r8g8b8a8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); + break; case PIPE_FORMAT_A1R5G5B5_UNORM: a1r5g5b5_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); break; @@ -1219,21 +1271,22 @@ pipe_get_tile_rgba(struct pipe_transfer *pt, { unsigned dst_stride = w * 4; void *packed; + enum pipe_format format = pt->texture->format; if (pipe_clip_tile(x, y, &w, &h, pt)) return; - packed = MALLOC(pf_get_nblocks(&pt->block, w, h) * pt->block.size); + packed = MALLOC(util_format_get_nblocks(format, w, h) * util_format_get_blocksize(format)); if (!packed) return; - if(pt->format == PIPE_FORMAT_YCBCR || pt->format == PIPE_FORMAT_YCBCR_REV) + if(format == PIPE_FORMAT_YCBCR || format == PIPE_FORMAT_YCBCR_REV) assert((x & 1) == 0); pipe_get_tile_raw(pt, x, y, w, h, packed, 0); - pipe_tile_raw_to_rgba(pt->format, packed, w, h, p, dst_stride); + pipe_tile_raw_to_rgba(format, packed, w, h, p, dst_stride); FREE(packed); } @@ -1246,16 +1299,17 @@ pipe_put_tile_rgba(struct pipe_transfer *pt, { unsigned src_stride = w * 4; void *packed; + enum pipe_format format = pt->texture->format; if (pipe_clip_tile(x, y, &w, &h, pt)) return; - packed = MALLOC(pf_get_nblocks(&pt->block, w, h) * pt->block.size); + packed = MALLOC(util_format_get_nblocks(format, w, h) * util_format_get_blocksize(format)); if (!packed) return; - switch (pt->format) { + switch (format) { case PIPE_FORMAT_A8R8G8B8_UNORM: a8r8g8b8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride); break; @@ -1265,6 +1319,9 @@ pipe_put_tile_rgba(struct pipe_transfer *pt, case PIPE_FORMAT_B8G8R8A8_UNORM: b8g8r8a8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride); break; + case PIPE_FORMAT_R8G8B8A8_UNORM: + r8g8b8a8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride); + break; case PIPE_FORMAT_A1R5G5B5_UNORM: a1r5g5b5_put_tile_rgba((ushort *) packed, w, h, p, src_stride); break; @@ -1274,9 +1331,6 @@ pipe_put_tile_rgba(struct pipe_transfer *pt, case PIPE_FORMAT_R8G8B8_UNORM: r8g8b8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride); break; - case PIPE_FORMAT_R8G8B8A8_UNORM: - assert(0); - break; case PIPE_FORMAT_A4R4G4B4_UNORM: a4r4g4b4_put_tile_rgba((ushort *) packed, w, h, p, src_stride); break; @@ -1322,7 +1376,7 @@ pipe_put_tile_rgba(struct pipe_transfer *pt, /*z24s8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/ break; default: - debug_printf("%s: unsupported format %s\n", __FUNCTION__, pf_name(pt->format)); + debug_printf("%s: unsupported format %s\n", __FUNCTION__, pf_name(format)); } pipe_put_tile_raw(pt, x, y, w, h, packed, 0); @@ -1344,6 +1398,7 @@ pipe_get_tile_z(struct pipe_transfer *pt, ubyte *map; uint *pDest = z; uint i, j; + enum pipe_format format = pt->texture->format; if (pipe_clip_tile(x, y, &w, &h, pt)) return; @@ -1354,7 +1409,7 @@ pipe_get_tile_z(struct pipe_transfer *pt, return; } - switch (pt->format) { + switch (format) { case PIPE_FORMAT_Z32_UNORM: { const uint *ptrc @@ -1428,6 +1483,7 @@ pipe_put_tile_z(struct pipe_transfer *pt, const uint *ptrc = zSrc; ubyte *map; uint i, j; + enum pipe_format format = pt->texture->format; if (pipe_clip_tile(x, y, &w, &h, pt)) return; @@ -1438,7 +1494,7 @@ pipe_put_tile_z(struct pipe_transfer *pt, return; } - switch (pt->format) { + switch (format) { case PIPE_FORMAT_Z32_UNORM: { uint *pDest = (uint *) (map + y * pt->stride + x*4); diff --git a/src/gallium/auxiliary/util/u_upload_mgr.h b/src/gallium/auxiliary/util/u_upload_mgr.h index 745b5834af6..e158bed9d04 100644 --- a/src/gallium/auxiliary/util/u_upload_mgr.h +++ b/src/gallium/auxiliary/util/u_upload_mgr.h @@ -32,6 +32,8 @@ #ifndef U_UPLOAD_MGR_H #define U_UPLOAD_MGR_H +#include "pipe/p_defines.h" + struct pipe_screen; struct pipe_buffer; struct u_upload_mgr; diff --git a/src/gallium/auxiliary/vl/Makefile b/src/gallium/auxiliary/vl/Makefile deleted file mode 100644 index 4314c1e8d69..00000000000 --- a/src/gallium/auxiliary/vl/Makefile +++ /dev/null @@ -1,13 +0,0 @@ -TOP = ../../../.. -include $(TOP)/configs/current - -LIBNAME = vl - -C_SOURCES = \ - vl_bitstream_parser.c \ - vl_mpeg12_mc_renderer.c \ - vl_compositor.c \ - vl_csc.c \ - vl_shader_build.c - -include ../../Makefile.template diff --git a/src/gallium/auxiliary/vl/SConscript b/src/gallium/auxiliary/vl/SConscript deleted file mode 100644 index aed69f5efed..00000000000 --- a/src/gallium/auxiliary/vl/SConscript +++ /dev/null @@ -1,13 +0,0 @@ -Import('*') - -vl = env.ConvenienceLibrary( - target = 'vl', - source = [ - 'vl_bitstream_parser.c', - 'vl_mpeg12_mc_renderer.c', - 'vl_compositor.c', - 'vl_csc.c', - 'vl_shader_build.c', - ]) - -auxiliaries.insert(0, vl) diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c index cda6dc134a0..fc2a1c59a6b 100644 --- a/src/gallium/auxiliary/vl/vl_compositor.c +++ b/src/gallium/auxiliary/vl/vl_compositor.c @@ -95,12 +95,11 @@ create_vert_shader(struct vl_compositor *c) assert(c); tokens = (struct tgsi_token*)MALLOC(max_tokens * sizeof(struct tgsi_token)); - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - header = (struct tgsi_header*)&tokens[1]; + header = (struct tgsi_header*)&tokens[0]; *header = tgsi_build_header(); - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + *(struct tgsi_processor*)&tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); - ti = 3; + ti = 2; /* * decl i0 ; Vertex pos @@ -172,12 +171,11 @@ create_frag_shader(struct vl_compositor *c) assert(c); tokens = (struct tgsi_token*)MALLOC(max_tokens * sizeof(struct tgsi_token)); - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - header = (struct tgsi_header*)&tokens[1]; + header = (struct tgsi_header*)&tokens[0]; *header = tgsi_build_header(); - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + *(struct tgsi_processor*)&tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); - ti = 3; + ti = 2; /* decl i0 ; Texcoords for s0 */ decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, 1, 0, 0, TGSI_INTERPOLATE_LINEAR); @@ -213,7 +211,7 @@ create_frag_shader(struct vl_compositor *c) */ for (i = 0; i < 4; ++i) { inst = vl_inst3(TGSI_OPCODE_DP4, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i); - inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X << i; ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } @@ -455,8 +453,8 @@ void vl_compositor_render(struct vl_compositor *compositor, assert(dst_area); assert(picture_type == PIPE_MPEG12_PICTURE_TYPE_FRAME); - compositor->fb_state.width = dst_surface->width[0]; - compositor->fb_state.height = dst_surface->height[0]; + compositor->fb_state.width = dst_surface->width0; + compositor->fb_state.height = dst_surface->height0; compositor->fb_state.cbufs[0] = compositor->pipe->screen->get_tex_surface ( compositor->pipe->screen, @@ -479,8 +477,8 @@ void vl_compositor_render(struct vl_compositor *compositor, compositor->pipe->set_framebuffer_state(compositor->pipe, &compositor->fb_state); compositor->pipe->set_viewport_state(compositor->pipe, &compositor->viewport); compositor->pipe->set_scissor_state(compositor->pipe, &compositor->scissor); - compositor->pipe->bind_sampler_states(compositor->pipe, 1, &compositor->sampler); - compositor->pipe->set_sampler_textures(compositor->pipe, 1, &src_surface); + compositor->pipe->bind_fragment_sampler_states(compositor->pipe, 1, &compositor->sampler); + compositor->pipe->set_fragment_sampler_textures(compositor->pipe, 1, &src_surface); compositor->pipe->bind_vs_state(compositor->pipe, compositor->vertex_shader); compositor->pipe->bind_fs_state(compositor->pipe, compositor->fragment_shader); compositor->pipe->set_vertex_buffers(compositor->pipe, 2, compositor->vertex_bufs); @@ -504,12 +502,12 @@ void vl_compositor_render(struct vl_compositor *compositor, vs_consts->dst_trans.z = 0; vs_consts->dst_trans.w = 0; - vs_consts->src_scale.x = src_area->w / (float)src_surface->width[0]; - vs_consts->src_scale.y = src_area->h / (float)src_surface->height[0]; + vs_consts->src_scale.x = src_area->w / (float)src_surface->width0; + vs_consts->src_scale.y = src_area->h / (float)src_surface->height0; vs_consts->src_scale.z = 1; vs_consts->src_scale.w = 1; - vs_consts->src_trans.x = src_area->x / (float)src_surface->width[0]; - vs_consts->src_trans.y = src_area->y / (float)src_surface->height[0]; + vs_consts->src_trans.x = src_area->x / (float)src_surface->width0; + vs_consts->src_trans.y = src_area->y / (float)src_surface->height0; vs_consts->src_trans.z = 0; vs_consts->src_trans.w = 0; diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c index bbe0d5fa4c9..caf581aca60 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c @@ -29,6 +29,7 @@ #include <assert.h> #include <pipe/p_context.h> #include <pipe/p_inlines.h> +#include <util/u_format.h> #include <util/u_math.h> #include <util/u_memory.h> #include <tgsi/tgsi_parse.h> @@ -115,12 +116,11 @@ create_intra_vert_shader(struct vl_mpeg12_mc_renderer *r) assert(r); tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token)); - *(struct tgsi_version *) &tokens[0] = tgsi_build_version(); - header = (struct tgsi_header *) &tokens[1]; + header = (struct tgsi_header *) &tokens[0]; *header = tgsi_build_header(); - *(struct tgsi_processor *) &tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + *(struct tgsi_processor *) &tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); - ti = 3; + ti = 2; /* * decl i0 ; Vertex pos @@ -185,12 +185,11 @@ create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r) assert(r); tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token)); - *(struct tgsi_version *) &tokens[0] = tgsi_build_version(); - header = (struct tgsi_header *) &tokens[1]; + header = (struct tgsi_header *) &tokens[0]; *header = tgsi_build_header(); - *(struct tgsi_processor *) &tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + *(struct tgsi_processor *) &tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); - ti = 3; + ti = 2; /* * decl i0 ; Luma texcoords @@ -237,10 +236,10 @@ create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r) ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X; + inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X; + inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X; + inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X << i; ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } @@ -278,12 +277,11 @@ create_frame_pred_vert_shader(struct vl_mpeg12_mc_renderer *r) assert(r); tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token)); - *(struct tgsi_version *) &tokens[0] = tgsi_build_version(); - header = (struct tgsi_header *) &tokens[1]; + header = (struct tgsi_header *) &tokens[0]; *header = tgsi_build_header(); - *(struct tgsi_processor *) &tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + *(struct tgsi_processor *) &tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); - ti = 3; + ti = 2; /* * decl i0 ; Vertex pos @@ -363,12 +361,11 @@ create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) assert(r); tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token)); - *(struct tgsi_version *) &tokens[0] = tgsi_build_version(); - header = (struct tgsi_header *) &tokens[1]; + header = (struct tgsi_header *) &tokens[0]; *header = tgsi_build_header(); - *(struct tgsi_processor *) &tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + *(struct tgsi_processor *) &tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); - ti = 3; + ti = 2; /* * decl i0 ; Luma texcoords @@ -417,10 +414,10 @@ create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X; + inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X; + inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X; + inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X << i; ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } @@ -474,12 +471,11 @@ create_frame_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r) assert(r); tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token)); - *(struct tgsi_version *) &tokens[0] = tgsi_build_version(); - header = (struct tgsi_header *) &tokens[1]; + header = (struct tgsi_header *) &tokens[0]; *header = tgsi_build_header(); - *(struct tgsi_processor *) &tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + *(struct tgsi_processor *) &tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); - ti = 3; + ti = 2; /* * decl i0 ; Vertex pos @@ -567,12 +563,11 @@ create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) assert(r); tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token)); - *(struct tgsi_version *) &tokens[0] = tgsi_build_version(); - header = (struct tgsi_header *) &tokens[1]; + header = (struct tgsi_header *) &tokens[0]; *header = tgsi_build_header(); - *(struct tgsi_processor *) &tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + *(struct tgsi_processor *) &tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); - ti = 3; + ti = 2; /* * decl i0 ; Luma texcoords @@ -626,10 +621,10 @@ create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X; + inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X; + inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X; + inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X << i; ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } @@ -648,10 +643,10 @@ create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */ inst = vl_inst4(TGSI_OPCODE_LRP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X; + inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X; + inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X; + inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X; ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* add o0, t0, t1 ; Add past/future ref and differential to form final output */ @@ -689,7 +684,7 @@ xfer_buffers_map(struct vl_mpeg12_mc_renderer *r) ( r->pipe->screen, r->textures.all[i], 0, 0, 0, PIPE_TRANSFER_WRITE, 0, 0, - r->textures.all[i]->width[0], r->textures.all[i]->height[0] + r->textures.all[i]->width0, r->textures.all[i]->height0 ); r->texels[i] = r->pipe->screen->transfer_map(r->pipe->screen, r->tex_transfer[i]); @@ -843,26 +838,25 @@ init_buffers(struct vl_mpeg12_mc_renderer *r) /* TODO: Accomodate HW that can't do this and also for cases when this isn't precise enough */ template.format = PIPE_FORMAT_R16_SNORM; template.last_level = 0; - template.width[0] = r->pot_buffers ? + template.width0 = r->pot_buffers ? util_next_power_of_two(r->picture_width) : r->picture_width; - template.height[0] = r->pot_buffers ? + template.height0 = r->pot_buffers ? util_next_power_of_two(r->picture_height) : r->picture_height; - template.depth[0] = 1; - pf_get_block(template.format, &template.block); + template.depth0 = 1; template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_DYNAMIC; r->textures.individual.y = r->pipe->screen->texture_create(r->pipe->screen, &template); if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) { - template.width[0] = r->pot_buffers ? + template.width0 = r->pot_buffers ? util_next_power_of_two(r->picture_width / 2) : r->picture_width / 2; - template.height[0] = r->pot_buffers ? + template.height0 = r->pot_buffers ? util_next_power_of_two(r->picture_height / 2) : r->picture_height / 2; } else if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) - template.height[0] = r->pot_buffers ? + template.height0 = r->pot_buffers ? util_next_power_of_two(r->picture_height / 2) : r->picture_height / 2; @@ -1294,8 +1288,8 @@ flush(struct vl_mpeg12_mc_renderer *r) PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD ); - vs_consts->denorm.x = r->surface->width[0]; - vs_consts->denorm.y = r->surface->height[0]; + vs_consts->denorm.x = r->surface->width0; + vs_consts->denorm.y = r->surface->height0; pipe_buffer_unmap(r->pipe->screen, r->vs_const_buf.buffer); @@ -1307,8 +1301,8 @@ flush(struct vl_mpeg12_mc_renderer *r) if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) { r->pipe->set_vertex_buffers(r->pipe, 1, r->vertex_bufs.all); r->pipe->set_vertex_elements(r->pipe, 4, r->vertex_elems); - r->pipe->set_sampler_textures(r->pipe, 3, r->textures.all); - r->pipe->bind_sampler_states(r->pipe, 3, r->samplers.all); + r->pipe->set_fragment_sampler_textures(r->pipe, 3, r->textures.all); + r->pipe->bind_fragment_sampler_states(r->pipe, 3, r->samplers.all); r->pipe->bind_vs_state(r->pipe, r->i_vs); r->pipe->bind_fs_state(r->pipe, r->i_fs); @@ -1321,8 +1315,8 @@ flush(struct vl_mpeg12_mc_renderer *r) r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all); r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems); r->textures.individual.ref[0] = r->past; - r->pipe->set_sampler_textures(r->pipe, 4, r->textures.all); - r->pipe->bind_sampler_states(r->pipe, 4, r->samplers.all); + r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all); + r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all); r->pipe->bind_vs_state(r->pipe, r->p_vs[0]); r->pipe->bind_fs_state(r->pipe, r->p_fs[0]); @@ -1335,8 +1329,8 @@ flush(struct vl_mpeg12_mc_renderer *r) r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all); r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems); r->textures.individual.ref[0] = r->past; - r->pipe->set_sampler_textures(r->pipe, 4, r->textures.all); - r->pipe->bind_sampler_states(r->pipe, 4, r->samplers.all); + r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all); + r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all); r->pipe->bind_vs_state(r->pipe, r->p_vs[1]); r->pipe->bind_fs_state(r->pipe, r->p_fs[1]); @@ -1349,8 +1343,8 @@ flush(struct vl_mpeg12_mc_renderer *r) r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all); r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems); r->textures.individual.ref[0] = r->future; - r->pipe->set_sampler_textures(r->pipe, 4, r->textures.all); - r->pipe->bind_sampler_states(r->pipe, 4, r->samplers.all); + r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all); + r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all); r->pipe->bind_vs_state(r->pipe, r->p_vs[0]); r->pipe->bind_fs_state(r->pipe, r->p_fs[0]); @@ -1363,8 +1357,8 @@ flush(struct vl_mpeg12_mc_renderer *r) r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all); r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems); r->textures.individual.ref[0] = r->future; - r->pipe->set_sampler_textures(r->pipe, 4, r->textures.all); - r->pipe->bind_sampler_states(r->pipe, 4, r->samplers.all); + r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all); + r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all); r->pipe->bind_vs_state(r->pipe, r->p_vs[1]); r->pipe->bind_fs_state(r->pipe, r->p_fs[1]); @@ -1378,8 +1372,8 @@ flush(struct vl_mpeg12_mc_renderer *r) r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems); r->textures.individual.ref[0] = r->past; r->textures.individual.ref[1] = r->future; - r->pipe->set_sampler_textures(r->pipe, 5, r->textures.all); - r->pipe->bind_sampler_states(r->pipe, 5, r->samplers.all); + r->pipe->set_fragment_sampler_textures(r->pipe, 5, r->textures.all); + r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all); r->pipe->bind_vs_state(r->pipe, r->b_vs[0]); r->pipe->bind_fs_state(r->pipe, r->b_fs[0]); @@ -1393,8 +1387,8 @@ flush(struct vl_mpeg12_mc_renderer *r) r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems); r->textures.individual.ref[0] = r->past; r->textures.individual.ref[1] = r->future; - r->pipe->set_sampler_textures(r->pipe, 5, r->textures.all); - r->pipe->bind_sampler_states(r->pipe, 5, r->samplers.all); + r->pipe->set_fragment_sampler_textures(r->pipe, 5, r->textures.all); + r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all); r->pipe->bind_vs_state(r->pipe, r->b_vs[1]); r->pipe->bind_fs_state(r->pipe, r->b_fs[1]); @@ -1461,7 +1455,7 @@ grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby, assert(r); assert(blocks); - tex_pitch = r->tex_transfer[0]->stride / r->tex_transfer[0]->block.size; + tex_pitch = r->tex_transfer[0]->stride / util_format_get_blocksize(r->tex_transfer[0]->texture->format); texels = r->texels[0] + mbpy * tex_pitch + mbpx; for (y = 0; y < 2; ++y) { @@ -1500,7 +1494,7 @@ grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby, mbpy /= 2; for (tb = 0; tb < 2; ++tb) { - tex_pitch = r->tex_transfer[tb + 1]->stride / r->tex_transfer[tb + 1]->block.size; + tex_pitch = r->tex_transfer[tb + 1]->stride / util_format_get_blocksize(r->tex_transfer[tb + 1]->texture->format); texels = r->texels[tb + 1] + mbpy * tex_pitch + mbpx; if ((cbp >> (1 - tb)) & 1) { @@ -1644,8 +1638,8 @@ vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer renderer->past = past; renderer->future = future; renderer->fence = fence; - renderer->surface_tex_inv_size.x = 1.0f / surface->width[0]; - renderer->surface_tex_inv_size.y = 1.0f / surface->height[0]; + renderer->surface_tex_inv_size.x = 1.0f / surface->width0; + renderer->surface_tex_inv_size.y = 1.0f / surface->height0; } while (num_macroblocks) { diff --git a/src/gallium/auxiliary/vl/vl_shader_build.c b/src/gallium/auxiliary/vl/vl_shader_build.c index faa20a903cd..d011ef97bd6 100644 --- a/src/gallium/auxiliary/vl/vl_shader_build.c +++ b/src/gallium/auxiliary/vl/vl_shader_build.c @@ -36,10 +36,10 @@ struct tgsi_full_declaration vl_decl_input(unsigned int name, unsigned int index decl.Declaration.File = TGSI_FILE_INPUT; decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = name; - decl.Semantic.SemanticIndex = index; - decl.DeclarationRange.First = first; - decl.DeclarationRange.Last = last; + decl.Semantic.Name = name; + decl.Semantic.Index = index; + decl.Range.First = first; + decl.Range.Last = last; return decl; } @@ -64,11 +64,11 @@ struct tgsi_full_declaration vl_decl_interpolated_input decl.Declaration.File = TGSI_FILE_INPUT; decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = name; - decl.Semantic.SemanticIndex = index; + decl.Semantic.Name = name; + decl.Semantic.Index = index; decl.Declaration.Interpolate = interpolation;; - decl.DeclarationRange.First = first; - decl.DeclarationRange.Last = last; + decl.Range.First = first; + decl.Range.Last = last; return decl; } @@ -79,10 +79,10 @@ struct tgsi_full_declaration vl_decl_constants(unsigned int name, unsigned int i decl.Declaration.File = TGSI_FILE_CONSTANT; decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = name; - decl.Semantic.SemanticIndex = index; - decl.DeclarationRange.First = first; - decl.DeclarationRange.Last = last; + decl.Semantic.Name = name; + decl.Semantic.Index = index; + decl.Range.First = first; + decl.Range.Last = last; return decl; } @@ -93,10 +93,10 @@ struct tgsi_full_declaration vl_decl_output(unsigned int name, unsigned int inde decl.Declaration.File = TGSI_FILE_OUTPUT; decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = name; - decl.Semantic.SemanticIndex = index; - decl.DeclarationRange.First = first; - decl.DeclarationRange.Last = last; + decl.Semantic.Name = name; + decl.Semantic.Index = index; + decl.Range.First = first; + decl.Range.Last = last; return decl; } @@ -107,8 +107,8 @@ struct tgsi_full_declaration vl_decl_temps(unsigned int first, unsigned int last decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_TEMPORARY; - decl.DeclarationRange.First = first; - decl.DeclarationRange.Last = last; + decl.Range.First = first; + decl.Range.Last = last; return decl; } @@ -119,8 +119,8 @@ struct tgsi_full_declaration vl_decl_samplers(unsigned int first, unsigned int l decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_SAMPLER; - decl.DeclarationRange.First = first; - decl.DeclarationRange.Last = last; + decl.Range.First = first; + decl.Range.Last = last; return decl; } @@ -138,11 +138,11 @@ struct tgsi_full_instruction vl_inst2 inst.Instruction.Opcode = opcode; inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = dst_file; - inst.FullDstRegisters[0].DstRegister.Index = dst_index; + inst.Dst[0].Register.File = dst_file; + inst.Dst[0].Register.Index = dst_index; inst.Instruction.NumSrcRegs = 1; - inst.FullSrcRegisters[0].SrcRegister.File = src_file; - inst.FullSrcRegisters[0].SrcRegister.Index = src_index; + inst.Src[0].Register.File = src_file; + inst.Src[0].Register.Index = src_index; return inst; } @@ -162,13 +162,13 @@ struct tgsi_full_instruction vl_inst3 inst.Instruction.Opcode = opcode; inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = dst_file; - inst.FullDstRegisters[0].DstRegister.Index = dst_index; + inst.Dst[0].Register.File = dst_file; + inst.Dst[0].Register.Index = dst_index; inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = src1_file; - inst.FullSrcRegisters[0].SrcRegister.Index = src1_index; - inst.FullSrcRegisters[1].SrcRegister.File = src2_file; - inst.FullSrcRegisters[1].SrcRegister.Index = src2_index; + inst.Src[0].Register.File = src1_file; + inst.Src[0].Register.Index = src1_index; + inst.Src[1].Register.File = src2_file; + inst.Src[1].Register.Index = src2_index; return inst; } @@ -188,14 +188,15 @@ struct tgsi_full_instruction vl_tex inst.Instruction.Opcode = TGSI_OPCODE_TEX; inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = dst_file; - inst.FullDstRegisters[0].DstRegister.Index = dst_index; + inst.Dst[0].Register.File = dst_file; + inst.Dst[0].Register.Index = dst_index; inst.Instruction.NumSrcRegs = 2; - inst.InstructionExtTexture.Texture = tex; - inst.FullSrcRegisters[0].SrcRegister.File = src1_file; - inst.FullSrcRegisters[0].SrcRegister.Index = src1_index; - inst.FullSrcRegisters[1].SrcRegister.File = src2_file; - inst.FullSrcRegisters[1].SrcRegister.Index = src2_index; + inst.Instruction.Texture = 1; + inst.Texture.Texture = tex; + inst.Src[0].Register.File = src1_file; + inst.Src[0].Register.Index = src1_index; + inst.Src[1].Register.File = src2_file; + inst.Src[1].Register.Index = src2_index; return inst; } @@ -217,15 +218,15 @@ struct tgsi_full_instruction vl_inst4 inst.Instruction.Opcode = opcode; inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = dst_file; - inst.FullDstRegisters[0].DstRegister.Index = dst_index; + inst.Dst[0].Register.File = dst_file; + inst.Dst[0].Register.Index = dst_index; inst.Instruction.NumSrcRegs = 3; - inst.FullSrcRegisters[0].SrcRegister.File = src1_file; - inst.FullSrcRegisters[0].SrcRegister.Index = src1_index; - inst.FullSrcRegisters[1].SrcRegister.File = src2_file; - inst.FullSrcRegisters[1].SrcRegister.Index = src2_index; - inst.FullSrcRegisters[2].SrcRegister.File = src3_file; - inst.FullSrcRegisters[2].SrcRegister.Index = src3_index; + inst.Src[0].Register.File = src1_file; + inst.Src[0].Register.Index = src1_index; + inst.Src[1].Register.File = src2_file; + inst.Src[1].Register.Index = src2_index; + inst.Src[2].Register.File = src3_file; + inst.Src[2].Register.Index = src3_index; return inst; } diff --git a/src/gallium/docs/Makefile b/src/gallium/docs/Makefile new file mode 100644 index 00000000000..d4a5be41922 --- /dev/null +++ b/src/gallium/docs/Makefile @@ -0,0 +1,89 @@ +# Makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +PAPER = +BUILDDIR = build + +# Internal variables. +PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source + +.PHONY: help clean html dirhtml pickle json htmlhelp qthelp latex changes linkcheck doctest + +help: + @echo "Please use \`make <target>' where <target> is one of" + @echo " html to make standalone HTML files" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " qthelp to make HTML files and a qthelp project" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " changes to make an overview of all changed/added/deprecated items" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the documentation (if enabled)" + +clean: + -rm -rf $(BUILDDIR)/* + +html: + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + +dirhtml: + $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." + +pickle: + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle + @echo + @echo "Build finished; now you can process the pickle files." + +json: + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json + @echo + @echo "Build finished; now you can process the JSON files." + +htmlhelp: + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in $(BUILDDIR)/htmlhelp." + +qthelp: + $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp + @echo + @echo "Build finished; now you can run "qcollectiongenerator" with the" \ + ".qhcp project file in $(BUILDDIR)/qthelp, like this:" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Gallium.qhcp" + @echo "To view the help file:" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Gallium.qhc" + +latex: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." + @echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \ + "run these through (pdf)latex." + +changes: + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes + @echo + @echo "The overview file is in $(BUILDDIR)/changes." + +linkcheck: + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in $(BUILDDIR)/linkcheck/output.txt." + +doctest: + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in $(BUILDDIR)/doctest/output.txt." diff --git a/src/gallium/docs/make.bat b/src/gallium/docs/make.bat new file mode 100644 index 00000000000..6f97e0730a7 --- /dev/null +++ b/src/gallium/docs/make.bat @@ -0,0 +1,113 @@ +@ECHO OFF + +REM Command file for Sphinx documentation + +set SPHINXBUILD=sphinx-build +set BUILDDIR=build +set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source +if NOT "%PAPER%" == "" ( + set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% +) + +if "%1" == "" goto help + +if "%1" == "help" ( + :help + echo.Please use `make ^<target^>` where ^<target^> is one of + echo. html to make standalone HTML files + echo. dirhtml to make HTML files named index.html in directories + echo. pickle to make pickle files + echo. json to make JSON files + echo. htmlhelp to make HTML files and a HTML help project + echo. qthelp to make HTML files and a qthelp project + echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter + echo. changes to make an overview over all changed/added/deprecated items + echo. linkcheck to check all external links for integrity + echo. doctest to run all doctests embedded in the documentation if enabled + goto end +) + +if "%1" == "clean" ( + for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i + del /q /s %BUILDDIR%\* + goto end +) + +if "%1" == "html" ( + %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/html. + goto end +) + +if "%1" == "dirhtml" ( + %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. + goto end +) + +if "%1" == "pickle" ( + %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle + echo. + echo.Build finished; now you can process the pickle files. + goto end +) + +if "%1" == "json" ( + %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json + echo. + echo.Build finished; now you can process the JSON files. + goto end +) + +if "%1" == "htmlhelp" ( + %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp + echo. + echo.Build finished; now you can run HTML Help Workshop with the ^ +.hhp project file in %BUILDDIR%/htmlhelp. + goto end +) + +if "%1" == "qthelp" ( + %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp + echo. + echo.Build finished; now you can run "qcollectiongenerator" with the ^ +.qhcp project file in %BUILDDIR%/qthelp, like this: + echo.^> qcollectiongenerator %BUILDDIR%\qthelp\Gallium.qhcp + echo.To view the help file: + echo.^> assistant -collectionFile %BUILDDIR%\qthelp\Gallium.ghc + goto end +) + +if "%1" == "latex" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + echo. + echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "changes" ( + %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes + echo. + echo.The overview file is in %BUILDDIR%/changes. + goto end +) + +if "%1" == "linkcheck" ( + %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck + echo. + echo.Link check complete; look for any errors in the above output ^ +or in %BUILDDIR%/linkcheck/output.txt. + goto end +) + +if "%1" == "doctest" ( + %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest + echo. + echo.Testing of doctests in the sources finished, look at the ^ +results in %BUILDDIR%/doctest/output.txt. + goto end +) + +:end diff --git a/src/gallium/docs/source/conf.py b/src/gallium/docs/source/conf.py new file mode 100644 index 00000000000..9b0c86babdb --- /dev/null +++ b/src/gallium/docs/source/conf.py @@ -0,0 +1,197 @@ +# -*- coding: utf-8 -*- +# +# Gallium documentation build configuration file, created by +# sphinx-quickstart on Sun Dec 20 14:09:05 2009. +# +# This file is execfile()d with the current directory set to its containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys, os + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +#sys.path.append(os.path.abspath('.')) + +# -- General configuration ----------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be extensions +# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. +extensions = ['sphinx.ext.pngmath'] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The encoding of source files. +#source_encoding = 'utf-8' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'Gallium' +copyright = u'2009, VMWare, X.org, Nouveau' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = '0.3' +# The full version, including alpha/beta/rc tags. +release = '0.3' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +#language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +#today_fmt = '%B %d, %Y' + +# List of documents that shouldn't be included in the build. +#unused_docs = [] + +# List of directories, relative to source directory, that shouldn't be searched +# for source files. +exclude_trees = [] + +# The reST default role (used for this markup: `text`) to use for all documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# The language for highlighting source code. +highlight_language = 'c' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + + +# -- Options for HTML output --------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. Major themes that come with +# Sphinx are currently 'default' and 'sphinxdoc'. +html_theme = 'default' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +#html_theme_path = [] + +# The name for this set of Sphinx documents. If None, it defaults to +# "<project> v<release> documentation". +#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +#html_additional_pages = {} + +# If false, no module index is generated. +#html_use_modindex = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a <link> tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = '' + +# Output file base name for HTML help builder. +htmlhelp_basename = 'Galliumdoc' + + +# -- Options for LaTeX output -------------------------------------------------- + +# The paper size ('letter' or 'a4'). +#latex_paper_size = 'letter' + +# The font size ('10pt', '11pt' or '12pt'). +#latex_font_size = '10pt' + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, author, documentclass [howto/manual]). +latex_documents = [ + ('index', 'Gallium.tex', u'Gallium Documentation', + u'VMWare, X.org, Nouveau', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# Additional stuff for the LaTeX preamble. +#latex_preamble = '' + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_use_modindex = True diff --git a/src/gallium/docs/source/context.rst b/src/gallium/docs/source/context.rst new file mode 100644 index 00000000000..21f5f9111a0 --- /dev/null +++ b/src/gallium/docs/source/context.rst @@ -0,0 +1,128 @@ +Context +======= + +The context object represents the purest, most directly accessible, abilities +of the device's 3D rendering pipeline. + +Methods +------- + +CSO State +^^^^^^^^^ + +All CSO state is created, bound, and destroyed, with triplets of methods that +all follow a specific naming scheme. For example, ``create_blend_state``, +``bind_blend_state``, and ``destroy_blend_state``. + +CSO objects handled by the context object: + +* :ref:`Blend`: ``*_blend_state`` +* :ref:`Sampler`: These are special; they can be bound to either vertex or + fragment samplers, and they are bound in groups. + ``bind_fragment_sampler_states``, ``bind_vertex_sampler_states`` +* :ref:`Rasterizer`: ``*_rasterizer_state`` +* :ref:`Depth, Stencil, & Alpha`: ``*_depth_stencil_alpha_state`` +* :ref:`Shader`: These have two sets of methods. ``*_fs_state`` is for + fragment shaders, and ``*_vs_state`` is for vertex shaders. + + +Resource Binding State +^^^^^^^^^^^^^^^^^^^^^^ + +This state describes how resources in various flavours (textures, +buffers, surfaces) are bound to the driver. + + +* ``set_constant_buffer`` +* ``set_framebuffer_state`` +* ``set_fragment_sampler_textures`` +* ``set_vertex_sampler_textures`` +* ``set_vertex_buffers`` + + +Non-CSO State +^^^^^^^^^^^^^ + +These pieces of state are too small, variable, and/or trivial to have CSO +objects. They all follow simple, one-method binding calls, e.g. +``set_edgeflags``. + +* ``set_edgeflags`` +* ``set_blend_color`` +* ``set_clip_state`` +* ``set_polygon_stipple`` +* ``set_scissor_state`` +* ``set_viewport_state`` +* ``set_vertex_elements`` + + +Clearing +^^^^^^^^ + +``clear`` initializes some or all of the surfaces currently bound to +the framebuffer to particular RGBA, depth, or stencil values. + +Clear is one of the most difficult concepts to nail down to a single +interface and it seems likely that we will want to add additional +clear paths, for instance clearing surfaces not bound to the +framebuffer, or read-modify-write clears such as depth-only or +stencil-only clears of packed depth-stencil buffers. + + +Drawing +^^^^^^^ + +``draw_arrays`` + +``draw_elements`` + +``draw_range_elements`` + + +Queries +^^^^^^^ + +Queries gather some statistic from the 3D pipeline over one or more +draws. Queries may be nested, though no state tracker currently +exercises this. + +Queries can be created with ``create_query`` and deleted with +``destroy_query``. To enable a query, use ``begin_query``, and when finished, +use ``end_query`` to stop the query. Finally, ``get_query_result`` is used +to retrieve the results. + +Flushing +^^^^^^^^ + +``flush`` + + +Resource Busy Queries +^^^^^^^^^^^^^^^^^^^^^ + +``is_texture_referenced`` + +``is_buffer_referenced`` + + + +Blitting +^^^^^^^^ + +These methods emulate classic blitter controls. They are not guaranteed to be +available; if they are set to NULL, then they are not present. + +These methods operate directly on ``pipe_surface`` objects, and stand +apart from any 3D state in the context. Blitting functionality may be +moved to a separate abstraction at some point in the future. + +``surface_fill`` performs a fill operation on a section of a surface. + +``surface_copy`` blits a region of a surface to a region of another surface, +provided that both surfaces are the same format. The source and destination +may be the same surface, and overlapping blits are permitted. + +The interfaces to these calls are likely to change to make it easier +for a driver to batch multiple blits with the same source and +destination. + diff --git a/src/gallium/docs/source/cso.rst b/src/gallium/docs/source/cso.rst new file mode 100644 index 00000000000..dab1ee50f39 --- /dev/null +++ b/src/gallium/docs/source/cso.rst @@ -0,0 +1,14 @@ +CSO +=== + +CSO, Constant State Objects, are a core part of Gallium's API. + +CSO work on the principle of reusable state; they are created by filling +out a state object with the desired properties, then passing that object +to a context. The context returns an opaque context-specific handle which +can be bound at any time for the desired effect. + +.. toctree:: + :glob: + + cso/* diff --git a/src/gallium/docs/source/cso/blend.rst b/src/gallium/docs/source/cso/blend.rst new file mode 100644 index 00000000000..fd9e4a1e2d5 --- /dev/null +++ b/src/gallium/docs/source/cso/blend.rst @@ -0,0 +1,14 @@ +.. _blend: + +Blend +===== + +This state controls blending of the final fragments into the target rendering +buffers. + +XXX it is unresolved what behavior should result if blend_enable is off. + +Members +------- + +XXX undocumented members diff --git a/src/gallium/docs/source/cso/dsa.rst b/src/gallium/docs/source/cso/dsa.rst new file mode 100644 index 00000000000..12abaa9d6fe --- /dev/null +++ b/src/gallium/docs/source/cso/dsa.rst @@ -0,0 +1,58 @@ +.. _depth,stencil,&alpha: + +Depth, Stencil, & Alpha +======================= + +These three states control the depth, stencil, and alpha tests, used to +discard fragments that have passed through the fragment shader. + +Traditionally, these three tests have been clumped together in hardware, so +they are all stored in one structure. + +During actual execution, the order of operations done on fragments is always: + +* Stencil +* Depth +* Alpha + +Depth Members +------------- + +enabled + Whether the depth test is enabled. +writemask + Whether the depth buffer receives depth writes. +func + The depth test function. One of PIPE_FUNC. + +Stencil Members +--------------- + +XXX document valuemask, writemask + +enabled + Whether the stencil test is enabled. For the second stencil, whether the + two-sided stencil is enabled. +func + The stencil test function. One of PIPE_FUNC. +ref_value + Stencil test reference value; used for certain functions. +fail_op + The operation to carry out if the stencil test fails. One of + PIPE_STENCIL_OP. +zfail_op + The operation to carry out if the stencil test passes but the depth test + fails. One of PIPE_STENCIL_OP. +zpass_op + The operation to carry out if the stencil test and depth test both pass. + One of PIPE_STENCIL_OP. + +Alpha Members +------------- + +enabled + Whether the alpha test is enabled. +func + The alpha test function. One of PIPE_FUNC. +ref_value + Alpha test reference value; used for certain functions. diff --git a/src/gallium/docs/source/cso/rasterizer.rst b/src/gallium/docs/source/cso/rasterizer.rst new file mode 100644 index 00000000000..afb58ea375f --- /dev/null +++ b/src/gallium/docs/source/cso/rasterizer.rst @@ -0,0 +1,87 @@ +.. _rasterizer: + +Rasterizer +========== + +The rasterizer is the main chunk of state controlling how vertices are +interpolated into fragments. + +Members +------- + +XXX undocumented light_twoside, front_winding, cull_mode, fill_cw, fill_ccw, offset_cw, offset_ccw +XXX moar undocumented poly_smooth, line_stipple_factor, line_last_pixel, offset_units, offset_scale +XXX sprite_coord_mode + +flatshade + If set, the provoking vertex of each polygon is used to determine the + color of the entire polygon. If not set, the color fragments will be + interpolated from each vertex's color. +scissor + Whether the scissor test is enabled. +poly_stipple_enable + Whether polygon stippling is enabled. +point_smooth + Whether points should be smoothed. Point smoothing turns rectangular + points into circles or ovals. +point_sprite + Whether point sprites are enabled. +point_size_per_vertex + Whether vertices have a point size element. +multisample + Whether :ref:`MSAA` is enabled. +line_smooth + Whether lines should be smoothed. Line smoothing is simply anti-aliasing. +line_stipple_enable + Whether line stippling is enabled. +line_stipple_pattern + 16-bit bitfield of on/off flags, used to pattern the line stipple. +bypass_vs_clip_and_viewport + Whether the entire TCL pipeline should be bypassed. This implies that + vertices are pre-transformed for the viewport, and will not be run + through the vertex shader. Note that implementations may still clip away + vertices that are not in the viewport. +flatshade_first + Whether the first vertex should be the provoking vertex, for most + primitives. If not set, the last vertex is the provoking vertex. +gl_rasterization_rules + Whether the rasterizer should use (0.5, 0.5) pixel centers. When not set, + the rasterizer will use (0, 0) for pixel centers. +line_width + The width of lines. +point_size + The size of points, if not specified per-vertex. +point_size_min + The minimum size of points. +point_size_max + The maximum size of points. + +Notes +----- + +flatshade +^^^^^^^^^ + +The actual interpolated shading algorithm is obviously +implementation-dependent, but will usually be Gourard for most hardware. + +bypass_vs_clip_and_viewport +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When set, this implies that vertices are pre-transformed for the viewport, and +will not be run through the vertex shader. Note that implementations may still +clip away vertices that are not visible. + +flatshade_first +^^^^^^^^^^^^^^^ + +There are several important exceptions to the specification of this rule. + +* ``PIPE_PRIMITIVE_POLYGON``: The provoking vertex is always the first + vertex. If the caller wishes to change the provoking vertex, they merely + need to rotate the vertices themselves. +* ``PIPE_PRIMITIVE_QUAD``, ``PIPE_PRIMITIVE_QUAD_STRIP``: This option has no + effect; the provoking vertex is always the last vertex. +* ``PIPE_PRIMITIVE_TRIANGLE_FAN``: When set, the provoking vertex is the + second vertex, not the first. This permits each segment of the fan to have + a different color. diff --git a/src/gallium/docs/source/cso/sampler.rst b/src/gallium/docs/source/cso/sampler.rst new file mode 100644 index 00000000000..e3f1757f57a --- /dev/null +++ b/src/gallium/docs/source/cso/sampler.rst @@ -0,0 +1,46 @@ +.. _sampler: + +Sampler +======= + +Texture units have many options for selecting texels from loaded textures; +this state controls an individual texture unit's texel-sampling settings. + +Texture coordinates are always treated as four-dimensional, and referred to +with the traditional (S, T, R, Q) notation. + +Members +------- + +XXX undocumented compare_mode, compare_func + +wrap_s + How to wrap the S coordinate. One of PIPE_TEX_WRAP. +wrap_t + How to wrap the T coordinate. One of PIPE_TEX_WRAP. +wrap_r + How to wrap the R coordinate. One of PIPE_TEX_WRAP. +min_img_filter + The filter to use when minifying texels. One of PIPE_TEX_FILTER. +min_mip_filter + The filter to use when minifying mipmapped textures. One of + PIPE_TEX_FILTER. +mag_img_filter + The filter to use when magnifying texels. One of PIPE_TEX_FILTER. +normalized_coords + Whether the texture coordinates are normalized. If normalized, they will + always be in [0, 1]. If not, they will be in the range of each dimension + of the loaded texture. +prefilter + XXX From the Doxy, "weird sampling state exposed by some APIs." Refine. +lod_bias + The bias to apply to the level of detail. +min_lod + Minimum level of detail, used to clamp LoD after bias. +max_lod + Maximum level of detail, used to clamp LoD after bias. +border_color + RGBA color used for out-of-bounds coordinates. +max_anisotropy + Maximum filtering to apply anisotropically to textures. Setting this to + 1.0 effectively disables anisotropic filtering. diff --git a/src/gallium/docs/source/cso/shader.rst b/src/gallium/docs/source/cso/shader.rst new file mode 100644 index 00000000000..0ee42c87876 --- /dev/null +++ b/src/gallium/docs/source/cso/shader.rst @@ -0,0 +1,12 @@ +.. _shader: + +Shader +====== + +One of the two types of shaders supported by Gallium. + +Members +------- + +tokens + A list of tgsi_tokens. diff --git a/src/gallium/docs/source/distro.rst b/src/gallium/docs/source/distro.rst new file mode 100644 index 00000000000..33e846e33d2 --- /dev/null +++ b/src/gallium/docs/source/distro.rst @@ -0,0 +1,141 @@ +Distribution +============ + +Along with the interface definitions, the following drivers, state trackers, +and auxiliary modules are shipped in the standard Gallium distribution. + +Drivers +------- + +Cell +^^^^ + +Failover +^^^^^^^^ + +Deprecated. + +Intel i915 +^^^^^^^^^^ + +Intel i965 +^^^^^^^^^^ + +Highly experimental. + +Identity +^^^^^^^^ + +Wrapper driver. + +LLVM Softpipe +^^^^^^^^^^^^^ + +nVidia nv04 +^^^^^^^^^^^ + +Deprecated. + +nVidia nv10 +^^^^^^^^^^^ + +Deprecated. + +nVidia nv20 +^^^^^^^^^^^ + +Deprecated. + +nVidia nv30 +^^^^^^^^^^^ + +nVidia nv40 +^^^^^^^^^^^ + +nVidia nv50 +^^^^^^^^^^^ + +VMWare SVGA +^^^^^^^^^^^ + +ATI r300 +^^^^^^^^ + +AMD/ATI r600 +^^^^^^^^^^^^ + +Highly experimental. + +Softpipe +^^^^^^^^ + +Reference software rasterizer. + +Trace +^^^^^ + +Wrapper driver. + +State Trackers +-------------- + +Direct Rendering Infrastructure +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +EGL +^^^ + +GLX +^^^ + +MesaGL +^^^^^^ + +Python +^^^^^^ + +OpenVG +^^^^^^ + +WGL +^^^ + +Xorg XFree86 DDX +^^^^^^^^^^^^^^^^ + +Auxiliary +--------- + +CSO Cache +^^^^^^^^^ + +Draw +^^^^ + +Gallivm +^^^^^^^ + +Indices +^^^^^^^ + +Pipe Buffer Manager +^^^^^^^^^^^^^^^^^^^ + +Remote Debugger +^^^^^^^^^^^^^^^ + +Runtime Assembly Emission +^^^^^^^^^^^^^^^^^^^^^^^^^ + +Surface Context Tracker +^^^^^^^^^^^^^^^^^^^^^^^ + +TGSI +^^^^ + +Translate +^^^^^^^^^ + +Util +^^^^ + diff --git a/src/gallium/docs/source/glossary.rst b/src/gallium/docs/source/glossary.rst new file mode 100644 index 00000000000..6a9110ce786 --- /dev/null +++ b/src/gallium/docs/source/glossary.rst @@ -0,0 +1,10 @@ +Glossary +======== + +.. glossary:: + :sorted: + + MSAA + Multi-Sampled Anti-Aliasing. A basic anti-aliasing technique that takes + multiple samples of the depth buffer, and uses this information to + smooth the edges of polygons. diff --git a/src/gallium/docs/source/index.rst b/src/gallium/docs/source/index.rst new file mode 100644 index 00000000000..54bc883fced --- /dev/null +++ b/src/gallium/docs/source/index.rst @@ -0,0 +1,28 @@ +.. Gallium documentation master file, created by + sphinx-quickstart on Sun Dec 20 14:09:05 2009. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to Gallium's documentation! +=================================== + +Contents: + +.. toctree:: + :maxdepth: 2 + + intro + tgsi + screen + context + cso + distro + glossary + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` + diff --git a/src/gallium/docs/source/intro.rst b/src/gallium/docs/source/intro.rst new file mode 100644 index 00000000000..1ea103840a2 --- /dev/null +++ b/src/gallium/docs/source/intro.rst @@ -0,0 +1,9 @@ +Introduction +============ + +What is Gallium? +---------------- + +Gallium is essentially an API for writing graphics drivers in a largely +device-agnostic fashion. It provides several objects which encapsulate the +core services of graphics hardware in a straightforward manner. diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst new file mode 100644 index 00000000000..9631e6967ef --- /dev/null +++ b/src/gallium/docs/source/screen.rst @@ -0,0 +1,39 @@ +Screen +====== + +A screen is an object representing the context-independent part of a device. + +Methods +------- + +XXX moar; got bored + +get_name +^^^^^^^^ + +Returns an identifying name for the screen. + +get_vendor +^^^^^^^^^^ + +Returns the screen vendor. + +get_param +^^^^^^^^^ + +Get an integer/boolean screen parameter. + +get_paramf +^^^^^^^^^^ + +Get a floating-point screen parameter. + +is_format_supported +^^^^^^^^^^^^^^^^^^^ + +See if a format can be used in a specific manner. + +texture_create +^^^^^^^^^^^^^^ + +Given a template of texture setup, create a BO-backed texture. diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst new file mode 100644 index 00000000000..ebee4902b05 --- /dev/null +++ b/src/gallium/docs/source/tgsi.rst @@ -0,0 +1,1270 @@ +TGSI +==== + +TGSI, Tungsten Graphics Shader Infrastructure, is an intermediate language +for describing shaders. Since Gallium is inherently shaderful, shaders are +an important part of the API. TGSI is the only intermediate representation +used by all drivers. + +Instruction Set +--------------- + +From GL_NV_vertex_program +^^^^^^^^^^^^^^^^^^^^^^^^^ + + +ARL - Address Register Load + +.. math:: + + dst.x = \lfloor src.x\rfloor + + dst.y = \lfloor src.y\rfloor + + dst.z = \lfloor src.z\rfloor + + dst.w = \lfloor src.w\rfloor + + +MOV - Move + +.. math:: + + dst.x = src.x + + dst.y = src.y + + dst.z = src.z + + dst.w = src.w + + +LIT - Light Coefficients + +.. math:: + + dst.x = 1 + + dst.y = max(src.x, 0) + + dst.z = (src.x > 0) ? max(src.y, 0)^{clamp(src.w, -128, 128))} : 0 + + dst.w = 1 + + +RCP - Reciprocal + +.. math:: + + dst.x = \frac{1}{src.x} + + dst.y = \frac{1}{src.x} + + dst.z = \frac{1}{src.x} + + dst.w = \frac{1}{src.x} + + +RSQ - Reciprocal Square Root + +.. math:: + + dst.x = \frac{1}{\sqrt{|src.x|}} + + dst.y = \frac{1}{\sqrt{|src.x|}} + + dst.z = \frac{1}{\sqrt{|src.x|}} + + dst.w = \frac{1}{\sqrt{|src.x|}} + + +EXP - Approximate Exponential Base 2 + +.. math:: + + dst.x = 2^{\lfloor src.x\rfloor} + + dst.y = src.x - \lfloor src.x\rfloor + + dst.z = 2^{src.x} + + dst.w = 1 + + +LOG - Approximate Logarithm Base 2 + +.. math:: + + dst.x = \lfloor\log_2{|src.x|}\rfloor + + dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}} + + dst.z = \log_2{|src.x|} + + dst.w = 1 + + +MUL - Multiply + +.. math:: + + dst.x = src0.x \times src1.x + + dst.y = src0.y \times src1.y + + dst.z = src0.z \times src1.z + + dst.w = src0.w \times src1.w + + +ADD - Add + +.. math:: + + dst.x = src0.x + src1.x + + dst.y = src0.y + src1.y + + dst.z = src0.z + src1.z + + dst.w = src0.w + src1.w + + +DP3 - 3-component Dot Product + +.. math:: + + dst.x = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + + dst.y = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + + dst.z = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + + dst.w = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + + +DP4 - 4-component Dot Product + +.. math:: + + dst.x = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w + + dst.y = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w + + dst.z = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w + + dst.w = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w + + +DST - Distance Vector + +.. math:: + + dst.x = 1 + + dst.y = src0.y \times src1.y + + dst.z = src0.z + + dst.w = src1.w + + +MIN - Minimum + +.. math:: + + dst.x = min(src0.x, src1.x) + + dst.y = min(src0.y, src1.y) + + dst.z = min(src0.z, src1.z) + + dst.w = min(src0.w, src1.w) + + +MAX - Maximum + +.. math:: + + dst.x = max(src0.x, src1.x) + + dst.y = max(src0.y, src1.y) + + dst.z = max(src0.z, src1.z) + + dst.w = max(src0.w, src1.w) + + +SLT - Set On Less Than + +.. math:: + + dst.x = (src0.x < src1.x) ? 1 : 0 + + dst.y = (src0.y < src1.y) ? 1 : 0 + + dst.z = (src0.z < src1.z) ? 1 : 0 + + dst.w = (src0.w < src1.w) ? 1 : 0 + + +SGE - Set On Greater Equal Than + +.. math:: + + dst.x = (src0.x >= src1.x) ? 1 : 0 + + dst.y = (src0.y >= src1.y) ? 1 : 0 + + dst.z = (src0.z >= src1.z) ? 1 : 0 + + dst.w = (src0.w >= src1.w) ? 1 : 0 + + +MAD - Multiply And Add + +.. math:: + + dst.x = src0.x \times src1.x + src2.x + + dst.y = src0.y \times src1.y + src2.y + + dst.z = src0.z \times src1.z + src2.z + + dst.w = src0.w \times src1.w + src2.w + + +SUB - Subtract + +.. math:: + + dst.x = src0.x - src1.x + + dst.y = src0.y - src1.y + + dst.z = src0.z - src1.z + + dst.w = src0.w - src1.w + + +LRP - Linear Interpolate + +.. math:: + + dst.x = src0.x \times src1.x + (1 - src0.x) \times src2.x + + dst.y = src0.y \times src1.y + (1 - src0.y) \times src2.y + + dst.z = src0.z \times src1.z + (1 - src0.z) \times src2.z + + dst.w = src0.w \times src1.w + (1 - src0.w) \times src2.w + + +CND - Condition + +.. math:: + + dst.x = (src2.x > 0.5) ? src0.x : src1.x + + dst.y = (src2.y > 0.5) ? src0.y : src1.y + + dst.z = (src2.z > 0.5) ? src0.z : src1.z + + dst.w = (src2.w > 0.5) ? src0.w : src1.w + + +DP2A - 2-component Dot Product And Add + +.. math:: + + dst.x = src0.x \times src1.x + src0.y \times src1.y + src2.x + + dst.y = src0.x \times src1.x + src0.y \times src1.y + src2.x + + dst.z = src0.x \times src1.x + src0.y \times src1.y + src2.x + + dst.w = src0.x \times src1.x + src0.y \times src1.y + src2.x + + +FRAC - Fraction + +.. math:: + + dst.x = src.x - \lfloor src.x\rfloor + + dst.y = src.y - \lfloor src.y\rfloor + + dst.z = src.z - \lfloor src.z\rfloor + + dst.w = src.w - \lfloor src.w\rfloor + + +CLAMP - Clamp + +.. math:: + + dst.x = clamp(src0.x, src1.x, src2.x) + + dst.y = clamp(src0.y, src1.y, src2.y) + + dst.z = clamp(src0.z, src1.z, src2.z) + + dst.w = clamp(src0.w, src1.w, src2.w) + + +FLR - Floor + +This is identical to ARL. + +.. math:: + + dst.x = \lfloor src.x\rfloor + + dst.y = \lfloor src.y\rfloor + + dst.z = \lfloor src.z\rfloor + + dst.w = \lfloor src.w\rfloor + + +ROUND - Round + +.. math:: + + dst.x = round(src.x) + + dst.y = round(src.y) + + dst.z = round(src.z) + + dst.w = round(src.w) + + +EX2 - Exponential Base 2 + +.. math:: + + dst.x = 2^{src.x} + + dst.y = 2^{src.x} + + dst.z = 2^{src.x} + + dst.w = 2^{src.x} + + +LG2 - Logarithm Base 2 + +.. math:: + + dst.x = \log_2{src.x} + + dst.y = \log_2{src.x} + + dst.z = \log_2{src.x} + + dst.w = \log_2{src.x} + + +POW - Power + +.. math:: + + dst.x = src0.x^{src1.x} + + dst.y = src0.x^{src1.x} + + dst.z = src0.x^{src1.x} + + dst.w = src0.x^{src1.x} + +XPD - Cross Product + +.. math:: + + dst.x = src0.y \times src1.z - src1.y \times src0.z + + dst.y = src0.z \times src1.x - src1.z \times src0.x + + dst.z = src0.x \times src1.y - src1.x \times src0.y + + dst.w = 1 + + +ABS - Absolute + +.. math:: + + dst.x = |src.x| + + dst.y = |src.y| + + dst.z = |src.z| + + dst.w = |src.w| + + +RCC - Reciprocal Clamped + +XXX cleanup on aisle three + +.. math:: + + dst.x = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020) + + dst.y = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020) + + dst.z = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020) + + dst.w = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020) + + +DPH - Homogeneous Dot Product + +.. math:: + + dst.x = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w + + dst.y = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w + + dst.z = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w + + dst.w = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w + + +COS - Cosine + +.. math:: + + dst.x = \cos{src.x} + + dst.y = \cos{src.x} + + dst.z = \cos{src.x} + + dst.w = \cos{src.x} + + +DDX - Derivative Relative To X + +.. math:: + + dst.x = partialx(src.x) + + dst.y = partialx(src.y) + + dst.z = partialx(src.z) + + dst.w = partialx(src.w) + + +DDY - Derivative Relative To Y + +.. math:: + + dst.x = partialy(src.x) + + dst.y = partialy(src.y) + + dst.z = partialy(src.z) + + dst.w = partialy(src.w) + + +KILP - Predicated Discard + + discard + + +PK2H - Pack Two 16-bit Floats + + TBD + + +PK2US - Pack Two Unsigned 16-bit Scalars + + TBD + + +PK4B - Pack Four Signed 8-bit Scalars + + TBD + + +PK4UB - Pack Four Unsigned 8-bit Scalars + + TBD + + +RFL - Reflection Vector + +.. math:: + + dst.x = 2 \times (src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z) / (src0.x \times src0.x + src0.y \times src0.y + src0.z \times src0.z) \times src0.x - src1.x + + dst.y = 2 \times (src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z) / (src0.x \times src0.x + src0.y \times src0.y + src0.z \times src0.z) \times src0.y - src1.y + + dst.z = 2 \times (src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z) / (src0.x \times src0.x + src0.y \times src0.y + src0.z \times src0.z) \times src0.z - src1.z + + dst.w = 1 + +Considered for removal. + + +SEQ - Set On Equal + +.. math:: + + dst.x = (src0.x == src1.x) ? 1 : 0 + dst.y = (src0.y == src1.y) ? 1 : 0 + dst.z = (src0.z == src1.z) ? 1 : 0 + dst.w = (src0.w == src1.w) ? 1 : 0 + + +SFL - Set On False + +.. math:: + + dst.x = 0 + dst.y = 0 + dst.z = 0 + dst.w = 0 + +Considered for removal. + +SGT - Set On Greater Than + +.. math:: + + dst.x = (src0.x > src1.x) ? 1 : 0 + dst.y = (src0.y > src1.y) ? 1 : 0 + dst.z = (src0.z > src1.z) ? 1 : 0 + dst.w = (src0.w > src1.w) ? 1 : 0 + + +SIN - Sine + +.. math:: + + dst.x = \sin{src.x} + + dst.y = \sin{src.x} + + dst.z = \sin{src.x} + + dst.w = \sin{src.x} + + +SLE - Set On Less Equal Than + +.. math:: + + dst.x = (src0.x <= src1.x) ? 1 : 0 + dst.y = (src0.y <= src1.y) ? 1 : 0 + dst.z = (src0.z <= src1.z) ? 1 : 0 + dst.w = (src0.w <= src1.w) ? 1 : 0 + + +SNE - Set On Not Equal + +.. math:: + + dst.x = (src0.x != src1.x) ? 1 : 0 + dst.y = (src0.y != src1.y) ? 1 : 0 + dst.z = (src0.z != src1.z) ? 1 : 0 + dst.w = (src0.w != src1.w) ? 1 : 0 + + +STR - Set On True + +.. math:: + + dst.x = 1 + dst.y = 1 + dst.z = 1 + dst.w = 1 + + +TEX - Texture Lookup + + TBD + + +TXD - Texture Lookup with Derivatives + + TBD + + +TXP - Projective Texture Lookup + + TBD + + +UP2H - Unpack Two 16-Bit Floats + + TBD + + Considered for removal. + +UP2US - Unpack Two Unsigned 16-Bit Scalars + + TBD + + Considered for removal. + +UP4B - Unpack Four Signed 8-Bit Values + + TBD + + Considered for removal. + +UP4UB - Unpack Four Unsigned 8-Bit Scalars + + TBD + + Considered for removal. + +X2D - 2D Coordinate Transformation + +.. math:: + + dst.x = src0.x + src1.x \times src2.x + src1.y \times src2.y + dst.y = src0.y + src1.x \times src2.z + src1.y \times src2.w + dst.z = src0.x + src1.x \times src2.x + src1.y \times src2.y + dst.w = src0.y + src1.x \times src2.z + src1.y \times src2.w + +Considered for removal. + + +From GL_NV_vertex_program2 +^^^^^^^^^^^^^^^^^^^^^^^^^^ + + +ARA - Address Register Add + + TBD + + Considered for removal. + +ARR - Address Register Load With Round + +.. math:: + + dst.x = round(src.x) + + dst.y = round(src.y) + + dst.z = round(src.z) + + dst.w = round(src.w) + + +BRA - Branch + + pc = target + + Considered for removal. + +CAL - Subroutine Call + + push(pc) + pc = target + + +RET - Subroutine Call Return + + pc = pop() + + Potential restrictions: + * Only occurs at end of function. + +SSG - Set Sign + +.. math:: + + dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0 + + dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0 + + dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0 + + dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0 + + +CMP - Compare + +.. math:: + + dst.x = (src0.x < 0) ? src1.x : src2.x + + dst.y = (src0.y < 0) ? src1.y : src2.y + + dst.z = (src0.z < 0) ? src1.z : src2.z + + dst.w = (src0.w < 0) ? src1.w : src2.w + + +KIL - Conditional Discard + +.. math:: + + if (src.x < 0 || src.y < 0 || src.z < 0 || src.w < 0) + discard + endif + + +SCS - Sine Cosine + +.. math:: + + dst.x = \cos{src.x} + + dst.y = \sin{src.x} + + dst.z = 0 + + dst.y = 1 + + +TXB - Texture Lookup With Bias + + TBD + + +NRM - 3-component Vector Normalise + +.. math:: + + dst.x = src.x / (src.x \times src.x + src.y \times src.y + src.z \times src.z) + + dst.y = src.y / (src.x \times src.x + src.y \times src.y + src.z \times src.z) + + dst.z = src.z / (src.x \times src.x + src.y \times src.y + src.z \times src.z) + + dst.w = 1 + + +DIV - Divide + +.. math:: + + dst.x = \frac{src0.x}{src1.x} + + dst.y = \frac{src0.y}{src1.y} + + dst.z = \frac{src0.z}{src1.z} + + dst.w = \frac{src0.w}{src1.w} + + +DP2 - 2-component Dot Product + +.. math:: + + dst.x = src0.x \times src1.x + src0.y \times src1.y + + dst.y = src0.x \times src1.x + src0.y \times src1.y + + dst.z = src0.x \times src1.x + src0.y \times src1.y + + dst.w = src0.x \times src1.x + src0.y \times src1.y + + +TXL - Texture Lookup With LOD + + TBD + + +BRK - Break + + TBD + + +IF - If + + TBD + + +BGNFOR - Begin a For-Loop + + dst.x = floor(src.x) + dst.y = floor(src.y) + dst.z = floor(src.z) + + if (dst.y <= 0) + pc = [matching ENDFOR] + 1 + endif + + Note: The destination must be a loop register. + The source must be a constant register. + + Considered for cleanup / removal. + + +REP - Repeat + + TBD + + +ELSE - Else + + TBD + + +ENDIF - End If + + TBD + + +ENDFOR - End a For-Loop + + dst.x = dst.x + dst.z + dst.y = dst.y - 1.0 + + if (dst.y > 0) + pc = [matching BGNFOR instruction] + 1 + endif + + Note: The destination must be a loop register. + + Considered for cleanup / removal. + +ENDREP - End Repeat + + TBD + + +PUSHA - Push Address Register On Stack + + push(src.x) + push(src.y) + push(src.z) + push(src.w) + + Considered for cleanup / removal. + +POPA - Pop Address Register From Stack + + dst.w = pop() + dst.z = pop() + dst.y = pop() + dst.x = pop() + + Considered for cleanup / removal. + + +From GL_NV_gpu_program4 +^^^^^^^^^^^^^^^^^^^^^^^^ + +Support for these opcodes indicated by a special pipe capability bit (TBD). + +CEIL - Ceiling + +.. math:: + + dst.x = \lceil src.x\rceil + + dst.y = \lceil src.y\rceil + + dst.z = \lceil src.z\rceil + + dst.w = \lceil src.w\rceil + + +I2F - Integer To Float + +.. math:: + + dst.x = (float) src.x + + dst.y = (float) src.y + + dst.z = (float) src.z + + dst.w = (float) src.w + + +NOT - Bitwise Not + +.. math:: + + dst.x = ~src.x + + dst.y = ~src.y + + dst.z = ~src.z + + dst.w = ~src.w + + +TRUNC - Truncate + +.. math:: + + dst.x = trunc(src.x) + + dst.y = trunc(src.y) + + dst.z = trunc(src.z) + + dst.w = trunc(src.w) + + +SHL - Shift Left + +.. math:: + + dst.x = src0.x << src1.x + + dst.y = src0.y << src1.x + + dst.z = src0.z << src1.x + + dst.w = src0.w << src1.x + + +SHR - Shift Right + +.. math:: + + dst.x = src0.x >> src1.x + + dst.y = src0.y >> src1.x + + dst.z = src0.z >> src1.x + + dst.w = src0.w >> src1.x + + +AND - Bitwise And + +.. math:: + + dst.x = src0.x & src1.x + + dst.y = src0.y & src1.y + + dst.z = src0.z & src1.z + + dst.w = src0.w & src1.w + + +OR - Bitwise Or + +.. math:: + + dst.x = src0.x | src1.x + + dst.y = src0.y | src1.y + + dst.z = src0.z | src1.z + + dst.w = src0.w | src1.w + + +MOD - Modulus + +.. math:: + + dst.x = src0.x \bmod src1.x + + dst.y = src0.y \bmod src1.y + + dst.z = src0.z \bmod src1.z + + dst.w = src0.w \bmod src1.w + + +XOR - Bitwise Xor + +.. math:: + + dst.x = src0.x ^ src1.x + + dst.y = src0.y ^ src1.y + + dst.z = src0.z ^ src1.z + + dst.w = src0.w ^ src1.w + + +SAD - Sum Of Absolute Differences + +.. math:: + + dst.x = |src0.x - src1.x| + src2.x + + dst.y = |src0.y - src1.y| + src2.y + + dst.z = |src0.z - src1.z| + src2.z + + dst.w = |src0.w - src1.w| + src2.w + + +TXF - Texel Fetch + + TBD + + +TXQ - Texture Size Query + + TBD + + +CONT - Continue + + TBD + + +From GL_NV_geometry_program4 +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + +EMIT - Emit + + TBD + + +ENDPRIM - End Primitive + + TBD + + +From GLSL +^^^^^^^^^^ + + +BGNLOOP - Begin a Loop + + TBD + + +BGNSUB - Begin Subroutine + + TBD + + +ENDLOOP - End a Loop + + TBD + + +ENDSUB - End Subroutine + + TBD + + +NOP - No Operation + + Do nothing. + + +NRM4 - 4-component Vector Normalise + +.. math:: + + dst.x = \frac{src.x}{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w} + + dst.y = \frac{src.y}{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w} + + dst.z = \frac{src.z}{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w} + + dst.w = \frac{src.w}{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w} + + +ps_2_x +^^^^^^^^^^^^ + + +CALLNZ - Subroutine Call If Not Zero + + TBD + + +IFC - If + + TBD + + +BREAKC - Break Conditional + + TBD + + +Explanation of symbols used +------------------------------ + + +Functions +^^^^^^^^^^^^^^ + + + :math:`|x|` Absolute value of `x`. + + :math:`\lceil x \rceil` Ceiling of `x`. + + clamp(x,y,z) Clamp x between y and z. + (x < y) ? y : (x > z) ? z : x + + :math:`\lfloor x\rfloor` Floor of `x`. + + :math:`\log_2{x}` Logarithm of `x`, base 2. + + max(x,y) Maximum of x and y. + (x > y) ? x : y + + min(x,y) Minimum of x and y. + (x < y) ? x : y + + partialx(x) Derivative of x relative to fragment's X. + + partialy(x) Derivative of x relative to fragment's Y. + + pop() Pop from stack. + + :math:`x^y` `x` to the power `y`. + + push(x) Push x on stack. + + round(x) Round x. + + trunc(x) Truncate x, i.e. drop the fraction bits. + + +Keywords +^^^^^^^^^^^^^ + + + discard Discard fragment. + + dst First destination register. + + dst0 First destination register. + + pc Program counter. + + src First source register. + + src0 First source register. + + src1 Second source register. + + src2 Third source register. + + target Label of target instruction. + + +Other tokens +--------------- + + +Declaration Semantic +^^^^^^^^^^^^^^^^^^^^^^^^ + + + Follows Declaration token if Semantic bit is set. + + Since its purpose is to link a shader with other stages of the pipeline, + it is valid to follow only those Declaration tokens that declare a register + either in INPUT or OUTPUT file. + + SemanticName field contains the semantic name of the register being declared. + There is no default value. + + SemanticIndex is an optional subscript that can be used to distinguish + different register declarations with the same semantic name. The default value + is 0. + + The meanings of the individual semantic names are explained in the following + sections. + +TGSI_SEMANTIC_POSITION +"""""""""""""""""""""" + +Position, sometimes known as HPOS or WPOS for historical reasons, is the +location of the vertex in space, in ``(x, y, z, w)`` format. ``x``, ``y``, and ``z`` +are the Cartesian coordinates, and ``w`` is the homogenous coordinate and used +for the perspective divide, if enabled. + +As a vertex shader output, position should be scaled to the viewport. When +used in fragment shaders, position will --- + +XXX --- wait a minute. Should position be in [0,1] for x and y? + +XXX additionally, is there a way to configure the perspective divide? it's +accelerated on most chipsets AFAIK... + +Position, if not specified, usually defaults to ``(0, 0, 0, 1)``, and can +be partially specified as ``(x, y, 0, 1)`` or ``(x, y, z, 1)``. + +XXX usually? can we solidify that? + +TGSI_SEMANTIC_COLOR +""""""""""""""""""" + +Colors are used to, well, color the primitives. Colors are always in +``(r, g, b, a)`` format. + +If alpha is not specified, it defaults to 1. + +TGSI_SEMANTIC_BCOLOR +"""""""""""""""""""" + +Back-facing colors are only used for back-facing polygons, and are only valid +in vertex shader outputs. After rasterization, all polygons are front-facing +and COLOR and BCOLOR end up occupying the same slots in the fragment, so +all BCOLORs effectively become regular COLORs in the fragment shader. + +TGSI_SEMANTIC_FOG +""""""""""""""""" + +The fog coordinate historically has been used to replace the depth coordinate +for generation of fog in dedicated fog blocks. Gallium, however, does not use +dedicated fog acceleration, placing it entirely in the fragment shader +instead. + +The fog coordinate should be written in ``(f, 0, 0, 1)`` format. Only the first +component matters when writing from the vertex shader; the driver will ensure +that the coordinate is in this format when used as a fragment shader input. + +TGSI_SEMANTIC_PSIZE +""""""""""""""""""" + +PSIZE, or point size, is used to specify point sizes per-vertex. It should +be in ``(p, n, x, f)`` format, where ``p`` is the point size, ``n`` is the minimum +size, ``x`` is the maximum size, and ``f`` is the fade threshold. + +XXX this is arb_vp. is this what we actually do? should double-check... + +When using this semantic, be sure to set the appropriate state in the +:ref:`rasterizer` first. + +TGSI_SEMANTIC_GENERIC +""""""""""""""""""""" + +Generic semantics are nearly always used for texture coordinate attributes, +in ``(s, t, r, q)`` format. ``t`` and ``r`` may be unused for certain kinds +of lookups, and ``q`` is the level-of-detail bias for biased sampling. + +These attributes are called "generic" because they may be used for anything +else, including parameters, texture generation information, or anything that +can be stored inside a four-component vector. + +TGSI_SEMANTIC_NORMAL +"""""""""""""""""""" + +Vertex normal; could be used to implement per-pixel lighting for legacy APIs +that allow mixing fixed-function and programmable stages. + +TGSI_SEMANTIC_FACE +"""""""""""""""""" + +FACE is the facing bit, to store the facing information for the fragment +shader. ``(f, 0, 0, 1)`` is the format. The first component will be positive +when the fragment is front-facing, and negative when the component is +back-facing. + +TGSI_SEMANTIC_EDGEFLAG +"""""""""""""""""""""" + +XXX no clue diff --git a/src/gallium/drivers/cell/ppu/cell_clear.c b/src/gallium/drivers/cell/ppu/cell_clear.c index 79ad687ea94..3a3f968a492 100644 --- a/src/gallium/drivers/cell/ppu/cell_clear.c +++ b/src/gallium/drivers/cell/ppu/cell_clear.c @@ -59,9 +59,9 @@ cell_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, if (buffers & PIPE_CLEAR_COLOR) { uint surfIndex = 0; - uint clearValue; + union util_color uc; - util_pack_color(rgba, cell->framebuffer.cbufs[0]->format, &clearValue); + util_pack_color(rgba, cell->framebuffer.cbufs[0]->format, &uc); /* Build a CLEAR command and place it in the current batch buffer */ STATIC_ASSERT(sizeof(struct cell_command_clear_surface) % 16 == 0); @@ -70,7 +70,7 @@ cell_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, cell_batch_alloc16(cell, sizeof(*clr)); clr->opcode[0] = CELL_CMD_CLEAR_SURFACE; clr->surface = surfIndex; - clr->value = clearValue; + clr->value = uc.ui; } if (buffers & PIPE_CLEAR_DEPTHSTENCIL) { diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c index 644496db40c..3fa8b975d39 100644 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -59,7 +59,7 @@ cell_map_constant_buffers(struct cell_context *sp) } } - draw_set_mapped_constant_buffer(sp->draw, + draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_VERTEX, sp->mapped_constants[PIPE_SHADER_VERTEX], sp->constants[PIPE_SHADER_VERTEX].buffer->size); } @@ -85,7 +85,7 @@ cell_unmap_constant_buffers(struct cell_context *sp) * * XXX should the element buffer be specified/bound with a separate function? */ -static boolean +static void cell_draw_range_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, @@ -145,47 +145,35 @@ cell_draw_range_elements(struct pipe_context *pipe, /* Note: leave drawing surfaces mapped */ cell_unmap_constant_buffers(sp); - - return TRUE; } -static boolean +static void cell_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count) { - return cell_draw_range_elements( pipe, indexBuffer, - indexSize, - 0, 0xffffffff, - mode, start, count ); + cell_draw_range_elements( pipe, indexBuffer, + indexSize, + 0, 0xffffffff, + mode, start, count ); } -static boolean +static void cell_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { - return cell_draw_elements(pipe, NULL, 0, mode, start, count); -} - - -static void -cell_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags) -{ - struct cell_context *cell = cell_context(pipe); - draw_set_edgeflags(cell->draw, edgeflags); + cell_draw_elements(pipe, NULL, 0, mode, start, count); } - void cell_init_draw_functions(struct cell_context *cell) { cell->pipe.draw_arrays = cell_draw_arrays; cell->pipe.draw_elements = cell_draw_elements; cell->pipe.draw_range_elements = cell_draw_range_elements; - cell->pipe.set_edgeflags = cell_set_edgeflags; } diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 19e3ab08440..1d8a11a4ac9 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -237,8 +237,8 @@ is_register_src(struct codegen *gen, int channel, if (swizzle > TGSI_SWIZZLE_W || sign_op != TGSI_UTIL_SIGN_KEEP) { return FALSE; } - if (src->SrcRegister.File == TGSI_FILE_TEMPORARY || - src->SrcRegister.File == TGSI_FILE_IMMEDIATE) { + if (src->Register.File == TGSI_FILE_TEMPORARY || + src->Register.File == TGSI_FILE_IMMEDIATE) { return TRUE; } return FALSE; @@ -249,7 +249,7 @@ static boolean is_memory_dst(struct codegen *gen, int channel, const struct tgsi_full_dst_register *dst) { - if (dst->DstRegister.File == TGSI_FILE_OUTPUT) { + if (dst->Register.File == TGSI_FILE_OUTPUT) { return TRUE; } else { @@ -279,15 +279,15 @@ get_src_reg(struct codegen *gen, assert(swizzle <= TGSI_SWIZZLE_W); { - int index = src->SrcRegister.Index; + int index = src->Register.Index; assert(swizzle < 4); - if (src->SrcRegister.Indirect) { + if (src->Register.Indirect) { /* XXX unfinished */ } - switch (src->SrcRegister.File) { + switch (src->Register.File) { case TGSI_FILE_TEMPORARY: reg = gen->temp_regs[index][swizzle]; break; @@ -374,12 +374,12 @@ get_dst_reg(struct codegen *gen, { int reg = -1; - switch (dest->DstRegister.File) { + switch (dest->Register.File) { case TGSI_FILE_TEMPORARY: if (gen->if_nesting > 0 || gen->loop_nesting > 0) reg = get_itemp(gen); else - reg = gen->temp_regs[dest->DstRegister.Index][channel]; + reg = gen->temp_regs[dest->Register.Index][channel]; break; case TGSI_FILE_OUTPUT: reg = get_itemp(gen); @@ -419,10 +419,10 @@ store_dest_reg(struct codegen *gen, } #endif - switch (dest->DstRegister.File) { + switch (dest->Register.File) { case TGSI_FILE_TEMPORARY: if (gen->if_nesting > 0 || gen->loop_nesting > 0) { - int d_reg = gen->temp_regs[dest->DstRegister.Index][channel]; + int d_reg = gen->temp_regs[dest->Register.Index][channel]; int exec_reg = get_exec_mask_reg(gen); /* Mix d with new value according to exec mask: * d[i] = mask_reg[i] ? value_reg : d_reg @@ -437,7 +437,7 @@ store_dest_reg(struct codegen *gen, case TGSI_FILE_OUTPUT: { /* offset is measured in quadwords, not bytes */ - int offset = dest->DstRegister.Index * 4 + channel; + int offset = dest->Register.Index * 4 + channel; if (gen->if_nesting > 0 || gen->loop_nesting > 0) { int exec_reg = get_exec_mask_reg(gen); int curval_reg = get_itemp(gen); @@ -544,7 +544,7 @@ emit_epilogue(struct codegen *gen) #define FOR_EACH_ENABLED_CHANNEL(inst, ch) \ for (ch = 0; ch < 4; ch++) \ - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) + if (inst->Dst[0].Register.WriteMask & (1 << ch)) static boolean @@ -552,7 +552,7 @@ emit_ARL(struct codegen *gen, const struct tgsi_full_instruction *inst) { int ch = 0, src_reg, addr_reg; - src_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + src_reg = get_src_reg(gen, ch, &inst->Src[0]); addr_reg = get_address_reg(gen); /* convert float to int */ @@ -570,19 +570,19 @@ emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst) int ch, src_reg[4], dst_reg[4]; FOR_EACH_ENABLED_CHANNEL(inst, ch) { - src_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - dst_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + src_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); + dst_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); } FOR_EACH_ENABLED_CHANNEL(inst, ch) { - if (is_register_src(gen, ch, &inst->FullSrcRegisters[0]) && - is_memory_dst(gen, ch, &inst->FullDstRegisters[0])) { + if (is_register_src(gen, ch, &inst->Src[0]) && + is_memory_dst(gen, ch, &inst->Dst[0])) { /* special-case: register to memory store */ - store_dest_reg(gen, src_reg[ch], ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, src_reg[ch], ch, &inst->Dst[0]); } else { spe_move(gen->f, dst_reg[ch], src_reg[ch]); - store_dest_reg(gen, dst_reg[ch], ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, dst_reg[ch], ch, &inst->Dst[0]); } } @@ -601,9 +601,9 @@ emit_binop(struct codegen *gen, const struct tgsi_full_instruction *inst) /* Loop over Red/Green/Blue/Alpha channels, fetch src operands */ FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); + s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); } /* Loop over Red/Green/Blue/Alpha channels, do the op, store results */ @@ -626,7 +626,7 @@ emit_binop(struct codegen *gen, const struct tgsi_full_instruction *inst) /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */ FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); } /* Free any intermediate temps we allocated */ @@ -645,16 +645,16 @@ emit_MAD(struct codegen *gen, const struct tgsi_full_instruction *inst) int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4]; FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); - s3_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); + s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]); + s3_reg[ch] = get_src_reg(gen, ch, &inst->Src[2]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); } FOR_EACH_ENABLED_CHANNEL(inst, ch) { spe_fma(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch], s3_reg[ch]); } FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); } free_itemps(gen); return TRUE; @@ -671,10 +671,10 @@ emit_LRP(struct codegen *gen, const struct tgsi_full_instruction *inst) /* setup/get src/dst/temp regs */ FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); - s3_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); + s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]); + s3_reg[ch] = get_src_reg(gen, ch, &inst->Src[2]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); tmp_reg[ch] = get_itemp(gen); } @@ -687,7 +687,7 @@ emit_LRP(struct codegen *gen, const struct tgsi_full_instruction *inst) spe_fma(gen->f, d_reg[ch], tmp_reg[ch], s1_reg[ch], s3_reg[ch]); } FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); } free_itemps(gen); return TRUE; @@ -704,8 +704,8 @@ emit_RCP_RSQ(struct codegen *gen, const struct tgsi_full_instruction *inst) int ch, s1_reg[4], d_reg[4], tmp_reg[4]; FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); tmp_reg[ch] = get_itemp(gen); } @@ -726,7 +726,7 @@ emit_RCP_RSQ(struct codegen *gen, const struct tgsi_full_instruction *inst) } FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); } free_itemps(gen); @@ -747,8 +747,8 @@ emit_ABS(struct codegen *gen, const struct tgsi_full_instruction *inst) spe_load_uint(gen->f, bit31mask_reg, (1 << 31)); FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); } /* d = sign bit cleared in s1 */ @@ -757,7 +757,7 @@ emit_ABS(struct codegen *gen, const struct tgsi_full_instruction *inst) } FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); } free_itemps(gen); @@ -775,12 +775,12 @@ emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst) int s2x_reg, s2y_reg, s2z_reg; int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen); - s1x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); - s2x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); - s1y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); - s2y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); - s1z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); - s2z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); + s1x_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]); + s2x_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]); + s1y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]); + s2y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]); + s1z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]); + s2z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]); /* t0 = x0 * x1 */ spe_fm(gen->f, t0_reg, s1x_reg, s2x_reg); @@ -795,9 +795,9 @@ emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst) spe_fa(gen->f, t0_reg, t0_reg, t1_reg); FOR_EACH_ENABLED_CHANNEL(inst, ch) { - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); spe_move(gen->f, d_reg, t0_reg); - store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg, ch, &inst->Dst[0]); } free_itemps(gen); @@ -815,14 +815,14 @@ emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst) int s1x_reg, s1y_reg, s1z_reg, s1w_reg; int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen); - s0x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); - s1x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); - s0y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); - s1y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); - s0z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); - s1z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); - s0w_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[0]); - s1w_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[1]); + s0x_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]); + s1x_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]); + s0y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]); + s1y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]); + s0z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]); + s1z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]); + s0w_reg = get_src_reg(gen, CHAN_W, &inst->Src[0]); + s1w_reg = get_src_reg(gen, CHAN_W, &inst->Src[1]); /* t0 = x0 * x1 */ spe_fm(gen->f, t0_reg, s0x_reg, s1x_reg); @@ -840,9 +840,9 @@ emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst) spe_fa(gen->f, t0_reg, t0_reg, t1_reg); FOR_EACH_ENABLED_CHANNEL(inst, ch) { - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); spe_move(gen->f, d_reg, t0_reg); - store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg, ch, &inst->Dst[0]); } free_itemps(gen); @@ -857,31 +857,31 @@ emit_DPH(struct codegen *gen, const struct tgsi_full_instruction *inst) { /* XXX rewrite this function to look more like DP3/DP4 */ int ch; - int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); - int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); + int s1_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]); + int s2_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]); int tmp_reg = get_itemp(gen); /* t = x0 * x1 */ spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); - s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); - s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); + s1_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]); + s2_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]); /* t = y0 * y1 + t */ spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); - s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); - s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); + s1_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]); + s2_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]); /* t = z0 * z1 + t */ spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); - s2_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[1]); + s2_reg = get_src_reg(gen, CHAN_W, &inst->Src[1]); /* t = w1 + t */ spe_fa(gen->f, tmp_reg, s2_reg, tmp_reg); FOR_EACH_ENABLED_CHANNEL(inst, ch) { - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); spe_move(gen->f, d_reg, tmp_reg); - store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, tmp_reg, ch, &inst->Dst[0]); } free_itemps(gen); @@ -898,9 +898,9 @@ emit_NRM3(struct codegen *gen, const struct tgsi_full_instruction *inst) int src_reg[3]; int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen); - src_reg[0] = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); - src_reg[1] = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); - src_reg[2] = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); + src_reg[0] = get_src_reg(gen, CHAN_X, &inst->Src[0]); + src_reg[1] = get_src_reg(gen, CHAN_Y, &inst->Src[0]); + src_reg[2] = get_src_reg(gen, CHAN_Z, &inst->Src[0]); /* t0 = x * x */ spe_fm(gen->f, t0_reg, src_reg[0], src_reg[0]); @@ -919,10 +919,10 @@ emit_NRM3(struct codegen *gen, const struct tgsi_full_instruction *inst) spe_fi(gen->f, t1_reg, t0_reg, t1_reg); FOR_EACH_ENABLED_CHANNEL(inst, ch) { - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); /* dst = src[ch] * t1 */ spe_fm(gen->f, d_reg, src_reg[ch], t1_reg); - store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg, ch, &inst->Dst[0]); } free_itemps(gen); @@ -936,48 +936,48 @@ emit_NRM3(struct codegen *gen, const struct tgsi_full_instruction *inst) static boolean emit_XPD(struct codegen *gen, const struct tgsi_full_instruction *inst) { - int s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); - int s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); + int s1_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]); + int s2_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]); int tmp_reg = get_itemp(gen); /* t = z0 * y1 */ spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); - s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); - s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); + s1_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]); + s2_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]); /* t = y0 * z1 - t */ spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_X)) { - store_dest_reg(gen, tmp_reg, CHAN_X, &inst->FullDstRegisters[0]); + if (inst->Dst[0].Register.WriteMask & (1 << CHAN_X)) { + store_dest_reg(gen, tmp_reg, CHAN_X, &inst->Dst[0]); } - s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); - s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); + s1_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]); + s2_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]); /* t = x0 * z1 */ spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); - s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); - s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); + s1_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]); + s2_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]); /* t = z0 * x1 - t */ spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_Y)) { - store_dest_reg(gen, tmp_reg, CHAN_Y, &inst->FullDstRegisters[0]); + if (inst->Dst[0].Register.WriteMask & (1 << CHAN_Y)) { + store_dest_reg(gen, tmp_reg, CHAN_Y, &inst->Dst[0]); } - s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); - s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); + s1_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]); + s2_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]); /* t = y0 * x1 */ spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); - s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); - s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); + s1_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]); + s2_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]); /* t = x0 * y1 - t */ spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_Z)) { - store_dest_reg(gen, tmp_reg, CHAN_Z, &inst->FullDstRegisters[0]); + if (inst->Dst[0].Register.WriteMask & (1 << CHAN_Z)) { + store_dest_reg(gen, tmp_reg, CHAN_Z, &inst->Dst[0]); } free_itemps(gen); @@ -995,14 +995,14 @@ static boolean emit_inequality(struct codegen *gen, const struct tgsi_full_instruction *inst) { int ch, s1_reg[4], s2_reg[4], d_reg[4], one_reg; - bool complement = FALSE; + boolean complement = FALSE; one_reg = get_const_one_reg(gen); FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); + s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); } FOR_EACH_ENABLED_CHANNEL(inst, ch) { @@ -1043,7 +1043,7 @@ emit_inequality(struct codegen *gen, const struct tgsi_full_instruction *inst) } FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); } free_itemps(gen); @@ -1060,10 +1060,10 @@ emit_CMP(struct codegen *gen, const struct tgsi_full_instruction *inst) int ch; FOR_EACH_ENABLED_CHANNEL(inst, ch) { - int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); - int s3_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]); - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + int s1_reg = get_src_reg(gen, ch, &inst->Src[0]); + int s2_reg = get_src_reg(gen, ch, &inst->Src[1]); + int s3_reg = get_src_reg(gen, ch, &inst->Src[2]); + int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); int zero_reg = get_itemp(gen); spe_zero(gen->f, zero_reg); @@ -1072,7 +1072,7 @@ emit_CMP(struct codegen *gen, const struct tgsi_full_instruction *inst) spe_fcgt(gen->f, d_reg, zero_reg, s1_reg); spe_selb(gen->f, d_reg, s3_reg, s2_reg, d_reg); - store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg, ch, &inst->Dst[0]); free_itemps(gen); } @@ -1090,8 +1090,8 @@ emit_TRUNC(struct codegen *gen, const struct tgsi_full_instruction *inst) int ch, s1_reg[4], d_reg[4]; FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); } /* Convert float to int */ @@ -1105,7 +1105,7 @@ emit_TRUNC(struct codegen *gen, const struct tgsi_full_instruction *inst) } FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); } free_itemps(gen); @@ -1129,8 +1129,8 @@ emit_FLR(struct codegen *gen, const struct tgsi_full_instruction *inst) one_reg = get_const_one_reg(gen); FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); tmp_reg[ch] = get_itemp(gen); } @@ -1156,7 +1156,7 @@ emit_FLR(struct codegen *gen, const struct tgsi_full_instruction *inst) } FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); } free_itemps(gen); @@ -1177,8 +1177,8 @@ emit_FRC(struct codegen *gen, const struct tgsi_full_instruction *inst) one_reg = get_const_one_reg(gen); FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); tmp_reg[ch] = get_itemp(gen); } @@ -1210,7 +1210,7 @@ emit_FRC(struct codegen *gen, const struct tgsi_full_instruction *inst) /* store result */ FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); } free_itemps(gen); @@ -1272,7 +1272,7 @@ emit_function_call(struct codegen *gen, if (scalar) { for (a = 0; a < num_args; a++) { - s_regs[a] = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[a]); + s_regs[a] = get_src_reg(gen, CHAN_X, &inst->Src[a]); } /* we'll call the function, put the return value in this register, * then replicate it across all write-enabled components in d_reg. @@ -1287,11 +1287,11 @@ emit_function_call(struct codegen *gen, if (!scalar) { for (a = 0; a < num_args; a++) { - s_regs[a] = get_src_reg(gen, ch, &inst->FullSrcRegisters[a]); + s_regs[a] = get_src_reg(gen, ch, &inst->Src[a]); } } - d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); if (!scalar || !func_called) { /* for a scalar function, we'll really only call the function once */ @@ -1336,7 +1336,7 @@ emit_function_call(struct codegen *gen, spe_move(gen->f, d_reg, retval_reg); } - store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg, ch, &inst->Dst[0]); free_itemps(gen); } @@ -1351,8 +1351,8 @@ emit_function_call(struct codegen *gen, static boolean emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst) { - const uint target = inst->InstructionExtTexture.Texture; - const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; + const uint target = inst->Texture.Texture; + const uint unit = inst->Src[1].Register.Index; uint addr; int ch; int coord_regs[4], d_regs[4]; @@ -1373,14 +1373,14 @@ emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst) return FALSE; } - assert(inst->FullSrcRegisters[1].SrcRegister.File == TGSI_FILE_SAMPLER); + assert(inst->Src[1].Register.File == TGSI_FILE_SAMPLER); spe_comment(gen->f, -4, "CALL tex:"); /* get src/dst reg info */ for (ch = 0; ch < 4; ch++) { - coord_regs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - d_regs[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + coord_regs[ch] = get_src_reg(gen, ch, &inst->Src[0]); + d_regs[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); } { @@ -1425,7 +1425,7 @@ emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst) } FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_regs[ch], ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_regs[ch], ch, &inst->Dst[0]); free_itemps(gen); } @@ -1452,7 +1452,7 @@ emit_KIL(struct codegen *gen, const struct tgsi_full_instruction *inst) /* get src regs */ FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s_regs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + s_regs[ch] = get_src_reg(gen, ch, &inst->Src[0]); } /* test if any src regs are < 0 */ @@ -1500,9 +1500,9 @@ emit_MIN_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst) int ch, s0_reg[4], s1_reg[4], d_reg[4], tmp_reg[4]; FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s0_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + s0_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); + s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); tmp_reg[ch] = get_itemp(gen); } @@ -1518,7 +1518,7 @@ emit_MIN_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst) } FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); } free_itemps(gen); @@ -1575,7 +1575,7 @@ emit_IF(struct codegen *gen, const struct tgsi_full_instruction *inst) /* update conditional execution mask with the predicate register */ int tmp_reg = get_itemp(gen); - int s1_reg = get_src_reg(gen, channel, &inst->FullSrcRegisters[0]); + int s1_reg = get_src_reg(gen, channel, &inst->Src[0]); /* tmp = (s1_reg == 0) */ spe_ceqi(gen->f, tmp_reg, s1_reg, 0); @@ -1699,8 +1699,8 @@ emit_DDX_DDY(struct codegen *gen, const struct tgsi_full_instruction *inst, int ch; FOR_EACH_ENABLED_CHANNEL(inst, ch) { - int s_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + int s_reg = get_src_reg(gen, ch, &inst->Src[0]); + int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); int t1_reg = get_itemp(gen); int t2_reg = get_itemp(gen); @@ -1909,8 +1909,8 @@ emit_declaration(struct cell_context *cell, switch (decl->Declaration.File) { case TGSI_FILE_TEMPORARY: - for (i = decl->DeclarationRange.First; - i <= decl->DeclarationRange.Last; + for (i = decl->Range.First; + i <= decl->Range.Last; i++) { assert(i < MAX_TEMPS); for (ch = 0; ch < 4; ch++) { diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c index ccd0fef6e84..c18a5d0635e 100644 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -383,10 +383,10 @@ cell_init_state_functions(struct cell_context *cell) cell->pipe.delete_blend_state = cell_delete_blend_state; cell->pipe.create_sampler_state = cell_create_sampler_state; - cell->pipe.bind_sampler_states = cell_bind_sampler_states; + cell->pipe.bind_fragment_sampler_states = cell_bind_sampler_states; cell->pipe.delete_sampler_state = cell_delete_sampler_state; - cell->pipe.set_sampler_textures = cell_set_sampler_textures; + cell->pipe.set_fragment_sampler_textures = cell_set_sampler_textures; cell->pipe.create_depth_stencil_alpha_state = cell_create_depth_stencil_alpha_state; cell->pipe.bind_depth_stencil_alpha_state = cell_bind_depth_stencil_alpha_state; diff --git a/src/gallium/drivers/cell/ppu/cell_state_derived.c b/src/gallium/drivers/cell/ppu/cell_state_derived.c index efc4f78364b..b723e794e71 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_derived.c +++ b/src/gallium/drivers/cell/ppu/cell_state_derived.c @@ -66,7 +66,7 @@ calculate_vertex_layout( struct cell_context *cell ) vinfo->num_attribs = 0; /* we always want to emit vertex pos */ - src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_POSITION, 0); + src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_POSITION, 0); assert(src >= 0); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src); @@ -82,14 +82,14 @@ calculate_vertex_layout( struct cell_context *cell ) break; case TGSI_SEMANTIC_COLOR: - src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_COLOR, + src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_COLOR, fs->info.input_semantic_index[i]); assert(src >= 0); draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); break; case TGSI_SEMANTIC_FOG: - src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_FOG, 0); + src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_FOG, 0); #if 1 if (src < 0) /* XXX temp hack, try demos/fogcoord.c with this */ src = 0; @@ -100,7 +100,7 @@ calculate_vertex_layout( struct cell_context *cell ) case TGSI_SEMANTIC_GENERIC: /* this includes texcoords and varying vars */ - src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_GENERIC, + src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_GENERIC, fs->info.input_semantic_index[i]); assert(src >= 0); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index 9479c0898fd..5b87286d4c5 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -27,6 +27,7 @@ #include "pipe/p_inlines.h" #include "util/u_memory.h" +#include "util/u_math.h" #include "cell_context.h" #include "cell_gen_fragment.h" #include "cell_state.h" @@ -299,9 +300,9 @@ cell_emit_state(struct cell_context *cell) for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) { texture->start[level] = (ct->mapped + ct->level_offset[level]); - texture->width[level] = ct->base.width[level]; - texture->height[level] = ct->base.height[level]; - texture->depth[level] = ct->base.depth[level]; + texture->width[level] = u_minify(ct->base.width0, level); + texture->height[level] = u_minify(ct->base.height0, level); + texture->depth[level] = u_minify(ct->base.depth0, level); } texture->target = ct->base.target; } @@ -330,7 +331,7 @@ cell_emit_state(struct cell_context *cell) const struct draw_context *const draw = cell->draw; struct cell_shader_info info; - info.num_outputs = draw_num_vs_outputs(draw); + info.num_outputs = draw_num_shader_outputs(draw); info.declarations = (uintptr_t) draw->vs.machine.Declarations; info.num_declarations = draw->vs.machine.NumDeclarations; info.instructions = (uintptr_t) draw->vs.machine.Instructions; diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index ae4c61efb3b..998944f77a3 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -35,6 +35,8 @@ #include "pipe/p_defines.h" #include "pipe/p_inlines.h" #include "pipe/internal/p_winsys_screen.h" + +#include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -49,9 +51,9 @@ cell_texture_layout(struct cell_texture *ct) { struct pipe_texture *pt = &ct->base; unsigned level; - unsigned width = pt->width[0]; - unsigned height = pt->height[0]; - unsigned depth = pt->depth[0]; + unsigned width = pt->width0; + unsigned height = pt->height0; + unsigned depth = pt->depth0; ct->buffer_size = 0; @@ -65,17 +67,11 @@ cell_texture_layout(struct cell_texture *ct) w_tile = align(width, TILE_SIZE); h_tile = align(height, TILE_SIZE); - pt->width[level] = width; - pt->height[level] = height; - pt->depth[level] = depth; - pt->nblocksx[level] = pf_get_nblocksx(&pt->block, w_tile); - pt->nblocksy[level] = pf_get_nblocksy(&pt->block, h_tile); - - ct->stride[level] = pt->nblocksx[level] * pt->block.size; + ct->stride[level] = util_format_get_stride(pt->format, w_tile); ct->level_offset[level] = ct->buffer_size; - size = pt->nblocksx[level] * pt->nblocksy[level] * pt->block.size; + size = ct->stride[level] * util_format_get_nblocksy(pt->format, h_tile); if (pt->target == PIPE_TEXTURE_CUBE) size *= 6; else @@ -83,9 +79,9 @@ cell_texture_layout(struct cell_texture *ct) ct->buffer_size += size; - width = minify(width); - height = minify(height); - depth = minify(depth); + width = u_minify(width, 1); + height = u_minify(height, 1); + depth = u_minify(depth, 1); } } @@ -276,8 +272,8 @@ cell_get_tex_surface(struct pipe_screen *screen, pipe_reference_init(&ps->reference, 1); pipe_texture_reference(&ps->texture, pt); ps->format = pt->format; - ps->width = pt->width[level]; - ps->height = pt->height[level]; + ps->width = u_minify(pt->width0, level); + ps->height = u_minify(pt->height0, level); ps->offset = ct->level_offset[level]; /* XXX may need to override usage flags (see sp_texture.c) */ ps->usage = usage; @@ -286,10 +282,12 @@ cell_get_tex_surface(struct pipe_screen *screen, ps->zslice = zslice; if (pt->target == PIPE_TEXTURE_CUBE) { - ps->offset += face * pt->nblocksy[level] * ct->stride[level]; + unsigned h_tile = align(ps->height, TILE_SIZE); + ps->offset += face * util_format_get_nblocksy(ps->format, h_tile) * ct->stride[level]; } else if (pt->target == PIPE_TEXTURE_3D) { - ps->offset += zslice * pt->nblocksy[level] * ct->stride[level]; + unsigned h_tile = align(ps->height, TILE_SIZE); + ps->offset += zslice * util_format_get_nblocksy(ps->format, h_tile) * ct->stride[level]; } else { assert(face == 0); @@ -330,14 +328,10 @@ cell_get_tex_transfer(struct pipe_screen *screen, if (ctrans) { struct pipe_transfer *pt = &ctrans->base; pipe_texture_reference(&pt->texture, texture); - pt->format = texture->format; - pt->block = texture->block; pt->x = x; pt->y = y; pt->width = w; pt->height = h; - pt->nblocksx = texture->nblocksx[level]; - pt->nblocksy = texture->nblocksy[level]; pt->stride = ct->stride[level]; pt->usage = usage; pt->face = face; @@ -347,10 +341,12 @@ cell_get_tex_transfer(struct pipe_screen *screen, ctrans->offset = ct->level_offset[level]; if (texture->target == PIPE_TEXTURE_CUBE) { - ctrans->offset += face * pt->nblocksy * pt->stride; + unsigned h_tile = align(u_minify(texture->height0, level), TILE_SIZE); + ctrans->offset += face * util_format_get_nblocksy(texture->format, h_tile) * pt->stride; } else if (texture->target == PIPE_TEXTURE_3D) { - ctrans->offset += zslice * pt->nblocksy * pt->stride; + unsigned h_tile = align(u_minify(texture->height0, level), TILE_SIZE); + ctrans->offset += zslice * util_format_get_nblocksy(texture->format, h_tile) * pt->stride; } else { assert(face == 0); @@ -386,8 +382,8 @@ cell_transfer_map(struct pipe_screen *screen, struct pipe_transfer *transfer) struct pipe_texture *pt = transfer->texture; struct cell_texture *ct = cell_texture(pt); const uint level = ctrans->base.level; - const uint texWidth = pt->width[level]; - const uint texHeight = pt->height[level]; + const uint texWidth = u_minify(pt->width0, level); + const uint texHeight = u_minify(pt->height0, level); const uint stride = ct->stride[level]; unsigned size; @@ -403,7 +399,8 @@ cell_transfer_map(struct pipe_screen *screen, struct pipe_transfer *transfer) * Create a buffer of ordinary memory for the linear texture. * This is the memory that the user will read/write. */ - size = pt->nblocksx[level] * pt->nblocksy[level] * pt->block.size; + size = util_format_get_stride(pt->format, align(texWidth, TILE_SIZE)) * + util_format_get_nblocksy(pt->format, align(texHeight, TILE_SIZE)); ctrans->map = align_malloc(size, 16); if (!ctrans->map) @@ -411,7 +408,7 @@ cell_transfer_map(struct pipe_screen *screen, struct pipe_transfer *transfer) if (transfer->usage & PIPE_TRANSFER_READ) { /* need to untwiddle the texture to make a linear version */ - const uint bpp = pf_get_size(ct->base.format); + const uint bpp = util_format_get_blocksize(ct->base.format); if (bpp == 4) { const uint *src = (uint *) (ct->mapped + ctrans->offset); uint *dst = ctrans->map; @@ -440,8 +437,8 @@ cell_transfer_unmap(struct pipe_screen *screen, struct pipe_texture *pt = transfer->texture; struct cell_texture *ct = cell_texture(pt); const uint level = ctrans->base.level; - const uint texWidth = pt->width[level]; - const uint texHeight = pt->height[level]; + const uint texWidth = u_minify(pt->width0, level); + const uint texHeight = u_minify(pt->height0, level); const uint stride = ct->stride[level]; if (!ct->mapped) { @@ -454,7 +451,7 @@ cell_transfer_unmap(struct pipe_screen *screen, /* The user wrote new texture data into the mapped buffer. * We need to convert the new linear data into the twiddled/tiled format. */ - const uint bpp = pf_get_size(ct->base.format); + const uint bpp = util_format_get_blocksize(ct->base.format); if (bpp == 4) { const uint *src = ctrans->map; uint *dst = (uint *) (ct->mapped + ctrans->offset); diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index 5c0179d9546..12b855a3db2 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -405,8 +405,6 @@ cmd_state_sampler(const struct cell_command_sampler *sampler) case PIPE_TEX_FILTER_LINEAR: spu.min_sample_texture_2d[unit] = sample_texture_2d_bilinear; break; - case PIPE_TEX_FILTER_ANISO: - /* fall-through, for now */ case PIPE_TEX_FILTER_NEAREST: spu.min_sample_texture_2d[unit] = sample_texture_2d_nearest; break; @@ -418,8 +416,6 @@ cmd_state_sampler(const struct cell_command_sampler *sampler) case PIPE_TEX_FILTER_LINEAR: spu.mag_sample_texture_2d[unit] = sample_texture_2d_bilinear; break; - case PIPE_TEX_FILTER_ANISO: - /* fall-through, for now */ case PIPE_TEX_FILTER_NEAREST: spu.mag_sample_texture_2d[unit] = sample_texture_2d_nearest; break; diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index 4c32b2d06d7..d86d8e09a51 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -108,10 +108,10 @@ for (CHAN = 0; CHAN < 4; CHAN++) #define IS_CHANNEL_ENABLED(INST, CHAN)\ - ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) + ((INST).Dst[0].Register.WriteMask & (1 << (CHAN))) #define IS_CHANNEL_ENABLED2(INST, CHAN)\ - ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN))) + ((INST).Dst[1].Register.WriteMask & (1 << (CHAN))) #define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ FOR_EACH_CHANNEL( CHAN )\ @@ -431,22 +431,22 @@ fetch_source( index.i[0] = index.i[1] = index.i[2] = - index.i[3] = reg->SrcRegister.Index; + index.i[3] = reg->Register.Index; - if (reg->SrcRegister.Indirect) { + if (reg->Register.Indirect) { union spu_exec_channel index2; union spu_exec_channel indir_index; index2.i[0] = index2.i[1] = index2.i[2] = - index2.i[3] = reg->SrcRegisterInd.Index; + index2.i[3] = reg->Indirect.Index; - swizzle = tgsi_util_get_src_register_swizzle(®->SrcRegisterInd, + swizzle = tgsi_util_get_src_register_swizzle(®->Indirect, CHAN_X); fetch_src_file_channel( mach, - reg->SrcRegisterInd.File, + reg->Indirect.File, swizzle, &index2, &indir_index ); @@ -454,8 +454,8 @@ fetch_source( index.q = si_a(index.q, indir_index.q); } - if( reg->SrcRegister.Dimension ) { - switch( reg->SrcRegister.File ) { + if( reg->Register.Dimension ) { + switch( reg->Register.File ) { case TGSI_FILE_INPUT: index.q = si_mpyi(index.q, 17); break; @@ -466,24 +466,24 @@ fetch_source( ASSERT( 0 ); } - index.i[0] += reg->SrcRegisterDim.Index; - index.i[1] += reg->SrcRegisterDim.Index; - index.i[2] += reg->SrcRegisterDim.Index; - index.i[3] += reg->SrcRegisterDim.Index; + index.i[0] += reg->Dimension.Index; + index.i[1] += reg->Dimension.Index; + index.i[2] += reg->Dimension.Index; + index.i[3] += reg->Dimension.Index; - if (reg->SrcRegisterDim.Indirect) { + if (reg->Dimension.Indirect) { union spu_exec_channel index2; union spu_exec_channel indir_index; index2.i[0] = index2.i[1] = index2.i[2] = - index2.i[3] = reg->SrcRegisterDimInd.Index; + index2.i[3] = reg->DimIndirect.Index; - swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterDimInd, CHAN_X ); + swizzle = tgsi_util_get_src_register_swizzle( ®->DimIndirect, CHAN_X ); fetch_src_file_channel( mach, - reg->SrcRegisterDimInd.File, + reg->DimIndirect.File, swizzle, &index2, &indir_index ); @@ -495,7 +495,7 @@ fetch_source( swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); fetch_src_file_channel( mach, - reg->SrcRegister.File, + reg->Register.File, swizzle, &index, chan ); @@ -517,7 +517,7 @@ fetch_source( break; } - if (reg->SrcRegisterExtMod.Complement) { + if (reg->RegisterExtMod.Complement) { chan->q = si_fs(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, chan->q); } } @@ -532,21 +532,21 @@ store_dest( { union spu_exec_channel *dst; - switch( reg->DstRegister.File ) { + switch( reg->Register.File ) { case TGSI_FILE_NULL: return; case TGSI_FILE_OUTPUT: dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] - + reg->DstRegister.Index].xyzw[chan_index]; + + reg->Register.Index].xyzw[chan_index]; break; case TGSI_FILE_TEMPORARY: - dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index]; + dst = &mach->Temps[reg->Register.Index].xyzw[chan_index]; break; case TGSI_FILE_ADDRESS: - dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index]; + dst = &mach->Addrs[reg->Register.Index].xyzw[chan_index]; break; default: @@ -583,10 +583,10 @@ store_dest( } #define FETCH(VAL,INDEX,CHAN)\ - fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN) + fetch_source (mach, VAL, &inst->Src[INDEX], CHAN) #define STORE(VAL,INDEX,CHAN)\ - store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN ) + store_dest (mach, VAL, &inst->Dst[INDEX], inst, CHAN ) /** @@ -612,7 +612,7 @@ exec_kil(struct spu_exec_machine *mach, /* unswizzle channel */ swizzle = tgsi_util_get_full_src_register_swizzle ( - &inst->FullSrcRegisters[0], + &inst->Src[0], chan_index); /* check if the component has not been already tested */ @@ -677,7 +677,7 @@ exec_tex(struct spu_exec_machine *mach, const struct tgsi_full_instruction *inst, boolean biasLod, boolean projected) { - const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; + const uint unit = inst->Src[1].Register.Index; union spu_exec_channel r[8]; uint chan_index; float lodBias; @@ -833,8 +833,8 @@ exec_declaration(struct spu_exec_machine *mach, unsigned first, last, mask; interpolation_func interp; - first = decl->DeclarationRange.First; - last = decl->DeclarationRange.Last; + first = decl->Range.First; + last = decl->Range.Last; mask = decl->Declaration.UsageMask; switch( decl->Declaration.Interpolate ) { @@ -1681,7 +1681,7 @@ exec_instruction( } break; - case TGSI_OPCODE_SHR: + case TGSI_OPCODE_ISHR: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); diff --git a/src/gallium/drivers/cell/spu/spu_util.c b/src/gallium/drivers/cell/spu/spu_util.c index c2c32b22d5a..24057e29e36 100644 --- a/src/gallium/drivers/cell/spu/spu_util.c +++ b/src/gallium/drivers/cell/spu/spu_util.c @@ -33,7 +33,7 @@ tgsi_util_get_full_src_register_swizzle( unsigned component ) { return tgsi_util_get_src_register_swizzle( - reg->SrcRegister, + reg->Register, component ); } @@ -45,10 +45,10 @@ tgsi_util_get_full_src_register_sign_mode( { unsigned sign_mode; - if( reg->SrcRegisterExtMod.Absolute ) { + if( reg->RegisterExtMod.Absolute ) { /* Consider only the post-abs negation. */ - if( reg->SrcRegisterExtMod.Negate ) { + if( reg->RegisterExtMod.Negate ) { sign_mode = TGSI_UTIL_SIGN_SET; } else { @@ -60,8 +60,8 @@ tgsi_util_get_full_src_register_sign_mode( unsigned negate; - negate = reg->SrcRegister.Negate; - if( reg->SrcRegisterExtMod.Negate ) { + negate = reg->Register.Negate; + if( reg->RegisterExtMod.Negate ) { negate = !negate; } diff --git a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c index 37184eac7b1..46e4338d98a 100644 --- a/src/gallium/drivers/failover/fo_context.c +++ b/src/gallium/drivers/failover/fo_context.c @@ -44,11 +44,19 @@ static void failover_destroy( struct pipe_context *pipe ) } +void failover_fail_over( struct failover_context *failover ) +{ + failover->dirty = TRUE; + failover->mode = FO_SW; +} + -static boolean failover_draw_elements( struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned prim, unsigned start, unsigned count) +static void failover_draw_elements( struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned prim, + unsigned start, + unsigned count) { struct failover_context *failover = failover_context( pipe ); @@ -62,24 +70,22 @@ static boolean failover_draw_elements( struct pipe_context *pipe, /* Try hardware: */ if (failover->mode == FO_HW) { - if (!failover->hw->draw_elements( failover->hw, - indexBuffer, - indexSize, - prim, - start, - count )) { - - failover->hw->flush( failover->hw, ~0, NULL ); - failover->mode = FO_SW; - } + failover->hw->draw_elements( failover->hw, + indexBuffer, + indexSize, + prim, + start, + count ); } /* Possibly try software: */ if (failover->mode == FO_SW) { - if (failover->dirty) + if (failover->dirty) { + failover->hw->flush( failover->hw, ~0, NULL ); failover_state_emit( failover ); + } failover->sw->draw_elements( failover->sw, indexBuffer, @@ -94,15 +100,13 @@ static boolean failover_draw_elements( struct pipe_context *pipe, */ failover->sw->flush( failover->sw, ~0, NULL ); } - - return TRUE; } -static boolean failover_draw_arrays( struct pipe_context *pipe, +static void failover_draw_arrays( struct pipe_context *pipe, unsigned prim, unsigned start, unsigned count) { - return failover_draw_elements(pipe, NULL, 0, prim, start, count); + failover_draw_elements(pipe, NULL, 0, prim, start, count); } static unsigned int diff --git a/src/gallium/drivers/failover/fo_context.h b/src/gallium/drivers/failover/fo_context.h index 9ba86ba8664..149393712a3 100644 --- a/src/gallium/drivers/failover/fo_context.h +++ b/src/gallium/drivers/failover/fo_context.h @@ -72,6 +72,7 @@ struct failover_context { */ const struct fo_state *blend; const struct fo_state *sampler[PIPE_MAX_SAMPLERS]; + const struct fo_state *vertex_samplers[PIPE_MAX_VERTEX_SAMPLERS]; const struct fo_state *depth_stencil; const struct fo_state *rasterizer; const struct fo_state *fragment_shader; @@ -83,6 +84,7 @@ struct failover_context { struct pipe_poly_stipple poly_stipple; struct pipe_scissor_state scissor; struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; + struct pipe_texture *vertex_textures[PIPE_MAX_VERTEX_SAMPLERS]; struct pipe_viewport_state viewport; struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; struct pipe_vertex_element vertex_elements[PIPE_MAX_ATTRIBS]; @@ -92,11 +94,15 @@ struct failover_context { void *sw_sampler_state[PIPE_MAX_SAMPLERS]; void *hw_sampler_state[PIPE_MAX_SAMPLERS]; + void *sw_vertex_sampler_state[PIPE_MAX_VERTEX_SAMPLERS]; + void *hw_vertex_sampler_state[PIPE_MAX_VERTEX_SAMPLERS]; unsigned dirty; unsigned num_samplers; + unsigned num_vertex_samplers; unsigned num_textures; + unsigned num_vertex_textures; unsigned mode; struct pipe_context *hw; diff --git a/src/gallium/drivers/failover/fo_state.c b/src/gallium/drivers/failover/fo_state.c index c8eb9262994..3f5f5560323 100644 --- a/src/gallium/drivers/failover/fo_state.c +++ b/src/gallium/drivers/failover/fo_state.c @@ -322,8 +322,9 @@ failover_create_sampler_state(struct pipe_context *pipe, } static void -failover_bind_sampler_states(struct pipe_context *pipe, - unsigned num, void **sampler) +failover_bind_fragment_sampler_states(struct pipe_context *pipe, + unsigned num, + void **sampler) { struct failover_context *failover = failover_context(pipe); struct fo_state *state = (struct fo_state*)sampler; @@ -339,10 +340,40 @@ failover_bind_sampler_states(struct pipe_context *pipe, } failover->dirty |= FO_NEW_SAMPLER; failover->num_samplers = num; - failover->sw->bind_sampler_states(failover->sw, num, - failover->sw_sampler_state); - failover->hw->bind_sampler_states(failover->hw, num, - failover->hw_sampler_state); + failover->sw->bind_fragment_sampler_states(failover->sw, num, + failover->sw_sampler_state); + failover->hw->bind_fragment_sampler_states(failover->hw, num, + failover->hw_sampler_state); +} + +static void +failover_bind_vertex_sampler_states(struct pipe_context *pipe, + unsigned num_samplers, + void **samplers) +{ + struct failover_context *failover = failover_context(pipe); + struct fo_state *state = (struct fo_state*)samplers; + uint i; + + assert(num_samplers <= PIPE_MAX_VERTEX_SAMPLERS); + + /* Check for no-op */ + if (num_samplers == failover->num_vertex_samplers && + !memcmp(failover->vertex_samplers, samplers, num_samplers * sizeof(void *))) { + return; + } + for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { + failover->sw_vertex_sampler_state[i] = i < num_samplers ? state[i].sw_state : NULL; + failover->hw_vertex_sampler_state[i] = i < num_samplers ? state[i].hw_state : NULL; + } + failover->dirty |= FO_NEW_SAMPLER; + failover->num_vertex_samplers = num_samplers; + failover->sw->bind_vertex_sampler_states(failover->sw, + num_samplers, + failover->sw_vertex_sampler_state); + failover->hw->bind_vertex_sampler_states(failover->hw, + num_samplers, + failover->hw_vertex_sampler_state); } static void @@ -360,9 +391,9 @@ failover_delete_sampler_state(struct pipe_context *pipe, void *sampler) static void -failover_set_sampler_textures(struct pipe_context *pipe, - unsigned num, - struct pipe_texture **texture) +failover_set_fragment_sampler_textures(struct pipe_context *pipe, + unsigned num, + struct pipe_texture **texture) { struct failover_context *failover = failover_context(pipe); uint i; @@ -381,8 +412,38 @@ failover_set_sampler_textures(struct pipe_context *pipe, NULL); failover->dirty |= FO_NEW_TEXTURE; failover->num_textures = num; - failover->sw->set_sampler_textures( failover->sw, num, texture ); - failover->hw->set_sampler_textures( failover->hw, num, texture ); + failover->sw->set_fragment_sampler_textures( failover->sw, num, texture ); + failover->hw->set_fragment_sampler_textures( failover->hw, num, texture ); +} + + +static void +failover_set_vertex_sampler_textures(struct pipe_context *pipe, + unsigned num_textures, + struct pipe_texture **textures) +{ + struct failover_context *failover = failover_context(pipe); + uint i; + + assert(num_textures <= PIPE_MAX_VERTEX_SAMPLERS); + + /* Check for no-op */ + if (num_textures == failover->num_vertex_textures && + !memcmp(failover->vertex_textures, textures, num_textures * sizeof(struct pipe_texture *))) { + return; + } + for (i = 0; i < num_textures; i++) { + pipe_texture_reference((struct pipe_texture **)&failover->vertex_textures[i], + textures[i]); + } + for (i = num_textures; i < failover->num_vertex_textures; i++) { + pipe_texture_reference((struct pipe_texture **)&failover->vertex_textures[i], + NULL); + } + failover->dirty |= FO_NEW_TEXTURE; + failover->num_vertex_textures = num_textures; + failover->sw->set_vertex_sampler_textures(failover->sw, num_textures, textures); + failover->hw->set_vertex_sampler_textures(failover->hw, num_textures, textures); } @@ -453,7 +514,8 @@ failover_init_state_functions( struct failover_context *failover ) failover->pipe.bind_blend_state = failover_bind_blend_state; failover->pipe.delete_blend_state = failover_delete_blend_state; failover->pipe.create_sampler_state = failover_create_sampler_state; - failover->pipe.bind_sampler_states = failover_bind_sampler_states; + failover->pipe.bind_fragment_sampler_states = failover_bind_fragment_sampler_states; + failover->pipe.bind_vertex_sampler_states = failover_bind_vertex_sampler_states; failover->pipe.delete_sampler_state = failover_delete_sampler_state; failover->pipe.create_depth_stencil_alpha_state = failover_create_depth_stencil_state; failover->pipe.bind_depth_stencil_alpha_state = failover_bind_depth_stencil_state; @@ -473,7 +535,8 @@ failover_init_state_functions( struct failover_context *failover ) failover->pipe.set_framebuffer_state = failover_set_framebuffer_state; failover->pipe.set_polygon_stipple = failover_set_polygon_stipple; failover->pipe.set_scissor_state = failover_set_scissor_state; - failover->pipe.set_sampler_textures = failover_set_sampler_textures; + failover->pipe.set_fragment_sampler_textures = failover_set_fragment_sampler_textures; + failover->pipe.set_vertex_sampler_textures = failover_set_vertex_sampler_textures; failover->pipe.set_viewport_state = failover_set_viewport_state; failover->pipe.set_vertex_buffers = failover_set_vertex_buffers; failover->pipe.set_vertex_elements = failover_set_vertex_elements; diff --git a/src/gallium/drivers/failover/fo_state_emit.c b/src/gallium/drivers/failover/fo_state_emit.c index bd4fce9d209..a3341e33f80 100644 --- a/src/gallium/drivers/failover/fo_state_emit.c +++ b/src/gallium/drivers/failover/fo_state_emit.c @@ -92,13 +92,19 @@ failover_state_emit( struct failover_context *failover ) failover->sw->set_viewport_state( failover->sw, &failover->viewport ); if (failover->dirty & FO_NEW_SAMPLER) { - failover->sw->bind_sampler_states( failover->sw, failover->num_samplers, - failover->sw_sampler_state ); + failover->sw->bind_fragment_sampler_states( failover->sw, failover->num_samplers, + failover->sw_sampler_state ); + failover->sw->bind_vertex_sampler_states(failover->sw, + failover->num_vertex_samplers, + failover->sw_vertex_sampler_state); } if (failover->dirty & FO_NEW_TEXTURE) { - failover->sw->set_sampler_textures( failover->sw, failover->num_textures, - failover->texture ); + failover->sw->set_fragment_sampler_textures( failover->sw, failover->num_textures, + failover->texture ); + failover->sw->set_vertex_sampler_textures(failover->sw, + failover->num_vertex_textures, + failover->vertex_textures); } if (failover->dirty & FO_NEW_VERTEX_BUFFER) { diff --git a/src/gallium/drivers/failover/fo_winsys.h b/src/gallium/drivers/failover/fo_winsys.h index a8ce997a1f6..533122b69da 100644 --- a/src/gallium/drivers/failover/fo_winsys.h +++ b/src/gallium/drivers/failover/fo_winsys.h @@ -36,10 +36,13 @@ struct pipe_context; +struct failover_context; struct pipe_context *failover_create( struct pipe_context *hw, struct pipe_context *sw ); +void failover_fail_over( struct failover_context *failover ); + #endif /* FO_WINSYS_H */ diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c index 94c8aee30fe..89feeade756 100644 --- a/src/gallium/drivers/i915/i915_context.c +++ b/src/gallium/drivers/i915/i915_context.c @@ -45,7 +45,7 @@ */ -static boolean +static void i915_draw_range_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, @@ -84,7 +84,7 @@ i915_draw_range_elements(struct pipe_context *pipe, } - draw_set_mapped_constant_buffer(draw, + draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, i915->current.constants[PIPE_SHADER_VERTEX], (i915->current.num_user_constants[PIPE_SHADER_VERTEX] * 4 * sizeof(float))); @@ -106,27 +106,25 @@ i915_draw_range_elements(struct pipe_context *pipe, pipe_buffer_unmap(pipe->screen, indexBuffer); draw_set_mapped_element_buffer_range(draw, 0, start, start + count - 1, NULL); } - - return TRUE; } -static boolean +static void i915_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned prim, unsigned start, unsigned count) { - return i915_draw_range_elements(pipe, indexBuffer, - indexSize, - 0, 0xffffffff, - prim, start, count); + i915_draw_range_elements(pipe, indexBuffer, + indexSize, + 0, 0xffffffff, + prim, start, count); } -static boolean +static void i915_draw_arrays(struct pipe_context *pipe, unsigned prim, unsigned start, unsigned count) { - return i915_draw_elements(pipe, NULL, 0, prim, start, count); + i915_draw_elements(pipe, NULL, 0, prim, start, count); } diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c index 379d47e79a3..25c53210be8 100644 --- a/src/gallium/drivers/i915/i915_fpc_translate.c +++ b/src/gallium/drivers/i915/i915_fpc_translate.c @@ -143,12 +143,12 @@ static uint src_vector(struct i915_fp_compile *p, const struct tgsi_full_src_register *source) { - uint index = source->SrcRegister.Index; + uint index = source->Register.Index; uint src = 0, sem_name, sem_ind; - switch (source->SrcRegister.File) { + switch (source->Register.File) { case TGSI_FILE_TEMPORARY: - if (source->SrcRegister.Index >= I915_MAX_TEMPORARY) { + if (source->Register.Index >= I915_MAX_TEMPORARY) { i915_program_error(p, "Exceeded max temporary reg"); return 0; } @@ -215,26 +215,25 @@ src_vector(struct i915_fp_compile *p, } src = swizzle(src, - source->SrcRegister.SwizzleX, - source->SrcRegister.SwizzleY, - source->SrcRegister.SwizzleZ, - source->SrcRegister.SwizzleW); + source->Register.SwizzleX, + source->Register.SwizzleY, + source->Register.SwizzleZ, + source->Register.SwizzleW); /* There's both negate-all-components and per-component negation. * Try to handle both here. */ { - int n = source->SrcRegister.Negate; + int n = source->Register.Negate; src = negate(src, n, n, n, n); } - /* no abs() or post-abs negation */ + /* no abs() */ #if 0 /* XXX assertions disabled to allow arbfplight.c to run */ /* XXX enable these assertions, or fix things */ - assert(!source->SrcRegisterExtMod.Absolute); - assert(!source->SrcRegisterExtMod.Negate); + assert(!source->Register.Absolute); #endif return src; } @@ -247,10 +246,10 @@ static uint get_result_vector(struct i915_fp_compile *p, const struct tgsi_full_dst_register *dest) { - switch (dest->DstRegister.File) { + switch (dest->Register.File) { case TGSI_FILE_OUTPUT: { - uint sem_name = p->shader->info.output_semantic_name[dest->DstRegister.Index]; + uint sem_name = p->shader->info.output_semantic_name[dest->Register.Index]; switch (sem_name) { case TGSI_SEMANTIC_POSITION: return UREG(REG_TYPE_OD, 0); @@ -262,7 +261,7 @@ get_result_vector(struct i915_fp_compile *p, } } case TGSI_FILE_TEMPORARY: - return UREG(REG_TYPE_R, dest->DstRegister.Index); + return UREG(REG_TYPE_R, dest->Register.Index); default: i915_program_error(p, "Bad inst->DstReg.File"); return 0; @@ -277,7 +276,7 @@ static uint get_result_flags(const struct tgsi_full_instruction *inst) { const uint writeMask - = inst->FullDstRegisters[0].DstRegister.WriteMask; + = inst->Dst[0].Register.WriteMask; uint flags = 0x0; if (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE) @@ -339,14 +338,14 @@ emit_tex(struct i915_fp_compile *p, const struct tgsi_full_instruction *inst, uint opcode) { - uint texture = inst->InstructionExtTexture.Texture; - uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; + uint texture = inst->Texture.Texture; + uint unit = inst->Src[1].Register.Index; uint tex = translate_tex_src_target( p, texture ); uint sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex); - uint coord = src_vector( p, &inst->FullSrcRegisters[0]); + uint coord = src_vector( p, &inst->Src[0]); i915_emit_texld( p, - get_result_vector( p, &inst->FullDstRegisters[0] ), + get_result_vector( p, &inst->Dst[0] ), get_result_flags( inst ), sampler, coord, @@ -368,13 +367,13 @@ emit_simple_arith(struct i915_fp_compile *p, assert(numArgs <= 3); - arg1 = (numArgs < 1) ? 0 : src_vector( p, &inst->FullSrcRegisters[0] ); - arg2 = (numArgs < 2) ? 0 : src_vector( p, &inst->FullSrcRegisters[1] ); - arg3 = (numArgs < 3) ? 0 : src_vector( p, &inst->FullSrcRegisters[2] ); + arg1 = (numArgs < 1) ? 0 : src_vector( p, &inst->Src[0] ); + arg2 = (numArgs < 2) ? 0 : src_vector( p, &inst->Src[1] ); + arg3 = (numArgs < 3) ? 0 : src_vector( p, &inst->Src[2] ); i915_emit_arith( p, opcode, - get_result_vector( p, &inst->FullDstRegisters[0]), + get_result_vector( p, &inst->Dst[0]), get_result_flags( inst ), 0, arg1, arg2, @@ -394,8 +393,8 @@ emit_simple_arith_swap2(struct i915_fp_compile *p, /* transpose first two registers */ inst2 = *inst; - inst2.FullSrcRegisters[0] = inst->FullSrcRegisters[1]; - inst2.FullSrcRegisters[1] = inst->FullSrcRegisters[0]; + inst2.Src[0] = inst->Src[1]; + inst2.Src[1] = inst->Src[0]; emit_simple_arith(p, &inst2, opcode, numArgs); } @@ -424,10 +423,10 @@ i915_translate_instruction(struct i915_fp_compile *p, switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ABS: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src0 = src_vector(p, &inst->Src[0]); i915_emit_arith(p, A0_MAX, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), 0, src0, negate(src0, 1, 1, 1, 1), 0); break; @@ -437,17 +436,17 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_CMP: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); - src1 = src_vector(p, &inst->FullSrcRegisters[1]); - src2 = src_vector(p, &inst->FullSrcRegisters[2]); + src0 = src_vector(p, &inst->Src[0]); + src1 = src_vector(p, &inst->Src[1]); + src2 = src_vector(p, &inst->Src[2]); i915_emit_arith(p, A0_CMP, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), 0, src0, src2, src1); /* NOTE: order of src2, src1 */ break; case TGSI_OPCODE_COS: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src0 = src_vector(p, &inst->Src[0]); tmp = i915_get_utemp(p); i915_emit_arith(p, @@ -490,7 +489,7 @@ i915_translate_instruction(struct i915_fp_compile *p, i915_emit_arith(p, A0_DP4, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), 0, swizzle(tmp, ONE, Z, Y, X), i915_emit_const4fv(p, cos_constants), 0); @@ -505,19 +504,19 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_DPH: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); - src1 = src_vector(p, &inst->FullSrcRegisters[1]); + src0 = src_vector(p, &inst->Src[0]); + src1 = src_vector(p, &inst->Src[1]); i915_emit_arith(p, A0_DP4, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), 0, swizzle(src0, X, Y, Z, ONE), src1, 0); break; case TGSI_OPCODE_DST: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); - src1 = src_vector(p, &inst->FullSrcRegisters[1]); + src0 = src_vector(p, &inst->Src[0]); + src1 = src_vector(p, &inst->Src[1]); /* result[0] = 1 * 1; * result[1] = a[1] * b[1]; @@ -526,7 +525,7 @@ i915_translate_instruction(struct i915_fp_compile *p, */ i915_emit_arith(p, A0_MUL, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), 0, swizzle(src0, ONE, Y, Z, ONE), swizzle(src1, ONE, Y, ONE, W), 0); @@ -537,11 +536,11 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_EX2: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src0 = src_vector(p, &inst->Src[0]); i915_emit_arith(p, A0_EXP, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0, 0); break; @@ -556,7 +555,7 @@ i915_translate_instruction(struct i915_fp_compile *p, case TGSI_OPCODE_KIL: /* kill if src[0].x < 0 || src[0].y < 0 ... */ - src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src0 = src_vector(p, &inst->Src[0]); tmp = i915_get_utemp(p); i915_emit_texld(p, @@ -572,17 +571,17 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_LG2: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src0 = src_vector(p, &inst->Src[0]); i915_emit_arith(p, A0_LOG, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0, 0); break; case TGSI_OPCODE_LIT: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src0 = src_vector(p, &inst->Src[0]); tmp = i915_get_utemp(p); /* tmp = max( a.xyzw, a.00zw ) @@ -606,7 +605,7 @@ i915_translate_instruction(struct i915_fp_compile *p, swizzle(tmp, Y, Y, Y, Y), 0, 0); i915_emit_arith(p, A0_CMP, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), 0, negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0), swizzle(tmp, ONE, X, ZERO, ONE), @@ -615,9 +614,9 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_LRP: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); - src1 = src_vector(p, &inst->FullSrcRegisters[1]); - src2 = src_vector(p, &inst->FullSrcRegisters[2]); + src0 = src_vector(p, &inst->Src[0]); + src1 = src_vector(p, &inst->Src[1]); + src2 = src_vector(p, &inst->Src[2]); flags = get_result_flags(inst); tmp = i915_get_utemp(p); @@ -632,7 +631,7 @@ i915_translate_instruction(struct i915_fp_compile *p, flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2); i915_emit_arith(p, A0_MAD, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp); break; @@ -645,8 +644,8 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_MIN: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); - src1 = src_vector(p, &inst->FullSrcRegisters[1]); + src0 = src_vector(p, &inst->Src[0]); + src1 = src_vector(p, &inst->Src[1]); tmp = i915_get_utemp(p); flags = get_result_flags(inst); @@ -658,7 +657,7 @@ i915_translate_instruction(struct i915_fp_compile *p, i915_emit_arith(p, A0_MOV, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), flags, 0, negate(tmp, 1, 1, 1, 1), 0, 0); break; @@ -671,8 +670,8 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_POW: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); - src1 = src_vector(p, &inst->FullSrcRegisters[1]); + src0 = src_vector(p, &inst->Src[0]); + src1 = src_vector(p, &inst->Src[1]); tmp = i915_get_utemp(p); flags = get_result_flags(inst); @@ -687,7 +686,7 @@ i915_translate_instruction(struct i915_fp_compile *p, i915_emit_arith(p, A0_EXP, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), flags, 0, swizzle(tmp, X, X, X, X), 0, 0); break; @@ -696,27 +695,27 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_RCP: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src0 = src_vector(p, &inst->Src[0]); i915_emit_arith(p, A0_RCP, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0, 0); break; case TGSI_OPCODE_RSQ: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src0 = src_vector(p, &inst->Src[0]); i915_emit_arith(p, A0_RSQ, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0, 0); break; case TGSI_OPCODE_SCS: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src0 = src_vector(p, &inst->Src[0]); tmp = i915_get_utemp(p); /* @@ -739,7 +738,7 @@ i915_translate_instruction(struct i915_fp_compile *p, swizzle(tmp, X, Y, X, Y), swizzle(tmp, X, X, ONE, ONE), 0); - writemask = inst->FullDstRegisters[0].DstRegister.WriteMask; + writemask = inst->Dst[0].Register.WriteMask; if (writemask & TGSI_WRITEMASK_Y) { uint tmp1; @@ -757,7 +756,7 @@ i915_translate_instruction(struct i915_fp_compile *p, i915_emit_arith(p, A0_DP4, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), A0_DEST_CHANNEL_Y, 0, swizzle(tmp1, W, Z, Y, X), i915_emit_const4fv(p, sin_constants), 0); @@ -772,7 +771,7 @@ i915_translate_instruction(struct i915_fp_compile *p, i915_emit_arith(p, A0_DP4, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), A0_DEST_CHANNEL_X, 0, swizzle(tmp, ONE, Z, Y, X), i915_emit_const4fv(p, cos_constants), 0); @@ -789,7 +788,7 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_SIN: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src0 = src_vector(p, &inst->Src[0]); tmp = i915_get_utemp(p); i915_emit_arith(p, @@ -832,7 +831,7 @@ i915_translate_instruction(struct i915_fp_compile *p, i915_emit_arith(p, A0_DP4, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), 0, swizzle(tmp, W, Z, Y, X), i915_emit_const4fv(p, sin_constants), 0); @@ -848,12 +847,12 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_SUB: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); - src1 = src_vector(p, &inst->FullSrcRegisters[1]); + src0 = src_vector(p, &inst->Src[0]); + src1 = src_vector(p, &inst->Src[1]); i915_emit_arith(p, A0_ADD, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), 0, src0, negate(src1, 1, 1, 1, 1), 0); break; @@ -877,8 +876,8 @@ i915_translate_instruction(struct i915_fp_compile *p, * result.z = src0.x * src1.y - src0.y * src1.x; * result.w = undef; */ - src0 = src_vector(p, &inst->FullSrcRegisters[0]); - src1 = src_vector(p, &inst->FullSrcRegisters[1]); + src0 = src_vector(p, &inst->Src[0]); + src1 = src_vector(p, &inst->Src[1]); tmp = i915_get_utemp(p); i915_emit_arith(p, @@ -889,7 +888,7 @@ i915_translate_instruction(struct i915_fp_compile *p, i915_emit_arith(p, A0_MAD, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), 0, swizzle(src0, Y, Z, X, ONE), swizzle(src1, Z, X, Y, ONE), @@ -929,8 +928,8 @@ i915_translate_instructions(struct i915_fp_compile *p, if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_CONSTANT) { uint i; - for (i = parse.FullToken.FullDeclaration.DeclarationRange.First; - i <= parse.FullToken.FullDeclaration.DeclarationRange.Last; + for (i = parse.FullToken.FullDeclaration.Range.First; + i <= parse.FullToken.FullDeclaration.Range.Last; i++) { assert(ifs->constant_flags[i] == 0x0); ifs->constant_flags[i] = I915_CONSTFLAG_USER; @@ -940,8 +939,8 @@ i915_translate_instructions(struct i915_fp_compile *p, else if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_TEMPORARY) { uint i; - for (i = parse.FullToken.FullDeclaration.DeclarationRange.First; - i <= parse.FullToken.FullDeclaration.DeclarationRange.Last; + for (i = parse.FullToken.FullDeclaration.Range.First; + i <= parse.FullToken.FullDeclaration.Range.Last; i++) { assert(i < I915_MAX_TEMPORARY); /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */ diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c index a04668d32fa..5f5b6f8e185 100644 --- a/src/gallium/drivers/i915/i915_state.c +++ b/src/gallium/drivers/i915/i915_state.c @@ -74,8 +74,6 @@ static unsigned translate_img_filter( unsigned filter ) return FILTER_NEAREST; case PIPE_TEX_FILTER_LINEAR: return FILTER_LINEAR; - case PIPE_TEX_FILTER_ANISO: - return FILTER_ANISOTROPIC; default: assert(0); return FILTER_NEAREST; @@ -221,6 +219,9 @@ i915_create_sampler_state(struct pipe_context *pipe, minFilt = translate_img_filter( sampler->min_img_filter ); magFilt = translate_img_filter( sampler->mag_img_filter ); + if (sampler->max_anisotropy > 1.0) + minFilt = magFilt = FILTER_ANISOTROPIC; + if (sampler->max_anisotropy > 2.0) { cso->state[0] |= SS2_MAX_ANISO_4; } @@ -752,22 +753,15 @@ static void i915_set_vertex_elements(struct pipe_context *pipe, } -static void i915_set_edgeflags(struct pipe_context *pipe, - const unsigned *bitfield) -{ - /* TODO do something here */ -} - void i915_init_state_functions( struct i915_context *i915 ) { - i915->base.set_edgeflags = i915_set_edgeflags; i915->base.create_blend_state = i915_create_blend_state; i915->base.bind_blend_state = i915_bind_blend_state; i915->base.delete_blend_state = i915_delete_blend_state; i915->base.create_sampler_state = i915_create_sampler_state; - i915->base.bind_sampler_states = i915_bind_sampler_states; + i915->base.bind_fragment_sampler_states = i915_bind_sampler_states; i915->base.delete_sampler_state = i915_delete_sampler_state; i915->base.create_depth_stencil_alpha_state = i915_create_depth_stencil_state; @@ -791,7 +785,7 @@ i915_init_state_functions( struct i915_context *i915 ) i915->base.set_polygon_stipple = i915_set_polygon_stipple; i915->base.set_scissor_state = i915_set_scissor_state; - i915->base.set_sampler_textures = i915_set_sampler_textures; + i915->base.set_fragment_sampler_textures = i915_set_sampler_textures; i915->base.set_viewport_state = i915_set_viewport_state; i915->base.set_vertex_buffers = i915_set_vertex_buffers; i915->base.set_vertex_elements = i915_set_vertex_elements; diff --git a/src/gallium/drivers/i915/i915_state_derived.c b/src/gallium/drivers/i915/i915_state_derived.c index 178d4e8781d..03dd5091a61 100644 --- a/src/gallium/drivers/i915/i915_state_derived.c +++ b/src/gallium/drivers/i915/i915_state_derived.c @@ -84,7 +84,7 @@ static void calculate_vertex_layout( struct i915_context *i915 ) /* pos */ - src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_POSITION, 0); + src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_POSITION, 0); if (needW) { draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src); vinfo.hwfmt[0] |= S4_VFMT_XYZW; @@ -101,21 +101,21 @@ static void calculate_vertex_layout( struct i915_context *i915 ) /* primary color */ if (colors[0]) { - src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_COLOR, 0); + src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_COLOR, 0); draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src); vinfo.hwfmt[0] |= S4_VFMT_COLOR; } /* secondary color */ if (colors[1]) { - src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_COLOR, 1); + src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_COLOR, 1); draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src); vinfo.hwfmt[0] |= S4_VFMT_SPEC_FOG; } /* fog coord, not fog blend factor */ if (fog) { - src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_FOG, 0); + src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_FOG, 0); draw_emit_vertex_attr(&vinfo, EMIT_1F, INTERP_PERSPECTIVE, src); vinfo.hwfmt[0] |= S4_VFMT_FOG_PARAM; } @@ -125,7 +125,7 @@ static void calculate_vertex_layout( struct i915_context *i915 ) uint hwtc; if (texCoords[i]) { hwtc = TEXCOORDFMT_4D; - src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_GENERIC, i); + src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_GENERIC, i); draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); } else { diff --git a/src/gallium/drivers/i915/i915_state_sampler.c b/src/gallium/drivers/i915/i915_state_sampler.c index c5e9084d12e..cbac4175c8f 100644 --- a/src/gallium/drivers/i915/i915_state_sampler.c +++ b/src/gallium/drivers/i915/i915_state_sampler.c @@ -231,7 +231,7 @@ i915_update_texture(struct i915_context *i915, { const struct pipe_texture *pt = &tex->base; uint format, pitch; - const uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0]; + const uint width = pt->width0, height = pt->height0, depth = pt->depth0; const uint num_levels = pt->last_level; unsigned max_lod = num_levels * 4; unsigned tiled = MS3_USE_FENCE_REGS; diff --git a/src/gallium/drivers/i915/i915_surface.c b/src/gallium/drivers/i915/i915_surface.c index ab8331f3e64..c693eb30e87 100644 --- a/src/gallium/drivers/i915/i915_surface.c +++ b/src/gallium/drivers/i915/i915_surface.c @@ -32,6 +32,7 @@ #include "pipe/p_inlines.h" #include "pipe/p_inlines.h" #include "pipe/internal/p_winsys_screen.h" +#include "util/u_format.h" #include "util/u_tile.h" #include "util/u_rect.h" @@ -48,17 +49,19 @@ i915_surface_copy(struct pipe_context *pipe, { struct i915_texture *dst_tex = (struct i915_texture *)dst->texture; struct i915_texture *src_tex = (struct i915_texture *)src->texture; + struct pipe_texture *dpt = &dst_tex->base; + struct pipe_texture *spt = &src_tex->base; assert( dst != src ); - assert( dst_tex->base.block.size == src_tex->base.block.size ); - assert( dst_tex->base.block.width == src_tex->base.block.height ); - assert( dst_tex->base.block.height == src_tex->base.block.height ); - assert( dst_tex->base.block.width == 1 ); - assert( dst_tex->base.block.height == 1 ); + assert( util_format_get_blocksize(dpt->format) == util_format_get_blocksize(spt->format) ); + assert( util_format_get_blockwidth(dpt->format) == util_format_get_blockwidth(spt->format) ); + assert( util_format_get_blockheight(dpt->format) == util_format_get_blockheight(spt->format) ); + assert( util_format_get_blockwidth(dpt->format) == 1 ); + assert( util_format_get_blockheight(dpt->format) == 1 ); i915_copy_blit( i915_context(pipe), FALSE, - dst_tex->base.block.size, + util_format_get_blocksize(dpt->format), (unsigned short) src_tex->stride, src_tex->buffer, src->offset, (unsigned short) dst_tex->stride, dst_tex->buffer, dst->offset, (short) srcx, (short) srcy, (short) dstx, (short) dsty, (short) width, (short) height ); @@ -72,12 +75,13 @@ i915_surface_fill(struct pipe_context *pipe, unsigned width, unsigned height, unsigned value) { struct i915_texture *tex = (struct i915_texture *)dst->texture; + struct pipe_texture *pt = &tex->base; - assert(tex->base.block.width == 1); - assert(tex->base.block.height == 1); + assert(util_format_get_blockwidth(pt->format) == 1); + assert(util_format_get_blockheight(pt->format) == 1); i915_fill_blit( i915_context(pipe), - tex->base.block.size, + util_format_get_blocksize(pt->format), (unsigned short) tex->stride, tex->buffer, dst->offset, (short) dstx, (short) dsty, diff --git a/src/gallium/drivers/i915/i915_texture.c b/src/gallium/drivers/i915/i915_texture.c index 286c9ace8e5..50a9e19094b 100644 --- a/src/gallium/drivers/i915/i915_texture.c +++ b/src/gallium/drivers/i915/i915_texture.c @@ -35,6 +35,7 @@ #include "pipe/p_defines.h" #include "pipe/p_inlines.h" #include "pipe/internal/p_winsys_screen.h" +#include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -74,6 +75,9 @@ static const int step_offsets[6][2] = { {-1, 1} }; +/* XXX really need twice the size if x is already pot? + Otherwise just use util_next_power_of_two? +*/ static unsigned power_of_two(unsigned x) { @@ -83,13 +87,6 @@ power_of_two(unsigned x) return value; } -static unsigned -round_up(unsigned n, unsigned multiple) -{ - return (n + multiple - 1) & ~(multiple - 1); -} - - /* * More advanced helper funcs */ @@ -101,17 +98,8 @@ i915_miptree_set_level_info(struct i915_texture *tex, unsigned nr_images, unsigned w, unsigned h, unsigned d) { - struct pipe_texture *pt = &tex->base; - assert(level < PIPE_MAX_TEXTURE_LEVELS); - pt->width[level] = w; - pt->height[level] = h; - pt->depth[level] = d; - - pt->nblocksx[level] = pf_get_nblocksx(&pt->block, w); - pt->nblocksy[level] = pf_get_nblocksy(&pt->block, h); - tex->nr_images[level] = nr_images; /* @@ -142,7 +130,7 @@ i915_miptree_set_image_offset(struct i915_texture *tex, assert(img < tex->nr_images[level]); - tex->image_offset[level][img] = y * tex->stride + x * tex->base.block.size; + tex->image_offset[level][img] = y * tex->stride + x * util_format_get_blocksize(tex->base.format); /* printf("%s level %d img %d pos %d,%d image_offset %x\n", @@ -164,28 +152,28 @@ i915_scanout_layout(struct i915_texture *tex) { struct pipe_texture *pt = &tex->base; - if (pt->last_level > 0 || pt->block.size != 4) + if (pt->last_level > 0 || util_format_get_blocksize(pt->format) != 4) return FALSE; i915_miptree_set_level_info(tex, 0, 1, - tex->base.width[0], - tex->base.height[0], + pt->width0, + pt->height0, 1); i915_miptree_set_image_offset(tex, 0, 0, 0, 0); - if (tex->base.width[0] >= 240) { - tex->stride = power_of_two(tex->base.nblocksx[0] * pt->block.size); - tex->total_nblocksy = round_up(tex->base.nblocksy[0], 8); + if (pt->width0 >= 240) { + tex->stride = power_of_two(util_format_get_stride(pt->format, pt->width0)); + tex->total_nblocksy = align(util_format_get_nblocksy(pt->format, pt->height0), 8); tex->hw_tiled = INTEL_TILE_X; - } else if (tex->base.width[0] == 64 && tex->base.height[0] == 64) { - tex->stride = power_of_two(tex->base.nblocksx[0] * pt->block.size); - tex->total_nblocksy = round_up(tex->base.nblocksy[0], 8); + } else if (pt->width0 == 64 && pt->height0 == 64) { + tex->stride = power_of_two(util_format_get_stride(pt->format, pt->width0)); + tex->total_nblocksy = align(util_format_get_nblocksy(pt->format, pt->height0), 8); } else { return FALSE; } debug_printf("%s size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__, - tex->base.width[0], tex->base.height[0], pt->block.size, + pt->width0, pt->height0, util_format_get_blocksize(pt->format), tex->stride, tex->total_nblocksy, tex->stride * tex->total_nblocksy); return TRUE; @@ -199,25 +187,25 @@ i915_display_target_layout(struct i915_texture *tex) { struct pipe_texture *pt = &tex->base; - if (pt->last_level > 0 || pt->block.size != 4) + if (pt->last_level > 0 || util_format_get_blocksize(pt->format) != 4) return FALSE; /* fallback to normal textures for small textures */ - if (tex->base.width[0] < 240) + if (pt->width0 < 240) return FALSE; i915_miptree_set_level_info(tex, 0, 1, - tex->base.width[0], - tex->base.height[0], + pt->width0, + pt->height0, 1); i915_miptree_set_image_offset(tex, 0, 0, 0, 0); - tex->stride = power_of_two(tex->base.nblocksx[0] * pt->block.size); - tex->total_nblocksy = round_up(tex->base.nblocksy[0], 8); + tex->stride = power_of_two(util_format_get_stride(pt->format, pt->width0)); + tex->total_nblocksy = align(util_format_get_nblocksy(pt->format, pt->height0), 8); tex->hw_tiled = INTEL_TILE_X; debug_printf("%s size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__, - tex->base.width[0], tex->base.height[0], pt->block.size, + pt->width0, pt->height0, util_format_get_blocksize(pt->format), tex->stride, tex->total_nblocksy, tex->stride * tex->total_nblocksy); return TRUE; @@ -228,36 +216,34 @@ i915_miptree_layout_2d(struct i915_texture *tex) { struct pipe_texture *pt = &tex->base; unsigned level; - unsigned width = pt->width[0]; - unsigned height = pt->height[0]; - unsigned nblocksx = pt->nblocksx[0]; - unsigned nblocksy = pt->nblocksy[0]; + unsigned width = pt->width0; + unsigned height = pt->height0; + unsigned nblocksy = util_format_get_nblocksy(pt->format, pt->width0); /* used for scanouts that need special layouts */ - if (tex->base.tex_usage & PIPE_TEXTURE_USAGE_PRIMARY) + if (pt->tex_usage & PIPE_TEXTURE_USAGE_PRIMARY) if (i915_scanout_layout(tex)) return; /* for shared buffers we use some very like scanout */ - if (tex->base.tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) + if (pt->tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) if (i915_display_target_layout(tex)) return; - tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4); + tex->stride = align(util_format_get_stride(pt->format, pt->width0), 4); tex->total_nblocksy = 0; for (level = 0; level <= pt->last_level; level++) { i915_miptree_set_level_info(tex, level, 1, width, height, 1); i915_miptree_set_image_offset(tex, level, 0, 0, tex->total_nblocksy); - nblocksy = round_up(MAX2(2, nblocksy), 2); + nblocksy = align(MAX2(2, nblocksy), 2); tex->total_nblocksy += nblocksy; - width = minify(width); - height = minify(height); - nblocksx = pf_get_nblocksx(&pt->block, width); - nblocksy = pf_get_nblocksy(&pt->block, height); + width = u_minify(width, 1); + height = u_minify(height, 1); + nblocksy = util_format_get_nblocksy(pt->format, height); } } @@ -267,16 +253,15 @@ i915_miptree_layout_3d(struct i915_texture *tex) struct pipe_texture *pt = &tex->base; unsigned level; - unsigned width = pt->width[0]; - unsigned height = pt->height[0]; - unsigned depth = pt->depth[0]; - unsigned nblocksx = pt->nblocksx[0]; - unsigned nblocksy = pt->nblocksy[0]; + unsigned width = pt->width0; + unsigned height = pt->height0; + unsigned depth = pt->depth0; + unsigned nblocksy = util_format_get_nblocksy(pt->format, pt->height0); unsigned stack_nblocksy = 0; /* Calculate the size of a single slice. */ - tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4); + tex->stride = align(util_format_get_stride(pt->format, pt->width0), 4); /* XXX: hardware expects/requires 9 levels at minimum. */ @@ -285,44 +270,41 @@ i915_miptree_layout_3d(struct i915_texture *tex) stack_nblocksy += MAX2(2, nblocksy); - width = minify(width); - height = minify(height); - depth = minify(depth); - nblocksx = pf_get_nblocksx(&pt->block, width); - nblocksy = pf_get_nblocksy(&pt->block, height); + width = u_minify(width, 1); + height = u_minify(height, 1); + nblocksy = util_format_get_nblocksy(pt->format, height); } /* Fixup depth image_offsets: */ - depth = pt->depth[0]; for (level = 0; level <= pt->last_level; level++) { unsigned i; for (i = 0; i < depth; i++) i915_miptree_set_image_offset(tex, level, i, 0, i * stack_nblocksy); - depth = minify(depth); + depth = u_minify(depth, 1); } /* Multiply slice size by texture depth for total size. It's * remarkable how wasteful of memory the i915 texture layouts * are. They are largely fixed in the i945. */ - tex->total_nblocksy = stack_nblocksy * pt->depth[0]; + tex->total_nblocksy = stack_nblocksy * pt->depth0; } static void i915_miptree_layout_cube(struct i915_texture *tex) { struct pipe_texture *pt = &tex->base; - unsigned width = pt->width[0], height = pt->height[0]; - const unsigned nblocks = pt->nblocksx[0]; + unsigned width = pt->width0, height = pt->height0; + const unsigned nblocks = util_format_get_nblocksx(pt->format, pt->width0); unsigned level; unsigned face; assert(width == height); /* cubemap images are square */ /* double pitch for cube layouts */ - tex->stride = round_up(nblocks * pt->block.size * 2, 4); + tex->stride = align(nblocks * util_format_get_blocksize(pt->format) * 2, 4); tex->total_nblocksy = nblocks * 4; for (level = 0; level <= pt->last_level; level++) { @@ -383,10 +365,10 @@ i945_miptree_layout_2d(struct i915_texture *tex) unsigned level; unsigned x = 0; unsigned y = 0; - unsigned width = pt->width[0]; - unsigned height = pt->height[0]; - unsigned nblocksx = pt->nblocksx[0]; - unsigned nblocksy = pt->nblocksy[0]; + unsigned width = pt->width0; + unsigned height = pt->height0; + unsigned nblocksx = util_format_get_nblocksx(pt->format, pt->width0); + unsigned nblocksy = util_format_get_nblocksy(pt->format, pt->height0); /* used for scanouts that need special layouts */ if (tex->base.tex_usage & PIPE_TEXTURE_USAGE_PRIMARY) @@ -398,7 +380,7 @@ i945_miptree_layout_2d(struct i915_texture *tex) if (i915_display_target_layout(tex)) return; - tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4); + tex->stride = align(util_format_get_stride(pt->format, pt->width0), 4); /* May need to adjust pitch to accomodate the placement of * the 2nd mipmap level. This occurs when the alignment @@ -407,11 +389,11 @@ i945_miptree_layout_2d(struct i915_texture *tex) */ if (pt->last_level > 0) { unsigned mip1_nblocksx - = align(pf_get_nblocksx(&pt->block, minify(width)), align_x) - + pf_get_nblocksx(&pt->block, minify(minify(width))); + = align(util_format_get_nblocksx(pt->format, u_minify(width, 1)), align_x) + + util_format_get_nblocksx(pt->format, u_minify(width, 2)); if (mip1_nblocksx > nblocksx) - tex->stride = mip1_nblocksx * pt->block.size; + tex->stride = mip1_nblocksx * util_format_get_blocksize(pt->format); } /* Pitch must be a whole number of dwords @@ -439,10 +421,10 @@ i945_miptree_layout_2d(struct i915_texture *tex) y += nblocksy; } - width = minify(width); - height = minify(height); - nblocksx = pf_get_nblocksx(&pt->block, width); - nblocksy = pf_get_nblocksy(&pt->block, height); + width = u_minify(width, 1); + height = u_minify(height, 1); + nblocksx = util_format_get_nblocksx(pt->format, width); + nblocksy = util_format_get_nblocksy(pt->format, height); } } @@ -450,20 +432,19 @@ static void i945_miptree_layout_3d(struct i915_texture *tex) { struct pipe_texture *pt = &tex->base; - unsigned width = pt->width[0]; - unsigned height = pt->height[0]; - unsigned depth = pt->depth[0]; - unsigned nblocksx = pt->nblocksx[0]; - unsigned nblocksy = pt->nblocksy[0]; + unsigned width = pt->width0; + unsigned height = pt->height0; + unsigned depth = pt->depth0; + unsigned nblocksy = util_format_get_nblocksy(pt->format, pt->width0); unsigned pack_x_pitch, pack_x_nr; unsigned pack_y_pitch; unsigned level; - tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4); + tex->stride = align(util_format_get_stride(pt->format, pt->width0), 4); tex->total_nblocksy = 0; - pack_y_pitch = MAX2(pt->nblocksy[0], 2); - pack_x_pitch = tex->stride / pt->block.size; + pack_y_pitch = MAX2(nblocksy, 2); + pack_x_pitch = tex->stride / util_format_get_blocksize(pt->format); pack_x_nr = 1; for (level = 0; level <= pt->last_level; level++) { @@ -488,18 +469,17 @@ i945_miptree_layout_3d(struct i915_texture *tex) if (pack_x_pitch > 4) { pack_x_pitch >>= 1; pack_x_nr <<= 1; - assert(pack_x_pitch * pack_x_nr * pt->block.size <= tex->stride); + assert(pack_x_pitch * pack_x_nr * util_format_get_blocksize(pt->format) <= tex->stride); } if (pack_y_pitch > 2) { pack_y_pitch >>= 1; } - width = minify(width); - height = minify(height); - depth = minify(depth); - nblocksx = pf_get_nblocksx(&pt->block, width); - nblocksy = pf_get_nblocksy(&pt->block, height); + width = u_minify(width, 1); + height = u_minify(height, 1); + depth = u_minify(depth, 1); + nblocksy = util_format_get_nblocksy(pt->format, height); } } @@ -509,13 +489,13 @@ i945_miptree_layout_cube(struct i915_texture *tex) struct pipe_texture *pt = &tex->base; unsigned level; - const unsigned nblocks = pt->nblocksx[0]; + const unsigned nblocks = util_format_get_nblocksx(pt->format, pt->width0); unsigned face; - unsigned width = pt->width[0]; - unsigned height = pt->height[0]; + unsigned width = pt->width0; + unsigned height = pt->height0; /* - printf("%s %i, %i\n", __FUNCTION__, pt->width[0], pt->height[0]); + printf("%s %i, %i\n", __FUNCTION__, pt->width0, pt->height0); */ assert(width == height); /* cubemap images are square */ @@ -529,9 +509,9 @@ i945_miptree_layout_cube(struct i915_texture *tex) * or the final row of 4x4, 2x2 and 1x1 faces below this. */ if (nblocks > 32) - tex->stride = round_up(nblocks * pt->block.size * 2, 4); + tex->stride = align(nblocks * util_format_get_blocksize(pt->format) * 2, 4); else - tex->stride = 14 * 8 * pt->block.size; + tex->stride = 14 * 8 * util_format_get_blocksize(pt->format); tex->total_nblocksy = nblocks * 4; @@ -651,9 +631,6 @@ i915_texture_create(struct pipe_screen *screen, pipe_reference_init(&tex->base.reference, 1); tex->base.screen = screen; - tex->base.nblocksx[0] = pf_get_nblocksx(&tex->base.block, tex->base.width[0]); - tex->base.nblocksy[0] = pf_get_nblocksy(&tex->base.block, tex->base.height[0]); - if (is->is_i945) { if (!i945_miptree_layout(tex)) goto fail; @@ -667,7 +644,7 @@ i915_texture_create(struct pipe_screen *screen, /* for scanouts and cursors, cursors arn't scanouts */ - if (templat->tex_usage & PIPE_TEXTURE_USAGE_PRIMARY && templat->width[0] != 64) + if (templat->tex_usage & PIPE_TEXTURE_USAGE_PRIMARY && templat->width0 != 64) buf_usage = INTEL_NEW_SCANOUT; else buf_usage = INTEL_NEW_TEXTURE; @@ -710,7 +687,7 @@ i915_texture_blanket(struct pipe_screen * screen, /* Only supports one type */ if (base->target != PIPE_TEXTURE_2D || base->last_level != 0 || - base->depth[0] != 1) { + base->depth0 != 1) { return NULL; } @@ -724,7 +701,7 @@ i915_texture_blanket(struct pipe_screen * screen, tex->stride = stride[0]; - i915_miptree_set_level_info(tex, 0, 1, base->width[0], base->height[0], 1); + i915_miptree_set_level_info(tex, 0, 1, base->width0, base->height0, 1); i915_miptree_set_image_offset(tex, 0, 0, 0, 0); pipe_buffer_reference(&tex->buffer, buffer); @@ -788,8 +765,8 @@ i915_get_tex_surface(struct pipe_screen *screen, pipe_reference_init(&ps->reference, 1); pipe_texture_reference(&ps->texture, pt); ps->format = pt->format; - ps->width = pt->width[level]; - ps->height = pt->height[level]; + ps->width = u_minify(pt->width0, level); + ps->height = u_minify(pt->height0, level); ps->offset = offset; ps->usage = flags; } @@ -835,14 +812,10 @@ i915_get_tex_transfer(struct pipe_screen *screen, trans = CALLOC_STRUCT(i915_transfer); if (trans) { pipe_texture_reference(&trans->base.texture, texture); - trans->base.format = trans->base.format; trans->base.x = x; trans->base.y = y; trans->base.width = w; trans->base.height = h; - trans->base.block = texture->block; - trans->base.nblocksx = texture->nblocksx[level]; - trans->base.nblocksy = texture->nblocksy[level]; trans->base.stride = tex->stride; trans->offset = offset; trans->base.usage = usage; @@ -858,6 +831,7 @@ i915_transfer_map(struct pipe_screen *screen, struct intel_winsys *iws = i915_screen(tex->base.screen)->iws; char *map; boolean write = FALSE; + enum pipe_format format = tex->base.format; if (transfer->usage & PIPE_TRANSFER_WRITE) write = TRUE; @@ -867,8 +841,8 @@ i915_transfer_map(struct pipe_screen *screen, return NULL; return map + i915_transfer(transfer)->offset + - transfer->y / transfer->block.height * transfer->stride + - transfer->x / transfer->block.width * transfer->block.size; + transfer->y / util_format_get_blockheight(format) * transfer->stride + + transfer->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); } static void @@ -919,7 +893,7 @@ i915_texture_blanket_intel(struct pipe_screen *screen, /* Only supports one type */ if (base->target != PIPE_TEXTURE_2D || base->last_level != 0 || - base->depth[0] != 1) { + base->depth0 != 1) { return NULL; } @@ -933,7 +907,7 @@ i915_texture_blanket_intel(struct pipe_screen *screen, tex->stride = stride; - i915_miptree_set_level_info(tex, 0, 1, base->width[0], base->height[0], 1); + i915_miptree_set_level_info(tex, 0, 1, base->width0, base->height0, 1); i915_miptree_set_image_offset(tex, 0, 0, 0, 0); tex->buffer = buffer; diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile new file mode 100644 index 00000000000..95fd3cd69bd --- /dev/null +++ b/src/gallium/drivers/i965/Makefile @@ -0,0 +1,74 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = i965 + +C_SOURCES = \ + brw_cc.c \ + brw_clip.c \ + brw_clip_line.c \ + brw_clip_point.c \ + brw_clip_state.c \ + brw_clip_tri.c \ + brw_clip_unfilled.c \ + brw_clip_util.c \ + brw_context.c \ + brw_curbe.c \ + brw_disasm.c \ + brw_draw.c \ + brw_draw_upload.c \ + brw_eu.c \ + brw_eu_debug.c \ + brw_eu_emit.c \ + brw_eu_util.c \ + brw_gs.c \ + brw_gs_emit.c \ + brw_gs_state.c \ + brw_misc_state.c \ + brw_pipe_blend.c \ + brw_pipe_depth.c \ + brw_pipe_fb.c \ + brw_pipe_query.c \ + brw_pipe_shader.c \ + brw_pipe_flush.c \ + brw_pipe_misc.c \ + brw_pipe_sampler.c \ + brw_pipe_vertex.c \ + brw_pipe_clear.c \ + brw_pipe_rast.c \ + brw_sf.c \ + brw_sf_emit.c \ + brw_sf_state.c \ + brw_state_batch.c \ + brw_state_debug.c \ + brw_state_cache.c \ + brw_state_upload.c \ + brw_structs_dump.c \ + brw_swtnl.c \ + brw_urb.c \ + brw_util.c \ + brw_vs.c \ + brw_vs_emit.c \ + brw_vs_state.c \ + brw_vs_surface_state.c \ + brw_wm.c \ + brw_wm_debug.c \ + brw_wm_emit.c \ + brw_wm_fp.c \ + brw_wm_iz.c \ + brw_wm_pass0.c \ + brw_wm_pass1.c \ + brw_wm_pass2.c \ + brw_wm_sampler_state.c \ + brw_wm_state.c \ + brw_wm_surface_state.c \ + brw_screen.c \ + brw_screen_buffers.c \ + brw_screen_tex_layout.c \ + brw_screen_texture.c \ + brw_screen_surface.c \ + brw_batchbuffer.c \ + brw_winsys_debug.c \ + intel_decode.c + +include ../../Makefile.template diff --git a/src/gallium/drivers/i965/SConscript b/src/gallium/drivers/i965/SConscript new file mode 100644 index 00000000000..9c2faaf4b49 --- /dev/null +++ b/src/gallium/drivers/i965/SConscript @@ -0,0 +1,77 @@ +Import('*') + +env = env.Clone() + +i965 = env.ConvenienceLibrary( + target = 'i965', + source = [ + 'brw_batchbuffer.c', + 'brw_cc.c', + 'brw_clip.c', + 'brw_clip_line.c', + 'brw_clip_point.c', + 'brw_clip_state.c', + 'brw_clip_tri.c', + 'brw_clip_unfilled.c', + 'brw_clip_util.c', + 'brw_context.c', + 'brw_curbe.c', + 'brw_disasm.c', + 'brw_draw.c', + 'brw_draw_upload.c', + 'brw_eu.c', + 'brw_eu_debug.c', + 'brw_eu_emit.c', + 'brw_eu_util.c', + 'brw_gs.c', + 'brw_gs_emit.c', + 'brw_gs_state.c', + 'brw_misc_state.c', + 'brw_pipe_blend.c', + 'brw_pipe_clear.c', + 'brw_pipe_depth.c', + 'brw_pipe_fb.c', + 'brw_pipe_flush.c', + 'brw_pipe_misc.c', + 'brw_pipe_query.c', + 'brw_pipe_rast.c', + 'brw_pipe_sampler.c', + 'brw_pipe_shader.c', + 'brw_pipe_vertex.c', + 'brw_screen_buffers.c', + 'brw_screen.c', + 'brw_screen_surface.c', + 'brw_screen_tex_layout.c', + 'brw_screen_texture.c', + 'brw_structs_dump.c', + 'brw_sf.c', + 'brw_sf_emit.c', + 'brw_sf_state.c', + 'brw_state_batch.c', + 'brw_state_cache.c', +# 'brw_state_debug.c', + 'brw_state_upload.c', + 'brw_swtnl.c', + 'brw_urb.c', + 'brw_util.c', + 'brw_vs.c', + 'brw_vs_emit.c', + 'brw_vs_state.c', + 'brw_vs_surface_state.c', + 'brw_wm.c', +# 'brw_wm_constant_buffer.c', + 'brw_wm_debug.c', + 'brw_wm_emit.c', + 'brw_wm_fp.c', +# 'brw_wm_glsl.c', + 'brw_wm_iz.c', + 'brw_wm_pass0.c', + 'brw_wm_pass1.c', + 'brw_wm_pass2.c', + 'brw_wm_sampler_state.c', + 'brw_wm_state.c', + 'brw_wm_surface_state.c', + 'intel_decode.c', + ]) + +Export('i965') diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c new file mode 100644 index 00000000000..22607dc6083 --- /dev/null +++ b/src/gallium/drivers/i965/brw_batchbuffer.c @@ -0,0 +1,202 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "util/u_memory.h" + +#include "brw_batchbuffer.h" +#include "brw_reg.h" +#include "brw_winsys.h" +#include "brw_debug.h" +#include "brw_structs.h" + +#define ALWAYS_EMIT_MI_FLUSH 1 + +enum pipe_error +brw_batchbuffer_reset(struct brw_batchbuffer *batch) +{ + enum pipe_error ret; + + ret = batch->sws->bo_alloc( batch->sws, + BRW_BUFFER_TYPE_BATCH, + BRW_BATCH_SIZE, 4096, + &batch->buf ); + if (ret) + return ret; + + batch->size = BRW_BATCH_SIZE; + + /* With map_range semantics, the winsys can decide whether to + * inject a malloc'ed bounce buffer instead of mapping directly. + */ + batch->map = batch->sws->bo_map(batch->buf, + BRW_DATA_BATCH_BUFFER, + 0, batch->size, + GL_TRUE, + GL_TRUE, + GL_TRUE); + + batch->ptr = batch->map; + return PIPE_OK; +} + +struct brw_batchbuffer * +brw_batchbuffer_alloc(struct brw_winsys_screen *sws, + struct brw_chipset chipset) +{ + struct brw_batchbuffer *batch = CALLOC_STRUCT(brw_batchbuffer); + + batch->sws = sws; + batch->chipset = chipset; + brw_batchbuffer_reset(batch); + + return batch; +} + +void +brw_batchbuffer_free(struct brw_batchbuffer *batch) +{ + if (batch->map) { + batch->sws->bo_unmap(batch->buf); + batch->map = NULL; + } + + bo_reference(&batch->buf, NULL); + FREE(batch); +} + + +void +_brw_batchbuffer_flush(struct brw_batchbuffer *batch, + const char *file, + int line) +{ + GLuint used = batch->ptr - batch->map; + + if (used == 0) + return; + + /* Post-swap throttling done by the state tracker. + */ + + if (BRW_DEBUG & DEBUG_BATCH) + debug_printf("%s:%d: Batchbuffer flush with %db used\n", + file, line, used); + + if (ALWAYS_EMIT_MI_FLUSH) { + *(GLuint *) (batch->ptr) = MI_FLUSH | BRW_FLUSH_STATE_CACHE; + batch->ptr += 4; + used = batch->ptr - batch->map; + } + + /* Round batchbuffer usage to 2 DWORDs. + */ + if ((used & 4) == 0) { + *(GLuint *) (batch->ptr) = 0; /* noop */ + batch->ptr += 4; + used = batch->ptr - batch->map; + } + + /* Mark the end of the buffer. + */ + *(GLuint *) (batch->ptr) = MI_BATCH_BUFFER_END; + batch->ptr += 4; + used = batch->ptr - batch->map; + + batch->sws->bo_flush_range(batch->buf, 0, used); + batch->sws->bo_unmap(batch->buf); + batch->map = NULL; + batch->ptr = NULL; + + batch->sws->bo_exec(batch->buf, used ); + + if (BRW_DEBUG & DEBUG_SYNC) { + /* Abuse map/unmap to achieve wait-for-fence. + * + * XXX: hide this inside the winsys and export a fence + * interface. + */ + debug_printf("waiting for idle\n"); + batch->sws->bo_wait_idle(batch->buf); + } + + /* Reset the buffer: + */ + brw_batchbuffer_reset(batch); +} + + +/* The OUT_RELOC() macro ends up here, generating a relocation within + * the batch buffer. + */ +enum pipe_error +brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch, + struct brw_winsys_buffer *buffer, + uint32_t usage, + uint32_t delta) +{ + int ret; + + if (batch->ptr - batch->map > batch->buf->size) { + debug_printf("bad relocation ptr %p map %p offset %d size %d\n", + batch->ptr, batch->map, batch->ptr - batch->map, batch->buf->size); + + return PIPE_ERROR_OUT_OF_MEMORY; + } + + ret = batch->sws->bo_emit_reloc(batch->buf, + usage, + delta, + batch->ptr - batch->map, + buffer); + if (ret != 0) + return ret; + + /* bo_emit_reloc was resposible for writing a zero into the + * batchbuffer if necessary. Just need to update our pointer. + */ + batch->ptr += 4; + + return 0; +} + +enum pipe_error +brw_batchbuffer_data(struct brw_batchbuffer *batch, + const void *data, GLuint bytes, + enum cliprect_mode cliprect_mode) +{ + enum pipe_error ret; + + assert((bytes & 3) == 0); + + ret = brw_batchbuffer_require_space(batch, bytes); + if (ret) + return ret; + + memcpy(batch->ptr, data, bytes); + batch->ptr += bytes; + return 0; +} diff --git a/src/gallium/drivers/i965/brw_batchbuffer.h b/src/gallium/drivers/i965/brw_batchbuffer.h new file mode 100644 index 00000000000..7473f5bea4d --- /dev/null +++ b/src/gallium/drivers/i965/brw_batchbuffer.h @@ -0,0 +1,148 @@ +#ifndef BRW_BATCHBUFFER_H +#define BRW_BATCHBUFFER_H + +#include "util/u_debug.h" + +#include "brw_types.h" +#include "brw_winsys.h" +#include "brw_reg.h" + +#define BATCH_SZ 16384 +#define BATCH_RESERVED 16 + +/* All ignored: + */ +enum cliprect_mode { + IGNORE_CLIPRECTS, + LOOP_CLIPRECTS, + NO_LOOP_CLIPRECTS, + REFERENCES_CLIPRECTS +}; + + + + +struct brw_batchbuffer { + + struct brw_winsys_screen *sws; + struct brw_winsys_buffer *buf; + struct brw_chipset chipset; + + /** + * Values exported to speed up the writing the batchbuffer, + * instead of having to go trough a accesor function for + * each dword written. + */ + /*{@*/ + uint8_t *map; + uint8_t *ptr; + size_t size; + struct { + uint8_t *end_ptr; + } emit; + + + size_t relocs; + size_t max_relocs; + /*@}*/ +}; + +struct brw_batchbuffer *brw_batchbuffer_alloc( struct brw_winsys_screen *sws, + struct brw_chipset chipset ); + +void brw_batchbuffer_free(struct brw_batchbuffer *batch); + +void _brw_batchbuffer_flush(struct brw_batchbuffer *batch, + const char *file, int line); + + +enum pipe_error +brw_batchbuffer_reset(struct brw_batchbuffer *batch); + + +/* Unlike bmBufferData, this currently requires the buffer be mapped. + * Consider it a convenience function wrapping multple + * intel_buffer_dword() calls. + */ +int brw_batchbuffer_data(struct brw_batchbuffer *batch, + const void *data, GLuint bytes, + enum cliprect_mode cliprect_mode); + + +int brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch, + struct brw_winsys_buffer *buffer, + enum brw_buffer_usage usage, + uint32_t offset); + +/* Inline functions - might actually be better off with these + * non-inlined. Certainly better off switching all command packets to + * be passed as structs rather than dwords, but that's a little bit of + * work... + */ +static INLINE GLint +brw_batchbuffer_space(struct brw_batchbuffer *batch) +{ + return (batch->size - BATCH_RESERVED) - (batch->ptr - batch->map); +} + + +static INLINE void +brw_batchbuffer_emit_dword(struct brw_batchbuffer *batch, GLuint dword) +{ + assert(batch->map); + assert(brw_batchbuffer_space(batch) >= 4); + *(GLuint *) (batch->ptr) = dword; + batch->ptr += 4; +} + +static INLINE enum pipe_error +brw_batchbuffer_require_space(struct brw_batchbuffer *batch, + GLuint sz) +{ + assert(sz < batch->size - 8); + if (brw_batchbuffer_space(batch) < sz) { + assert(0); + return PIPE_ERROR_OUT_OF_MEMORY; + } +#ifdef DEBUG + batch->emit.end_ptr = batch->ptr + sz; +#endif + return 0; +} + +/* Here are the crusty old macros, to be removed: + */ +#define BEGIN_BATCH(n, cliprect_mode) do { \ + brw_batchbuffer_require_space(brw->batch, (n)*4); \ + } while (0) + +#define OUT_BATCH(d) brw_batchbuffer_emit_dword(brw->batch, d) + +#define OUT_RELOC(buf, usage, delta) do { \ + assert((unsigned) (delta) < buf->size); \ + brw_batchbuffer_emit_reloc(brw->batch, buf, \ + usage, delta); \ + } while (0) + +#ifdef DEBUG +#define ADVANCE_BATCH() do { \ + unsigned int _n = brw->batch->ptr - brw->batch->emit.end_ptr; \ + if (_n != 0) { \ + debug_printf("%s: %d too many bytes emitted to batch\n", \ + __FUNCTION__, _n); \ + abort(); \ + } \ + brw->batch->emit.end_ptr = NULL; \ + } while(0) +#else +#define ADVANCE_BATCH() +#endif + +static INLINE void +brw_batchbuffer_emit_mi_flush(struct brw_batchbuffer *batch) +{ + brw_batchbuffer_require_space(batch, 4); + brw_batchbuffer_emit_dword(batch, MI_FLUSH); +} + +#endif diff --git a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c new file mode 100644 index 00000000000..3e070f5591a --- /dev/null +++ b/src/gallium/drivers/i965/brw_cc.c @@ -0,0 +1,111 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" + + +static enum pipe_error prepare_cc_vp( struct brw_context *brw ) +{ + return brw_cache_data( &brw->cache, + BRW_CC_VP, + &brw->curr.ccv, + NULL, 0, + &brw->cc.reloc[CC_RELOC_VP].bo ); +} + +const struct brw_tracked_state brw_cc_vp = { + .dirty = { + .mesa = PIPE_NEW_VIEWPORT, + .brw = BRW_NEW_CONTEXT, + .cache = 0 + }, + .prepare = prepare_cc_vp +}; + + +/* A long-winded way to OR two unsigned integers together: + */ +static INLINE struct brw_cc3 +combine_cc3( struct brw_cc3 a, struct brw_cc3 b ) +{ + union { struct brw_cc3 cc3; unsigned i; } ca, cb; + ca.cc3 = a; + cb.cc3 = b; + ca.i |= cb.i; + return ca.cc3; +} + + +static int prepare_cc_unit( struct brw_context *brw ) +{ + brw->cc.cc.cc0 = brw->curr.zstencil->cc0; + brw->cc.cc.cc1 = brw->curr.zstencil->cc1; + brw->cc.cc.cc2 = brw->curr.zstencil->cc2; + brw->cc.cc.cc3 = combine_cc3( brw->curr.zstencil->cc3, brw->curr.blend->cc3 ); + + brw->cc.cc.cc5 = brw->curr.blend->cc5; + brw->cc.cc.cc6 = brw->curr.blend->cc6; + brw->cc.cc.cc7 = brw->curr.zstencil->cc7; + + return brw_cache_data_sz(&brw->cache, BRW_CC_UNIT, + &brw->cc.cc, sizeof(brw->cc.cc), + brw->cc.reloc, 1, + &brw->cc.state_bo); +} + +const struct brw_tracked_state brw_cc_unit = { + .dirty = { + .mesa = PIPE_NEW_DEPTH_STENCIL_ALPHA | PIPE_NEW_BLEND, + .brw = 0, + .cache = CACHE_NEW_CC_VP + }, + .prepare = prepare_cc_unit, +}; + + +void brw_hw_cc_init( struct brw_context *brw ) +{ + make_reloc(&brw->cc.reloc[0], + BRW_USAGE_STATE, + 0, + offsetof(struct brw_cc_unit_state, cc4), + NULL); +} + + +void brw_hw_cc_cleanup( struct brw_context *brw ) +{ + bo_reference(&brw->cc.state_bo, NULL); + bo_reference(&brw->cc.reloc[0].bo, NULL); +} diff --git a/src/gallium/drivers/i965/brw_clip.c b/src/gallium/drivers/i965/brw_clip.c new file mode 100644 index 00000000000..d67a1a62633 --- /dev/null +++ b/src/gallium/drivers/i965/brw_clip.c @@ -0,0 +1,224 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#include "pipe/p_state.h" + +#include "util/u_math.h" + +#include "brw_screen.h" +#include "brw_batchbuffer.h" +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_state.h" +#include "brw_pipe_rast.h" +#include "brw_clip.h" + + +#define FRONT_UNFILLED_BIT 0x1 +#define BACK_UNFILLED_BIT 0x2 + + +static enum pipe_error +compile_clip_prog( struct brw_context *brw, + struct brw_clip_prog_key *key, + struct brw_winsys_buffer **bo_out ) +{ + enum pipe_error ret; + struct brw_clip_compile c; + const GLuint *program; + GLuint program_size; + GLuint delta; + + memset(&c, 0, sizeof(c)); + + /* Begin the compilation: + */ + brw_init_compile(brw, &c.func); + + c.func.single_program_flow = 1; + + c.chipset = brw->chipset; + c.key = *key; + c.need_ff_sync = c.chipset.is_igdng; + + /* Need to locate the two positions present in vertex + header. + * These are currently hardcoded: + */ + c.header_position_offset = ATTR_SIZE; + + if (c.chipset.is_igdng) + delta = 3 * REG_SIZE; + else + delta = REG_SIZE; + + c.offset_hpos = delta + c.key.output_hpos * ATTR_SIZE; + + if (c.key.output_color0 != BRW_OUTPUT_NOT_PRESENT) + c.offset_color0 = delta + c.key.output_color0 * ATTR_SIZE; + + if (c.key.output_color1 != BRW_OUTPUT_NOT_PRESENT) + c.offset_color1 = delta + c.key.output_color1 * ATTR_SIZE; + + if (c.key.output_bfc0 != BRW_OUTPUT_NOT_PRESENT) + c.offset_bfc0 = delta + c.key.output_bfc0 * ATTR_SIZE; + + if (c.key.output_bfc1 != BRW_OUTPUT_NOT_PRESENT) + c.offset_bfc1 = delta + c.key.output_bfc1 * ATTR_SIZE; + + if (c.key.output_edgeflag != BRW_OUTPUT_NOT_PRESENT) + c.offset_edgeflag = delta + c.key.output_edgeflag * ATTR_SIZE; + + if (BRW_IS_IGDNG(brw)) + c.nr_regs = (c.key.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */ + else + c.nr_regs = (c.key.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */ + + c.nr_bytes = c.nr_regs * REG_SIZE; + + c.prog_data.clip_mode = c.key.clip_mode; /* XXX */ + + /* For some reason the thread is spawned with only 4 channels + * unmasked. + */ + brw_set_mask_control(&c.func, BRW_MASK_DISABLE); + + + /* Would ideally have the option of producing a program which could + * do all three: + */ + switch (key->primitive) { + case PIPE_PRIM_TRIANGLES: + if (key->do_unfilled) + brw_emit_unfilled_clip( &c ); + else + brw_emit_tri_clip( &c ); + break; + case PIPE_PRIM_LINES: + brw_emit_line_clip( &c ); + break; + case PIPE_PRIM_POINTS: + brw_emit_point_clip( &c ); + break; + default: + assert(0); + return PIPE_ERROR_BAD_INPUT; + } + + + + /* get the program + */ + ret = brw_get_program(&c.func, &program, &program_size); + if (ret) + return ret; + + /* Upload + */ + ret = brw_upload_cache( &brw->cache, + BRW_CLIP_PROG, + &c.key, sizeof(c.key), + NULL, 0, + program, program_size, + &c.prog_data, + &brw->clip.prog_data, + bo_out ); + if (ret) + return ret; + + return PIPE_OK; +} + +/* Calculate interpolants for triangle and line rasterization. + */ +static enum pipe_error +upload_clip_prog(struct brw_context *brw) +{ + const struct brw_vertex_shader *vs = brw->curr.vertex_shader; + struct brw_clip_prog_key key; + enum pipe_error ret; + + /* Populate the key, starting from the almost-complete version from + * the rast state. + */ + + /* PIPE_NEW_RAST */ + key = brw->curr.rast->clip_key; + + /* BRW_NEW_REDUCED_PRIMITIVE */ + key.primitive = brw->reduced_primitive; + + /* XXX: if edgeflag is moved to a proper TGSI vs output, can remove + * dependency on CACHE_NEW_VS_PROG + */ + /* CACHE_NEW_VS_PROG */ + key.nr_attrs = brw->vs.prog_data->nr_outputs; + + /* PIPE_NEW_VS */ + key.output_hpos = vs->output_hpos; + key.output_color0 = vs->output_color0; + key.output_color1 = vs->output_color1; + key.output_bfc0 = vs->output_bfc0; + key.output_bfc1 = vs->output_bfc1; + key.output_edgeflag = vs->output_edgeflag; + + /* PIPE_NEW_CLIP */ + key.nr_userclip = brw->curr.ucp.nr; + + /* Already cached? + */ + if (brw_search_cache(&brw->cache, BRW_CLIP_PROG, + &key, sizeof(key), + NULL, 0, + &brw->clip.prog_data, + &brw->clip.prog_bo)) + return PIPE_OK; + + /* Compile new program: + */ + ret = compile_clip_prog( brw, &key, &brw->clip.prog_bo ); + if (ret) + return ret; + + return PIPE_OK; +} + + +const struct brw_tracked_state brw_clip_prog = { + .dirty = { + .mesa = (PIPE_NEW_RAST | + PIPE_NEW_CLIP), + .brw = (BRW_NEW_REDUCED_PRIMITIVE), + .cache = CACHE_NEW_VS_PROG + }, + .prepare = upload_clip_prog +}; diff --git a/src/gallium/drivers/i965/brw_clip.h b/src/gallium/drivers/i965/brw_clip.h new file mode 100644 index 00000000000..80e3a11a370 --- /dev/null +++ b/src/gallium/drivers/i965/brw_clip.h @@ -0,0 +1,199 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#ifndef BRW_CLIP_H +#define BRW_CLIP_H + +#include "pipe/p_state.h" +#include "brw_reg.h" +#include "brw_eu.h" + +#define MAX_VERTS (3+6+6) + +/* Note that if unfilled primitives are being emitted, we have to fix + * up polygon offset and flatshading at this point: + */ +struct brw_clip_prog_key { + GLuint nr_attrs:6; + GLuint primitive:4; + GLuint nr_userclip:3; + GLuint do_flat_shading:1; + GLuint do_unfilled:1; + GLuint fill_cw:2; /* includes cull information */ + GLuint fill_ccw:2; /* includes cull information */ + GLuint offset_cw:1; + GLuint offset_ccw:1; + GLuint copy_bfc_cw:1; + GLuint copy_bfc_ccw:1; + GLuint clip_mode:3; + GLuint output_hpos:6; /* not always zero? */ + + GLuint output_color0:6; + GLuint output_color1:6; + GLuint output_bfc0:6; + GLuint output_bfc1:6; + GLuint output_edgeflag:6; + GLuint pad1:2; + + GLfloat offset_factor; + GLfloat offset_units; +}; + +struct brw_clip_prog_data { + GLuint curb_read_length; /* user planes? */ + GLuint clip_mode; + GLuint urb_read_length; + GLuint total_grf; +}; + +#define CLIP_LINE 0 +#define CLIP_POINT 1 +#define CLIP_FILL 2 +#define CLIP_CULL 3 + + +#define PRIM_MASK (0x1f) + +struct brw_clip_compile { + struct brw_compile func; + struct brw_clip_prog_key key; + struct brw_clip_prog_data prog_data; + + struct { + struct brw_reg R0; + struct brw_reg vertex[MAX_VERTS]; + + struct brw_reg t; + struct brw_reg t0, t1; + struct brw_reg dp0, dp1; + + struct brw_reg dpPrev; + struct brw_reg dp; + struct brw_reg loopcount; + struct brw_reg nr_verts; + struct brw_reg planemask; + + struct brw_reg inlist; + struct brw_reg outlist; + struct brw_reg freelist; + + struct brw_reg dir; + struct brw_reg tmp0, tmp1; + struct brw_reg offset; + + struct brw_reg fixed_planes; + struct brw_reg plane_equation; + + struct brw_reg ff_sync; + } reg; + + /* 3 different ways of expressing vertex size, including + * key.nr_attrs. + */ + GLuint nr_regs; + GLuint nr_bytes; + + GLuint first_tmp; + GLuint last_tmp; + + GLboolean need_direction; + struct brw_chipset chipset; + + GLuint last_mrf; + + GLuint header_position_offset; + GLboolean need_ff_sync; + + GLuint nr_color_attrs; + GLuint offset_color0; + GLuint offset_color1; + GLuint offset_bfc0; + GLuint offset_bfc1; + + GLuint offset_hpos; + GLuint offset_edgeflag; +}; + +#define ATTR_SIZE (4*4) + +/* Points are only culled, so no need for a clip routine, however it + * works out easier to have a dummy one. + */ +void brw_emit_unfilled_clip( struct brw_clip_compile *c ); +void brw_emit_tri_clip( struct brw_clip_compile *c ); +void brw_emit_line_clip( struct brw_clip_compile *c ); +void brw_emit_point_clip( struct brw_clip_compile *c ); + +/* brw_clip_tri.c, for use by the unfilled clip routine: + */ +void brw_clip_tri_init_vertices( struct brw_clip_compile *c ); +void brw_clip_tri_flat_shade( struct brw_clip_compile *c ); +void brw_clip_tri( struct brw_clip_compile *c ); +void brw_clip_tri_emit_polygon( struct brw_clip_compile *c ); +void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, + GLuint nr_verts ); + + +/* Utils: + */ + +void brw_clip_interp_vertex( struct brw_clip_compile *c, + struct brw_indirect dest_ptr, + struct brw_indirect v0_ptr, /* from */ + struct brw_indirect v1_ptr, /* to */ + struct brw_reg t0, + GLboolean force_edgeflag ); + +void brw_clip_init_planes( struct brw_clip_compile *c ); + +void brw_clip_emit_vue(struct brw_clip_compile *c, + struct brw_indirect vert, + GLboolean allocate, + GLboolean eot, + GLuint header); + +void brw_clip_kill_thread(struct brw_clip_compile *c); + +struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c ); +struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c ); + +void brw_clip_copy_colors( struct brw_clip_compile *c, + GLuint to, GLuint from ); + +void brw_clip_init_clipmask( struct brw_clip_compile *c ); + +struct brw_reg get_tmp( struct brw_clip_compile *c ); + +void brw_clip_project_position(struct brw_clip_compile *c, + struct brw_reg pos ); +void brw_clip_ff_sync(struct brw_clip_compile *c); +void brw_clip_init_ff_sync(struct brw_clip_compile *c); +#endif diff --git a/src/gallium/drivers/i965/brw_clip_line.c b/src/gallium/drivers/i965/brw_clip_line.c new file mode 100644 index 00000000000..54282d975ed --- /dev/null +++ b/src/gallium/drivers/i965/brw_clip_line.c @@ -0,0 +1,271 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#include "util/u_debug.h" + +#include "brw_defines.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_clip.h" + + + + +static void brw_clip_line_alloc_regs( struct brw_clip_compile *c ) +{ + GLuint i = 0,j; + + /* Register usage is static, precompute here: + */ + c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; + + if (c->key.nr_userclip) { + c->reg.fixed_planes = brw_vec4_grf(i, 0); + i += (6 + c->key.nr_userclip + 1) / 2; + + c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2; + } + else + c->prog_data.curb_read_length = 0; + + + /* Payload vertices plus space for more generated vertices: + */ + for (j = 0; j < 4; j++) { + c->reg.vertex[j] = brw_vec4_grf(i, 0); + i += c->nr_regs; + } + + c->reg.t = brw_vec1_grf(i, 0); + c->reg.t0 = brw_vec1_grf(i, 1); + c->reg.t1 = brw_vec1_grf(i, 2); + c->reg.planemask = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD); + c->reg.plane_equation = brw_vec4_grf(i, 4); + i++; + + c->reg.dp0 = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */ + c->reg.dp1 = brw_vec1_grf(i, 4); + i++; + + if (!c->key.nr_userclip) { + c->reg.fixed_planes = brw_vec8_grf(i, 0); + i++; + } + + if (c->need_ff_sync) { + c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD); + i++; + } + + c->first_tmp = i; + c->last_tmp = i; + + c->prog_data.urb_read_length = c->nr_regs; /* ? */ + c->prog_data.total_grf = i; +} + + + +/* Line clipping, more or less following the following algorithm: + * + * for (p=0;p<MAX_PLANES;p++) { + * if (clipmask & (1 << p)) { + * GLfloat dp0 = DOTPROD( vtx0, plane[p] ); + * GLfloat dp1 = DOTPROD( vtx1, plane[p] ); + * + * if (IS_NEGATIVE(dp1)) { + * GLfloat t = dp1 / (dp1 - dp0); + * if (t > t1) t1 = t; + * } else { + * GLfloat t = dp0 / (dp0 - dp1); + * if (t > t0) t0 = t; + * } + * + * if (t0 + t1 >= 1.0) + * return; + * } + * } + * + * interp( ctx, newvtx0, vtx0, vtx1, t0 ); + * interp( ctx, newvtx1, vtx1, vtx0, t1 ); + * + */ +static void clip_and_emit_line( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_indirect vtx0 = brw_indirect(0, 0); + struct brw_indirect vtx1 = brw_indirect(1, 0); + struct brw_indirect newvtx0 = brw_indirect(2, 0); + struct brw_indirect newvtx1 = brw_indirect(3, 0); + struct brw_indirect plane_ptr = brw_indirect(4, 0); + struct brw_instruction *plane_loop; + struct brw_instruction *plane_active; + struct brw_instruction *is_negative; + struct brw_instruction *is_neg2 = NULL; + struct brw_instruction *not_culled; + struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD); + + brw_MOV(p, get_addr_reg(vtx0), brw_address(c->reg.vertex[0])); + brw_MOV(p, get_addr_reg(vtx1), brw_address(c->reg.vertex[1])); + brw_MOV(p, get_addr_reg(newvtx0), brw_address(c->reg.vertex[2])); + brw_MOV(p, get_addr_reg(newvtx1), brw_address(c->reg.vertex[3])); + brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c)); + + /* Note: init t0, t1 together: + */ + brw_MOV(p, vec2(c->reg.t0), brw_imm_f(0)); + + brw_clip_init_planes(c); + brw_clip_init_clipmask(c); + + /* -ve rhw workaround */ + if (c->chipset.is_965) { + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), + brw_imm_ud(1<<20)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(0x3f)); + } + + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + plane_loop = brw_DO(p, BRW_EXECUTE_1); + { + /* if (planemask & 1) + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_AND(p, v1_null_ud, c->reg.planemask, brw_imm_ud(1)); + + plane_active = brw_IF(p, BRW_EXECUTE_1); + { + if (c->key.nr_userclip) + brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0)); + else + brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0)); + + /* dp = DP4(vtx->position, plane) + */ + brw_DP4(p, vec4(c->reg.dp0), deref_4f(vtx0, c->offset_hpos), c->reg.plane_equation); + + /* if (IS_NEGATIVE(dp1)) + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_L); + brw_DP4(p, vec4(c->reg.dp1), deref_4f(vtx1, c->offset_hpos), c->reg.plane_equation); + is_negative = brw_IF(p, BRW_EXECUTE_1); + { + /* + * Both can be negative on GM965/G965 due to RHW workaround + * if so, this object should be rejected. + */ + if (c->chipset.is_965) { + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, c->reg.dp0, brw_imm_f(0.0)); + is_neg2 = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_kill_thread(c); + } + brw_ENDIF(p, is_neg2); + } + + brw_ADD(p, c->reg.t, c->reg.dp1, negate(c->reg.dp0)); + brw_math_invert(p, c->reg.t, c->reg.t); + brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp1); + + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t1 ); + brw_MOV(p, c->reg.t1, c->reg.t); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } + is_negative = brw_ELSE(p, is_negative); + { + /* Coming back in. We know that both cannot be negative + * because the line would have been culled in that case. + */ + + /* If both are positive, do nothing */ + /* Only on GM965/G965 */ + if (c->chipset.is_965) { + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0)); + is_neg2 = brw_IF(p, BRW_EXECUTE_1); + } + + { + brw_ADD(p, c->reg.t, c->reg.dp0, negate(c->reg.dp1)); + brw_math_invert(p, c->reg.t, c->reg.t); + brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp0); + + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t0 ); + brw_MOV(p, c->reg.t0, c->reg.t); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } + + if (c->chipset.is_965) { + brw_ENDIF(p, is_neg2); + } + } + brw_ENDIF(p, is_negative); + } + brw_ENDIF(p, plane_active); + + /* plane_ptr++; + */ + brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c)); + + /* while (planemask>>=1) != 0 + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1)); + } + brw_WHILE(p, plane_loop); + + brw_ADD(p, c->reg.t, c->reg.t0, c->reg.t1); + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.t, brw_imm_f(1.0)); + not_culled = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_interp_vertex(c, newvtx0, vtx0, vtx1, c->reg.t0, FALSE); + brw_clip_interp_vertex(c, newvtx1, vtx1, vtx0, c->reg.t1, FALSE); + + brw_clip_emit_vue(c, newvtx0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START); + brw_clip_emit_vue(c, newvtx1, 0, 1, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END); + } + brw_ENDIF(p, not_culled); + brw_clip_kill_thread(c); +} + + + +void brw_emit_line_clip( struct brw_clip_compile *c ) +{ + brw_clip_line_alloc_regs(c); + brw_clip_init_ff_sync(c); + + if (c->key.do_flat_shading) + brw_clip_copy_colors(c, 0, 1); + + clip_and_emit_line(c); +} diff --git a/src/gallium/drivers/i965/brw_clip_point.c b/src/gallium/drivers/i965/brw_clip_point.c new file mode 100644 index 00000000000..e0a5330556d --- /dev/null +++ b/src/gallium/drivers/i965/brw_clip_point.c @@ -0,0 +1,48 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#include "brw_defines.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_clip.h" + + +/* Point clipping, nothing to do? + */ +void brw_emit_point_clip( struct brw_clip_compile *c ) +{ + /* Send an empty message to kill the thread: + */ + brw_clip_tri_alloc_regs(c, 0); + brw_clip_init_ff_sync(c); + + brw_clip_kill_thread(c); +} diff --git a/src/gallium/drivers/i965/brw_clip_state.c b/src/gallium/drivers/i965/brw_clip_state.c new file mode 100644 index 00000000000..5c3ccfd8d0d --- /dev/null +++ b/src/gallium/drivers/i965/brw_clip_state.c @@ -0,0 +1,209 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#include "util/u_math.h" + +#include "brw_context.h" +#include "brw_clip.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "brw_debug.h" + +struct brw_clip_unit_key { + unsigned int total_grf; + unsigned int urb_entry_read_length; + unsigned int curb_entry_read_length; + unsigned int clip_mode; + + unsigned int curbe_offset; + + unsigned int nr_urb_entries, urb_size; + + GLboolean depth_clamp; +}; + +static void +clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key) +{ + memset(key, 0, sizeof(*key)); + + /* CACHE_NEW_CLIP_PROG */ + key->total_grf = brw->clip.prog_data->total_grf; + key->urb_entry_read_length = brw->clip.prog_data->urb_read_length; + key->curb_entry_read_length = brw->clip.prog_data->curb_read_length; + key->clip_mode = brw->clip.prog_data->clip_mode; + + /* BRW_NEW_CURBE_OFFSETS */ + key->curbe_offset = brw->curbe.clip_start; + + /* BRW_NEW_URB_FENCE */ + key->nr_urb_entries = brw->urb.nr_clip_entries; + key->urb_size = brw->urb.vsize; + + /* */ + key->depth_clamp = 0; /* XXX: add this to gallium: ctx->Transform.DepthClamp; */ +} + +static enum pipe_error +clip_unit_create_from_key(struct brw_context *brw, + struct brw_clip_unit_key *key, + struct brw_winsys_reloc *reloc, + struct brw_winsys_buffer **bo_out) +{ + struct brw_clip_unit_state clip; + enum pipe_error ret; + + memset(&clip, 0, sizeof(clip)); + + clip.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1; + /* reloc */ + clip.thread0.kernel_start_pointer = 0; + + clip.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + clip.thread1.single_program_flow = 1; + + clip.thread3.urb_entry_read_length = key->urb_entry_read_length; + clip.thread3.const_urb_entry_read_length = key->curb_entry_read_length; + clip.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; + clip.thread3.dispatch_grf_start_reg = 1; + clip.thread3.urb_entry_read_offset = 0; + + clip.thread4.nr_urb_entries = key->nr_urb_entries; + clip.thread4.urb_entry_allocation_size = key->urb_size - 1; + /* If we have enough clip URB entries to run two threads, do so. + */ + if (key->nr_urb_entries >= 10) { + /* Half of the URB entries go to each thread, and it has to be an + * even number. + */ + assert(key->nr_urb_entries % 2 == 0); + + /* Although up to 16 concurrent Clip threads are allowed on IGDNG, + * only 2 threads can output VUEs at a time. + */ + if (BRW_IS_IGDNG(brw)) + clip.thread4.max_threads = 16 - 1; + else + clip.thread4.max_threads = 2 - 1; + } else { + assert(key->nr_urb_entries >= 5); + clip.thread4.max_threads = 1 - 1; + } + + if (BRW_DEBUG & DEBUG_SINGLE_THREAD) + clip.thread4.max_threads = 0; + + if (BRW_DEBUG & DEBUG_STATS) + clip.thread4.stats_enable = 1; + + clip.clip5.userclip_enable_flags = 0x7f; + clip.clip5.userclip_must_clip = 1; + clip.clip5.guard_band_enable = 0; + if (!key->depth_clamp) + clip.clip5.viewport_z_clip_enable = 1; + clip.clip5.viewport_xy_clip_enable = 1; + clip.clip5.vertex_position_space = BRW_CLIP_NDCSPACE; + clip.clip5.api_mode = BRW_CLIP_API_OGL; + clip.clip5.clip_mode = key->clip_mode; + + if (BRW_IS_G4X(brw)) + clip.clip5.negative_w_clip_test = 1; + + clip.clip6.clipper_viewport_state_ptr = 0; + clip.viewport_xmin = -1; + clip.viewport_xmax = 1; + clip.viewport_ymin = -1; + clip.viewport_ymax = 1; + + ret = brw_upload_cache(&brw->cache, BRW_CLIP_UNIT, + key, sizeof(*key), + reloc, 1, + &clip, sizeof(clip), + NULL, NULL, + bo_out); + if (ret) + return ret; + + return PIPE_OK; +} + +static int upload_clip_unit( struct brw_context *brw ) +{ + struct brw_clip_unit_key key; + struct brw_winsys_reloc reloc[1]; + unsigned grf_reg_count; + enum pipe_error ret; + + clip_unit_populate_key(brw, &key); + + grf_reg_count = align(key.total_grf, 16) / 16 - 1; + + /* clip program relocation + * + * XXX: these reloc structs are long lived and only need to be + * updated when the bound BO changes. Hopefully the stuff mixed in + * in the delta's is non-orthogonal. + */ + assert(brw->clip.prog_bo); + make_reloc(&reloc[0], + BRW_USAGE_STATE, + grf_reg_count << 1, + offsetof(struct brw_clip_unit_state, thread0), + brw->clip.prog_bo); + + + if (brw_search_cache(&brw->cache, BRW_CLIP_UNIT, + &key, sizeof(key), + reloc, 1, + NULL, + &brw->clip.state_bo)) + return PIPE_OK; + + /* Create new: + */ + ret = clip_unit_create_from_key(brw, &key, + reloc, + &brw->clip.state_bo); + if (ret) + return ret; + + return PIPE_OK; +} + +const struct brw_tracked_state brw_clip_unit = { + .dirty = { + .mesa = 0, + .brw = (BRW_NEW_CURBE_OFFSETS | + BRW_NEW_URB_FENCE), + .cache = CACHE_NEW_CLIP_PROG + }, + .prepare = upload_clip_unit, +}; diff --git a/src/gallium/drivers/i965/brw_clip_tri.c b/src/gallium/drivers/i965/brw_clip_tri.c new file mode 100644 index 00000000000..4cde7294ea0 --- /dev/null +++ b/src/gallium/drivers/i965/brw_clip_tri.c @@ -0,0 +1,595 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#include "brw_defines.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_clip.h" + +static void release_tmps( struct brw_clip_compile *c ) +{ + c->last_tmp = c->first_tmp; +} + + +void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, + GLuint nr_verts ) +{ + GLuint i = 0,j; + + /* Register usage is static, precompute here: + */ + c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; + + if (c->key.nr_userclip) { + c->reg.fixed_planes = brw_vec4_grf(i, 0); + i += (6 + c->key.nr_userclip + 1) / 2; + + c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2; + } + else + c->prog_data.curb_read_length = 0; + + + /* Payload vertices plus space for more generated vertices: + */ + for (j = 0; j < nr_verts; j++) { + c->reg.vertex[j] = brw_vec4_grf(i, 0); + i += c->nr_regs; + } + + if (c->key.nr_attrs & 1) { + for (j = 0; j < 3; j++) { + GLuint delta = c->key.nr_attrs*16 + 32; + + if (c->chipset.is_igdng) + delta = c->key.nr_attrs * 16 + 32 * 3; + + brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0)); + } + } + + c->reg.t = brw_vec1_grf(i, 0); + c->reg.loopcount = retype(brw_vec1_grf(i, 1), BRW_REGISTER_TYPE_D); + c->reg.nr_verts = retype(brw_vec1_grf(i, 2), BRW_REGISTER_TYPE_UD); + c->reg.planemask = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD); + c->reg.plane_equation = brw_vec4_grf(i, 4); + i++; + + c->reg.dpPrev = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */ + c->reg.dp = brw_vec1_grf(i, 4); + i++; + + c->reg.inlist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0); + i++; + + c->reg.outlist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0); + i++; + + c->reg.freelist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0); + i++; + + if (!c->key.nr_userclip) { + c->reg.fixed_planes = brw_vec8_grf(i, 0); + i++; + } + + if (c->key.do_unfilled) { + c->reg.dir = brw_vec4_grf(i, 0); + c->reg.offset = brw_vec4_grf(i, 4); + i++; + c->reg.tmp0 = brw_vec4_grf(i, 0); + c->reg.tmp1 = brw_vec4_grf(i, 4); + i++; + } + + if (c->need_ff_sync) { + c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD); + i++; + } + + c->first_tmp = i; + c->last_tmp = i; + + c->prog_data.urb_read_length = c->nr_regs; /* ? */ + c->prog_data.total_grf = i; +} + + + +void brw_clip_tri_init_vertices( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */ + struct brw_instruction *is_rev; + + /* Initial list of indices for incoming vertexes: + */ + brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_EQ, + tmp0, + brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE)); + + /* XXX: Is there an easier way to do this? Need to reverse every + * second tristrip element: Can ignore sometimes? + */ + is_rev = brw_IF(p, BRW_EXECUTE_1); + { + brw_MOV(p, get_element(c->reg.inlist, 0), brw_address(c->reg.vertex[1]) ); + brw_MOV(p, get_element(c->reg.inlist, 1), brw_address(c->reg.vertex[0]) ); + if (c->need_direction) + brw_MOV(p, c->reg.dir, brw_imm_f(-1)); + } + is_rev = brw_ELSE(p, is_rev); + { + brw_MOV(p, get_element(c->reg.inlist, 0), brw_address(c->reg.vertex[0]) ); + brw_MOV(p, get_element(c->reg.inlist, 1), brw_address(c->reg.vertex[1]) ); + if (c->need_direction) + brw_MOV(p, c->reg.dir, brw_imm_f(1)); + } + brw_ENDIF(p, is_rev); + + brw_MOV(p, get_element(c->reg.inlist, 2), brw_address(c->reg.vertex[2]) ); + brw_MOV(p, brw_vec8_grf(c->reg.outlist.nr, 0), brw_imm_f(0)); + brw_MOV(p, c->reg.nr_verts, brw_imm_ud(3)); +} + + + +void brw_clip_tri_flat_shade( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *is_poly; + struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */ + + brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_EQ, + tmp0, + brw_imm_ud(_3DPRIM_POLYGON)); + + is_poly = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_copy_colors(c, 1, 0); + brw_clip_copy_colors(c, 2, 0); + } + is_poly = brw_ELSE(p, is_poly); + { + brw_clip_copy_colors(c, 0, 2); + brw_clip_copy_colors(c, 1, 2); + } + brw_ENDIF(p, is_poly); +} + + + +/* Use mesa's clipping algorithms, translated to GEN4 assembly. + */ +void brw_clip_tri( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_indirect vtx = brw_indirect(0, 0); + struct brw_indirect vtxPrev = brw_indirect(1, 0); + struct brw_indirect vtxOut = brw_indirect(2, 0); + struct brw_indirect plane_ptr = brw_indirect(3, 0); + struct brw_indirect inlist_ptr = brw_indirect(4, 0); + struct brw_indirect outlist_ptr = brw_indirect(5, 0); + struct brw_indirect freelist_ptr = brw_indirect(6, 0); + struct brw_instruction *plane_loop; + struct brw_instruction *plane_active; + struct brw_instruction *vertex_loop; + struct brw_instruction *next_test; + struct brw_instruction *prev_test; + + brw_MOV(p, get_addr_reg(vtxPrev), brw_address(c->reg.vertex[2]) ); + brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c)); + brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist)); + brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist)); + + brw_MOV(p, get_addr_reg(freelist_ptr), brw_address(c->reg.vertex[3]) ); + + plane_loop = brw_DO(p, BRW_EXECUTE_1); + { + /* if (planemask & 1) + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_AND(p, vec1(brw_null_reg()), c->reg.planemask, brw_imm_ud(1)); + + plane_active = brw_IF(p, BRW_EXECUTE_1); + { + /* vtxOut = freelist_ptr++ + */ + brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(freelist_ptr) ); + brw_ADD(p, get_addr_reg(freelist_ptr), get_addr_reg(freelist_ptr), brw_imm_uw(c->nr_regs * REG_SIZE)); + + if (c->key.nr_userclip) + brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0)); + else + brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0)); + + brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); + brw_MOV(p, c->reg.nr_verts, brw_imm_ud(0)); + + vertex_loop = brw_DO(p, BRW_EXECUTE_1); + { + /* vtx = *input_ptr; + */ + brw_MOV(p, get_addr_reg(vtx), deref_1uw(inlist_ptr, 0)); + + /* IS_NEGATIVE(prev) */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_L); + brw_DP4(p, vec4(c->reg.dpPrev), deref_4f(vtxPrev, c->offset_hpos), c->reg.plane_equation); + prev_test = brw_IF(p, BRW_EXECUTE_1); + { + /* IS_POSITIVE(next) + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_GE); + brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset_hpos), c->reg.plane_equation); + next_test = brw_IF(p, BRW_EXECUTE_1); + { + + /* Coming back in. + */ + brw_ADD(p, c->reg.t, c->reg.dpPrev, negate(c->reg.dp)); + brw_math_invert(p, c->reg.t, c->reg.t); + brw_MUL(p, c->reg.t, c->reg.t, c->reg.dpPrev); + + /* If (vtxOut == 0) vtxOut = vtxPrev + */ + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) ); + brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtxPrev) ); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + brw_clip_interp_vertex(c, vtxOut, vtxPrev, vtx, c->reg.t, GL_FALSE); + + /* *outlist_ptr++ = vtxOut; + * nr_verts++; + * vtxOut = 0; + */ + brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut)); + brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short))); + brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1)); + brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) ); + } + brw_ENDIF(p, next_test); + + } + prev_test = brw_ELSE(p, prev_test); + { + /* *outlist_ptr++ = vtxPrev; + * nr_verts++; + */ + brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxPrev)); + brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short))); + brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1)); + + /* IS_NEGATIVE(next) + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_L); + brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset_hpos), c->reg.plane_equation); + next_test = brw_IF(p, BRW_EXECUTE_1); + { + /* Going out of bounds. Avoid division by zero as we + * know dp != dpPrev from DIFFERENT_SIGNS, above. + */ + brw_ADD(p, c->reg.t, c->reg.dp, negate(c->reg.dpPrev)); + brw_math_invert(p, c->reg.t, c->reg.t); + brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp); + + /* If (vtxOut == 0) vtxOut = vtx + */ + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) ); + brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtx) ); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + brw_clip_interp_vertex(c, vtxOut, vtx, vtxPrev, c->reg.t, GL_TRUE); + + /* *outlist_ptr++ = vtxOut; + * nr_verts++; + * vtxOut = 0; + */ + brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut)); + brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short))); + brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1)); + brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) ); + } + brw_ENDIF(p, next_test); + } + brw_ENDIF(p, prev_test); + + /* vtxPrev = vtx; + * inlist_ptr++; + */ + brw_MOV(p, get_addr_reg(vtxPrev), get_addr_reg(vtx)); + brw_ADD(p, get_addr_reg(inlist_ptr), get_addr_reg(inlist_ptr), brw_imm_uw(sizeof(short))); + + /* while (--loopcount != 0) + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); + } + brw_WHILE(p, vertex_loop); + + /* vtxPrev = *(outlist_ptr-1) OR: outlist[nr_verts-1] + * inlist = outlist + * inlist_ptr = &inlist[0] + * outlist_ptr = &outlist[0] + */ + brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_w(-2)); + brw_MOV(p, get_addr_reg(vtxPrev), deref_1uw(outlist_ptr, 0)); + brw_MOV(p, brw_vec8_grf(c->reg.inlist.nr, 0), brw_vec8_grf(c->reg.outlist.nr, 0)); + brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist)); + brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist)); + } + brw_ENDIF(p, plane_active); + + /* plane_ptr++; + */ + brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c)); + + /* nr_verts >= 3 + */ + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_GE, + c->reg.nr_verts, + brw_imm_ud(3)); + + /* && (planemask>>=1) != 0 + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1)); + } + brw_WHILE(p, plane_loop); +} + + + +void brw_clip_tri_emit_polygon(struct brw_clip_compile *c) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *loop, *if_insn; + + /* for (loopcount = nr_verts-2; loopcount > 0; loopcount--) + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_G); + brw_ADD(p, + c->reg.loopcount, + c->reg.nr_verts, + brw_imm_d(-2)); + + if_insn = brw_IF(p, BRW_EXECUTE_1); + { + struct brw_indirect v0 = brw_indirect(0, 0); + struct brw_indirect vptr = brw_indirect(1, 0); + + brw_MOV(p, get_addr_reg(vptr), brw_address(c->reg.inlist)); + brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0)); + + brw_clip_emit_vue(c, v0, 1, 0, ((_3DPRIM_TRIFAN << 2) | R02_PRIM_START)); + + brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2)); + brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0)); + + loop = brw_DO(p, BRW_EXECUTE_1); + { + brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_TRIFAN << 2)); + + brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2)); + brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0)); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); + } + brw_WHILE(p, loop); + + brw_clip_emit_vue(c, v0, 0, 1, ((_3DPRIM_TRIFAN << 2) | R02_PRIM_END)); + } + brw_ENDIF(p, if_insn); +} + +static void do_clip_tri( struct brw_clip_compile *c ) +{ + brw_clip_init_planes(c); + + brw_clip_tri(c); +} + + +static void maybe_do_clip_tri( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *do_clip; + + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0)); + do_clip = brw_IF(p, BRW_EXECUTE_1); + { + do_clip_tri(c); + } + brw_ENDIF(p, do_clip); +} + +static void brw_clip_test( struct brw_clip_compile *c ) +{ + struct brw_reg t = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + struct brw_reg t1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + struct brw_reg t2 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + struct brw_reg t3 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + + struct brw_reg v0 = get_tmp(c); + struct brw_reg v1 = get_tmp(c); + struct brw_reg v2 = get_tmp(c); + + struct brw_indirect vt0 = brw_indirect(0, 0); + struct brw_indirect vt1 = brw_indirect(1, 0); + struct brw_indirect vt2 = brw_indirect(2, 0); + + struct brw_compile *p = &c->func; + struct brw_instruction *is_outside; + struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */ + + brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0])); + brw_MOV(p, get_addr_reg(vt1), brw_address(c->reg.vertex[1])); + brw_MOV(p, get_addr_reg(vt2), brw_address(c->reg.vertex[2])); + brw_MOV(p, v0, deref_4f(vt0, c->offset_hpos)); + brw_MOV(p, v1, deref_4f(vt1, c->offset_hpos)); + brw_MOV(p, v2, deref_4f(vt2, c->offset_hpos)); + brw_AND(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(~0x3f)); + + /* test nearz, xmin, ymin plane */ + /* clip.xyz < -clip.w */ + brw_CMP(p, t1, BRW_CONDITIONAL_L, v0, negate(get_element(v0, 3))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, t2, BRW_CONDITIONAL_L, v1, negate(get_element(v1, 3))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, t3, BRW_CONDITIONAL_L, v2, negate(get_element(v2, 3))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + /* All vertices are outside of a plane, rejected */ + brw_AND(p, t, t1, t2); + brw_AND(p, t, t, t3); + brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1)); + brw_OR(p, tmp0, tmp0, get_element(t, 2)); + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1)); + is_outside = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_kill_thread(c); + } + brw_ENDIF(p, is_outside); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + /* some vertices are inside a plane, some are outside,need to clip */ + brw_XOR(p, t, t1, t2); + brw_XOR(p, t1, t2, t3); + brw_OR(p, t, t, t1); + brw_AND(p, t, t, brw_imm_ud(0x1)); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 0), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<5))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 1), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<3))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 2), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<1))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + /* test farz, xmax, ymax plane */ + /* clip.xyz > clip.w */ + brw_CMP(p, t1, BRW_CONDITIONAL_G, v0, get_element(v0, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, t2, BRW_CONDITIONAL_G, v1, get_element(v1, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, t3, BRW_CONDITIONAL_G, v2, get_element(v2, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + /* All vertices are outside of a plane, rejected */ + brw_AND(p, t, t1, t2); + brw_AND(p, t, t, t3); + brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1)); + brw_OR(p, tmp0, tmp0, get_element(t, 2)); + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1)); + is_outside = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_kill_thread(c); + } + brw_ENDIF(p, is_outside); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + /* some vertices are inside a plane, some are outside,need to clip */ + brw_XOR(p, t, t1, t2); + brw_XOR(p, t1, t2, t3); + brw_OR(p, t, t, t1); + brw_AND(p, t, t, brw_imm_ud(0x1)); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 0), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<4))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 1), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<2))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 2), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<0))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + release_tmps(c); +} + + +void brw_emit_tri_clip( struct brw_clip_compile *c ) +{ + struct brw_instruction *neg_rhw; + struct brw_compile *p = &c->func; + brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6); + brw_clip_tri_init_vertices(c); + brw_clip_init_clipmask(c); + brw_clip_init_ff_sync(c); + + /* if -ve rhw workaround bit is set, + do cliptest */ + if (c->chipset.is_965) { + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), + brw_imm_ud(1<<20)); + neg_rhw = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_test(c); + } + brw_ENDIF(p, neg_rhw); + } + /* Can't push into do_clip_tri because with polygon (or quad) + * flatshading, need to apply the flatshade here because we don't + * respect the PV when converting to trifan for emit: + */ + if (c->key.do_flat_shading) + brw_clip_tri_flat_shade(c); + + if ((c->key.clip_mode == BRW_CLIPMODE_NORMAL) || + (c->key.clip_mode == BRW_CLIPMODE_KERNEL_CLIP)) + do_clip_tri(c); + else + maybe_do_clip_tri(c); + + brw_clip_tri_emit_polygon(c); + + /* Send an empty message to kill the thread: + */ + brw_clip_kill_thread(c); +} diff --git a/src/gallium/drivers/i965/brw_clip_unfilled.c b/src/gallium/drivers/i965/brw_clip_unfilled.c new file mode 100644 index 00000000000..aec835b8cec --- /dev/null +++ b/src/gallium/drivers/i965/brw_clip_unfilled.c @@ -0,0 +1,497 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#include "brw_defines.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_clip.h" + + + +/* This is performed against the original triangles, so no indirection + * required: +BZZZT! + */ +static void compute_tri_direction( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg e = c->reg.tmp0; + struct brw_reg f = c->reg.tmp1; + struct brw_reg v0 = byte_offset(c->reg.vertex[0], c->offset_hpos); + struct brw_reg v1 = byte_offset(c->reg.vertex[1], c->offset_hpos); + struct brw_reg v2 = byte_offset(c->reg.vertex[2], c->offset_hpos); + + + struct brw_reg v0n = get_tmp(c); + struct brw_reg v1n = get_tmp(c); + struct brw_reg v2n = get_tmp(c); + + /* Convert to NDC. + * NOTE: We can't modify the original vertex coordinates, + * as it may impact further operations. + * So, we have to keep normalized coordinates in temp registers. + * + * TBD-KC + * Try to optimize unnecessary MOV's. + */ + brw_MOV(p, v0n, v0); + brw_MOV(p, v1n, v1); + brw_MOV(p, v2n, v2); + + brw_clip_project_position(c, v0n); + brw_clip_project_position(c, v1n); + brw_clip_project_position(c, v2n); + + /* Calculate the vectors of two edges of the triangle: + */ + brw_ADD(p, e, v0n, negate(v2n)); + brw_ADD(p, f, v1n, negate(v2n)); + + /* Take their crossproduct: + */ + brw_set_access_mode(p, BRW_ALIGN_16); + brw_MUL(p, vec4(brw_null_reg()), brw_swizzle(e, 1,2,0,3), brw_swizzle(f,2,0,1,3)); + brw_MAC(p, vec4(e), negate(brw_swizzle(e, 2,0,1,3)), brw_swizzle(f,1,2,0,3)); + brw_set_access_mode(p, BRW_ALIGN_1); + + brw_MUL(p, c->reg.dir, c->reg.dir, vec4(e)); +} + + +static void cull_direction( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *ccw; + GLuint conditional; + + assert (!(c->key.fill_ccw == CLIP_CULL && + c->key.fill_cw == CLIP_CULL)); + + if (c->key.fill_ccw == CLIP_CULL) + conditional = BRW_CONDITIONAL_GE; + else + conditional = BRW_CONDITIONAL_L; + + brw_CMP(p, + vec1(brw_null_reg()), + conditional, + get_element(c->reg.dir, 2), + brw_imm_f(0)); + + ccw = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_kill_thread(c); + } + brw_ENDIF(p, ccw); +} + + + +static void copy_bfc( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *ccw; + GLuint conditional; + + /* Do we have any colors to copy? + */ + if ((c->offset_color0 == 0 || c->offset_bfc0 == 0) && + (c->offset_color1 == 0 || c->offset_bfc1 == 0)) + return; + + /* In some wierd degnerate cases we can end up testing the + * direction twice, once for culling and once for bfc copying. Oh + * well, that's what you get for setting wierd GL state. + */ + if (c->key.copy_bfc_ccw) + conditional = BRW_CONDITIONAL_GE; + else + conditional = BRW_CONDITIONAL_L; + + brw_CMP(p, + vec1(brw_null_reg()), + conditional, + get_element(c->reg.dir, 2), + brw_imm_f(0)); + + ccw = brw_IF(p, BRW_EXECUTE_1); + { + GLuint i; + + for (i = 0; i < 3; i++) { + if (c->offset_color0 && c->offset_bfc0) + brw_MOV(p, + byte_offset(c->reg.vertex[i], c->offset_color0), + byte_offset(c->reg.vertex[i], c->offset_bfc0)); + + if (c->offset_color1 && c->offset_bfc1) + brw_MOV(p, + byte_offset(c->reg.vertex[i], c->offset_color0), + byte_offset(c->reg.vertex[i], c->offset_bfc0)); + } + } + brw_ENDIF(p, ccw); +} + + + + +/* + GLfloat iz = 1.0 / dir.z; + GLfloat ac = dir.x * iz; + GLfloat bc = dir.y * iz; + offset = ctx->Polygon.OffsetUnits * DEPTH_SCALE; + offset += MAX2( abs(ac), abs(bc) ) * ctx->Polygon.OffsetFactor; + offset *= MRD; +*/ +static void compute_offset( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg off = c->reg.offset; + struct brw_reg dir = c->reg.dir; + + brw_math_invert(p, get_element(off, 2), get_element(dir, 2)); + brw_MUL(p, vec2(off), dir, get_element(off, 2)); + + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_GE, + brw_abs(get_element(off, 0)), + brw_abs(get_element(off, 1))); + + brw_SEL(p, vec1(off), brw_abs(get_element(off, 0)), brw_abs(get_element(off, 1))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + brw_MUL(p, vec1(off), off, brw_imm_f(c->key.offset_factor)); + brw_ADD(p, vec1(off), off, brw_imm_f(c->key.offset_units)); +} + + +static void merge_edgeflags( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *is_poly; + struct brw_reg tmp0 = get_element_ud(c->reg.tmp0, 0); + + brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_EQ, + tmp0, + brw_imm_ud(_3DPRIM_POLYGON)); + + /* Get away with using reg.vertex because we know that this is not + * a _3DPRIM_TRISTRIP_REVERSE: + */ + is_poly = brw_IF(p, BRW_EXECUTE_1); + { + brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ); + brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<8)); + brw_MOV(p, byte_offset(c->reg.vertex[0], c->offset_edgeflag), brw_imm_f(0)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ); + brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<9)); + brw_MOV(p, byte_offset(c->reg.vertex[2], c->offset_edgeflag), brw_imm_f(0)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } + brw_ENDIF(p, is_poly); +} + + + +static void apply_one_offset( struct brw_clip_compile *c, + struct brw_indirect vert ) +{ + struct brw_compile *p = &c->func; + struct brw_reg z = deref_1f(vert, c->header_position_offset + + 2 * type_sz(BRW_REGISTER_TYPE_F)); + + brw_ADD(p, z, z, vec1(c->reg.offset)); +} + + + +/*********************************************************************** + * Output clipped polygon as an unfilled primitive: + */ +static void emit_lines(struct brw_clip_compile *c, + GLboolean do_offset) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *loop; + struct brw_instruction *draw_edge; + struct brw_indirect v0 = brw_indirect(0, 0); + struct brw_indirect v1 = brw_indirect(1, 0); + struct brw_indirect v0ptr = brw_indirect(2, 0); + struct brw_indirect v1ptr = brw_indirect(3, 0); + + /* Need a seperate loop for offset: + */ + if (do_offset) { + brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); + brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist)); + + loop = brw_DO(p, BRW_EXECUTE_1); + { + brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); + brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); + + apply_one_offset(c, v0); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_G); + brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); + } + brw_WHILE(p, loop); + } + + /* v1ptr = &inlist[nr_verts] + * *v1ptr = v0 + */ + brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); + brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist)); + brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v0ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW)); + brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v1ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW)); + brw_MOV(p, deref_1uw(v1ptr, 0), deref_1uw(v0ptr, 0)); + + loop = brw_DO(p, BRW_EXECUTE_1); + { + brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); + brw_MOV(p, get_addr_reg(v1), deref_1uw(v0ptr, 2)); + brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); + + /* draw edge if edgeflag != 0 */ + brw_CMP(p, + vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, + deref_1f(v0, c->offset_edgeflag), + brw_imm_f(0)); + draw_edge = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START); + brw_clip_emit_vue(c, v1, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END); + } + brw_ENDIF(p, draw_edge); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); + } + brw_WHILE(p, loop); +} + + + +static void emit_points(struct brw_clip_compile *c, + GLboolean do_offset ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *loop; + struct brw_instruction *draw_point; + + struct brw_indirect v0 = brw_indirect(0, 0); + struct brw_indirect v0ptr = brw_indirect(2, 0); + + brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); + brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist)); + + loop = brw_DO(p, BRW_EXECUTE_1); + { + brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); + brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); + + /* draw if edgeflag != 0 + */ + brw_CMP(p, + vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, + deref_1f(v0, c->offset_edgeflag), + brw_imm_f(0)); + draw_point = brw_IF(p, BRW_EXECUTE_1); + { + if (do_offset) + apply_one_offset(c, v0); + + brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END); + } + brw_ENDIF(p, draw_point); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); + } + brw_WHILE(p, loop); +} + + + + + + + +static void emit_primitives( struct brw_clip_compile *c, + GLuint mode, + GLboolean do_offset ) +{ + switch (mode) { + case CLIP_FILL: + brw_clip_tri_emit_polygon(c); + break; + + case CLIP_LINE: + emit_lines(c, do_offset); + break; + + case CLIP_POINT: + emit_points(c, do_offset); + break; + + case CLIP_CULL: + assert(0); + break; + } +} + + + +static void emit_unfilled_primitives( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *ccw; + + /* Direction culling has already been done. + */ + if (c->key.fill_ccw != c->key.fill_cw && + c->key.fill_ccw != CLIP_CULL && + c->key.fill_cw != CLIP_CULL) + { + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_GE, + get_element(c->reg.dir, 2), + brw_imm_f(0)); + + ccw = brw_IF(p, BRW_EXECUTE_1); + { + emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw); + } + ccw = brw_ELSE(p, ccw); + { + emit_primitives(c, c->key.fill_cw, c->key.offset_cw); + } + brw_ENDIF(p, ccw); + } + else if (c->key.fill_cw != CLIP_CULL) { + emit_primitives(c, c->key.fill_cw, c->key.offset_cw); + } + else if (c->key.fill_ccw != CLIP_CULL) { + emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw); + } +} + + + + +static void check_nr_verts( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *if_insn; + + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.nr_verts, brw_imm_d(3)); + if_insn = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_kill_thread(c); + } + brw_ENDIF(p, if_insn); +} + + +void brw_emit_unfilled_clip( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *do_clip; + + + c->need_direction = ((c->key.offset_ccw || c->key.offset_cw) || + (c->key.fill_ccw != c->key.fill_cw) || + c->key.fill_ccw == CLIP_CULL || + c->key.fill_cw == CLIP_CULL || + c->key.copy_bfc_cw || + c->key.copy_bfc_ccw); + + brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6); + brw_clip_tri_init_vertices(c); + brw_clip_init_ff_sync(c); + + assert(c->offset_edgeflag); + + if (c->key.fill_ccw == CLIP_CULL && + c->key.fill_cw == CLIP_CULL) { + brw_clip_kill_thread(c); + return; + } + + merge_edgeflags(c); + + /* Need to use the inlist indirection here: + */ + if (c->need_direction) + compute_tri_direction(c); + + if (c->key.fill_ccw == CLIP_CULL || + c->key.fill_cw == CLIP_CULL) + cull_direction(c); + + if (c->key.offset_ccw || + c->key.offset_cw) + compute_offset(c); + + if (c->key.copy_bfc_ccw || + c->key.copy_bfc_cw) + copy_bfc(c); + + /* Need to do this whether we clip or not: + */ + if (c->key.do_flat_shading) + brw_clip_tri_flat_shade(c); + + brw_clip_init_clipmask(c); + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0)); + do_clip = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_init_planes(c); + brw_clip_tri(c); + check_nr_verts(c); + } + brw_ENDIF(p, do_clip); + + emit_unfilled_primitives(c); + brw_clip_kill_thread(c); +} + + + diff --git a/src/gallium/drivers/i965/brw_clip_util.c b/src/gallium/drivers/i965/brw_clip_util.c new file mode 100644 index 00000000000..97a57103105 --- /dev/null +++ b/src/gallium/drivers/i965/brw_clip_util.c @@ -0,0 +1,388 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#include "brw_defines.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_clip.h" + + + + +struct brw_reg get_tmp( struct brw_clip_compile *c ) +{ + struct brw_reg tmp = brw_vec4_grf(c->last_tmp, 0); + + if (++c->last_tmp > c->prog_data.total_grf) + c->prog_data.total_grf = c->last_tmp; + + return tmp; +} + +static void release_tmp( struct brw_clip_compile *c, struct brw_reg tmp ) +{ + if (tmp.nr == c->last_tmp-1) + c->last_tmp--; +} + + +static struct brw_reg make_plane_ud(GLuint x, GLuint y, GLuint z, GLuint w) +{ + return brw_imm_ud((w<<24) | (z<<16) | (y<<8) | x); +} + + +void brw_clip_init_planes( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + + if (!c->key.nr_userclip) { + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 0), make_plane_ud( 0, 0, 0xff, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 1), make_plane_ud( 0, 0, 1, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 2), make_plane_ud( 0, 0xff, 0, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 3), make_plane_ud( 0, 1, 0, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 4), make_plane_ud(0xff, 0, 0, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 5), make_plane_ud( 1, 0, 0, 1)); + } +} + + + +#define W 3 + +/* Project 'pos' to screen space (or back again), overwrite with results: + */ +void brw_clip_project_position(struct brw_clip_compile *c, struct brw_reg pos ) +{ + struct brw_compile *p = &c->func; + + /* calc rhw + */ + brw_math_invert(p, get_element(pos, W), get_element(pos, W)); + + /* value.xyz *= value.rhw + */ + brw_set_access_mode(p, BRW_ALIGN_16); + brw_MUL(p, brw_writemask(pos, BRW_WRITEMASK_XYZ), pos, brw_swizzle1(pos, W)); + brw_set_access_mode(p, BRW_ALIGN_1); +} + + +static void brw_clip_project_vertex( struct brw_clip_compile *c, + struct brw_indirect vert_addr ) +{ + struct brw_compile *p = &c->func; + struct brw_reg tmp = get_tmp(c); + + /* Fixup position. Extract from the original vertex and re-project + * to screen space: + */ + brw_MOV(p, tmp, deref_4f(vert_addr, c->offset_hpos)); + brw_clip_project_position(c, tmp); + brw_MOV(p, deref_4f(vert_addr, c->header_position_offset), tmp); + + release_tmp(c, tmp); +} + + + + +/* Interpolate between two vertices and put the result into a0.0. + * Increment a0.0 accordingly. + */ +void brw_clip_interp_vertex( struct brw_clip_compile *c, + struct brw_indirect dest_ptr, + struct brw_indirect v0_ptr, /* from */ + struct brw_indirect v1_ptr, /* to */ + struct brw_reg t0, + GLboolean force_edgeflag) +{ + struct brw_compile *p = &c->func; + struct brw_reg tmp = get_tmp(c); + GLuint i; + + /* Just copy the vertex header: + */ + /* + * After CLIP stage, only first 256 bits of the VUE are read + * back on IGDNG, so needn't change it + */ + brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1); + + /* Iterate over each attribute (could be done in pairs?) + */ + for (i = 0; i < c->key.nr_attrs; i++) { + GLuint delta = i*16 + 32; + + if (c->chipset.is_igdng) + delta = i * 16 + 32 * 3; + + if (delta == c->offset_edgeflag) { + if (force_edgeflag) + brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1)); + else + brw_MOV(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta)); + } + else { + /* Interpolate: + * + * New = attr0 + t*attr1 - t*attr0 + */ + brw_MUL(p, + vec4(brw_null_reg()), + deref_4f(v1_ptr, delta), + t0); + + brw_MAC(p, + tmp, + negate(deref_4f(v0_ptr, delta)), + t0); + + brw_ADD(p, + deref_4f(dest_ptr, delta), + deref_4f(v0_ptr, delta), + tmp); + } + } + + if (i & 1) { + GLuint delta = i*16 + 32; + + if (c->chipset.is_igdng) + delta = i * 16 + 32 * 3; + + brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0)); + } + + release_tmp(c, tmp); + + /* Recreate the projected (NDC) coordinate in the new vertex + * header: + */ + brw_clip_project_vertex(c, dest_ptr ); +} + + + + +#define MAX_MRF 16 + +void brw_clip_emit_vue(struct brw_clip_compile *c, + struct brw_indirect vert, + GLboolean allocate, + GLboolean eot, + GLuint header) +{ + struct brw_compile *p = &c->func; + GLuint start = c->last_mrf; + + brw_clip_ff_sync(c); + + assert(!(allocate && eot)); + + /* Cycle through mrf regs - probably futile as we have to wait for + * the allocation response anyway. Also, the order this function + * is invoked doesn't correspond to the order the instructions will + * be executed, so it won't have any effect in many cases. + */ +#if 0 + if (start + c->nr_regs + 1 >= MAX_MRF) + start = 0; + + c->last_mrf = start + c->nr_regs + 1; +#endif + + /* Copy the vertex from vertn into m1..mN+1: + */ + brw_copy_from_indirect(p, brw_message_reg(start+1), vert, c->nr_regs); + + /* Overwrite PrimType and PrimStart in the message header, for + * each vertex in turn: + */ + brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header)); + + + /* Send each vertex as a seperate write to the urb. This + * is different to the concept in brw_sf_emit.c, where + * subsequent writes are used to build up a single urb + * entry. Each of these writes instantiates a seperate + * urb entry - (I think... what about 'allocate'?) + */ + brw_urb_WRITE(p, + allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), + start, + c->reg.R0, + allocate, + 1, /* used */ + c->nr_regs + 1, /* msg length */ + allocate ? 1 : 0, /* response_length */ + eot, /* eot */ + 1, /* writes_complete */ + 0, /* urb offset */ + BRW_URB_SWIZZLE_NONE); +} + + + +void brw_clip_kill_thread(struct brw_clip_compile *c) +{ + struct brw_compile *p = &c->func; + + brw_clip_ff_sync(c); + /* Send an empty message to kill the thread and release any + * allocated urb entry: + */ + brw_urb_WRITE(p, + retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), + 0, + c->reg.R0, + 0, /* allocate */ + 0, /* used */ + 1, /* msg len */ + 0, /* response len */ + 1, /* eot */ + 1, /* writes complete */ + 0, + BRW_URB_SWIZZLE_NONE); +} + + + + +struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c ) +{ + return brw_address(c->reg.fixed_planes); +} + + +struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c ) +{ + if (c->key.nr_userclip) { + return brw_imm_uw(16); + } + else { + return brw_imm_uw(4); + } +} + + +/* If flatshading, distribute color from provoking vertex prior to + * clipping. + */ +void brw_clip_copy_colors( struct brw_clip_compile *c, + GLuint to, GLuint from ) +{ + struct brw_compile *p = &c->func; + + if (c->offset_color0) + brw_MOV(p, + byte_offset(c->reg.vertex[to], c->offset_color0), + byte_offset(c->reg.vertex[from], c->offset_color0)); + + if (c->offset_color1) + brw_MOV(p, + byte_offset(c->reg.vertex[to], c->offset_color1), + byte_offset(c->reg.vertex[from], c->offset_color1)); + + if (c->offset_bfc0) + brw_MOV(p, + byte_offset(c->reg.vertex[to], c->offset_bfc0), + byte_offset(c->reg.vertex[from], c->offset_bfc0)); + + if (c->offset_bfc1) + brw_MOV(p, + byte_offset(c->reg.vertex[to], c->offset_bfc1), + byte_offset(c->reg.vertex[from], c->offset_bfc1)); +} + + + +void brw_clip_init_clipmask( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg incoming = get_element_ud(c->reg.R0, 2); + + /* Shift so that lowest outcode bit is rightmost: + */ + brw_SHR(p, c->reg.planemask, incoming, brw_imm_ud(26)); + + if (c->key.nr_userclip) { + struct brw_reg tmp = retype(vec1(get_tmp(c)), BRW_REGISTER_TYPE_UD); + + /* Rearrange userclip outcodes so that they come directly after + * the fixed plane bits. + */ + brw_AND(p, tmp, incoming, brw_imm_ud(0x3f<<14)); + brw_SHR(p, tmp, tmp, brw_imm_ud(8)); + brw_OR(p, c->reg.planemask, c->reg.planemask, tmp); + + release_tmp(c, tmp); + } +} + +void brw_clip_ff_sync(struct brw_clip_compile *c) +{ + if (c->need_ff_sync) { + struct brw_compile *p = &c->func; + struct brw_instruction *need_ff_sync; + + brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); + brw_AND(p, brw_null_reg(), c->reg.ff_sync, brw_imm_ud(0x1)); + need_ff_sync = brw_IF(p, BRW_EXECUTE_1); + { + brw_OR(p, c->reg.ff_sync, c->reg.ff_sync, brw_imm_ud(0x1)); + brw_ff_sync(p, + c->reg.R0, + 0, + c->reg.R0, + 1, + 1, /* used */ + 1, /* msg length */ + 1, /* response length */ + 0, /* eot */ + 1, /* write compelete */ + 0, /* urb offset */ + BRW_URB_SWIZZLE_NONE); + } + brw_ENDIF(p, need_ff_sync); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } +} + +void brw_clip_init_ff_sync(struct brw_clip_compile *c) +{ + if (c->need_ff_sync) { + struct brw_compile *p = &c->func; + + brw_MOV(p, c->reg.ff_sync, brw_imm_ud(0)); + } +} diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c new file mode 100644 index 00000000000..e67551882dc --- /dev/null +++ b/src/gallium/drivers/i965/brw_context.c @@ -0,0 +1,154 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#include "pipe/p_context.h" +#include "util/u_simple_list.h" + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_draw.h" +#include "brw_state.h" +#include "brw_batchbuffer.h" +#include "brw_winsys.h" +#include "brw_screen.h" + + +static void brw_destroy_context( struct pipe_context *pipe ) +{ + struct brw_context *brw = brw_context(pipe); + int i; + + brw_context_flush( brw ); + brw_batchbuffer_free( brw->batch ); + brw_destroy_state(brw); + + brw_draw_cleanup( brw ); + + brw_pipe_blend_cleanup( brw ); + brw_pipe_depth_stencil_cleanup( brw ); + brw_pipe_framebuffer_cleanup( brw ); + brw_pipe_flush_cleanup( brw ); + brw_pipe_misc_cleanup( brw ); + brw_pipe_query_cleanup( brw ); + brw_pipe_rast_cleanup( brw ); + brw_pipe_sampler_cleanup( brw ); + brw_pipe_shader_cleanup( brw ); + brw_pipe_vertex_cleanup( brw ); + brw_pipe_clear_cleanup( brw ); + + brw_hw_cc_cleanup( brw ); + + + FREE(brw->wm.compile_data); + + for (i = 0; i < brw->curr.fb.nr_cbufs; i++) + pipe_surface_reference(&brw->curr.fb.cbufs[i], NULL); + brw->curr.fb.nr_cbufs = 0; + pipe_surface_reference(&brw->curr.fb.zsbuf, NULL); + + bo_reference(&brw->curbe.curbe_bo, NULL); + bo_reference(&brw->vs.prog_bo, NULL); + bo_reference(&brw->vs.state_bo, NULL); + bo_reference(&brw->vs.bind_bo, NULL); + bo_reference(&brw->gs.prog_bo, NULL); + bo_reference(&brw->gs.state_bo, NULL); + bo_reference(&brw->clip.prog_bo, NULL); + bo_reference(&brw->clip.state_bo, NULL); + bo_reference(&brw->clip.vp_bo, NULL); + bo_reference(&brw->sf.prog_bo, NULL); + bo_reference(&brw->sf.state_bo, NULL); + bo_reference(&brw->sf.vp_bo, NULL); + + for (i = 0; i < Elements(brw->wm.sdc_bo); i++) + bo_reference(&brw->wm.sdc_bo[i], NULL); + + bo_reference(&brw->wm.bind_bo, NULL); + + for (i = 0; i < Elements(brw->wm.surf_bo); i++) + bo_reference(&brw->wm.surf_bo[i], NULL); + + bo_reference(&brw->wm.sampler_bo, NULL); + bo_reference(&brw->wm.prog_bo, NULL); + bo_reference(&brw->wm.state_bo, NULL); +} + + +struct pipe_context *brw_create_context(struct pipe_screen *screen) +{ + struct brw_context *brw = (struct brw_context *) CALLOC_STRUCT(brw_context); + + if (!brw) { + debug_printf("%s: failed to alloc context\n", __FUNCTION__); + return NULL; + } + + brw->base.screen = screen; + brw->base.destroy = brw_destroy_context; + brw->sws = brw_screen(screen)->sws; + brw->chipset = brw_screen(screen)->chipset; + + brw_pipe_blend_init( brw ); + brw_pipe_depth_stencil_init( brw ); + brw_pipe_framebuffer_init( brw ); + brw_pipe_flush_init( brw ); + brw_pipe_misc_init( brw ); + brw_pipe_query_init( brw ); + brw_pipe_rast_init( brw ); + brw_pipe_sampler_init( brw ); + brw_pipe_shader_init( brw ); + brw_pipe_vertex_init( brw ); + brw_pipe_clear_init( brw ); + + brw_hw_cc_init( brw ); + + brw_init_state( brw ); + brw_draw_init( brw ); + + brw->state.dirty.mesa = ~0; + brw->state.dirty.brw = ~0; + + brw->flags.always_emit_state = 0; + + make_empty_list(&brw->query.active_head); + + brw->batch = brw_batchbuffer_alloc( brw->sws, brw->chipset ); + if (brw->batch == NULL) + goto fail; + + return &brw->base; + +fail: + if (brw->batch) + brw_batchbuffer_free( brw->batch ); + return NULL; +} + diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h new file mode 100644 index 00000000000..8c006bb95b2 --- /dev/null +++ b/src/gallium/drivers/i965/brw_context.h @@ -0,0 +1,858 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#ifndef BRWCONTEXT_INC +#define BRWCONTEXT_INC + +#include "brw_structs.h" +#include "brw_winsys.h" +#include "brw_reg.h" +#include "pipe/p_state.h" +#include "pipe/p_context.h" +#include "tgsi/tgsi_scan.h" + + +/* Glossary: + * + * URB - uniform resource buffer. A mid-sized buffer which is + * partitioned between the fixed function units and used for passing + * values (vertices, primitives, constants) between them. + * + * CURBE - constant URB entry. An urb region (entry) used to hold + * constant values which the fixed function units can be instructed to + * preload into the GRF when spawning a thread. + * + * VUE - vertex URB entry. An urb entry holding a vertex and usually + * a vertex header. The header contains control information and + * things like primitive type, Begin/end flags and clip codes. + * + * PUE - primitive URB entry. An urb entry produced by the setup (SF) + * unit holding rasterization and interpolation parameters. + * + * GRF - general register file. One of several register files + * addressable by programmed threads. The inputs (r0, payload, curbe, + * urb) of the thread are preloaded to this area before the thread is + * spawned. The registers are individually 8 dwords wide and suitable + * for general usage. Registers holding thread input values are not + * special and may be overwritten. + * + * MRF - message register file. Threads communicate (and terminate) + * by sending messages. Message parameters are placed in contiguous + * MRF registers. All program output is via these messages. URB + * entries are populated by sending a message to the shared URB + * function containing the new data, together with a control word, + * often an unmodified copy of R0. + * + * R0 - GRF register 0. Typically holds control information used when + * sending messages to other threads. + * + * EU or GEN4 EU: The name of the programmable subsystem of the + * i965 hardware. Threads are executed by the EU, the registers + * described above are part of the EU architecture. + * + * Fixed function units: + * + * CS - Command streamer. Notional first unit, little software + * interaction. Holds the URB entries used for constant data, ie the + * CURBEs. + * + * VF/VS - Vertex Fetch / Vertex Shader. The fixed function part of + * this unit is responsible for pulling vertices out of vertex buffers + * in vram and injecting them into the processing pipe as VUEs. If + * enabled, it first passes them to a VS thread which is a good place + * for the driver to implement any active vertex shader. + * + * GS - Geometry Shader. This corresponds to a new DX10 concept. If + * enabled, incoming strips etc are passed to GS threads in individual + * line/triangle/point units. The GS thread may perform arbitary + * computation and emit whatever primtives with whatever vertices it + * chooses. This makes GS an excellent place to implement GL's + * unfilled polygon modes, though of course it is capable of much + * more. Additionally, GS is used to translate away primitives not + * handled by latter units, including Quads and Lineloops. + * + * CS - Clipper. Mesa's clipping algorithms are imported to run on + * this unit. The fixed function part performs cliptesting against + * the 6 fixed clipplanes and makes decisions on whether or not the + * incoming primitive needs to be passed to a thread for clipping. + * User clip planes are handled via cooperation with the VS thread. + * + * SF - Strips Fans or Setup: Triangles are prepared for + * rasterization. Interpolation coefficients are calculated. + * Flatshading and two-side lighting usually performed here. + * + * WM - Windower. Interpolation of vertex attributes performed here. + * Fragment shader implemented here. SIMD aspects of EU taken full + * advantage of, as pixels are processed in blocks of 16. + * + * CC - Color Calculator. No EU threads associated with this unit. + * Handles blending and (presumably) depth and stencil testing. + */ + +#define BRW_MAX_CURBE (32*16) + + +/* Need a value to say a particular vertex shader output isn't + * present. Limits us to 63 outputs currently. + */ +#define BRW_OUTPUT_NOT_PRESENT ((1<<6)-1) + + +struct brw_context; + +struct brw_depth_stencil_state { + /* Precalculated hardware state: + */ + struct brw_cc0 cc0; + struct brw_cc1 cc1; + struct brw_cc2 cc2; + struct brw_cc3 cc3; + struct brw_cc7 cc7; + + unsigned iz_lookup; +}; + + +struct brw_blend_state { + /* Precalculated hardware state: + */ + struct brw_cc2 cc2; + struct brw_cc3 cc3; + struct brw_cc5 cc5; + struct brw_cc6 cc6; + + struct brw_surf_ss0 ss0; +}; + + +struct brw_rasterizer_state; + +struct brw_immediate_data { + unsigned nr; + float (*data)[4]; +}; + +struct brw_vertex_shader { + const struct tgsi_token *tokens; + struct brw_winsys_buffer *const_buffer; /** Program constant buffer/surface */ + + struct tgsi_shader_info info; + struct brw_immediate_data immediates; + + GLuint has_flow_control:1; + GLuint use_const_buffer:1; + + /* Offsets of special vertex shader outputs required for clipping. + */ + GLuint output_hpos:6; /* not always zero? */ + GLuint output_color0:6; + GLuint output_color1:6; + GLuint output_bfc0:6; + GLuint output_bfc1:6; + GLuint output_edgeflag:6; + + unsigned id; +}; + +struct brw_fs_signature { + GLuint nr_inputs; + struct { + GLuint interp:3; /* TGSI_INTERPOLATE_x */ + GLuint semantic:5; /* TGSI_SEMANTIC_x */ + GLuint semantic_index:24; + } input[PIPE_MAX_SHADER_INPUTS]; +}; + +#define brw_fs_signature_size(s) (offsetof(struct brw_fs_signature, input) + \ + ((s)->nr_inputs * sizeof (s)->input[0])) + + +struct brw_fragment_shader { + const struct tgsi_token *tokens; + struct tgsi_shader_info info; + + struct brw_fs_signature signature; + struct brw_immediate_data immediates; + + unsigned iz_lookup; + /*unsigned wm_lookup;*/ + + unsigned uses_depth:1; + unsigned has_flow_control:1; + + unsigned id; + struct brw_winsys_buffer *const_buffer; /** Program constant buffer/surface */ + GLboolean use_const_buffer; +}; + + +struct brw_sampler { + struct brw_ss0 ss0; + struct brw_ss1 ss1; + float border_color[4]; + struct brw_ss3 ss3; +}; + + + +#define PIPE_NEW_DEPTH_STENCIL_ALPHA 0x1 +#define PIPE_NEW_RAST 0x2 +#define PIPE_NEW_BLEND 0x4 +#define PIPE_NEW_VIEWPORT 0x8 +#define PIPE_NEW_SAMPLERS 0x10 +#define PIPE_NEW_VERTEX_BUFFER 0x20 +#define PIPE_NEW_VERTEX_ELEMENT 0x40 +#define PIPE_NEW_FRAGMENT_SHADER 0x80 +#define PIPE_NEW_VERTEX_SHADER 0x100 +#define PIPE_NEW_FRAGMENT_CONSTANTS 0x200 +#define PIPE_NEW_VERTEX_CONSTANTS 0x400 +#define PIPE_NEW_CLIP 0x800 +#define PIPE_NEW_INDEX_BUFFER 0x1000 +#define PIPE_NEW_INDEX_RANGE 0x2000 +#define PIPE_NEW_BLEND_COLOR 0x4000 +#define PIPE_NEW_POLYGON_STIPPLE 0x8000 +#define PIPE_NEW_FRAMEBUFFER_DIMENSIONS 0x10000 +#define PIPE_NEW_DEPTH_BUFFER 0x20000 +#define PIPE_NEW_COLOR_BUFFERS 0x40000 +#define PIPE_NEW_QUERY 0x80000 +#define PIPE_NEW_SCISSOR 0x100000 +#define PIPE_NEW_BOUND_TEXTURES 0x200000 +#define PIPE_NEW_NR_CBUFS 0x400000 +#define PIPE_NEW_FRAGMENT_SIGNATURE 0x800000 + + + +#define BRW_NEW_URB_FENCE 0x1 +#define BRW_NEW_FRAGMENT_PROGRAM 0x2 +#define BRW_NEW_VERTEX_PROGRAM 0x4 +#define BRW_NEW_INPUT_DIMENSIONS 0x8 +#define BRW_NEW_CURBE_OFFSETS 0x10 +#define BRW_NEW_REDUCED_PRIMITIVE 0x20 +#define BRW_NEW_PRIMITIVE 0x40 +#define BRW_NEW_CONTEXT 0x80 +#define BRW_NEW_WM_INPUT_DIMENSIONS 0x100 +#define BRW_NEW_PSP 0x800 +#define BRW_NEW_WM_SURFACES 0x1000 +#define BRW_NEW_xxx 0x2000 /* was FENCE */ +#define BRW_NEW_INDICES 0x4000 + +/** + * Used for any batch entry with a relocated pointer that will be used + * by any 3D rendering. Need to re-emit these fresh in each + * batchbuffer as the referenced buffers may be relocated in the + * meantime. + */ +#define BRW_NEW_BATCH 0x10000 +#define BRW_NEW_NR_WM_SURFACES 0x40000 +#define BRW_NEW_NR_VS_SURFACES 0x80000 +#define BRW_NEW_INDEX_BUFFER 0x100000 + +struct brw_state_flags { + /** State update flags signalled by mesa internals */ + GLuint mesa; + /** + * State update flags signalled as the result of brw_tracked_state updates + */ + GLuint brw; + /** State update flags signalled by brw_state_cache.c searches */ + GLuint cache; +}; + + + +/* Data about a particular attempt to compile a program. Note that + * there can be many of these, each in a different GL state + * corresponding to a different brw_wm_prog_key struct, with different + * compiled programs: + */ +struct brw_wm_prog_data { + GLuint curb_read_length; + GLuint urb_read_length; + + GLuint first_curbe_grf; + GLuint total_grf; + GLuint total_scratch; + + GLuint nr_params; /**< number of float params/constants */ + GLboolean error; + + /* Pointer to tracked values (only valid once + * _mesa_load_state_parameters has been called at runtime). + */ + const GLfloat *param[BRW_MAX_CURBE]; +}; + +struct brw_sf_prog_data { + GLuint urb_read_length; + GLuint total_grf; + + /* Each vertex may have upto 12 attributes, 4 components each, + * except WPOS which requires only 2. (11*4 + 2) == 44 ==> 11 + * rows. + * + * Actually we use 4 for each, so call it 12 rows. + */ + GLuint urb_entry_size; +}; + + +struct brw_clip_prog_data; + +struct brw_gs_prog_data { + GLuint urb_read_length; + GLuint total_grf; +}; + +struct brw_vs_prog_data { + GLuint curb_read_length; + GLuint urb_read_length; + GLuint total_grf; + + GLuint nr_outputs; + GLuint nr_inputs; + + GLuint nr_params; /**< number of TGSI_FILE_CONSTANT's */ + + GLboolean writes_psiz; + + /* Used for calculating urb partitions: + */ + GLuint urb_entry_size; +}; + + +/* Size == 0 if output either not written, or always [0,0,0,1] + */ +struct brw_vs_ouput_sizes { + GLubyte output_size[PIPE_MAX_SHADER_OUTPUTS]; +}; + + +/** Number of texture sampler units */ +#define BRW_MAX_TEX_UNIT 16 + +/** Max number of render targets in a shader */ +#define BRW_MAX_DRAW_BUFFERS 4 + +/** + * Size of our surface binding table for the WM. + * This contains pointers to the drawing surfaces and current texture + * objects and shader constant buffers (+2). + */ +#define BRW_WM_MAX_SURF (BRW_MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1) + +/** + * Helpers to convert drawing buffers, textures and constant buffers + * to surface binding table indexes, for WM. + */ +#define BTI_COLOR_BUF(d) (d) +#define BTI_FRAGMENT_CONSTANTS (BRW_MAX_DRAW_BUFFERS) +#define BTI_TEXTURE(t) (BRW_MAX_DRAW_BUFFERS + 1 + (t)) + +/** + * Size of surface binding table for the VS. + * Only one constant buffer for now. + */ +#define BRW_VS_MAX_SURF 1 + +/** + * Only a VS constant buffer + */ +#define SURF_INDEX_VERT_CONST_BUFFER 0 + + +/* Bit of a hack to align these with the winsys buffer_data_type enum. + */ +enum brw_cache_id { + BRW_CC_VP = BRW_DATA_GS_CC_VP, + BRW_CC_UNIT = BRW_DATA_GS_CC_UNIT, + BRW_WM_PROG = BRW_DATA_GS_WM_PROG, + BRW_SAMPLER_DEFAULT_COLOR = BRW_DATA_GS_SAMPLER_DEFAULT_COLOR, + BRW_SAMPLER = BRW_DATA_GS_SAMPLER, + BRW_WM_UNIT = BRW_DATA_GS_WM_UNIT, + BRW_SF_PROG = BRW_DATA_GS_SF_PROG, + BRW_SF_VP = BRW_DATA_GS_SF_VP, + BRW_SF_UNIT = BRW_DATA_GS_SF_UNIT, + BRW_VS_UNIT = BRW_DATA_GS_VS_UNIT, + BRW_VS_PROG = BRW_DATA_GS_VS_PROG, + BRW_GS_UNIT = BRW_DATA_GS_GS_UNIT, + BRW_GS_PROG = BRW_DATA_GS_GS_PROG, + BRW_CLIP_VP = BRW_DATA_GS_CLIP_VP, + BRW_CLIP_UNIT = BRW_DATA_GS_CLIP_UNIT, + BRW_CLIP_PROG = BRW_DATA_GS_CLIP_PROG, + BRW_SS_SURFACE = BRW_DATA_SS_SURFACE, + BRW_SS_SURF_BIND = BRW_DATA_SS_SURF_BIND, + + BRW_MAX_CACHE +}; + +struct brw_cache_item { + /** + * Effectively part of the key, cache_id identifies what kind of state + * buffer is involved, and also which brw->state.dirty.cache flag should + * be set when this cache item is chosen. + */ + enum brw_cache_id cache_id; + /** 32-bit hash of the key data */ + GLuint hash; + GLuint key_size; /* for variable-sized keys */ + const void *key; + struct brw_winsys_reloc *relocs; + GLuint nr_relocs; + + struct brw_winsys_buffer *bo; + GLuint data_size; + + struct brw_cache_item *next; +}; + + + +struct brw_cache { + struct brw_context *brw; + struct brw_winsys_screen *sws; + + struct brw_cache_item **items; + GLuint size, n_items; + + enum brw_buffer_type buffer_type; + + GLuint key_size[BRW_MAX_CACHE]; /* for fixed-size keys */ + GLuint aux_size[BRW_MAX_CACHE]; + char *name[BRW_MAX_CACHE]; + + + /* Record of the last BOs chosen for each cache_id. Used to set + * brw->state.dirty.cache when a new cache item is chosen. + */ + struct brw_winsys_buffer *last_bo[BRW_MAX_CACHE]; +}; + + +struct brw_tracked_state { + struct brw_state_flags dirty; + int (*prepare)( struct brw_context *brw ); + int (*emit)( struct brw_context *brw ); +}; + +/* Flags for brw->state.cache. + */ +#define CACHE_NEW_CC_VP (1<<BRW_CC_VP) +#define CACHE_NEW_CC_UNIT (1<<BRW_CC_UNIT) +#define CACHE_NEW_WM_PROG (1<<BRW_WM_PROG) +#define CACHE_NEW_SAMPLER_DEFAULT_COLOR (1<<BRW_SAMPLER_DEFAULT_COLOR) +#define CACHE_NEW_SAMPLER (1<<BRW_SAMPLER) +#define CACHE_NEW_WM_UNIT (1<<BRW_WM_UNIT) +#define CACHE_NEW_SF_PROG (1<<BRW_SF_PROG) +#define CACHE_NEW_SF_VP (1<<BRW_SF_VP) +#define CACHE_NEW_SF_UNIT (1<<BRW_SF_UNIT) +#define CACHE_NEW_VS_UNIT (1<<BRW_VS_UNIT) +#define CACHE_NEW_VS_PROG (1<<BRW_VS_PROG) +#define CACHE_NEW_GS_UNIT (1<<BRW_GS_UNIT) +#define CACHE_NEW_GS_PROG (1<<BRW_GS_PROG) +#define CACHE_NEW_CLIP_VP (1<<BRW_CLIP_VP) +#define CACHE_NEW_CLIP_UNIT (1<<BRW_CLIP_UNIT) +#define CACHE_NEW_CLIP_PROG (1<<BRW_CLIP_PROG) +#define CACHE_NEW_SURFACE (1<<BRW_SS_SURFACE) +#define CACHE_NEW_SURF_BIND (1<<BRW_SS_SURF_BIND) + +struct brw_cached_batch_item { + struct header *header; + GLuint sz; + struct brw_cached_batch_item *next; +}; + + + +/* Protect against a future where VERT_ATTRIB_MAX > 32. Wouldn't life + * be easier if C allowed arrays of packed elements? + */ +#define VS_INPUT_BITMASK_DWORDS ((PIPE_MAX_SHADER_INPUTS+31)/32) + + + + +struct brw_vertex_info { + GLuint sizes[VS_INPUT_BITMASK_DWORDS * 2]; /* sizes:2[VERT_ATTRIB_MAX] */ +}; + + +struct brw_query_object { + /** Doubly linked list of active query objects in the context. */ + struct brw_query_object *prev, *next; + + /** Last query BO associated with this query. */ + struct brw_winsys_buffer *bo; + /** First index in bo with query data for this object. */ + int first_index; + /** Last index in bo with query data for this object. */ + int last_index; + + /* Total count of pixels from previous BOs */ + uint64_t result; +}; + +#define CC_RELOC_VP 0 + + +/** + * brw_context is derived from pipe_context + */ +struct brw_context +{ + struct pipe_context base; + struct brw_chipset chipset; + + struct brw_winsys_screen *sws; + + struct brw_batchbuffer *batch; + + GLuint primitive; + GLuint reduced_primitive; + + /* Active state from the state tracker: + */ + struct { + struct brw_vertex_shader *vertex_shader; + struct brw_fragment_shader *fragment_shader; + const struct brw_blend_state *blend; + const struct brw_rasterizer_state *rast; + const struct brw_depth_stencil_state *zstencil; + + const struct brw_sampler *sampler[PIPE_MAX_SAMPLERS]; + unsigned num_samplers; + + struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; + unsigned num_vertex_elements; + unsigned num_textures; + unsigned num_vertex_buffers; + + struct pipe_scissor_state scissor; + struct pipe_viewport_state viewport; + struct pipe_framebuffer_state fb; + struct pipe_clip_state ucp; + struct pipe_buffer *vertex_constants; + struct pipe_buffer *fragment_constants; + + struct brw_blend_constant_color bcc; + struct brw_polygon_stipple bps; + struct brw_cc_viewport ccv; + + /** + * Index buffer for this draw_prims call. + * + * Updates are signaled by PIPE_NEW_INDEX_BUFFER. + */ + struct pipe_buffer *index_buffer; + unsigned index_size; + + /* Updates are signalled by PIPE_NEW_INDEX_RANGE: + */ + unsigned min_index; + unsigned max_index; + + } curr; + + struct { + struct brw_state_flags dirty; + + /** + * List of buffers accumulated in brw_validate_state to receive + * dri_bo_check_aperture treatment before exec, so we can know if we + * should flush the batch and try again before emitting primitives. + * + * This can be a fixed number as we only have a limited number of + * objects referenced from the batchbuffer in a primitive emit, + * consisting of the vertex buffers, pipelined state pointers, + * the CURBE, the depth buffer, and a query BO. + */ + struct brw_winsys_buffer *validated_bos[PIPE_MAX_SHADER_INPUTS + 16]; + int validated_bo_count; + } state; + + struct brw_cache cache; /** non-surface items */ + struct brw_cache surface_cache; /* surface items */ + struct brw_cached_batch_item *cached_batch_items; + + struct { + struct u_upload_mgr *upload_vertex; + struct u_upload_mgr *upload_index; + + /* Information on uploaded vertex buffers: + */ + struct { + unsigned stride; /* in bytes between successive vertices */ + unsigned offset; /* in bytes, of first vertex in bo */ + unsigned vertex_count; /* count of valid vertices which may be accessed */ + struct brw_winsys_buffer *bo; + } vb[PIPE_MAX_ATTRIBS]; + + unsigned nr_vb; /* currently the same as curr.num_vertex_buffers */ + } vb; + + struct { + /* Updates to these fields are signaled by BRW_NEW_INDEX_BUFFER. */ + struct brw_winsys_buffer *bo; + unsigned int offset; + unsigned int size; + /* Offset to index buffer index to use in CMD_3D_PRIM so that we can + * avoid re-uploading the IB packet over and over if we're actually + * referencing the same index buffer. + */ + unsigned int start_vertex_offset; + } ib; + + + /* BRW_NEW_URB_ALLOCATIONS: + */ + struct { + GLuint vsize; /* vertex size plus header in urb registers */ + GLuint csize; /* constant buffer size in urb registers */ + GLuint sfsize; /* setup data size in urb registers */ + + GLboolean constrained; + + GLuint nr_vs_entries; + GLuint nr_gs_entries; + GLuint nr_clip_entries; + GLuint nr_sf_entries; + GLuint nr_cs_entries; + + GLuint vs_start; + GLuint gs_start; + GLuint clip_start; + GLuint sf_start; + GLuint cs_start; + } urb; + + + /* BRW_NEW_CURBE_OFFSETS: + */ + struct { + GLuint wm_start; /**< pos of first wm const in CURBE buffer */ + GLuint wm_size; /**< number of float[4] consts, multiple of 16 */ + GLuint clip_start; + GLuint clip_size; + GLuint vs_start; + GLuint vs_size; + GLuint total_size; + + struct brw_winsys_buffer *curbe_bo; + /** Offset within curbe_bo of space for current curbe entry */ + GLuint curbe_offset; + /** Offset within curbe_bo of space for next curbe entry */ + GLuint curbe_next_offset; + + GLfloat *last_buf; + GLuint last_bufsz; + /** + * Whether we should create a new bo instead of reusing the old one + * (if we just dispatch the batch pointing at the old one. + */ + GLboolean need_new_bo; + } curbe; + + struct { + struct brw_vs_prog_data *prog_data; + + struct brw_winsys_buffer *prog_bo; + struct brw_winsys_buffer *state_bo; + + /** Binding table of pointers to surf_bo entries */ + struct brw_winsys_buffer *bind_bo; + struct brw_winsys_buffer *surf_bo[BRW_VS_MAX_SURF]; + GLuint nr_surfaces; + } vs; + + struct { + struct brw_gs_prog_data *prog_data; + + GLboolean prog_active; + struct brw_winsys_buffer *prog_bo; + struct brw_winsys_buffer *state_bo; + } gs; + + struct { + struct brw_clip_prog_data *prog_data; + + struct brw_winsys_buffer *prog_bo; + struct brw_winsys_buffer *state_bo; + struct brw_winsys_buffer *vp_bo; + } clip; + + + struct { + struct brw_sf_prog_data *prog_data; + + struct brw_winsys_buffer *prog_bo; + struct brw_winsys_buffer *state_bo; + struct brw_winsys_buffer *vp_bo; + } sf; + + struct { + struct brw_wm_prog_data *prog_data; + struct brw_wm_compile *compile_data; + + /** Input sizes, calculated from active vertex program. + * One bit per fragment program input attribute. + */ + /*GLbitfield input_size_masks[4];*/ + + /** Array of surface default colors (texture border color) */ + struct brw_winsys_buffer *sdc_bo[BRW_MAX_TEX_UNIT]; + + GLuint render_surf; + GLuint nr_surfaces; + + GLuint max_threads; + struct brw_winsys_buffer *scratch_bo; + + GLuint sampler_count; + struct brw_winsys_buffer *sampler_bo; + + /** Binding table of pointers to surf_bo entries */ + struct brw_winsys_buffer *bind_bo; + struct brw_winsys_buffer *surf_bo[BRW_WM_MAX_SURF]; + + struct brw_winsys_buffer *prog_bo; + struct brw_winsys_buffer *state_bo; + } wm; + + + struct { + struct brw_winsys_buffer *state_bo; + + struct brw_cc_unit_state cc; + struct brw_winsys_reloc reloc[1]; + } cc; + + struct { + struct brw_query_object active_head; + struct brw_winsys_buffer *bo; + int index; + GLboolean active; + int stats_wm; + } query; + + struct { + unsigned always_emit_state:1; + unsigned always_flush_batch:1; + unsigned force_swtnl:1; + unsigned no_swtnl:1; + } flags; + + /* Used to give every program string a unique id + */ + GLuint program_id; +}; + + + +/*====================================================================== + * brw_queryobj.c + */ +void brw_init_query(struct brw_context *brw); +enum pipe_error brw_prepare_query_begin(struct brw_context *brw); +void brw_emit_query_begin(struct brw_context *brw); +void brw_emit_query_end(struct brw_context *brw); + +/*====================================================================== + * brw_state_dump.c + */ +void brw_debug_batch(struct brw_context *intel); + + +/*====================================================================== + * brw_pipe_*.c + */ +void brw_pipe_blend_init( struct brw_context *brw ); +void brw_pipe_depth_stencil_init( struct brw_context *brw ); +void brw_pipe_framebuffer_init( struct brw_context *brw ); +void brw_pipe_flush_init( struct brw_context *brw ); +void brw_pipe_misc_init( struct brw_context *brw ); +void brw_pipe_query_init( struct brw_context *brw ); +void brw_pipe_rast_init( struct brw_context *brw ); +void brw_pipe_sampler_init( struct brw_context *brw ); +void brw_pipe_shader_init( struct brw_context *brw ); +void brw_pipe_vertex_init( struct brw_context *brw ); +void brw_pipe_clear_init( struct brw_context *brw ); + + +void brw_pipe_blend_cleanup( struct brw_context *brw ); +void brw_pipe_depth_stencil_cleanup( struct brw_context *brw ); +void brw_pipe_framebuffer_cleanup( struct brw_context *brw ); +void brw_pipe_flush_cleanup( struct brw_context *brw ); +void brw_pipe_misc_cleanup( struct brw_context *brw ); +void brw_pipe_query_cleanup( struct brw_context *brw ); +void brw_pipe_rast_cleanup( struct brw_context *brw ); +void brw_pipe_sampler_cleanup( struct brw_context *brw ); +void brw_pipe_shader_cleanup( struct brw_context *brw ); +void brw_pipe_vertex_cleanup( struct brw_context *brw ); +void brw_pipe_clear_cleanup( struct brw_context *brw ); + +void brw_hw_cc_init( struct brw_context *brw ); +void brw_hw_cc_cleanup( struct brw_context *brw ); + + + +void brw_context_flush( struct brw_context *brw ); + + +/* brw_urb.c + */ +int brw_upload_urb_fence(struct brw_context *brw); + +/* brw_curbe.c + */ +int brw_upload_cs_urb_state(struct brw_context *brw); + + +/*====================================================================== + * Inline conversion functions. These are better-typed than the + * macros used previously: + */ +static INLINE struct brw_context * +brw_context( struct pipe_context *ctx ) +{ + return (struct brw_context *)ctx; +} + + +#define BRW_IS_965(brw) ((brw)->chipset.is_965) +#define BRW_IS_IGDNG(brw) ((brw)->chipset.is_igdng) +#define BRW_IS_G4X(brw) ((brw)->chipset.is_g4x) + + +#endif + diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c new file mode 100644 index 00000000000..3f031577d5a --- /dev/null +++ b/src/gallium/drivers/i965/brw_curbe.c @@ -0,0 +1,390 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#include "util/u_memory.h" +#include "util/u_math.h" + +#include "brw_batchbuffer.h" +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_state.h" +#include "brw_util.h" +#include "brw_debug.h" +#include "brw_screen.h" + + +/** + * Partition the CURBE between the various users of constant values: + * Note that vertex and fragment shaders can now fetch constants out + * of constant buffers. We no longer allocatea block of the GRF for + * constants. That greatly reduces the demand for space in the CURBE. + * Some of the comments within are dated... + */ +static int calculate_curbe_offsets( struct brw_context *brw ) +{ + /* CACHE_NEW_WM_PROG */ + const GLuint nr_fp_regs = brw->wm.prog_data->curb_read_length; + + /* BRW_NEW_VERTEX_PROGRAM */ + const GLuint nr_vp_regs = brw->vs.prog_data->curb_read_length; + GLuint nr_clip_regs = 0; + GLuint total_regs; + + /* PIPE_NEW_CLIP */ + if (brw->curr.ucp.nr) { + GLuint nr_planes = 6 + brw->curr.ucp.nr; + nr_clip_regs = (nr_planes * 4 + 15) / 16; + } + + + total_regs = nr_fp_regs + nr_vp_regs + nr_clip_regs; + + /* When this is > 32, want to use a true constant buffer to hold + * the extra constants. + */ + assert(total_regs <= 32); + + /* Lazy resize: + */ + if (nr_fp_regs > brw->curbe.wm_size || + nr_vp_regs > brw->curbe.vs_size || + nr_clip_regs != brw->curbe.clip_size || + (total_regs < brw->curbe.total_size / 4 && + brw->curbe.total_size > 16)) { + + GLuint reg = 0; + + /* Calculate a new layout: + */ + reg = 0; + brw->curbe.wm_start = reg; + brw->curbe.wm_size = nr_fp_regs; reg += nr_fp_regs; + brw->curbe.clip_start = reg; + brw->curbe.clip_size = nr_clip_regs; reg += nr_clip_regs; + brw->curbe.vs_start = reg; + brw->curbe.vs_size = nr_vp_regs; reg += nr_vp_regs; + brw->curbe.total_size = reg; + + if (BRW_DEBUG & DEBUG_CURBE) + debug_printf("curbe wm %d+%d clip %d+%d vs %d+%d\n", + brw->curbe.wm_start, + brw->curbe.wm_size, + brw->curbe.clip_start, + brw->curbe.clip_size, + brw->curbe.vs_start, + brw->curbe.vs_size ); + + brw->state.dirty.brw |= BRW_NEW_CURBE_OFFSETS; + } + + return 0; +} + + +const struct brw_tracked_state brw_curbe_offsets = { + .dirty = { + .mesa = PIPE_NEW_CLIP, + .brw = BRW_NEW_VERTEX_PROGRAM, + .cache = CACHE_NEW_WM_PROG + }, + .prepare = calculate_curbe_offsets +}; + + + + +/* Define the number of curbes within CS's urb allocation. Multiple + * urb entries -> multiple curbes. These will be used by + * fixed-function hardware in a double-buffering scheme to avoid a + * pipeline stall each time the contents of the curbe is changed. + */ +int brw_upload_cs_urb_state(struct brw_context *brw) +{ + struct brw_cs_urb_state cs_urb; + memset(&cs_urb, 0, sizeof(cs_urb)); + + /* It appears that this is the state packet for the CS unit, ie. the + * urb entries detailed here are housed in the CS range from the + * URB_FENCE command. + */ + cs_urb.header.opcode = CMD_CS_URB_STATE; + cs_urb.header.length = sizeof(cs_urb)/4 - 2; + + /* BRW_NEW_URB_FENCE */ + cs_urb.bits0.nr_urb_entries = brw->urb.nr_cs_entries; + cs_urb.bits0.urb_entry_size = brw->urb.csize - 1; + + assert(brw->urb.nr_cs_entries); + BRW_CACHED_BATCH_STRUCT(brw, &cs_urb); + return 0; +} + +static GLfloat fixed_plane[6][4] = { + { 0, 0, -1, 1 }, + { 0, 0, 1, 1 }, + { 0, -1, 0, 1 }, + { 0, 1, 0, 1 }, + {-1, 0, 0, 1 }, + { 1, 0, 0, 1 } +}; + +/* Upload a new set of constants. Too much variability to go into the + * cache mechanism, but maybe would benefit from a comparison against + * the current uploaded set of constants. + */ +static enum pipe_error prepare_curbe_buffer(struct brw_context *brw) +{ + struct pipe_screen *screen = brw->base.screen; + const GLuint sz = brw->curbe.total_size; + const GLuint bufsz = sz * 16 * sizeof(GLfloat); + enum pipe_error ret; + GLfloat *buf; + GLuint i; + + if (sz == 0) { + if (brw->curbe.last_buf) { + free(brw->curbe.last_buf); + brw->curbe.last_buf = NULL; + brw->curbe.last_bufsz = 0; + } + return 0; + } + + buf = (GLfloat *) CALLOC(bufsz, 1); + + /* fragment shader constants */ + if (brw->curbe.wm_size) { + const struct brw_fragment_shader *fs = brw->curr.fragment_shader; + GLuint offset = brw->curbe.wm_start * 16; + GLuint nr_immediate, nr_const; + + nr_immediate = fs->immediates.nr; + if (nr_immediate) { + memcpy(&buf[offset], + fs->immediates.data, + nr_immediate * 4 * sizeof(float)); + + offset += nr_immediate * 4; + } + + nr_const = fs->info.file_max[TGSI_FILE_CONSTANT] + 1; +/* nr_const = brw->wm.prog_data->nr_params; */ + if (nr_const) { + const GLfloat *value = screen->buffer_map( screen, + brw->curr.fragment_constants, + PIPE_BUFFER_USAGE_CPU_READ); + + memcpy(&buf[offset], value, + nr_const * 4 * sizeof(float)); + + screen->buffer_unmap( screen, + brw->curr.fragment_constants ); + } + } + + + /* The clipplanes are actually delivered to both CLIP and VS units. + * VS uses them to calculate the outcode bitmasks. + */ + if (brw->curbe.clip_size) { + GLuint offset = brw->curbe.clip_start * 16; + GLuint j; + + /* If any planes are going this way, send them all this way: + */ + for (i = 0; i < 6; i++) { + buf[offset + i * 4 + 0] = fixed_plane[i][0]; + buf[offset + i * 4 + 1] = fixed_plane[i][1]; + buf[offset + i * 4 + 2] = fixed_plane[i][2]; + buf[offset + i * 4 + 3] = fixed_plane[i][3]; + } + + /* Clip planes: + */ + assert(brw->curr.ucp.nr <= 6); + for (j = 0; j < brw->curr.ucp.nr; j++) { + buf[offset + i * 4 + 0] = brw->curr.ucp.ucp[j][0]; + buf[offset + i * 4 + 1] = brw->curr.ucp.ucp[j][1]; + buf[offset + i * 4 + 2] = brw->curr.ucp.ucp[j][2]; + buf[offset + i * 4 + 3] = brw->curr.ucp.ucp[j][3]; + i++; + } + } + + /* vertex shader constants */ + if (brw->curbe.vs_size) { + GLuint offset = brw->curbe.vs_start * 16; + const struct brw_vertex_shader *vs = brw->curr.vertex_shader; + GLuint nr_immediate, nr_const; + + nr_immediate = vs->immediates.nr; + if (nr_immediate) { + memcpy(&buf[offset], + vs->immediates.data, + nr_immediate * 4 * sizeof(float)); + + offset += nr_immediate * 4; + } + + nr_const = vs->info.file_max[TGSI_FILE_CONSTANT] + 1; + if (nr_const) { + /* XXX: note that constant buffers are currently *already* in + * buffer objects. If we want to keep on putting them into the + * curbe, makes sense to treat constbuf's specially with malloc. + */ + const GLfloat *value = screen->buffer_map( screen, + brw->curr.vertex_constants, + PIPE_BUFFER_USAGE_CPU_READ); + + /* XXX: what if user's constant buffer is too small? + */ + memcpy(&buf[offset], value, nr_const * 4 * sizeof(float)); + + screen->buffer_unmap( screen, brw->curr.vertex_constants ); + } + } + + if (BRW_DEBUG & DEBUG_CURBE) { + for (i = 0; i < sz*16; i+=4) + debug_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4, + buf[i+0], buf[i+1], buf[i+2], buf[i+3]); + + debug_printf("last_buf %p buf %p sz %d/%d cmp %d\n", + (void *)brw->curbe.last_buf, (void *)buf, + bufsz, brw->curbe.last_bufsz, + brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1); + } + + if (brw->curbe.curbe_bo != NULL && + brw->curbe.last_buf && + bufsz == brw->curbe.last_bufsz && + memcmp(buf, brw->curbe.last_buf, bufsz) == 0) { + /* constants have not changed */ + FREE(buf); + } + else { + /* constants have changed */ + FREE(brw->curbe.last_buf); + + brw->curbe.last_buf = buf; + brw->curbe.last_bufsz = bufsz; + + if (brw->curbe.curbe_bo != NULL && + (brw->curbe.need_new_bo || + brw->curbe.curbe_next_offset + bufsz > brw->curbe.curbe_bo->size)) + { + bo_reference(&brw->curbe.curbe_bo, NULL); + } + + if (brw->curbe.curbe_bo == NULL) { + /* Allocate a single page for CURBE entries for this + * batchbuffer. They're generally around 64b. We will + * discard the curbe buffer after the batch is flushed to + * avoid synchronous updates. + */ + ret = brw->sws->bo_alloc(brw->sws, + BRW_BUFFER_TYPE_CURBE, + 4096, 1 << 6, + &brw->curbe.curbe_bo); + if (ret) + return ret; + + brw->curbe.curbe_next_offset = 0; + } + + brw->curbe.curbe_offset = brw->curbe.curbe_next_offset; + brw->curbe.curbe_next_offset += bufsz; + brw->curbe.curbe_next_offset = align(brw->curbe.curbe_next_offset, 64); + + /* Copy data to the buffer: + */ + brw->sws->bo_subdata(brw->curbe.curbe_bo, + BRW_DATA_CONSTANT_BUFFER, + brw->curbe.curbe_offset, + bufsz, + buf, + NULL, 0); + } + + brw_add_validated_bo(brw, brw->curbe.curbe_bo); + + /* Because this provokes an action (ie copy the constants into the + * URB), it shouldn't be shortcircuited if identical to the + * previous time - because eg. the urb destination may have + * changed, or the urb contents different to last time. + * + * Note that the data referred to is actually copied internally, + * not just used in place according to passed pointer. + * + * It appears that the CS unit takes care of using each available + * URB entry (Const URB Entry == CURBE) in turn, and issuing + * flushes as necessary when doublebuffering of CURBEs isn't + * possible. + */ + + return 0; +} + +static enum pipe_error emit_curbe_buffer(struct brw_context *brw) +{ + GLuint sz = brw->curbe.total_size; + + BEGIN_BATCH(2, IGNORE_CLIPRECTS); + if (sz == 0) { + OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2)); + OUT_BATCH(0); + } else { + OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2)); + OUT_RELOC(brw->curbe.curbe_bo, + BRW_USAGE_STATE, + (sz - 1) + brw->curbe.curbe_offset); + } + ADVANCE_BATCH(); + return 0; +} + +const struct brw_tracked_state brw_curbe_buffer = { + .dirty = { + .mesa = (PIPE_NEW_FRAGMENT_CONSTANTS | + PIPE_NEW_VERTEX_CONSTANTS | + PIPE_NEW_CLIP), + .brw = (BRW_NEW_FRAGMENT_PROGRAM | + BRW_NEW_VERTEX_PROGRAM | + BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */ + BRW_NEW_PSP | /* Implicit - hardware requires this, not used above */ + BRW_NEW_CURBE_OFFSETS | + BRW_NEW_BATCH), + .cache = (CACHE_NEW_WM_PROG) + }, + .prepare = prepare_curbe_buffer, + .emit = emit_curbe_buffer, +}; + diff --git a/src/gallium/drivers/i965/brw_debug.h b/src/gallium/drivers/i965/brw_debug.h new file mode 100644 index 00000000000..ae8e9254a68 --- /dev/null +++ b/src/gallium/drivers/i965/brw_debug.h @@ -0,0 +1,43 @@ +#ifndef BRW_DEBUG_H +#define BRW_DEBUG_H + +/* ================================================================ + * Debugging: + */ + +#define DEBUG_TEXTURE 0x1 +#define DEBUG_STATE 0x2 +#define DEBUG_IOCTL 0x4 +#define DEBUG_BLIT 0x8 +#define DEBUG_CURBE 0x10 +#define DEBUG_FALLBACKS 0x20 +#define DEBUG_VERBOSE 0x40 +#define DEBUG_BATCH 0x80 +#define DEBUG_PIXEL 0x100 +#define DEBUG_WINSYS 0x200 +#define DEBUG_MIN_URB 0x400 +#define DEBUG_DISASSEM 0x800 +#define DEBUG_unused3 0x1000 +#define DEBUG_SYNC 0x2000 +#define DEBUG_PRIMS 0x4000 +#define DEBUG_VERTS 0x8000 +#define DEBUG_unused4 0x10000 +#define DEBUG_DMA 0x20000 +#define DEBUG_SANITY 0x40000 +#define DEBUG_SLEEP 0x80000 +#define DEBUG_STATS 0x100000 +#define DEBUG_unused5 0x200000 +#define DEBUG_SINGLE_THREAD 0x400000 +#define DEBUG_WM 0x800000 +#define DEBUG_URB 0x1000000 +#define DEBUG_VS 0x2000000 + +#ifdef DEBUG +extern int BRW_DEBUG; +#else +#define BRW_DEBUG 0 +#endif + + + +#endif diff --git a/src/gallium/drivers/i965/brw_defines.h b/src/gallium/drivers/i965/brw_defines.h new file mode 100644 index 00000000000..e201ce4d7ce --- /dev/null +++ b/src/gallium/drivers/i965/brw_defines.h @@ -0,0 +1,847 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#ifndef BRW_DEFINES_H +#define BRW_DEFINES_H + +/* 3D state: + */ +#define _3DOP_3DSTATE_PIPELINED 0x0 +#define _3DOP_3DSTATE_NONPIPELINED 0x1 +#define _3DOP_3DCONTROL 0x2 +#define _3DOP_3DPRIMITIVE 0x3 + +#define _3DSTATE_PIPELINED_POINTERS 0x00 +#define _3DSTATE_BINDING_TABLE_POINTERS 0x01 +#define _3DSTATE_VERTEX_BUFFERS 0x08 +#define _3DSTATE_VERTEX_ELEMENTS 0x09 +#define _3DSTATE_INDEX_BUFFER 0x0A +#define _3DSTATE_VF_STATISTICS 0x0B +#define _3DSTATE_DRAWING_RECTANGLE 0x00 +#define _3DSTATE_CONSTANT_COLOR 0x01 +#define _3DSTATE_SAMPLER_PALETTE_LOAD 0x02 +#define _3DSTATE_CHROMA_KEY 0x04 +#define _3DSTATE_DEPTH_BUFFER 0x05 +#define _3DSTATE_POLY_STIPPLE_OFFSET 0x06 +#define _3DSTATE_POLY_STIPPLE_PATTERN 0x07 +#define _3DSTATE_LINE_STIPPLE 0x08 +#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x09 +#define _3DCONTROL 0x00 + +#define PIPE_CONTROL_NOWRITE 0x00 +#define PIPE_CONTROL_WRITEIMMEDIATE 0x01 +#define PIPE_CONTROL_WRITEDEPTH 0x02 +#define PIPE_CONTROL_WRITETIMESTAMP 0x03 + +#define PIPE_CONTROL_GTTWRITE_PROCESS_LOCAL 0x00 +#define PIPE_CONTROL_GTTWRITE_GLOBAL 0x01 + +#define _3DPRIM_POINTLIST 0x01 +#define _3DPRIM_LINELIST 0x02 +#define _3DPRIM_LINESTRIP 0x03 +#define _3DPRIM_TRILIST 0x04 +#define _3DPRIM_TRISTRIP 0x05 +#define _3DPRIM_TRIFAN 0x06 +#define _3DPRIM_QUADLIST 0x07 +#define _3DPRIM_QUADSTRIP 0x08 +#define _3DPRIM_LINELIST_ADJ 0x09 +#define _3DPRIM_LINESTRIP_ADJ 0x0A +#define _3DPRIM_TRILIST_ADJ 0x0B +#define _3DPRIM_TRISTRIP_ADJ 0x0C +#define _3DPRIM_TRISTRIP_REVERSE 0x0D +#define _3DPRIM_POLYGON 0x0E +#define _3DPRIM_RECTLIST 0x0F +#define _3DPRIM_LINELOOP 0x10 +#define _3DPRIM_POINTLIST_BF 0x11 +#define _3DPRIM_LINESTRIP_CONT 0x12 +#define _3DPRIM_LINESTRIP_BF 0x13 +#define _3DPRIM_LINESTRIP_CONT_BF 0x14 +#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15 + +#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0 +#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM 1 + +#define BRW_ANISORATIO_2 0 +#define BRW_ANISORATIO_4 1 +#define BRW_ANISORATIO_6 2 +#define BRW_ANISORATIO_8 3 +#define BRW_ANISORATIO_10 4 +#define BRW_ANISORATIO_12 5 +#define BRW_ANISORATIO_14 6 +#define BRW_ANISORATIO_16 7 + +#define BRW_BLENDFACTOR_ONE 0x1 +#define BRW_BLENDFACTOR_SRC_COLOR 0x2 +#define BRW_BLENDFACTOR_SRC_ALPHA 0x3 +#define BRW_BLENDFACTOR_DST_ALPHA 0x4 +#define BRW_BLENDFACTOR_DST_COLOR 0x5 +#define BRW_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6 +#define BRW_BLENDFACTOR_CONST_COLOR 0x7 +#define BRW_BLENDFACTOR_CONST_ALPHA 0x8 +#define BRW_BLENDFACTOR_SRC1_COLOR 0x9 +#define BRW_BLENDFACTOR_SRC1_ALPHA 0x0A +#define BRW_BLENDFACTOR_ZERO 0x11 +#define BRW_BLENDFACTOR_INV_SRC_COLOR 0x12 +#define BRW_BLENDFACTOR_INV_SRC_ALPHA 0x13 +#define BRW_BLENDFACTOR_INV_DST_ALPHA 0x14 +#define BRW_BLENDFACTOR_INV_DST_COLOR 0x15 +#define BRW_BLENDFACTOR_INV_CONST_COLOR 0x17 +#define BRW_BLENDFACTOR_INV_CONST_ALPHA 0x18 +#define BRW_BLENDFACTOR_INV_SRC1_COLOR 0x19 +#define BRW_BLENDFACTOR_INV_SRC1_ALPHA 0x1A + +#define BRW_BLENDFUNCTION_ADD 0 +#define BRW_BLENDFUNCTION_SUBTRACT 1 +#define BRW_BLENDFUNCTION_REVERSE_SUBTRACT 2 +#define BRW_BLENDFUNCTION_MIN 3 +#define BRW_BLENDFUNCTION_MAX 4 + +#define BRW_ALPHATEST_FORMAT_UNORM8 0 +#define BRW_ALPHATEST_FORMAT_FLOAT32 1 + +#define BRW_CHROMAKEY_KILL_ON_ANY_MATCH 0 +#define BRW_CHROMAKEY_REPLACE_BLACK 1 + +#define BRW_CLIP_API_OGL 0 +#define BRW_CLIP_API_DX 1 + +#define BRW_CLIPMODE_NORMAL 0 +#define BRW_CLIPMODE_CLIP_ALL 1 +#define BRW_CLIPMODE_CLIP_NON_REJECTED 2 +#define BRW_CLIPMODE_REJECT_ALL 3 +#define BRW_CLIPMODE_ACCEPT_ALL 4 +#define BRW_CLIPMODE_KERNEL_CLIP 5 + +#define BRW_CLIP_NDCSPACE 0 +#define BRW_CLIP_SCREENSPACE 1 + +#define BRW_COMPAREFUNCTION_ALWAYS 0 +#define BRW_COMPAREFUNCTION_NEVER 1 +#define BRW_COMPAREFUNCTION_LESS 2 +#define BRW_COMPAREFUNCTION_EQUAL 3 +#define BRW_COMPAREFUNCTION_LEQUAL 4 +#define BRW_COMPAREFUNCTION_GREATER 5 +#define BRW_COMPAREFUNCTION_NOTEQUAL 6 +#define BRW_COMPAREFUNCTION_GEQUAL 7 + +#define BRW_COVERAGE_PIXELS_HALF 0 +#define BRW_COVERAGE_PIXELS_1 1 +#define BRW_COVERAGE_PIXELS_2 2 +#define BRW_COVERAGE_PIXELS_4 3 + +#define BRW_CULLMODE_BOTH 0 +#define BRW_CULLMODE_NONE 1 +#define BRW_CULLMODE_FRONT 2 +#define BRW_CULLMODE_BACK 3 + +#define BRW_DEFAULTCOLOR_R8G8B8A8_UNORM 0 +#define BRW_DEFAULTCOLOR_R32G32B32A32_FLOAT 1 + +#define BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0 +#define BRW_DEPTHFORMAT_D32_FLOAT 1 +#define BRW_DEPTHFORMAT_D24_UNORM_S8_UINT 2 +#define BRW_DEPTHFORMAT_D16_UNORM 5 + +#define BRW_FLOATING_POINT_IEEE_754 0 +#define BRW_FLOATING_POINT_NON_IEEE_754 1 + +#define BRW_FRONTWINDING_CW 0 +#define BRW_FRONTWINDING_CCW 1 + +#define BRW_SPRITE_POINT_ENABLE 16 + +#define BRW_INDEX_BYTE 0 +#define BRW_INDEX_WORD 1 +#define BRW_INDEX_DWORD 2 + +#define BRW_LOGICOPFUNCTION_CLEAR 0 +#define BRW_LOGICOPFUNCTION_NOR 1 +#define BRW_LOGICOPFUNCTION_AND_INVERTED 2 +#define BRW_LOGICOPFUNCTION_COPY_INVERTED 3 +#define BRW_LOGICOPFUNCTION_AND_REVERSE 4 +#define BRW_LOGICOPFUNCTION_INVERT 5 +#define BRW_LOGICOPFUNCTION_XOR 6 +#define BRW_LOGICOPFUNCTION_NAND 7 +#define BRW_LOGICOPFUNCTION_AND 8 +#define BRW_LOGICOPFUNCTION_EQUIV 9 +#define BRW_LOGICOPFUNCTION_NOOP 10 +#define BRW_LOGICOPFUNCTION_OR_INVERTED 11 +#define BRW_LOGICOPFUNCTION_COPY 12 +#define BRW_LOGICOPFUNCTION_OR_REVERSE 13 +#define BRW_LOGICOPFUNCTION_OR 14 +#define BRW_LOGICOPFUNCTION_SET 15 + +#define BRW_MAPFILTER_NEAREST 0x0 +#define BRW_MAPFILTER_LINEAR 0x1 +#define BRW_MAPFILTER_ANISOTROPIC 0x2 + +#define BRW_MIPFILTER_NONE 0 +#define BRW_MIPFILTER_NEAREST 1 +#define BRW_MIPFILTER_LINEAR 3 + +#define BRW_POLYGON_FRONT_FACING 0 +#define BRW_POLYGON_BACK_FACING 1 + +#define BRW_PREFILTER_ALWAYS 0x0 +#define BRW_PREFILTER_NEVER 0x1 +#define BRW_PREFILTER_LESS 0x2 +#define BRW_PREFILTER_EQUAL 0x3 +#define BRW_PREFILTER_LEQUAL 0x4 +#define BRW_PREFILTER_GREATER 0x5 +#define BRW_PREFILTER_NOTEQUAL 0x6 +#define BRW_PREFILTER_GEQUAL 0x7 + +#define BRW_PROVOKING_VERTEX_0 0 +#define BRW_PROVOKING_VERTEX_1 1 +#define BRW_PROVOKING_VERTEX_2 2 + +#define BRW_RASTRULE_UPPER_LEFT 0 +#define BRW_RASTRULE_UPPER_RIGHT 1 +/* These are listed as "Reserved, but not seen as useful" + * in Intel documentation (page 212, "Point Rasterization Rule", + * section 7.4 "SF Pipeline State Summary", of document + * "Intel® 965 Express Chipset Family and Intel® G35 Express + * Chipset Graphics Controller Programmer's Reference Manual, + * Volume 2: 3D/Media", Revision 1.0b as of January 2008, + * available at + * http://intellinuxgraphics.org/documentation.html + * at the time of this writing). + * + * These appear to be supported on at least some + * i965-family devices, and the BRW_RASTRULE_LOWER_RIGHT + * is useful when using OpenGL to render to a FBO + * (which has the pixel coordinate Y orientation inverted + * with respect to the normal OpenGL pixel coordinate system). + */ +#define BRW_RASTRULE_LOWER_LEFT 2 +#define BRW_RASTRULE_LOWER_RIGHT 3 + +#define BRW_RENDERTARGET_CLAMPRANGE_UNORM 0 +#define BRW_RENDERTARGET_CLAMPRANGE_SNORM 1 +#define BRW_RENDERTARGET_CLAMPRANGE_FORMAT 2 + +#define BRW_STENCILOP_KEEP 0 +#define BRW_STENCILOP_ZERO 1 +#define BRW_STENCILOP_REPLACE 2 +#define BRW_STENCILOP_INCRSAT 3 +#define BRW_STENCILOP_DECRSAT 4 +#define BRW_STENCILOP_INCR 5 +#define BRW_STENCILOP_DECR 6 +#define BRW_STENCILOP_INVERT 7 + +#define BRW_SURFACE_MIPMAPLAYOUT_BELOW 0 +#define BRW_SURFACE_MIPMAPLAYOUT_RIGHT 1 + +#define BRW_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000 +#define BRW_SURFACEFORMAT_R32G32B32A32_SINT 0x001 +#define BRW_SURFACEFORMAT_R32G32B32A32_UINT 0x002 +#define BRW_SURFACEFORMAT_R32G32B32A32_UNORM 0x003 +#define BRW_SURFACEFORMAT_R32G32B32A32_SNORM 0x004 +#define BRW_SURFACEFORMAT_R64G64_FLOAT 0x005 +#define BRW_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006 +#define BRW_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007 +#define BRW_SURFACEFORMAT_R32G32B32A32_USCALED 0x008 +#define BRW_SURFACEFORMAT_R32G32B32_FLOAT 0x040 +#define BRW_SURFACEFORMAT_R32G32B32_SINT 0x041 +#define BRW_SURFACEFORMAT_R32G32B32_UINT 0x042 +#define BRW_SURFACEFORMAT_R32G32B32_UNORM 0x043 +#define BRW_SURFACEFORMAT_R32G32B32_SNORM 0x044 +#define BRW_SURFACEFORMAT_R32G32B32_SSCALED 0x045 +#define BRW_SURFACEFORMAT_R32G32B32_USCALED 0x046 +#define BRW_SURFACEFORMAT_R16G16B16A16_UNORM 0x080 +#define BRW_SURFACEFORMAT_R16G16B16A16_SNORM 0x081 +#define BRW_SURFACEFORMAT_R16G16B16A16_SINT 0x082 +#define BRW_SURFACEFORMAT_R16G16B16A16_UINT 0x083 +#define BRW_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084 +#define BRW_SURFACEFORMAT_R32G32_FLOAT 0x085 +#define BRW_SURFACEFORMAT_R32G32_SINT 0x086 +#define BRW_SURFACEFORMAT_R32G32_UINT 0x087 +#define BRW_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088 +#define BRW_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089 +#define BRW_SURFACEFORMAT_L32A32_FLOAT 0x08A +#define BRW_SURFACEFORMAT_R32G32_UNORM 0x08B +#define BRW_SURFACEFORMAT_R32G32_SNORM 0x08C +#define BRW_SURFACEFORMAT_R64_FLOAT 0x08D +#define BRW_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E +#define BRW_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F +#define BRW_SURFACEFORMAT_A32X32_FLOAT 0x090 +#define BRW_SURFACEFORMAT_L32X32_FLOAT 0x091 +#define BRW_SURFACEFORMAT_I32X32_FLOAT 0x092 +#define BRW_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093 +#define BRW_SURFACEFORMAT_R16G16B16A16_USCALED 0x094 +#define BRW_SURFACEFORMAT_R32G32_SSCALED 0x095 +#define BRW_SURFACEFORMAT_R32G32_USCALED 0x096 +#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0 +#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1 +#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2 +#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3 +#define BRW_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4 +#define BRW_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5 +#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7 +#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8 +#define BRW_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9 +#define BRW_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA +#define BRW_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB +#define BRW_SURFACEFORMAT_R16G16_UNORM 0x0CC +#define BRW_SURFACEFORMAT_R16G16_SNORM 0x0CD +#define BRW_SURFACEFORMAT_R16G16_SINT 0x0CE +#define BRW_SURFACEFORMAT_R16G16_UINT 0x0CF +#define BRW_SURFACEFORMAT_R16G16_FLOAT 0x0D0 +#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1 +#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2 +#define BRW_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3 +#define BRW_SURFACEFORMAT_R32_SINT 0x0D6 +#define BRW_SURFACEFORMAT_R32_UINT 0x0D7 +#define BRW_SURFACEFORMAT_R32_FLOAT 0x0D8 +#define BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9 +#define BRW_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA +#define BRW_SURFACEFORMAT_L16A16_UNORM 0x0DF +#define BRW_SURFACEFORMAT_I24X8_UNORM 0x0E0 +#define BRW_SURFACEFORMAT_L24X8_UNORM 0x0E1 +#define BRW_SURFACEFORMAT_A24X8_UNORM 0x0E2 +#define BRW_SURFACEFORMAT_I32_FLOAT 0x0E3 +#define BRW_SURFACEFORMAT_L32_FLOAT 0x0E4 +#define BRW_SURFACEFORMAT_A32_FLOAT 0x0E5 +#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9 +#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA +#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB +#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC +#define BRW_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED +#define BRW_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE +#define BRW_SURFACEFORMAT_L16A16_FLOAT 0x0F0 +#define BRW_SURFACEFORMAT_R32_UNORM 0x0F1 +#define BRW_SURFACEFORMAT_R32_SNORM 0x0F2 +#define BRW_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3 +#define BRW_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4 +#define BRW_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5 +#define BRW_SURFACEFORMAT_R16G16_SSCALED 0x0F6 +#define BRW_SURFACEFORMAT_R16G16_USCALED 0x0F7 +#define BRW_SURFACEFORMAT_R32_SSCALED 0x0F8 +#define BRW_SURFACEFORMAT_R32_USCALED 0x0F9 +#define BRW_SURFACEFORMAT_B5G6R5_UNORM 0x100 +#define BRW_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101 +#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM 0x102 +#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103 +#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM 0x104 +#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105 +#define BRW_SURFACEFORMAT_R8G8_UNORM 0x106 +#define BRW_SURFACEFORMAT_R8G8_SNORM 0x107 +#define BRW_SURFACEFORMAT_R8G8_SINT 0x108 +#define BRW_SURFACEFORMAT_R8G8_UINT 0x109 +#define BRW_SURFACEFORMAT_R16_UNORM 0x10A +#define BRW_SURFACEFORMAT_R16_SNORM 0x10B +#define BRW_SURFACEFORMAT_R16_SINT 0x10C +#define BRW_SURFACEFORMAT_R16_UINT 0x10D +#define BRW_SURFACEFORMAT_R16_FLOAT 0x10E +#define BRW_SURFACEFORMAT_I16_UNORM 0x111 +#define BRW_SURFACEFORMAT_L16_UNORM 0x112 +#define BRW_SURFACEFORMAT_A16_UNORM 0x113 +#define BRW_SURFACEFORMAT_L8A8_UNORM 0x114 +#define BRW_SURFACEFORMAT_I16_FLOAT 0x115 +#define BRW_SURFACEFORMAT_L16_FLOAT 0x116 +#define BRW_SURFACEFORMAT_A16_FLOAT 0x117 +#define BRW_SURFACEFORMAT_L8A8_UNORM_SRGB 0x118 +#define BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119 +#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A +#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B +#define BRW_SURFACEFORMAT_R8G8_SSCALED 0x11C +#define BRW_SURFACEFORMAT_R8G8_USCALED 0x11D +#define BRW_SURFACEFORMAT_R16_SSCALED 0x11E +#define BRW_SURFACEFORMAT_R16_USCALED 0x11F +#define BRW_SURFACEFORMAT_R8_UNORM 0x140 +#define BRW_SURFACEFORMAT_R8_SNORM 0x141 +#define BRW_SURFACEFORMAT_R8_SINT 0x142 +#define BRW_SURFACEFORMAT_R8_UINT 0x143 +#define BRW_SURFACEFORMAT_A8_UNORM 0x144 +#define BRW_SURFACEFORMAT_I8_UNORM 0x145 +#define BRW_SURFACEFORMAT_L8_UNORM 0x146 +#define BRW_SURFACEFORMAT_P4A4_UNORM 0x147 +#define BRW_SURFACEFORMAT_A4P4_UNORM 0x148 +#define BRW_SURFACEFORMAT_R8_SSCALED 0x149 +#define BRW_SURFACEFORMAT_R8_USCALED 0x14A +#define BRW_SURFACEFORMAT_L8_UNORM_SRGB 0x14C +#define BRW_SURFACEFORMAT_R1_UINT 0x181 +#define BRW_SURFACEFORMAT_YCRCB_NORMAL 0x182 +#define BRW_SURFACEFORMAT_YCRCB_SWAPUVY 0x183 +#define BRW_SURFACEFORMAT_BC1_UNORM 0x186 +#define BRW_SURFACEFORMAT_BC2_UNORM 0x187 +#define BRW_SURFACEFORMAT_BC3_UNORM 0x188 +#define BRW_SURFACEFORMAT_BC4_UNORM 0x189 +#define BRW_SURFACEFORMAT_BC5_UNORM 0x18A +#define BRW_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B +#define BRW_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C +#define BRW_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D +#define BRW_SURFACEFORMAT_MONO8 0x18E +#define BRW_SURFACEFORMAT_YCRCB_SWAPUV 0x18F +#define BRW_SURFACEFORMAT_YCRCB_SWAPY 0x190 +#define BRW_SURFACEFORMAT_DXT1_RGB 0x191 +#define BRW_SURFACEFORMAT_FXT1 0x192 +#define BRW_SURFACEFORMAT_R8G8B8_UNORM 0x193 +#define BRW_SURFACEFORMAT_R8G8B8_SNORM 0x194 +#define BRW_SURFACEFORMAT_R8G8B8_SSCALED 0x195 +#define BRW_SURFACEFORMAT_R8G8B8_USCALED 0x196 +#define BRW_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197 +#define BRW_SURFACEFORMAT_R64G64B64_FLOAT 0x198 +#define BRW_SURFACEFORMAT_BC4_SNORM 0x199 +#define BRW_SURFACEFORMAT_BC5_SNORM 0x19A +#define BRW_SURFACEFORMAT_R16G16B16_UNORM 0x19C +#define BRW_SURFACEFORMAT_R16G16B16_SNORM 0x19D +#define BRW_SURFACEFORMAT_R16G16B16_SSCALED 0x19E +#define BRW_SURFACEFORMAT_R16G16B16_USCALED 0x19F +#define BRW_SURFACEFORMAT_INVALID 0xFFF + +#define BRW_SURFACERETURNFORMAT_FLOAT32 0 +#define BRW_SURFACERETURNFORMAT_S1 1 + +#define BRW_SURFACE_1D 0 +#define BRW_SURFACE_2D 1 +#define BRW_SURFACE_3D 2 +#define BRW_SURFACE_CUBE 3 +#define BRW_SURFACE_BUFFER 4 +#define BRW_SURFACE_NULL 7 + +#define BRW_TEXCOORDMODE_WRAP 0 +#define BRW_TEXCOORDMODE_MIRROR 1 +#define BRW_TEXCOORDMODE_CLAMP 2 +#define BRW_TEXCOORDMODE_CUBE 3 +#define BRW_TEXCOORDMODE_CLAMP_BORDER 4 +#define BRW_TEXCOORDMODE_MIRROR_ONCE 5 + +#define BRW_THREAD_PRIORITY_NORMAL 0 +#define BRW_THREAD_PRIORITY_HIGH 1 + +#define BRW_TILEWALK_XMAJOR 0 +#define BRW_TILEWALK_YMAJOR 1 + +#define BRW_VERTEX_SUBPIXEL_PRECISION_8BITS 0 +#define BRW_VERTEX_SUBPIXEL_PRECISION_4BITS 1 + +/* Execution Unit (EU) defines + */ + +#define BRW_ALIGN_1 0 +#define BRW_ALIGN_16 1 + +#define BRW_ADDRESS_DIRECT 0 +#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER 1 + +#define BRW_CHANNEL_X 0 +#define BRW_CHANNEL_Y 1 +#define BRW_CHANNEL_Z 2 +#define BRW_CHANNEL_W 3 + +#define BRW_COMPRESSION_NONE 0 +#define BRW_COMPRESSION_2NDHALF 1 +#define BRW_COMPRESSION_COMPRESSED 2 + +#define BRW_CONDITIONAL_NONE 0 +#define BRW_CONDITIONAL_Z 1 +#define BRW_CONDITIONAL_NZ 2 +#define BRW_CONDITIONAL_EQ 1 /* Z */ +#define BRW_CONDITIONAL_NEQ 2 /* NZ */ +#define BRW_CONDITIONAL_G 3 +#define BRW_CONDITIONAL_GE 4 +#define BRW_CONDITIONAL_L 5 +#define BRW_CONDITIONAL_LE 6 +#define BRW_CONDITIONAL_R 7 +#define BRW_CONDITIONAL_O 8 +#define BRW_CONDITIONAL_U 9 + +#define BRW_DEBUG_NONE 0 +#define BRW_DEBUG_BREAKPOINT 1 + +#define BRW_DEPENDENCY_NORMAL 0 +#define BRW_DEPENDENCY_NOTCLEARED 1 +#define BRW_DEPENDENCY_NOTCHECKED 2 +#define BRW_DEPENDENCY_DISABLE 3 + +#define BRW_EXECUTE_1 0 +#define BRW_EXECUTE_2 1 +#define BRW_EXECUTE_4 2 +#define BRW_EXECUTE_8 3 +#define BRW_EXECUTE_16 4 +#define BRW_EXECUTE_32 5 + +#define BRW_HORIZONTAL_STRIDE_0 0 +#define BRW_HORIZONTAL_STRIDE_1 1 +#define BRW_HORIZONTAL_STRIDE_2 2 +#define BRW_HORIZONTAL_STRIDE_4 3 + +#define BRW_INSTRUCTION_NORMAL 0 +#define BRW_INSTRUCTION_SATURATE 1 + +#define BRW_MASK_ENABLE 0 +#define BRW_MASK_DISABLE 1 + +#define BRW_OPCODE_MOV 1 +#define BRW_OPCODE_SEL 2 +#define BRW_OPCODE_NOT 4 +#define BRW_OPCODE_AND 5 +#define BRW_OPCODE_OR 6 +#define BRW_OPCODE_XOR 7 +#define BRW_OPCODE_SHR 8 +#define BRW_OPCODE_SHL 9 +#define BRW_OPCODE_RSR 10 +#define BRW_OPCODE_RSL 11 +#define BRW_OPCODE_ASR 12 +#define BRW_OPCODE_CMP 16 +#define BRW_OPCODE_CMPN 17 +#define BRW_OPCODE_JMPI 32 +#define BRW_OPCODE_IF 34 +#define BRW_OPCODE_IFF 35 +#define BRW_OPCODE_ELSE 36 +#define BRW_OPCODE_ENDIF 37 +#define BRW_OPCODE_DO 38 +#define BRW_OPCODE_WHILE 39 +#define BRW_OPCODE_BREAK 40 +#define BRW_OPCODE_CONTINUE 41 +#define BRW_OPCODE_HALT 42 +#define BRW_OPCODE_MSAVE 44 +#define BRW_OPCODE_MRESTORE 45 +#define BRW_OPCODE_PUSH 46 +#define BRW_OPCODE_POP 47 +#define BRW_OPCODE_WAIT 48 +#define BRW_OPCODE_SEND 49 +#define BRW_OPCODE_ADD 64 +#define BRW_OPCODE_MUL 65 +#define BRW_OPCODE_AVG 66 +#define BRW_OPCODE_FRC 67 +#define BRW_OPCODE_RNDU 68 +#define BRW_OPCODE_RNDD 69 +#define BRW_OPCODE_RNDE 70 +#define BRW_OPCODE_RNDZ 71 +#define BRW_OPCODE_MAC 72 +#define BRW_OPCODE_MACH 73 +#define BRW_OPCODE_LZD 74 +#define BRW_OPCODE_SAD2 80 +#define BRW_OPCODE_SADA2 81 +#define BRW_OPCODE_DP4 84 +#define BRW_OPCODE_DPH 85 +#define BRW_OPCODE_DP3 86 +#define BRW_OPCODE_DP2 87 +#define BRW_OPCODE_DPA2 88 +#define BRW_OPCODE_LINE 89 +#define BRW_OPCODE_NOP 126 + +#define BRW_PREDICATE_NONE 0 +#define BRW_PREDICATE_NORMAL 1 +#define BRW_PREDICATE_ALIGN1_ANYV 2 +#define BRW_PREDICATE_ALIGN1_ALLV 3 +#define BRW_PREDICATE_ALIGN1_ANY2H 4 +#define BRW_PREDICATE_ALIGN1_ALL2H 5 +#define BRW_PREDICATE_ALIGN1_ANY4H 6 +#define BRW_PREDICATE_ALIGN1_ALL4H 7 +#define BRW_PREDICATE_ALIGN1_ANY8H 8 +#define BRW_PREDICATE_ALIGN1_ALL8H 9 +#define BRW_PREDICATE_ALIGN1_ANY16H 10 +#define BRW_PREDICATE_ALIGN1_ALL16H 11 +#define BRW_PREDICATE_ALIGN16_REPLICATE_X 2 +#define BRW_PREDICATE_ALIGN16_REPLICATE_Y 3 +#define BRW_PREDICATE_ALIGN16_REPLICATE_Z 4 +#define BRW_PREDICATE_ALIGN16_REPLICATE_W 5 +#define BRW_PREDICATE_ALIGN16_ANY4H 6 +#define BRW_PREDICATE_ALIGN16_ALL4H 7 + +#define BRW_ARCHITECTURE_REGISTER_FILE 0 +#define BRW_GENERAL_REGISTER_FILE 1 +#define BRW_MESSAGE_REGISTER_FILE 2 +#define BRW_IMMEDIATE_VALUE 3 + +#define BRW_REGISTER_TYPE_UD 0 +#define BRW_REGISTER_TYPE_D 1 +#define BRW_REGISTER_TYPE_UW 2 +#define BRW_REGISTER_TYPE_W 3 +#define BRW_REGISTER_TYPE_UB 4 +#define BRW_REGISTER_TYPE_B 5 +#define BRW_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? */ +#define BRW_REGISTER_TYPE_HF 6 +#define BRW_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */ +#define BRW_REGISTER_TYPE_F 7 + +#define BRW_ARF_NULL 0x00 +#define BRW_ARF_ADDRESS 0x10 +#define BRW_ARF_ACCUMULATOR 0x20 +#define BRW_ARF_FLAG 0x30 +#define BRW_ARF_MASK 0x40 +#define BRW_ARF_MASK_STACK 0x50 +#define BRW_ARF_MASK_STACK_DEPTH 0x60 +#define BRW_ARF_STATE 0x70 +#define BRW_ARF_CONTROL 0x80 +#define BRW_ARF_NOTIFICATION_COUNT 0x90 +#define BRW_ARF_IP 0xA0 + +#define BRW_AMASK 0 +#define BRW_IMASK 1 +#define BRW_LMASK 2 +#define BRW_CMASK 3 + + + +#define BRW_THREAD_NORMAL 0 +#define BRW_THREAD_ATOMIC 1 +#define BRW_THREAD_SWITCH 2 + +#define BRW_VERTICAL_STRIDE_0 0 +#define BRW_VERTICAL_STRIDE_1 1 +#define BRW_VERTICAL_STRIDE_2 2 +#define BRW_VERTICAL_STRIDE_4 3 +#define BRW_VERTICAL_STRIDE_8 4 +#define BRW_VERTICAL_STRIDE_16 5 +#define BRW_VERTICAL_STRIDE_32 6 +#define BRW_VERTICAL_STRIDE_64 7 +#define BRW_VERTICAL_STRIDE_128 8 +#define BRW_VERTICAL_STRIDE_256 9 +#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF + +#define BRW_WIDTH_1 0 +#define BRW_WIDTH_2 1 +#define BRW_WIDTH_4 2 +#define BRW_WIDTH_8 3 +#define BRW_WIDTH_16 4 + +#define BRW_STATELESS_BUFFER_BOUNDARY_1K 0 +#define BRW_STATELESS_BUFFER_BOUNDARY_2K 1 +#define BRW_STATELESS_BUFFER_BOUNDARY_4K 2 +#define BRW_STATELESS_BUFFER_BOUNDARY_8K 3 +#define BRW_STATELESS_BUFFER_BOUNDARY_16K 4 +#define BRW_STATELESS_BUFFER_BOUNDARY_32K 5 +#define BRW_STATELESS_BUFFER_BOUNDARY_64K 6 +#define BRW_STATELESS_BUFFER_BOUNDARY_128K 7 +#define BRW_STATELESS_BUFFER_BOUNDARY_256K 8 +#define BRW_STATELESS_BUFFER_BOUNDARY_512K 9 +#define BRW_STATELESS_BUFFER_BOUNDARY_1M 10 +#define BRW_STATELESS_BUFFER_BOUNDARY_2M 11 + +#define BRW_POLYGON_FACING_FRONT 0 +#define BRW_POLYGON_FACING_BACK 1 + +#define BRW_MESSAGE_TARGET_NULL 0 +#define BRW_MESSAGE_TARGET_MATH 1 +#define BRW_MESSAGE_TARGET_SAMPLER 2 +#define BRW_MESSAGE_TARGET_GATEWAY 3 +#define BRW_MESSAGE_TARGET_DATAPORT_READ 4 +#define BRW_MESSAGE_TARGET_DATAPORT_WRITE 5 +#define BRW_MESSAGE_TARGET_URB 6 +#define BRW_MESSAGE_TARGET_THREAD_SPAWNER 7 + +#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32 0 +#define BRW_SAMPLER_RETURN_FORMAT_UINT32 2 +#define BRW_SAMPLER_RETURN_FORMAT_SINT32 3 + +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0 +#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX 1 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2 +#define BRW_SAMPLER_MESSAGE_SIMD8_RESINFO 2 +#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO 2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD 3 +#define BRW_SAMPLER_MESSAGE_SIMD8_LD 3 +#define BRW_SAMPLER_MESSAGE_SIMD16_LD 3 + +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG 0 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_IGDNG 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG 0 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG 1 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_BIAS_IGDNG 1 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG 1 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_IGDNG 2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_IGDNG 2 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD_IGDNG 2 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG 3 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE_IGDNG 3 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG 3 + +/* for IGDNG only */ +#define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0 +#define BRW_SAMPLER_SIMD_MODE_SIMD8 1 +#define BRW_SAMPLER_SIMD_MODE_SIMD16 2 +#define BRW_SAMPLER_SIMD_MODE_SIMD32_64 3 + +#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0 +#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1 +#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2 +#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS 3 +#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS 4 + +#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0 +#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2 + +#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2 +#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3 + +#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0 +#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1 +#define BRW_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ 2 +#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3 + +#define BRW_DATAPORT_READ_TARGET_DATA_CACHE 0 +#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE 1 +#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE 2 + +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4 + +#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0 +#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1 +#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_BLOCK_WRITE 2 +#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3 +#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4 +#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5 +#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7 + +#define BRW_MATH_FUNCTION_INV 1 +#define BRW_MATH_FUNCTION_LOG 2 +#define BRW_MATH_FUNCTION_EXP 3 +#define BRW_MATH_FUNCTION_SQRT 4 +#define BRW_MATH_FUNCTION_RSQ 5 +#define BRW_MATH_FUNCTION_SIN 6 /* was 7 */ +#define BRW_MATH_FUNCTION_COS 7 /* was 8 */ +#define BRW_MATH_FUNCTION_SINCOS 8 /* was 6 */ +#define BRW_MATH_FUNCTION_TAN 9 +#define BRW_MATH_FUNCTION_POW 10 +#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11 +#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12 +#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER 13 + +#define BRW_MATH_INTEGER_UNSIGNED 0 +#define BRW_MATH_INTEGER_SIGNED 1 + +#define BRW_MATH_PRECISION_FULL 0 +#define BRW_MATH_PRECISION_PARTIAL 1 + +#define BRW_MATH_SATURATE_NONE 0 +#define BRW_MATH_SATURATE_SATURATE 1 + +#define BRW_MATH_DATA_VECTOR 0 +#define BRW_MATH_DATA_SCALAR 1 + +#define BRW_URB_OPCODE_WRITE 0 + +#define BRW_URB_SWIZZLE_NONE 0 +#define BRW_URB_SWIZZLE_INTERLEAVE 1 +#define BRW_URB_SWIZZLE_TRANSPOSE 2 + +#define BRW_SCRATCH_SPACE_SIZE_1K 0 +#define BRW_SCRATCH_SPACE_SIZE_2K 1 +#define BRW_SCRATCH_SPACE_SIZE_4K 2 +#define BRW_SCRATCH_SPACE_SIZE_8K 3 +#define BRW_SCRATCH_SPACE_SIZE_16K 4 +#define BRW_SCRATCH_SPACE_SIZE_32K 5 +#define BRW_SCRATCH_SPACE_SIZE_64K 6 +#define BRW_SCRATCH_SPACE_SIZE_128K 7 +#define BRW_SCRATCH_SPACE_SIZE_256K 8 +#define BRW_SCRATCH_SPACE_SIZE_512K 9 +#define BRW_SCRATCH_SPACE_SIZE_1M 10 +#define BRW_SCRATCH_SPACE_SIZE_2M 11 + + + + +#define CMD_URB_FENCE 0x6000 +#define CMD_CS_URB_STATE 0x6001 +#define CMD_CONST_BUFFER 0x6002 + +#define CMD_STATE_BASE_ADDRESS 0x6101 +#define CMD_STATE_INSN_POINTER 0x6102 +#define CMD_PIPELINE_SELECT_965 0x6104 +#define CMD_PIPELINE_SELECT_GM45 0x6904 + +#define CMD_PIPELINED_STATE_POINTERS 0x7800 +#define CMD_BINDING_TABLE_PTRS 0x7801 + +#define CMD_VERTEX_BUFFER 0x7808 +# define BRW_VB0_INDEX_SHIFT 27 +# define BRW_VB0_ACCESS_VERTEXDATA (0 << 26) +# define BRW_VB0_ACCESS_INSTANCEDATA (1 << 26) +# define BRW_VB0_PITCH_SHIFT 0 + +#define CMD_VERTEX_ELEMENT 0x7809 +# define BRW_VE0_INDEX_SHIFT 27 +# define BRW_VE0_FORMAT_SHIFT 16 +# define BRW_VE0_VALID (1 << 26) +# define BRW_VE0_SRC_OFFSET_SHIFT 0 +# define BRW_VE1_COMPONENT_NOSTORE 0 +# define BRW_VE1_COMPONENT_STORE_SRC 1 +# define BRW_VE1_COMPONENT_STORE_0 2 +# define BRW_VE1_COMPONENT_STORE_1_FLT 3 +# define BRW_VE1_COMPONENT_STORE_1_INT 4 +# define BRW_VE1_COMPONENT_STORE_VID 5 +# define BRW_VE1_COMPONENT_STORE_IID 6 +# define BRW_VE1_COMPONENT_STORE_PID 7 +# define BRW_VE1_COMPONENT_0_SHIFT 28 +# define BRW_VE1_COMPONENT_1_SHIFT 24 +# define BRW_VE1_COMPONENT_2_SHIFT 20 +# define BRW_VE1_COMPONENT_3_SHIFT 16 +# define BRW_VE1_DST_OFFSET_SHIFT 0 + +#define CMD_INDEX_BUFFER 0x780a +#define CMD_VF_STATISTICS_965 0x780b +#define CMD_VF_STATISTICS_GM45 0x680b + +#define CMD_DRAW_RECT 0x7900 +#define CMD_BLEND_CONSTANT_COLOR 0x7901 +#define CMD_CHROMA_KEY 0x7904 +#define CMD_DEPTH_BUFFER 0x7905 +#define CMD_POLY_STIPPLE_OFFSET 0x7906 +#define CMD_POLY_STIPPLE_PATTERN 0x7907 +#define CMD_LINE_STIPPLE_PATTERN 0x7908 +#define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909 +#define CMD_AA_LINE_PARAMETERS 0x790a + +#define CMD_PIPE_CONTROL 0x7a00 + +#define CMD_3D_PRIM 0x7b00 + +#define CMD_MI_FLUSH 0x0200 + + +/* Various values from the R0 vertex header: + */ +#define R02_PRIM_END 0x1 +#define R02_PRIM_START 0x2 + +#define URB_SIZES(brw) (BRW_IS_IGDNG(brw) ? 1024 : \ + (BRW_IS_G4X(brw) ? 384 : 256)) /* 512 bit units */ + + + +#endif diff --git a/src/gallium/drivers/i965/brw_disasm.c b/src/gallium/drivers/i965/brw_disasm.c new file mode 100644 index 00000000000..65db27248b1 --- /dev/null +++ b/src/gallium/drivers/i965/brw_disasm.c @@ -0,0 +1,922 @@ +/* + * Copyright © 2008 Keith Packard + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that copyright + * notice and this permission notice appear in supporting documentation, and + * that the name of the copyright holders not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. The copyright holders make no representations + * about the suitability of this software for any purpose. It is provided "as + * is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THIS SOFTWARE. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <getopt.h> +#include <unistd.h> +#include <stdarg.h> + +#include "brw_disasm.h" +#include "brw_structs.h" +#include "brw_reg.h" +#include "brw_defines.h" + +struct { + char *name; + int nsrc; + int ndst; +} opcode[128] = { + [BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_RNDD] = { .name = "rndd", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_RNDE] = { .name = "rnde", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 }, + + [BRW_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 }, + + [BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_XOR] = { .name = "xor", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SHR] = { .name = "shr", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SHL] = { .name = "shl", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_ASR] = { .name = "asr", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_CMP] = { .name = "cmp", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 }, + + [BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 }, + [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 }, + [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 1, .ndst = 01 }, + [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 }, + [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_PUSH] = { .name = "push", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_MRESTORE] = { .name = "mrest", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_POP] = { .name = "pop", .nsrc = 2, .ndst = 0 }, + [BRW_OPCODE_WAIT] = { .name = "wait", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_DO] = { .name = "do", .nsrc = 0, .ndst = 0 }, + [BRW_OPCODE_ENDIF] = { .name = "endif", .nsrc = 2, .ndst = 0 }, +}; + +char *conditional_modifier[16] = { + [BRW_CONDITIONAL_NONE] = "", + [BRW_CONDITIONAL_Z] = ".e", + [BRW_CONDITIONAL_NZ] = ".ne", + [BRW_CONDITIONAL_G] = ".g", + [BRW_CONDITIONAL_GE] = ".ge", + [BRW_CONDITIONAL_L] = ".l", + [BRW_CONDITIONAL_LE] = ".le", + [BRW_CONDITIONAL_R] = ".r", + [BRW_CONDITIONAL_O] = ".o", + [BRW_CONDITIONAL_U] = ".u", +}; + +char *negate[2] = { + [0] = "", + [1] = "-", +}; + +char *_abs[2] = { + [0] = "", + [1] = "(abs)", +}; + +char *vert_stride[16] = { + [0] = "0", + [1] = "1", + [2] = "2", + [3] = "4", + [4] = "8", + [5] = "16", + [6] = "32", + [15] = "VxH", +}; + +char *width[8] = { + [0] = "1", + [1] = "2", + [2] = "4", + [3] = "8", + [4] = "16", +}; + +char *horiz_stride[4] = { + [0] = "0", + [1] = "1", + [2] = "2", + [3] = "4" +}; + +char *chan_sel[4] = { + [0] = "x", + [1] = "y", + [2] = "z", + [3] = "w", +}; + +char *dest_condmod[16] = { + [0] = NULL +}; + +char *debug_ctrl[2] = { + [0] = "", + [1] = ".breakpoint" +}; + +char *saturate[2] = { + [0] = "", + [1] = ".sat" +}; + +char *exec_size[8] = { + [0] = "1", + [1] = "2", + [2] = "4", + [3] = "8", + [4] = "16", + [5] = "32" +}; + +char *pred_inv[2] = { + [0] = "+", + [1] = "-" +}; + +char *pred_ctrl_align16[16] = { + [1] = "", + [2] = ".x", + [3] = ".y", + [4] = ".z", + [5] = ".w", + [6] = ".any4h", + [7] = ".all4h", +}; + +char *pred_ctrl_align1[16] = { + [1] = "", + [2] = ".anyv", + [3] = ".allv", + [4] = ".any2h", + [5] = ".all2h", + [6] = ".any4h", + [7] = ".all4h", + [8] = ".any8h", + [9] = ".all8h", + [10] = ".any16h", + [11] = ".all16h", +}; + +char *thread_ctrl[4] = { + [0] = "", + [2] = "switch" +}; + +char *compr_ctrl[4] = { + [0] = "", + [1] = "sechalf", + [2] = "compr", +}; + +char *dep_ctrl[4] = { + [0] = "", + [1] = "NoDDClr", + [2] = "NoDDChk", + [3] = "NoDDClr,NoDDChk", +}; + +char *mask_ctrl[4] = { + [0] = "", + [1] = "nomask", +}; + +char *access_mode[2] = { + [0] = "align1", + [1] = "align16", +}; + +char *reg_encoding[8] = { + [0] = "UD", + [1] = "D", + [2] = "UW", + [3] = "W", + [4] = "UB", + [5] = "B", + [7] = "F" +}; + +char *imm_encoding[8] = { + [0] = "UD", + [1] = "D", + [2] = "UW", + [3] = "W", + [5] = "VF", + [5] = "V", + [7] = "F" +}; + +char *reg_file[4] = { + [0] = "A", + [1] = "g", + [2] = "m", + [3] = "imm", +}; + +char *writemask[16] = { + [0x0] = ".", + [0x1] = ".x", + [0x2] = ".y", + [0x3] = ".xy", + [0x4] = ".z", + [0x5] = ".xz", + [0x6] = ".yz", + [0x7] = ".xyz", + [0x8] = ".w", + [0x9] = ".xw", + [0xa] = ".yw", + [0xb] = ".xyw", + [0xc] = ".zw", + [0xd] = ".xzw", + [0xe] = ".yzw", + [0xf] = "", +}; + +char *end_of_thread[2] = { + [0] = "", + [1] = "EOT" +}; + +char *target_function[16] = { + [BRW_MESSAGE_TARGET_NULL] = "null", + [BRW_MESSAGE_TARGET_MATH] = "math", + [BRW_MESSAGE_TARGET_SAMPLER] = "sampler", + [BRW_MESSAGE_TARGET_GATEWAY] = "gateway", + [BRW_MESSAGE_TARGET_DATAPORT_READ] = "read", + [BRW_MESSAGE_TARGET_DATAPORT_WRITE] = "write", + [BRW_MESSAGE_TARGET_URB] = "urb", + [BRW_MESSAGE_TARGET_THREAD_SPAWNER] = "thread_spawner" +}; + +char *math_function[16] = { + [BRW_MATH_FUNCTION_INV] = "inv", + [BRW_MATH_FUNCTION_LOG] = "log", + [BRW_MATH_FUNCTION_EXP] = "exp", + [BRW_MATH_FUNCTION_SQRT] = "sqrt", + [BRW_MATH_FUNCTION_RSQ] = "rsq", + [BRW_MATH_FUNCTION_SIN] = "sin", + [BRW_MATH_FUNCTION_COS] = "cos", + [BRW_MATH_FUNCTION_SINCOS] = "sincos", + [BRW_MATH_FUNCTION_TAN] = "tan", + [BRW_MATH_FUNCTION_POW] = "pow", + [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER] = "intdivmod", + [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT] = "intmod", + [BRW_MATH_FUNCTION_INT_DIV_REMAINDER] = "intdiv", +}; + +char *math_saturate[2] = { + [0] = "", + [1] = "sat" +}; + +char *math_signed[2] = { + [0] = "", + [1] = "signed" +}; + +char *math_scalar[2] = { + [0] = "", + [1] = "scalar" +}; + +char *math_precision[2] = { + [0] = "", + [1] = "partial_precision" +}; + +char *urb_swizzle[4] = { + [BRW_URB_SWIZZLE_NONE] = "", + [BRW_URB_SWIZZLE_INTERLEAVE] = "interleave", + [BRW_URB_SWIZZLE_TRANSPOSE] = "transpose", +}; + +char *urb_allocate[2] = { + [0] = "", + [1] = "allocate" +}; + +char *urb_used[2] = { + [0] = "", + [1] = "used" +}; + +char *urb_complete[2] = { + [0] = "", + [1] = "complete" +}; + +char *sampler_target_format[4] = { + [0] = "F", + [2] = "UD", + [3] = "D" +}; + + +static int column; + +static int string (FILE *file, char *string) +{ + fputs (string, file); + column += strlen (string); + return 0; +} + +static int format (FILE *f, char *format, ...) +{ + char buf[1024]; + va_list args; + va_start (args, format); + + vsnprintf (buf, sizeof (buf) - 1, format, args); + string (f, buf); + return 0; +} + +static int newline (FILE *f) +{ + putc ('\n', f); + column = 0; + return 0; +} + +static int pad (FILE *f, int c) +{ + do + string (f, " "); + while (column < c); + return 0; +} + +static int control (FILE *file, char *name, char *ctrl[], GLuint id, int *space) +{ + if (!ctrl[id]) { + fprintf (file, "*** invalid %s value %d ", + name, id); + return 1; + } + if (ctrl[id][0]) + { + if (space && *space) + string (file, " "); + string (file, ctrl[id]); + if (space) + *space = 1; + } + return 0; +} + +static int print_opcode (FILE *file, int id) +{ + if (!opcode[id].name) { + format (file, "*** invalid opcode value %d ", id); + return 1; + } + string (file, opcode[id].name); + return 0; +} + +static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr) +{ + int err = 0; + if (_reg_file == BRW_ARCHITECTURE_REGISTER_FILE) { + switch (_reg_nr & 0xf0) { + case BRW_ARF_NULL: + string (file, "null"); + return -1; + case BRW_ARF_ADDRESS: + format (file, "a%d", _reg_nr & 0x0f); + break; + case BRW_ARF_ACCUMULATOR: + format (file, "acc%d", _reg_nr & 0x0f); + break; + case BRW_ARF_MASK: + format (file, "mask%d", _reg_nr & 0x0f); + break; + case BRW_ARF_MASK_STACK: + format (file, "msd%d", _reg_nr & 0x0f); + break; + case BRW_ARF_STATE: + format (file, "sr%d", _reg_nr & 0x0f); + break; + case BRW_ARF_CONTROL: + format (file, "cr%d", _reg_nr & 0x0f); + break; + case BRW_ARF_NOTIFICATION_COUNT: + format (file, "n%d", _reg_nr & 0x0f); + break; + case BRW_ARF_IP: + string (file, "ip"); + return -1; + break; + default: + format (file, "ARF%d", _reg_nr); + break; + } + } else { + err |= control (file, "src reg file", reg_file, _reg_file, NULL); + format (file, "%d", _reg_nr); + } + return err; +} + +static int dest (FILE *file, const struct brw_instruction *inst) +{ + int err = 0; + + if (inst->header.access_mode == BRW_ALIGN_1) + { + if (inst->bits1.da1.dest_address_mode == BRW_ADDRESS_DIRECT) + { + err |= reg (file, inst->bits1.da1.dest_reg_file, inst->bits1.da1.dest_reg_nr); + if (err == -1) + return 0; + if (inst->bits1.da1.dest_subreg_nr) + format (file, ".%d", inst->bits1.da1.dest_subreg_nr); + format (file, "<%d>", inst->bits1.da1.dest_horiz_stride); + err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da1.dest_reg_type, NULL); + } + else + { + string (file, "g[a0"); + if (inst->bits1.ia1.dest_subreg_nr) + format (file, ".%d", inst->bits1.ia1.dest_subreg_nr); + if (inst->bits1.ia1.dest_indirect_offset) + format (file, " %d", inst->bits1.ia1.dest_indirect_offset); + string (file, "]"); + format (file, "<%d>", inst->bits1.ia1.dest_horiz_stride); + err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.ia1.dest_reg_type, NULL); + } + } + else + { + if (inst->bits1.da16.dest_address_mode == BRW_ADDRESS_DIRECT) + { + err |= reg (file, inst->bits1.da16.dest_reg_file, inst->bits1.da16.dest_reg_nr); + if (err == -1) + return 0; + if (inst->bits1.da16.dest_subreg_nr) + format (file, ".%d", inst->bits1.da16.dest_subreg_nr); + string (file, "<1>"); + err |= control (file, "writemask", writemask, inst->bits1.da16.dest_writemask, NULL); + err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da16.dest_reg_type, NULL); + } + else + { + err = 1; + string (file, "Indirect align16 address mode not supported"); + } + } + + return 0; +} + +static int src_align1_region (FILE *file, + GLuint _vert_stride, GLuint _width, GLuint _horiz_stride) +{ + int err = 0; + string (file, "<"); + err |= control (file, "vert stride", vert_stride, _vert_stride, NULL); + string (file, ","); + err |= control (file, "width", width, _width, NULL); + string (file, ","); + err |= control (file, "horiz_stride", horiz_stride, _horiz_stride, NULL); + string (file, ">"); + return err; +} + +static int src_da1 (FILE *file, GLuint type, GLuint _reg_file, + GLuint _vert_stride, GLuint _width, GLuint _horiz_stride, + GLuint reg_num, GLuint sub_reg_num, GLuint __abs, GLuint _negate) +{ + int err = 0; + err |= control (file, "negate", negate, _negate, NULL); + err |= control (file, "abs", _abs, __abs, NULL); + + err |= reg (file, _reg_file, reg_num); + if (err == -1) + return 0; + if (sub_reg_num) + format (file, ".%d", sub_reg_num); + src_align1_region (file, _vert_stride, _width, _horiz_stride); + err |= control (file, "src reg encoding", reg_encoding, type, NULL); + return err; +} + +static int src_ia1 (FILE *file, + GLuint type, + GLuint _reg_file, + GLint _addr_imm, + GLuint _addr_subreg_nr, + GLuint _negate, + GLuint __abs, + GLuint _addr_mode, + GLuint _horiz_stride, + GLuint _width, + GLuint _vert_stride) +{ + int err = 0; + err |= control (file, "negate", negate, _negate, NULL); + err |= control (file, "abs", _abs, __abs, NULL); + + string (file, "g[a0"); + if (_addr_subreg_nr) + format (file, ".%d", _addr_subreg_nr); + if (_addr_imm) + format (file, " %d", _addr_imm); + string (file, "]"); + src_align1_region (file, _vert_stride, _width, _horiz_stride); + err |= control (file, "src reg encoding", reg_encoding, type, NULL); + return err; +} + +static int src_da16 (FILE *file, + GLuint _reg_type, + GLuint _reg_file, + GLuint _vert_stride, + GLuint _reg_nr, + GLuint _subreg_nr, + GLuint __abs, + GLuint _negate, + GLuint swz_x, + GLuint swz_y, + GLuint swz_z, + GLuint swz_w) +{ + int err = 0; + err |= control (file, "negate", negate, _negate, NULL); + err |= control (file, "abs", _abs, __abs, NULL); + + err |= reg (file, _reg_file, _reg_nr); + if (err == -1) + return 0; + if (_subreg_nr) + format (file, ".%d", _subreg_nr); + string (file, "<"); + err |= control (file, "vert stride", vert_stride, _vert_stride, NULL); + string (file, ",1,1>"); + err |= control (file, "src da16 reg type", reg_encoding, _reg_type, NULL); + /* + * Three kinds of swizzle display: + * identity - nothing printed + * 1->all - print the single channel + * 1->1 - print the mapping + */ + if (swz_x == BRW_CHANNEL_X && + swz_y == BRW_CHANNEL_Y && + swz_z == BRW_CHANNEL_Z && + swz_w == BRW_CHANNEL_W) + { + ; + } + else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w) + { + string (file, "."); + err |= control (file, "channel select", chan_sel, swz_x, NULL); + } + else + { + string (file, "."); + err |= control (file, "channel select", chan_sel, swz_x, NULL); + err |= control (file, "channel select", chan_sel, swz_y, NULL); + err |= control (file, "channel select", chan_sel, swz_z, NULL); + err |= control (file, "channel select", chan_sel, swz_w, NULL); + } + return err; +} + + +static int imm (FILE *file, GLuint type, const struct brw_instruction *inst) { + switch (type) { + case BRW_REGISTER_TYPE_UD: + format (file, "0x%08xUD", inst->bits3.ud); + break; + case BRW_REGISTER_TYPE_D: + format (file, "%dD", inst->bits3.d); + break; + case BRW_REGISTER_TYPE_UW: + format (file, "0x%04xUW", (uint16_t) inst->bits3.ud); + break; + case BRW_REGISTER_TYPE_W: + format (file, "%dW", (int16_t) inst->bits3.d); + break; + case BRW_REGISTER_TYPE_UB: + format (file, "0x%02xUB", (int8_t) inst->bits3.ud); + break; + case BRW_REGISTER_TYPE_VF: + format (file, "Vector Float"); + break; + case BRW_REGISTER_TYPE_V: + format (file, "0x%08xV", inst->bits3.ud); + break; + case BRW_REGISTER_TYPE_F: + format (file, "%-gF", inst->bits3.f); + } + return 0; +} + +static int src0 (FILE *file, const struct brw_instruction *inst) +{ + if (inst->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE) + return imm (file, inst->bits1.da1.src0_reg_type, + inst); + else if (inst->header.access_mode == BRW_ALIGN_1) + { + if (inst->bits2.da1.src0_address_mode == BRW_ADDRESS_DIRECT) + { + return src_da1 (file, + inst->bits1.da1.src0_reg_type, + inst->bits1.da1.src0_reg_file, + inst->bits2.da1.src0_vert_stride, + inst->bits2.da1.src0_width, + inst->bits2.da1.src0_horiz_stride, + inst->bits2.da1.src0_reg_nr, + inst->bits2.da1.src0_subreg_nr, + inst->bits2.da1.src0_abs, + inst->bits2.da1.src0_negate); + } + else + { + return src_ia1 (file, + inst->bits1.ia1.src0_reg_type, + inst->bits1.ia1.src0_reg_file, + inst->bits2.ia1.src0_indirect_offset, + inst->bits2.ia1.src0_subreg_nr, + inst->bits2.ia1.src0_negate, + inst->bits2.ia1.src0_abs, + inst->bits2.ia1.src0_address_mode, + inst->bits2.ia1.src0_horiz_stride, + inst->bits2.ia1.src0_width, + inst->bits2.ia1.src0_vert_stride); + } + } + else + { + if (inst->bits2.da16.src0_address_mode == BRW_ADDRESS_DIRECT) + { + return src_da16 (file, + inst->bits1.da16.src0_reg_type, + inst->bits1.da16.src0_reg_file, + inst->bits2.da16.src0_vert_stride, + inst->bits2.da16.src0_reg_nr, + inst->bits2.da16.src0_subreg_nr, + inst->bits2.da16.src0_abs, + inst->bits2.da16.src0_negate, + inst->bits2.da16.src0_swz_x, + inst->bits2.da16.src0_swz_y, + inst->bits2.da16.src0_swz_z, + inst->bits2.da16.src0_swz_w); + } + else + { + string (file, "Indirect align16 address mode not supported"); + return 1; + } + } +} + +static int src1 (FILE *file, const struct brw_instruction *inst) +{ + if (inst->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE) + return imm (file, inst->bits1.da1.src1_reg_type, + inst); + else if (inst->header.access_mode == BRW_ALIGN_1) + { + if (inst->bits3.da1.src1_address_mode == BRW_ADDRESS_DIRECT) + { + return src_da1 (file, + inst->bits1.da1.src1_reg_type, + inst->bits1.da1.src1_reg_file, + inst->bits3.da1.src1_vert_stride, + inst->bits3.da1.src1_width, + inst->bits3.da1.src1_horiz_stride, + inst->bits3.da1.src1_reg_nr, + inst->bits3.da1.src1_subreg_nr, + inst->bits3.da1.src1_abs, + inst->bits3.da1.src1_negate); + } + else + { + return src_ia1 (file, + inst->bits1.ia1.src1_reg_type, + inst->bits1.ia1.src1_reg_file, + inst->bits3.ia1.src1_indirect_offset, + inst->bits3.ia1.src1_subreg_nr, + inst->bits3.ia1.src1_negate, + inst->bits3.ia1.src1_abs, + inst->bits3.ia1.src1_address_mode, + inst->bits3.ia1.src1_horiz_stride, + inst->bits3.ia1.src1_width, + inst->bits3.ia1.src1_vert_stride); + } + } + else + { + if (inst->bits3.da16.src1_address_mode == BRW_ADDRESS_DIRECT) + { + return src_da16 (file, + inst->bits1.da16.src1_reg_type, + inst->bits1.da16.src1_reg_file, + inst->bits3.da16.src1_vert_stride, + inst->bits3.da16.src1_reg_nr, + inst->bits3.da16.src1_subreg_nr, + inst->bits3.da16.src1_abs, + inst->bits3.da16.src1_negate, + inst->bits3.da16.src1_swz_x, + inst->bits3.da16.src1_swz_y, + inst->bits3.da16.src1_swz_z, + inst->bits3.da16.src1_swz_w); + } + else + { + string (file, "Indirect align16 address mode not supported"); + return 1; + } + } +} + +int brw_disasm_insn (FILE *file, const struct brw_instruction *inst) +{ + int err = 0; + int space = 0; + + if (inst->header.predicate_control) { + string (file, "("); + err |= control (file, "predicate inverse", pred_inv, inst->header.predicate_inverse, NULL); + string (file, "f0"); + if (inst->bits2.da1.flag_reg_nr) + format (file, ".%d", inst->bits2.da1.flag_reg_nr); + if (inst->header.access_mode == BRW_ALIGN_1) + err |= control (file, "predicate control align1", pred_ctrl_align1, + inst->header.predicate_control, NULL); + else + err |= control (file, "predicate control align16", pred_ctrl_align16, + inst->header.predicate_control, NULL); + string (file, ") "); + } + + err |= print_opcode (file, inst->header.opcode); + err |= control (file, "saturate", saturate, inst->header.saturate, NULL); + err |= control (file, "debug control", debug_ctrl, inst->header.debug_control, NULL); + + if (inst->header.opcode != BRW_OPCODE_SEND) + err |= control (file, "conditional modifier", conditional_modifier, + inst->header.destreg__conditionalmod, NULL); + + if (inst->header.opcode != BRW_OPCODE_NOP) { + string (file, "("); + err |= control (file, "execution size", exec_size, inst->header.execution_size, NULL); + string (file, ")"); + } + + if (inst->header.opcode == BRW_OPCODE_SEND) + format (file, " %d", inst->header.destreg__conditionalmod); + + if (opcode[inst->header.opcode].ndst > 0) { + pad (file, 16); + err |= dest (file, inst); + } + if (opcode[inst->header.opcode].nsrc > 0) { + pad (file, 32); + err |= src0 (file, inst); + } + if (opcode[inst->header.opcode].nsrc > 1) { + pad (file, 48); + err |= src1 (file, inst); + } + + if (inst->header.opcode == BRW_OPCODE_SEND) { + newline (file); + pad (file, 16); + space = 0; + err |= control (file, "target function", target_function, + inst->bits3.generic.msg_target, &space); + switch (inst->bits3.generic.msg_target) { + case BRW_MESSAGE_TARGET_MATH: + err |= control (file, "math function", math_function, + inst->bits3.math.function, &space); + err |= control (file, "math saturate", math_saturate, + inst->bits3.math.saturate, &space); + err |= control (file, "math signed", math_signed, + inst->bits3.math.int_type, &space); + err |= control (file, "math scalar", math_scalar, + inst->bits3.math.data_type, &space); + err |= control (file, "math precision", math_precision, + inst->bits3.math.precision, &space); + break; + case BRW_MESSAGE_TARGET_SAMPLER: + format (file, " (%d, %d, ", + inst->bits3.sampler.binding_table_index, + inst->bits3.sampler.sampler); + err |= control (file, "sampler target format", sampler_target_format, + inst->bits3.sampler.return_format, NULL); + string (file, ")"); + break; + case BRW_MESSAGE_TARGET_DATAPORT_WRITE: + format (file, " (%d, %d, %d, %d)", + inst->bits3.dp_write.binding_table_index, + (inst->bits3.dp_write.pixel_scoreboard_clear << 3) | + inst->bits3.dp_write.msg_control, + inst->bits3.dp_write.msg_type, + inst->bits3.dp_write.send_commit_msg); + break; + case BRW_MESSAGE_TARGET_URB: + format (file, " %d", inst->bits3.urb.offset); + space = 1; + err |= control (file, "urb swizzle", urb_swizzle, + inst->bits3.urb.swizzle_control, &space); + err |= control (file, "urb allocate", urb_allocate, + inst->bits3.urb.allocate, &space); + err |= control (file, "urb used", urb_used, + inst->bits3.urb.used, &space); + err |= control (file, "urb complete", urb_complete, + inst->bits3.urb.complete, &space); + break; + case BRW_MESSAGE_TARGET_THREAD_SPAWNER: + break; + default: + format (file, "unsupported target %d", inst->bits3.generic.msg_target); + break; + } + if (space) + string (file, " "); + format (file, "mlen %d", + inst->bits3.generic.msg_length); + format (file, " rlen %d", + inst->bits3.generic.response_length); + } + pad (file, 64); + if (inst->header.opcode != BRW_OPCODE_NOP) { + string (file, "{"); + space = 1; + err |= control(file, "access mode", access_mode, inst->header.access_mode, &space); + err |= control (file, "mask control", mask_ctrl, inst->header.mask_control, &space); + err |= control (file, "dependency control", dep_ctrl, inst->header.dependency_control, &space); + err |= control (file, "compression control", compr_ctrl, inst->header.compression_control, &space); + err |= control (file, "thread control", thread_ctrl, inst->header.thread_control, &space); + if (inst->header.opcode == BRW_OPCODE_SEND) + err |= control (file, "end of thread", end_of_thread, + inst->bits3.generic.end_of_thread, &space); + if (space) + string (file, " "); + string (file, "}"); + } + string (file, ";"); + newline (file); + return err; +} + + +int brw_disasm (FILE *file, + const struct brw_instruction *inst, + unsigned count) +{ + int i, err; + + for (i = 0; i < count; i++) { + err = brw_disasm_insn(stderr, &inst[i]); + if (err) + return err; + } + + fprintf(file, "\n"); + return 0; +} + diff --git a/src/gallium/drivers/i965/brw_disasm.h b/src/gallium/drivers/i965/brw_disasm.h new file mode 100644 index 00000000000..ba5b109c483 --- /dev/null +++ b/src/gallium/drivers/i965/brw_disasm.h @@ -0,0 +1,36 @@ +/* + * Copyright © 2008 Keith Packard + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that copyright + * notice and this permission notice appear in supporting documentation, and + * that the name of the copyright holders not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. The copyright holders make no representations + * about the suitability of this software for any purpose. It is provided "as + * is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THIS SOFTWARE. + */ + +#ifndef BRW_DISASM_H +#define BRW_DISASM_H + +#include <stdio.h> + +struct brw_instruction; + +int brw_disasm_insn (FILE *file, const struct brw_instruction *inst); +int brw_disasm (FILE *file, + const struct brw_instruction *inst, + unsigned count); + +#endif + diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c new file mode 100644 index 00000000000..852fd229828 --- /dev/null +++ b/src/gallium/drivers/i965/brw_draw.c @@ -0,0 +1,291 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "util/u_prim.h" +#include "util/u_upload_mgr.h" + +#include "brw_draw.h" +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_state.h" +#include "brw_debug.h" +#include "brw_screen.h" + +#include "brw_batchbuffer.h" + + +static uint32_t prim_to_hw_prim[PIPE_PRIM_POLYGON+1] = { + _3DPRIM_POINTLIST, + _3DPRIM_LINELIST, + _3DPRIM_LINELOOP, + _3DPRIM_LINESTRIP, + _3DPRIM_TRILIST, + _3DPRIM_TRISTRIP, + _3DPRIM_TRIFAN, + _3DPRIM_QUADLIST, + _3DPRIM_QUADSTRIP, + _3DPRIM_POLYGON +}; + + + +/* When the primitive changes, set a state bit and re-validate. Not + * the nicest and would rather deal with this by having all the + * programs be immune to the active primitive (ie. cope with all + * possibilities). That may not be realistic however. + */ +static int brw_set_prim(struct brw_context *brw, unsigned prim ) +{ + + if (BRW_DEBUG & DEBUG_PRIMS) + debug_printf("PRIM: %s\n", u_prim_name(prim)); + + if (prim != brw->primitive) { + unsigned reduced_prim; + + brw->primitive = prim; + brw->state.dirty.brw |= BRW_NEW_PRIMITIVE; + + reduced_prim = u_reduced_prim(prim); + if (reduced_prim != brw->reduced_primitive) { + brw->reduced_primitive = reduced_prim; + brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE; + } + } + + return prim_to_hw_prim[prim]; +} + + + +static int brw_emit_prim(struct brw_context *brw, + unsigned start, + unsigned count, + boolean indexed, + uint32_t hw_prim) +{ + struct brw_3d_primitive prim_packet; + int ret; + + if (BRW_DEBUG & DEBUG_PRIMS) + debug_printf("%s start %d count %d indexed %d hw_prim %d\n", + __FUNCTION__, start, count, indexed, hw_prim); + + prim_packet.header.opcode = CMD_3D_PRIM; + prim_packet.header.length = sizeof(prim_packet)/4 - 2; + prim_packet.header.pad = 0; + prim_packet.header.topology = hw_prim; + prim_packet.header.indexed = indexed; + + prim_packet.verts_per_instance = count; + prim_packet.start_vert_location = start; + if (indexed) + prim_packet.start_vert_location += brw->ib.start_vertex_offset; + prim_packet.instance_count = 1; + prim_packet.start_instance_location = 0; + prim_packet.base_vert_location = 0; /* prim->basevertex; XXX: add this to gallium */ + + + /* If we're set to always flush, do it before and after the primitive emit. + * We want to catch both missed flushes that hurt instruction/state cache + * and missed flushes of the render cache as it heads to other parts of + * the besides the draw code. + */ + if (0) { + BEGIN_BATCH(1, IGNORE_CLIPRECTS); + OUT_BATCH((CMD_MI_FLUSH << 16) | BRW_FLUSH_STATE_CACHE); + ADVANCE_BATCH(); + } + if (prim_packet.verts_per_instance) { + ret = brw_batchbuffer_data( brw->batch, &prim_packet, + sizeof(prim_packet), LOOP_CLIPRECTS); + if (ret) + return ret; + } + if (0) { + BEGIN_BATCH(1, IGNORE_CLIPRECTS); + OUT_BATCH((CMD_MI_FLUSH << 16) | BRW_FLUSH_STATE_CACHE); + ADVANCE_BATCH(); + } + + return 0; +} + + +/* May fail if out of video memory for texture or vbo upload, or on + * fallback conditions. + */ +static int +try_draw_range_elements(struct brw_context *brw, + struct pipe_buffer *index_buffer, + unsigned hw_prim, + unsigned start, unsigned count) +{ + int ret; + + ret = brw_validate_state(brw); + if (ret) + return ret; + + /* Check that we can fit our state in with our existing batchbuffer, or + * flush otherwise. + */ + ret = brw->sws->check_aperture_space(brw->sws, + brw->state.validated_bos, + brw->state.validated_bo_count); + if (ret) + return ret; + + ret = brw_upload_state(brw); + if (ret) + return ret; + + ret = brw_emit_prim(brw, start, count, index_buffer != NULL, hw_prim); + if (ret) + return ret; + + if (brw->flags.always_flush_batch) + brw_context_flush( brw ); + + return 0; +} + + +static boolean +brw_draw_range_elements(struct pipe_context *pipe, + struct pipe_buffer *index_buffer, + unsigned index_size, + unsigned min_index, + unsigned max_index, + unsigned mode, unsigned start, unsigned count) +{ + struct brw_context *brw = brw_context(pipe); + int ret; + uint32_t hw_prim; + + hw_prim = brw_set_prim(brw, mode); + + if (BRW_DEBUG & DEBUG_PRIMS) + debug_printf("PRIM: %s start %d count %d index_buffer %p\n", + u_prim_name(mode), start, count, (void *)index_buffer); + + /* Potentially trigger upload of new index buffer. + * + * XXX: do we need to go through state validation to achieve this? + * Could just call upload code directly. + */ + if (brw->curr.index_buffer != index_buffer || + brw->curr.index_size != index_size) { + pipe_buffer_reference( &brw->curr.index_buffer, index_buffer ); + brw->curr.index_size = index_size; + brw->state.dirty.mesa |= PIPE_NEW_INDEX_BUFFER; + } + + /* XXX: do we really care? + */ + if (brw->curr.min_index != min_index || + brw->curr.max_index != max_index) + { + brw->curr.min_index = min_index; + brw->curr.max_index = max_index; + brw->state.dirty.mesa |= PIPE_NEW_INDEX_RANGE; + } + + + /* Make a first attempt at drawing: + */ + ret = try_draw_range_elements(brw, index_buffer, hw_prim, start, count ); + + /* Otherwise, flush and retry: + */ + if (ret != 0) { + brw_context_flush( brw ); + ret = try_draw_range_elements(brw, index_buffer, hw_prim, start, count ); + assert(ret == 0); + } + + return TRUE; +} + +static boolean +brw_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *index_buffer, + unsigned index_size, + unsigned mode, + unsigned start, unsigned count) +{ + return brw_draw_range_elements( pipe, index_buffer, + index_size, + 0, 0xffffffff, + mode, + start, count ); +} + +static boolean +brw_draw_arrays(struct pipe_context *pipe, unsigned mode, + unsigned start, unsigned count) +{ + return brw_draw_elements(pipe, NULL, 0, mode, start, count); +} + + + +boolean brw_draw_init( struct brw_context *brw ) +{ + /* Register our drawing function: + */ + brw->base.draw_arrays = brw_draw_arrays; + brw->base.draw_elements = brw_draw_elements; + brw->base.draw_range_elements = brw_draw_range_elements; + + /* Create helpers for uploading data in user buffers: + */ + brw->vb.upload_vertex = u_upload_create( brw->base.screen, + 128 * 1024, + 64, + PIPE_BUFFER_USAGE_VERTEX ); + if (brw->vb.upload_vertex == NULL) + return FALSE; + + brw->vb.upload_index = u_upload_create( brw->base.screen, + 32 * 1024, + 64, + PIPE_BUFFER_USAGE_INDEX ); + if (brw->vb.upload_index == NULL) + return FALSE; + + return TRUE; +} + +void brw_draw_cleanup( struct brw_context *brw ) +{ + u_upload_destroy( brw->vb.upload_vertex ); + u_upload_destroy( brw->vb.upload_index ); + + bo_reference(&brw->ib.bo, NULL); +} diff --git a/src/gallium/drivers/i965/brw_draw.h b/src/gallium/drivers/i965/brw_draw.h new file mode 100644 index 00000000000..8dc5dbce622 --- /dev/null +++ b/src/gallium/drivers/i965/brw_draw.h @@ -0,0 +1,39 @@ + /************************************************************************** + * + * Copyright 2005 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef BRW_DRAW_H +#define BRW_DRAW_H + +#include "brw_types.h" + +struct brw_context; + +boolean brw_draw_init( struct brw_context *brw ); +void brw_draw_cleanup( struct brw_context *brw ); + + +#endif diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c new file mode 100644 index 00000000000..a27da5f1c17 --- /dev/null +++ b/src/gallium/drivers/i965/brw_draw_upload.c @@ -0,0 +1,542 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_context.h" + +#include "util/u_upload_mgr.h" +#include "util/u_math.h" + +#include "brw_draw.h" +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_state.h" +#include "brw_screen.h" +#include "brw_batchbuffer.h" +#include "brw_debug.h" + + + + +static unsigned brw_translate_surface_format( unsigned id ) +{ + switch (id) { + case PIPE_FORMAT_R64_FLOAT: + return BRW_SURFACEFORMAT_R64_FLOAT; + case PIPE_FORMAT_R64G64_FLOAT: + return BRW_SURFACEFORMAT_R64G64_FLOAT; + case PIPE_FORMAT_R64G64B64_FLOAT: + return BRW_SURFACEFORMAT_R64G64B64_FLOAT; + case PIPE_FORMAT_R64G64B64A64_FLOAT: + return BRW_SURFACEFORMAT_R64G64B64A64_FLOAT; + + case PIPE_FORMAT_R32_FLOAT: + return BRW_SURFACEFORMAT_R32_FLOAT; + case PIPE_FORMAT_R32G32_FLOAT: + return BRW_SURFACEFORMAT_R32G32_FLOAT; + case PIPE_FORMAT_R32G32B32_FLOAT: + return BRW_SURFACEFORMAT_R32G32B32_FLOAT; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + return BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; + + case PIPE_FORMAT_R32_UNORM: + return BRW_SURFACEFORMAT_R32_UNORM; + case PIPE_FORMAT_R32G32_UNORM: + return BRW_SURFACEFORMAT_R32G32_UNORM; + case PIPE_FORMAT_R32G32B32_UNORM: + return BRW_SURFACEFORMAT_R32G32B32_UNORM; + case PIPE_FORMAT_R32G32B32A32_UNORM: + return BRW_SURFACEFORMAT_R32G32B32A32_UNORM; + + case PIPE_FORMAT_R32_USCALED: + return BRW_SURFACEFORMAT_R32_USCALED; + case PIPE_FORMAT_R32G32_USCALED: + return BRW_SURFACEFORMAT_R32G32_USCALED; + case PIPE_FORMAT_R32G32B32_USCALED: + return BRW_SURFACEFORMAT_R32G32B32_USCALED; + case PIPE_FORMAT_R32G32B32A32_USCALED: + return BRW_SURFACEFORMAT_R32G32B32A32_USCALED; + + case PIPE_FORMAT_R32_SNORM: + return BRW_SURFACEFORMAT_R32_SNORM; + case PIPE_FORMAT_R32G32_SNORM: + return BRW_SURFACEFORMAT_R32G32_SNORM; + case PIPE_FORMAT_R32G32B32_SNORM: + return BRW_SURFACEFORMAT_R32G32B32_SNORM; + case PIPE_FORMAT_R32G32B32A32_SNORM: + return BRW_SURFACEFORMAT_R32G32B32A32_SNORM; + + case PIPE_FORMAT_R32_SSCALED: + return BRW_SURFACEFORMAT_R32_SSCALED; + case PIPE_FORMAT_R32G32_SSCALED: + return BRW_SURFACEFORMAT_R32G32_SSCALED; + case PIPE_FORMAT_R32G32B32_SSCALED: + return BRW_SURFACEFORMAT_R32G32B32_SSCALED; + case PIPE_FORMAT_R32G32B32A32_SSCALED: + return BRW_SURFACEFORMAT_R32G32B32A32_SSCALED; + + case PIPE_FORMAT_R16_UNORM: + return BRW_SURFACEFORMAT_R16_UNORM; + case PIPE_FORMAT_R16G16_UNORM: + return BRW_SURFACEFORMAT_R16G16_UNORM; + case PIPE_FORMAT_R16G16B16_UNORM: + return BRW_SURFACEFORMAT_R16G16B16_UNORM; + case PIPE_FORMAT_R16G16B16A16_UNORM: + return BRW_SURFACEFORMAT_R16G16B16A16_UNORM; + + case PIPE_FORMAT_R16_USCALED: + return BRW_SURFACEFORMAT_R16_USCALED; + case PIPE_FORMAT_R16G16_USCALED: + return BRW_SURFACEFORMAT_R16G16_USCALED; + case PIPE_FORMAT_R16G16B16_USCALED: + return BRW_SURFACEFORMAT_R16G16B16_USCALED; + case PIPE_FORMAT_R16G16B16A16_USCALED: + return BRW_SURFACEFORMAT_R16G16B16A16_USCALED; + + case PIPE_FORMAT_R16_SNORM: + return BRW_SURFACEFORMAT_R16_SNORM; + case PIPE_FORMAT_R16G16_SNORM: + return BRW_SURFACEFORMAT_R16G16_SNORM; + case PIPE_FORMAT_R16G16B16_SNORM: + return BRW_SURFACEFORMAT_R16G16B16_SNORM; + case PIPE_FORMAT_R16G16B16A16_SNORM: + return BRW_SURFACEFORMAT_R16G16B16A16_SNORM; + + case PIPE_FORMAT_R16_SSCALED: + return BRW_SURFACEFORMAT_R16_SSCALED; + case PIPE_FORMAT_R16G16_SSCALED: + return BRW_SURFACEFORMAT_R16G16_SSCALED; + case PIPE_FORMAT_R16G16B16_SSCALED: + return BRW_SURFACEFORMAT_R16G16B16_SSCALED; + case PIPE_FORMAT_R16G16B16A16_SSCALED: + return BRW_SURFACEFORMAT_R16G16B16A16_SSCALED; + + case PIPE_FORMAT_R8_UNORM: + return BRW_SURFACEFORMAT_R8_UNORM; + case PIPE_FORMAT_R8G8_UNORM: + return BRW_SURFACEFORMAT_R8G8_UNORM; + case PIPE_FORMAT_R8G8B8_UNORM: + return BRW_SURFACEFORMAT_R8G8B8_UNORM; + case PIPE_FORMAT_R8G8B8A8_UNORM: + return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; + + case PIPE_FORMAT_R8_USCALED: + return BRW_SURFACEFORMAT_R8_USCALED; + case PIPE_FORMAT_R8G8_USCALED: + return BRW_SURFACEFORMAT_R8G8_USCALED; + case PIPE_FORMAT_R8G8B8_USCALED: + return BRW_SURFACEFORMAT_R8G8B8_USCALED; + case PIPE_FORMAT_R8G8B8A8_USCALED: + return BRW_SURFACEFORMAT_R8G8B8A8_USCALED; + + case PIPE_FORMAT_R8_SNORM: + return BRW_SURFACEFORMAT_R8_SNORM; + case PIPE_FORMAT_R8G8_SNORM: + return BRW_SURFACEFORMAT_R8G8_SNORM; + case PIPE_FORMAT_R8G8B8_SNORM: + return BRW_SURFACEFORMAT_R8G8B8_SNORM; + case PIPE_FORMAT_R8G8B8A8_SNORM: + return BRW_SURFACEFORMAT_R8G8B8A8_SNORM; + + case PIPE_FORMAT_R8_SSCALED: + return BRW_SURFACEFORMAT_R8_SSCALED; + case PIPE_FORMAT_R8G8_SSCALED: + return BRW_SURFACEFORMAT_R8G8_SSCALED; + case PIPE_FORMAT_R8G8B8_SSCALED: + return BRW_SURFACEFORMAT_R8G8B8_SSCALED; + case PIPE_FORMAT_R8G8B8A8_SSCALED: + return BRW_SURFACEFORMAT_R8G8B8A8_SSCALED; + + default: + assert(0); + return 0; + } +} + +static unsigned get_index_type(int type) +{ + switch (type) { + case 1: return BRW_INDEX_BYTE; + case 2: return BRW_INDEX_WORD; + case 4: return BRW_INDEX_DWORD; + default: assert(0); return 0; + } +} + + +static int brw_prepare_vertices(struct brw_context *brw) +{ + unsigned int min_index = brw->curr.min_index; + unsigned int max_index = brw->curr.max_index; + GLuint i; + int ret; + + if (BRW_DEBUG & DEBUG_VERTS) + debug_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index); + + + for (i = 0; i < brw->curr.num_vertex_buffers; i++) { + struct pipe_vertex_buffer *vb = &brw->curr.vertex_buffer[i]; + struct brw_winsys_buffer *bo; + struct pipe_buffer *upload_buf = NULL; + unsigned offset; + + if (BRW_DEBUG & DEBUG_VERTS) + debug_printf("%s vb[%d] user:%d offset:0x%x sz:0x%x stride:0x%x\n", + __FUNCTION__, i, + brw_buffer_is_user_buffer(vb->buffer), + vb->buffer_offset, + vb->buffer->size, + vb->stride); + + if (brw_buffer_is_user_buffer(vb->buffer)) { + + /* XXX: simplify this. Stop the state trackers from generating + * zero-stride buffers & have them use additional constants (or + * add support for >1 constant buffer) instead. + */ + unsigned size = (vb->stride == 0 ? + vb->buffer->size - vb->buffer_offset : + MAX2(vb->buffer->size - vb->buffer_offset, + vb->stride * (max_index + 1 - min_index))); + + ret = u_upload_buffer( brw->vb.upload_vertex, + vb->buffer_offset + min_index * vb->stride, + size, + vb->buffer, + &offset, + &upload_buf ); + if (ret) + return ret; + + bo = brw_buffer(upload_buf)->bo; + + assert(offset + size <= bo->size); + } + else + { + offset = vb->buffer_offset; + bo = brw_buffer(vb->buffer)->bo; + } + + assert(offset < bo->size); + + /* Set up post-upload info about this vertex buffer: + */ + brw->vb.vb[i].offset = offset; + brw->vb.vb[i].stride = vb->stride; + brw->vb.vb[i].vertex_count = (vb->stride == 0 ? + 1 : + (bo->size - offset) / vb->stride); + + bo_reference( &brw->vb.vb[i].bo, bo ); + + /* Don't need to retain this reference. We have a reference on + * the underlying winsys buffer: + */ + pipe_buffer_reference( &upload_buf, NULL ); + } + + brw->vb.nr_vb = i; + brw_prepare_query_begin(brw); + + for (i = 0; i < brw->vb.nr_vb; i++) { + brw_add_validated_bo(brw, brw->vb.vb[i].bo); + } + + return 0; +} + +static int brw_emit_vertex_buffers( struct brw_context *brw ) +{ + int i; + + /* If the VS doesn't read any inputs (calculating vertex position from + * a state variable for some reason, for example), just bail. + * + * The stale VB state stays in place, but they don't do anything unless + * a VE loads from them. + */ + if (brw->vb.nr_vb == 0) { + if (BRW_DEBUG & DEBUG_VERTS) + debug_printf("%s: no active vertex buffers\n", __FUNCTION__); + + return 0; + } + + /* Emit VB state packets. + */ + BEGIN_BATCH(1 + brw->vb.nr_vb * 4, IGNORE_CLIPRECTS); + OUT_BATCH((CMD_VERTEX_BUFFER << 16) | + ((1 + brw->vb.nr_vb * 4) - 2)); + + for (i = 0; i < brw->vb.nr_vb; i++) { + OUT_BATCH((i << BRW_VB0_INDEX_SHIFT) | + BRW_VB0_ACCESS_VERTEXDATA | + (brw->vb.vb[i].stride << BRW_VB0_PITCH_SHIFT)); + OUT_RELOC(brw->vb.vb[i].bo, + BRW_USAGE_VERTEX, + brw->vb.vb[i].offset); + if (BRW_IS_IGDNG(brw)) { + OUT_RELOC(brw->vb.vb[i].bo, + BRW_USAGE_VERTEX, + brw->vb.vb[i].bo->size - 1); + } else + OUT_BATCH(brw->vb.vb[i].stride ? brw->vb.vb[i].vertex_count : 0); + OUT_BATCH(0); /* Instance data step rate */ + } + ADVANCE_BATCH(); + return 0; +} + + + + +static int brw_emit_vertex_elements(struct brw_context *brw) +{ + GLuint nr = brw->curr.num_vertex_elements; + GLuint i; + + brw_emit_query_begin(brw); + + /* If the VS doesn't read any inputs (calculating vertex position from + * a state variable for some reason, for example), emit a single pad + * VERTEX_ELEMENT struct and bail. + * + * The stale VB state stays in place, but they don't do anything unless + * a VE loads from them. + */ + if (nr == 0) { + BEGIN_BATCH(3, IGNORE_CLIPRECTS); + OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | 1); + OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) | + BRW_VE0_VALID | + (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | + (0 << BRW_VE0_SRC_OFFSET_SHIFT)); + OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) | + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | + (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT)); + ADVANCE_BATCH(); + return 0; + } + + /* Now emit vertex element (VEP) state packets. + * + */ + BEGIN_BATCH(1 + nr * 2, IGNORE_CLIPRECTS); + OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + nr * 2) - 2)); + for (i = 0; i < nr; i++) { + const struct pipe_vertex_element *input = &brw->curr.vertex_element[i]; + uint32_t format = brw_translate_surface_format( input->src_format ); + uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC; + uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC; + uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC; + uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC; + + switch (input->nr_components) { + case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; + case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; + case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; + case 3: comp3 = BRW_VE1_COMPONENT_STORE_1_FLT; + break; + } + + OUT_BATCH((input->vertex_buffer_index << BRW_VE0_INDEX_SHIFT) | + BRW_VE0_VALID | + (format << BRW_VE0_FORMAT_SHIFT) | + (input->src_offset << BRW_VE0_SRC_OFFSET_SHIFT)); + + if (BRW_IS_IGDNG(brw)) + OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | + (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | + (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | + (comp3 << BRW_VE1_COMPONENT_3_SHIFT)); + else + OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | + (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | + (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | + (comp3 << BRW_VE1_COMPONENT_3_SHIFT) | + ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT)); + } + ADVANCE_BATCH(); + return 0; +} + + +static int brw_emit_vertices( struct brw_context *brw ) +{ + int ret; + + ret = brw_emit_vertex_buffers( brw ); + if (ret) + return ret; + + ret = brw_emit_vertex_elements( brw ); + if (ret) + return ret; + + return 0; +} + + +const struct brw_tracked_state brw_vertices = { + .dirty = { + .mesa = (PIPE_NEW_INDEX_RANGE | + PIPE_NEW_VERTEX_BUFFER), + .brw = BRW_NEW_BATCH, + .cache = 0, + }, + .prepare = brw_prepare_vertices, + .emit = brw_emit_vertices, +}; + + +static int brw_prepare_indices(struct brw_context *brw) +{ + struct pipe_buffer *index_buffer = brw->curr.index_buffer; + struct pipe_buffer *upload_buf = NULL; + struct brw_winsys_buffer *bo = NULL; + GLuint offset; + GLuint index_size; + GLuint ib_size; + int ret; + + if (index_buffer == NULL) + return 0; + + if (BRW_DEBUG & DEBUG_VERTS) + debug_printf("%s: index_size:%d index_buffer->size:%d\n", + __FUNCTION__, + brw->curr.index_size, + brw->curr.index_buffer->size); + + ib_size = index_buffer->size; + index_size = brw->curr.index_size; + + /* Turn userbuffer into a proper hardware buffer? + */ + if (brw_buffer_is_user_buffer(index_buffer)) { + + ret = u_upload_buffer( brw->vb.upload_index, + 0, + ib_size, + index_buffer, + &offset, + &upload_buf ); + if (ret) + return ret; + + bo = brw_buffer(upload_buf)->bo; + + /* XXX: annotate the userbuffer with the upload information so + * that successive calls don't get re-uploaded. + */ + } + else { + bo = brw_buffer(index_buffer)->bo; + ib_size = bo->size; + offset = 0; + } + + /* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading the + * index buffer state when we're just moving the start index of our + * drawing. + * + * In gallium this will happen in the case where successive draw + * calls are made with (distinct?) userbuffers, but the upload_mgr + * places the data into a single winsys buffer. + * + * This statechange doesn't raise any state flags and is always + * just merged into the final draw packet: + */ + if (1) { + assert((offset & (index_size - 1)) == 0); + brw->ib.start_vertex_offset = offset / index_size; + } + + /* These statechanges trigger a new CMD_INDEX_BUFFER packet: + */ + if (brw->ib.bo != bo || + brw->ib.size != ib_size) + { + bo_reference(&brw->ib.bo, bo); + brw->ib.size = ib_size; + brw->state.dirty.brw |= BRW_NEW_INDEX_BUFFER; + } + + pipe_buffer_reference( &upload_buf, NULL ); + brw_add_validated_bo(brw, brw->ib.bo); + return 0; +} + +const struct brw_tracked_state brw_indices = { + .dirty = { + .mesa = PIPE_NEW_INDEX_BUFFER, + .brw = 0, + .cache = 0, + }, + .prepare = brw_prepare_indices, +}; + +static int brw_emit_index_buffer(struct brw_context *brw) +{ + /* Emit the indexbuffer packet: + */ + if (brw->ib.bo) + { + struct brw_indexbuffer ib; + + memset(&ib, 0, sizeof(ib)); + + ib.header.bits.opcode = CMD_INDEX_BUFFER; + ib.header.bits.length = sizeof(ib)/4 - 2; + ib.header.bits.index_format = get_index_type(brw->ib.size); + ib.header.bits.cut_index_enable = 0; + + BEGIN_BATCH(4, IGNORE_CLIPRECTS); + OUT_BATCH( ib.header.dword ); + OUT_RELOC(brw->ib.bo, + BRW_USAGE_VERTEX, + brw->ib.offset); + OUT_RELOC(brw->ib.bo, + BRW_USAGE_VERTEX, + brw->ib.offset + brw->ib.size - 1); + OUT_BATCH( 0 ); + ADVANCE_BATCH(); + } + + return 0; +} + +const struct brw_tracked_state brw_index_buffer = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_BATCH | BRW_NEW_INDEX_BUFFER, + .cache = 0, + }, + .emit = brw_emit_index_buffer, +}; diff --git a/src/gallium/drivers/i965/brw_eu.c b/src/gallium/drivers/i965/brw_eu.c new file mode 100644 index 00000000000..a8fcb5f97eb --- /dev/null +++ b/src/gallium/drivers/i965/brw_eu.c @@ -0,0 +1,262 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#include "util/u_memory.h" + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_eu.h" + + + +/* How does predicate control work when execution_size != 8? Do I + * need to test/set for 0xffff when execution_size is 16? + */ +void brw_set_predicate_control_flag_value( struct brw_compile *p, GLuint value ) +{ + p->current->header.predicate_control = BRW_PREDICATE_NONE; + + if (value != 0xff) { + if (value != p->flag_value) { + brw_push_insn_state(p); + brw_MOV(p, brw_flag_reg(), brw_imm_uw(value)); + p->flag_value = value; + brw_pop_insn_state(p); + } + + p->current->header.predicate_control = BRW_PREDICATE_NORMAL; + } +} + +void brw_set_predicate_control( struct brw_compile *p, GLuint pc ) +{ + p->current->header.predicate_control = pc; +} + +void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional ) +{ + p->current->header.destreg__conditionalmod = conditional; +} + +void brw_set_access_mode( struct brw_compile *p, GLuint access_mode ) +{ + p->current->header.access_mode = access_mode; +} + +void brw_set_compression_control( struct brw_compile *p, GLboolean compression_control ) +{ + p->current->header.compression_control = compression_control; +} + +void brw_set_mask_control( struct brw_compile *p, GLuint value ) +{ + p->current->header.mask_control = value; +} + +void brw_set_saturate( struct brw_compile *p, GLuint value ) +{ + p->current->header.saturate = value; +} + +void brw_push_insn_state( struct brw_compile *p ) +{ + assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]); + memcpy(p->current+1, p->current, sizeof(struct brw_instruction)); + p->current++; +} + +void brw_pop_insn_state( struct brw_compile *p ) +{ + assert(p->current != p->stack); + p->current--; +} + + +/*********************************************************************** + */ +void brw_init_compile( struct brw_context *brw, struct brw_compile *p ) +{ + p->brw = brw; + p->nr_insn = 0; + p->current = p->stack; + memset(p->current, 0, sizeof(p->current[0])); + + /* Some defaults? + */ + brw_set_mask_control(p, BRW_MASK_ENABLE); /* what does this do? */ + brw_set_saturate(p, 0); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_predicate_control_flag_value(p, 0xff); +} + + +enum pipe_error brw_get_program( struct brw_compile *p, + const GLuint **data, + GLuint *sz ) +{ + GLuint i; + + for (i = 0; i < 8; i++) + brw_NOP(p); + + /* Is the generated program malformed for some reason? + */ + if (p->error) + return PIPE_ERROR_BAD_INPUT; + + *sz = p->nr_insn * sizeof(struct brw_instruction); + *data = (const GLuint *)p->store; + return PIPE_OK; +} + + + +/** + * Subroutine calls require special attention. + * Mesa instructions may be expanded into multiple hardware instructions + * so the prog_instruction::BranchTarget field can't be used as an index + * into the hardware instructions. + * + * The BranchTarget field isn't needed, however. Mesa's GLSL compiler + * emits CAL and BGNSUB instructions with labels that can be used to map + * subroutine calls to actual subroutine code blocks. + * + * The structures and function here implement patching of CAL instructions + * so they jump to the right subroutine code... + */ + + +/** + * For each OPCODE_BGNSUB we create one of these. + */ +struct brw_eu_label +{ + GLuint label; /**< the label number */ + GLuint position; /**< the position of the brw instruction for this label */ + struct brw_eu_label *next; /**< next in linked list */ +}; + + +/** + * For each OPCODE_CAL we create one of these. + */ +struct brw_eu_call +{ + GLuint call_inst_pos; /**< location of the CAL instruction */ + GLuint label; + struct brw_eu_call *next; /**< next in linked list */ +}; + + +/** + * Called for each OPCODE_BGNSUB. + */ +void +brw_save_label(struct brw_compile *c, unsigned l, GLuint position) +{ + struct brw_eu_label *label = CALLOC_STRUCT(brw_eu_label); + label->label = l; + label->position = position; + label->next = c->first_label; + c->first_label = label; +} + + +/** + * Called for each OPCODE_CAL. + */ +void +brw_save_call(struct brw_compile *c, GLuint label, GLuint call_pos) +{ + struct brw_eu_call *call = CALLOC_STRUCT(brw_eu_call); + call->call_inst_pos = call_pos; + call->label = label; + call->next = c->first_call; + c->first_call = call; +} + + +/** + * Lookup a label, return label's position/offset. + */ +static GLuint +brw_lookup_label(struct brw_compile *c, unsigned l) +{ + const struct brw_eu_label *label; + for (label = c->first_label; label; label = label->next) { + if (l == label->label) { + return label->position; + } + } + abort(); /* should never happen */ + return ~0; +} + + +/** + * When we're done generating code, this function is called to resolve + * subroutine calls. + */ +void +brw_resolve_cals(struct brw_compile *c) +{ + const struct brw_eu_call *call; + + for (call = c->first_call; call; call = call->next) { + const GLuint sub_loc = brw_lookup_label(c, call->label); + struct brw_instruction *brw_call_inst = &c->store[call->call_inst_pos]; + struct brw_instruction *brw_sub_inst = &c->store[sub_loc]; + GLint offset = brw_sub_inst - brw_call_inst; + + /* patch brw_inst1 to point to brw_inst2 */ + brw_set_src1(brw_call_inst, brw_imm_d(offset * 16)); + } + + /* free linked list of calls */ + { + struct brw_eu_call *call, *next; + for (call = c->first_call; call; call = next) { + next = call->next; + FREE(call); + } + c->first_call = NULL; + } + + /* free linked list of labels */ + { + struct brw_eu_label *label, *next; + for (label = c->first_label; label; label = next) { + next = label->next; + FREE(label); + } + c->first_label = NULL; + } +} diff --git a/src/gallium/drivers/i965/brw_eu.h b/src/gallium/drivers/i965/brw_eu.h new file mode 100644 index 00000000000..af509b2e5f4 --- /dev/null +++ b/src/gallium/drivers/i965/brw_eu.h @@ -0,0 +1,992 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#ifndef BRW_EU_H +#define BRW_EU_H + +#include "util/u_debug.h" +#include "pipe/p_defines.h" + +#include "brw_structs.h" +#include "brw_defines.h" + +#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6)) +#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3) + +#define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3) +#define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3) +#define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0) +#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1) + +#define BRW_WRITEMASK_NONE 0x00 +#define BRW_WRITEMASK_X 0x01 +#define BRW_WRITEMASK_Y 0x02 +#define BRW_WRITEMASK_XY 0x03 +#define BRW_WRITEMASK_Z 0x04 +#define BRW_WRITEMASK_XZ 0x05 +#define BRW_WRITEMASK_YZ 0x06 +#define BRW_WRITEMASK_XYZ 0x07 +#define BRW_WRITEMASK_W 0x08 +#define BRW_WRITEMASK_XW 0x09 +#define BRW_WRITEMASK_YW 0x0A +#define BRW_WRITEMASK_XYW 0x0B +#define BRW_WRITEMASK_ZW 0x0C +#define BRW_WRITEMASK_XZW 0x0D +#define BRW_WRITEMASK_YZW 0x0E +#define BRW_WRITEMASK_XYZW 0x0F + + +#define REG_SIZE (8*4) + + +/* These aren't hardware structs, just something useful for us to pass around: + * + * Align1 operation has a lot of control over input ranges. Used in + * WM programs to implement shaders decomposed into "channel serial" + * or "structure of array" form: + */ +struct brw_reg +{ + GLuint type:4; + GLuint file:2; + GLuint nr:8; + GLuint subnr:5; /* :1 in align16 */ + GLuint negate:1; /* source only */ + GLuint abs:1; /* source only */ + GLuint vstride:4; /* source only */ + GLuint width:3; /* src only, align1 only */ + GLuint hstride:2; /* align1 only */ + GLuint address_mode:1; /* relative addressing, hopefully! */ + GLuint pad0:1; + + union { + struct { + GLuint swizzle:8; /* src only, align16 only */ + GLuint writemask:4; /* dest only, align16 only */ + GLint indirect_offset:10; /* relative addressing offset */ + GLuint pad1:10; /* two dwords total */ + } bits; + + GLfloat f; + GLint d; + GLuint ud; + } dw1; +}; + + +struct brw_indirect { + GLuint addr_subnr:4; + GLint addr_offset:10; + GLuint pad:18; +}; + + +struct brw_eu_label; +struct brw_eu_call; + + + +#define BRW_EU_MAX_INSN_STACK 5 +#define BRW_EU_MAX_INSN 10000 + +struct brw_compile { + struct brw_instruction store[BRW_EU_MAX_INSN]; + GLuint nr_insn; + + /* Allow clients to push/pop instruction state: + */ + struct brw_instruction stack[BRW_EU_MAX_INSN_STACK]; + struct brw_instruction *current; + + GLuint flag_value; + GLboolean single_program_flow; + struct brw_context *brw; + + struct brw_eu_label *first_label; /**< linked list of labels */ + struct brw_eu_call *first_call; /**< linked list of CALs */ + + boolean error; +}; + + +void +brw_save_label(struct brw_compile *c, unsigned label, GLuint position); + +void +brw_save_call(struct brw_compile *c, unsigned label, GLuint call_pos); + +void +brw_resolve_cals(struct brw_compile *c); + + + +static INLINE int type_sz( GLuint type ) +{ + switch( type ) { + case BRW_REGISTER_TYPE_UD: + case BRW_REGISTER_TYPE_D: + case BRW_REGISTER_TYPE_F: + return 4; + case BRW_REGISTER_TYPE_HF: + case BRW_REGISTER_TYPE_UW: + case BRW_REGISTER_TYPE_W: + return 2; + case BRW_REGISTER_TYPE_UB: + case BRW_REGISTER_TYPE_B: + return 1; + default: + return 0; + } +} + +/** + * Construct a brw_reg. + * \param file one of the BRW_x_REGISTER_FILE values + * \param nr register number/index + * \param subnr register sub number + * \param type one of BRW_REGISTER_TYPE_x + * \param vstride one of BRW_VERTICAL_STRIDE_x + * \param width one of BRW_WIDTH_x + * \param hstride one of BRW_HORIZONTAL_STRIDE_x + * \param swizzle one of BRW_SWIZZLE_x + * \param writemask BRW_WRITEMASK_X/Y/Z/W bitfield + */ +static INLINE struct brw_reg brw_reg( GLuint file, + GLuint nr, + GLuint subnr, + GLuint type, + GLuint vstride, + GLuint width, + GLuint hstride, + GLuint swizzle, + GLuint writemask ) +{ + struct brw_reg reg; + if (type == BRW_GENERAL_REGISTER_FILE) + assert(nr < BRW_MAX_GRF); + else if (type == BRW_MESSAGE_REGISTER_FILE) + assert(nr < BRW_MAX_MRF); + else if (type == BRW_ARCHITECTURE_REGISTER_FILE) + assert(nr <= BRW_ARF_IP); + + reg.type = type; + reg.file = file; + reg.nr = nr; + reg.subnr = subnr * type_sz(type); + reg.negate = 0; + reg.abs = 0; + reg.vstride = vstride; + reg.width = width; + reg.hstride = hstride; + reg.address_mode = BRW_ADDRESS_DIRECT; + reg.pad0 = 0; + + /* Could do better: If the reg is r5.3<0;1,0>, we probably want to + * set swizzle and writemask to W, as the lower bits of subnr will + * be lost when converted to align16. This is probably too much to + * keep track of as you'd want it adjusted by suboffset(), etc. + * Perhaps fix up when converting to align16? + */ + reg.dw1.bits.swizzle = swizzle; + reg.dw1.bits.writemask = writemask; + reg.dw1.bits.indirect_offset = 0; + reg.dw1.bits.pad1 = 0; + return reg; +} + +/** Construct float[16] register */ +static INLINE struct brw_reg brw_vec16_reg( GLuint file, + GLuint nr, + GLuint subnr ) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_16, + BRW_WIDTH_16, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + BRW_WRITEMASK_XYZW); +} + +/** Construct float[8] register */ +static INLINE struct brw_reg brw_vec8_reg( GLuint file, + GLuint nr, + GLuint subnr ) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_8, + BRW_WIDTH_8, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + BRW_WRITEMASK_XYZW); +} + +/** Construct float[4] register */ +static INLINE struct brw_reg brw_vec4_reg( GLuint file, + GLuint nr, + GLuint subnr ) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_4, + BRW_WIDTH_4, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + BRW_WRITEMASK_XYZW); +} + +/** Construct float[2] register */ +static INLINE struct brw_reg brw_vec2_reg( GLuint file, + GLuint nr, + GLuint subnr ) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_2, + BRW_WIDTH_2, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYXY, + BRW_WRITEMASK_XY); +} + +/** Construct float[1] register */ +static INLINE struct brw_reg brw_vec1_reg( GLuint file, + GLuint nr, + GLuint subnr ) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XXXX, + BRW_WRITEMASK_X); +} + + +static INLINE struct brw_reg retype( struct brw_reg reg, + GLuint type ) +{ + reg.type = type; + return reg; +} + +static INLINE struct brw_reg suboffset( struct brw_reg reg, + GLuint delta ) +{ + reg.subnr += delta * type_sz(reg.type); + return reg; +} + + +static INLINE struct brw_reg offset( struct brw_reg reg, + GLuint delta ) +{ + reg.nr += delta; + return reg; +} + + +static INLINE struct brw_reg byte_offset( struct brw_reg reg, + GLuint bytes ) +{ + GLuint newoffset = reg.nr * REG_SIZE + reg.subnr + bytes; + reg.nr = newoffset / REG_SIZE; + reg.subnr = newoffset % REG_SIZE; + return reg; +} + + +/** Construct unsigned word[16] register */ +static INLINE struct brw_reg brw_uw16_reg( GLuint file, + GLuint nr, + GLuint subnr ) +{ + return suboffset(retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +/** Construct unsigned word[8] register */ +static INLINE struct brw_reg brw_uw8_reg( GLuint file, + GLuint nr, + GLuint subnr ) +{ + return suboffset(retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +/** Construct unsigned word[1] register */ +static INLINE struct brw_reg brw_uw1_reg( GLuint file, + GLuint nr, + GLuint subnr ) +{ + return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +static INLINE struct brw_reg brw_imm_reg( GLuint type ) +{ + return brw_reg( BRW_IMMEDIATE_VALUE, + 0, + 0, + type, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + 0, + 0); +} + +/** Construct float immediate register */ +static INLINE struct brw_reg brw_imm_f( GLfloat f ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F); + imm.dw1.f = f; + return imm; +} + +/** Construct integer immediate register */ +static INLINE struct brw_reg brw_imm_d( GLint d ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D); + imm.dw1.d = d; + return imm; +} + +/** Construct uint immediate register */ +static INLINE struct brw_reg brw_imm_ud( GLuint ud ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD); + imm.dw1.ud = ud; + return imm; +} + +/** Construct ushort immediate register */ +static INLINE struct brw_reg brw_imm_uw( GLushort uw ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW); + imm.dw1.ud = uw | (uw << 16); + return imm; +} + +/** Construct short immediate register */ +static INLINE struct brw_reg brw_imm_w( GLshort w ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W); + imm.dw1.d = w | (w << 16); + return imm; +} + +/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type + * numbers alias with _V and _VF below: + */ + +/** Construct vector of eight signed half-byte values */ +static INLINE struct brw_reg brw_imm_v( GLuint v ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_8; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.dw1.ud = v; + return imm; +} + +/** Construct vector of four 8-bit float values */ +static INLINE struct brw_reg brw_imm_vf( GLuint v ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_4; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.dw1.ud = v; + return imm; +} + +#define VF_ZERO 0x0 +#define VF_ONE 0x30 +#define VF_NEG (1<<7) + +static INLINE struct brw_reg brw_imm_vf4( GLuint v0, + GLuint v1, + GLuint v2, + GLuint v3) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_4; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.dw1.ud = ((v0 << 0) | + (v1 << 8) | + (v2 << 16) | + (v3 << 24)); + return imm; +} + + +static INLINE struct brw_reg brw_address( struct brw_reg reg ) +{ + return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr); +} + +/** Construct float[1] general-purpose register */ +static INLINE struct brw_reg brw_vec1_grf( GLuint nr, GLuint subnr ) +{ + return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +/** Construct float[2] general-purpose register */ +static INLINE struct brw_reg brw_vec2_grf( GLuint nr, GLuint subnr ) +{ + return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +/** Construct float[4] general-purpose register */ +static INLINE struct brw_reg brw_vec4_grf( GLuint nr, GLuint subnr ) +{ + return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +/** Construct float[8] general-purpose register */ +static INLINE struct brw_reg brw_vec8_grf( GLuint nr, GLuint subnr ) +{ + return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + + +static INLINE struct brw_reg brw_uw8_grf( GLuint nr, GLuint subnr ) +{ + return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +static INLINE struct brw_reg brw_uw16_grf( GLuint nr, GLuint subnr ) +{ + return brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + + +/** Construct null register (usually used for setting condition codes) */ +static INLINE struct brw_reg brw_null_reg( void ) +{ + return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_NULL, + 0); +} + +static INLINE struct brw_reg brw_address_reg( GLuint subnr ) +{ + return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_ADDRESS, + subnr); +} + +/* If/else instructions break in align16 mode if writemask & swizzle + * aren't xyzw. This goes against the convention for other scalar + * regs: + */ +static INLINE struct brw_reg brw_ip_reg( void ) +{ + return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_IP, + 0, + BRW_REGISTER_TYPE_UD, + BRW_VERTICAL_STRIDE_4, /* ? */ + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XYZW, /* NOTE! */ + BRW_WRITEMASK_XYZW); /* NOTE! */ +} + +static INLINE struct brw_reg brw_acc_reg( void ) +{ + return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_ACCUMULATOR, + 0); +} + + +static INLINE struct brw_reg brw_flag_reg( void ) +{ + return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_FLAG, + 0); +} + + +static INLINE struct brw_reg brw_mask_reg( GLuint subnr ) +{ + return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_MASK, + subnr); +} + +static INLINE struct brw_reg brw_message_reg( GLuint nr ) +{ + assert(nr < BRW_MAX_MRF); + return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, + nr, + 0); +} + + + + +/* This is almost always called with a numeric constant argument, so + * make things easy to evaluate at compile time: + */ +static INLINE GLuint cvt( GLuint val ) +{ + switch (val) { + case 0: return 0; + case 1: return 1; + case 2: return 2; + case 4: return 3; + case 8: return 4; + case 16: return 5; + case 32: return 6; + } + return 0; +} + +static INLINE struct brw_reg stride( struct brw_reg reg, + GLuint vstride, + GLuint width, + GLuint hstride ) +{ + reg.vstride = cvt(vstride); + reg.width = cvt(width) - 1; + reg.hstride = cvt(hstride); + return reg; +} + + +static INLINE struct brw_reg vec16( struct brw_reg reg ) +{ + return stride(reg, 16,16,1); +} + +static INLINE struct brw_reg vec8( struct brw_reg reg ) +{ + return stride(reg, 8,8,1); +} + +static INLINE struct brw_reg vec4( struct brw_reg reg ) +{ + return stride(reg, 4,4,1); +} + +static INLINE struct brw_reg vec2( struct brw_reg reg ) +{ + return stride(reg, 2,2,1); +} + +static INLINE struct brw_reg vec1( struct brw_reg reg ) +{ + return stride(reg, 0,1,0); +} + + +static INLINE struct brw_reg get_element( struct brw_reg reg, GLuint elt ) +{ + return vec1(suboffset(reg, elt)); +} + +static INLINE struct brw_reg get_element_ud( struct brw_reg reg, GLuint elt ) +{ + return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_UD), elt)); +} + + +static INLINE struct brw_reg brw_swizzle( struct brw_reg reg, + GLuint x, + GLuint y, + GLuint z, + GLuint w) +{ + reg.dw1.bits.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(reg.dw1.bits.swizzle, x), + BRW_GET_SWZ(reg.dw1.bits.swizzle, y), + BRW_GET_SWZ(reg.dw1.bits.swizzle, z), + BRW_GET_SWZ(reg.dw1.bits.swizzle, w)); + return reg; +} + + +static INLINE struct brw_reg brw_swizzle1( struct brw_reg reg, + GLuint x ) +{ + return brw_swizzle(reg, x, x, x, x); +} + +static INLINE struct brw_reg brw_writemask( struct brw_reg reg, + GLuint mask ) +{ + reg.dw1.bits.writemask &= mask; + return reg; +} + +static INLINE struct brw_reg brw_set_writemask( struct brw_reg reg, + GLuint mask ) +{ + reg.dw1.bits.writemask = mask; + return reg; +} + +static INLINE struct brw_reg negate( struct brw_reg reg ) +{ + reg.negate ^= 1; + return reg; +} + +static INLINE struct brw_reg brw_abs( struct brw_reg reg ) +{ + reg.abs = 1; + return reg; +} + +/*********************************************************************** + */ +static INLINE struct brw_reg brw_vec4_indirect( GLuint subnr, + GLint offset ) +{ + struct brw_reg reg = brw_vec4_grf(0, 0); + reg.subnr = subnr; + reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; + reg.dw1.bits.indirect_offset = offset; + return reg; +} + +static INLINE struct brw_reg brw_vec1_indirect( GLuint subnr, + GLint offset ) +{ + struct brw_reg reg = brw_vec1_grf(0, 0); + reg.subnr = subnr; + reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; + reg.dw1.bits.indirect_offset = offset; + return reg; +} + +static INLINE struct brw_reg deref_4f(struct brw_indirect ptr, GLint offset) +{ + return brw_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset); +} + +static INLINE struct brw_reg deref_1f(struct brw_indirect ptr, GLint offset) +{ + return brw_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset); +} + +static INLINE struct brw_reg deref_4b(struct brw_indirect ptr, GLint offset) +{ + return retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B); +} + +static INLINE struct brw_reg deref_1uw(struct brw_indirect ptr, GLint offset) +{ + return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW); +} + +static INLINE struct brw_reg deref_1d(struct brw_indirect ptr, GLint offset) +{ + return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_D); +} + +static INLINE struct brw_reg deref_1ud(struct brw_indirect ptr, GLint offset) +{ + return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD); +} + +static INLINE struct brw_reg get_addr_reg(struct brw_indirect ptr) +{ + return brw_address_reg(ptr.addr_subnr); +} + +static INLINE struct brw_indirect brw_indirect_offset( struct brw_indirect ptr, GLint offset ) +{ + ptr.addr_offset += offset; + return ptr; +} + +static INLINE struct brw_indirect brw_indirect( GLuint addr_subnr, GLint offset ) +{ + struct brw_indirect ptr; + ptr.addr_subnr = addr_subnr; + ptr.addr_offset = offset; + ptr.pad = 0; + return ptr; +} + +/** Do two brw_regs refer to the same register? */ +static INLINE GLboolean +brw_same_reg(struct brw_reg r1, struct brw_reg r2) +{ + return r1.file == r2.file && r1.nr == r2.nr; +} + +static INLINE struct brw_instruction *current_insn( struct brw_compile *p) +{ + return &p->store[p->nr_insn]; +} + +void brw_pop_insn_state( struct brw_compile *p ); +void brw_push_insn_state( struct brw_compile *p ); +void brw_set_mask_control( struct brw_compile *p, GLuint value ); +void brw_set_saturate( struct brw_compile *p, GLuint value ); +void brw_set_access_mode( struct brw_compile *p, GLuint access_mode ); +void brw_set_compression_control( struct brw_compile *p, GLboolean control ); +void brw_set_predicate_control_flag_value( struct brw_compile *p, GLuint value ); +void brw_set_predicate_control( struct brw_compile *p, GLuint pc ); +void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional ); + +void brw_init_compile( struct brw_context *, struct brw_compile *p ); + +enum pipe_error brw_get_program( struct brw_compile *p, + const GLuint **program, + GLuint *sz ); + + +/* Helpers for regular instructions: + */ +#define ALU1(OP) \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0); + +#define ALU2(OP) \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0, \ + struct brw_reg src1); + +ALU1(MOV) +ALU2(SEL) +ALU1(NOT) +ALU2(AND) +ALU2(OR) +ALU2(XOR) +ALU2(SHR) +ALU2(SHL) +ALU2(RSR) +ALU2(RSL) +ALU2(ASR) +ALU2(JMPI) +ALU2(ADD) +ALU2(MUL) +ALU1(FRC) +ALU1(RNDD) +ALU1(RNDZ) +ALU2(MAC) +ALU2(MACH) +ALU1(LZD) +ALU2(DP4) +ALU2(DPH) +ALU2(DP3) +ALU2(DP2) +ALU2(LINE) + +#undef ALU1 +#undef ALU2 + + + +/* Helpers for SEND instruction: + */ +void brw_urb_WRITE(struct brw_compile *p, + struct brw_reg dest, + GLuint msg_reg_nr, + struct brw_reg src0, + GLboolean allocate, + GLboolean used, + GLuint msg_length, + GLuint response_length, + GLboolean eot, + GLboolean writes_complete, + GLuint offset, + GLuint swizzle); + +void brw_ff_sync(struct brw_compile *p, + struct brw_reg dest, + GLuint msg_reg_nr, + struct brw_reg src0, + GLboolean allocate, + GLboolean used, + GLuint msg_length, + GLuint response_length, + GLboolean eot, + GLboolean writes_complete, + GLuint offset, + GLuint swizzle); + +void brw_fb_WRITE(struct brw_compile *p, + struct brw_reg dest, + GLuint msg_reg_nr, + struct brw_reg src0, + GLuint binding_table_index, + GLuint msg_length, + GLuint response_length, + GLboolean eot); + +void brw_SAMPLE(struct brw_compile *p, + struct brw_reg dest, + GLuint msg_reg_nr, + struct brw_reg src0, + GLuint binding_table_index, + GLuint sampler, + GLuint writemask, + GLuint msg_type, + GLuint response_length, + GLuint msg_length, + GLboolean eot, + GLuint header_present, + GLuint simd_mode); + +void brw_math_16( struct brw_compile *p, + struct brw_reg dest, + GLuint function, + GLuint saturate, + GLuint msg_reg_nr, + struct brw_reg src, + GLuint precision ); + +void brw_math( struct brw_compile *p, + struct brw_reg dest, + GLuint function, + GLuint saturate, + GLuint msg_reg_nr, + struct brw_reg src, + GLuint data_type, + GLuint precision ); + +void brw_dp_READ_16( struct brw_compile *p, + struct brw_reg dest, + GLuint scratch_offset ); + +void brw_dp_READ_4( struct brw_compile *p, + struct brw_reg dest, + GLboolean relAddr, + GLuint location, + GLuint bind_table_index ); + +void brw_dp_READ_4_vs( struct brw_compile *p, + struct brw_reg dest, + GLuint oword, + GLboolean relAddr, + struct brw_reg addrReg, + GLuint location, + GLuint bind_table_index ); + +void brw_dp_WRITE_16( struct brw_compile *p, + struct brw_reg src, + GLuint scratch_offset ); + +/* If/else/endif. Works by manipulating the execution flags on each + * channel. + */ +struct brw_instruction *brw_IF(struct brw_compile *p, + GLuint execute_size); + +struct brw_instruction *brw_ELSE(struct brw_compile *p, + struct brw_instruction *if_insn); + +void brw_ENDIF(struct brw_compile *p, + struct brw_instruction *if_or_else_insn); + + +/* DO/WHILE loops: + */ +struct brw_instruction *brw_DO(struct brw_compile *p, + GLuint execute_size); + +struct brw_instruction *brw_WHILE(struct brw_compile *p, + struct brw_instruction *patch_insn); + +struct brw_instruction *brw_BREAK(struct brw_compile *p); +struct brw_instruction *brw_CONT(struct brw_compile *p); +/* Forward jumps: + */ +void brw_land_fwd_jump(struct brw_compile *p, + struct brw_instruction *jmp_insn); + + + +void brw_NOP(struct brw_compile *p); + +/* Special case: there is never a destination, execution size will be + * taken from src0: + */ +void brw_CMP(struct brw_compile *p, + struct brw_reg dest, + GLuint conditional, + struct brw_reg src0, + struct brw_reg src1); + +void brw_print_reg( struct brw_reg reg ); + + +/*********************************************************************** + * brw_eu_util.c: + */ + +void brw_copy_indirect_to_indirect(struct brw_compile *p, + struct brw_indirect dst_ptr, + struct brw_indirect src_ptr, + GLuint count); + +void brw_copy_from_indirect(struct brw_compile *p, + struct brw_reg dst, + struct brw_indirect ptr, + GLuint count); + +void brw_copy4(struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src, + GLuint count); + +void brw_copy8(struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src, + GLuint count); + +void brw_math_invert( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src); + +void brw_set_src1( struct brw_instruction *insn, + struct brw_reg reg ); +#endif diff --git a/src/gallium/drivers/i965/brw_eu_debug.c b/src/gallium/drivers/i965/brw_eu_debug.c new file mode 100644 index 00000000000..5989f5a04ee --- /dev/null +++ b/src/gallium/drivers/i965/brw_eu_debug.c @@ -0,0 +1,94 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#include "util/u_debug.h" + +#include "brw_eu.h" + +void brw_print_reg( struct brw_reg hwreg ) +{ + static const char *file[] = { + "arf", + "grf", + "msg", + "imm" + }; + + static const char *type[] = { + "ud", + "d", + "uw", + "w", + "ub", + "vf", + "hf", + "f" + }; + + debug_printf("%s%s", + hwreg.abs ? "abs/" : "", + hwreg.negate ? "-" : ""); + + if (hwreg.file == BRW_GENERAL_REGISTER_FILE && + hwreg.nr % 2 == 0 && + hwreg.subnr == 0 && + hwreg.vstride == BRW_VERTICAL_STRIDE_8 && + hwreg.width == BRW_WIDTH_8 && + hwreg.hstride == BRW_HORIZONTAL_STRIDE_1 && + hwreg.type == BRW_REGISTER_TYPE_F) { + /* vector register */ + debug_printf("vec%d", hwreg.nr); + } + else if (hwreg.file == BRW_GENERAL_REGISTER_FILE && + hwreg.vstride == BRW_VERTICAL_STRIDE_0 && + hwreg.width == BRW_WIDTH_1 && + hwreg.hstride == BRW_HORIZONTAL_STRIDE_0 && + hwreg.type == BRW_REGISTER_TYPE_F) { + /* "scalar" register */ + debug_printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4); + } + else if (hwreg.file == BRW_IMMEDIATE_VALUE) { + debug_printf("imm %f", hwreg.dw1.f); + } + else { + debug_printf("%s%d.%d<%d;%d,%d>:%s", + file[hwreg.file], + hwreg.nr, + hwreg.subnr / type_sz(hwreg.type), + hwreg.vstride ? (1<<(hwreg.vstride-1)) : 0, + 1<<hwreg.width, + hwreg.hstride ? (1<<(hwreg.hstride-1)) : 0, + type[hwreg.type]); + } +} + + + diff --git a/src/gallium/drivers/i965/brw_eu_emit.c b/src/gallium/drivers/i965/brw_eu_emit.c new file mode 100644 index 00000000000..00d8eaccbc4 --- /dev/null +++ b/src/gallium/drivers/i965/brw_eu_emit.c @@ -0,0 +1,1433 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_eu.h" +#include "brw_debug.h" +#include "brw_disasm.h" + + + + +/*********************************************************************** + * Internal helper for constructing instructions + */ + +static void guess_execution_size( struct brw_instruction *insn, + struct brw_reg reg ) +{ + if (reg.width == BRW_WIDTH_8 && + insn->header.compression_control == BRW_COMPRESSION_COMPRESSED) + insn->header.execution_size = BRW_EXECUTE_16; + else + insn->header.execution_size = reg.width; /* note - definitions are compatible */ +} + + +static void brw_set_dest( struct brw_instruction *insn, + struct brw_reg dest ) +{ + if (dest.type != BRW_ARCHITECTURE_REGISTER_FILE) + assert(dest.nr < 128); + + insn->bits1.da1.dest_reg_file = dest.file; + insn->bits1.da1.dest_reg_type = dest.type; + insn->bits1.da1.dest_address_mode = dest.address_mode; + + if (dest.address_mode == BRW_ADDRESS_DIRECT) { + insn->bits1.da1.dest_reg_nr = dest.nr; + + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits1.da1.dest_subreg_nr = dest.subnr; + if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) + dest.hstride = BRW_HORIZONTAL_STRIDE_1; + insn->bits1.da1.dest_horiz_stride = dest.hstride; + } + else { + insn->bits1.da16.dest_subreg_nr = dest.subnr / 16; + insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask; + } + } + else { + insn->bits1.ia1.dest_subreg_nr = dest.subnr; + + /* These are different sizes in align1 vs align16: + */ + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset; + if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) + dest.hstride = BRW_HORIZONTAL_STRIDE_1; + insn->bits1.ia1.dest_horiz_stride = dest.hstride; + } + else { + insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset; + } + } + + /* NEW: Set the execution size based on dest.width and + * insn->compression_control: + */ + guess_execution_size(insn, dest); +} + +static void brw_set_src0( struct brw_instruction *insn, + struct brw_reg reg ) +{ + assert(reg.file != BRW_MESSAGE_REGISTER_FILE); + + if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) + assert(reg.nr < 128); + + insn->bits1.da1.src0_reg_file = reg.file; + insn->bits1.da1.src0_reg_type = reg.type; + insn->bits2.da1.src0_abs = reg.abs; + insn->bits2.da1.src0_negate = reg.negate; + insn->bits2.da1.src0_address_mode = reg.address_mode; + + if (reg.file == BRW_IMMEDIATE_VALUE) { + insn->bits3.ud = reg.dw1.ud; + + /* Required to set some fields in src1 as well: + */ + insn->bits1.da1.src1_reg_file = 0; /* arf */ + insn->bits1.da1.src1_reg_type = reg.type; + } + else + { + if (reg.address_mode == BRW_ADDRESS_DIRECT) { + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits2.da1.src0_subreg_nr = reg.subnr; + insn->bits2.da1.src0_reg_nr = reg.nr; + } + else { + insn->bits2.da16.src0_subreg_nr = reg.subnr / 16; + insn->bits2.da16.src0_reg_nr = reg.nr; + } + } + else { + insn->bits2.ia1.src0_subreg_nr = reg.subnr; + + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset; + } + else { + insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset; + } + } + + if (insn->header.access_mode == BRW_ALIGN_1) { + if (reg.width == BRW_WIDTH_1 && + insn->header.execution_size == BRW_EXECUTE_1) { + insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0; + insn->bits2.da1.src0_width = BRW_WIDTH_1; + insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0; + } + else { + insn->bits2.da1.src0_horiz_stride = reg.hstride; + insn->bits2.da1.src0_width = reg.width; + insn->bits2.da1.src0_vert_stride = reg.vstride; + } + } + else { + insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); + insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); + insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); + insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); + + /* This is an oddity of the fact we're using the same + * descriptions for registers in align_16 as align_1: + */ + if (reg.vstride == BRW_VERTICAL_STRIDE_8) + insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4; + else + insn->bits2.da16.src0_vert_stride = reg.vstride; + } + } +} + + +void brw_set_src1( struct brw_instruction *insn, + struct brw_reg reg ) +{ + assert(reg.file != BRW_MESSAGE_REGISTER_FILE); + + assert(reg.nr < 128); + + insn->bits1.da1.src1_reg_file = reg.file; + insn->bits1.da1.src1_reg_type = reg.type; + insn->bits3.da1.src1_abs = reg.abs; + insn->bits3.da1.src1_negate = reg.negate; + + /* Only src1 can be immediate in two-argument instructions. + */ + assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE); + + if (reg.file == BRW_IMMEDIATE_VALUE) { + insn->bits3.ud = reg.dw1.ud; + } + else { + /* This is a hardware restriction, which may or may not be lifted + * in the future: + */ + assert (reg.address_mode == BRW_ADDRESS_DIRECT); + /*assert (reg.file == BRW_GENERAL_REGISTER_FILE); */ + + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits3.da1.src1_subreg_nr = reg.subnr; + insn->bits3.da1.src1_reg_nr = reg.nr; + } + else { + insn->bits3.da16.src1_subreg_nr = reg.subnr / 16; + insn->bits3.da16.src1_reg_nr = reg.nr; + } + + if (insn->header.access_mode == BRW_ALIGN_1) { + if (reg.width == BRW_WIDTH_1 && + insn->header.execution_size == BRW_EXECUTE_1) { + insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0; + insn->bits3.da1.src1_width = BRW_WIDTH_1; + insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0; + } + else { + insn->bits3.da1.src1_horiz_stride = reg.hstride; + insn->bits3.da1.src1_width = reg.width; + insn->bits3.da1.src1_vert_stride = reg.vstride; + } + } + else { + insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); + insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); + insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); + insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); + + /* This is an oddity of the fact we're using the same + * descriptions for registers in align_16 as align_1: + */ + if (reg.vstride == BRW_VERTICAL_STRIDE_8) + insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4; + else + insn->bits3.da16.src1_vert_stride = reg.vstride; + } + } +} + + + +static void brw_set_math_message( struct brw_context *brw, + struct brw_instruction *insn, + GLuint msg_length, + GLuint response_length, + GLuint function, + GLuint integer_type, + GLboolean low_precision, + GLboolean saturate, + GLuint dataType ) +{ + brw_set_src1(insn, brw_imm_d(0)); + + if (BRW_IS_IGDNG(brw)) { + insn->bits3.math_igdng.function = function; + insn->bits3.math_igdng.int_type = integer_type; + insn->bits3.math_igdng.precision = low_precision; + insn->bits3.math_igdng.saturate = saturate; + insn->bits3.math_igdng.data_type = dataType; + insn->bits3.math_igdng.snapshot = 0; + insn->bits3.math_igdng.header_present = 0; + insn->bits3.math_igdng.response_length = response_length; + insn->bits3.math_igdng.msg_length = msg_length; + insn->bits3.math_igdng.end_of_thread = 0; + insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_MATH; + insn->bits2.send_igdng.end_of_thread = 0; + } else { + insn->bits3.math.function = function; + insn->bits3.math.int_type = integer_type; + insn->bits3.math.precision = low_precision; + insn->bits3.math.saturate = saturate; + insn->bits3.math.data_type = dataType; + insn->bits3.math.response_length = response_length; + insn->bits3.math.msg_length = msg_length; + insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH; + insn->bits3.math.end_of_thread = 0; + } +} + + +static void brw_set_ff_sync_message( struct brw_context *brw, + struct brw_instruction *insn, + GLboolean allocate, + GLboolean used, + GLuint msg_length, + GLuint response_length, + GLboolean end_of_thread, + GLboolean complete, + GLuint offset, + GLuint swizzle_control ) +{ + brw_set_src1(insn, brw_imm_d(0)); + + insn->bits3.urb_igdng.opcode = 1; + insn->bits3.urb_igdng.offset = offset; + insn->bits3.urb_igdng.swizzle_control = swizzle_control; + insn->bits3.urb_igdng.allocate = allocate; + insn->bits3.urb_igdng.used = used; + insn->bits3.urb_igdng.complete = complete; + insn->bits3.urb_igdng.header_present = 1; + insn->bits3.urb_igdng.response_length = response_length; + insn->bits3.urb_igdng.msg_length = msg_length; + insn->bits3.urb_igdng.end_of_thread = end_of_thread; + insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB; + insn->bits2.send_igdng.end_of_thread = end_of_thread; +} + +static void brw_set_urb_message( struct brw_context *brw, + struct brw_instruction *insn, + GLboolean allocate, + GLboolean used, + GLuint msg_length, + GLuint response_length, + GLboolean end_of_thread, + GLboolean complete, + GLuint offset, + GLuint swizzle_control ) +{ + brw_set_src1(insn, brw_imm_d(0)); + + if (BRW_IS_IGDNG(brw)) { + insn->bits3.urb_igdng.opcode = 0; /* ? */ + insn->bits3.urb_igdng.offset = offset; + insn->bits3.urb_igdng.swizzle_control = swizzle_control; + insn->bits3.urb_igdng.allocate = allocate; + insn->bits3.urb_igdng.used = used; /* ? */ + insn->bits3.urb_igdng.complete = complete; + insn->bits3.urb_igdng.header_present = 1; + insn->bits3.urb_igdng.response_length = response_length; + insn->bits3.urb_igdng.msg_length = msg_length; + insn->bits3.urb_igdng.end_of_thread = end_of_thread; + insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB; + insn->bits2.send_igdng.end_of_thread = end_of_thread; + } else { + insn->bits3.urb.opcode = 0; /* ? */ + insn->bits3.urb.offset = offset; + insn->bits3.urb.swizzle_control = swizzle_control; + insn->bits3.urb.allocate = allocate; + insn->bits3.urb.used = used; /* ? */ + insn->bits3.urb.complete = complete; + insn->bits3.urb.response_length = response_length; + insn->bits3.urb.msg_length = msg_length; + insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB; + insn->bits3.urb.end_of_thread = end_of_thread; + } +} + +static void brw_set_dp_write_message( struct brw_context *brw, + struct brw_instruction *insn, + GLuint binding_table_index, + GLuint msg_control, + GLuint msg_type, + GLuint msg_length, + GLuint pixel_scoreboard_clear, + GLuint response_length, + GLuint end_of_thread ) +{ + brw_set_src1(insn, brw_imm_d(0)); + + if (BRW_IS_IGDNG(brw)) { + insn->bits3.dp_write_igdng.binding_table_index = binding_table_index; + insn->bits3.dp_write_igdng.msg_control = msg_control; + insn->bits3.dp_write_igdng.pixel_scoreboard_clear = pixel_scoreboard_clear; + insn->bits3.dp_write_igdng.msg_type = msg_type; + insn->bits3.dp_write_igdng.send_commit_msg = 0; + insn->bits3.dp_write_igdng.header_present = 1; + insn->bits3.dp_write_igdng.response_length = response_length; + insn->bits3.dp_write_igdng.msg_length = msg_length; + insn->bits3.dp_write_igdng.end_of_thread = end_of_thread; + insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE; + insn->bits2.send_igdng.end_of_thread = end_of_thread; + } else { + insn->bits3.dp_write.binding_table_index = binding_table_index; + insn->bits3.dp_write.msg_control = msg_control; + insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear; + insn->bits3.dp_write.msg_type = msg_type; + insn->bits3.dp_write.send_commit_msg = 0; + insn->bits3.dp_write.response_length = response_length; + insn->bits3.dp_write.msg_length = msg_length; + insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE; + insn->bits3.dp_write.end_of_thread = end_of_thread; + } +} + +static void brw_set_dp_read_message( struct brw_context *brw, + struct brw_instruction *insn, + GLuint binding_table_index, + GLuint msg_control, + GLuint msg_type, + GLuint target_cache, + GLuint msg_length, + GLuint response_length, + GLuint end_of_thread ) +{ + brw_set_src1(insn, brw_imm_d(0)); + + if (BRW_IS_IGDNG(brw)) { + insn->bits3.dp_read_igdng.binding_table_index = binding_table_index; + insn->bits3.dp_read_igdng.msg_control = msg_control; + insn->bits3.dp_read_igdng.msg_type = msg_type; + insn->bits3.dp_read_igdng.target_cache = target_cache; + insn->bits3.dp_read_igdng.header_present = 1; + insn->bits3.dp_read_igdng.response_length = response_length; + insn->bits3.dp_read_igdng.msg_length = msg_length; + insn->bits3.dp_read_igdng.pad1 = 0; + insn->bits3.dp_read_igdng.end_of_thread = end_of_thread; + insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ; + insn->bits2.send_igdng.end_of_thread = end_of_thread; + } else { + insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/ + insn->bits3.dp_read.msg_control = msg_control; /*8:11*/ + insn->bits3.dp_read.msg_type = msg_type; /*12:13*/ + insn->bits3.dp_read.target_cache = target_cache; /*14:15*/ + insn->bits3.dp_read.response_length = response_length; /*16:19*/ + insn->bits3.dp_read.msg_length = msg_length; /*20:23*/ + insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/ + insn->bits3.dp_read.pad1 = 0; /*28:30*/ + insn->bits3.dp_read.end_of_thread = end_of_thread; /*31*/ + } +} + +static void brw_set_sampler_message(struct brw_context *brw, + struct brw_instruction *insn, + GLuint binding_table_index, + GLuint sampler, + GLuint msg_type, + GLuint response_length, + GLuint msg_length, + GLboolean eot, + GLuint header_present, + GLuint simd_mode) +{ + assert(eot == 0); + brw_set_src1(insn, brw_imm_d(0)); + + if (BRW_IS_IGDNG(brw)) { + insn->bits3.sampler_igdng.binding_table_index = binding_table_index; + insn->bits3.sampler_igdng.sampler = sampler; + insn->bits3.sampler_igdng.msg_type = msg_type; + insn->bits3.sampler_igdng.simd_mode = simd_mode; + insn->bits3.sampler_igdng.header_present = header_present; + insn->bits3.sampler_igdng.response_length = response_length; + insn->bits3.sampler_igdng.msg_length = msg_length; + insn->bits3.sampler_igdng.end_of_thread = eot; + insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_SAMPLER; + insn->bits2.send_igdng.end_of_thread = eot; + } else if (BRW_IS_G4X(brw)) { + insn->bits3.sampler_g4x.binding_table_index = binding_table_index; + insn->bits3.sampler_g4x.sampler = sampler; + insn->bits3.sampler_g4x.msg_type = msg_type; + insn->bits3.sampler_g4x.response_length = response_length; + insn->bits3.sampler_g4x.msg_length = msg_length; + insn->bits3.sampler_g4x.end_of_thread = eot; + insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER; + } else { + insn->bits3.sampler.binding_table_index = binding_table_index; + insn->bits3.sampler.sampler = sampler; + insn->bits3.sampler.msg_type = msg_type; + insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32; + insn->bits3.sampler.response_length = response_length; + insn->bits3.sampler.msg_length = msg_length; + insn->bits3.sampler.end_of_thread = eot; + insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER; + } +} + + + +static struct brw_instruction *next_insn( struct brw_compile *p, + GLuint opcode ) +{ + struct brw_instruction *insn; + + if (0 && (BRW_DEBUG & DEBUG_DISASSEM)) + { + if (p->nr_insn) + brw_disasm_insn(stderr, &p->store[p->nr_insn-1]); + } + + assert(p->nr_insn + 1 < BRW_EU_MAX_INSN); + + insn = &p->store[p->nr_insn++]; + memcpy(insn, p->current, sizeof(*insn)); + + /* Reset this one-shot flag: + */ + + if (p->current->header.destreg__conditionalmod) { + p->current->header.destreg__conditionalmod = 0; + p->current->header.predicate_control = BRW_PREDICATE_NORMAL; + } + + insn->header.opcode = opcode; + return insn; +} + + +static struct brw_instruction *brw_alu1( struct brw_compile *p, + GLuint opcode, + struct brw_reg dest, + struct brw_reg src ) +{ + struct brw_instruction *insn = next_insn(p, opcode); + brw_set_dest(insn, dest); + brw_set_src0(insn, src); + return insn; +} + +static struct brw_instruction *brw_alu2(struct brw_compile *p, + GLuint opcode, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1 ) +{ + struct brw_instruction *insn = next_insn(p, opcode); + brw_set_dest(insn, dest); + brw_set_src0(insn, src0); + brw_set_src1(insn, src1); + return insn; +} + + +/*********************************************************************** + * Convenience routines. + */ +#define ALU1(OP) \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0) \ +{ \ + return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \ +} + +#define ALU2(OP) \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0, \ + struct brw_reg src1) \ +{ \ + return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \ +} + + +ALU1(MOV) +ALU2(SEL) +ALU1(NOT) +ALU2(AND) +ALU2(OR) +ALU2(XOR) +ALU2(SHR) +ALU2(SHL) +ALU2(RSR) +ALU2(RSL) +ALU2(ASR) +ALU2(ADD) +ALU2(MUL) +ALU1(FRC) +ALU1(RNDD) +ALU1(RNDZ) +ALU2(MAC) +ALU2(MACH) +ALU1(LZD) +ALU2(DP4) +ALU2(DPH) +ALU2(DP3) +ALU2(DP2) +ALU2(LINE) + + + + +void brw_NOP(struct brw_compile *p) +{ + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP); + brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src1(insn, brw_imm_ud(0x0)); +} + + + + + +/*********************************************************************** + * Comparisons, if/else/endif + */ + +struct brw_instruction *brw_JMPI(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1) +{ + struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1); + + insn->header.execution_size = 1; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.mask_control = BRW_MASK_DISABLE; + + p->current->header.predicate_control = BRW_PREDICATE_NONE; + + return insn; +} + +/* EU takes the value from the flag register and pushes it onto some + * sort of a stack (presumably merging with any flag value already on + * the stack). Within an if block, the flags at the top of the stack + * control execution on each channel of the unit, eg. on each of the + * 16 pixel values in our wm programs. + * + * When the matching 'else' instruction is reached (presumably by + * countdown of the instruction count patched in by our ELSE/ENDIF + * functions), the relevent flags are inverted. + * + * When the matching 'endif' instruction is reached, the flags are + * popped off. If the stack is now empty, normal execution resumes. + * + * No attempt is made to deal with stack overflow (14 elements?). + */ +struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size) +{ + struct brw_instruction *insn; + + if (p->single_program_flow) { + assert(execute_size == BRW_EXECUTE_1); + + insn = next_insn(p, BRW_OPCODE_ADD); + insn->header.predicate_inverse = 1; + } else { + insn = next_insn(p, BRW_OPCODE_IF); + } + + /* Override the defaults for this instruction: + */ + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + + insn->header.execution_size = execute_size; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.predicate_control = BRW_PREDICATE_NORMAL; + insn->header.mask_control = BRW_MASK_ENABLE; + if (!p->single_program_flow) + insn->header.thread_control = BRW_THREAD_SWITCH; + + p->current->header.predicate_control = BRW_PREDICATE_NONE; + + return insn; +} + + +struct brw_instruction *brw_ELSE(struct brw_compile *p, + struct brw_instruction *if_insn) +{ + struct brw_instruction *insn; + GLuint br = 1; + + if (BRW_IS_IGDNG(p->brw)) + br = 2; + + if (p->single_program_flow) { + insn = next_insn(p, BRW_OPCODE_ADD); + } else { + insn = next_insn(p, BRW_OPCODE_ELSE); + } + + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = if_insn->header.execution_size; + insn->header.mask_control = BRW_MASK_ENABLE; + if (!p->single_program_flow) + insn->header.thread_control = BRW_THREAD_SWITCH; + + /* Patch the if instruction to point at this instruction. + */ + if (p->single_program_flow) { + assert(if_insn->header.opcode == BRW_OPCODE_ADD); + + if_insn->bits3.ud = (insn - if_insn + 1) * 16; + } else { + assert(if_insn->header.opcode == BRW_OPCODE_IF); + + if_insn->bits3.if_else.jump_count = br * (insn - if_insn); + if_insn->bits3.if_else.pop_count = 0; + if_insn->bits3.if_else.pad0 = 0; + } + + return insn; +} + +void brw_ENDIF(struct brw_compile *p, + struct brw_instruction *patch_insn) +{ + GLuint br = 1; + + if (BRW_IS_IGDNG(p->brw)) + br = 2; + + if (p->single_program_flow) { + /* In single program flow mode, there's no need to execute an ENDIF, + * since we don't need to do any stack operations, and if we're executing + * currently, we want to just continue executing. + */ + struct brw_instruction *next = &p->store[p->nr_insn]; + + assert(patch_insn->header.opcode == BRW_OPCODE_ADD); + + patch_insn->bits3.ud = (next - patch_insn) * 16; + } else { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF); + + brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src1(insn, brw_imm_d(0x0)); + + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = patch_insn->header.execution_size; + insn->header.mask_control = BRW_MASK_ENABLE; + insn->header.thread_control = BRW_THREAD_SWITCH; + + assert(patch_insn->bits3.if_else.jump_count == 0); + + /* Patch the if or else instructions to point at this or the next + * instruction respectively. + */ + if (patch_insn->header.opcode == BRW_OPCODE_IF) { + /* Automagically turn it into an IFF: + */ + patch_insn->header.opcode = BRW_OPCODE_IFF; + patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1); + patch_insn->bits3.if_else.pop_count = 0; + patch_insn->bits3.if_else.pad0 = 0; + } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) { + patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1); + patch_insn->bits3.if_else.pop_count = 1; + patch_insn->bits3.if_else.pad0 = 0; + } else { + assert(0); + } + + /* Also pop item off the stack in the endif instruction: + */ + insn->bits3.if_else.jump_count = 0; + insn->bits3.if_else.pop_count = 1; + insn->bits3.if_else.pad0 = 0; + } +} + +struct brw_instruction *brw_BREAK(struct brw_compile *p) +{ + struct brw_instruction *insn; + insn = next_insn(p, BRW_OPCODE_BREAK); + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = BRW_EXECUTE_8; + /* insn->header.mask_control = BRW_MASK_DISABLE; */ + insn->bits3.if_else.pad0 = 0; + return insn; +} + +struct brw_instruction *brw_CONT(struct brw_compile *p) +{ + struct brw_instruction *insn; + insn = next_insn(p, BRW_OPCODE_CONTINUE); + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = BRW_EXECUTE_8; + /* insn->header.mask_control = BRW_MASK_DISABLE; */ + insn->bits3.if_else.pad0 = 0; + return insn; +} + +/* DO/WHILE loop: + */ +struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size) +{ + if (p->single_program_flow) { + return &p->store[p->nr_insn]; + } else { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO); + + /* Override the defaults for this instruction: + */ + brw_set_dest(insn, brw_null_reg()); + brw_set_src0(insn, brw_null_reg()); + brw_set_src1(insn, brw_null_reg()); + + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = execute_size; + insn->header.predicate_control = BRW_PREDICATE_NONE; + /* insn->header.mask_control = BRW_MASK_ENABLE; */ + /* insn->header.mask_control = BRW_MASK_DISABLE; */ + + return insn; + } +} + + + +struct brw_instruction *brw_WHILE(struct brw_compile *p, + struct brw_instruction *do_insn) +{ + struct brw_instruction *insn; + GLuint br = 1; + + if (BRW_IS_IGDNG(p->brw)) + br = 2; + + if (p->single_program_flow) + insn = next_insn(p, BRW_OPCODE_ADD); + else + insn = next_insn(p, BRW_OPCODE_WHILE); + + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + + insn->header.compression_control = BRW_COMPRESSION_NONE; + + if (p->single_program_flow) { + insn->header.execution_size = BRW_EXECUTE_1; + + insn->bits3.d = (do_insn - insn) * 16; + } else { + insn->header.execution_size = do_insn->header.execution_size; + + assert(do_insn->header.opcode == BRW_OPCODE_DO); + insn->bits3.if_else.jump_count = br * (do_insn - insn + 1); + insn->bits3.if_else.pop_count = 0; + insn->bits3.if_else.pad0 = 0; + } + +/* insn->header.mask_control = BRW_MASK_ENABLE; */ + + /* insn->header.mask_control = BRW_MASK_DISABLE; */ + p->current->header.predicate_control = BRW_PREDICATE_NONE; + return insn; +} + + +/* FORWARD JUMPS: + */ +void brw_land_fwd_jump(struct brw_compile *p, + struct brw_instruction *jmp_insn) +{ + struct brw_instruction *landing = &p->store[p->nr_insn]; + GLuint jmpi = 1; + + if (BRW_IS_IGDNG(p->brw)) + jmpi = 2; + + assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI); + assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE); + + jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1); +} + + + +/* To integrate with the above, it makes sense that the comparison + * instruction should populate the flag register. It might be simpler + * just to use the flag reg for most WM tasks? + */ +void brw_CMP(struct brw_compile *p, + struct brw_reg dest, + GLuint conditional, + struct brw_reg src0, + struct brw_reg src1) +{ + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP); + + insn->header.destreg__conditionalmod = conditional; + brw_set_dest(insn, dest); + brw_set_src0(insn, src0); + brw_set_src1(insn, src1); + +/* guess_execution_size(insn, src0); */ + + + /* Make it so that future instructions will use the computed flag + * value until brw_set_predicate_control_flag_value() is called + * again. + */ + if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE && + dest.nr == 0) { + p->current->header.predicate_control = BRW_PREDICATE_NORMAL; + p->flag_value = 0xff; + } +} + + + +/*********************************************************************** + * Helpers for the various SEND message types: + */ + +/** Extended math function, float[8]. + */ +void brw_math( struct brw_compile *p, + struct brw_reg dest, + GLuint function, + GLuint saturate, + GLuint msg_reg_nr, + struct brw_reg src, + GLuint data_type, + GLuint precision ) +{ + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; + GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; + + /* Example code doesn't set predicate_control for send + * instructions. + */ + insn->header.predicate_control = 0; + insn->header.destreg__conditionalmod = msg_reg_nr; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src); + brw_set_math_message(p->brw, + insn, + msg_length, response_length, + function, + BRW_MATH_INTEGER_UNSIGNED, + precision, + saturate, + data_type); +} + +/** + * Extended math function, float[16]. + * Use 2 send instructions. + */ +void brw_math_16( struct brw_compile *p, + struct brw_reg dest, + GLuint function, + GLuint saturate, + GLuint msg_reg_nr, + struct brw_reg src, + GLuint precision ) +{ + struct brw_instruction *insn; + GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; + GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; + + /* First instruction: + */ + brw_push_insn_state(p); + brw_set_predicate_control_flag_value(p, 0xff); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + insn = next_insn(p, BRW_OPCODE_SEND); + insn->header.destreg__conditionalmod = msg_reg_nr; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src); + brw_set_math_message(p->brw, + insn, + msg_length, response_length, + function, + BRW_MATH_INTEGER_UNSIGNED, + precision, + saturate, + BRW_MATH_DATA_VECTOR); + + /* Second instruction: + */ + insn = next_insn(p, BRW_OPCODE_SEND); + insn->header.compression_control = BRW_COMPRESSION_2NDHALF; + insn->header.destreg__conditionalmod = msg_reg_nr+1; + + brw_set_dest(insn, offset(dest,1)); + brw_set_src0(insn, src); + brw_set_math_message(p->brw, + insn, + msg_length, response_length, + function, + BRW_MATH_INTEGER_UNSIGNED, + precision, + saturate, + BRW_MATH_DATA_VECTOR); + + brw_pop_insn_state(p); +} + + +/** + * Write block of 16 dwords/floats to the data port Render Cache scratch buffer. + * Scratch offset should be a multiple of 64. + * Used for register spilling. + */ +void brw_dp_WRITE_16( struct brw_compile *p, + struct brw_reg src, + GLuint scratch_offset ) +{ + GLuint msg_reg_nr = 1; + { + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + /* set message header global offset field (reg 0, element 2) */ + brw_MOV(p, + retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), + brw_imm_d(scratch_offset)); + + brw_pop_insn_state(p); + } + + { + GLuint msg_length = 3; + struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = 0; /* XXX */ + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditionalmod = msg_reg_nr; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src); + + brw_set_dp_write_message(p->brw, + insn, + 255, /* binding table index (255=stateless) */ + BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */ + BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */ + msg_length, + 0, /* pixel scoreboard */ + 0, /* response_length */ + 0); /* eot */ + } +} + + +/** + * Read block of 16 dwords/floats from the data port Render Cache scratch buffer. + * Scratch offset should be a multiple of 64. + * Used for register spilling. + */ +void brw_dp_READ_16( struct brw_compile *p, + struct brw_reg dest, + GLuint scratch_offset ) +{ + GLuint msg_reg_nr = 1; + { + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + + /* set message header global offset field (reg 0, element 2) */ + brw_MOV(p, + retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), + brw_imm_d(scratch_offset)); + + brw_pop_insn_state(p); + } + + { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = 0; /* XXX */ + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditionalmod = msg_reg_nr; + + brw_set_dest(insn, dest); /* UW? */ + brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); + + brw_set_dp_read_message(p->brw, + insn, + 255, /* binding table index (255=stateless) */ + 3, /* msg_control (3 means 4 Owords) */ + BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ + 1, /* target cache (render/scratch) */ + 1, /* msg_length */ + 2, /* response_length */ + 0); /* eot */ + } +} + + +/** + * Read a float[4] vector from the data port Data Cache (const buffer). + * Location (in buffer) should be a multiple of 16. + * Used for fetching shader constants. + * If relAddr is true, we'll do an indirect fetch using the address register. + */ +void brw_dp_READ_4( struct brw_compile *p, + struct brw_reg dest, + GLboolean relAddr, + GLuint location, + GLuint bind_table_index ) +{ + /* XXX: relAddr not implemented */ + GLuint msg_reg_nr = 1; + { + struct brw_reg b; + brw_push_insn_state(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + + /* Setup MRF[1] with location/offset into const buffer */ + b = brw_message_reg(msg_reg_nr); + b = retype(b, BRW_REGISTER_TYPE_UD); + /* XXX I think we're setting all the dwords of MRF[1] to 'location'. + * when the docs say only dword[2] should be set. Hmmm. But it works. + */ + brw_MOV(p, b, brw_imm_ud(location)); + brw_pop_insn_state(p); + } + + { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = BRW_PREDICATE_NONE; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditionalmod = msg_reg_nr; + insn->header.mask_control = BRW_MASK_DISABLE; + + /* cast dest to a uword[8] vector */ + dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW); + + brw_set_dest(insn, dest); + brw_set_src0(insn, brw_null_reg()); + + brw_set_dp_read_message(p->brw, + insn, + bind_table_index, + 0, /* msg_control (0 means 1 Oword) */ + BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ + 0, /* source cache = data cache */ + 1, /* msg_length */ + 1, /* response_length (1 Oword) */ + 0); /* eot */ + } +} + + +/** + * Read float[4] constant(s) from VS constant buffer. + * For relative addressing, two float[4] constants will be read into 'dest'. + * Otherwise, one float[4] constant will be read into the lower half of 'dest'. + */ +void brw_dp_READ_4_vs(struct brw_compile *p, + struct brw_reg dest, + GLuint oword, + GLboolean relAddr, + struct brw_reg addrReg, + GLuint location, + GLuint bind_table_index) +{ + GLuint msg_reg_nr = 1; + + assert(oword < 2); + /* + printf("vs const read msg, location %u, msg_reg_nr %d\n", + location, msg_reg_nr); + */ + + /* Setup MRF[1] with location/offset into const buffer */ + { + struct brw_reg b; + + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + /*brw_set_access_mode(p, BRW_ALIGN_16);*/ + + /* XXX I think we're setting all the dwords of MRF[1] to 'location'. + * when the docs say only dword[2] should be set. Hmmm. But it works. + */ + b = brw_message_reg(msg_reg_nr); + b = retype(b, BRW_REGISTER_TYPE_UD); + /*b = get_element_ud(b, 2);*/ + if (relAddr) { + brw_ADD(p, b, addrReg, brw_imm_ud(location)); + } + else { + brw_MOV(p, b, brw_imm_ud(location)); + } + + brw_pop_insn_state(p); + } + + { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = BRW_PREDICATE_NONE; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditionalmod = msg_reg_nr; + insn->header.mask_control = BRW_MASK_DISABLE; + /*insn->header.access_mode = BRW_ALIGN_16;*/ + + brw_set_dest(insn, dest); + brw_set_src0(insn, brw_null_reg()); + + brw_set_dp_read_message(p->brw, + insn, + bind_table_index, + oword, /* 0 = lower Oword, 1 = upper Oword */ + BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ + 0, /* source cache = data cache */ + 1, /* msg_length */ + 1, /* response_length (1 Oword) */ + 0); /* eot */ + } +} + + + +void brw_fb_WRITE(struct brw_compile *p, + struct brw_reg dest, + GLuint msg_reg_nr, + struct brw_reg src0, + GLuint binding_table_index, + GLuint msg_length, + GLuint response_length, + GLboolean eot) +{ + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = 0; /* XXX */ + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditionalmod = msg_reg_nr; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src0); + brw_set_dp_write_message(p->brw, + insn, + binding_table_index, + BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */ + BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */ + msg_length, + 1, /* pixel scoreboard */ + response_length, + eot); +} + + +/** + * Texture sample instruction. + * Note: the msg_type plus msg_length values determine exactly what kind + * of sampling operation is performed. See volume 4, page 161 of docs. + */ +void brw_SAMPLE(struct brw_compile *p, + struct brw_reg dest, + GLuint msg_reg_nr, + struct brw_reg src0, + GLuint binding_table_index, + GLuint sampler, + GLuint writemask, + GLuint msg_type, + GLuint response_length, + GLuint msg_length, + GLboolean eot, + GLuint header_present, + GLuint simd_mode) +{ + GLboolean need_stall = 0; + + if (writemask == 0) { + /*debug_printf("%s: zero writemask??\n", __FUNCTION__); */ + return; + } + + /* Hardware doesn't do destination dependency checking on send + * instructions properly. Add a workaround which generates the + * dependency by other means. In practice it seems like this bug + * only crops up for texture samples, and only where registers are + * written by the send and then written again later without being + * read in between. Luckily for us, we already track that + * information and use it to modify the writemask for the + * instruction, so that is a guide for whether a workaround is + * needed. + */ + if (writemask != BRW_WRITEMASK_XYZW) { + GLuint dst_offset = 0; + GLuint i, newmask = 0, len = 0; + + for (i = 0; i < 4; i++) { + if (writemask & (1<<i)) + break; + dst_offset += 2; + } + for (; i < 4; i++) { + if (!(writemask & (1<<i))) + break; + newmask |= 1<<i; + len++; + } + + if (newmask != writemask) { + need_stall = 1; + /* debug_printf("need stall %x %x\n", newmask , writemask); */ + } + else { + struct brw_reg m1 = brw_message_reg(msg_reg_nr); + + newmask = ~newmask & BRW_WRITEMASK_XYZW; + + brw_push_insn_state(p); + + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + + brw_MOV(p, m1, brw_vec8_grf(0,0)); + brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12)); + + brw_pop_insn_state(p); + + src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); + dest = offset(dest, dst_offset); + response_length = len * 2; + } + } + + { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = 0; /* XXX */ + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditionalmod = msg_reg_nr; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src0); + brw_set_sampler_message(p->brw, insn, + binding_table_index, + sampler, + msg_type, + response_length, + msg_length, + eot, + header_present, + simd_mode); + } + + if (need_stall) { + struct brw_reg reg = vec8(offset(dest, response_length-1)); + + /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 } + */ + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MOV(p, reg, reg); + brw_pop_insn_state(p); + } + +} + +/* All these variables are pretty confusing - we might be better off + * using bitmasks and macros for this, in the old style. Or perhaps + * just having the caller instantiate the fields in dword3 itself. + */ +void brw_urb_WRITE(struct brw_compile *p, + struct brw_reg dest, + GLuint msg_reg_nr, + struct brw_reg src0, + GLboolean allocate, + GLboolean used, + GLuint msg_length, + GLuint response_length, + GLboolean eot, + GLboolean writes_complete, + GLuint offset, + GLuint swizzle) +{ + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + assert(msg_length < BRW_MAX_MRF); + + brw_set_dest(insn, dest); + brw_set_src0(insn, src0); + brw_set_src1(insn, brw_imm_d(0)); + + insn->header.destreg__conditionalmod = msg_reg_nr; + + brw_set_urb_message(p->brw, + insn, + allocate, + used, + msg_length, + response_length, + eot, + writes_complete, + offset, + swizzle); +} + +void brw_ff_sync(struct brw_compile *p, + struct brw_reg dest, + GLuint msg_reg_nr, + struct brw_reg src0, + GLboolean allocate, + GLboolean used, + GLuint msg_length, + GLuint response_length, + GLboolean eot, + GLboolean writes_complete, + GLuint offset, + GLuint swizzle) +{ + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + assert(msg_length < 16); + + brw_set_dest(insn, dest); + brw_set_src0(insn, src0); + brw_set_src1(insn, brw_imm_d(0)); + + insn->header.destreg__conditionalmod = msg_reg_nr; + + brw_set_ff_sync_message(p->brw, + insn, + allocate, + used, + msg_length, + response_length, + eot, + writes_complete, + offset, + swizzle); +} diff --git a/src/gallium/drivers/i965/brw_eu_util.c b/src/gallium/drivers/i965/brw_eu_util.c new file mode 100644 index 00000000000..5405cf17a4e --- /dev/null +++ b/src/gallium/drivers/i965/brw_eu_util.c @@ -0,0 +1,126 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_eu.h" + + +void brw_math_invert( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src) +{ + brw_math( p, + dst, + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 0, + src, + BRW_MATH_PRECISION_FULL, + BRW_MATH_DATA_VECTOR ); +} + + + +void brw_copy4(struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src, + GLuint count) +{ + GLuint i; + + dst = vec4(dst); + src = vec4(src); + + for (i = 0; i < count; i++) + { + GLuint delta = i*32; + brw_MOV(p, byte_offset(dst, delta), byte_offset(src, delta)); + brw_MOV(p, byte_offset(dst, delta+16), byte_offset(src, delta+16)); + } +} + + +void brw_copy8(struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src, + GLuint count) +{ + GLuint i; + + dst = vec8(dst); + src = vec8(src); + + for (i = 0; i < count; i++) + { + GLuint delta = i*32; + brw_MOV(p, byte_offset(dst, delta), byte_offset(src, delta)); + } +} + + +void brw_copy_indirect_to_indirect(struct brw_compile *p, + struct brw_indirect dst_ptr, + struct brw_indirect src_ptr, + GLuint count) +{ + GLuint i; + + for (i = 0; i < count; i++) + { + GLuint delta = i*32; + brw_MOV(p, deref_4f(dst_ptr, delta), deref_4f(src_ptr, delta)); + brw_MOV(p, deref_4f(dst_ptr, delta+16), deref_4f(src_ptr, delta+16)); + } +} + + +void brw_copy_from_indirect(struct brw_compile *p, + struct brw_reg dst, + struct brw_indirect ptr, + GLuint count) +{ + GLuint i; + + dst = vec4(dst); + + for (i = 0; i < count; i++) + { + GLuint delta = i*32; + brw_MOV(p, byte_offset(dst, delta), deref_4f(ptr, delta)); + brw_MOV(p, byte_offset(dst, delta+16), deref_4f(ptr, delta+16)); + } +} + + + + diff --git a/src/gallium/drivers/i965/brw_gs.c b/src/gallium/drivers/i965/brw_gs.c new file mode 100644 index 00000000000..921b201bae2 --- /dev/null +++ b/src/gallium/drivers/i965/brw_gs.c @@ -0,0 +1,216 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#include "brw_batchbuffer.h" + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_state.h" +#include "brw_gs.h" + + + +static enum pipe_error compile_gs_prog( struct brw_context *brw, + struct brw_gs_prog_key *key, + struct brw_winsys_buffer **bo_out ) +{ + struct brw_gs_compile c; + enum pipe_error ret; + const GLuint *program; + GLuint program_size; + + memset(&c, 0, sizeof(c)); + + c.key = *key; + c.need_ff_sync = BRW_IS_IGDNG(brw); + /* Need to locate the two positions present in vertex + header. + * These are currently hardcoded: + */ + c.nr_attrs = c.key.nr_attrs; + + if (BRW_IS_IGDNG(brw)) + c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */ + else + c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */ + + c.nr_bytes = c.nr_regs * REG_SIZE; + + + /* Begin the compilation: + */ + brw_init_compile(brw, &c.func); + + c.func.single_program_flow = 1; + + /* For some reason the thread is spawned with only 4 channels + * unmasked. + */ + brw_set_mask_control(&c.func, BRW_MASK_DISABLE); + + + /* Note that primitives which don't require a GS program have + * already been weeded out by this stage: + */ + switch (key->primitive) { + case PIPE_PRIM_QUADS: + brw_gs_quads( &c ); + break; + case PIPE_PRIM_QUAD_STRIP: + brw_gs_quad_strip( &c ); + break; + case PIPE_PRIM_LINE_LOOP: + brw_gs_lines( &c ); + break; + case PIPE_PRIM_LINES: + if (key->hint_gs_always) + brw_gs_lines( &c ); + else { + return PIPE_OK; + } + break; + case PIPE_PRIM_TRIANGLES: + if (key->hint_gs_always) + brw_gs_tris( &c ); + else { + return PIPE_OK; + } + break; + case PIPE_PRIM_POINTS: + if (key->hint_gs_always) + brw_gs_points( &c ); + else { + return PIPE_OK; + } + break; + default: + assert(0); + return PIPE_ERROR_BAD_INPUT; + } + + /* get the program + */ + ret = brw_get_program(&c.func, &program, &program_size); + if (ret) + return ret; + + /* Upload + */ + ret = brw_upload_cache( &brw->cache, BRW_GS_PROG, + &c.key, sizeof(c.key), + NULL, 0, + program, program_size, + &c.prog_data, + &brw->gs.prog_data, + bo_out ); + if (ret) + return ret; + + return PIPE_OK; +} + +static const unsigned gs_prim[PIPE_PRIM_MAX] = { + PIPE_PRIM_POINTS, + PIPE_PRIM_LINES, + PIPE_PRIM_LINE_LOOP, + PIPE_PRIM_LINES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_QUADS, + PIPE_PRIM_QUAD_STRIP, + PIPE_PRIM_TRIANGLES +}; + +static void populate_key( struct brw_context *brw, + struct brw_gs_prog_key *key ) +{ + const struct brw_fs_signature *sig = &brw->curr.fragment_shader->signature; + + memset(key, 0, sizeof(*key)); + + /* PIPE_NEW_FRAGMENT_SIGNATURE */ + key->nr_attrs = sig->nr_inputs + 1; + + /* BRW_NEW_PRIMITIVE */ + key->primitive = gs_prim[brw->primitive]; + + key->hint_gs_always = 0; /* debug code? */ + + key->need_gs_prog = (key->hint_gs_always || + brw->primitive == PIPE_PRIM_QUADS || + brw->primitive == PIPE_PRIM_QUAD_STRIP || + brw->primitive == PIPE_PRIM_LINE_LOOP); +} + +/* Calculate interpolants for triangle and line rasterization. + */ +static int prepare_gs_prog(struct brw_context *brw) +{ + struct brw_gs_prog_key key; + enum pipe_error ret; + + /* Populate the key: + */ + populate_key(brw, &key); + + if (brw->gs.prog_active != key.need_gs_prog) { + brw->state.dirty.cache |= CACHE_NEW_GS_PROG; + brw->gs.prog_active = key.need_gs_prog; + } + + if (!brw->gs.prog_active) + return PIPE_OK; + + if (brw_search_cache(&brw->cache, BRW_GS_PROG, + &key, sizeof(key), + NULL, 0, + &brw->gs.prog_data, + &brw->gs.prog_bo)) + return PIPE_OK; + + ret = compile_gs_prog( brw, &key, &brw->gs.prog_bo ); + if (ret) + return ret; + + return PIPE_OK; +} + + +const struct brw_tracked_state brw_gs_prog = { + .dirty = { + .mesa = PIPE_NEW_FRAGMENT_SIGNATURE, + .brw = BRW_NEW_PRIMITIVE, + .cache = 0, + }, + .prepare = prepare_gs_prog +}; diff --git a/src/gallium/drivers/i965/brw_gs.h b/src/gallium/drivers/i965/brw_gs.h new file mode 100644 index 00000000000..6e616dcb875 --- /dev/null +++ b/src/gallium/drivers/i965/brw_gs.h @@ -0,0 +1,76 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#ifndef BRW_GS_H +#define BRW_GS_H + + +#include "brw_context.h" +#include "brw_eu.h" + +#define MAX_GS_VERTS (4) + +struct brw_gs_prog_key { + GLuint nr_attrs:8; + GLuint primitive:4; + GLuint hint_gs_always:1; + GLuint need_gs_prog:1; + GLuint pad:18; +}; + +struct brw_gs_compile { + struct brw_compile func; + struct brw_gs_prog_key key; + struct brw_gs_prog_data prog_data; + + struct { + struct brw_reg R0; + struct brw_reg vertex[MAX_GS_VERTS]; + } reg; + + /* 3 different ways of expressing vertex size: + */ + GLuint nr_attrs; + GLuint nr_regs; + GLuint nr_bytes; + GLboolean need_ff_sync; +}; + +#define ATTR_SIZE (4*4) + +void brw_gs_quads( struct brw_gs_compile *c ); +void brw_gs_quad_strip( struct brw_gs_compile *c ); +void brw_gs_tris( struct brw_gs_compile *c ); +void brw_gs_lines( struct brw_gs_compile *c ); +void brw_gs_points( struct brw_gs_compile *c ); + +#endif diff --git a/src/gallium/drivers/i965/brw_gs_emit.c b/src/gallium/drivers/i965/brw_gs_emit.c new file mode 100644 index 00000000000..fd8e2accedd --- /dev/null +++ b/src/gallium/drivers/i965/brw_gs_emit.c @@ -0,0 +1,181 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#include "brw_batchbuffer.h" + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_gs.h" + +static void brw_gs_alloc_regs( struct brw_gs_compile *c, + GLuint nr_verts ) +{ + GLuint i = 0,j; + + /* Register usage is static, precompute here: + */ + c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; + + /* Payload vertices plus space for more generated vertices: + */ + for (j = 0; j < nr_verts; j++) { + c->reg.vertex[j] = brw_vec4_grf(i, 0); + i += c->nr_regs; + } + + c->prog_data.urb_read_length = c->nr_regs; + c->prog_data.total_grf = i; +} + + +static void brw_gs_emit_vue(struct brw_gs_compile *c, + struct brw_reg vert, + GLboolean last, + GLuint header) +{ + struct brw_compile *p = &c->func; + GLboolean allocate = !last; + + /* Overwrite PrimType and PrimStart in the message header, for + * each vertex in turn: + */ + brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header)); + + /* Copy the vertex from vertn into m1..mN+1: + */ + brw_copy8(p, brw_message_reg(1), vert, c->nr_regs); + + /* Send each vertex as a seperate write to the urb. This is + * different to the concept in brw_sf_emit.c, where subsequent + * writes are used to build up a single urb entry. Each of these + * writes instantiates a seperate urb entry, and a new one must be + * allocated each time. + */ + brw_urb_WRITE(p, + allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), + 0, + c->reg.R0, + allocate, + 1, /* used */ + c->nr_regs + 1, /* msg length */ + allocate ? 1 : 0, /* response length */ + allocate ? 0 : 1, /* eot */ + 1, /* writes_complete */ + 0, /* urb offset */ + BRW_URB_SWIZZLE_NONE); +} + +static void brw_gs_ff_sync(struct brw_gs_compile *c, int num_prim) +{ + struct brw_compile *p = &c->func; + brw_MOV(p, get_element_ud(c->reg.R0, 1), brw_imm_ud(num_prim)); + brw_ff_sync(p, + c->reg.R0, + 0, + c->reg.R0, + 1, + 1, /* used */ + 1, /* msg length */ + 1, /* response length */ + 0, /* eot */ + 1, /* write compelete */ + 0, /* urb offset */ + BRW_URB_SWIZZLE_NONE); +} + + +void brw_gs_quads( struct brw_gs_compile *c ) +{ + brw_gs_alloc_regs(c, 4); + + /* Use polygons for correct edgeflag behaviour. Note that vertex 3 + * is the PV for quads, but vertex 0 for polygons: + */ + if (c->need_ff_sync) + brw_gs_ff_sync(c, 1); + brw_gs_emit_vue(c, c->reg.vertex[3], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START)); + brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2)); + brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2)); + brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END)); +} + +void brw_gs_quad_strip( struct brw_gs_compile *c ) +{ + brw_gs_alloc_regs(c, 4); + + if (c->need_ff_sync) + brw_gs_ff_sync(c, 1); + brw_gs_emit_vue(c, c->reg.vertex[2], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START)); + brw_gs_emit_vue(c, c->reg.vertex[3], 0, (_3DPRIM_POLYGON << 2)); + brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2)); + brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END)); +} + +void brw_gs_tris( struct brw_gs_compile *c ) +{ + brw_gs_alloc_regs(c, 3); + + if (c->need_ff_sync) + brw_gs_ff_sync(c, 1); + brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_TRILIST << 2) | R02_PRIM_START)); + brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_TRILIST << 2)); + brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_TRILIST << 2) | R02_PRIM_END)); +} + +void brw_gs_lines( struct brw_gs_compile *c ) +{ + brw_gs_alloc_regs(c, 2); + + if (c->need_ff_sync) + brw_gs_ff_sync(c, 1); + brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_START)); + brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_END)); +} + +void brw_gs_points( struct brw_gs_compile *c ) +{ + brw_gs_alloc_regs(c, 1); + + if (c->need_ff_sync) + brw_gs_ff_sync(c, 1); + brw_gs_emit_vue(c, c->reg.vertex[0], 1, ((_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END)); +} + + + + + + + + diff --git a/src/gallium/drivers/i965/brw_gs_state.c b/src/gallium/drivers/i965/brw_gs_state.c new file mode 100644 index 00000000000..b64ec286cea --- /dev/null +++ b/src/gallium/drivers/i965/brw_gs_state.c @@ -0,0 +1,169 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#include "util/u_math.h" + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "brw_debug.h" + +struct brw_gs_unit_key { + unsigned int total_grf; + unsigned int urb_entry_read_length; + + unsigned int curbe_offset; + + unsigned int nr_urb_entries, urb_size; + GLboolean prog_active; +}; + +static void +gs_unit_populate_key(struct brw_context *brw, struct brw_gs_unit_key *key) +{ + memset(key, 0, sizeof(*key)); + + /* CACHE_NEW_GS_PROG */ + key->prog_active = brw->gs.prog_active; + if (key->prog_active) { + key->total_grf = brw->gs.prog_data->total_grf; + key->urb_entry_read_length = brw->gs.prog_data->urb_read_length; + } else { + key->total_grf = 1; + key->urb_entry_read_length = 1; + } + + /* BRW_NEW_CURBE_OFFSETS */ + key->curbe_offset = brw->curbe.clip_start; + + /* BRW_NEW_URB_FENCE */ + key->nr_urb_entries = brw->urb.nr_gs_entries; + key->urb_size = brw->urb.vsize; +} + +static enum pipe_error +gs_unit_create_from_key(struct brw_context *brw, + struct brw_gs_unit_key *key, + struct brw_winsys_reloc *reloc, + unsigned nr_reloc, + struct brw_winsys_buffer **bo_out) +{ + struct brw_gs_unit_state gs; + enum pipe_error ret; + + + memset(&gs, 0, sizeof(gs)); + + /* reloc */ + gs.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1; + gs.thread0.kernel_start_pointer = 0; + + gs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + gs.thread1.single_program_flow = 1; + + gs.thread3.dispatch_grf_start_reg = 1; + gs.thread3.const_urb_entry_read_offset = 0; + gs.thread3.const_urb_entry_read_length = 0; + gs.thread3.urb_entry_read_offset = 0; + gs.thread3.urb_entry_read_length = key->urb_entry_read_length; + + gs.thread4.nr_urb_entries = key->nr_urb_entries; + gs.thread4.urb_entry_allocation_size = key->urb_size - 1; + + if (key->nr_urb_entries >= 8) + gs.thread4.max_threads = 1; + else + gs.thread4.max_threads = 0; + + if (BRW_IS_IGDNG(brw)) + gs.thread4.rendering_enable = 1; + + if (BRW_DEBUG & DEBUG_STATS) + gs.thread4.stats_enable = 1; + + ret = brw_upload_cache(&brw->cache, BRW_GS_UNIT, + key, sizeof(*key), + reloc, nr_reloc, + &gs, sizeof(gs), + NULL, NULL, + bo_out); + if (ret) + return ret; + + return PIPE_OK; +} + +static enum pipe_error prepare_gs_unit(struct brw_context *brw) +{ + struct brw_gs_unit_key key; + enum pipe_error ret; + struct brw_winsys_reloc reloc[1]; + unsigned nr_reloc = 0; + unsigned grf_reg_count; + + gs_unit_populate_key(brw, &key); + + grf_reg_count = (align(key.total_grf, 16) / 16 - 1); + + /* GS program relocation */ + if (key.prog_active) { + make_reloc(&reloc[nr_reloc++], + BRW_USAGE_STATE, + grf_reg_count << 1, + offsetof(struct brw_gs_unit_state, thread0), + brw->gs.prog_bo); + } + + if (brw_search_cache(&brw->cache, BRW_GS_UNIT, + &key, sizeof(key), + reloc, nr_reloc, + NULL, + &brw->gs.state_bo)) + return PIPE_OK; + + ret = gs_unit_create_from_key(brw, &key, + reloc, nr_reloc, + &brw->gs.state_bo); + if (ret) + return ret; + + return PIPE_OK; +} + +const struct brw_tracked_state brw_gs_unit = { + .dirty = { + .mesa = 0, + .brw = (BRW_NEW_CURBE_OFFSETS | + BRW_NEW_URB_FENCE), + .cache = CACHE_NEW_GS_PROG + }, + .prepare = prepare_gs_unit, +}; diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c new file mode 100644 index 00000000000..e4b24229db3 --- /dev/null +++ b/src/gallium/drivers/i965/brw_misc_state.c @@ -0,0 +1,513 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + + +#include "brw_debug.h" +#include "brw_batchbuffer.h" +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "brw_screen.h" +#include "brw_pipe_rast.h" + + + + + +/*********************************************************************** + * Blend color + */ + +static int upload_blend_constant_color(struct brw_context *brw) +{ + BRW_CACHED_BATCH_STRUCT(brw, &brw->curr.bcc); + return 0; +} + + +const struct brw_tracked_state brw_blend_constant_color = { + .dirty = { + .mesa = PIPE_NEW_BLEND_COLOR, + .brw = 0, + .cache = 0 + }, + .emit = upload_blend_constant_color +}; + +/*********************************************************************** + * Drawing rectangle - framebuffer dimensions + */ +static int upload_drawing_rect(struct brw_context *brw) +{ + BEGIN_BATCH(4, NO_LOOP_CLIPRECTS); + OUT_BATCH(_3DSTATE_DRAWRECT_INFO_I965); + OUT_BATCH(0); + OUT_BATCH(((brw->curr.fb.width - 1) & 0xffff) | + ((brw->curr.fb.height - 1) << 16)); + OUT_BATCH(0); + ADVANCE_BATCH(); + return 0; +} + +const struct brw_tracked_state brw_drawing_rect = { + .dirty = { + .mesa = PIPE_NEW_FRAMEBUFFER_DIMENSIONS, + .brw = 0, + .cache = 0 + }, + .emit = upload_drawing_rect +}; + + +/*********************************************************************** + * Binding table pointers + */ + +static int prepare_binding_table_pointers(struct brw_context *brw) +{ + brw_add_validated_bo(brw, brw->vs.bind_bo); + brw_add_validated_bo(brw, brw->wm.bind_bo); + return 0; +} + +/** + * Upload the binding table pointers, which point each stage's array of surface + * state pointers. + * + * The binding table pointers are relative to the surface state base address, + * which is 0. + */ +static int upload_binding_table_pointers(struct brw_context *brw) +{ + BEGIN_BATCH(6, IGNORE_CLIPRECTS); + OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2)); + if (brw->vs.bind_bo != NULL) + OUT_RELOC(brw->vs.bind_bo, + BRW_USAGE_SAMPLER, + 0); /* vs */ + else + OUT_BATCH(0); + OUT_BATCH(0); /* gs */ + OUT_BATCH(0); /* clip */ + OUT_BATCH(0); /* sf */ + OUT_RELOC(brw->wm.bind_bo, + BRW_USAGE_SAMPLER, + 0); /* wm/ps */ + ADVANCE_BATCH(); + return 0; +} + +const struct brw_tracked_state brw_binding_table_pointers = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_BATCH, + .cache = CACHE_NEW_SURF_BIND, + }, + .prepare = prepare_binding_table_pointers, + .emit = upload_binding_table_pointers, +}; + + +/********************************************************************** + * Upload pointers to the per-stage state. + * + * The state pointers in this packet are all relative to the general state + * base address set by CMD_STATE_BASE_ADDRESS, which is 0. + */ +static int upload_pipelined_state_pointers(struct brw_context *brw ) +{ + BEGIN_BATCH(7, IGNORE_CLIPRECTS); + OUT_BATCH(CMD_PIPELINED_STATE_POINTERS << 16 | (7 - 2)); + OUT_RELOC(brw->vs.state_bo, + BRW_USAGE_STATE, + 0); + if (brw->gs.prog_active) + OUT_RELOC(brw->gs.state_bo, + BRW_USAGE_STATE, + 1); + else + OUT_BATCH(0); + OUT_RELOC(brw->clip.state_bo, + BRW_USAGE_STATE, + 1); + OUT_RELOC(brw->sf.state_bo, + BRW_USAGE_STATE, + 0); + OUT_RELOC(brw->wm.state_bo, + BRW_USAGE_STATE, + 0); + OUT_RELOC(brw->cc.state_bo, + BRW_USAGE_STATE, + 0); + ADVANCE_BATCH(); + + brw->state.dirty.brw |= BRW_NEW_PSP; + return 0; +} + + +static int prepare_psp_urb_cbs(struct brw_context *brw) +{ + brw_add_validated_bo(brw, brw->vs.state_bo); + brw_add_validated_bo(brw, brw->gs.state_bo); + brw_add_validated_bo(brw, brw->clip.state_bo); + brw_add_validated_bo(brw, brw->sf.state_bo); + brw_add_validated_bo(brw, brw->wm.state_bo); + brw_add_validated_bo(brw, brw->cc.state_bo); + return 0; +} + +static int upload_psp_urb_cbs(struct brw_context *brw ) +{ + int ret; + + ret = upload_pipelined_state_pointers(brw); + if (ret) + return ret; + + ret = brw_upload_urb_fence(brw); + if (ret) + return ret; + + ret = brw_upload_cs_urb_state(brw); + if (ret) + return ret; + + return 0; +} + +const struct brw_tracked_state brw_psp_urb_cbs = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_URB_FENCE | BRW_NEW_BATCH, + .cache = (CACHE_NEW_VS_UNIT | + CACHE_NEW_GS_UNIT | + CACHE_NEW_GS_PROG | + CACHE_NEW_CLIP_UNIT | + CACHE_NEW_SF_UNIT | + CACHE_NEW_WM_UNIT | + CACHE_NEW_CC_UNIT) + }, + .prepare = prepare_psp_urb_cbs, + .emit = upload_psp_urb_cbs, +}; + + +/*********************************************************************** + * Depth buffer + */ + +static int prepare_depthbuffer(struct brw_context *brw) +{ + struct pipe_surface *zsbuf = brw->curr.fb.zsbuf; + + if (zsbuf) + brw_add_validated_bo(brw, brw_surface(zsbuf)->bo); + + return 0; +} + +static int emit_depthbuffer(struct brw_context *brw) +{ + struct pipe_surface *surface = brw->curr.fb.zsbuf; + unsigned int len = (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? 6 : 5; + + if (surface == NULL) { + BEGIN_BATCH(len, IGNORE_CLIPRECTS); + OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2)); + OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) | + (BRW_SURFACE_NULL << 29)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + + if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) + OUT_BATCH(0); + + ADVANCE_BATCH(); + } else { + struct brw_winsys_buffer *bo; + unsigned int format; + unsigned int pitch; + unsigned int cpp; + + switch (surface->format) { + case PIPE_FORMAT_Z16_UNORM: + format = BRW_DEPTHFORMAT_D16_UNORM; + cpp = 2; + break; + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: + format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT; + cpp = 4; + break; + case PIPE_FORMAT_Z32_FLOAT: + format = BRW_DEPTHFORMAT_D32_FLOAT; + cpp = 4; + break; + default: + assert(0); + return PIPE_ERROR_BAD_INPUT; + } + + bo = brw_surface(surface)->bo; + pitch = brw_surface(surface)->pitch; + + BEGIN_BATCH(len, IGNORE_CLIPRECTS); + OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2)); + OUT_BATCH(((pitch * cpp) - 1) | + (format << 18) | + (BRW_TILEWALK_YMAJOR << 26) | + ((surface->layout != PIPE_SURFACE_LAYOUT_LINEAR) << 27) | + (BRW_SURFACE_2D << 29)); + OUT_RELOC(bo, + BRW_USAGE_DEPTH_BUFFER, + surface->offset); + OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) | + ((pitch - 1) << 6) | + ((surface->height - 1) << 19)); + OUT_BATCH(0); + + if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) + OUT_BATCH(0); + + ADVANCE_BATCH(); + } + + return 0; +} + +const struct brw_tracked_state brw_depthbuffer = { + .dirty = { + .mesa = PIPE_NEW_DEPTH_BUFFER, + .brw = BRW_NEW_BATCH, + .cache = 0, + }, + .prepare = prepare_depthbuffer, + .emit = emit_depthbuffer, +}; + + + +/*********************************************************************** + * Polygon stipple packet + */ + +static int upload_polygon_stipple(struct brw_context *brw) +{ + BRW_CACHED_BATCH_STRUCT(brw, &brw->curr.bps); + return 0; +} + +const struct brw_tracked_state brw_polygon_stipple = { + .dirty = { + .mesa = PIPE_NEW_POLYGON_STIPPLE, + .brw = 0, + .cache = 0 + }, + .emit = upload_polygon_stipple +}; + + +/*********************************************************************** + * Line stipple packet + */ + +static int upload_line_stipple(struct brw_context *brw) +{ + const struct brw_line_stipple *bls = &brw->curr.rast->bls; + if (bls->header.opcode) { + BRW_CACHED_BATCH_STRUCT(brw, bls); + } + return 0; +} + +const struct brw_tracked_state brw_line_stipple = { + .dirty = { + .mesa = PIPE_NEW_RAST, + .brw = 0, + .cache = 0 + }, + .emit = upload_line_stipple +}; + + +/*********************************************************************** + * Misc invarient state packets + */ + +static int upload_invarient_state( struct brw_context *brw ) +{ + { + /* 0x61040000 Pipeline Select */ + /* PipelineSelect : 0 */ + struct brw_pipeline_select ps; + + memset(&ps, 0, sizeof(ps)); + if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) + ps.header.opcode = CMD_PIPELINE_SELECT_GM45; + else + ps.header.opcode = CMD_PIPELINE_SELECT_965; + ps.header.pipeline_select = 0; + BRW_BATCH_STRUCT(brw, &ps); + } + + { + struct brw_global_depth_offset_clamp gdo; + memset(&gdo, 0, sizeof(gdo)); + + /* Disable depth offset clamping. + */ + gdo.header.opcode = CMD_GLOBAL_DEPTH_OFFSET_CLAMP; + gdo.header.length = sizeof(gdo)/4 - 2; + gdo.depth_offset_clamp = 0.0; + + BRW_BATCH_STRUCT(brw, &gdo); + } + + + /* 0x61020000 State Instruction Pointer */ + { + struct brw_system_instruction_pointer sip; + memset(&sip, 0, sizeof(sip)); + + sip.header.opcode = CMD_STATE_INSN_POINTER; + sip.header.length = 0; + sip.bits0.pad = 0; + sip.bits0.system_instruction_pointer = 0; + BRW_BATCH_STRUCT(brw, &sip); + } + + /* VF Statistics */ + { + struct brw_vf_statistics vfs; + memset(&vfs, 0, sizeof(vfs)); + + if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) + vfs.opcode = CMD_VF_STATISTICS_GM45; + else + vfs.opcode = CMD_VF_STATISTICS_965; + + if (BRW_DEBUG & DEBUG_STATS) + vfs.statistics_enable = 1; + + BRW_BATCH_STRUCT(brw, &vfs); + } + + if (!BRW_IS_965(brw)) + { + struct brw_aa_line_parameters balp; + + /* use legacy aa line coverage computation */ + memset(&balp, 0, sizeof(balp)); + balp.header.opcode = CMD_AA_LINE_PARAMETERS; + balp.header.length = sizeof(balp) / 4 - 2; + + BRW_BATCH_STRUCT(brw, &balp); + } + + { + struct brw_polygon_stipple_offset bpso; + + /* This is invarient state in gallium: + */ + memset(&bpso, 0, sizeof(bpso)); + bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET; + bpso.header.length = sizeof(bpso)/4-2; + bpso.bits0.y_offset = 0; + bpso.bits0.x_offset = 0; + + BRW_BATCH_STRUCT(brw, &bpso); + } + + return 0; +} + +const struct brw_tracked_state brw_invarient_state = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_CONTEXT, + .cache = 0 + }, + .emit = upload_invarient_state +}; + + +/*********************************************************************** + * State base address + */ + +/** + * Define the base addresses which some state is referenced from. + * + * This allows us to avoid having to emit relocations in many places for + * cached state, and instead emit pointers inside of large, mostly-static + * state pools. This comes at the expense of memory, and more expensive cache + * misses. + */ +static int upload_state_base_address( struct brw_context *brw ) +{ + /* Output the structure (brw_state_base_address) directly to the + * batchbuffer, so we can emit relocations inline. + */ + if (BRW_IS_IGDNG(brw)) { + BEGIN_BATCH(8, IGNORE_CLIPRECTS); + OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2)); + OUT_BATCH(1); /* General state base address */ + OUT_BATCH(1); /* Surface state base address */ + OUT_BATCH(1); /* Indirect object base address */ + OUT_BATCH(1); /* Instruction base address */ + OUT_BATCH(1); /* General state upper bound */ + OUT_BATCH(1); /* Indirect object upper bound */ + OUT_BATCH(1); /* Instruction access upper bound */ + ADVANCE_BATCH(); + } else { + BEGIN_BATCH(6, IGNORE_CLIPRECTS); + OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2)); + OUT_BATCH(1); /* General state base address */ + OUT_BATCH(1); /* Surface state base address */ + OUT_BATCH(1); /* Indirect object base address */ + OUT_BATCH(1); /* General state upper bound */ + OUT_BATCH(1); /* Indirect object upper bound */ + ADVANCE_BATCH(); + } + return 0; +} + +const struct brw_tracked_state brw_state_base_address = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_CONTEXT, + .cache = 0, + }, + .emit = upload_state_base_address +}; diff --git a/src/gallium/drivers/i965/brw_pipe_blend.c b/src/gallium/drivers/i965/brw_pipe_blend.c new file mode 100644 index 00000000000..b759a910b63 --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_blend.c @@ -0,0 +1,208 @@ + +#include "util/u_memory.h" +#include "pipe/p_context.h" +#include "pipe/p_state.h" + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_debug.h" + +static int translate_logicop(unsigned logicop) +{ + switch (logicop) { + case PIPE_LOGICOP_CLEAR: + return BRW_LOGICOPFUNCTION_CLEAR; + case PIPE_LOGICOP_AND: + return BRW_LOGICOPFUNCTION_AND; + case PIPE_LOGICOP_AND_REVERSE: + return BRW_LOGICOPFUNCTION_AND_REVERSE; + case PIPE_LOGICOP_COPY: + return BRW_LOGICOPFUNCTION_COPY; + case PIPE_LOGICOP_COPY_INVERTED: + return BRW_LOGICOPFUNCTION_COPY_INVERTED; + case PIPE_LOGICOP_AND_INVERTED: + return BRW_LOGICOPFUNCTION_AND_INVERTED; + case PIPE_LOGICOP_NOOP: + return BRW_LOGICOPFUNCTION_NOOP; + case PIPE_LOGICOP_XOR: + return BRW_LOGICOPFUNCTION_XOR; + case PIPE_LOGICOP_OR: + return BRW_LOGICOPFUNCTION_OR; + case PIPE_LOGICOP_OR_INVERTED: + return BRW_LOGICOPFUNCTION_OR_INVERTED; + case PIPE_LOGICOP_NOR: + return BRW_LOGICOPFUNCTION_NOR; + case PIPE_LOGICOP_EQUIV: + return BRW_LOGICOPFUNCTION_EQUIV; + case PIPE_LOGICOP_INVERT: + return BRW_LOGICOPFUNCTION_INVERT; + case PIPE_LOGICOP_OR_REVERSE: + return BRW_LOGICOPFUNCTION_OR_REVERSE; + case PIPE_LOGICOP_NAND: + return BRW_LOGICOPFUNCTION_NAND; + case PIPE_LOGICOP_SET: + return BRW_LOGICOPFUNCTION_SET; + default: + assert(0); + return BRW_LOGICOPFUNCTION_SET; + } +} + + +static unsigned translate_blend_equation( unsigned mode ) +{ + switch (mode) { + case PIPE_BLEND_ADD: + return BRW_BLENDFUNCTION_ADD; + case PIPE_BLEND_MIN: + return BRW_BLENDFUNCTION_MIN; + case PIPE_BLEND_MAX: + return BRW_BLENDFUNCTION_MAX; + case PIPE_BLEND_SUBTRACT: + return BRW_BLENDFUNCTION_SUBTRACT; + case PIPE_BLEND_REVERSE_SUBTRACT: + return BRW_BLENDFUNCTION_REVERSE_SUBTRACT; + default: + assert(0); + return BRW_BLENDFUNCTION_ADD; + } +} + +static unsigned translate_blend_factor( unsigned factor ) +{ + switch(factor) { + case PIPE_BLENDFACTOR_ZERO: + return BRW_BLENDFACTOR_ZERO; + case PIPE_BLENDFACTOR_SRC_ALPHA: + return BRW_BLENDFACTOR_SRC_ALPHA; + case PIPE_BLENDFACTOR_ONE: + return BRW_BLENDFACTOR_ONE; + case PIPE_BLENDFACTOR_SRC_COLOR: + return BRW_BLENDFACTOR_SRC_COLOR; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + return BRW_BLENDFACTOR_INV_SRC_COLOR; + case PIPE_BLENDFACTOR_DST_COLOR: + return BRW_BLENDFACTOR_DST_COLOR; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + return BRW_BLENDFACTOR_INV_DST_COLOR; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + return BRW_BLENDFACTOR_INV_SRC_ALPHA; + case PIPE_BLENDFACTOR_DST_ALPHA: + return BRW_BLENDFACTOR_DST_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + return BRW_BLENDFACTOR_INV_DST_ALPHA; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE; + case PIPE_BLENDFACTOR_CONST_COLOR: + return BRW_BLENDFACTOR_CONST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + return BRW_BLENDFACTOR_INV_CONST_COLOR; + case PIPE_BLENDFACTOR_CONST_ALPHA: + return BRW_BLENDFACTOR_CONST_ALPHA; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + return BRW_BLENDFACTOR_INV_CONST_ALPHA; + default: + assert(0); + return BRW_BLENDFACTOR_ZERO; + } +} + +static void *brw_create_blend_state( struct pipe_context *pipe, + const struct pipe_blend_state *templ ) +{ + struct brw_blend_state *blend = CALLOC_STRUCT(brw_blend_state); + if (blend == NULL) + return NULL; + + if (templ->logicop_enable) { + blend->cc2.logicop_enable = 1; + blend->cc5.logicop_func = translate_logicop(templ->logicop_func); + } + else if (templ->blend_enable) { + blend->cc6.dest_blend_factor = translate_blend_factor(templ->rgb_dst_factor); + blend->cc6.src_blend_factor = translate_blend_factor(templ->rgb_src_factor); + blend->cc6.blend_function = translate_blend_equation(templ->rgb_func); + + blend->cc5.ia_dest_blend_factor = translate_blend_factor(templ->alpha_dst_factor); + blend->cc5.ia_src_blend_factor = translate_blend_factor(templ->alpha_src_factor); + blend->cc5.ia_blend_function = translate_blend_equation(templ->alpha_func); + + blend->cc3.blend_enable = 1; + blend->cc3.ia_blend_enable = + (blend->cc6.dest_blend_factor != blend->cc5.ia_dest_blend_factor || + blend->cc6.src_blend_factor != blend->cc5.ia_src_blend_factor || + blend->cc6.blend_function != blend->cc5.ia_blend_function); + + /* Per-surface blend enables, currently just follow global + * state: + */ + blend->ss0.color_blend = 1; + } + + blend->cc5.dither_enable = templ->dither; + + if (BRW_DEBUG & DEBUG_STATS) + blend->cc5.statistics_enable = 1; + + /* Per-surface color mask -- just follow global state: + */ + blend->ss0.writedisable_red = (templ->colormask & PIPE_MASK_R) ? 0 : 1; + blend->ss0.writedisable_green = (templ->colormask & PIPE_MASK_G) ? 0 : 1; + blend->ss0.writedisable_blue = (templ->colormask & PIPE_MASK_B) ? 0 : 1; + blend->ss0.writedisable_alpha = (templ->colormask & PIPE_MASK_A) ? 0 : 1; + + return (void *)blend; +} + +static void brw_bind_blend_state(struct pipe_context *pipe, + void *cso) +{ + struct brw_context *brw = brw_context(pipe); + brw->curr.blend = (const struct brw_blend_state *)cso; + brw->state.dirty.mesa |= PIPE_NEW_BLEND; +} + +static void brw_delete_blend_state(struct pipe_context *pipe, + void *cso) +{ + struct brw_context *brw = brw_context(pipe); + assert((const void *)cso != (const void *)brw->curr.blend); + FREE(cso); +} + + +static void brw_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *blend_color) +{ + struct brw_context *brw = brw_context(pipe); + struct brw_blend_constant_color *bcc = &brw->curr.bcc; + + bcc->blend_constant_color[0] = blend_color->color[0]; + bcc->blend_constant_color[1] = blend_color->color[1]; + bcc->blend_constant_color[2] = blend_color->color[2]; + bcc->blend_constant_color[3] = blend_color->color[3]; + + brw->state.dirty.mesa |= PIPE_NEW_BLEND_COLOR; +} + + +void brw_pipe_blend_init( struct brw_context *brw ) +{ + brw->base.set_blend_color = brw_set_blend_color; + brw->base.create_blend_state = brw_create_blend_state; + brw->base.bind_blend_state = brw_bind_blend_state; + brw->base.delete_blend_state = brw_delete_blend_state; + + { + struct brw_blend_constant_color *bcc = &brw->curr.bcc; + + memset(bcc, 0, sizeof(*bcc)); + bcc->header.opcode = CMD_BLEND_CONSTANT_COLOR; + bcc->header.length = sizeof(*bcc)/4-2; + } + +} + +void brw_pipe_blend_cleanup( struct brw_context *brw ) +{ +} diff --git a/src/gallium/drivers/i965/brw_pipe_clear.c b/src/gallium/drivers/i965/brw_pipe_clear.c new file mode 100644 index 00000000000..452e1e89f93 --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_clear.c @@ -0,0 +1,218 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "util/u_pack_color.h" + +#include "pipe/p_state.h" + +#include "brw_batchbuffer.h" +#include "brw_screen.h" +#include "brw_context.h" + +#define MASK16 0xffff +#define MASK24 0xffffff + + +/** + * Use blitting to clear the renderbuffers named by 'flags'. + * Note: we can't use the ctx->DrawBuffer->_ColorDrawBufferIndexes field + * since that might include software renderbuffers or renderbuffers + * which we're clearing with triangles. + * \param mask bitmask of BUFFER_BIT_* values indicating buffers to clear + */ +static enum pipe_error +try_clear( struct brw_context *brw, + struct brw_surface *surface, + unsigned value ) +{ + uint32_t BR13, CMD; + int x1 = 0; + int y1 = 0; + int x2 = surface->base.width; + int y2 = surface->base.height; + int pitch = surface->pitch; + int cpp = surface->cpp; + + if (x2 == 0 || y2 == 0) + return 0; + + debug_printf("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", + __FUNCTION__, + (void *)surface->bo, pitch * cpp, + surface->base.offset, + x1, y1, x2 - x1, y2 - y1); + + BR13 = 0xf0 << 16; + CMD = XY_COLOR_BLT_CMD | XY_BLT_WRITE_RGB | XY_BLT_WRITE_ALPHA; + + /* Setup the blit command */ + if (cpp == 4) { + BR13 |= BR13_8888; + CMD |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; + } + else { + assert(cpp == 2); + BR13 |= BR13_565; + } + + /* XXX: nasty hack for clearing depth buffers + */ + if (surface->tiling == BRW_TILING_Y) { + x2 = pitch; + } + + if (surface->tiling == BRW_TILING_X) { + CMD |= XY_DST_TILED; + pitch /= 4; + } + + BR13 |= (pitch * cpp); + + BEGIN_BATCH(6, 0); + OUT_BATCH(CMD); + OUT_BATCH(BR13); + OUT_BATCH((y1 << 16) | x1); + OUT_BATCH((y2 << 16) | x2); + OUT_RELOC(surface->bo, + BRW_USAGE_BLIT_DEST, + surface->base.offset); + OUT_BATCH(value); + ADVANCE_BATCH(); + + return 0; +} + + + + +static void color_clear(struct brw_context *brw, + struct brw_surface *bsurface, + const float *rgba ) +{ + enum pipe_error ret; + union util_color value; + + util_pack_color( rgba, bsurface->base.format, &value ); + + if (bsurface->cpp == 2) + value.ui |= value.ui << 16; + + ret = try_clear( brw, bsurface, value.ui ); + + if (ret != 0) { + brw_context_flush( brw ); + ret = try_clear( brw, bsurface, value.ui ); + assert( ret == 0 ); + } +} + +static void zstencil_clear(struct brw_context *brw, + struct brw_surface *bsurface, + double depth, + unsigned stencil ) +{ + enum pipe_error ret; + unsigned value; + + switch (bsurface->base.format) { + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: + value = ((unsigned)(depth * MASK24) & MASK24); + break; + case PIPE_FORMAT_Z16_UNORM: + value = ((unsigned)(depth * MASK16) & MASK16); + break; + default: + assert(0); + return; + } + + switch (bsurface->base.format) { + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: + value = value | (stencil << 24); + break; + + case PIPE_FORMAT_Z16_UNORM: + value = value | (value << 16); + break; + + default: + break; + } + + ret = try_clear( brw, bsurface, value ); + + if (ret != 0) { + brw_context_flush( brw ); + ret = try_clear( brw, bsurface, value ); + assert( ret == 0 ); + } +} + + + +/** + * Clear the given surface to the specified value. + * No masking, no scissor (clear entire buffer). + */ +static void brw_clear(struct pipe_context *pipe, + unsigned buffers, + const float *rgba, + double depth, + unsigned stencil) +{ + struct brw_context *brw = brw_context( pipe ); + int i; + + if (buffers & PIPE_CLEAR_COLOR) { + for (i = 0; i < brw->curr.fb.nr_cbufs; i++) { + color_clear( brw, + brw_surface(brw->curr.fb.cbufs[i]), + rgba ); + } + } + + if (buffers & PIPE_CLEAR_DEPTHSTENCIL) { + if (brw->curr.fb.zsbuf) { + zstencil_clear( brw, + brw_surface(brw->curr.fb.zsbuf), + depth, stencil ); + } + } +} + + +void brw_pipe_clear_init( struct brw_context *brw ) +{ + brw->base.clear = brw_clear; +} + + +void brw_pipe_clear_cleanup( struct brw_context *brw ) +{ +} diff --git a/src/gallium/drivers/i965/brw_pipe_depth.c b/src/gallium/drivers/i965/brw_pipe_depth.c new file mode 100644 index 00000000000..e010d76e0d3 --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_depth.c @@ -0,0 +1,172 @@ + +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "brw_context.h" +#include "brw_defines.h" + +/* XXX: Fixme - include this to get IZ_ defines + */ +#include "brw_wm.h" + +static unsigned brw_translate_compare_func(unsigned func) +{ + switch (func) { + case PIPE_FUNC_NEVER: + return BRW_COMPAREFUNCTION_NEVER; + case PIPE_FUNC_LESS: + return BRW_COMPAREFUNCTION_LESS; + case PIPE_FUNC_LEQUAL: + return BRW_COMPAREFUNCTION_LEQUAL; + case PIPE_FUNC_GREATER: + return BRW_COMPAREFUNCTION_GREATER; + case PIPE_FUNC_GEQUAL: + return BRW_COMPAREFUNCTION_GEQUAL; + case PIPE_FUNC_NOTEQUAL: + return BRW_COMPAREFUNCTION_NOTEQUAL; + case PIPE_FUNC_EQUAL: + return BRW_COMPAREFUNCTION_EQUAL; + case PIPE_FUNC_ALWAYS: + return BRW_COMPAREFUNCTION_ALWAYS; + default: + assert(0); + return BRW_COMPAREFUNCTION_ALWAYS; + } +} + +static unsigned translate_stencil_op(unsigned op) +{ + switch (op) { + case PIPE_STENCIL_OP_KEEP: + return BRW_STENCILOP_KEEP; + case PIPE_STENCIL_OP_ZERO: + return BRW_STENCILOP_ZERO; + case PIPE_STENCIL_OP_REPLACE: + return BRW_STENCILOP_REPLACE; + case PIPE_STENCIL_OP_INCR: + return BRW_STENCILOP_INCRSAT; + case PIPE_STENCIL_OP_DECR: + return BRW_STENCILOP_DECRSAT; + case PIPE_STENCIL_OP_INCR_WRAP: + return BRW_STENCILOP_INCR; + case PIPE_STENCIL_OP_DECR_WRAP: + return BRW_STENCILOP_DECR; + case PIPE_STENCIL_OP_INVERT: + return BRW_STENCILOP_INVERT; + default: + assert(0); + return BRW_STENCILOP_ZERO; + } +} + +static void create_bcc_state( struct brw_depth_stencil_state *zstencil, + const struct pipe_depth_stencil_alpha_state *templ ) +{ + if (templ->stencil[0].enabled) { + zstencil->cc0.stencil_enable = 1; + zstencil->cc0.stencil_func = + brw_translate_compare_func(templ->stencil[0].func); + zstencil->cc0.stencil_fail_op = + translate_stencil_op(templ->stencil[0].fail_op); + zstencil->cc0.stencil_pass_depth_fail_op = + translate_stencil_op(templ->stencil[0].zfail_op); + zstencil->cc0.stencil_pass_depth_pass_op = + translate_stencil_op(templ->stencil[0].zpass_op); + zstencil->cc1.stencil_ref = templ->stencil[0].ref_value; + zstencil->cc1.stencil_write_mask = templ->stencil[0].writemask; + zstencil->cc1.stencil_test_mask = templ->stencil[0].valuemask; + + if (templ->stencil[1].enabled) { + zstencil->cc0.bf_stencil_enable = 1; + zstencil->cc0.bf_stencil_func = + brw_translate_compare_func(templ->stencil[1].func); + zstencil->cc0.bf_stencil_fail_op = + translate_stencil_op(templ->stencil[1].fail_op); + zstencil->cc0.bf_stencil_pass_depth_fail_op = + translate_stencil_op(templ->stencil[1].zfail_op); + zstencil->cc0.bf_stencil_pass_depth_pass_op = + translate_stencil_op(templ->stencil[1].zpass_op); + zstencil->cc1.bf_stencil_ref = templ->stencil[1].ref_value; + zstencil->cc2.bf_stencil_write_mask = templ->stencil[1].writemask; + zstencil->cc2.bf_stencil_test_mask = templ->stencil[1].valuemask; + } + + zstencil->cc0.stencil_write_enable = (zstencil->cc1.stencil_write_mask || + zstencil->cc2.bf_stencil_write_mask); + } + + + if (templ->alpha.enabled) { + zstencil->cc3.alpha_test = 1; + zstencil->cc3.alpha_test_func = brw_translate_compare_func(templ->alpha.func); + zstencil->cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8; + zstencil->cc7.alpha_ref.ub[0] = float_to_ubyte(templ->alpha.ref_value); + } + + if (templ->depth.enabled) { + zstencil->cc2.depth_test = 1; + zstencil->cc2.depth_test_function = brw_translate_compare_func(templ->depth.func); + zstencil->cc2.depth_write_enable = templ->depth.writemask; + } +} + +static void create_wm_iz_state( struct brw_depth_stencil_state *zstencil ) +{ + if (zstencil->cc3.alpha_test) + zstencil->iz_lookup |= IZ_PS_KILL_ALPHATEST_BIT; + + if (zstencil->cc2.depth_test) + zstencil->iz_lookup |= IZ_DEPTH_TEST_ENABLE_BIT; + + if (zstencil->cc2.depth_write_enable) + zstencil->iz_lookup |= IZ_DEPTH_WRITE_ENABLE_BIT; + + if (zstencil->cc0.stencil_enable) + zstencil->iz_lookup |= IZ_STENCIL_TEST_ENABLE_BIT; + + if (zstencil->cc0.stencil_write_enable) + zstencil->iz_lookup |= IZ_STENCIL_WRITE_ENABLE_BIT; + +} + + +static void * +brw_create_depth_stencil_state( struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *templ ) +{ + struct brw_depth_stencil_state *zstencil = CALLOC_STRUCT(brw_depth_stencil_state); + + create_bcc_state( zstencil, templ ); + create_wm_iz_state( zstencil ); + + return (void *)zstencil; +} + + +static void brw_bind_depth_stencil_state(struct pipe_context *pipe, + void *cso) +{ + struct brw_context *brw = brw_context(pipe); + brw->curr.zstencil = (const struct brw_depth_stencil_state *)cso; + brw->state.dirty.mesa |= PIPE_NEW_DEPTH_STENCIL_ALPHA; +} + +static void brw_delete_depth_stencil_state(struct pipe_context *pipe, + void *cso) +{ + struct brw_context *brw = brw_context(pipe); + assert((const void *)cso != (const void *)brw->curr.zstencil); + FREE(cso); +} + + +void brw_pipe_depth_stencil_init( struct brw_context *brw ) +{ + brw->base.create_depth_stencil_alpha_state = brw_create_depth_stencil_state; + brw->base.bind_depth_stencil_alpha_state = brw_bind_depth_stencil_state; + brw->base.delete_depth_stencil_alpha_state = brw_delete_depth_stencil_state; +} + +void brw_pipe_depth_stencil_cleanup( struct brw_context *brw ) +{ +} diff --git a/src/gallium/drivers/i965/brw_pipe_fb.c b/src/gallium/drivers/i965/brw_pipe_fb.c new file mode 100644 index 00000000000..5d4e5025f97 --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_fb.c @@ -0,0 +1,84 @@ +#include "util/u_math.h" +#include "pipe/p_context.h" +#include "pipe/p_state.h" + +#include "brw_context.h" +#include "brw_debug.h" + +/** + * called from intelDrawBuffer() + */ +static void brw_set_framebuffer_state( struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb ) +{ + struct brw_context *brw = brw_context(pipe); + unsigned i; + + /* Dimensions: + */ + if (brw->curr.fb.width != fb->width || + brw->curr.fb.height != fb->height) { + brw->curr.fb.width = fb->width; + brw->curr.fb.height = fb->height; + brw->state.dirty.mesa |= PIPE_NEW_FRAMEBUFFER_DIMENSIONS; + } + + /* Z/Stencil + */ + if (brw->curr.fb.zsbuf != fb->zsbuf) { + pipe_surface_reference(&brw->curr.fb.zsbuf, fb->zsbuf); + brw->state.dirty.mesa |= PIPE_NEW_DEPTH_BUFFER; + } + + /* Color buffers: + */ + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + if (brw->curr.fb.cbufs[i] != fb->cbufs[i]) { + brw->state.dirty.mesa |= PIPE_NEW_COLOR_BUFFERS; + pipe_surface_reference(&brw->curr.fb.cbufs[i], fb->cbufs[i]); + } + } + + if (brw->curr.fb.nr_cbufs != fb->nr_cbufs) { + brw->curr.fb.nr_cbufs = MIN2(BRW_MAX_DRAW_BUFFERS, fb->nr_cbufs); + brw->state.dirty.mesa |= PIPE_NEW_NR_CBUFS; + } +} + + +static void brw_set_viewport_state( struct pipe_context *pipe, + const struct pipe_viewport_state *viewport ) +{ + struct brw_context *brw = brw_context(pipe); + + brw->curr.viewport = *viewport; + brw->curr.ccv.min_depth = viewport->scale[2] * -1.0 + viewport->translate[2]; + brw->curr.ccv.max_depth = viewport->scale[2] * 1.0 + viewport->translate[2]; + + if (0) + debug_printf("%s depth range %f .. %f\n", + __FUNCTION__, + brw->curr.ccv.min_depth, + brw->curr.ccv.max_depth); + + brw->state.dirty.mesa |= PIPE_NEW_VIEWPORT; +} + + +void brw_pipe_framebuffer_init( struct brw_context *brw ) +{ + brw->base.set_framebuffer_state = brw_set_framebuffer_state; + brw->base.set_viewport_state = brw_set_viewport_state; +} + +void brw_pipe_framebuffer_cleanup( struct brw_context *brw ) +{ + struct pipe_framebuffer_state *fb = &brw->curr.fb; + int i; + + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + pipe_surface_reference(&fb->cbufs[i], NULL); + } + + pipe_surface_reference(&fb->zsbuf, NULL); +} diff --git a/src/gallium/drivers/i965/brw_pipe_flush.c b/src/gallium/drivers/i965/brw_pipe_flush.c new file mode 100644 index 00000000000..fdc4814b221 --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_flush.c @@ -0,0 +1,83 @@ + +#include "util/u_upload_mgr.h" + +#include "brw_context.h" +#include "brw_screen.h" +#include "brw_batchbuffer.h" + + + +/* All batchbuffer flushes must go through this function. + */ +void brw_context_flush( struct brw_context *brw ) +{ + /* + * + */ + brw_emit_query_end(brw); + + /* Move to the end of the current upload buffer so that we'll force choosing + * a new buffer next time. + */ + u_upload_flush( brw->vb.upload_vertex ); + u_upload_flush( brw->vb.upload_index ); + + _brw_batchbuffer_flush( brw->batch, __FILE__, __LINE__ ); + + /* Mark all context state as needing to be re-emitted. + * This is probably not as severe as on 915, since almost all of our state + * is just in referenced buffers. + */ + brw->state.dirty.brw |= BRW_NEW_CONTEXT; + brw->state.dirty.mesa |= ~0; + brw->state.dirty.brw |= ~0; + brw->state.dirty.cache |= ~0; + + brw->curbe.need_new_bo = GL_TRUE; +} + +static void +brw_flush( struct pipe_context *pipe, + unsigned flags, + struct pipe_fence_handle **fence ) +{ + brw_context_flush( brw_context( pipe ) ); + if (fence) + *fence = NULL; +} + +static unsigned brw_is_buffer_referenced(struct pipe_context *pipe, + struct pipe_buffer *buffer) +{ + struct brw_context *brw = brw_context(pipe); + struct brw_screen *bscreen = brw_screen(brw->base.screen); + + return brw_is_buffer_referenced_by_bo( bscreen, + buffer, + brw->batch->buf ); +} + +static unsigned brw_is_texture_referenced(struct pipe_context *pipe, + struct pipe_texture *texture, + unsigned face, + unsigned level) +{ + struct brw_context *brw = brw_context(pipe); + struct brw_screen *bscreen = brw_screen(brw->base.screen); + + return brw_is_texture_referenced_by_bo( bscreen, + texture, face, level, + brw->batch->buf ); +} + +void brw_pipe_flush_init( struct brw_context *brw ) +{ + brw->base.flush = brw_flush; + brw->base.is_buffer_referenced = brw_is_buffer_referenced; + brw->base.is_texture_referenced = brw_is_texture_referenced; +} + + +void brw_pipe_flush_cleanup( struct brw_context *brw ) +{ +} diff --git a/src/gallium/drivers/i965/brw_pipe_misc.c b/src/gallium/drivers/i965/brw_pipe_misc.c new file mode 100644 index 00000000000..30359078079 --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_misc.c @@ -0,0 +1,54 @@ + +#include "brw_context.h" +#include "brw_structs.h" +#include "brw_defines.h" + +static void brw_set_polygon_stipple( struct pipe_context *pipe, + const struct pipe_poly_stipple *stip ) +{ + struct brw_context *brw = brw_context(pipe); + struct brw_polygon_stipple *bps = &brw->curr.bps; + GLuint i; + + memset(bps, 0, sizeof *bps); + bps->header.opcode = CMD_POLY_STIPPLE_PATTERN; + bps->header.length = sizeof *bps/4-2; + + for (i = 0; i < 32; i++) + bps->stipple[i] = stip->stipple[i]; /* don't invert */ + + brw->state.dirty.mesa |= PIPE_NEW_POLYGON_STIPPLE; +} + + +static void brw_set_scissor_state( struct pipe_context *pipe, + const struct pipe_scissor_state *scissor ) +{ + struct brw_context *brw = brw_context(pipe); + + brw->curr.scissor = *scissor; + brw->state.dirty.mesa |= PIPE_NEW_SCISSOR; +} + + +static void brw_set_clip_state( struct pipe_context *pipe, + const struct pipe_clip_state *clip ) +{ + struct brw_context *brw = brw_context(pipe); + + brw->curr.ucp = *clip; + brw->state.dirty.mesa |= PIPE_NEW_CLIP; +} + + +void brw_pipe_misc_init( struct brw_context *brw ) +{ + brw->base.set_polygon_stipple = brw_set_polygon_stipple; + brw->base.set_scissor_state = brw_set_scissor_state; + brw->base.set_clip_state = brw_set_clip_state; +} + + +void brw_pipe_misc_cleanup( struct brw_context *brw ) +{ +} diff --git a/src/gallium/drivers/i965/brw_pipe_query.c b/src/gallium/drivers/i965/brw_pipe_query.c new file mode 100644 index 00000000000..2eb862635cc --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_query.c @@ -0,0 +1,263 @@ +/* + * Copyright © 2008 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <[email protected]> + * + */ + +/** @file support for ARB_query_object + * + * ARB_query_object is implemented by using the PIPE_CONTROL command to stall + * execution on the completion of previous depth tests, and write the + * current PS_DEPTH_COUNT to a buffer object. + * + * We use before and after counts when drawing during a query so that + * we don't pick up other clients' query data in ours. To reduce overhead, + * a single BO is used to record the query data for all active queries at + * once. This also gives us a simple bound on how much batchbuffer space is + * required for handling queries, so that we can be sure that we won't + * have to emit a batchbuffer without getting the ending PS_DEPTH_COUNT. + */ +#include "util/u_simple_list.h" + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_batchbuffer.h" +#include "brw_reg.h" + +/** Waits on the query object's BO and totals the results for this query */ +static boolean +brw_query_get_result(struct pipe_context *pipe, + struct pipe_query *q, + boolean wait, + uint64_t *result) +{ + struct brw_context *brw = brw_context(pipe); + struct brw_query_object *query = (struct brw_query_object *)q; + + /* Map and count the pixels from the current query BO */ + if (query->bo) { + int i; + uint64_t *map; + + if (brw->sws->bo_is_busy(query->bo) && !wait) + return FALSE; + + map = bo_map_read(brw->sws, query->bo); + if (map == NULL) + return FALSE; + + for (i = query->first_index; i <= query->last_index; i++) { + query->result += map[i * 2 + 1] - map[i * 2]; + } + + brw->sws->bo_unmap(query->bo); + bo_reference(&query->bo, NULL); + } + + *result = query->result; + return TRUE; +} + +static struct pipe_query * +brw_query_create(struct pipe_context *pipe, unsigned type ) +{ + struct brw_query_object *query; + + switch (type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + query = CALLOC_STRUCT( brw_query_object ); + if (query == NULL) + return NULL; + return (struct pipe_query *)query; + + default: + return NULL; + } +} + +static void +brw_query_destroy(struct pipe_context *pipe, struct pipe_query *q) +{ + struct brw_query_object *query = (struct brw_query_object *)q; + + bo_reference(&query->bo, NULL); + FREE(query); +} + +static void +brw_query_begin(struct pipe_context *pipe, struct pipe_query *q) +{ + struct brw_context *brw = brw_context(pipe); + struct brw_query_object *query = (struct brw_query_object *)q; + + /* Reset our driver's tracking of query state. */ + bo_reference(&query->bo, NULL); + query->result = 0; + query->first_index = -1; + query->last_index = -1; + + insert_at_head(&brw->query.active_head, query); + brw->query.stats_wm++; + brw->state.dirty.mesa |= PIPE_NEW_QUERY; +} + +static void +brw_query_end(struct pipe_context *pipe, struct pipe_query *q) +{ + struct brw_context *brw = brw_context(pipe); + struct brw_query_object *query = (struct brw_query_object *)q; + + /* Flush the batchbuffer in case it has writes to our query BO. + * Have later queries write to a new query BO so that further rendering + * doesn't delay the collection of our results. + */ + if (query->bo) { + brw_emit_query_end(brw); + brw_context_flush( brw ); + + bo_reference(&brw->query.bo, NULL); + } + + remove_from_list(query); + brw->query.stats_wm--; + brw->state.dirty.mesa |= PIPE_NEW_QUERY; +} + +/*********************************************************************** + * Internal functions and callbacks to implement queries + */ + +/** Called to set up the query BO and account for its aperture space */ +enum pipe_error +brw_prepare_query_begin(struct brw_context *brw) +{ + enum pipe_error ret; + + /* Skip if we're not doing any queries. */ + if (is_empty_list(&brw->query.active_head)) + return PIPE_OK; + + /* Get a new query BO if we're going to need it. */ + if (brw->query.bo == NULL || + brw->query.index * 2 + 1 >= 4096 / sizeof(uint64_t)) { + + ret = brw->sws->bo_alloc(brw->sws, BRW_BUFFER_TYPE_QUERY, 4096, 1, + &brw->query.bo); + if (ret) + return ret; + + brw->query.index = 0; + } + + brw_add_validated_bo(brw, brw->query.bo); + + return PIPE_OK; +} + +/** Called just before primitive drawing to get a beginning PS_DEPTH_COUNT. */ +void +brw_emit_query_begin(struct brw_context *brw) +{ + struct brw_query_object *query; + + /* Skip if we're not doing any queries, or we've emitted the start. */ + if (brw->query.active || is_empty_list(&brw->query.active_head)) + return; + + BEGIN_BATCH(4, IGNORE_CLIPRECTS); + OUT_BATCH(_3DSTATE_PIPE_CONTROL | + PIPE_CONTROL_DEPTH_STALL | + PIPE_CONTROL_WRITE_DEPTH_COUNT); + /* This object could be mapped cacheable, but we don't have an exposed + * mechanism to support that. Since it's going uncached, tell GEM that + * we're writing to it. The usual clflush should be all that's required + * to pick up the results. + */ + OUT_RELOC(brw->query.bo, + BRW_USAGE_QUERY_RESULT, + PIPE_CONTROL_GLOBAL_GTT_WRITE | + ((brw->query.index * 2) * sizeof(uint64_t))); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + + foreach(query, &brw->query.active_head) { + if (query->bo != brw->query.bo) { + uint64_t tmp; + + /* Propogate the results from this buffer to all of the + * active queries, as the bo is going away. + */ + if (query->bo != NULL) + brw_query_get_result( &brw->base, + (struct pipe_query *)query, + FALSE, + &tmp ); + + bo_reference( &query->bo, brw->query.bo ); + query->first_index = brw->query.index; + } + query->last_index = brw->query.index; + } + brw->query.active = GL_TRUE; +} + +/** Called at batchbuffer flush to get an ending PS_DEPTH_COUNT */ +void +brw_emit_query_end(struct brw_context *brw) +{ + if (!brw->query.active) + return; + + BEGIN_BATCH(4, IGNORE_CLIPRECTS); + OUT_BATCH(_3DSTATE_PIPE_CONTROL | + PIPE_CONTROL_DEPTH_STALL | + PIPE_CONTROL_WRITE_DEPTH_COUNT); + OUT_RELOC(brw->query.bo, + BRW_USAGE_QUERY_RESULT, + PIPE_CONTROL_GLOBAL_GTT_WRITE | + ((brw->query.index * 2 + 1) * sizeof(uint64_t))); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + + brw->query.active = GL_FALSE; + brw->query.index++; +} + +void brw_pipe_query_init( struct brw_context *brw ) +{ + brw->base.create_query = brw_query_create; + brw->base.destroy_query = brw_query_destroy; + brw->base.begin_query = brw_query_begin; + brw->base.end_query = brw_query_end; + brw->base.get_query_result = brw_query_get_result; +} + + +void brw_pipe_query_cleanup( struct brw_context *brw ) +{ + /* Unreference brw->query.bo ?? + */ +} diff --git a/src/gallium/drivers/i965/brw_pipe_rast.c b/src/gallium/drivers/i965/brw_pipe_rast.c new file mode 100644 index 00000000000..2117e91a9e4 --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_rast.c @@ -0,0 +1,161 @@ + +#include "util/u_memory.h" +#include "pipe/p_defines.h" +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_pipe_rast.h" +#include "brw_wm.h" + + +static unsigned translate_fill( unsigned fill ) +{ + switch (fill) { + case PIPE_POLYGON_MODE_FILL: + return CLIP_FILL; + case PIPE_POLYGON_MODE_LINE: + return CLIP_LINE; + case PIPE_POLYGON_MODE_POINT: + return CLIP_POINT; + default: + assert(0); + return CLIP_FILL; + } +} + + +/* Calculates the key for triangle-mode clipping. Non-triangle + * clipping keys use much less information and are computed on the + * fly. + */ +static void +calculate_clip_key_rast( const struct brw_context *brw, + const struct pipe_rasterizer_state *templ, + const struct brw_rasterizer_state *rast, + struct brw_clip_prog_key *key) +{ + memset(key, 0, sizeof *key); + + if (brw->chipset.is_igdng) + key->clip_mode = BRW_CLIPMODE_KERNEL_CLIP; + else + key->clip_mode = BRW_CLIPMODE_NORMAL; + + key->do_flat_shading = templ->flatshade; + + if (templ->cull_mode == PIPE_WINDING_BOTH) { + key->clip_mode = BRW_CLIPMODE_REJECT_ALL; + return; + } + + key->fill_ccw = CLIP_CULL; + key->fill_cw = CLIP_CULL; + + if (!(templ->cull_mode & PIPE_WINDING_CCW)) { + key->fill_ccw = translate_fill(templ->fill_ccw); + } + + if (!(templ->cull_mode & PIPE_WINDING_CW)) { + key->fill_cw = translate_fill(templ->fill_cw); + } + + if (key->fill_cw == CLIP_LINE || + key->fill_ccw == CLIP_LINE || + key->fill_cw == CLIP_POINT || + key->fill_ccw == CLIP_POINT) { + key->do_unfilled = 1; + key->clip_mode = BRW_CLIPMODE_CLIP_NON_REJECTED; + } + + key->offset_ccw = templ->offset_ccw; + key->offset_cw = templ->offset_cw; + + if (templ->light_twoside && key->fill_cw != CLIP_CULL) + key->copy_bfc_cw = 1; + + if (templ->light_twoside && key->fill_ccw != CLIP_CULL) + key->copy_bfc_ccw = 1; +} + + +static void +calculate_line_stipple_rast( const struct pipe_rasterizer_state *templ, + struct brw_line_stipple *bls ) +{ + GLfloat tmp = 1.0f / (templ->line_stipple_factor + 1); + GLint tmpi = tmp * (1<<13); + + bls->header.opcode = CMD_LINE_STIPPLE_PATTERN; + bls->header.length = sizeof(*bls)/4 - 2; + bls->bits0.pattern = templ->line_stipple_pattern; + bls->bits1.repeat_count = templ->line_stipple_factor + 1; + bls->bits1.inverse_repeat_count = tmpi; +} + +static void *brw_create_rasterizer_state( struct pipe_context *pipe, + const struct pipe_rasterizer_state *templ ) +{ + struct brw_context *brw = brw_context(pipe); + struct brw_rasterizer_state *rast; + + rast = CALLOC_STRUCT(brw_rasterizer_state); + if (rast == NULL) + return NULL; + + rast->templ = *templ; + + calculate_clip_key_rast( brw, templ, rast, &rast->clip_key ); + + if (templ->line_stipple_enable) + calculate_line_stipple_rast( templ, &rast->bls ); + + /* Caclculate lookup value for WM IZ table. + */ + if (templ->line_smooth) { + if (templ->fill_cw == PIPE_POLYGON_MODE_LINE && + templ->fill_ccw == PIPE_POLYGON_MODE_LINE) { + rast->unfilled_aa_line = AA_ALWAYS; + } + else if (templ->fill_cw == PIPE_POLYGON_MODE_LINE || + templ->fill_ccw == PIPE_POLYGON_MODE_LINE) { + rast->unfilled_aa_line = AA_SOMETIMES; + } + else { + rast->unfilled_aa_line = AA_NEVER; + } + } + else { + rast->unfilled_aa_line = AA_NEVER; + } + + return (void *)rast; +} + + +static void brw_bind_rasterizer_state(struct pipe_context *pipe, + void *cso) +{ + struct brw_context *brw = brw_context(pipe); + brw->curr.rast = (const struct brw_rasterizer_state *)cso; + brw->state.dirty.mesa |= PIPE_NEW_RAST; +} + +static void brw_delete_rasterizer_state(struct pipe_context *pipe, + void *cso) +{ + struct brw_context *brw = brw_context(pipe); + assert((const void *)cso != (const void *)brw->curr.rast); + FREE(cso); +} + + + +void brw_pipe_rast_init( struct brw_context *brw ) +{ + brw->base.create_rasterizer_state = brw_create_rasterizer_state; + brw->base.bind_rasterizer_state = brw_bind_rasterizer_state; + brw->base.delete_rasterizer_state = brw_delete_rasterizer_state; +} + +void brw_pipe_rast_cleanup( struct brw_context *brw ) +{ +} diff --git a/src/gallium/drivers/i965/brw_pipe_rast.h b/src/gallium/drivers/i965/brw_pipe_rast.h new file mode 100644 index 00000000000..9354f01e18a --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_rast.h @@ -0,0 +1,16 @@ +#ifndef BRW_PIPE_RAST_H +#define BRW_PIPE_RAST_H + +#include "brw_clip.h" + +struct brw_rasterizer_state { + struct pipe_rasterizer_state templ; /* for draw module */ + + /* Precalculated hardware state: + */ + struct brw_clip_prog_key clip_key; + struct brw_line_stipple bls; + unsigned unfilled_aa_line; +}; + +#endif diff --git a/src/gallium/drivers/i965/brw_pipe_sampler.c b/src/gallium/drivers/i965/brw_pipe_sampler.c new file mode 100644 index 00000000000..81712798a5d --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_sampler.c @@ -0,0 +1,231 @@ + +#include "util/u_memory.h" +#include "util/u_math.h" + +#include "pipe/p_context.h" +#include "pipe/p_state.h" + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_debug.h" + + + +/* The brw (and related graphics cores) do not support GL_CLAMP. The + * Intel drivers for "other operating systems" implement GL_CLAMP as + * GL_CLAMP_TO_EDGE, so the same is done here. + */ +static GLuint translate_wrap_mode( unsigned wrap ) +{ + switch( wrap ) { + case PIPE_TEX_WRAP_REPEAT: + return BRW_TEXCOORDMODE_WRAP; + + case PIPE_TEX_WRAP_CLAMP: + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return BRW_TEXCOORDMODE_CLAMP; + + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return BRW_TEXCOORDMODE_CLAMP_BORDER; + + case PIPE_TEX_WRAP_MIRROR_REPEAT: + return BRW_TEXCOORDMODE_MIRROR; + + case PIPE_TEX_WRAP_MIRROR_CLAMP: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + return BRW_TEXCOORDMODE_MIRROR_ONCE; + + default: + return BRW_TEXCOORDMODE_WRAP; + } +} + +static GLuint translate_img_filter( unsigned filter ) +{ + switch (filter) { + case PIPE_TEX_FILTER_NEAREST: + return BRW_MAPFILTER_NEAREST; + case PIPE_TEX_FILTER_LINEAR: + return BRW_MAPFILTER_LINEAR; + default: + assert(0); + return BRW_MAPFILTER_NEAREST; + } +} + +static GLuint translate_mip_filter( unsigned filter ) +{ + switch (filter) { + case PIPE_TEX_MIPFILTER_NONE: + return BRW_MIPFILTER_NONE; + case PIPE_TEX_MIPFILTER_NEAREST: + return BRW_MIPFILTER_NEAREST; + case PIPE_TEX_MIPFILTER_LINEAR: + return BRW_MIPFILTER_LINEAR; + default: + assert(0); + return BRW_MIPFILTER_NONE; + } +} + +/* XXX: not sure why there are special translations for the shadow tex + * compare functions. In particular ALWAYS is translated to NEVER. + * Is this a hardware issue? Does i965 really suffer from this? + */ +static GLuint translate_shadow_compare_func( unsigned func ) +{ + switch (func) { + case PIPE_FUNC_NEVER: + return BRW_COMPAREFUNCTION_ALWAYS; + case PIPE_FUNC_LESS: + return BRW_COMPAREFUNCTION_LEQUAL; + case PIPE_FUNC_LEQUAL: + return BRW_COMPAREFUNCTION_LESS; + case PIPE_FUNC_GREATER: + return BRW_COMPAREFUNCTION_GEQUAL; + case PIPE_FUNC_GEQUAL: + return BRW_COMPAREFUNCTION_GREATER; + case PIPE_FUNC_NOTEQUAL: + return BRW_COMPAREFUNCTION_EQUAL; + case PIPE_FUNC_EQUAL: + return BRW_COMPAREFUNCTION_NOTEQUAL; + case PIPE_FUNC_ALWAYS: + return BRW_COMPAREFUNCTION_NEVER; + default: + assert(0); + return BRW_COMPAREFUNCTION_NEVER; + } +} + + + + +static void * +brw_create_sampler_state( struct pipe_context *pipe, + const struct pipe_sampler_state *template ) +{ + struct brw_sampler *sampler = CALLOC_STRUCT(brw_sampler); + + sampler->ss0.min_filter = translate_img_filter( template->min_img_filter ); + sampler->ss0.mag_filter = translate_img_filter( template->mag_img_filter ); + sampler->ss0.mip_filter = translate_mip_filter( template->min_mip_filter ); + + + /* XXX: anisotropy logic slightly changed: + */ + if (template->max_anisotropy > 1.0) { + sampler->ss0.min_filter = BRW_MAPFILTER_ANISOTROPIC; + sampler->ss0.mag_filter = BRW_MAPFILTER_ANISOTROPIC; + + if (template->max_anisotropy > 2.0) { + sampler->ss3.max_aniso = MIN2((template->max_anisotropy - 2) / 2, + BRW_ANISORATIO_16); + } + } + + sampler->ss1.r_wrap_mode = translate_wrap_mode(template->wrap_r); + sampler->ss1.s_wrap_mode = translate_wrap_mode(template->wrap_s); + sampler->ss1.t_wrap_mode = translate_wrap_mode(template->wrap_t); + + /* Set LOD bias: + */ + sampler->ss0.lod_bias = + util_signed_fixed(CLAMP(template->lod_bias, -16, 15), 6); + + + sampler->ss0.lod_preclamp = 1; /* OpenGL mode */ + sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */ + + /* Set shadow function: + */ + if (template->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + + /* Shadowing is "enabled" by emitting a particular sampler + * message (sample_c). So need to recompile WM program when + * shadow comparison is enabled on each/any texture unit. + */ + sampler->ss0.shadow_function = + translate_shadow_compare_func(template->compare_func); + } + + /* Set BaseMipLevel, MaxLOD, MinLOD: + */ + sampler->ss0.base_level = + util_unsigned_fixed(0, 1); + + sampler->ss1.max_lod = + util_unsigned_fixed(CLAMP(template->max_lod, 0, 13), 6); + + sampler->ss1.min_lod = + util_unsigned_fixed(CLAMP(template->min_lod, 0, 13), 6); + + return (void *)sampler; +} + +static void brw_bind_sampler_state(struct pipe_context *pipe, + unsigned num, void **sampler) +{ + struct brw_context *brw = brw_context(pipe); + int i; + + for (i = 0; i < num; i++) + brw->curr.sampler[i] = sampler[i]; + + for (i = num; i < brw->curr.num_samplers; i++) + brw->curr.sampler[i] = NULL; + + brw->curr.num_samplers = num; + brw->state.dirty.mesa |= PIPE_NEW_SAMPLERS; +} + +static void brw_delete_sampler_state(struct pipe_context *pipe, + void *cso) +{ + FREE(cso); +} + +static void brw_set_sampler_textures(struct pipe_context *pipe, + unsigned num, + struct pipe_texture **texture) +{ + struct brw_context *brw = brw_context(pipe); + int i; + + for (i = 0; i < num; i++) + pipe_texture_reference(&brw->curr.texture[i], texture[i]); + + for (i = num; i < brw->curr.num_textures; i++) + pipe_texture_reference(&brw->curr.texture[i], NULL); + + brw->curr.num_textures = num; + brw->state.dirty.mesa |= PIPE_NEW_BOUND_TEXTURES; +} + +static void brw_set_vertex_sampler_textures(struct pipe_context *pipe, + unsigned num, + struct pipe_texture **texture) +{ +} + +static void brw_bind_vertex_sampler_state(struct pipe_context *pipe, + unsigned num, void **sampler) +{ +} + + +void brw_pipe_sampler_init( struct brw_context *brw ) +{ + brw->base.create_sampler_state = brw_create_sampler_state; + brw->base.delete_sampler_state = brw_delete_sampler_state; + + brw->base.set_fragment_sampler_textures = brw_set_sampler_textures; + brw->base.bind_fragment_sampler_states = brw_bind_sampler_state; + + brw->base.set_vertex_sampler_textures = brw_set_vertex_sampler_textures; + brw->base.bind_vertex_sampler_states = brw_bind_vertex_sampler_state; + +} +void brw_pipe_sampler_cleanup( struct brw_context *brw ) +{ +} diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c new file mode 100644 index 00000000000..bb32d90e331 --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_shader.c @@ -0,0 +1,303 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#include "util/u_memory.h" + +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_scan.h" + +#include "brw_context.h" +#include "brw_util.h" +#include "brw_wm.h" + + +/** + * Determine if the given shader uses complex features such as flow + * conditionals, loops, subroutines. + */ +static GLboolean has_flow_control(const struct tgsi_shader_info *info) +{ + return (info->opcode_count[TGSI_OPCODE_ARL] > 0 || + info->opcode_count[TGSI_OPCODE_IF] > 0 || + info->opcode_count[TGSI_OPCODE_ENDIF] > 0 || /* redundant - IF */ + info->opcode_count[TGSI_OPCODE_CAL] > 0 || + info->opcode_count[TGSI_OPCODE_BRK] > 0 || /* redundant - BGNLOOP */ + info->opcode_count[TGSI_OPCODE_RET] > 0 || /* redundant - CAL */ + info->opcode_count[TGSI_OPCODE_BGNLOOP] > 0); +} + + +static void scan_immediates(const struct tgsi_token *tokens, + const struct tgsi_shader_info *info, + struct brw_immediate_data *imm) +{ + struct tgsi_parse_context parse; + boolean done = FALSE; + + imm->nr = 0; + imm->data = MALLOC(info->immediate_count * 4 * sizeof(float)); + + tgsi_parse_init( &parse, tokens ); + while (!tgsi_parse_end_of_tokens( &parse ) && !done) { + tgsi_parse_token( &parse ); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: { + static const float id[4] = {0,0,0,1}; + const float *value = &parse.FullToken.FullImmediate.u[0].Float; + unsigned size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; + unsigned i; + + for (i = 0; i < size; i++) + imm->data[imm->nr][i] = value[i]; + + for (; i < 4; i++) + imm->data[imm->nr][i] = id[i]; + + imm->nr++; + break; + } + + case TGSI_TOKEN_TYPE_INSTRUCTION: + done = 1; + break; + } + } +} + + +static void brw_bind_fs_state( struct pipe_context *pipe, void *prog ) +{ + struct brw_fragment_shader *fs = (struct brw_fragment_shader *)prog; + struct brw_context *brw = brw_context(pipe); + + if (brw->curr.fragment_shader == fs) + return; + + if (brw->curr.fragment_shader == NULL || + fs == NULL || + memcmp(&brw->curr.fragment_shader->signature, &fs->signature, + brw_fs_signature_size(&fs->signature)) != 0) { + brw->state.dirty.mesa |= PIPE_NEW_FRAGMENT_SIGNATURE; + } + + brw->curr.fragment_shader = fs; + brw->state.dirty.mesa |= PIPE_NEW_FRAGMENT_SHADER; +} + +static void brw_bind_vs_state( struct pipe_context *pipe, void *prog ) +{ + struct brw_context *brw = brw_context(pipe); + + brw->curr.vertex_shader = (struct brw_vertex_shader *)prog; + brw->state.dirty.mesa |= PIPE_NEW_VERTEX_SHADER; +} + + + +static void *brw_create_fs_state( struct pipe_context *pipe, + const struct pipe_shader_state *shader ) +{ + struct brw_context *brw = brw_context(pipe); + struct brw_fragment_shader *fs; + int i; + + fs = CALLOC_STRUCT(brw_fragment_shader); + if (fs == NULL) + return NULL; + + /* Duplicate tokens, scan shader + */ + fs->id = brw->program_id++; + fs->has_flow_control = has_flow_control(&fs->info); + + fs->tokens = tgsi_dup_tokens(shader->tokens); + if (fs->tokens == NULL) + goto fail; + + tgsi_scan_shader(fs->tokens, &fs->info); + scan_immediates(fs->tokens, &fs->info, &fs->immediates); + + fs->signature.nr_inputs = fs->info.num_inputs; + for (i = 0; i < fs->info.num_inputs; i++) { + fs->signature.input[i].interp = fs->info.input_interpolate[i]; + fs->signature.input[i].semantic = fs->info.input_semantic_name[i]; + fs->signature.input[i].semantic_index = fs->info.input_semantic_index[i]; + } + + for (i = 0; i < fs->info.num_inputs; i++) + if (fs->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION) + fs->uses_depth = 1; + + if (fs->info.uses_kill) + fs->iz_lookup |= IZ_PS_KILL_ALPHATEST_BIT; + + if (fs->info.writes_z) + fs->iz_lookup |= IZ_PS_COMPUTES_DEPTH_BIT; + + return (void *)fs; + +fail: + FREE(fs); + return NULL; +} + + +static void *brw_create_vs_state( struct pipe_context *pipe, + const struct pipe_shader_state *shader ) +{ + struct brw_context *brw = brw_context(pipe); + struct brw_vertex_shader *vs; + unsigned i; + + vs = CALLOC_STRUCT(brw_vertex_shader); + if (vs == NULL) + return NULL; + + /* Duplicate tokens, scan shader + */ + vs->tokens = tgsi_dup_tokens(shader->tokens); + if (vs->tokens == NULL) + goto fail; + + tgsi_scan_shader(vs->tokens, &vs->info); + scan_immediates(vs->tokens, &vs->info, &vs->immediates); + + vs->id = brw->program_id++; + vs->has_flow_control = has_flow_control(&vs->info); + + vs->output_hpos = BRW_OUTPUT_NOT_PRESENT; + vs->output_color0 = BRW_OUTPUT_NOT_PRESENT; + vs->output_color1 = BRW_OUTPUT_NOT_PRESENT; + vs->output_bfc0 = BRW_OUTPUT_NOT_PRESENT; + vs->output_bfc1 = BRW_OUTPUT_NOT_PRESENT; + vs->output_edgeflag = BRW_OUTPUT_NOT_PRESENT; + + for (i = 0; i < vs->info.num_outputs; i++) { + int index = vs->info.output_semantic_index[i]; + switch (vs->info.output_semantic_name[i]) { + case TGSI_SEMANTIC_POSITION: + vs->output_hpos = i; + break; + case TGSI_SEMANTIC_COLOR: + if (index == 0) + vs->output_color0 = i; + else + vs->output_color1 = i; + break; + case TGSI_SEMANTIC_BCOLOR: + if (index == 0) + vs->output_bfc0 = i; + else + vs->output_bfc1 = i; + break; + case TGSI_SEMANTIC_EDGEFLAG: + vs->output_edgeflag = i; + break; + } + } + + + /* Done: + */ + return (void *)vs; + +fail: + FREE(vs); + return NULL; +} + + +static void brw_delete_fs_state( struct pipe_context *pipe, void *prog ) +{ + struct brw_fragment_shader *fs = (struct brw_fragment_shader *)prog; + + bo_reference(&fs->const_buffer, NULL); + FREE( (void *)fs->tokens ); + FREE( fs ); +} + + +static void brw_delete_vs_state( struct pipe_context *pipe, void *prog ) +{ + struct brw_fragment_shader *vs = (struct brw_fragment_shader *)prog; + + /* Delete draw shader + */ + FREE( (void *)vs->tokens ); + FREE( vs ); +} + + +static void brw_set_constant_buffer(struct pipe_context *pipe, + uint shader, uint index, + const struct pipe_constant_buffer *buf) +{ + struct brw_context *brw = brw_context(pipe); + + assert(index == 0); + + if (shader == PIPE_SHADER_FRAGMENT) { + pipe_buffer_reference( &brw->curr.fragment_constants, + buf->buffer ); + + brw->state.dirty.mesa |= PIPE_NEW_FRAGMENT_CONSTANTS; + } + else { + pipe_buffer_reference( &brw->curr.vertex_constants, + buf->buffer ); + + brw->state.dirty.mesa |= PIPE_NEW_VERTEX_CONSTANTS; + } +} + + +void brw_pipe_shader_init( struct brw_context *brw ) +{ + brw->base.set_constant_buffer = brw_set_constant_buffer; + + brw->base.create_vs_state = brw_create_vs_state; + brw->base.bind_vs_state = brw_bind_vs_state; + brw->base.delete_vs_state = brw_delete_vs_state; + + brw->base.create_fs_state = brw_create_fs_state; + brw->base.bind_fs_state = brw_bind_fs_state; + brw->base.delete_fs_state = brw_delete_fs_state; +} + +void brw_pipe_shader_cleanup( struct brw_context *brw ) +{ + pipe_buffer_reference( &brw->curr.fragment_constants, NULL ); + pipe_buffer_reference( &brw->curr.vertex_constants, NULL ); +} diff --git a/src/gallium/drivers/i965/brw_pipe_vertex.c b/src/gallium/drivers/i965/brw_pipe_vertex.c new file mode 100644 index 00000000000..e3c48e31493 --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_vertex.c @@ -0,0 +1,71 @@ +#include "brw_context.h" + + +static void brw_set_vertex_elements( struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_element *elements ) +{ + struct brw_context *brw = brw_context(pipe); + + memcpy(brw->curr.vertex_element, elements, count * sizeof(elements[0])); + brw->curr.num_vertex_elements = count; + + brw->state.dirty.mesa |= PIPE_NEW_VERTEX_ELEMENT; +} + + +static void brw_set_vertex_buffers(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_buffer *buffers) +{ + struct brw_context *brw = brw_context(pipe); + unsigned i; + + /* Check for no change */ + if (count == brw->curr.num_vertex_buffers && + memcmp(brw->curr.vertex_buffer, + buffers, + count * sizeof buffers[0]) == 0) + return; + + /* Adjust refcounts */ + for (i = 0; i < count; i++) + pipe_buffer_reference(&brw->curr.vertex_buffer[i].buffer, + buffers[i].buffer); + + for ( ; i < brw->curr.num_vertex_buffers; i++) + pipe_buffer_reference(&brw->curr.vertex_buffer[i].buffer, + NULL); + + /* Copy remaining data */ + memcpy(brw->curr.vertex_buffer, buffers, count * sizeof buffers[0]); + brw->curr.num_vertex_buffers = count; + + brw->state.dirty.mesa |= PIPE_NEW_VERTEX_BUFFER; +} + + +void +brw_pipe_vertex_init( struct brw_context *brw ) +{ + brw->base.set_vertex_buffers = brw_set_vertex_buffers; + brw->base.set_vertex_elements = brw_set_vertex_elements; +} + + +void +brw_pipe_vertex_cleanup( struct brw_context *brw ) +{ + + /* Release bound pipe vertex_buffers + */ + + /* Release some other stuff + */ +#if 0 + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { + bo_reference(&brw->vb.inputs[i].bo, NULL); + brw->vb.inputs[i].bo = NULL; + } +#endif +} diff --git a/src/gallium/drivers/i965/brw_reg.h b/src/gallium/drivers/i965/brw_reg.h new file mode 100644 index 00000000000..a63403b6afd --- /dev/null +++ b/src/gallium/drivers/i965/brw_reg.h @@ -0,0 +1,115 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef BRW_REG_H +#define BRW_REG_H + +#define CMD_MI (0x0 << 29) +#define CMD_2D (0x2 << 29) +#define CMD_3D (0x3 << 29) + +#define MI_NOOP (CMD_MI | 0) +#define MI_BATCH_BUFFER_END (CMD_MI | 0xA << 23) +#define MI_FLUSH (CMD_MI | (4 << 23)) + +#define _3DSTATE_DRAWRECT_INFO_I965 (CMD_3D | (3 << 27) | (1 << 24) | 0x2) + +/** @{ + * + * PIPE_CONTROL operation, a combination MI_FLUSH and register write with + * additional flushing control. + */ +#define _3DSTATE_PIPE_CONTROL (CMD_3D | (3 << 27) | (2 << 24) | 2) +#define PIPE_CONTROL_NO_WRITE (0 << 14) +#define PIPE_CONTROL_WRITE_IMMEDIATE (1 << 14) +#define PIPE_CONTROL_WRITE_DEPTH_COUNT (2 << 14) +#define PIPE_CONTROL_WRITE_TIMESTAMP (3 << 14) +#define PIPE_CONTROL_DEPTH_STALL (1 << 13) +#define PIPE_CONTROL_WRITE_FLUSH (1 << 12) +#define PIPE_CONTROL_INSTRUCTION_FLUSH (1 << 11) +#define PIPE_CONTROL_INTERRUPT_ENABLE (1 << 8) +#define PIPE_CONTROL_PPGTT_WRITE (0 << 2) +#define PIPE_CONTROL_GLOBAL_GTT_WRITE (1 << 2) + +/** @} */ + +#define XY_SETUP_BLT_CMD (CMD_2D | (0x01 << 22) | 6) +#define XY_COLOR_BLT_CMD (CMD_2D | (0x50 << 22) | 4) +#define XY_SRC_COPY_BLT_CMD (CMD_2D | (0x53 << 22) | 6) + +/* BR00 */ +#define XY_BLT_WRITE_ALPHA (1 << 21) +#define XY_BLT_WRITE_RGB (1 << 20) +#define XY_SRC_TILED (1 << 15) +#define XY_DST_TILED (1 << 11) + +/* BR13 */ +#define BR13_565 (0x1 << 24) +#define BR13_8888 (0x3 << 24) + +#define FENCE_LINEAR 0 +#define FENCE_XMAJOR 1 +#define FENCE_YMAJOR 2 + + + +/* PCI IDs + */ +#define PCI_CHIP_I965_G 0x29A2 +#define PCI_CHIP_I965_Q 0x2992 +#define PCI_CHIP_I965_G_1 0x2982 +#define PCI_CHIP_I946_GZ 0x2972 +#define PCI_CHIP_I965_GM 0x2A02 +#define PCI_CHIP_I965_GME 0x2A12 + +#define PCI_CHIP_GM45_GM 0x2A42 + +#define PCI_CHIP_IGD_E_G 0x2E02 +#define PCI_CHIP_Q45_G 0x2E12 +#define PCI_CHIP_G45_G 0x2E22 +#define PCI_CHIP_G41_G 0x2E32 +#define PCI_CHIP_B43_G 0x2E42 + +#define PCI_CHIP_ILD_G 0x0042 +#define PCI_CHIP_ILM_G 0x0046 + +struct brw_chipset { + unsigned pci_id:16; + unsigned is_965:1; + unsigned is_igdng:1; + unsigned is_g4x:1; + unsigned pad:13; +}; + + +/* XXX: hacks + */ +#define VERT_RESULT_HPOS 0 /* not always true */ +#define VERT_RESULT_PSIZ 10000 /* disabled */ + + +#endif diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c new file mode 100644 index 00000000000..0ecacac9a3a --- /dev/null +++ b/src/gallium/drivers/i965/brw_screen.c @@ -0,0 +1,403 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_inlines.h" +#include "util/u_memory.h" +#include "util/u_string.h" + +#include "brw_reg.h" +#include "brw_context.h" +#include "brw_screen.h" +#include "brw_winsys.h" +#include "brw_debug.h" + +#ifdef DEBUG +static const struct debug_named_value debug_names[] = { + { "tex", DEBUG_TEXTURE}, + { "state", DEBUG_STATE}, + { "ioctl", DEBUG_IOCTL}, + { "blit", DEBUG_BLIT}, + { "curbe", DEBUG_CURBE}, + { "fall", DEBUG_FALLBACKS}, + { "verb", DEBUG_VERBOSE}, + { "bat", DEBUG_BATCH}, + { "pix", DEBUG_PIXEL}, + { "wins", DEBUG_WINSYS}, + { "min", DEBUG_MIN_URB}, + { "dis", DEBUG_DISASSEM}, + { "sync", DEBUG_SYNC}, + { "prim", DEBUG_PRIMS }, + { "vert", DEBUG_VERTS }, + { "dma", DEBUG_DMA }, + { "san", DEBUG_SANITY }, + { "sleep", DEBUG_SLEEP }, + { "stats", DEBUG_STATS }, + { "sing", DEBUG_SINGLE_THREAD }, + { "thre", DEBUG_SINGLE_THREAD }, + { "wm", DEBUG_WM }, + { "urb", DEBUG_URB }, + { "vs", DEBUG_VS }, + { NULL, 0 } +}; + +static const struct debug_named_value dump_names[] = { + { "asm", DUMP_ASM}, + { "state", DUMP_STATE}, + { "batch", DUMP_BATCH}, + { NULL, 0 } +}; + +int BRW_DEBUG = 0; +int BRW_DUMP = 0; + +#endif + + +/* + * Probe functions + */ + + +static const char * +brw_get_vendor(struct pipe_screen *screen) +{ + return "VMware, Inc."; +} + +static const char * +brw_get_name(struct pipe_screen *screen) +{ + static char buffer[128]; + const char *chipset; + + switch (brw_screen(screen)->chipset.pci_id) { + case PCI_CHIP_I965_G: + chipset = "I965_G"; + break; + case PCI_CHIP_I965_Q: + chipset = "I965_Q"; + break; + case PCI_CHIP_I965_G_1: + chipset = "I965_G_1"; + break; + case PCI_CHIP_I946_GZ: + chipset = "I946_GZ"; + break; + case PCI_CHIP_I965_GM: + chipset = "I965_GM"; + break; + case PCI_CHIP_I965_GME: + chipset = "I965_GME"; + break; + case PCI_CHIP_GM45_GM: + chipset = "GM45_GM"; + break; + case PCI_CHIP_IGD_E_G: + chipset = "IGD_E_G"; + break; + case PCI_CHIP_Q45_G: + chipset = "Q45_G"; + break; + case PCI_CHIP_G45_G: + chipset = "G45_G"; + break; + case PCI_CHIP_G41_G: + chipset = "G41_G"; + break; + case PCI_CHIP_B43_G: + chipset = "B43_G"; + break; + case PCI_CHIP_ILD_G: + chipset = "ILD_G"; + break; + case PCI_CHIP_ILM_G: + chipset = "ILM_G"; + break; + } + + util_snprintf(buffer, sizeof(buffer), "i965 (chipset: %s)", chipset); + return buffer; +} + +static int +brw_get_param(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 8; + case PIPE_CAP_NPOT_TEXTURES: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 0; + case PIPE_CAP_POINT_SPRITE: + return 0; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 1; + case PIPE_CAP_OCCLUSION_QUERY: + return 0; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 11; /* max 1024x1024 */ + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 8; /* max 128x128x128 */ + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 11; /* max 1024x1024 */ + default: + return 0; + } +} + +static float +brw_get_paramf(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 7.5; + + case PIPE_CAP_MAX_POINT_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 255.0; + + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 4.0; + + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 16.0; + + default: + return 0; + } +} + +static boolean +brw_is_format_supported(struct pipe_screen *screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, + unsigned geom_flags) +{ + static const enum pipe_format tex_supported[] = { + PIPE_FORMAT_L8_UNORM, + PIPE_FORMAT_I8_UNORM, + PIPE_FORMAT_A8_UNORM, + PIPE_FORMAT_L16_UNORM, + /*PIPE_FORMAT_I16_UNORM,*/ + /*PIPE_FORMAT_A16_UNORM,*/ + PIPE_FORMAT_A8L8_UNORM, + PIPE_FORMAT_R5G6B5_UNORM, + PIPE_FORMAT_A1R5G5B5_UNORM, + PIPE_FORMAT_A4R4G4B4_UNORM, + PIPE_FORMAT_X8R8G8B8_UNORM, + PIPE_FORMAT_A8R8G8B8_UNORM, + /* video */ + PIPE_FORMAT_YCBCR, + PIPE_FORMAT_YCBCR_REV, + /* compressed */ + /*PIPE_FORMAT_FXT1_RGBA,*/ + PIPE_FORMAT_DXT1_RGB, + PIPE_FORMAT_DXT1_RGBA, + PIPE_FORMAT_DXT3_RGBA, + PIPE_FORMAT_DXT5_RGBA, + /* sRGB */ + PIPE_FORMAT_R8G8B8A8_SRGB, + PIPE_FORMAT_A8L8_SRGB, + PIPE_FORMAT_L8_SRGB, + PIPE_FORMAT_DXT1_SRGB, + /* depth */ + PIPE_FORMAT_Z32_FLOAT, + PIPE_FORMAT_X8Z24_UNORM, + PIPE_FORMAT_S8Z24_UNORM, + PIPE_FORMAT_Z16_UNORM, + /* signed */ + PIPE_FORMAT_R8G8_SNORM, + PIPE_FORMAT_R8G8B8A8_SNORM, + PIPE_FORMAT_NONE /* list terminator */ + }; + static const enum pipe_format render_supported[] = { + PIPE_FORMAT_X8R8G8B8_UNORM, + PIPE_FORMAT_A8R8G8B8_UNORM, + PIPE_FORMAT_R5G6B5_UNORM, + PIPE_FORMAT_NONE /* list terminator */ + }; + static const enum pipe_format depth_supported[] = { + PIPE_FORMAT_Z32_FLOAT, + PIPE_FORMAT_X8Z24_UNORM, + PIPE_FORMAT_S8Z24_UNORM, + PIPE_FORMAT_Z16_UNORM, + PIPE_FORMAT_NONE /* list terminator */ + }; + const enum pipe_format *list; + uint i; + + if (tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) + list = depth_supported; + else if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) + list = render_supported; + else + list = tex_supported; + + for (i = 0; list[i] != PIPE_FORMAT_NONE; i++) { + if (list[i] == format) + return TRUE; + } + + return FALSE; +} + + +/* + * Fence functions + */ + + +static void +brw_fence_reference(struct pipe_screen *screen, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence) +{ +} + +static int +brw_fence_signalled(struct pipe_screen *screen, + struct pipe_fence_handle *fence, + unsigned flags) +{ + return 0; /* XXX shouldn't this be a boolean? */ +} + +static int +brw_fence_finish(struct pipe_screen *screen, + struct pipe_fence_handle *fence, + unsigned flags) +{ + return 0; +} + + +/* + * Generic functions + */ + + +static void +brw_destroy_screen(struct pipe_screen *screen) +{ + struct brw_screen *bscreen = brw_screen(screen); + + if (bscreen->sws) + bscreen->sws->destroy(bscreen->sws); + + FREE(bscreen); +} + +/** + * Create a new brw_screen object + */ +struct pipe_screen * +brw_create_screen(struct brw_winsys_screen *sws, uint pci_id) +{ + struct brw_screen *bscreen; + struct brw_chipset chipset; + +#ifdef DEBUG + BRW_DEBUG = debug_get_flags_option("BRW_DEBUG", debug_names, 0); + BRW_DEBUG |= debug_get_flags_option("INTEL_DEBUG", debug_names, 0); + BRW_DEBUG |= DEBUG_STATS | DEBUG_MIN_URB | DEBUG_WM; + + BRW_DUMP = debug_get_flags_option("BRW_DUMP", dump_names, 0); +#endif + + memset(&chipset, 0, sizeof chipset); + + chipset.pci_id = pci_id; + + switch (pci_id) { + case PCI_CHIP_I965_G: + case PCI_CHIP_I965_Q: + case PCI_CHIP_I965_G_1: + case PCI_CHIP_I946_GZ: + case PCI_CHIP_I965_GM: + case PCI_CHIP_I965_GME: + chipset.is_965 = TRUE; + break; + + case PCI_CHIP_GM45_GM: + case PCI_CHIP_IGD_E_G: + case PCI_CHIP_Q45_G: + case PCI_CHIP_G45_G: + case PCI_CHIP_G41_G: + case PCI_CHIP_B43_G: + chipset.is_g4x = TRUE; + break; + + case PCI_CHIP_ILD_G: + case PCI_CHIP_ILM_G: + chipset.is_igdng = TRUE; + break; + + default: + debug_printf("%s: unknown pci id 0x%x, cannot create screen\n", + __FUNCTION__, pci_id); + return NULL; + } + + + bscreen = CALLOC_STRUCT(brw_screen); + if (!bscreen) + return NULL; + + bscreen->chipset = chipset; + bscreen->sws = sws; + bscreen->base.winsys = NULL; + bscreen->base.destroy = brw_destroy_screen; + bscreen->base.get_name = brw_get_name; + bscreen->base.get_vendor = brw_get_vendor; + bscreen->base.get_param = brw_get_param; + bscreen->base.get_paramf = brw_get_paramf; + bscreen->base.is_format_supported = brw_is_format_supported; + bscreen->base.fence_reference = brw_fence_reference; + bscreen->base.fence_signalled = brw_fence_signalled; + bscreen->base.fence_finish = brw_fence_finish; + + brw_screen_tex_init(bscreen); + brw_screen_tex_surface_init(bscreen); + brw_screen_buffer_init(bscreen); + + bscreen->no_tiling = debug_get_option("BRW_NO_TILING", FALSE) != NULL; + + + return &bscreen->base; +} diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h new file mode 100644 index 00000000000..7226d9228b7 --- /dev/null +++ b/src/gallium/drivers/i965/brw_screen.h @@ -0,0 +1,199 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef BRW_SCREEN_H +#define BRW_SCREEN_H + +#include "pipe/p_state.h" +#include "pipe/p_screen.h" + +#include "brw_reg.h" +#include "brw_structs.h" + +struct brw_winsys_screen; + + +/** + * Subclass of pipe_screen + */ +struct brw_screen +{ + struct pipe_screen base; + struct brw_chipset chipset; + struct brw_winsys_screen *sws; + boolean no_tiling; +}; + +/** + * Subclass of pipe_transfer + */ +struct brw_transfer +{ + struct pipe_transfer base; + + unsigned offset; +}; + +struct brw_buffer +{ + struct pipe_buffer base; + + /* One of either bo or user_buffer will be non-null, depending on + * whether this is a hardware or user buffer. + */ + struct brw_winsys_buffer *bo; + void *user_buffer; + + /* Mapped pointer?? + */ + void *ptr; +}; + + +union brw_surface_id { + struct { + unsigned face:3; + unsigned zslice:13; + unsigned level:16; + } bits; + unsigned value; +}; + + +struct brw_surface +{ + struct pipe_surface base; + + union brw_surface_id id; + unsigned cpp; + unsigned pitch; + unsigned draw_offset; + unsigned tiling; + + struct brw_surface_state ss; + struct brw_winsys_buffer *bo; + struct brw_surface *next, *prev; +}; + + + +struct brw_texture +{ + struct pipe_texture base; + struct brw_winsys_buffer *bo; + struct brw_surface_state ss; + + unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS]; + unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS]; + unsigned level_offset[PIPE_MAX_TEXTURE_LEVELS]; + + boolean compressed; + unsigned brw_target; + unsigned pitch; + unsigned tiling; + unsigned cpp; + unsigned total_height; + + struct brw_surface views[2]; +}; + + + +/* + * Cast wrappers + */ +static INLINE struct brw_screen * +brw_screen(struct pipe_screen *pscreen) +{ + return (struct brw_screen *) pscreen; +} + +static INLINE struct brw_transfer * +brw_transfer(struct pipe_transfer *transfer) +{ + return (struct brw_transfer *)transfer; +} + +static INLINE struct brw_surface * +brw_surface(struct pipe_surface *surface) +{ + return (struct brw_surface *)surface; +} + +static INLINE struct brw_buffer * +brw_buffer(struct pipe_buffer *buffer) +{ + return (struct brw_buffer *)buffer; +} + +static INLINE struct brw_texture * +brw_texture(struct pipe_texture *texture) +{ + return (struct brw_texture *)texture; +} + + +/* Pipe buffer helpers + */ +static INLINE boolean +brw_buffer_is_user_buffer( const struct pipe_buffer *buf ) +{ + return ((const struct brw_buffer *)buf)->user_buffer != NULL; +} + +unsigned +brw_surface_pitch( const struct pipe_surface *surface ); + +/*********************************************************************** + * Internal functions + */ +GLboolean brw_texture_layout(struct brw_screen *brw_screen, + struct brw_texture *tex ); + +void brw_update_texture( struct brw_screen *brw_screen, + struct brw_texture *tex ); + + +void brw_screen_tex_init( struct brw_screen *brw_screen ); +void brw_screen_tex_surface_init( struct brw_screen *brw_screen ); + +void brw_screen_buffer_init(struct brw_screen *brw_screen); + + +boolean brw_is_texture_referenced_by_bo( struct brw_screen *brw_screen, + struct pipe_texture *texture, + unsigned face, + unsigned level, + struct brw_winsys_buffer *bo ); + +boolean brw_is_buffer_referenced_by_bo( struct brw_screen *brw_screen, + struct pipe_buffer *buffer, + struct brw_winsys_buffer *bo ); + + + +#endif /* BRW_SCREEN_H */ diff --git a/src/gallium/drivers/i965/brw_screen_buffers.c b/src/gallium/drivers/i965/brw_screen_buffers.c new file mode 100644 index 00000000000..d8141a3f5b9 --- /dev/null +++ b/src/gallium/drivers/i965/brw_screen_buffers.c @@ -0,0 +1,202 @@ + +#include "util/u_memory.h" +#include "util/u_math.h" + +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" + +#include "brw_screen.h" +#include "brw_winsys.h" + + + +static void * +brw_buffer_map_range( struct pipe_screen *screen, + struct pipe_buffer *buffer, + unsigned offset, + unsigned length, + unsigned usage ) +{ + struct brw_screen *bscreen = brw_screen(screen); + struct brw_winsys_screen *sws = bscreen->sws; + struct brw_buffer *buf = brw_buffer( buffer ); + + if (buf->user_buffer) + return buf->user_buffer; + + return sws->bo_map( buf->bo, + BRW_DATA_OTHER, + offset, + length, + (usage & PIPE_BUFFER_USAGE_CPU_WRITE) ? TRUE : FALSE, + (usage & PIPE_BUFFER_USAGE_DISCARD) ? TRUE : FALSE, + (usage & PIPE_BUFFER_USAGE_FLUSH_EXPLICIT) ? TRUE : FALSE); +} + +static void * +brw_buffer_map( struct pipe_screen *screen, + struct pipe_buffer *buffer, + unsigned usage ) +{ + struct brw_screen *bscreen = brw_screen(screen); + struct brw_winsys_screen *sws = bscreen->sws; + struct brw_buffer *buf = brw_buffer( buffer ); + + if (buf->user_buffer) + return buf->user_buffer; + + return sws->bo_map( buf->bo, + BRW_DATA_OTHER, + 0, + buf->base.size, + (usage & PIPE_BUFFER_USAGE_CPU_WRITE) ? TRUE : FALSE, + FALSE, + FALSE); +} + + +static void +brw_buffer_flush_mapped_range( struct pipe_screen *screen, + struct pipe_buffer *buffer, + unsigned offset, + unsigned length ) +{ + struct brw_screen *bscreen = brw_screen(screen); + struct brw_winsys_screen *sws = bscreen->sws; + struct brw_buffer *buf = brw_buffer( buffer ); + + if (buf->user_buffer) + return; + + sws->bo_flush_range( buf->bo, + offset, + length ); +} + + +static void +brw_buffer_unmap( struct pipe_screen *screen, + struct pipe_buffer *buffer ) +{ + struct brw_screen *bscreen = brw_screen(screen); + struct brw_winsys_screen *sws = bscreen->sws; + struct brw_buffer *buf = brw_buffer( buffer ); + + if (buf->bo) + sws->bo_unmap(buf->bo); +} + +static void +brw_buffer_destroy( struct pipe_buffer *buffer ) +{ + struct brw_buffer *buf = brw_buffer( buffer ); + + assert(!p_atomic_read(&buffer->reference.count)); + + bo_reference(&buf->bo, NULL); + FREE(buf); +} + + +static struct pipe_buffer * +brw_buffer_create(struct pipe_screen *screen, + unsigned alignment, + unsigned usage, + unsigned size) +{ + struct brw_screen *bscreen = brw_screen(screen); + struct brw_winsys_screen *sws = bscreen->sws; + struct brw_buffer *buf; + unsigned buffer_type; + enum pipe_error ret; + + buf = CALLOC_STRUCT(brw_buffer); + if (!buf) + return NULL; + + pipe_reference_init(&buf->base.reference, 1); + buf->base.screen = screen; + buf->base.alignment = alignment; + buf->base.usage = usage; + buf->base.size = size; + + switch (usage & (PIPE_BUFFER_USAGE_VERTEX | + PIPE_BUFFER_USAGE_INDEX | + PIPE_BUFFER_USAGE_PIXEL | + PIPE_BUFFER_USAGE_CONSTANT)) + { + case PIPE_BUFFER_USAGE_VERTEX: + case PIPE_BUFFER_USAGE_INDEX: + case (PIPE_BUFFER_USAGE_VERTEX|PIPE_BUFFER_USAGE_INDEX): + buffer_type = BRW_BUFFER_TYPE_VERTEX; + break; + + case PIPE_BUFFER_USAGE_PIXEL: + buffer_type = BRW_BUFFER_TYPE_PIXEL; + break; + + case PIPE_BUFFER_USAGE_CONSTANT: + buffer_type = BRW_BUFFER_TYPE_SHADER_CONSTANTS; + break; + + default: + buffer_type = BRW_BUFFER_TYPE_GENERIC; + break; + } + + ret = sws->bo_alloc( sws, buffer_type, + size, alignment, + &buf->bo ); + if (ret != PIPE_OK) + return NULL; + + return &buf->base; +} + + +static struct pipe_buffer * +brw_user_buffer_create(struct pipe_screen *screen, + void *ptr, + unsigned bytes) +{ + struct brw_buffer *buf; + + buf = CALLOC_STRUCT(brw_buffer); + if (!buf) + return NULL; + + buf->user_buffer = ptr; + + pipe_reference_init(&buf->base.reference, 1); + buf->base.screen = screen; + buf->base.alignment = 1; + buf->base.usage = 0; + buf->base.size = bytes; + + return &buf->base; +} + + +boolean brw_is_buffer_referenced_by_bo( struct brw_screen *brw_screen, + struct pipe_buffer *buffer, + struct brw_winsys_buffer *bo ) +{ + struct brw_buffer *buf = brw_buffer(buffer); + if (buf->bo == NULL) + return FALSE; + + return brw_screen->sws->bo_references( bo, buf->bo ); +} + + +void brw_screen_buffer_init(struct brw_screen *brw_screen) +{ + brw_screen->base.buffer_create = brw_buffer_create; + brw_screen->base.user_buffer_create = brw_user_buffer_create; + brw_screen->base.buffer_map = brw_buffer_map; + brw_screen->base.buffer_map_range = brw_buffer_map_range; + brw_screen->base.buffer_flush_mapped_range = brw_buffer_flush_mapped_range; + brw_screen->base.buffer_unmap = brw_buffer_unmap; + brw_screen->base.buffer_destroy = brw_buffer_destroy; +} diff --git a/src/gallium/drivers/i965/brw_screen_surface.c b/src/gallium/drivers/i965/brw_screen_surface.c new file mode 100644 index 00000000000..e2b9954e596 --- /dev/null +++ b/src/gallium/drivers/i965/brw_screen_surface.c @@ -0,0 +1,262 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#include "util/u_memory.h" +#include "util/u_simple_list.h" +#include "util/u_math.h" + +#include "pipe/p_screen.h" +#include "brw_screen.h" +#include "brw_defines.h" +#include "brw_winsys.h" + +enum { + BRW_VIEW_LINEAR, + BRW_VIEW_IN_PLACE +}; + + +static boolean need_linear_view( struct brw_screen *brw_screen, + struct brw_texture *brw_texture, + union brw_surface_id id, + unsigned usage ) +{ +#if 0 + /* XXX: what about IDGNG? + */ + if (!BRW_IS_G4X(brw->brw_screen->pci_id)) + { + struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i]; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + + /* The original gen4 hardware couldn't set up WM surfaces pointing + * at an offset within a tile, which can happen when rendering to + * anything but the base level of a texture or the +X face/0 depth. + * This was fixed with the 4 Series hardware. + * + * For these original chips, you would have to make the depth and + * color destination surfaces include information on the texture + * type, LOD, face, and various limits to use them as a destination. + * + * This is easy in Gallium as surfaces are all backed by + * textures, but there's also a nasty requirement that the depth + * and the color surfaces all be of the same LOD, which is + * harder to get around as we can't look at a surface in + * isolation and decide if it's legal. + * + * Instead, end up being pessimistic and say that for i965, + * ... ?? + */ + if (brw_tex->tiling != I915_TILING_NONE && + (brw_tex_image_offset(brw_tex, face, level, zslize) & 4095)) { + if (BRW_DEBUG & DEBUG_VIEW) + debug_printf("%s: need surface view for non-aligned tex image\n", + __FUNCTION__); + return GL_TRUE; + } + } +#endif + + /* Tiled 3d textures don't have subsets that look like 2d surfaces: + */ + + /* Everything else should be fine to render to in-place: + */ + return GL_FALSE; +} + +/* Look at all texture views and figure out if any of them need to be + * back-copied into the texture for sampling + */ +void brw_update_texture( struct brw_screen *brw_screen, + struct brw_texture *tex ) +{ + /* currently nothing to do */ +} + + +/* Create a new surface with linear layout to serve as a render-target + * where it would be illegal (perhaps due to tiling constraints) to do + * this in-place. + * + * Currently not implmented, not sure if it's needed. + */ +static struct brw_surface *create_linear_view( struct brw_screen *brw_screen, + struct brw_texture *tex, + union brw_surface_id id, + unsigned usage ) +{ + return NULL; +} + + +/* Create a pipe_surface that just points directly into the existing + * texture's storage. + */ +static struct brw_surface *create_in_place_view( struct brw_screen *brw_screen, + struct brw_texture *tex, + union brw_surface_id id, + unsigned usage ) +{ + struct brw_surface *surface; + + surface = CALLOC_STRUCT(brw_surface); + if (surface == NULL) + return NULL; + + pipe_reference_init(&surface->base.reference, 1); + + /* XXX: ignoring render-to-slice-of-3d-texture + */ + assert(id.bits.zslice == 0); + + surface->base.format = tex->base.format; + surface->base.width = u_minify(tex->base.width0, id.bits.level); + surface->base.height = u_minify(tex->base.height0, id.bits.level); + surface->base.offset = tex->image_offset[id.bits.level][id.bits.face]; + surface->base.usage = usage; + surface->base.zslice = id.bits.zslice; + surface->base.face = id.bits.face; + surface->base.level = id.bits.level; + surface->id = id; + surface->cpp = tex->cpp; + surface->pitch = tex->pitch; + surface->tiling = tex->tiling; + + bo_reference( &surface->bo, tex->bo ); + pipe_texture_reference( &surface->base.texture, &tex->base ); + + surface->ss.ss0.surface_format = tex->ss.ss0.surface_format; + surface->ss.ss0.surface_type = BRW_SURFACE_2D; + + if (tex->tiling == BRW_TILING_NONE) { + surface->ss.ss1.base_addr = surface->base.offset; + } else { + uint32_t tile_offset = surface->base.offset % 4096; + + surface->ss.ss1.base_addr = surface->base.offset - tile_offset; + + if (brw_screen->chipset.is_g4x) { + if (tex->tiling == BRW_TILING_X) { + /* Note that the low bits of these fields are missing, so + * there's the possibility of getting in trouble. + */ + surface->ss.ss5.x_offset = (tile_offset % 512) / tex->cpp / 4; + surface->ss.ss5.y_offset = tile_offset / 512 / 2; + } else { + surface->ss.ss5.x_offset = (tile_offset % 128) / tex->cpp / 4; + surface->ss.ss5.y_offset = tile_offset / 128 / 2; + } + } + else { + assert(tile_offset == 0); + } + } + +#if 0 + if (region_bo != NULL) + surface->ss.ss1.base_addr += region_bo->offset; /* reloc */ +#endif + + surface->ss.ss2.width = surface->base.width - 1; + surface->ss.ss2.height = surface->base.height - 1; + surface->ss.ss3.tiled_surface = tex->ss.ss3.tiled_surface; + surface->ss.ss3.tile_walk = tex->ss.ss3.tile_walk; + surface->ss.ss3.pitch = tex->ss.ss3.pitch; + + return surface; +} + +/* Get a surface which is view into a texture + */ +static struct pipe_surface *brw_get_tex_surface(struct pipe_screen *screen, + struct pipe_texture *pt, + unsigned face, unsigned level, + unsigned zslice, + unsigned usage ) +{ + struct brw_texture *tex = brw_texture(pt); + struct brw_screen *bscreen = brw_screen(screen); + struct brw_surface *surface; + union brw_surface_id id; + int type; + + id.bits.face = face; + id.bits.level = level; + id.bits.zslice = zslice; + + if (need_linear_view(bscreen, tex, id, usage)) + type = BRW_VIEW_LINEAR; + else + type = BRW_VIEW_IN_PLACE; + + + foreach (surface, &tex->views[type]) { + if (id.value == surface->id.value) + return &surface->base; + } + + switch (type) { + case BRW_VIEW_LINEAR: + surface = create_linear_view( bscreen, tex, id, usage ); + break; + case BRW_VIEW_IN_PLACE: + surface = create_in_place_view( bscreen, tex, id, usage ); + break; + default: + return NULL; + } + + insert_at_head( &tex->views[type], surface ); + return &surface->base; +} + + +static void brw_tex_surface_destroy( struct pipe_surface *surf ) +{ + struct brw_surface *surface = brw_surface(surf); + + /* Unreference texture, shared buffer: + */ + remove_from_list(surface); + bo_reference(&surface->bo, NULL); + pipe_texture_reference( &surface->base.texture, NULL ); + + + FREE(surface); +} + + +void brw_screen_tex_surface_init( struct brw_screen *brw_screen ) +{ + brw_screen->base.get_tex_surface = brw_get_tex_surface; + brw_screen->base.tex_surface_destroy = brw_tex_surface_destroy; +} diff --git a/src/gallium/drivers/i965/brw_screen_tex_layout.c b/src/gallium/drivers/i965/brw_screen_tex_layout.c new file mode 100644 index 00000000000..894f4bea401 --- /dev/null +++ b/src/gallium/drivers/i965/brw_screen_tex_layout.c @@ -0,0 +1,414 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + +#include "pipe/p_format.h" + +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "brw_screen.h" +#include "brw_debug.h" +#include "brw_winsys.h" + +/* Code to layout images in a mipmap tree for i965. + */ + +static int +brw_tex_pitch_align (struct brw_texture *tex, + int pitch) +{ + if (!tex->compressed) { + int pitch_align; + + switch (tex->tiling) { + case BRW_TILING_X: + pitch_align = 512; + break; + case BRW_TILING_Y: + pitch_align = 128; + break; + default: + /* XXX: Untiled pitch alignment of 64 bytes for now to allow + * render-to-texture to work in all cases. This should + * probably be replaced at some point by some scheme to only + * do this when really necessary, for example standalone + * render target views. + */ + pitch_align = 64; + break; + } + + pitch = align(pitch * tex->cpp, pitch_align); + pitch /= tex->cpp; + } + + return pitch; +} + + +static void +brw_tex_alignment_unit(enum pipe_format pf, + GLuint *w, GLuint *h) +{ + switch (pf) { + case PIPE_FORMAT_DXT1_RGB: + case PIPE_FORMAT_DXT1_RGBA: + case PIPE_FORMAT_DXT3_RGBA: + case PIPE_FORMAT_DXT5_RGBA: + case PIPE_FORMAT_DXT1_SRGB: + case PIPE_FORMAT_DXT1_SRGBA: + case PIPE_FORMAT_DXT3_SRGBA: + case PIPE_FORMAT_DXT5_SRGBA: + *w = 4; + *h = 4; + break; + + default: + *w = 4; + *h = 2; + break; + } +} + + +static void +brw_tex_set_level_info(struct brw_texture *tex, + GLuint level, + GLuint nr_images, + GLuint x, GLuint y, + GLuint w, GLuint h, GLuint d) +{ + + if (BRW_DEBUG & DEBUG_TEXTURE) + debug_printf("%s level %d size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__, + level, w, h, d, x, y, tex->level_offset[level]); + + assert(tex->image_offset[level] == NULL); + assert(nr_images >= 1); + + tex->level_offset[level] = (x + y * tex->pitch) * tex->cpp; + tex->nr_images[level] = nr_images; + + tex->image_offset[level] = MALLOC(nr_images * sizeof(GLuint)); + tex->image_offset[level][0] = 0; +} + + +static void +brw_tex_set_image_offset(struct brw_texture *tex, + GLuint level, GLuint img, + GLuint x, GLuint y, + GLuint offset) +{ + assert((x == 0 && y == 0) || img != 0 || level != 0); + assert(img < tex->nr_images[level]); + + if (BRW_DEBUG & DEBUG_TEXTURE) + debug_printf("%s level %d img %d pos %d,%d image_offset %x\n", + __FUNCTION__, level, img, x, y, + tex->image_offset[level][img]); + + tex->image_offset[level][img] = (x + y * tex->pitch) * tex->cpp + offset; +} + + + +static void brw_layout_2d( struct brw_texture *tex ) +{ + GLuint align_h = 2, align_w = 4; + GLuint level; + GLuint x = 0; + GLuint y = 0; + GLuint width = tex->base.width0; + GLuint height = tex->base.height0; + + tex->pitch = tex->base.width0; + brw_tex_alignment_unit(tex->base.format, &align_w, &align_h); + + if (tex->compressed) { + tex->pitch = align(tex->base.width0, align_w); + } + + /* May need to adjust pitch to accomodate the placement of + * the 2nd mipmap. This occurs when the alignment + * constraints of mipmap placement push the right edge of the + * 2nd mipmap out past the width of its parent. + */ + if (tex->base.last_level > 0) { + GLuint mip1_width; + + if (tex->compressed) { + mip1_width = (align(u_minify(tex->base.width0, 1), align_w) + + align(u_minify(tex->base.width0, 2), align_w)); + } else { + mip1_width = (align(u_minify(tex->base.width0, 1), align_w) + + u_minify(tex->base.width0, 2)); + } + + if (mip1_width > tex->pitch) { + tex->pitch = mip1_width; + } + } + + /* Pitch must be a whole number of dwords, even though we + * express it in texels. + */ + tex->pitch = brw_tex_pitch_align (tex, tex->pitch); + tex->total_height = 0; + + for ( level = 0 ; level <= tex->base.last_level ; level++ ) { + GLuint img_height; + + brw_tex_set_level_info(tex, level, 1, x, y, width, height, 1); + + if (tex->compressed) + img_height = MAX2(1, height/4); + else + img_height = align(height, align_h); + + + /* Because the images are packed better, the final offset + * might not be the maximal one: + */ + tex->total_height = MAX2(tex->total_height, y + img_height); + + /* Layout_below: step right after second mipmap. + */ + if (level == 1) { + x += align(width, align_w); + } + else { + y += img_height; + } + + width = u_minify(width, 1); + height = u_minify(height, 1); + } +} + + +static boolean +brw_layout_cubemap_idgng( struct brw_texture *tex ) +{ + GLuint align_h = 2, align_w = 4; + GLuint level; + GLuint x = 0; + GLuint y = 0; + GLuint width = tex->base.width0; + GLuint height = tex->base.height0; + GLuint qpitch = 0; + GLuint y_pitch = 0; + + tex->pitch = tex->base.width0; + brw_tex_alignment_unit(tex->base.format, &align_w, &align_h); + y_pitch = align(height, align_h); + + if (tex->compressed) { + tex->pitch = align(tex->base.width0, align_w); + } + + if (tex->base.last_level != 0) { + GLuint mip1_width; + + if (tex->compressed) { + mip1_width = (align(u_minify(tex->base.width0, 1), align_w) + + align(u_minify(tex->base.width0, 2), align_w)); + } else { + mip1_width = (align(u_minify(tex->base.width0, 1), align_w) + + u_minify(tex->base.width0, 2)); + } + + if (mip1_width > tex->pitch) { + tex->pitch = mip1_width; + } + } + + tex->pitch = brw_tex_pitch_align(tex, tex->pitch); + + if (tex->compressed) { + qpitch = ((y_pitch + + align(u_minify(y_pitch, 1), align_h) + + 11 * align_h) / 4) * tex->pitch * tex->cpp; + + tex->total_height = ((y_pitch + + align(u_minify(y_pitch, 1), align_h) + + 11 * align_h) / 4) * 6; + } else { + qpitch = (y_pitch + + align(u_minify(y_pitch, 1), align_h) + + 11 * align_h) * tex->pitch * tex->cpp; + + tex->total_height = (y_pitch + + align(u_minify(y_pitch, 1), align_h) + + 11 * align_h) * 6; + } + + for (level = 0; level <= tex->base.last_level; level++) { + GLuint img_height; + GLuint nr_images = 6; + GLuint q = 0; + + brw_tex_set_level_info(tex, level, nr_images, x, y, width, height, 1); + + for (q = 0; q < nr_images; q++) + brw_tex_set_image_offset(tex, level, q, x, y, q * qpitch); + + if (tex->compressed) + img_height = MAX2(1, height/4); + else + img_height = align(height, align_h); + + if (level == 1) { + x += align(width, align_w); + } + else { + y += img_height; + } + + width = u_minify(width, 1); + height = u_minify(height, 1); + } + + return TRUE; +} + + +static boolean +brw_layout_3d_cube( struct brw_texture *tex ) +{ + GLuint width = tex->base.width0; + GLuint height = tex->base.height0; + GLuint depth = tex->base.depth0; + GLuint pack_x_pitch, pack_x_nr; + GLuint pack_y_pitch; + GLuint level; + GLuint align_h = 2; + GLuint align_w = 4; + + tex->total_height = 0; + brw_tex_alignment_unit(tex->base.format, &align_w, &align_h); + + if (tex->compressed) { + tex->pitch = align(width, align_w); + pack_y_pitch = (height + 3) / 4; + } else { + tex->pitch = brw_tex_pitch_align(tex, tex->base.width0); + pack_y_pitch = align(tex->base.height0, align_h); + } + + pack_x_pitch = width; + pack_x_nr = 1; + + for (level = 0 ; level <= tex->base.last_level ; level++) { + GLuint nr_images = tex->base.target == PIPE_TEXTURE_3D ? depth : 6; + GLint x = 0; + GLint y = 0; + GLint q, j; + + brw_tex_set_level_info(tex, level, nr_images, + 0, tex->total_height, + width, height, depth); + + for (q = 0; q < nr_images;) { + for (j = 0; j < pack_x_nr && q < nr_images; j++, q++) { + brw_tex_set_image_offset(tex, level, q, x, y, 0); + x += pack_x_pitch; + } + + x = 0; + y += pack_y_pitch; + } + + + tex->total_height += y; + width = u_minify(width, 1); + height = u_minify(height, 1); + depth = u_minify(depth, 1); + + if (tex->compressed) { + pack_y_pitch = (height + 3) / 4; + + if (pack_x_pitch > align(width, align_w)) { + pack_x_pitch = align(width, align_w); + pack_x_nr <<= 1; + } + } else { + if (pack_x_pitch > 4) { + pack_x_pitch >>= 1; + pack_x_nr <<= 1; + assert(pack_x_pitch * pack_x_nr <= tex->pitch); + } + + if (pack_y_pitch > 2) { + pack_y_pitch >>= 1; + pack_y_pitch = align(pack_y_pitch, align_h); + } + } + } + + /* The 965's sampler lays cachelines out according to how accesses + * in the texture surfaces run, so they may be "vertical" through + * memory. As a result, the docs say in Surface Padding Requirements: + * Sampling Engine Surfaces that two extra rows of padding are required. + */ + if (tex->base.target == PIPE_TEXTURE_CUBE) + tex->total_height += 2; + + return TRUE; +} + + + +GLboolean brw_texture_layout(struct brw_screen *brw_screen, + struct brw_texture *tex ) +{ + switch (tex->base.target) { + case PIPE_TEXTURE_CUBE: + if (brw_screen->chipset.is_igdng) + brw_layout_cubemap_idgng( tex ); + else + brw_layout_3d_cube( tex ); + break; + + case PIPE_TEXTURE_3D: + brw_layout_3d_cube( tex ); + break; + + default: + brw_layout_2d( tex ); + break; + } + + if (BRW_DEBUG & DEBUG_TEXTURE) + debug_printf("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, + tex->pitch, + tex->total_height, + tex->cpp, + tex->pitch * tex->total_height * tex->cpp ); + + return GL_TRUE; +} diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c new file mode 100644 index 00000000000..feb9d5f7657 --- /dev/null +++ b/src/gallium/drivers/i965/brw_screen_texture.c @@ -0,0 +1,573 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#include "util/u_memory.h" +#include "util/u_simple_list.h" +#include "util/u_format.h" + +#include "brw_screen.h" +#include "brw_defines.h" +#include "brw_structs.h" +#include "brw_winsys.h" + + + +static GLuint translate_tex_target( unsigned target ) +{ + switch (target) { + case PIPE_TEXTURE_1D: + return BRW_SURFACE_1D; + + case PIPE_TEXTURE_2D: + return BRW_SURFACE_2D; + + case PIPE_TEXTURE_3D: + return BRW_SURFACE_3D; + + case PIPE_TEXTURE_CUBE: + return BRW_SURFACE_CUBE; + + default: + assert(0); + return BRW_SURFACE_1D; + } +} + + +static GLuint translate_tex_format( enum pipe_format pf ) +{ + switch( pf ) { + case PIPE_FORMAT_L8_UNORM: + return BRW_SURFACEFORMAT_L8_UNORM; + + case PIPE_FORMAT_I8_UNORM: + return BRW_SURFACEFORMAT_I8_UNORM; + + case PIPE_FORMAT_A8_UNORM: + return BRW_SURFACEFORMAT_A8_UNORM; + + case PIPE_FORMAT_L16_UNORM: + return BRW_SURFACEFORMAT_L16_UNORM; + + /* XXX: Add these to gallium + case PIPE_FORMAT_I16_UNORM: + return BRW_SURFACEFORMAT_I16_UNORM; + + case PIPE_FORMAT_A16_UNORM: + return BRW_SURFACEFORMAT_A16_UNORM; + */ + + case PIPE_FORMAT_A8L8_UNORM: + return BRW_SURFACEFORMAT_L8A8_UNORM; + + case PIPE_FORMAT_R5G6B5_UNORM: + return BRW_SURFACEFORMAT_B5G6R5_UNORM; + + case PIPE_FORMAT_A1R5G5B5_UNORM: + return BRW_SURFACEFORMAT_B5G5R5A1_UNORM; + + case PIPE_FORMAT_A4R4G4B4_UNORM: + return BRW_SURFACEFORMAT_B4G4R4A4_UNORM; + + case PIPE_FORMAT_X8R8G8B8_UNORM: + return BRW_SURFACEFORMAT_R8G8B8X8_UNORM; + + case PIPE_FORMAT_A8R8G8B8_UNORM: + return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + + /* + * Video formats + */ + + case PIPE_FORMAT_YCBCR_REV: + return BRW_SURFACEFORMAT_YCRCB_NORMAL; + + case PIPE_FORMAT_YCBCR: + return BRW_SURFACEFORMAT_YCRCB_SWAPUVY; + + /* + * Compressed formats. + */ + /* XXX: Add FXT to gallium? + case PIPE_FORMAT_FXT1_RGBA: + return BRW_SURFACEFORMAT_FXT1; + */ + + case PIPE_FORMAT_DXT1_RGB: + return BRW_SURFACEFORMAT_DXT1_RGB; + + case PIPE_FORMAT_DXT1_RGBA: + return BRW_SURFACEFORMAT_BC1_UNORM; + + case PIPE_FORMAT_DXT3_RGBA: + return BRW_SURFACEFORMAT_BC2_UNORM; + + case PIPE_FORMAT_DXT5_RGBA: + return BRW_SURFACEFORMAT_BC3_UNORM; + + /* + * sRGB formats + */ + + case PIPE_FORMAT_R8G8B8A8_SRGB: + return BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB; + + case PIPE_FORMAT_A8L8_SRGB: + return BRW_SURFACEFORMAT_L8A8_UNORM_SRGB; + + case PIPE_FORMAT_L8_SRGB: + return BRW_SURFACEFORMAT_L8_UNORM_SRGB; + + case PIPE_FORMAT_DXT1_SRGB: + return BRW_SURFACEFORMAT_BC1_UNORM_SRGB; + + /* + * Depth formats + */ + + case PIPE_FORMAT_Z16_UNORM: + return BRW_SURFACEFORMAT_I16_UNORM; + + case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + return BRW_SURFACEFORMAT_I24X8_UNORM; + + case PIPE_FORMAT_Z32_FLOAT: + return BRW_SURFACEFORMAT_I32_FLOAT; + + /* XXX: presumably for bump mapping. Add this to mesa state + * tracker? + * + * XXX: Add flipped versions of these formats to Gallium. + */ + case PIPE_FORMAT_R8G8_SNORM: + return BRW_SURFACEFORMAT_R8G8_SNORM; + + case PIPE_FORMAT_R8G8B8A8_SNORM: + return BRW_SURFACEFORMAT_R8G8B8A8_SNORM; + + default: + return BRW_SURFACEFORMAT_INVALID; + } +} + + + + + +static struct pipe_texture *brw_texture_create( struct pipe_screen *screen, + const struct pipe_texture *templ ) + +{ + struct brw_screen *bscreen = brw_screen(screen); + struct brw_texture *tex; + enum brw_buffer_type buffer_type; + enum pipe_error ret; + + tex = CALLOC_STRUCT(brw_texture); + if (tex == NULL) + return NULL; + + memcpy(&tex->base, templ, sizeof *templ); + pipe_reference_init(&tex->base.reference, 1); + tex->base.screen = screen; + + /* XXX: compressed textures need special treatment here + */ + tex->cpp = util_format_get_blocksize(tex->base.format); + tex->compressed = util_format_is_compressed(tex->base.format); + + make_empty_list(&tex->views[0]); + make_empty_list(&tex->views[1]); + + /* XXX: No tiling with compressed textures?? + */ + if (tex->compressed == 0 && + !bscreen->no_tiling) + { + if (bscreen->chipset.is_965 && + util_format_is_depth_or_stencil(templ->format)) + tex->tiling = BRW_TILING_Y; + else + tex->tiling = BRW_TILING_X; + } + else { + tex->tiling = BRW_TILING_NONE; + } + + + + + if (!brw_texture_layout( bscreen, tex )) + goto fail; + + + if (templ->tex_usage & (PIPE_TEXTURE_USAGE_DISPLAY_TARGET | + PIPE_TEXTURE_USAGE_PRIMARY)) { + buffer_type = BRW_BUFFER_TYPE_SCANOUT; + } + else { + buffer_type = BRW_BUFFER_TYPE_TEXTURE; + } + + ret = bscreen->sws->bo_alloc( bscreen->sws, + buffer_type, + tex->pitch * tex->total_height * tex->cpp, + 64, + &tex->bo ); + if (ret) + goto fail; + + tex->ss.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; + tex->ss.ss0.surface_type = translate_tex_target(tex->base.target); + tex->ss.ss0.surface_format = translate_tex_format(tex->base.format); + assert(tex->ss.ss0.surface_format != BRW_SURFACEFORMAT_INVALID); + + /* This is ok for all textures with channel width 8bit or less: + */ +/* tex->ss.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */ + + + /* XXX: what happens when tex->bo->offset changes??? + */ + tex->ss.ss1.base_addr = 0; /* reloc */ + tex->ss.ss2.mip_count = tex->base.last_level; + tex->ss.ss2.width = tex->base.width0 - 1; + tex->ss.ss2.height = tex->base.height0 - 1; + + switch (tex->tiling) { + case BRW_TILING_NONE: + tex->ss.ss3.tiled_surface = 0; + tex->ss.ss3.tile_walk = 0; + break; + case BRW_TILING_X: + tex->ss.ss3.tiled_surface = 1; + tex->ss.ss3.tile_walk = BRW_TILEWALK_XMAJOR; + break; + case BRW_TILING_Y: + tex->ss.ss3.tiled_surface = 1; + tex->ss.ss3.tile_walk = BRW_TILEWALK_YMAJOR; + break; + } + + tex->ss.ss3.pitch = (tex->pitch * tex->cpp) - 1; + tex->ss.ss3.depth = tex->base.depth0 - 1; + + tex->ss.ss4.min_lod = 0; + + if (tex->base.target == PIPE_TEXTURE_CUBE) { + tex->ss.ss0.cube_pos_x = 1; + tex->ss.ss0.cube_pos_y = 1; + tex->ss.ss0.cube_pos_z = 1; + tex->ss.ss0.cube_neg_x = 1; + tex->ss.ss0.cube_neg_y = 1; + tex->ss.ss0.cube_neg_z = 1; + } + + return &tex->base; + +fail: + bo_reference(&tex->bo, NULL); + FREE(tex); + return NULL; +} + +static struct pipe_texture *brw_texture_blanket(struct pipe_screen *screen, + const struct pipe_texture *templ, + const unsigned *stride, + struct pipe_buffer *buffer) +{ + return NULL; +} + +static void brw_texture_destroy(struct pipe_texture *pt) +{ + struct brw_texture *tex = brw_texture(pt); + bo_reference(&tex->bo, NULL); + FREE(pt); +} + + +static boolean brw_is_format_supported( struct pipe_screen *screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, + unsigned geom_flags ) +{ + return translate_tex_format(format) != BRW_SURFACEFORMAT_INVALID; +} + + +boolean brw_is_texture_referenced_by_bo( struct brw_screen *brw_screen, + struct pipe_texture *texture, + unsigned face, + unsigned level, + struct brw_winsys_buffer *bo ) +{ + struct brw_texture *tex = brw_texture(texture); + struct brw_surface *surf; + int i; + + /* XXX: this is subject to false positives if the underlying + * texture BO is referenced, we can't tell whether the sub-region + * we care about participates in that. + */ + if (brw_screen->sws->bo_references( bo, tex->bo )) + return TRUE; + + /* Find any view on this texture for this face/level and see if it + * is referenced: + */ + for (i = 0; i < 2; i++) { + foreach (surf, &tex->views[i]) { + if (surf->bo == tex->bo) + continue; + + if (surf->id.bits.face != face || + surf->id.bits.level != level) + continue; + + if (brw_screen->sws->bo_references( bo, surf->bo)) + return TRUE; + } + } + + return FALSE; +} + + +/* + * Transfer functions + */ + +static struct pipe_transfer* +brw_get_tex_transfer(struct pipe_screen *screen, + struct pipe_texture *texture, + unsigned face, unsigned level, unsigned zslice, + enum pipe_transfer_usage usage, unsigned x, unsigned y, + unsigned w, unsigned h) +{ + struct brw_texture *tex = brw_texture(texture); + struct brw_transfer *trans; + unsigned offset; /* in bytes */ + + if (texture->target == PIPE_TEXTURE_CUBE) { + offset = tex->image_offset[level][face]; + } else if (texture->target == PIPE_TEXTURE_3D) { + offset = tex->image_offset[level][zslice]; + } else { + offset = tex->image_offset[level][0]; + assert(face == 0); + assert(zslice == 0); + } + + trans = CALLOC_STRUCT(brw_transfer); + if (trans) { + pipe_texture_reference(&trans->base.texture, texture); + trans->base.x = x; + trans->base.y = y; + trans->base.width = w; + trans->base.height = h; + trans->base.stride = tex->pitch * tex->cpp; + trans->offset = offset; + trans->base.usage = usage; + } + return &trans->base; +} + +static void * +brw_transfer_map(struct pipe_screen *screen, + struct pipe_transfer *transfer) +{ + struct brw_texture *tex = brw_texture(transfer->texture); + struct brw_winsys_screen *sws = brw_screen(screen)->sws; + char *map; + unsigned usage = transfer->usage; + + map = sws->bo_map(tex->bo, + BRW_DATA_OTHER, + 0, + tex->bo->size, + (usage & PIPE_TRANSFER_WRITE) ? TRUE : FALSE, + (usage & 0) ? TRUE : FALSE, + (usage & 0) ? TRUE : FALSE); + + if (!map) + return NULL; + + /* XXX: blocksize and compressed textures + */ + return map + brw_transfer(transfer)->offset + + transfer->y /* / transfer->block.height */ * transfer->stride + + transfer->x /* / transfer->block.width */ * brw_texture(transfer->texture)->cpp; +} + +static void +brw_transfer_unmap(struct pipe_screen *screen, + struct pipe_transfer *transfer) +{ + struct brw_texture *tex = brw_texture(transfer->texture); + struct brw_winsys_screen *sws = brw_screen(screen)->sws; + + sws->bo_unmap(tex->bo); +} + +static void +brw_tex_transfer_destroy(struct pipe_transfer *trans) +{ + pipe_texture_reference(&trans->texture, NULL); + FREE(trans); +} + + +/* + * Functions exported to the winsys + */ + +boolean brw_texture_get_winsys_buffer(struct pipe_texture *texture, + struct brw_winsys_buffer **buffer, + unsigned *stride) +{ + struct brw_texture *tex = brw_texture(texture); + + *buffer = tex->bo; + if (stride) + *stride = tex->pitch * tex->cpp; + + return TRUE; +} + +struct pipe_texture * +brw_texture_blanket_winsys_buffer(struct pipe_screen *screen, + const struct pipe_texture *templ, + unsigned pitch, + unsigned tiling, + struct brw_winsys_buffer *buffer) +{ + struct brw_screen *bscreen = brw_screen(screen); + struct brw_texture *tex; + + if (templ->target != PIPE_TEXTURE_2D || + templ->last_level != 0 || + templ->depth0 != 1) + return NULL; + + if (util_format_is_compressed(templ->format)) + return NULL; + + tex = CALLOC_STRUCT(brw_texture); + if (!tex) + return NULL; + + memcpy(&tex->base, templ, sizeof *templ); + pipe_reference_init(&tex->base.reference, 1); + tex->base.screen = screen; + + /* XXX: cpp vs. blocksize + */ + tex->cpp = util_format_get_blocksize(tex->base.format); + tex->tiling = tiling; + + make_empty_list(&tex->views[0]); + make_empty_list(&tex->views[1]); + + if (!brw_texture_layout(bscreen, tex)) + goto fail; + + /* XXX Maybe some more checks? */ + if ((pitch / tex->cpp) < tex->pitch) + goto fail; + + tex->pitch = pitch / tex->cpp; + + tex->bo = buffer; + + /* fix this warning */ +#if 0 + if (tex->size > buffer->size) + goto fail; +#endif + + tex->ss.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; + tex->ss.ss0.surface_type = translate_tex_target(tex->base.target); + tex->ss.ss0.surface_format = translate_tex_format(tex->base.format); + assert(tex->ss.ss0.surface_format != BRW_SURFACEFORMAT_INVALID); + + /* This is ok for all textures with channel width 8bit or less: + */ +/* tex->ss.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */ + + + /* XXX: what happens when tex->bo->offset changes??? + */ + tex->ss.ss1.base_addr = 0; /* reloc */ + tex->ss.ss2.mip_count = tex->base.last_level; + tex->ss.ss2.width = tex->base.width0 - 1; + tex->ss.ss2.height = tex->base.height0 - 1; + + switch (tex->tiling) { + case BRW_TILING_NONE: + tex->ss.ss3.tiled_surface = 0; + tex->ss.ss3.tile_walk = 0; + break; + case BRW_TILING_X: + tex->ss.ss3.tiled_surface = 1; + tex->ss.ss3.tile_walk = BRW_TILEWALK_XMAJOR; + break; + case BRW_TILING_Y: + tex->ss.ss3.tiled_surface = 1; + tex->ss.ss3.tile_walk = BRW_TILEWALK_YMAJOR; + break; + } + + tex->ss.ss3.pitch = (tex->pitch * tex->cpp) - 1; + tex->ss.ss3.depth = tex->base.depth0 - 1; + + tex->ss.ss4.min_lod = 0; + + return &tex->base; + +fail: + FREE(tex); + return NULL; +} + +void brw_screen_tex_init( struct brw_screen *brw_screen ) +{ + brw_screen->base.is_format_supported = brw_is_format_supported; + brw_screen->base.texture_create = brw_texture_create; + brw_screen->base.texture_destroy = brw_texture_destroy; + brw_screen->base.texture_blanket = brw_texture_blanket; + brw_screen->base.get_tex_transfer = brw_get_tex_transfer; + brw_screen->base.transfer_map = brw_transfer_map; + brw_screen->base.transfer_unmap = brw_transfer_unmap; + brw_screen->base.tex_transfer_destroy = brw_tex_transfer_destroy; +} diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c new file mode 100644 index 00000000000..e1986a9dbbd --- /dev/null +++ b/src/gallium/drivers/i965/brw_sf.c @@ -0,0 +1,216 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#include "pipe/p_state.h" + +#include "brw_batchbuffer.h" +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_pipe_rast.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_sf.h" +#include "brw_state.h" + +static enum pipe_error compile_sf_prog( struct brw_context *brw, + struct brw_sf_prog_key *key, + struct brw_winsys_buffer **bo_out ) +{ + enum pipe_error ret; + struct brw_sf_compile c; + const GLuint *program; + GLuint program_size; + + memset(&c, 0, sizeof(c)); + + /* Begin the compilation: + */ + brw_init_compile(brw, &c.func); + + c.key = *key; + c.nr_attrs = c.key.nr_attrs; + c.nr_attr_regs = (c.nr_attrs+1)/2; + c.nr_setup_attrs = c.key.nr_attrs; + c.nr_setup_regs = (c.nr_setup_attrs+1)/2; + + c.prog_data.urb_read_length = c.nr_attr_regs; + c.prog_data.urb_entry_size = c.nr_setup_regs * 2; + + /* Special case when there are no attributes to setup. + * + * XXX: should be able to set nr_setup_attrs to nr_attrs-1 -- but + * breaks vp-tris.c + */ + if (c.nr_attrs - 1 == 0) { + c.nr_verts = 0; + brw_emit_null_setup( &c ); + } + else { + /* Which primitive? Or all three? + */ + switch (key->primitive) { + case SF_TRIANGLES: + c.nr_verts = 3; + brw_emit_tri_setup( &c, GL_TRUE ); + break; + case SF_LINES: + c.nr_verts = 2; + brw_emit_line_setup( &c, GL_TRUE ); + break; + case SF_POINTS: + c.nr_verts = 1; + if (key->do_point_sprite) + brw_emit_point_sprite_setup( &c, GL_TRUE ); + else + brw_emit_point_setup( &c, GL_TRUE ); + break; + case SF_UNFILLED_TRIS: + c.nr_verts = 3; + brw_emit_anyprim_setup( &c ); + break; + default: + assert(0); + return PIPE_ERROR_BAD_INPUT; + } + } + + /* get the program + */ + ret = brw_get_program(&c.func, &program, &program_size); + if (ret) + return ret; + + /* Upload + */ + ret = brw_upload_cache( &brw->cache, BRW_SF_PROG, + &c.key, sizeof(c.key), + NULL, 0, + program, program_size, + &c.prog_data, + &brw->sf.prog_data, + bo_out); + if (ret) + return ret; + + return PIPE_OK; +} + +/* Calculate interpolants for triangle and line rasterization. + */ +static enum pipe_error upload_sf_prog(struct brw_context *brw) +{ + const struct brw_fs_signature *sig = &brw->curr.fragment_shader->signature; + struct brw_sf_prog_key key; + enum pipe_error ret; + unsigned i; + + memset(&key, 0, sizeof(key)); + + /* Populate the key, noting state dependencies: + */ + + /* XXX: Add one to account for the position input. + */ + /* PIPE_NEW_FRAGMENT_SIGNATURE */ + key.nr_attrs = sig->nr_inputs + 1; + + + /* XXX: why is position required to be linear? why do we care + * about it at all? + */ + key.linear_attrs = 1; /* position -- but why? */ + + for (i = 0; i < sig->nr_inputs; i++) { + switch (sig->input[i].interp) { + case TGSI_INTERPOLATE_CONSTANT: + break; + case TGSI_INTERPOLATE_LINEAR: + key.linear_attrs |= 1 << (i+1); + break; + case TGSI_INTERPOLATE_PERSPECTIVE: + key.persp_attrs |= 1 << (i+1); + break; + } + } + + /* BRW_NEW_REDUCED_PRIMITIVE */ + switch (brw->reduced_primitive) { + case PIPE_PRIM_TRIANGLES: + /* PIPE_NEW_RAST + */ + if (brw->curr.rast->templ.fill_cw != PIPE_POLYGON_MODE_FILL || + brw->curr.rast->templ.fill_ccw != PIPE_POLYGON_MODE_FILL) + key.primitive = SF_UNFILLED_TRIS; + else + key.primitive = SF_TRIANGLES; + break; + case PIPE_PRIM_LINES: + key.primitive = SF_LINES; + break; + case PIPE_PRIM_POINTS: + key.primitive = SF_POINTS; + break; + } + + key.do_point_sprite = brw->curr.rast->templ.point_sprite; + key.sprite_origin_lower_left = 0; /* XXX: ctx->Point.SpriteOrigin - fix rast state */ + key.do_flat_shading = brw->curr.rast->templ.flatshade; + key.do_twoside_color = brw->curr.rast->templ.light_twoside; + + if (key.do_twoside_color) { + key.frontface_ccw = (brw->curr.rast->templ.front_winding == + PIPE_WINDING_CCW); + } + + if (brw_search_cache(&brw->cache, BRW_SF_PROG, + &key, sizeof(key), + NULL, 0, + &brw->sf.prog_data, + &brw->sf.prog_bo)) + return PIPE_OK; + + ret = compile_sf_prog( brw, &key, &brw->sf.prog_bo ); + if (ret) + return ret; + + return PIPE_OK; +} + + +const struct brw_tracked_state brw_sf_prog = { + .dirty = { + .mesa = (PIPE_NEW_RAST | PIPE_NEW_FRAGMENT_SIGNATURE), + .brw = (BRW_NEW_REDUCED_PRIMITIVE), + .cache = 0 + }, + .prepare = upload_sf_prog +}; + diff --git a/src/gallium/drivers/i965/brw_sf.h b/src/gallium/drivers/i965/brw_sf.h new file mode 100644 index 00000000000..a895c7d2f6a --- /dev/null +++ b/src/gallium/drivers/i965/brw_sf.h @@ -0,0 +1,122 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#ifndef BRW_SF_H +#define BRW_SF_H + + +#include "brw_context.h" +#include "brw_eu.h" + + +#define SF_POINTS 0 +#define SF_LINES 1 +#define SF_TRIANGLES 2 +#define SF_UNFILLED_TRIS 3 + +struct brw_sf_prog_key { + + /* Bitmask of linear and perspective interpolated inputs, 0..nr + */ + GLuint persp_attrs:32; + GLuint linear_attrs:32; + GLuint point_coord_replace_attrs:32; + + GLuint nr_attrs:8; + GLuint primitive:2; + GLuint do_twoside_color:1; + GLuint do_flat_shading:1; + GLuint frontface_ccw:1; + GLuint do_point_sprite:1; + GLuint sprite_origin_lower_left:1; + GLuint pad:17; + + GLuint attr_col0:8; + GLuint attr_col1:8; + GLuint attr_bfc0:8; + GLuint attr_bfc1:8; +}; + +struct brw_sf_point_tex { + GLboolean CoordReplace; +}; + +struct brw_sf_compile { + struct brw_compile func; + struct brw_sf_prog_key key; + struct brw_sf_prog_data prog_data; + + struct brw_reg pv; + struct brw_reg det; + struct brw_reg dx0; + struct brw_reg dx2; + struct brw_reg dy0; + struct brw_reg dy2; + + /* z and 1/w passed in seperately: + */ + struct brw_reg z[3]; + struct brw_reg inv_w[3]; + + /* The vertices: + */ + struct brw_reg vert[3]; + + /* Temporaries, allocated after last vertex reg. + */ + struct brw_reg inv_det; + struct brw_reg a1_sub_a0; + struct brw_reg a2_sub_a0; + struct brw_reg tmp; + + struct brw_reg m1Cx; + struct brw_reg m2Cy; + struct brw_reg m3C0; + + GLuint nr_verts; + GLuint nr_attrs; + GLuint nr_attr_regs; + GLuint nr_setup_attrs; + GLuint nr_setup_regs; + + GLuint point_coord_replace_mask; +}; + + +void brw_emit_null_setup( struct brw_sf_compile *c ); +void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate ); +void brw_emit_line_setup( struct brw_sf_compile *c, GLboolean allocate ); +void brw_emit_point_setup( struct brw_sf_compile *c, GLboolean allocate ); +void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate ); +void brw_emit_anyprim_setup( struct brw_sf_compile *c ); + +#endif diff --git a/src/gallium/drivers/i965/brw_sf_emit.c b/src/gallium/drivers/i965/brw_sf_emit.c new file mode 100644 index 00000000000..3b85725e368 --- /dev/null +++ b/src/gallium/drivers/i965/brw_sf_emit.c @@ -0,0 +1,765 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#include "brw_batchbuffer.h" + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_sf.h" + + +static struct brw_reg get_vert_attr(struct brw_sf_compile *c, + struct brw_reg vert, + GLuint attr) +{ + GLuint off = attr / 2; + GLuint sub = attr % 2; + + return brw_vec4_grf(vert.nr + off, sub * 4); +} + + +/*********************************************************************** + * Twoside lighting + */ +static void copy_bfc( struct brw_sf_compile *c, + struct brw_reg vert ) +{ + struct brw_compile *p = &c->func; + + if (c->key.attr_col0 && c->key.attr_bfc0) + brw_MOV(p, + get_vert_attr(c, vert, c->key.attr_col0), + get_vert_attr(c, vert, c->key.attr_bfc0)); + + if (c->key.attr_col1 && c->key.attr_bfc1) + brw_MOV(p, + get_vert_attr(c, vert, c->key.attr_col1), + get_vert_attr(c, vert, c->key.attr_bfc1)); +} + + +static void do_twoside_color( struct brw_sf_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *if_insn; + GLuint backface_conditional = c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L; + + /* Already done in clip program: + */ + if (c->key.primitive == SF_UNFILLED_TRIS) + return; + + /* XXX: What happens if BFC isn't present? This could only happen + * for user-supplied vertex programs, as t_vp_build.c always does + * the right thing. + */ + if (!(c->key.attr_col0 && c->key.attr_bfc0) && + !(c->key.attr_col1 && c->key.attr_bfc1)) + return; + + /* Need to use BRW_EXECUTE_4 and also do an 4-wide compare in order + * to get all channels active inside the IF. In the clipping code + * we run with NoMask, so it's not an option and we can use + * BRW_EXECUTE_1 for all comparisions. + */ + brw_push_insn_state(p); + brw_CMP(p, vec4(brw_null_reg()), backface_conditional, c->det, brw_imm_f(0)); + if_insn = brw_IF(p, BRW_EXECUTE_4); + { + switch (c->nr_verts) { + case 3: copy_bfc(c, c->vert[2]); + case 2: copy_bfc(c, c->vert[1]); + case 1: copy_bfc(c, c->vert[0]); + } + } + brw_ENDIF(p, if_insn); + brw_pop_insn_state(p); +} + + + +/*********************************************************************** + * Flat shading + */ + +#define VERT_RESULT_COLOR_BITS ((1<<VERT_RESULT_COL0) | \ + (1<<VERT_RESULT_COL1)) + +static void copy_colors( struct brw_sf_compile *c, + struct brw_reg dst, + struct brw_reg src) +{ + struct brw_compile *p = &c->func; + + if (c->key.attr_col0) + brw_MOV(p, + get_vert_attr(c, dst, c->key.attr_col0), + get_vert_attr(c, src, c->key.attr_col0)); + + if (c->key.attr_col1) + brw_MOV(p, + get_vert_attr(c, dst, c->key.attr_col1), + get_vert_attr(c, src, c->key.attr_col1)); + +} + + + +/* Need to use a computed jump to copy flatshaded attributes as the + * vertices are ordered according to y-coordinate before reaching this + * point, so the PV could be anywhere. + */ +static void do_flatshade_triangle( struct brw_sf_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg ip = brw_ip_reg(); + GLuint jmpi = 1; + GLuint nr = 0; + + if (c->key.attr_col0) + nr++; + + if (c->key.attr_col1) + nr++; + + if (nr == 0) + return; + + /* Already done in clip program: + */ + if (c->key.primitive == SF_UNFILLED_TRIS) + return; + + if (BRW_IS_IGDNG(p->brw)) + jmpi = 2; + + brw_push_insn_state(p); + + brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr*2+1))); + brw_JMPI(p, ip, ip, c->pv); + + copy_colors(c, c->vert[1], c->vert[0]); + copy_colors(c, c->vert[2], c->vert[0]); + brw_JMPI(p, ip, ip, brw_imm_d(jmpi*(nr*4+1))); + + copy_colors(c, c->vert[0], c->vert[1]); + copy_colors(c, c->vert[2], c->vert[1]); + brw_JMPI(p, ip, ip, brw_imm_d(jmpi*nr*2)); + + copy_colors(c, c->vert[0], c->vert[2]); + copy_colors(c, c->vert[1], c->vert[2]); + + brw_pop_insn_state(p); +} + + +static void do_flatshade_line( struct brw_sf_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg ip = brw_ip_reg(); + GLuint jmpi = 1; + GLuint nr = 0; + + if (c->key.attr_col0) + nr++; + + if (c->key.attr_col1) + nr++; + + if (nr == 0) + return; + + /* Already done in clip program: + */ + if (c->key.primitive == SF_UNFILLED_TRIS) + return; + + if (BRW_IS_IGDNG(p->brw)) + jmpi = 2; + + brw_push_insn_state(p); + + brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1))); + brw_JMPI(p, ip, ip, c->pv); + copy_colors(c, c->vert[1], c->vert[0]); + + brw_JMPI(p, ip, ip, brw_imm_ud(jmpi*nr)); + copy_colors(c, c->vert[0], c->vert[1]); + + brw_pop_insn_state(p); +} + + + +/*********************************************************************** + * Triangle setup. + */ + + +static void alloc_regs( struct brw_sf_compile *c ) +{ + GLuint reg, i; + + /* Values computed by fixed function unit: + */ + c->pv = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_D); + c->det = brw_vec1_grf(1, 2); + c->dx0 = brw_vec1_grf(1, 3); + c->dx2 = brw_vec1_grf(1, 4); + c->dy0 = brw_vec1_grf(1, 5); + c->dy2 = brw_vec1_grf(1, 6); + + /* z and 1/w passed in seperately: + */ + c->z[0] = brw_vec1_grf(2, 0); + c->inv_w[0] = brw_vec1_grf(2, 1); + c->z[1] = brw_vec1_grf(2, 2); + c->inv_w[1] = brw_vec1_grf(2, 3); + c->z[2] = brw_vec1_grf(2, 4); + c->inv_w[2] = brw_vec1_grf(2, 5); + + /* The vertices: + */ + reg = 3; + for (i = 0; i < c->nr_verts; i++) { + c->vert[i] = brw_vec8_grf(reg, 0); + reg += c->nr_attr_regs; + } + + /* Temporaries, allocated after last vertex reg. + */ + c->inv_det = brw_vec1_grf(reg, 0); reg++; + c->a1_sub_a0 = brw_vec8_grf(reg, 0); reg++; + c->a2_sub_a0 = brw_vec8_grf(reg, 0); reg++; + c->tmp = brw_vec8_grf(reg, 0); reg++; + + /* Note grf allocation: + */ + c->prog_data.total_grf = reg; + + + /* Outputs of this program - interpolation coefficients for + * rasterization: + */ + c->m1Cx = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 1, 0); + c->m2Cy = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 2, 0); + c->m3C0 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 3, 0); +} + + +static void copy_z_inv_w( struct brw_sf_compile *c ) +{ + struct brw_compile *p = &c->func; + GLuint i; + + brw_push_insn_state(p); + + /* Copy both scalars with a single MOV: + */ + for (i = 0; i < c->nr_verts; i++) + brw_MOV(p, vec2(suboffset(c->vert[i], 2)), vec2(c->z[i])); + + brw_pop_insn_state(p); +} + + +static void invert_det( struct brw_sf_compile *c) +{ + /* Looks like we invert all 8 elements just to get 1/det in + * position 2 !?! + */ + brw_math(&c->func, + c->inv_det, + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 0, + c->det, + BRW_MATH_DATA_SCALAR, + BRW_MATH_PRECISION_FULL); + +} + + +/* Two attributes packed into a wide register. Figure out if either + * or both of them need linear/perspective interpolation. Constant + * regs are left as-is. + */ +static GLboolean calculate_masks( struct brw_sf_compile *c, + GLuint reg, + GLushort *pc, + GLushort *pc_persp, + GLushort *pc_linear) +{ + GLboolean is_last_attr = (reg == c->nr_setup_regs - 1); + GLuint persp_mask = c->key.persp_attrs; + GLuint linear_mask = (c->key.persp_attrs | c->key.linear_attrs); + + *pc_persp = 0; + *pc_linear = 0; + *pc = 0xf; + + if (persp_mask & (1 << (reg*2))) + *pc_persp = 0xf; + + if (linear_mask & (1 << (reg*2))) + *pc_linear = 0xf; + + /* Maybe only processs one attribute on the final round: + */ + if (reg*2+1 < c->nr_setup_attrs) { + *pc |= 0xf0; + + if (persp_mask & (1 << (reg*2+1))) + *pc_persp |= 0xf0; + + if (linear_mask & (1 << (reg*2+1))) + *pc_linear |= 0xf0; + } + + return is_last_attr; +} + + +void brw_emit_null_setup( struct brw_sf_compile *c ) +{ + struct brw_compile *p = &c->func; + + /* m0 is implicitly copied from r0 in the send instruction: + */ + brw_urb_WRITE(p, + brw_null_reg(), + 0, + brw_vec8_grf(0, 0), /* r0, will be copied to m0 */ + 0, /* allocate */ + 1, /* used */ + 1, /* msg len */ + 0, /* response len */ + 1, /* eot */ + 1, /* writes complete */ + 0, /* offset */ + BRW_URB_SWIZZLE_TRANSPOSE); +} + +void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate) +{ + struct brw_compile *p = &c->func; + GLuint i; + + c->nr_verts = 3; + + if (allocate) + alloc_regs(c); + + invert_det(c); + copy_z_inv_w(c); + + if (c->key.do_twoside_color) + do_twoside_color(c); + + if (c->key.do_flat_shading) + do_flatshade_triangle(c); + + + for (i = 0; i < c->nr_setup_regs; i++) + { + /* Pair of incoming attributes: + */ + struct brw_reg a0 = offset(c->vert[0], i); + struct brw_reg a1 = offset(c->vert[1], i); + struct brw_reg a2 = offset(c->vert[2], i); + GLushort pc, pc_persp, pc_linear; + GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); + + if (pc_persp) + { + brw_set_predicate_control_flag_value(p, pc_persp); + brw_MUL(p, a0, a0, c->inv_w[0]); + brw_MUL(p, a1, a1, c->inv_w[1]); + brw_MUL(p, a2, a2, c->inv_w[2]); + } + + + /* Calculate coefficients for interpolated values: + */ + if (pc_linear) + { + brw_set_predicate_control_flag_value(p, pc_linear); + + brw_ADD(p, c->a1_sub_a0, a1, negate(a0)); + brw_ADD(p, c->a2_sub_a0, a2, negate(a0)); + + /* calculate dA/dx + */ + brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2); + brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0)); + brw_MUL(p, c->m1Cx, c->tmp, c->inv_det); + + /* calculate dA/dy + */ + brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0); + brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2)); + brw_MUL(p, c->m2Cy, c->tmp, c->inv_det); + } + + { + brw_set_predicate_control_flag_value(p, pc); + /* start point for interpolation + */ + brw_MOV(p, c->m3C0, a0); + + /* Copy m0..m3 to URB. m0 is implicitly copied from r0 in + * the send instruction: + */ + brw_urb_WRITE(p, + brw_null_reg(), + 0, + brw_vec8_grf(0, 0), /* r0, will be copied to m0 */ + 0, /* allocate */ + 1, /* used */ + 4, /* msg len */ + 0, /* response len */ + last, /* eot */ + last, /* writes complete */ + i*4, /* offset */ + BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */ + } + } +} + + + +void brw_emit_line_setup( struct brw_sf_compile *c, GLboolean allocate) +{ + struct brw_compile *p = &c->func; + GLuint i; + + + c->nr_verts = 2; + + if (allocate) + alloc_regs(c); + + invert_det(c); + copy_z_inv_w(c); + + if (c->key.do_flat_shading) + do_flatshade_line(c); + + for (i = 0; i < c->nr_setup_regs; i++) + { + /* Pair of incoming attributes: + */ + struct brw_reg a0 = offset(c->vert[0], i); + struct brw_reg a1 = offset(c->vert[1], i); + GLushort pc, pc_persp, pc_linear; + GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); + + if (pc_persp) + { + brw_set_predicate_control_flag_value(p, pc_persp); + brw_MUL(p, a0, a0, c->inv_w[0]); + brw_MUL(p, a1, a1, c->inv_w[1]); + } + + /* Calculate coefficients for position, color: + */ + if (pc_linear) { + brw_set_predicate_control_flag_value(p, pc_linear); + + brw_ADD(p, c->a1_sub_a0, a1, negate(a0)); + + brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0); + brw_MUL(p, c->m1Cx, c->tmp, c->inv_det); + + brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0); + brw_MUL(p, c->m2Cy, c->tmp, c->inv_det); + } + + { + brw_set_predicate_control_flag_value(p, pc); + + /* start point for interpolation + */ + brw_MOV(p, c->m3C0, a0); + + /* Copy m0..m3 to URB. + */ + brw_urb_WRITE(p, + brw_null_reg(), + 0, + brw_vec8_grf(0, 0), + 0, /* allocate */ + 1, /* used */ + 4, /* msg len */ + 0, /* response len */ + last, /* eot */ + last, /* writes complete */ + i*4, /* urb destination offset */ + BRW_URB_SWIZZLE_TRANSPOSE); + } + } +} + +void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate) +{ + struct brw_compile *p = &c->func; + GLuint i; + + c->nr_verts = 1; + + if (allocate) + alloc_regs(c); + + copy_z_inv_w(c); + + for (i = 0; i < c->nr_setup_regs; i++) + { + /* XXX: only seems to check point_coord_replace_attrs for every + * second attribute?!? + */ + boolean coord_replace = !!(c->key.point_coord_replace_attrs & (1<<(2*i))); + struct brw_reg a0 = offset(c->vert[0], i); + GLushort pc, pc_persp, pc_linear; + GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); + + if (pc_persp) + { + if (coord_replace) { + brw_set_predicate_control_flag_value(p, pc_persp); + brw_MUL(p, a0, a0, c->inv_w[0]); + } + } + + if (coord_replace) { + /* Caculate 1.0/PointWidth */ + brw_math(&c->func, + c->tmp, + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 0, + c->dx0, + BRW_MATH_DATA_SCALAR, + BRW_MATH_PRECISION_FULL); + + if (c->key.sprite_origin_lower_left) { + brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]); + brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0)); + brw_MUL(p, c->m2Cy, c->tmp, negate(c->inv_w[0])); + brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0)); + } + else { + brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]); + brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0)); + brw_MUL(p, c->m2Cy, c->tmp, c->inv_w[0]); + brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0)); + } + } + else { + brw_MOV(p, c->m1Cx, brw_imm_ud(0)); + brw_MOV(p, c->m2Cy, brw_imm_ud(0)); + } + + { + brw_set_predicate_control_flag_value(p, pc); + if (coord_replace) { + if (c->key.sprite_origin_lower_left) { + brw_MUL(p, c->m3C0, c->inv_w[0], brw_imm_f(1.0)); + brw_MOV(p, vec1(suboffset(c->m3C0, 0)), brw_imm_f(0.0)); + } + else { + brw_MOV(p, c->m3C0, brw_imm_f(0.0)); + } + } + else { + brw_MOV(p, c->m3C0, a0); /* constant value */ + } + + /* Copy m0..m3 to URB. + */ + brw_urb_WRITE(p, + brw_null_reg(), + 0, + brw_vec8_grf(0, 0), + 0, /* allocate */ + 1, /* used */ + 4, /* msg len */ + 0, /* response len */ + last, /* eot */ + last, /* writes complete */ + i*4, /* urb destination offset */ + BRW_URB_SWIZZLE_TRANSPOSE); + } + } +} + +/* Points setup - several simplifications as all attributes are + * constant across the face of the point (point sprites excluded!) + */ +void brw_emit_point_setup( struct brw_sf_compile *c, GLboolean allocate) +{ + struct brw_compile *p = &c->func; + GLuint i; + + c->nr_verts = 1; + + if (allocate) + alloc_regs(c); + + copy_z_inv_w(c); + + brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */ + brw_MOV(p, c->m2Cy, brw_imm_ud(0)); /* zero - move out of loop */ + + for (i = 0; i < c->nr_setup_regs; i++) + { + struct brw_reg a0 = offset(c->vert[0], i); + GLushort pc, pc_persp, pc_linear; + GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); + + if (pc_persp) + { + /* This seems odd as the values are all constant, but the + * fragment shader will be expecting it: + */ + brw_set_predicate_control_flag_value(p, pc_persp); + brw_MUL(p, a0, a0, c->inv_w[0]); + } + + + /* The delta values are always zero, just send the starting + * coordinate. Again, this is to fit in with the interpolation + * code in the fragment shader. + */ + { + brw_set_predicate_control_flag_value(p, pc); + + brw_MOV(p, c->m3C0, a0); /* constant value */ + + /* Copy m0..m3 to URB. + */ + brw_urb_WRITE(p, + brw_null_reg(), + 0, + brw_vec8_grf(0, 0), + 0, /* allocate */ + 1, /* used */ + 4, /* msg len */ + 0, /* response len */ + last, /* eot */ + last, /* writes complete */ + i*4, /* urb destination offset */ + BRW_URB_SWIZZLE_TRANSPOSE); + } + } +} + +void brw_emit_anyprim_setup( struct brw_sf_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg ip = brw_ip_reg(); + struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0); + struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0); + struct brw_reg primmask; + struct brw_instruction *jmp; + struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); + + GLuint saveflag; + + c->nr_verts = 3; + alloc_regs(c); + + primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD); + + brw_MOV(p, primmask, brw_imm_ud(1)); + brw_SHL(p, primmask, primmask, payload_prim); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); + brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) | + (1<<_3DPRIM_TRISTRIP) | + (1<<_3DPRIM_TRIFAN) | + (1<<_3DPRIM_TRISTRIP_REVERSE) | + (1<<_3DPRIM_POLYGON) | + (1<<_3DPRIM_RECTLIST) | + (1<<_3DPRIM_TRIFAN_NOSTIPPLE))); + jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)); + { + saveflag = p->flag_value; + brw_push_insn_state(p); + brw_emit_tri_setup( c, GL_FALSE ); + brw_pop_insn_state(p); + p->flag_value = saveflag; + /* note - thread killed in subroutine, so must + * restore the flag which is changed when building + * the subroutine. fix #13240 + */ + } + brw_land_fwd_jump(p, jmp); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); + brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) | + (1<<_3DPRIM_LINESTRIP) | + (1<<_3DPRIM_LINELOOP) | + (1<<_3DPRIM_LINESTRIP_CONT) | + (1<<_3DPRIM_LINESTRIP_BF) | + (1<<_3DPRIM_LINESTRIP_CONT_BF))); + jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)); + { + saveflag = p->flag_value; + brw_push_insn_state(p); + brw_emit_line_setup( c, GL_FALSE ); + brw_pop_insn_state(p); + p->flag_value = saveflag; + /* note - thread killed in subroutine */ + } + brw_land_fwd_jump(p, jmp); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); + brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE)); + jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)); + { + saveflag = p->flag_value; + brw_push_insn_state(p); + brw_emit_point_sprite_setup( c, GL_FALSE ); + brw_pop_insn_state(p); + p->flag_value = saveflag; + } + brw_land_fwd_jump(p, jmp); + + brw_emit_point_setup( c, GL_FALSE ); +} + + + + diff --git a/src/gallium/drivers/i965/brw_sf_state.c b/src/gallium/drivers/i965/brw_sf_state.c new file mode 100644 index 00000000000..25dc2b52e07 --- /dev/null +++ b/src/gallium/drivers/i965/brw_sf_state.c @@ -0,0 +1,333 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#include "util/u_math.h" + +#include "pipe/p_state.h" + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "brw_debug.h" +#include "brw_pipe_rast.h" + +static enum pipe_error upload_sf_vp(struct brw_context *brw) +{ + const struct pipe_viewport_state *vp = &brw->curr.viewport; + const struct pipe_scissor_state *scissor = &brw->curr.scissor; + struct brw_sf_viewport sfv; + enum pipe_error ret; + + memset(&sfv, 0, sizeof(sfv)); + + /* PIPE_NEW_VIEWPORT, PIPE_NEW_SCISSOR */ + + sfv.viewport.m00 = vp->scale[0]; + sfv.viewport.m11 = vp->scale[1]; + sfv.viewport.m22 = vp->scale[2]; + sfv.viewport.m30 = vp->translate[0]; + sfv.viewport.m31 = vp->translate[1]; + sfv.viewport.m32 = vp->translate[2]; + + sfv.scissor.xmin = scissor->minx; + sfv.scissor.xmax = scissor->maxx - 1; /* ? */ + sfv.scissor.ymin = scissor->miny; + sfv.scissor.ymax = scissor->maxy - 1; /* ? */ + + ret = brw_cache_data( &brw->cache, BRW_SF_VP, &sfv, NULL, 0, + &brw->sf.vp_bo ); + if (ret) + return ret; + + return PIPE_OK; +} + +const struct brw_tracked_state brw_sf_vp = { + .dirty = { + .mesa = (PIPE_NEW_VIEWPORT | + PIPE_NEW_SCISSOR), + .brw = 0, + .cache = 0 + }, + .prepare = upload_sf_vp +}; + +struct brw_sf_unit_key { + unsigned int total_grf; + unsigned int urb_entry_read_length; + unsigned int nr_urb_entries, urb_size, sfsize; + + unsigned scissor:1; + unsigned line_smooth:1; + unsigned point_sprite:1; + unsigned point_attenuated:1; + unsigned front_face:2; + unsigned cull_mode:2; + unsigned flatshade_first:1; + unsigned gl_rasterization_rules:1; + unsigned line_last_pixel_enable:1; + float line_width; + float point_size; +}; + +static void +sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key) +{ + const struct pipe_rasterizer_state *rast = &brw->curr.rast->templ; + memset(key, 0, sizeof(*key)); + + /* CACHE_NEW_SF_PROG */ + key->total_grf = brw->sf.prog_data->total_grf; + key->urb_entry_read_length = brw->sf.prog_data->urb_read_length; + + /* BRW_NEW_URB_FENCE */ + key->nr_urb_entries = brw->urb.nr_sf_entries; + key->urb_size = brw->urb.vsize; + key->sfsize = brw->urb.sfsize; + + /* PIPE_NEW_RAST */ + key->scissor = rast->scissor; + key->front_face = rast->front_winding; + key->cull_mode = rast->cull_mode; + key->line_smooth = rast->line_smooth; + key->line_width = rast->line_width; + key->flatshade_first = rast->flatshade_first; + key->line_last_pixel_enable = rast->line_last_pixel; + key->gl_rasterization_rules = rast->gl_rasterization_rules; + + key->point_sprite = rast->point_sprite; + key->point_attenuated = rast->point_size_per_vertex; + + key->point_size = CLAMP(rast->point_size, + rast->point_size_min, + rast->point_size_max); +} + +static enum pipe_error +sf_unit_create_from_key(struct brw_context *brw, + struct brw_sf_unit_key *key, + struct brw_winsys_reloc *reloc, + struct brw_winsys_buffer **bo_out) +{ + struct brw_sf_unit_state sf; + enum pipe_error ret; + int chipset_max_threads; + memset(&sf, 0, sizeof(sf)); + + sf.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1; + /* reloc */ + sf.thread0.kernel_start_pointer = 0; + + sf.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + + sf.thread3.dispatch_grf_start_reg = 3; + + if (BRW_IS_IGDNG(brw)) + sf.thread3.urb_entry_read_offset = 3; + else + sf.thread3.urb_entry_read_offset = 1; + + sf.thread3.urb_entry_read_length = key->urb_entry_read_length; + + sf.thread4.nr_urb_entries = key->nr_urb_entries; + sf.thread4.urb_entry_allocation_size = key->sfsize - 1; + + /* Each SF thread produces 1 PUE, and there can be up to 24(Pre-IGDNG) or + * 48(IGDNG) threads + */ + if (BRW_IS_IGDNG(brw)) + chipset_max_threads = 48; + else + chipset_max_threads = 24; + + sf.thread4.max_threads = MIN2(chipset_max_threads, key->nr_urb_entries) - 1; + + if (BRW_DEBUG & DEBUG_SINGLE_THREAD) + sf.thread4.max_threads = 0; + + if (BRW_DEBUG & DEBUG_STATS) + sf.thread4.stats_enable = 1; + + /* CACHE_NEW_SF_VP */ + /* reloc */ + sf.sf5.sf_viewport_state_offset = 0; + + sf.sf5.viewport_transform = 1; + + if (key->scissor) + sf.sf6.scissor = 1; + + if (key->front_face == PIPE_WINDING_CCW) + sf.sf5.front_winding = BRW_FRONTWINDING_CCW; + else + sf.sf5.front_winding = BRW_FRONTWINDING_CW; + + switch (key->cull_mode) { + case PIPE_WINDING_CCW: + case PIPE_WINDING_CW: + sf.sf6.cull_mode = (key->front_face == key->cull_mode ? + BRW_CULLMODE_FRONT : + BRW_CULLMODE_BACK); + break; + case PIPE_WINDING_BOTH: + sf.sf6.cull_mode = BRW_CULLMODE_BOTH; + break; + case PIPE_WINDING_NONE: + sf.sf6.cull_mode = BRW_CULLMODE_NONE; + break; + default: + assert(0); + sf.sf6.cull_mode = BRW_CULLMODE_NONE; + break; + } + + /* _NEW_LINE */ + /* XXX use ctx->Const.Min/MaxLineWidth here */ + sf.sf6.line_width = CLAMP(key->line_width, 1.0, 5.0) * (1<<1); + + sf.sf6.line_endcap_aa_region_width = 1; + if (key->line_smooth) + sf.sf6.aa_enable = 1; + else if (sf.sf6.line_width <= 0x2) + sf.sf6.line_width = 0; + + /* XXX: gl_rasterization_rules? something else? + */ + sf.sf6.point_rast_rule = BRW_RASTRULE_UPPER_RIGHT; + sf.sf6.point_rast_rule = BRW_RASTRULE_LOWER_RIGHT; + sf.sf6.point_rast_rule = 1; + + /* XXX clamp max depends on AA vs. non-AA */ + + /* _NEW_POINT */ + sf.sf7.sprite_point = key->point_sprite; + sf.sf7.point_size = CLAMP(rint(key->point_size), 1, 255) * (1<<3); + sf.sf7.use_point_size_state = !key->point_attenuated; + sf.sf7.aa_line_distance_mode = 0; + + /* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons: + */ + if (!key->flatshade_first) { + sf.sf7.trifan_pv = 2; + sf.sf7.linestrip_pv = 1; + sf.sf7.tristrip_pv = 2; + } else { + sf.sf7.trifan_pv = 1; + sf.sf7.linestrip_pv = 0; + sf.sf7.tristrip_pv = 0; + } + + sf.sf7.line_last_pixel_enable = key->line_last_pixel_enable; + + /* Set bias for OpenGL rasterization rules: + */ + if (key->gl_rasterization_rules) { + sf.sf6.dest_org_vbias = 0x8; + sf.sf6.dest_org_hbias = 0x8; + } + else { + sf.sf6.dest_org_vbias = 0x0; + sf.sf6.dest_org_hbias = 0x0; + } + + ret = brw_upload_cache(&brw->cache, BRW_SF_UNIT, + key, sizeof(*key), + reloc, 2, + &sf, sizeof(sf), + NULL, NULL, + bo_out); + if (ret) + return ret; + + + return PIPE_OK; +} + +static enum pipe_error upload_sf_unit( struct brw_context *brw ) +{ + struct brw_sf_unit_key key; + struct brw_winsys_reloc reloc[2]; + unsigned total_grf; + unsigned viewport_transform; + unsigned front_winding; + enum pipe_error ret; + + sf_unit_populate_key(brw, &key); + + /* XXX: cut this crap and pre calculate the key: + */ + total_grf = (align(key.total_grf, 16) / 16 - 1); + viewport_transform = 1; + front_winding = (key.front_face == PIPE_WINDING_CCW ? + BRW_FRONTWINDING_CCW : + BRW_FRONTWINDING_CW); + + /* Emit SF program relocation */ + make_reloc(&reloc[0], + BRW_USAGE_STATE, + total_grf << 1, + offsetof(struct brw_sf_unit_state, thread0), + brw->sf.prog_bo); + + /* Emit SF viewport relocation */ + make_reloc(&reloc[1], + BRW_USAGE_STATE, + front_winding | (viewport_transform << 1), + offsetof(struct brw_sf_unit_state, sf5), + brw->sf.vp_bo); + + + if (brw_search_cache(&brw->cache, BRW_SF_UNIT, + &key, sizeof(key), + reloc, 2, + NULL, + &brw->sf.state_bo)) + return PIPE_OK; + + + ret = sf_unit_create_from_key(brw, &key, + reloc, + &brw->sf.state_bo); + if (ret) + return ret; + + return PIPE_OK; +} + +const struct brw_tracked_state brw_sf_unit = { + .dirty = { + .mesa = (PIPE_NEW_RAST), + .brw = BRW_NEW_URB_FENCE, + .cache = (CACHE_NEW_SF_VP | + CACHE_NEW_SF_PROG) + }, + .prepare = upload_sf_unit, +}; diff --git a/src/gallium/drivers/i965/brw_state.h b/src/gallium/drivers/i965/brw_state.h new file mode 100644 index 00000000000..d2bbd0123d1 --- /dev/null +++ b/src/gallium/drivers/i965/brw_state.h @@ -0,0 +1,174 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#ifndef BRW_STATE_H +#define BRW_STATE_H + +#include "pipe/p_defines.h" +#include "util/u_memory.h" + +#include "brw_context.h" + +static INLINE void +brw_add_validated_bo(struct brw_context *brw, struct brw_winsys_buffer *bo) +{ + assert(brw->state.validated_bo_count < Elements(brw->state.validated_bos)); + + if (bo != NULL) { + bo_reference( &brw->state.validated_bos[brw->state.validated_bo_count++], + bo ); + } +} + +const struct brw_tracked_state brw_blend_constant_color; +const struct brw_tracked_state brw_cc_unit; +const struct brw_tracked_state brw_cc_vp; +const struct brw_tracked_state brw_clip_prog; +const struct brw_tracked_state brw_clip_unit; +const struct brw_tracked_state brw_curbe_buffer; +const struct brw_tracked_state brw_curbe_offsets; +const struct brw_tracked_state brw_invarient_state; +const struct brw_tracked_state brw_gs_prog; +const struct brw_tracked_state brw_gs_unit; +const struct brw_tracked_state brw_line_stipple; +const struct brw_tracked_state brw_aa_line_parameters; +const struct brw_tracked_state brw_pipelined_state_pointers; +const struct brw_tracked_state brw_binding_table_pointers; +const struct brw_tracked_state brw_depthbuffer; +const struct brw_tracked_state brw_polygon_stipple; +const struct brw_tracked_state brw_program_parameters; +const struct brw_tracked_state brw_recalculate_urb_fence; +const struct brw_tracked_state brw_sf_prog; +const struct brw_tracked_state brw_sf_unit; +const struct brw_tracked_state brw_sf_vp; +const struct brw_tracked_state brw_state_base_address; +const struct brw_tracked_state brw_urb_fence; +const struct brw_tracked_state brw_vertex_state; +const struct brw_tracked_state brw_vs_surfaces; +const struct brw_tracked_state brw_vs_prog; +const struct brw_tracked_state brw_vs_unit; +const struct brw_tracked_state brw_wm_input_sizes; +const struct brw_tracked_state brw_wm_prog; +const struct brw_tracked_state brw_wm_samplers; +const struct brw_tracked_state brw_wm_constant_surface; +const struct brw_tracked_state brw_wm_surfaces; +const struct brw_tracked_state brw_wm_unit; + +const struct brw_tracked_state brw_psp_urb_cbs; + +const struct brw_tracked_state brw_pipe_control; + +const struct brw_tracked_state brw_drawing_rect; +const struct brw_tracked_state brw_indices; +const struct brw_tracked_state brw_vertices; +const struct brw_tracked_state brw_index_buffer; + + +/*********************************************************************** + * brw_state.c + */ +int brw_validate_state(struct brw_context *brw); +int brw_upload_state(struct brw_context *brw); +void brw_init_state(struct brw_context *brw); +void brw_destroy_state(struct brw_context *brw); + +/*********************************************************************** + * brw_state_cache.c + */ +enum pipe_error brw_cache_data(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *data, + struct brw_winsys_reloc *relocs, + GLuint nr_relocs, + struct brw_winsys_buffer **bo_out ); + +enum pipe_error brw_cache_data_sz(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *data, + GLuint data_size, + struct brw_winsys_reloc *relocs, + GLuint nr_relocs, + struct brw_winsys_buffer **bo_out); + +enum pipe_error brw_upload_cache( struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_sz, + struct brw_winsys_reloc *relocs, + GLuint nr_relocs, + const void *data, + GLuint data_sz, + const void *aux, + void *aux_return , + struct brw_winsys_buffer **bo_out); + +boolean brw_search_cache( struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_size, + struct brw_winsys_reloc *relocs, + GLuint nr_relocs, + void *aux_return, + struct brw_winsys_buffer **bo_out); + +void brw_state_cache_check_size( struct brw_context *brw ); + +void brw_init_caches( struct brw_context *brw ); +void brw_destroy_caches( struct brw_context *brw ); +void brw_state_cache_bo_delete(struct brw_cache *cache, struct brw_winsys_buffer *bo); + +/*********************************************************************** + * brw_state_batch.c + */ +#define BRW_BATCH_STRUCT(brw, s) brw_batchbuffer_data( brw->batch, (s), sizeof(*(s)), IGNORE_CLIPRECTS) +#define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( brw, (s), sizeof(*(s)) ) + +GLboolean brw_cached_batch_struct( struct brw_context *brw, + const void *data, + GLuint sz ); +void brw_destroy_batch_cache( struct brw_context *brw ); +void brw_clear_batch_cache( struct brw_context *brw ); + +/*********************************************************************** + * brw_wm_surface_state.c + */ + +/*********************************************************************** + * brw_state_debug.c + */ +void brw_update_dirty_counts( unsigned mesa, + unsigned brw, + unsigned cache ); + + + +#endif diff --git a/src/gallium/drivers/i965/brw_state_batch.c b/src/gallium/drivers/i965/brw_state_batch.c new file mode 100644 index 00000000000..7d212e5c247 --- /dev/null +++ b/src/gallium/drivers/i965/brw_state_batch.c @@ -0,0 +1,98 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + + +#include "brw_state.h" +#include "brw_batchbuffer.h" + + + +/* A facility similar to the data caching code above, which aims to + * prevent identical commands being issued repeatedly. + */ +GLboolean brw_cached_batch_struct( struct brw_context *brw, + const void *data, + GLuint sz ) +{ + struct brw_cached_batch_item *item = brw->cached_batch_items; + struct header *newheader = (struct header *)data; + + if (brw->flags.always_emit_state) { + brw_batchbuffer_data(brw->batch, data, sz, IGNORE_CLIPRECTS); + return GL_TRUE; + } + + while (item) { + if (item->header->opcode == newheader->opcode) { + if (item->sz == sz && memcmp(item->header, newheader, sz) == 0) + return GL_FALSE; + if (item->sz != sz) { + FREE(item->header); + item->header = MALLOC(sz); + item->sz = sz; + } + goto emit; + } + item = item->next; + } + + assert(!item); + item = CALLOC_STRUCT(brw_cached_batch_item); + item->header = MALLOC(sz); + item->sz = sz; + item->next = brw->cached_batch_items; + brw->cached_batch_items = item; + + emit: + memcpy(item->header, newheader, sz); + brw_batchbuffer_data(brw->batch, data, sz, IGNORE_CLIPRECTS); + return GL_TRUE; +} + +void brw_clear_batch_cache( struct brw_context *brw ) +{ + struct brw_cached_batch_item *item = brw->cached_batch_items; + + while (item) { + struct brw_cached_batch_item *next = item->next; + free((void *)item->header); + free(item); + item = next; + } + + brw->cached_batch_items = NULL; +} + +void brw_destroy_batch_cache( struct brw_context *brw ) +{ + brw_clear_batch_cache(brw); +} diff --git a/src/gallium/drivers/i965/brw_state_cache.c b/src/gallium/drivers/i965/brw_state_cache.c new file mode 100644 index 00000000000..16b643ceb28 --- /dev/null +++ b/src/gallium/drivers/i965/brw_state_cache.c @@ -0,0 +1,617 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + +/** @file brw_state_cache.c + * + * This file implements a simple static state cache for 965. The consumers + * can query the hash table of state using a cache_id, opaque key data, + * and list of buffers that will be used in relocations, and receive the + * corresponding state buffer object of state (plus associated auxiliary + * data) in return. + * + * The inner workings are a simple hash table based on a CRC of the key data. + * The cache_id and relocation target buffers associated with the state + * buffer are included as auxiliary key data, but are not part of the hash + * value (this should be fixed, but will likely be fixed instead by making + * consumers use structured keys). + * + * Replacement is not implemented. Instead, when the cache gets too big, at + * a safe point (unlock) we throw out all of the cache data and let it + * regenerate for the next rendering operation. + * + * The reloc structs need to be included as key data, otherwise the + * non-unique values stuffed in the offset in key data through + * brw_cache_data() may result in successful probe for state buffers + * even when the buffer being referenced doesn't match. The result would be + * that the same state cache entry is used twice for different buffers, + * only one of the two buffers referenced gets put into the offset, and the + * incorrect program is run for the other instance. + */ +#include "util/u_memory.h" + +#include "brw_debug.h" +#include "brw_state.h" +#include "brw_batchbuffer.h" + +/* XXX: Fixme - have to include these to get the sizes of the prog_key + * structs: + */ +#include "brw_wm.h" +#include "brw_vs.h" +#include "brw_clip.h" +#include "brw_sf.h" +#include "brw_gs.h" + + +static GLuint +hash_key(const void *key, GLuint key_size, + struct brw_winsys_reloc *relocs, GLuint nr_relocs) +{ + GLuint *ikey = (GLuint *)key; + GLuint hash = 0, i; + + assert(key_size % 4 == 0); + + /* I'm sure this can be improved on: + */ + for (i = 0; i < key_size/4; i++) { + hash ^= ikey[i]; + hash = (hash << 5) | (hash >> 27); + } + + /* Include the BO pointers as key data as well */ + ikey = (GLuint *)relocs; + key_size = nr_relocs * sizeof(struct brw_winsys_reloc); + for (i = 0; i < key_size/4; i++) { + hash ^= ikey[i]; + hash = (hash << 5) | (hash >> 27); + } + + return hash; +} + + +/** + * Marks a new buffer as being chosen for the given cache id. + */ +static void +update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id, + struct brw_winsys_buffer *bo) +{ + if (bo == cache->last_bo[cache_id]) + return; /* no change */ + + bo_reference( &cache->last_bo[cache_id], bo ); + + cache->brw->state.dirty.cache |= 1 << cache_id; +} + + +static struct brw_cache_item * +search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, + GLuint hash, const void *key, GLuint key_size, + struct brw_winsys_reloc *relocs, GLuint nr_relocs) +{ + struct brw_cache_item *c; + +#if 0 + int bucketcount = 0; + + for (c = cache->items[hash % cache->size]; c; c = c->next) + bucketcount++; + + debug_printf("bucket %d/%d = %d/%d items\n", hash % cache->size, + cache->size, bucketcount, cache->n_items); +#endif + + for (c = cache->items[hash % cache->size]; c; c = c->next) { + if (c->cache_id == cache_id && + c->hash == hash && + c->key_size == key_size && + memcmp(c->key, key, key_size) == 0 && + c->nr_relocs == nr_relocs && + memcmp(c->relocs, relocs, nr_relocs * sizeof *relocs) == 0) + return c; + } + + return NULL; +} + + +static void +rehash(struct brw_cache *cache) +{ + struct brw_cache_item **items; + struct brw_cache_item *c, *next; + GLuint size, i; + + size = cache->size * 3; + items = (struct brw_cache_item**) CALLOC(size, sizeof(*items)); + + for (i = 0; i < cache->size; i++) + for (c = cache->items[i]; c; c = next) { + next = c->next; + c->next = items[c->hash % size]; + items[c->hash % size] = c; + } + + FREE(cache->items); + cache->items = items; + cache->size = size; +} + + +/** + * Returns the buffer object matching cache_id and key, or NULL. + */ +boolean +brw_search_cache(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_size, + struct brw_winsys_reloc *relocs, + GLuint nr_relocs, + void *aux_return, + struct brw_winsys_buffer **bo_out) +{ + struct brw_cache_item *item; + GLuint hash = hash_key(key, key_size, relocs, nr_relocs); + + item = search_cache(cache, cache_id, hash, key, key_size, + relocs, nr_relocs); + + if (item) { + if (aux_return) + *(void **)aux_return = (void *)((char *)item->key + item->key_size); + + update_cache_last(cache, cache_id, item->bo); + bo_reference(bo_out, item->bo); + return TRUE; + } + + return FALSE; +} + + +enum pipe_error +brw_upload_cache( struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_size, + struct brw_winsys_reloc *relocs, + GLuint nr_relocs, + const void *data, + GLuint data_size, + const void *aux, + void *aux_return, + struct brw_winsys_buffer **bo_out) +{ + struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item); + GLuint hash = hash_key(key, key_size, relocs, nr_relocs); + GLuint relocs_size = nr_relocs * sizeof relocs[0]; + GLuint aux_size = cache->aux_size[cache_id]; + enum pipe_error ret; + void *tmp; + int i; + + /* Create the buffer object to contain the data. For now, use a + * single buffer type to describe all cached state atoms. Later, + * may want to take advantage of hardware distinctions between + * these various entities. + */ + ret = cache->sws->bo_alloc(cache->sws, + cache->buffer_type, + data_size, 1 << 6, + bo_out); + if (ret) + return ret; + + + /* Set up the memory containing the key, aux_data, and relocs */ + tmp = MALLOC(key_size + aux_size + relocs_size); + + memcpy(tmp, key, key_size); + memcpy((char *)tmp + key_size, aux, cache->aux_size[cache_id]); + memcpy((char *)tmp + key_size + aux_size, relocs, relocs_size); + for (i = 0; i < nr_relocs; i++) { + p_atomic_inc(&relocs[i].bo->reference.count); + } + + item->cache_id = cache_id; + item->key = tmp; + item->hash = hash; + item->key_size = key_size; + item->relocs = (struct brw_winsys_reloc *)((char *)tmp + key_size + aux_size); + item->nr_relocs = nr_relocs; + bo_reference( &item->bo, *bo_out ); + item->data_size = data_size; + + if (cache->n_items > cache->size * 1.5) + rehash(cache); + + hash %= cache->size; + item->next = cache->items[hash]; + cache->items[hash] = item; + cache->n_items++; + + if (aux_return) { + assert(cache->aux_size[cache_id]); + *(void **)aux_return = (void *)((char *)item->key + item->key_size); + } + + if (BRW_DEBUG & DEBUG_STATE) + debug_printf("upload %s: %d bytes to cache id %d\n", + cache->name[cache_id], + data_size, cache_id); + + /* Copy data to the buffer */ + ret = cache->sws->bo_subdata(item->bo, + cache_id, + 0, data_size, data, + relocs, nr_relocs); + if (ret) + return ret; + + update_cache_last(cache, cache_id, item->bo); + + return PIPE_OK; +} + + +/** + * This doesn't really work with aux data. Use search/upload instead + */ +enum pipe_error +brw_cache_data_sz(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *data, + GLuint data_size, + struct brw_winsys_reloc *relocs, + GLuint nr_relocs, + struct brw_winsys_buffer **bo_out) +{ + struct brw_cache_item *item; + GLuint hash = hash_key(data, data_size, relocs, nr_relocs); + + item = search_cache(cache, cache_id, hash, data, data_size, + relocs, nr_relocs); + if (item) { + update_cache_last(cache, cache_id, item->bo); + + bo_reference(bo_out, item->bo); + return PIPE_OK; + } + + return brw_upload_cache(cache, cache_id, + data, data_size, + relocs, nr_relocs, + data, data_size, + NULL, NULL, + bo_out); +} + + +/** + * Wrapper around brw_cache_data_sz using the cache_id's canonical key size. + * + * If nr_relocs is nonzero, brw_search_cache()/brw_upload_cache() would be + * better to use, as the potentially changing offsets in the data-used-as-key + * will result in excessive cache misses. + * + * XXX: above is no longer true -- can we remove some code? + */ +enum pipe_error +brw_cache_data(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *data, + struct brw_winsys_reloc *relocs, + GLuint nr_relocs, + struct brw_winsys_buffer **bo_out) +{ + return brw_cache_data_sz(cache, cache_id, data, cache->key_size[cache_id], + relocs, nr_relocs, bo_out); +} + + +static void +brw_init_cache_id(struct brw_cache *cache, + const char *name, + enum brw_cache_id id, + GLuint key_size, + GLuint aux_size) +{ + cache->name[id] = strdup(name); + cache->key_size[id] = key_size; + cache->aux_size[id] = aux_size; +} + + +static void +brw_init_general_state_cache(struct brw_context *brw) +{ + struct brw_cache *cache = &brw->cache; + + cache->brw = brw; + cache->sws = brw->sws; + + cache->buffer_type = BRW_BUFFER_TYPE_GENERAL_STATE; + + cache->size = 7; + cache->n_items = 0; + cache->items = (struct brw_cache_item **) + CALLOC(cache->size, sizeof(struct brw_cache_item)); + + brw_init_cache_id(cache, + "CC_VP", + BRW_CC_VP, + sizeof(struct brw_cc_viewport), + 0); + + brw_init_cache_id(cache, + "CC_UNIT", + BRW_CC_UNIT, + sizeof(struct brw_cc_unit_state), + 0); + + brw_init_cache_id(cache, + "WM_PROG", + BRW_WM_PROG, + sizeof(struct brw_wm_prog_key), + sizeof(struct brw_wm_prog_data)); + + brw_init_cache_id(cache, + "SAMPLER_DEFAULT_COLOR", + BRW_SAMPLER_DEFAULT_COLOR, + sizeof(struct brw_sampler_default_color), + 0); + + brw_init_cache_id(cache, + "SAMPLER", + BRW_SAMPLER, + 0, /* variable key/data size */ + 0); + + brw_init_cache_id(cache, + "WM_UNIT", + BRW_WM_UNIT, + sizeof(struct brw_wm_unit_state), + 0); + + brw_init_cache_id(cache, + "SF_PROG", + BRW_SF_PROG, + sizeof(struct brw_sf_prog_key), + sizeof(struct brw_sf_prog_data)); + + brw_init_cache_id(cache, + "SF_VP", + BRW_SF_VP, + sizeof(struct brw_sf_viewport), + 0); + + brw_init_cache_id(cache, + "SF_UNIT", + BRW_SF_UNIT, + sizeof(struct brw_sf_unit_state), + 0); + + brw_init_cache_id(cache, + "VS_UNIT", + BRW_VS_UNIT, + sizeof(struct brw_vs_unit_state), + 0); + + brw_init_cache_id(cache, + "VS_PROG", + BRW_VS_PROG, + sizeof(struct brw_vs_prog_key), + sizeof(struct brw_vs_prog_data)); + + brw_init_cache_id(cache, + "CLIP_UNIT", + BRW_CLIP_UNIT, + sizeof(struct brw_clip_unit_state), + 0); + + brw_init_cache_id(cache, + "CLIP_PROG", + BRW_CLIP_PROG, + sizeof(struct brw_clip_prog_key), + sizeof(struct brw_clip_prog_data)); + + brw_init_cache_id(cache, + "GS_UNIT", + BRW_GS_UNIT, + sizeof(struct brw_gs_unit_state), + 0); + + brw_init_cache_id(cache, + "GS_PROG", + BRW_GS_PROG, + sizeof(struct brw_gs_prog_key), + sizeof(struct brw_gs_prog_data)); +} + + +static void +brw_init_surface_state_cache(struct brw_context *brw) +{ + struct brw_cache *cache = &brw->surface_cache; + + cache->brw = brw; + cache->sws = brw->sws; + + cache->buffer_type = BRW_BUFFER_TYPE_SURFACE_STATE; + + cache->size = 7; + cache->n_items = 0; + cache->items = (struct brw_cache_item **) + CALLOC(cache->size, sizeof(struct brw_cache_item)); + + brw_init_cache_id(cache, + "SS_SURFACE", + BRW_SS_SURFACE, + sizeof(struct brw_surface_state), + 0); + + brw_init_cache_id(cache, + "SS_SURF_BIND", + BRW_SS_SURF_BIND, + 0, + 0); +} + + +void +brw_init_caches(struct brw_context *brw) +{ + brw_init_general_state_cache(brw); + brw_init_surface_state_cache(brw); +} + + +static void +brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) +{ + struct brw_cache_item *c, *next; + GLuint i; + + if (BRW_DEBUG & DEBUG_STATE) + debug_printf("%s\n", __FUNCTION__); + + for (i = 0; i < cache->size; i++) { + for (c = cache->items[i]; c; c = next) { + int j; + + next = c->next; + + for (j = 0; j < c->nr_relocs; j++) + bo_reference(&c->relocs[j].bo, NULL); + + bo_reference(&c->bo, NULL); + FREE((void *)c->key); + FREE(c); + } + cache->items[i] = NULL; + } + + cache->n_items = 0; + + if (brw->curbe.last_buf) { + FREE(brw->curbe.last_buf); + brw->curbe.last_buf = NULL; + } + + brw->state.dirty.mesa |= ~0; + brw->state.dirty.brw |= ~0; + brw->state.dirty.cache |= ~0; +} + +/* Clear all entries from the cache that point to the given bo. + * + * This lets us release memory for reuse earlier for known-dead buffers, + * at the cost of walking the entire hash table. + */ +void +brw_state_cache_bo_delete(struct brw_cache *cache, struct brw_winsys_buffer *bo) +{ + struct brw_cache_item **prev; + GLuint i; + + if (BRW_DEBUG & DEBUG_STATE) + debug_printf("%s\n", __FUNCTION__); + + for (i = 0; i < cache->size; i++) { + for (prev = &cache->items[i]; *prev;) { + struct brw_cache_item *c = *prev; + + if (cache->sws->bo_references(c->bo, bo)) { + int j; + + *prev = c->next; + + for (j = 0; j < c->nr_relocs; j++) + bo_reference(&c->relocs[j].bo, NULL); + + bo_reference(&c->bo, NULL); + + FREE((void *)c->key); + FREE(c); + cache->n_items--; + } else { + prev = &c->next; + } + } + } +} + +void +brw_state_cache_check_size(struct brw_context *brw) +{ + if (BRW_DEBUG & DEBUG_STATE) + debug_printf("%s (n_items=%d)\n", __FUNCTION__, brw->cache.n_items); + + /* un-tuned guess. We've got around 20 state objects for a total of around + * 32k, so 1000 of them is around 1.5MB. + */ + if (brw->cache.n_items > 1000) + brw_clear_cache(brw, &brw->cache); + + if (brw->surface_cache.n_items > 1000) + brw_clear_cache(brw, &brw->surface_cache); +} + + +static void +brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache) +{ + GLuint i; + + if (BRW_DEBUG & DEBUG_STATE) + debug_printf("%s\n", __FUNCTION__); + + brw_clear_cache(brw, cache); + for (i = 0; i < BRW_MAX_CACHE; i++) { + bo_reference(&cache->last_bo[i], NULL); + FREE(cache->name[i]); + } + FREE(cache->items); + cache->items = NULL; + cache->size = 0; +} + + +void +brw_destroy_caches(struct brw_context *brw) +{ + brw_destroy_cache(brw, &brw->cache); + brw_destroy_cache(brw, &brw->surface_cache); +} diff --git a/src/gallium/drivers/i965/brw_state_debug.c b/src/gallium/drivers/i965/brw_state_debug.c new file mode 100644 index 00000000000..049c278c93e --- /dev/null +++ b/src/gallium/drivers/i965/brw_state_debug.c @@ -0,0 +1,153 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + + + +#include "brw_context.h" +#include "brw_state.h" + + +struct dirty_bit_map { + uint32_t bit; + char *name; + uint32_t count; +}; + +#define DEFINE_BIT(name) {name, #name, 0} + +static struct dirty_bit_map mesa_bits[] = { + DEFINE_BIT(PIPE_NEW_DEPTH_STENCIL_ALPHA), + DEFINE_BIT(PIPE_NEW_RAST), + DEFINE_BIT(PIPE_NEW_BLEND), + DEFINE_BIT(PIPE_NEW_VIEWPORT), + DEFINE_BIT(PIPE_NEW_SAMPLERS), + DEFINE_BIT(PIPE_NEW_VERTEX_BUFFER), + DEFINE_BIT(PIPE_NEW_VERTEX_ELEMENT), + DEFINE_BIT(PIPE_NEW_FRAGMENT_SHADER), + DEFINE_BIT(PIPE_NEW_VERTEX_SHADER), + DEFINE_BIT(PIPE_NEW_FRAGMENT_CONSTANTS), + DEFINE_BIT(PIPE_NEW_VERTEX_CONSTANTS), + DEFINE_BIT(PIPE_NEW_CLIP), + DEFINE_BIT(PIPE_NEW_INDEX_BUFFER), + DEFINE_BIT(PIPE_NEW_INDEX_RANGE), + DEFINE_BIT(PIPE_NEW_BLEND_COLOR), + DEFINE_BIT(PIPE_NEW_POLYGON_STIPPLE), + DEFINE_BIT(PIPE_NEW_FRAMEBUFFER_DIMENSIONS), + DEFINE_BIT(PIPE_NEW_DEPTH_BUFFER), + DEFINE_BIT(PIPE_NEW_COLOR_BUFFERS), + DEFINE_BIT(PIPE_NEW_QUERY), + DEFINE_BIT(PIPE_NEW_SCISSOR), + DEFINE_BIT(PIPE_NEW_BOUND_TEXTURES), + DEFINE_BIT(PIPE_NEW_NR_CBUFS), + {0, 0, 0} +}; + +static struct dirty_bit_map brw_bits[] = { + DEFINE_BIT(BRW_NEW_URB_FENCE), + DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM), + DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM), + DEFINE_BIT(BRW_NEW_INPUT_DIMENSIONS), + DEFINE_BIT(BRW_NEW_CURBE_OFFSETS), + DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE), + DEFINE_BIT(BRW_NEW_PRIMITIVE), + DEFINE_BIT(BRW_NEW_CONTEXT), + DEFINE_BIT(BRW_NEW_WM_INPUT_DIMENSIONS), + DEFINE_BIT(BRW_NEW_PSP), + DEFINE_BIT(BRW_NEW_WM_SURFACES), + DEFINE_BIT(BRW_NEW_xxx), + DEFINE_BIT(BRW_NEW_INDICES), + {0, 0, 0} +}; + +static struct dirty_bit_map cache_bits[] = { + DEFINE_BIT(CACHE_NEW_CC_VP), + DEFINE_BIT(CACHE_NEW_CC_UNIT), + DEFINE_BIT(CACHE_NEW_WM_PROG), + DEFINE_BIT(CACHE_NEW_SAMPLER_DEFAULT_COLOR), + DEFINE_BIT(CACHE_NEW_SAMPLER), + DEFINE_BIT(CACHE_NEW_WM_UNIT), + DEFINE_BIT(CACHE_NEW_SF_PROG), + DEFINE_BIT(CACHE_NEW_SF_VP), + DEFINE_BIT(CACHE_NEW_SF_UNIT), + DEFINE_BIT(CACHE_NEW_VS_UNIT), + DEFINE_BIT(CACHE_NEW_VS_PROG), + DEFINE_BIT(CACHE_NEW_GS_UNIT), + DEFINE_BIT(CACHE_NEW_GS_PROG), + DEFINE_BIT(CACHE_NEW_CLIP_VP), + DEFINE_BIT(CACHE_NEW_CLIP_UNIT), + DEFINE_BIT(CACHE_NEW_CLIP_PROG), + DEFINE_BIT(CACHE_NEW_SURFACE), + DEFINE_BIT(CACHE_NEW_SURF_BIND), + {0, 0, 0} +}; + + +static void +brw_update_dirty_count(struct dirty_bit_map *bit_map, int32_t bits) +{ + int i; + + for (i = 0; i < 32; i++) { + if (bit_map[i].bit == 0) + return; + + if (bit_map[i].bit & bits) + bit_map[i].count++; + } +} + +static void +brw_print_dirty_count(struct dirty_bit_map *bit_map, int32_t bits) +{ + int i; + + for (i = 0; i < 32; i++) { + if (bit_map[i].bit == 0) + return; + + debug_printf("0x%08x: %12d (%s)\n", + bit_map[i].bit, bit_map[i].count, bit_map[i].name); + } +} + +void +brw_update_dirty_counts( unsigned mesa, + unsigned brw, + unsigned cache ) +{ + static int dirty_count = 0; + + brw_update_dirty_count(mesa_bits, mesa); + brw_update_dirty_count(brw_bits, brw); + brw_update_dirty_count(cache_bits, cache); + if (dirty_count++ % 1000 == 0) { + brw_print_dirty_count(mesa_bits, mesa); + brw_print_dirty_count(brw_bits, brw); + brw_print_dirty_count(cache_bits, cache); + debug_printf("\n"); + } +} diff --git a/src/gallium/drivers/i965/brw_state_upload.c b/src/gallium/drivers/i965/brw_state_upload.c new file mode 100644 index 00000000000..f8b91eff816 --- /dev/null +++ b/src/gallium/drivers/i965/brw_state_upload.c @@ -0,0 +1,270 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_batchbuffer.h" +#include "brw_debug.h" + +const struct brw_tracked_state *atoms[] = +{ +/* &brw_wm_input_sizes, */ + &brw_vs_prog, + &brw_gs_prog, + &brw_clip_prog, + &brw_sf_prog, + &brw_wm_prog, + + /* Once all the programs are done, we know how large urb entry + * sizes need to be and can decide if we need to change the urb + * layout. + */ + &brw_curbe_offsets, + &brw_recalculate_urb_fence, + + &brw_cc_vp, + &brw_cc_unit, + + &brw_vs_surfaces, /* must do before unit */ + /*&brw_wm_constant_surface,*/ /* must do before wm surfaces/bind bo */ + &brw_wm_surfaces, /* must do before samplers and unit */ + &brw_wm_samplers, + + &brw_wm_unit, + &brw_sf_vp, + &brw_sf_unit, + &brw_vs_unit, /* always required, enabled or not */ + &brw_clip_unit, + &brw_gs_unit, + + /* Command packets: + */ + &brw_invarient_state, + &brw_state_base_address, + + &brw_binding_table_pointers, + &brw_blend_constant_color, + + &brw_depthbuffer, + &brw_polygon_stipple, + &brw_line_stipple, + + &brw_psp_urb_cbs, + + &brw_drawing_rect, + &brw_indices, + &brw_index_buffer, + &brw_vertices, + + &brw_curbe_buffer +}; + + +void brw_init_state( struct brw_context *brw ) +{ + brw_init_caches(brw); +} + + +void brw_destroy_state( struct brw_context *brw ) +{ + brw_destroy_caches(brw); + brw_destroy_batch_cache(brw); +} + +/*********************************************************************** + */ + +static GLboolean check_state( const struct brw_state_flags *a, + const struct brw_state_flags *b ) +{ + return ((a->mesa & b->mesa) || + (a->brw & b->brw) || + (a->cache & b->cache)); +} + +static void accumulate_state( struct brw_state_flags *a, + const struct brw_state_flags *b ) +{ + a->mesa |= b->mesa; + a->brw |= b->brw; + a->cache |= b->cache; +} + + +static void xor_states( struct brw_state_flags *result, + const struct brw_state_flags *a, + const struct brw_state_flags *b ) +{ + result->mesa = a->mesa ^ b->mesa; + result->brw = a->brw ^ b->brw; + result->cache = a->cache ^ b->cache; +} + +static void +brw_clear_validated_bos(struct brw_context *brw) +{ + int i; + + /* Clear the last round of validated bos */ + for (i = 0; i < brw->state.validated_bo_count; i++) { + bo_reference(&brw->state.validated_bos[i], NULL); + } + brw->state.validated_bo_count = 0; +} + + +/*********************************************************************** + * Emit all state: + */ +enum pipe_error brw_validate_state( struct brw_context *brw ) +{ + struct brw_state_flags *state = &brw->state.dirty; + GLuint i; + int ret; + + brw_clear_validated_bos(brw); + brw_add_validated_bo(brw, brw->batch->buf); + + if (brw->flags.always_emit_state) { + state->mesa |= ~0; + state->brw |= ~0; + state->cache |= ~0; + } + + if (state->mesa == 0 && + state->cache == 0 && + state->brw == 0) + return 0; + + if (brw->state.dirty.brw & BRW_NEW_CONTEXT) + brw_clear_batch_cache(brw); + + /* do prepare stage for all atoms */ + for (i = 0; i < Elements(atoms); i++) { + const struct brw_tracked_state *atom = atoms[i]; + + if (check_state(state, &atom->dirty)) { + if (atom->prepare) { + ret = atom->prepare(brw); + if (ret) + return ret; + } + } + } + + /* Make sure that the textures which are referenced by the current + * brw fragment program are actually present/valid. + * If this fails, we can experience GPU lock-ups. + */ + { + const struct brw_fragment_shader *fp = brw->curr.fragment_shader; + if (fp) { + assert(fp->info.file_max[TGSI_FILE_SAMPLER] < (int)brw->curr.num_samplers); + /*assert(fp->info.texture_max <= brw->curr.num_textures);*/ + } + } + + return 0; +} + + +enum pipe_error brw_upload_state(struct brw_context *brw) +{ + struct brw_state_flags *state = &brw->state.dirty; + int ret; + int i; + + brw_clear_validated_bos(brw); + + if (BRW_DEBUG) { + /* Debug version which enforces various sanity checks on the + * state flags which are generated and checked to help ensure + * state atoms are ordered correctly in the list. + */ + struct brw_state_flags examined, prev; + memset(&examined, 0, sizeof(examined)); + prev = *state; + + for (i = 0; i < Elements(atoms); i++) { + const struct brw_tracked_state *atom = atoms[i]; + struct brw_state_flags generated; + + assert(atom->dirty.mesa || + atom->dirty.brw || + atom->dirty.cache); + + if (check_state(state, &atom->dirty)) { + if (atom->emit) { + ret = atom->emit( brw ); + if (ret) + return ret; + } + } + + accumulate_state(&examined, &atom->dirty); + + /* generated = (prev ^ state) + * if (examined & generated) + * fail; + */ + xor_states(&generated, &prev, state); + assert(!check_state(&examined, &generated)); + prev = *state; + } + } + else { + for (i = 0; i < Elements(atoms); i++) { + const struct brw_tracked_state *atom = atoms[i]; + + if (check_state(state, &atom->dirty)) { + if (atom->emit) { + ret = atom->emit( brw ); + if (ret) + return ret; + } + } + } + } + + if (BRW_DEBUG & DEBUG_STATE) { + brw_update_dirty_counts( state->mesa, + state->brw, + state->cache ); + } + + /* Clear dirty flags: + */ + memset(state, 0, sizeof(*state)); + return 0; +} diff --git a/src/gallium/drivers/i965/brw_structs.h b/src/gallium/drivers/i965/brw_structs.h new file mode 100644 index 00000000000..bf10bc04de7 --- /dev/null +++ b/src/gallium/drivers/i965/brw_structs.h @@ -0,0 +1,1576 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#ifndef BRW_STRUCTS_H +#define BRW_STRUCTS_H + +#include "brw_types.h" + +/** Number of general purpose registers (VS, WM, etc) */ +#define BRW_MAX_GRF 128 + +/** Number of message register file registers */ +#define BRW_MAX_MRF 16 + + +/* Command packets: + */ +struct header +{ + GLuint length:16; + GLuint opcode:16; +}; + + +union header_union +{ + struct header bits; + GLuint dword; +}; + +struct brw_3d_control +{ + struct + { + GLuint length:8; + GLuint notify_enable:1; + GLuint pad:3; + GLuint wc_flush_enable:1; + GLuint depth_stall_enable:1; + GLuint operation:2; + GLuint opcode:16; + } header; + + struct + { + GLuint pad:2; + GLuint dest_addr_type:1; + GLuint dest_addr:29; + } dest; + + GLuint dword2; + GLuint dword3; +}; + + +struct brw_3d_primitive +{ + struct + { + GLuint length:8; + GLuint pad:2; + GLuint topology:5; + GLuint indexed:1; + GLuint opcode:16; + } header; + + GLuint verts_per_instance; + GLuint start_vert_location; + GLuint instance_count; + GLuint start_instance_location; + GLuint base_vert_location; +}; + +/* These seem to be passed around as function args, so it works out + * better to keep them as #defines: + */ +#define BRW_FLUSH_READ_CACHE 0x1 +#define BRW_FLUSH_STATE_CACHE 0x2 +#define BRW_INHIBIT_FLUSH_RENDER_CACHE 0x4 +#define BRW_FLUSH_SNAPSHOT_COUNTERS 0x8 + +struct brw_mi_flush +{ + GLuint flags:4; + GLuint pad:12; + GLuint opcode:16; +}; + +struct brw_vf_statistics +{ + GLuint statistics_enable:1; + GLuint pad:15; + GLuint opcode:16; +}; + + + +struct brw_binding_table_pointers +{ + struct header header; + GLuint vs; + GLuint gs; + GLuint clp; + GLuint sf; + GLuint wm; +}; + + +struct brw_blend_constant_color +{ + struct header header; + GLfloat blend_constant_color[4]; +}; + + +struct brw_depthbuffer +{ + union header_union header; + + union { + struct { + GLuint pitch:18; + GLuint format:3; + GLuint pad:2; + GLuint software_tiled_rendering_mode:2; + GLuint depth_offset_disable:1; + GLuint tile_walk:1; + GLuint tiled_surface:1; + GLuint pad2:1; + GLuint surface_type:3; + } bits; + GLuint dword; + } dword1; + + GLuint dword2_base_addr; + + union { + struct { + GLuint pad:1; + GLuint mipmap_layout:1; + GLuint lod:4; + GLuint width:13; + GLuint height:13; + } bits; + GLuint dword; + } dword3; + + union { + struct { + GLuint pad:10; + GLuint min_array_element:11; + GLuint depth:11; + } bits; + GLuint dword; + } dword4; +}; + +struct brw_depthbuffer_g4x +{ + union header_union header; + + union { + struct { + GLuint pitch:18; + GLuint format:3; + GLuint pad:2; + GLuint software_tiled_rendering_mode:2; + GLuint depth_offset_disable:1; + GLuint tile_walk:1; + GLuint tiled_surface:1; + GLuint pad2:1; + GLuint surface_type:3; + } bits; + GLuint dword; + } dword1; + + GLuint dword2_base_addr; + + union { + struct { + GLuint pad:1; + GLuint mipmap_layout:1; + GLuint lod:4; + GLuint width:13; + GLuint height:13; + } bits; + GLuint dword; + } dword3; + + union { + struct { + GLuint pad:10; + GLuint min_array_element:11; + GLuint depth:11; + } bits; + GLuint dword; + } dword4; + + union { + struct { + GLuint xoffset:16; + GLuint yoffset:16; + } bits; + GLuint dword; + } dword5; /* NEW in Integrated Graphics Device */ +}; + +struct brw_drawrect +{ + struct header header; + GLuint xmin:16; + GLuint ymin:16; + GLuint xmax:16; + GLuint ymax:16; + GLuint xorg:16; + GLuint yorg:16; +}; + + + + +struct brw_global_depth_offset_clamp +{ + struct header header; + GLfloat depth_offset_clamp; +}; + +struct brw_indexbuffer +{ + union { + struct + { + GLuint length:8; + GLuint index_format:2; + GLuint cut_index_enable:1; + GLuint pad:5; + GLuint opcode:16; + } bits; + GLuint dword; + + } header; + + GLuint buffer_start; + GLuint buffer_end; +}; + +/* NEW in Integrated Graphics Device */ +struct brw_aa_line_parameters +{ + struct header header; + + struct { + GLuint aa_coverage_scope:8; + GLuint pad0:8; + GLuint aa_coverage_bias:8; + GLuint pad1:8; + } bits0; + + struct { + GLuint aa_coverage_endcap_slope:8; + GLuint pad0:8; + GLuint aa_coverage_endcap_bias:8; + GLuint pad1:8; + } bits1; +}; + +struct brw_line_stipple +{ + struct header header; + + struct + { + GLuint pattern:16; + GLuint pad:16; + } bits0; + + struct + { + GLuint repeat_count:9; + GLuint pad:7; + GLuint inverse_repeat_count:16; + } bits1; +}; + + +struct brw_pipelined_state_pointers +{ + struct header header; + + struct { + GLuint pad:5; + GLuint offset:27; /* Offset from GENERAL_STATE_BASE */ + } vs; + + struct + { + GLuint enable:1; + GLuint pad:4; + GLuint offset:27; /* Offset from GENERAL_STATE_BASE */ + } gs; + + struct + { + GLuint enable:1; + GLuint pad:4; + GLuint offset:27; /* Offset from GENERAL_STATE_BASE */ + } clp; + + struct + { + GLuint pad:5; + GLuint offset:27; /* Offset from GENERAL_STATE_BASE */ + } sf; + + struct + { + GLuint pad:5; + GLuint offset:27; /* Offset from GENERAL_STATE_BASE */ + } wm; + + struct + { + GLuint pad:5; + GLuint offset:27; /* Offset from GENERAL_STATE_BASE. KW: check me! */ + } cc; +}; + + +struct brw_polygon_stipple_offset +{ + struct header header; + + struct { + GLuint y_offset:5; + GLuint pad:3; + GLuint x_offset:5; + GLuint pad0:19; + } bits0; +}; + + + +struct brw_polygon_stipple +{ + struct header header; + GLuint stipple[32]; +}; + + + +struct brw_pipeline_select +{ + struct + { + GLuint pipeline_select:1; + GLuint pad:15; + GLuint opcode:16; + } header; +}; + + +struct brw_pipe_control +{ + struct + { + GLuint length:8; + GLuint notify_enable:1; + GLuint texture_cache_flush_enable:1; + GLuint indirect_state_pointers_disable:1; + GLuint instruction_state_cache_flush_enable:1; + GLuint write_cache_flush_enable:1; + GLuint depth_stall_enable:1; + GLuint post_sync_operation:2; + + GLuint opcode:16; + } header; + + struct + { + GLuint pad:2; + GLuint dest_addr_type:1; + GLuint dest_addr:29; + } bits1; + + GLuint data0; + GLuint data1; +}; + + +struct brw_urb_fence +{ + struct + { + GLuint length:8; + GLuint vs_realloc:1; + GLuint gs_realloc:1; + GLuint clp_realloc:1; + GLuint sf_realloc:1; + GLuint vfe_realloc:1; + GLuint cs_realloc:1; + GLuint pad:2; + GLuint opcode:16; + } header; + + struct + { + GLuint vs_fence:10; + GLuint gs_fence:10; + GLuint clp_fence:10; + GLuint pad:2; + } bits0; + + struct + { + GLuint sf_fence:10; + GLuint vf_fence:10; + GLuint cs_fence:11; + GLuint pad:1; + } bits1; +}; + +struct brw_cs_urb_state +{ + struct header header; + + struct + { + GLuint nr_urb_entries:3; + GLuint pad:1; + GLuint urb_entry_size:5; + GLuint pad0:23; + } bits0; +}; + +struct brw_constant_buffer +{ + struct + { + GLuint length:8; + GLuint valid:1; + GLuint pad:7; + GLuint opcode:16; + } header; + + struct + { + GLuint buffer_length:6; + GLuint buffer_address:26; + } bits0; +}; + +struct brw_state_base_address +{ + struct header header; + + struct + { + GLuint modify_enable:1; + GLuint pad:4; + GLuint general_state_address:27; + } bits0; + + struct + { + GLuint modify_enable:1; + GLuint pad:4; + GLuint surface_state_address:27; + } bits1; + + struct + { + GLuint modify_enable:1; + GLuint pad:4; + GLuint indirect_object_state_address:27; + } bits2; + + struct + { + GLuint modify_enable:1; + GLuint pad:11; + GLuint general_state_upper_bound:20; + } bits3; + + struct + { + GLuint modify_enable:1; + GLuint pad:11; + GLuint indirect_object_state_upper_bound:20; + } bits4; +}; + +struct brw_state_prefetch +{ + struct header header; + + struct + { + GLuint prefetch_count:3; + GLuint pad:3; + GLuint prefetch_pointer:26; + } bits0; +}; + +struct brw_system_instruction_pointer +{ + struct header header; + + struct + { + GLuint pad:4; + GLuint system_instruction_pointer:28; + } bits0; +}; + + + + +/* State structs for the various fixed function units: + */ + + +struct thread0 +{ + GLuint pad0:1; + GLuint grf_reg_count:3; + GLuint pad1:2; + GLuint kernel_start_pointer:26; /* Offset from GENERAL_STATE_BASE */ +}; + +struct thread1 +{ + GLuint ext_halt_exception_enable:1; + GLuint sw_exception_enable:1; + GLuint mask_stack_exception_enable:1; + GLuint timeout_exception_enable:1; + GLuint illegal_op_exception_enable:1; + GLuint pad0:3; + GLuint depth_coef_urb_read_offset:6; /* WM only */ + GLuint pad1:2; + GLuint floating_point_mode:1; + GLuint thread_priority:1; + GLuint binding_table_entry_count:8; + GLuint pad3:5; + GLuint single_program_flow:1; +}; + +struct thread2 +{ + GLuint per_thread_scratch_space:4; + GLuint pad0:6; + GLuint scratch_space_base_pointer:22; +}; + + +struct thread3 +{ + GLuint dispatch_grf_start_reg:4; + GLuint urb_entry_read_offset:6; + GLuint pad0:1; + GLuint urb_entry_read_length:6; + GLuint pad1:1; + GLuint const_urb_entry_read_offset:6; + GLuint pad2:1; + GLuint const_urb_entry_read_length:6; + GLuint pad3:1; +}; + + + +struct brw_clip_unit_state +{ + struct thread0 thread0; + struct + { + GLuint pad0:7; + GLuint sw_exception_enable:1; + GLuint pad1:3; + GLuint mask_stack_exception_enable:1; + GLuint pad2:1; + GLuint illegal_op_exception_enable:1; + GLuint pad3:2; + GLuint floating_point_mode:1; + GLuint thread_priority:1; + GLuint binding_table_entry_count:8; + GLuint pad4:5; + GLuint single_program_flow:1; + } thread1; + + struct thread2 thread2; + struct thread3 thread3; + + struct + { + GLuint pad0:9; + GLuint gs_output_stats:1; /* not always */ + GLuint stats_enable:1; + GLuint nr_urb_entries:7; + GLuint pad1:1; + GLuint urb_entry_allocation_size:5; + GLuint pad2:1; + GLuint max_threads:5; /* may be less */ + GLuint pad3:2; + } thread4; + + struct + { + GLuint pad0:13; + GLuint clip_mode:3; + GLuint userclip_enable_flags:8; + GLuint userclip_must_clip:1; + GLuint negative_w_clip_test:1; + GLuint guard_band_enable:1; + GLuint viewport_z_clip_enable:1; + GLuint viewport_xy_clip_enable:1; + GLuint vertex_position_space:1; + GLuint api_mode:1; + GLuint pad2:1; + } clip5; + + struct + { + GLuint pad0:5; + GLuint clipper_viewport_state_ptr:27; + } clip6; + + + GLfloat viewport_xmin; + GLfloat viewport_xmax; + GLfloat viewport_ymin; + GLfloat viewport_ymax; +}; + + + +struct brw_cc_unit_state +{ + struct brw_cc0 + { + GLuint pad0:3; + GLuint bf_stencil_pass_depth_pass_op:3; + GLuint bf_stencil_pass_depth_fail_op:3; + GLuint bf_stencil_fail_op:3; + GLuint bf_stencil_func:3; + GLuint bf_stencil_enable:1; + GLuint pad1:2; + GLuint stencil_write_enable:1; + GLuint stencil_pass_depth_pass_op:3; + GLuint stencil_pass_depth_fail_op:3; + GLuint stencil_fail_op:3; + GLuint stencil_func:3; + GLuint stencil_enable:1; + } cc0; + + + struct brw_cc1 + { + GLuint bf_stencil_ref:8; + GLuint stencil_write_mask:8; + GLuint stencil_test_mask:8; + GLuint stencil_ref:8; + } cc1; + + + struct brw_cc2 + { + GLuint logicop_enable:1; + GLuint pad0:10; + GLuint depth_write_enable:1; + GLuint depth_test_function:3; + GLuint depth_test:1; + GLuint bf_stencil_write_mask:8; + GLuint bf_stencil_test_mask:8; + } cc2; + + + struct brw_cc3 + { + GLuint pad0:8; + GLuint alpha_test_func:3; + GLuint alpha_test:1; + GLuint blend_enable:1; + GLuint ia_blend_enable:1; + GLuint pad1:1; + GLuint alpha_test_format:1; + GLuint pad2:16; + } cc3; + + struct brw_cc4 + { + GLuint pad0:5; + GLuint cc_viewport_state_offset:27; /* Offset from GENERAL_STATE_BASE */ + } cc4; + + struct brw_cc5 + { + GLuint pad0:2; + GLuint ia_dest_blend_factor:5; + GLuint ia_src_blend_factor:5; + GLuint ia_blend_function:3; + GLuint statistics_enable:1; + GLuint logicop_func:4; + GLuint pad1:11; + GLuint dither_enable:1; + } cc5; + + struct brw_cc6 + { + GLuint clamp_post_alpha_blend:1; + GLuint clamp_pre_alpha_blend:1; + GLuint clamp_range:2; + GLuint pad0:11; + GLuint y_dither_offset:2; + GLuint x_dither_offset:2; + GLuint dest_blend_factor:5; + GLuint src_blend_factor:5; + GLuint blend_function:3; + } cc6; + + struct brw_cc7 { + union { + GLfloat f; + GLubyte ub[4]; + } alpha_ref; + } cc7; +}; + + + +struct brw_sf_unit_state +{ + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct + { + GLuint pad0:10; + GLuint stats_enable:1; + GLuint nr_urb_entries:7; + GLuint pad1:1; + GLuint urb_entry_allocation_size:5; + GLuint pad2:1; + GLuint max_threads:6; + GLuint pad3:1; + } thread4; + + struct + { + GLuint front_winding:1; + GLuint viewport_transform:1; + GLuint pad0:3; + GLuint sf_viewport_state_offset:27; /* Offset from GENERAL_STATE_BASE */ + } sf5; + + struct + { + GLuint pad0:9; + GLuint dest_org_vbias:4; + GLuint dest_org_hbias:4; + GLuint scissor:1; + GLuint disable_2x2_trifilter:1; + GLuint disable_zero_pix_trifilter:1; + GLuint point_rast_rule:2; + GLuint line_endcap_aa_region_width:2; + GLuint line_width:4; + GLuint fast_scissor_disable:1; + GLuint cull_mode:2; + GLuint aa_enable:1; + } sf6; + + struct + { + GLuint point_size:11; + GLuint use_point_size_state:1; + GLuint subpixel_precision:1; + GLuint sprite_point:1; + GLuint pad0:10; + GLuint aa_line_distance_mode:1; + GLuint trifan_pv:2; + GLuint linestrip_pv:2; + GLuint tristrip_pv:2; + GLuint line_last_pixel_enable:1; + } sf7; + +}; + + +struct brw_gs_unit_state +{ + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct + { + GLuint pad0:8; + GLuint rendering_enable:1; /* for IGDNG */ + GLuint pad4:1; + GLuint stats_enable:1; + GLuint nr_urb_entries:7; + GLuint pad1:1; + GLuint urb_entry_allocation_size:5; + GLuint pad2:1; + GLuint max_threads:5; + GLuint pad3:2; + } thread4; + + struct + { + GLuint sampler_count:3; + GLuint pad0:2; + GLuint sampler_state_pointer:27; + } gs5; + + + struct + { + GLuint max_vp_index:4; + GLuint pad0:12; + GLuint svbi_post_inc_value:10; + GLuint pad1:1; + GLuint svbi_post_inc_enable:1; + GLuint svbi_payload:1; + GLuint discard_adjaceny:1; + GLuint reorder_enable:1; + GLuint pad2:1; + } gs6; +}; + + +struct brw_vs_unit_state +{ + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct + { + GLuint pad0:10; + GLuint stats_enable:1; + GLuint nr_urb_entries:7; + GLuint pad1:1; + GLuint urb_entry_allocation_size:5; + GLuint pad2:1; + GLuint max_threads:6; + GLuint pad3:1; + } thread4; + + struct + { + GLuint sampler_count:3; + GLuint pad0:2; + GLuint sampler_state_pointer:27; + } vs5; + + struct + { + GLuint vs_enable:1; + GLuint vert_cache_disable:1; + GLuint pad0:30; + } vs6; +}; + + +struct brw_wm_unit_state +{ + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct { + GLuint stats_enable:1; + GLuint depth_buffer_clear:1; + GLuint sampler_count:3; + GLuint sampler_state_pointer:27; + } wm4; + + struct + { + GLuint enable_8_pix:1; + GLuint enable_16_pix:1; + GLuint enable_32_pix:1; + GLuint enable_con_32_pix:1; + GLuint enable_con_64_pix:1; + GLuint pad0:5; + GLuint legacy_global_depth_bias:1; + GLuint line_stipple:1; + GLuint depth_offset:1; + GLuint polygon_stipple:1; + GLuint line_aa_region_width:2; + GLuint line_endcap_aa_region_width:2; + GLuint early_depth_test:1; + GLuint thread_dispatch_enable:1; + GLuint program_uses_depth:1; + GLuint program_computes_depth:1; + GLuint program_uses_killpixel:1; + GLuint legacy_line_rast: 1; + GLuint transposed_urb_read_enable:1; + GLuint max_threads:7; + } wm5; + + GLfloat global_depth_offset_constant; + GLfloat global_depth_offset_scale; + + /* for IGDNG only */ + struct { + GLuint pad0:1; + GLuint grf_reg_count_1:3; + GLuint pad1:2; + GLuint kernel_start_pointer_1:26; + } wm8; + + struct { + GLuint pad0:1; + GLuint grf_reg_count_2:3; + GLuint pad1:2; + GLuint kernel_start_pointer_2:26; + } wm9; + + struct { + GLuint pad0:1; + GLuint grf_reg_count_3:3; + GLuint pad1:2; + GLuint kernel_start_pointer_3:26; + } wm10; +}; + +struct brw_sampler_default_color { + GLfloat color[4]; +}; + +struct brw_sampler_state +{ + + struct brw_ss0 + { + GLuint shadow_function:3; + GLuint lod_bias:11; + GLuint min_filter:3; + GLuint mag_filter:3; + GLuint mip_filter:2; + GLuint base_level:5; + GLuint pad:1; + GLuint lod_preclamp:1; + GLuint default_color_mode:1; + GLuint pad0:1; + GLuint disable:1; + } ss0; + + struct brw_ss1 + { + GLuint r_wrap_mode:3; + GLuint t_wrap_mode:3; + GLuint s_wrap_mode:3; + GLuint pad:3; + GLuint max_lod:10; + GLuint min_lod:10; + } ss1; + + + struct brw_ss2 + { + GLuint pad:5; + GLuint default_color_pointer:27; + } ss2; + + struct brw_ss3 + { + GLuint pad:19; + GLuint max_aniso:3; + GLuint chroma_key_mode:1; + GLuint chroma_key_index:2; + GLuint chroma_key_enable:1; + GLuint monochrome_filter_width:3; + GLuint monochrome_filter_height:3; + } ss3; +}; + + +struct brw_clipper_viewport +{ + GLfloat xmin; + GLfloat xmax; + GLfloat ymin; + GLfloat ymax; +}; + +struct brw_cc_viewport +{ + GLfloat min_depth; + GLfloat max_depth; +}; + +struct brw_sf_viewport +{ + struct { + GLfloat m00; + GLfloat m11; + GLfloat m22; + GLfloat m30; + GLfloat m31; + GLfloat m32; + } viewport; + + /* scissor coordinates are inclusive */ + struct { + GLshort xmin; + GLshort ymin; + GLshort xmax; + GLshort ymax; + } scissor; +}; + +/* Documented in the subsystem/shared-functions/sampler chapter... + */ +struct brw_surface_state +{ + struct brw_surf_ss0 { + GLuint cube_pos_z:1; + GLuint cube_neg_z:1; + GLuint cube_pos_y:1; + GLuint cube_neg_y:1; + GLuint cube_pos_x:1; + GLuint cube_neg_x:1; + GLuint pad:4; + GLuint mipmap_layout_mode:1; + GLuint vert_line_stride_ofs:1; + GLuint vert_line_stride:1; + GLuint color_blend:1; + GLuint writedisable_blue:1; + GLuint writedisable_green:1; + GLuint writedisable_red:1; + GLuint writedisable_alpha:1; + GLuint surface_format:9; /**< BRW_SURFACEFORMAT_x */ + GLuint data_return_format:1; + GLuint pad0:1; + GLuint surface_type:3; /**< BRW_SURFACE_1D/2D/3D/CUBE */ + } ss0; + + struct brw_surf_ss1 { + GLuint base_addr; + } ss1; + + struct brw_surf_ss2 { + GLuint pad:2; + GLuint mip_count:4; + GLuint width:13; + GLuint height:13; + } ss2; + + struct brw_surf_ss3 { + GLuint tile_walk:1; + GLuint tiled_surface:1; + GLuint pad:1; + GLuint pitch:18; + GLuint depth:11; + } ss3; + + struct brw_surf_ss4 { + GLuint multisample_position_palette_index:3; + GLuint pad1:1; + GLuint num_multisamples:3; + GLuint pad0:1; + GLuint render_target_view_extent:9; + GLuint min_array_elt:11; + GLuint min_lod:4; + } ss4; + + struct brw_surf_ss5 { + GLuint pad1:16; + GLuint llc_mapping:1; + GLuint mlc_mapping:1; + GLuint gfdt:1; + GLuint gfdt_src:1; + GLuint y_offset:4; + GLuint pad0:1; + GLuint x_offset:7; + } ss5; /* New in G4X */ + +}; + + + +struct brw_vertex_buffer_state +{ + struct { + GLuint pitch:11; + GLuint pad:15; + GLuint access_type:1; + GLuint vb_index:5; + } vb0; + + GLuint start_addr; + GLuint max_index; +#if 1 + GLuint instance_data_step_rate; /* not included for sequential/random vertices? */ +#endif +}; + +#define BRW_VBP_MAX 17 + +struct brw_vb_array_state { + struct header header; + struct brw_vertex_buffer_state vb[BRW_VBP_MAX]; +}; + + +struct brw_vertex_element_state +{ + struct + { + GLuint src_offset:11; + GLuint pad:5; + GLuint src_format:9; + GLuint pad0:1; + GLuint valid:1; + GLuint vertex_buffer_index:5; + } ve0; + + struct + { + GLuint dst_offset:8; + GLuint pad:8; + GLuint vfcomponent3:4; + GLuint vfcomponent2:4; + GLuint vfcomponent1:4; + GLuint vfcomponent0:4; + } ve1; +}; + +#define BRW_VEP_MAX 18 + +struct brw_vertex_element_packet { + struct header header; + struct brw_vertex_element_state ve[BRW_VEP_MAX]; /* note: less than _TNL_ATTRIB_MAX */ +}; + + +struct brw_urb_immediate { + GLuint opcode:4; + GLuint offset:6; + GLuint swizzle_control:2; + GLuint pad:1; + GLuint allocate:1; + GLuint used:1; + GLuint complete:1; + GLuint response_length:4; + GLuint msg_length:4; + GLuint msg_target:4; + GLuint pad1:3; + GLuint end_of_thread:1; +}; + +/* Instruction format for the execution units: + */ + +struct brw_instruction +{ + struct + { + GLuint opcode:7; + GLuint pad:1; + GLuint access_mode:1; + GLuint mask_control:1; + GLuint dependency_control:2; + GLuint compression_control:2; + GLuint thread_control:2; + GLuint predicate_control:4; + GLuint predicate_inverse:1; + GLuint execution_size:3; + GLuint destreg__conditionalmod:4; /* destreg - send, conditionalmod - others */ + GLuint pad0:2; + GLuint debug_control:1; + GLuint saturate:1; + } header; + + union { + struct + { + GLuint dest_reg_file:2; + GLuint dest_reg_type:3; + GLuint src0_reg_file:2; + GLuint src0_reg_type:3; + GLuint src1_reg_file:2; + GLuint src1_reg_type:3; + GLuint pad:1; + GLuint dest_subreg_nr:5; + GLuint dest_reg_nr:8; + GLuint dest_horiz_stride:2; + GLuint dest_address_mode:1; + } da1; + + struct + { + GLuint dest_reg_file:2; + GLuint dest_reg_type:3; + GLuint src0_reg_file:2; + GLuint src0_reg_type:3; + GLuint src1_reg_file:2; /* 0x00000c00 */ + GLuint src1_reg_type:3; /* 0x00007000 */ + GLuint pad:1; + GLint dest_indirect_offset:10; /* offset against the deref'd address reg */ + GLuint dest_subreg_nr:3; /* subnr for the address reg a0.x */ + GLuint dest_horiz_stride:2; + GLuint dest_address_mode:1; + } ia1; + + struct + { + GLuint dest_reg_file:2; + GLuint dest_reg_type:3; + GLuint src0_reg_file:2; + GLuint src0_reg_type:3; + GLuint src1_reg_file:2; + GLuint src1_reg_type:3; + GLuint pad:1; + GLuint dest_writemask:4; + GLuint dest_subreg_nr:1; + GLuint dest_reg_nr:8; + GLuint pad1:2; + GLuint dest_address_mode:1; + } da16; + + struct + { + GLuint dest_reg_file:2; + GLuint dest_reg_type:3; + GLuint src0_reg_file:2; + GLuint src0_reg_type:3; + GLuint pad0:6; + GLuint dest_writemask:4; + GLint dest_indirect_offset:6; + GLuint dest_subreg_nr:3; + GLuint pad1:2; + GLuint dest_address_mode:1; + } ia16; + } bits1; + + + union { + struct + { + GLuint src0_subreg_nr:5; + GLuint src0_reg_nr:8; + GLuint src0_abs:1; + GLuint src0_negate:1; + GLuint src0_address_mode:1; + GLuint src0_horiz_stride:2; + GLuint src0_width:3; + GLuint src0_vert_stride:4; + GLuint flag_reg_nr:1; + GLuint pad:6; + } da1; + + struct + { + GLint src0_indirect_offset:10; + GLuint src0_subreg_nr:3; + GLuint src0_abs:1; + GLuint src0_negate:1; + GLuint src0_address_mode:1; + GLuint src0_horiz_stride:2; + GLuint src0_width:3; + GLuint src0_vert_stride:4; + GLuint flag_reg_nr:1; + GLuint pad:6; + } ia1; + + struct + { + GLuint src0_swz_x:2; + GLuint src0_swz_y:2; + GLuint src0_subreg_nr:1; + GLuint src0_reg_nr:8; + GLuint src0_abs:1; + GLuint src0_negate:1; + GLuint src0_address_mode:1; + GLuint src0_swz_z:2; + GLuint src0_swz_w:2; + GLuint pad0:1; + GLuint src0_vert_stride:4; + GLuint flag_reg_nr:1; + GLuint pad1:6; + } da16; + + struct + { + GLuint src0_swz_x:2; + GLuint src0_swz_y:2; + GLint src0_indirect_offset:6; + GLuint src0_subreg_nr:3; + GLuint src0_abs:1; + GLuint src0_negate:1; + GLuint src0_address_mode:1; + GLuint src0_swz_z:2; + GLuint src0_swz_w:2; + GLuint pad0:1; + GLuint src0_vert_stride:4; + GLuint flag_reg_nr:1; + GLuint pad1:6; + } ia16; + + struct + { + GLuint pad:26; + GLuint end_of_thread:1; + GLuint pad1:1; + GLuint sfid:4; + } send_igdng; /* for IGDNG only */ + + } bits2; + + union + { + struct + { + GLuint src1_subreg_nr:5; + GLuint src1_reg_nr:8; + GLuint src1_abs:1; + GLuint src1_negate:1; + GLuint src1_address_mode:1; + GLuint src1_horiz_stride:2; + GLuint src1_width:3; + GLuint src1_vert_stride:4; + GLuint pad0:7; + } da1; + + struct + { + GLuint src1_swz_x:2; + GLuint src1_swz_y:2; + GLuint src1_subreg_nr:1; + GLuint src1_reg_nr:8; + GLuint src1_abs:1; + GLuint src1_negate:1; + GLuint src1_address_mode:1; + GLuint src1_swz_z:2; + GLuint src1_swz_w:2; + GLuint pad1:1; + GLuint src1_vert_stride:4; + GLuint pad2:7; + } da16; + + struct + { + GLint src1_indirect_offset:10; + GLuint src1_subreg_nr:3; + GLuint src1_abs:1; + GLuint src1_negate:1; + GLuint src1_address_mode:1; + GLuint src1_horiz_stride:2; + GLuint src1_width:3; + GLuint src1_vert_stride:4; + GLuint flag_reg_nr:1; + GLuint pad1:6; + } ia1; + + struct + { + GLuint src1_swz_x:2; + GLuint src1_swz_y:2; + GLint src1_indirect_offset:6; + GLuint src1_subreg_nr:3; + GLuint src1_abs:1; + GLuint src1_negate:1; + GLuint pad0:1; + GLuint src1_swz_z:2; + GLuint src1_swz_w:2; + GLuint pad1:1; + GLuint src1_vert_stride:4; + GLuint flag_reg_nr:1; + GLuint pad2:6; + } ia16; + + + struct + { + GLint jump_count:16; /* note: signed */ + GLuint pop_count:4; + GLuint pad0:12; + } if_else; + + struct { + GLuint function:4; + GLuint int_type:1; + GLuint precision:1; + GLuint saturate:1; + GLuint data_type:1; + GLuint pad0:8; + GLuint response_length:4; + GLuint msg_length:4; + GLuint msg_target:4; + GLuint pad1:3; + GLuint end_of_thread:1; + } math; + + struct { + GLuint function:4; + GLuint int_type:1; + GLuint precision:1; + GLuint saturate:1; + GLuint data_type:1; + GLuint snapshot:1; + GLuint pad0:10; + GLuint header_present:1; + GLuint response_length:5; + GLuint msg_length:4; + GLuint pad1:2; + GLuint end_of_thread:1; + } math_igdng; + + struct { + GLuint binding_table_index:8; + GLuint sampler:4; + GLuint return_format:2; + GLuint msg_type:2; + GLuint response_length:4; + GLuint msg_length:4; + GLuint msg_target:4; + GLuint pad1:3; + GLuint end_of_thread:1; + } sampler; + + struct { + GLuint binding_table_index:8; + GLuint sampler:4; + GLuint msg_type:4; + GLuint response_length:4; + GLuint msg_length:4; + GLuint msg_target:4; + GLuint pad1:3; + GLuint end_of_thread:1; + } sampler_g4x; + + struct { + GLuint binding_table_index:8; + GLuint sampler:4; + GLuint msg_type:4; + GLuint simd_mode:2; + GLuint pad0:1; + GLuint header_present:1; + GLuint response_length:5; + GLuint msg_length:4; + GLuint pad1:2; + GLuint end_of_thread:1; + } sampler_igdng; + + struct brw_urb_immediate urb; + + struct { + GLuint opcode:4; + GLuint offset:6; + GLuint swizzle_control:2; + GLuint pad:1; + GLuint allocate:1; + GLuint used:1; + GLuint complete:1; + GLuint pad0:3; + GLuint header_present:1; + GLuint response_length:5; + GLuint msg_length:4; + GLuint pad1:2; + GLuint end_of_thread:1; + } urb_igdng; + + struct { + GLuint binding_table_index:8; + GLuint msg_control:4; + GLuint msg_type:2; + GLuint target_cache:2; + GLuint response_length:4; + GLuint msg_length:4; + GLuint msg_target:4; + GLuint pad1:3; + GLuint end_of_thread:1; + } dp_read; + + struct { + GLuint binding_table_index:8; + GLuint msg_control:3; + GLuint msg_type:3; + GLuint target_cache:2; + GLuint pad0:3; + GLuint header_present:1; + GLuint response_length:5; + GLuint msg_length:4; + GLuint pad1:2; + GLuint end_of_thread:1; + } dp_read_igdng; + + struct { + GLuint binding_table_index:8; + GLuint msg_control:3; + GLuint pixel_scoreboard_clear:1; + GLuint msg_type:3; + GLuint send_commit_msg:1; + GLuint response_length:4; + GLuint msg_length:4; + GLuint msg_target:4; + GLuint pad1:3; + GLuint end_of_thread:1; + } dp_write; + + struct { + GLuint binding_table_index:8; + GLuint msg_control:3; + GLuint pixel_scoreboard_clear:1; + GLuint msg_type:3; + GLuint send_commit_msg:1; + GLuint pad0:3; + GLuint header_present:1; + GLuint response_length:5; + GLuint msg_length:4; + GLuint pad1:2; + GLuint end_of_thread:1; + } dp_write_igdng; + + struct { + GLuint pad:16; + GLuint response_length:4; + GLuint msg_length:4; + GLuint msg_target:4; + GLuint pad1:3; + GLuint end_of_thread:1; + } generic; + + struct { + GLuint pad:19; + GLuint header_present:1; + GLuint response_length:5; + GLuint msg_length:4; + GLuint pad1:2; + GLuint end_of_thread:1; + } generic_igdng; + + GLint d; + GLuint ud; + float f; + } bits3; +}; + + +#endif diff --git a/src/gallium/drivers/i965/brw_structs_dump.c b/src/gallium/drivers/i965/brw_structs_dump.c new file mode 100644 index 00000000000..cd40fc6d618 --- /dev/null +++ b/src/gallium/drivers/i965/brw_structs_dump.c @@ -0,0 +1,1247 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + +/** + * @file + * Dump i965 data structures. + * + * Generated automatically from brw_structs.h by brw_structs_dump.py. + */ + +#include "util/u_debug.h" + +#include "brw_types.h" +#include "brw_structs.h" +#include "brw_structs_dump.h" + +void +brw_dump_3d_control(const struct brw_3d_control *ptr) +{ + debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); + debug_printf("\t\t.header.notify_enable = 0x%x\n", (*ptr).header.notify_enable); + debug_printf("\t\t.header.wc_flush_enable = 0x%x\n", (*ptr).header.wc_flush_enable); + debug_printf("\t\t.header.depth_stall_enable = 0x%x\n", (*ptr).header.depth_stall_enable); + debug_printf("\t\t.header.operation = 0x%x\n", (*ptr).header.operation); + debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); + debug_printf("\t\t.dest.dest_addr_type = 0x%x\n", (*ptr).dest.dest_addr_type); + debug_printf("\t\t.dest.dest_addr = 0x%x\n", (*ptr).dest.dest_addr); + debug_printf("\t\t.dword2 = 0x%x\n", (*ptr).dword2); + debug_printf("\t\t.dword3 = 0x%x\n", (*ptr).dword3); +} + +void +brw_dump_3d_primitive(const struct brw_3d_primitive *ptr) +{ + debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); + debug_printf("\t\t.header.topology = 0x%x\n", (*ptr).header.topology); + debug_printf("\t\t.header.indexed = 0x%x\n", (*ptr).header.indexed); + debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); + debug_printf("\t\t.verts_per_instance = 0x%x\n", (*ptr).verts_per_instance); + debug_printf("\t\t.start_vert_location = 0x%x\n", (*ptr).start_vert_location); + debug_printf("\t\t.instance_count = 0x%x\n", (*ptr).instance_count); + debug_printf("\t\t.start_instance_location = 0x%x\n", (*ptr).start_instance_location); + debug_printf("\t\t.base_vert_location = 0x%x\n", (*ptr).base_vert_location); +} + +void +brw_dump_aa_line_parameters(const struct brw_aa_line_parameters *ptr) +{ + debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); + debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); + debug_printf("\t\t.bits0.aa_coverage_scope = 0x%x\n", (*ptr).bits0.aa_coverage_scope); + debug_printf("\t\t.bits0.aa_coverage_bias = 0x%x\n", (*ptr).bits0.aa_coverage_bias); + debug_printf("\t\t.bits1.aa_coverage_endcap_slope = 0x%x\n", (*ptr).bits1.aa_coverage_endcap_slope); + debug_printf("\t\t.bits1.aa_coverage_endcap_bias = 0x%x\n", (*ptr).bits1.aa_coverage_endcap_bias); +} + +void +brw_dump_binding_table_pointers(const struct brw_binding_table_pointers *ptr) +{ + debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); + debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); + debug_printf("\t\t.vs = 0x%x\n", (*ptr).vs); + debug_printf("\t\t.gs = 0x%x\n", (*ptr).gs); + debug_printf("\t\t.clp = 0x%x\n", (*ptr).clp); + debug_printf("\t\t.sf = 0x%x\n", (*ptr).sf); + debug_printf("\t\t.wm = 0x%x\n", (*ptr).wm); +} + +void +brw_dump_blend_constant_color(const struct brw_blend_constant_color *ptr) +{ + debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); + debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); + debug_printf("\t\t.blend_constant_color[0] = %f\n", (*ptr).blend_constant_color[0]); + debug_printf("\t\t.blend_constant_color[1] = %f\n", (*ptr).blend_constant_color[1]); + debug_printf("\t\t.blend_constant_color[2] = %f\n", (*ptr).blend_constant_color[2]); + debug_printf("\t\t.blend_constant_color[3] = %f\n", (*ptr).blend_constant_color[3]); +} + +void +brw_dump_cc0(const struct brw_cc0 *ptr) +{ + debug_printf("\t\t.bf_stencil_pass_depth_pass_op = 0x%x\n", (*ptr).bf_stencil_pass_depth_pass_op); + debug_printf("\t\t.bf_stencil_pass_depth_fail_op = 0x%x\n", (*ptr).bf_stencil_pass_depth_fail_op); + debug_printf("\t\t.bf_stencil_fail_op = 0x%x\n", (*ptr).bf_stencil_fail_op); + debug_printf("\t\t.bf_stencil_func = 0x%x\n", (*ptr).bf_stencil_func); + debug_printf("\t\t.bf_stencil_enable = 0x%x\n", (*ptr).bf_stencil_enable); + debug_printf("\t\t.stencil_write_enable = 0x%x\n", (*ptr).stencil_write_enable); + debug_printf("\t\t.stencil_pass_depth_pass_op = 0x%x\n", (*ptr).stencil_pass_depth_pass_op); + debug_printf("\t\t.stencil_pass_depth_fail_op = 0x%x\n", (*ptr).stencil_pass_depth_fail_op); + debug_printf("\t\t.stencil_fail_op = 0x%x\n", (*ptr).stencil_fail_op); + debug_printf("\t\t.stencil_func = 0x%x\n", (*ptr).stencil_func); + debug_printf("\t\t.stencil_enable = 0x%x\n", (*ptr).stencil_enable); +} + +void +brw_dump_cc1(const struct brw_cc1 *ptr) +{ + debug_printf("\t\t.bf_stencil_ref = 0x%x\n", (*ptr).bf_stencil_ref); + debug_printf("\t\t.stencil_write_mask = 0x%x\n", (*ptr).stencil_write_mask); + debug_printf("\t\t.stencil_test_mask = 0x%x\n", (*ptr).stencil_test_mask); + debug_printf("\t\t.stencil_ref = 0x%x\n", (*ptr).stencil_ref); +} + +void +brw_dump_cc2(const struct brw_cc2 *ptr) +{ + debug_printf("\t\t.logicop_enable = 0x%x\n", (*ptr).logicop_enable); + debug_printf("\t\t.depth_write_enable = 0x%x\n", (*ptr).depth_write_enable); + debug_printf("\t\t.depth_test_function = 0x%x\n", (*ptr).depth_test_function); + debug_printf("\t\t.depth_test = 0x%x\n", (*ptr).depth_test); + debug_printf("\t\t.bf_stencil_write_mask = 0x%x\n", (*ptr).bf_stencil_write_mask); + debug_printf("\t\t.bf_stencil_test_mask = 0x%x\n", (*ptr).bf_stencil_test_mask); +} + +void +brw_dump_cc3(const struct brw_cc3 *ptr) +{ + debug_printf("\t\t.alpha_test_func = 0x%x\n", (*ptr).alpha_test_func); + debug_printf("\t\t.alpha_test = 0x%x\n", (*ptr).alpha_test); + debug_printf("\t\t.blend_enable = 0x%x\n", (*ptr).blend_enable); + debug_printf("\t\t.ia_blend_enable = 0x%x\n", (*ptr).ia_blend_enable); + debug_printf("\t\t.alpha_test_format = 0x%x\n", (*ptr).alpha_test_format); +} + +void +brw_dump_cc4(const struct brw_cc4 *ptr) +{ + debug_printf("\t\t.cc_viewport_state_offset = 0x%x\n", (*ptr).cc_viewport_state_offset); +} + +void +brw_dump_cc5(const struct brw_cc5 *ptr) +{ + debug_printf("\t\t.ia_dest_blend_factor = 0x%x\n", (*ptr).ia_dest_blend_factor); + debug_printf("\t\t.ia_src_blend_factor = 0x%x\n", (*ptr).ia_src_blend_factor); + debug_printf("\t\t.ia_blend_function = 0x%x\n", (*ptr).ia_blend_function); + debug_printf("\t\t.statistics_enable = 0x%x\n", (*ptr).statistics_enable); + debug_printf("\t\t.logicop_func = 0x%x\n", (*ptr).logicop_func); + debug_printf("\t\t.dither_enable = 0x%x\n", (*ptr).dither_enable); +} + +void +brw_dump_cc6(const struct brw_cc6 *ptr) +{ + debug_printf("\t\t.clamp_post_alpha_blend = 0x%x\n", (*ptr).clamp_post_alpha_blend); + debug_printf("\t\t.clamp_pre_alpha_blend = 0x%x\n", (*ptr).clamp_pre_alpha_blend); + debug_printf("\t\t.clamp_range = 0x%x\n", (*ptr).clamp_range); + debug_printf("\t\t.y_dither_offset = 0x%x\n", (*ptr).y_dither_offset); + debug_printf("\t\t.x_dither_offset = 0x%x\n", (*ptr).x_dither_offset); + debug_printf("\t\t.dest_blend_factor = 0x%x\n", (*ptr).dest_blend_factor); + debug_printf("\t\t.src_blend_factor = 0x%x\n", (*ptr).src_blend_factor); + debug_printf("\t\t.blend_function = 0x%x\n", (*ptr).blend_function); +} + +void +brw_dump_cc7(const struct brw_cc7 *ptr) +{ + debug_printf("\t\t.alpha_ref.f = %f\n", (*ptr).alpha_ref.f); + debug_printf("\t\t.alpha_ref.ub[0] = 0x%x\n", (*ptr).alpha_ref.ub[0]); + debug_printf("\t\t.alpha_ref.ub[1] = 0x%x\n", (*ptr).alpha_ref.ub[1]); + debug_printf("\t\t.alpha_ref.ub[2] = 0x%x\n", (*ptr).alpha_ref.ub[2]); + debug_printf("\t\t.alpha_ref.ub[3] = 0x%x\n", (*ptr).alpha_ref.ub[3]); +} + +void +brw_dump_cc_unit_state(const struct brw_cc_unit_state *ptr) +{ + debug_printf("\t\t.cc0.bf_stencil_pass_depth_pass_op = 0x%x\n", (*ptr).cc0.bf_stencil_pass_depth_pass_op); + debug_printf("\t\t.cc0.bf_stencil_pass_depth_fail_op = 0x%x\n", (*ptr).cc0.bf_stencil_pass_depth_fail_op); + debug_printf("\t\t.cc0.bf_stencil_fail_op = 0x%x\n", (*ptr).cc0.bf_stencil_fail_op); + debug_printf("\t\t.cc0.bf_stencil_func = 0x%x\n", (*ptr).cc0.bf_stencil_func); + debug_printf("\t\t.cc0.bf_stencil_enable = 0x%x\n", (*ptr).cc0.bf_stencil_enable); + debug_printf("\t\t.cc0.stencil_write_enable = 0x%x\n", (*ptr).cc0.stencil_write_enable); + debug_printf("\t\t.cc0.stencil_pass_depth_pass_op = 0x%x\n", (*ptr).cc0.stencil_pass_depth_pass_op); + debug_printf("\t\t.cc0.stencil_pass_depth_fail_op = 0x%x\n", (*ptr).cc0.stencil_pass_depth_fail_op); + debug_printf("\t\t.cc0.stencil_fail_op = 0x%x\n", (*ptr).cc0.stencil_fail_op); + debug_printf("\t\t.cc0.stencil_func = 0x%x\n", (*ptr).cc0.stencil_func); + debug_printf("\t\t.cc0.stencil_enable = 0x%x\n", (*ptr).cc0.stencil_enable); + debug_printf("\t\t.cc1.bf_stencil_ref = 0x%x\n", (*ptr).cc1.bf_stencil_ref); + debug_printf("\t\t.cc1.stencil_write_mask = 0x%x\n", (*ptr).cc1.stencil_write_mask); + debug_printf("\t\t.cc1.stencil_test_mask = 0x%x\n", (*ptr).cc1.stencil_test_mask); + debug_printf("\t\t.cc1.stencil_ref = 0x%x\n", (*ptr).cc1.stencil_ref); + debug_printf("\t\t.cc2.logicop_enable = 0x%x\n", (*ptr).cc2.logicop_enable); + debug_printf("\t\t.cc2.depth_write_enable = 0x%x\n", (*ptr).cc2.depth_write_enable); + debug_printf("\t\t.cc2.depth_test_function = 0x%x\n", (*ptr).cc2.depth_test_function); + debug_printf("\t\t.cc2.depth_test = 0x%x\n", (*ptr).cc2.depth_test); + debug_printf("\t\t.cc2.bf_stencil_write_mask = 0x%x\n", (*ptr).cc2.bf_stencil_write_mask); + debug_printf("\t\t.cc2.bf_stencil_test_mask = 0x%x\n", (*ptr).cc2.bf_stencil_test_mask); + debug_printf("\t\t.cc3.alpha_test_func = 0x%x\n", (*ptr).cc3.alpha_test_func); + debug_printf("\t\t.cc3.alpha_test = 0x%x\n", (*ptr).cc3.alpha_test); + debug_printf("\t\t.cc3.blend_enable = 0x%x\n", (*ptr).cc3.blend_enable); + debug_printf("\t\t.cc3.ia_blend_enable = 0x%x\n", (*ptr).cc3.ia_blend_enable); + debug_printf("\t\t.cc3.alpha_test_format = 0x%x\n", (*ptr).cc3.alpha_test_format); + debug_printf("\t\t.cc4.cc_viewport_state_offset = 0x%x\n", (*ptr).cc4.cc_viewport_state_offset); + debug_printf("\t\t.cc5.ia_dest_blend_factor = 0x%x\n", (*ptr).cc5.ia_dest_blend_factor); + debug_printf("\t\t.cc5.ia_src_blend_factor = 0x%x\n", (*ptr).cc5.ia_src_blend_factor); + debug_printf("\t\t.cc5.ia_blend_function = 0x%x\n", (*ptr).cc5.ia_blend_function); + debug_printf("\t\t.cc5.statistics_enable = 0x%x\n", (*ptr).cc5.statistics_enable); + debug_printf("\t\t.cc5.logicop_func = 0x%x\n", (*ptr).cc5.logicop_func); + debug_printf("\t\t.cc5.dither_enable = 0x%x\n", (*ptr).cc5.dither_enable); + debug_printf("\t\t.cc6.clamp_post_alpha_blend = 0x%x\n", (*ptr).cc6.clamp_post_alpha_blend); + debug_printf("\t\t.cc6.clamp_pre_alpha_blend = 0x%x\n", (*ptr).cc6.clamp_pre_alpha_blend); + debug_printf("\t\t.cc6.clamp_range = 0x%x\n", (*ptr).cc6.clamp_range); + debug_printf("\t\t.cc6.y_dither_offset = 0x%x\n", (*ptr).cc6.y_dither_offset); + debug_printf("\t\t.cc6.x_dither_offset = 0x%x\n", (*ptr).cc6.x_dither_offset); + debug_printf("\t\t.cc6.dest_blend_factor = 0x%x\n", (*ptr).cc6.dest_blend_factor); + debug_printf("\t\t.cc6.src_blend_factor = 0x%x\n", (*ptr).cc6.src_blend_factor); + debug_printf("\t\t.cc6.blend_function = 0x%x\n", (*ptr).cc6.blend_function); + debug_printf("\t\t.cc7.alpha_ref.f = %f\n", (*ptr).cc7.alpha_ref.f); + debug_printf("\t\t.cc7.alpha_ref.ub[0] = 0x%x\n", (*ptr).cc7.alpha_ref.ub[0]); + debug_printf("\t\t.cc7.alpha_ref.ub[1] = 0x%x\n", (*ptr).cc7.alpha_ref.ub[1]); + debug_printf("\t\t.cc7.alpha_ref.ub[2] = 0x%x\n", (*ptr).cc7.alpha_ref.ub[2]); + debug_printf("\t\t.cc7.alpha_ref.ub[3] = 0x%x\n", (*ptr).cc7.alpha_ref.ub[3]); +} + +void +brw_dump_cc_viewport(const struct brw_cc_viewport *ptr) +{ + debug_printf("\t\t.min_depth = %f\n", (*ptr).min_depth); + debug_printf("\t\t.max_depth = %f\n", (*ptr).max_depth); +} + +void +brw_dump_clip_unit_state(const struct brw_clip_unit_state *ptr) +{ + debug_printf("\t\t.thread0.grf_reg_count = 0x%x\n", (*ptr).thread0.grf_reg_count); + debug_printf("\t\t.thread0.kernel_start_pointer = 0x%x\n", (*ptr).thread0.kernel_start_pointer); + debug_printf("\t\t.thread1.sw_exception_enable = 0x%x\n", (*ptr).thread1.sw_exception_enable); + debug_printf("\t\t.thread1.mask_stack_exception_enable = 0x%x\n", (*ptr).thread1.mask_stack_exception_enable); + debug_printf("\t\t.thread1.illegal_op_exception_enable = 0x%x\n", (*ptr).thread1.illegal_op_exception_enable); + debug_printf("\t\t.thread1.floating_point_mode = 0x%x\n", (*ptr).thread1.floating_point_mode); + debug_printf("\t\t.thread1.thread_priority = 0x%x\n", (*ptr).thread1.thread_priority); + debug_printf("\t\t.thread1.binding_table_entry_count = 0x%x\n", (*ptr).thread1.binding_table_entry_count); + debug_printf("\t\t.thread1.single_program_flow = 0x%x\n", (*ptr).thread1.single_program_flow); + debug_printf("\t\t.thread2.per_thread_scratch_space = 0x%x\n", (*ptr).thread2.per_thread_scratch_space); + debug_printf("\t\t.thread2.scratch_space_base_pointer = 0x%x\n", (*ptr).thread2.scratch_space_base_pointer); + debug_printf("\t\t.thread3.dispatch_grf_start_reg = 0x%x\n", (*ptr).thread3.dispatch_grf_start_reg); + debug_printf("\t\t.thread3.urb_entry_read_offset = 0x%x\n", (*ptr).thread3.urb_entry_read_offset); + debug_printf("\t\t.thread3.urb_entry_read_length = 0x%x\n", (*ptr).thread3.urb_entry_read_length); + debug_printf("\t\t.thread3.const_urb_entry_read_offset = 0x%x\n", (*ptr).thread3.const_urb_entry_read_offset); + debug_printf("\t\t.thread3.const_urb_entry_read_length = 0x%x\n", (*ptr).thread3.const_urb_entry_read_length); + debug_printf("\t\t.thread4.gs_output_stats = 0x%x\n", (*ptr).thread4.gs_output_stats); + debug_printf("\t\t.thread4.stats_enable = 0x%x\n", (*ptr).thread4.stats_enable); + debug_printf("\t\t.thread4.nr_urb_entries = 0x%x\n", (*ptr).thread4.nr_urb_entries); + debug_printf("\t\t.thread4.urb_entry_allocation_size = 0x%x\n", (*ptr).thread4.urb_entry_allocation_size); + debug_printf("\t\t.thread4.max_threads = 0x%x\n", (*ptr).thread4.max_threads); + debug_printf("\t\t.clip5.clip_mode = 0x%x\n", (*ptr).clip5.clip_mode); + debug_printf("\t\t.clip5.userclip_enable_flags = 0x%x\n", (*ptr).clip5.userclip_enable_flags); + debug_printf("\t\t.clip5.userclip_must_clip = 0x%x\n", (*ptr).clip5.userclip_must_clip); + debug_printf("\t\t.clip5.negative_w_clip_test = 0x%x\n", (*ptr).clip5.negative_w_clip_test); + debug_printf("\t\t.clip5.guard_band_enable = 0x%x\n", (*ptr).clip5.guard_band_enable); + debug_printf("\t\t.clip5.viewport_z_clip_enable = 0x%x\n", (*ptr).clip5.viewport_z_clip_enable); + debug_printf("\t\t.clip5.viewport_xy_clip_enable = 0x%x\n", (*ptr).clip5.viewport_xy_clip_enable); + debug_printf("\t\t.clip5.vertex_position_space = 0x%x\n", (*ptr).clip5.vertex_position_space); + debug_printf("\t\t.clip5.api_mode = 0x%x\n", (*ptr).clip5.api_mode); + debug_printf("\t\t.clip6.clipper_viewport_state_ptr = 0x%x\n", (*ptr).clip6.clipper_viewport_state_ptr); + debug_printf("\t\t.viewport_xmin = %f\n", (*ptr).viewport_xmin); + debug_printf("\t\t.viewport_xmax = %f\n", (*ptr).viewport_xmax); + debug_printf("\t\t.viewport_ymin = %f\n", (*ptr).viewport_ymin); + debug_printf("\t\t.viewport_ymax = %f\n", (*ptr).viewport_ymax); +} + +void +brw_dump_clipper_viewport(const struct brw_clipper_viewport *ptr) +{ + debug_printf("\t\t.xmin = %f\n", (*ptr).xmin); + debug_printf("\t\t.xmax = %f\n", (*ptr).xmax); + debug_printf("\t\t.ymin = %f\n", (*ptr).ymin); + debug_printf("\t\t.ymax = %f\n", (*ptr).ymax); +} + +void +brw_dump_constant_buffer(const struct brw_constant_buffer *ptr) +{ + debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); + debug_printf("\t\t.header.valid = 0x%x\n", (*ptr).header.valid); + debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); + debug_printf("\t\t.bits0.buffer_length = 0x%x\n", (*ptr).bits0.buffer_length); + debug_printf("\t\t.bits0.buffer_address = 0x%x\n", (*ptr).bits0.buffer_address); +} + +void +brw_dump_cs_urb_state(const struct brw_cs_urb_state *ptr) +{ + debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); + debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); + debug_printf("\t\t.bits0.nr_urb_entries = 0x%x\n", (*ptr).bits0.nr_urb_entries); + debug_printf("\t\t.bits0.urb_entry_size = 0x%x\n", (*ptr).bits0.urb_entry_size); +} + +void +brw_dump_depthbuffer(const struct brw_depthbuffer *ptr) +{ + debug_printf("\t\t.header.bits.length = 0x%x\n", (*ptr).header.bits.length); + debug_printf("\t\t.header.bits.opcode = 0x%x\n", (*ptr).header.bits.opcode); + debug_printf("\t\t.dword1.bits.pitch = 0x%x\n", (*ptr).dword1.bits.pitch); + debug_printf("\t\t.dword1.bits.format = 0x%x\n", (*ptr).dword1.bits.format); + debug_printf("\t\t.dword1.bits.software_tiled_rendering_mode = 0x%x\n", (*ptr).dword1.bits.software_tiled_rendering_mode); + debug_printf("\t\t.dword1.bits.depth_offset_disable = 0x%x\n", (*ptr).dword1.bits.depth_offset_disable); + debug_printf("\t\t.dword1.bits.tile_walk = 0x%x\n", (*ptr).dword1.bits.tile_walk); + debug_printf("\t\t.dword1.bits.tiled_surface = 0x%x\n", (*ptr).dword1.bits.tiled_surface); + debug_printf("\t\t.dword1.bits.surface_type = 0x%x\n", (*ptr).dword1.bits.surface_type); + debug_printf("\t\t.dword2_base_addr = 0x%x\n", (*ptr).dword2_base_addr); + debug_printf("\t\t.dword3.bits.mipmap_layout = 0x%x\n", (*ptr).dword3.bits.mipmap_layout); + debug_printf("\t\t.dword3.bits.lod = 0x%x\n", (*ptr).dword3.bits.lod); + debug_printf("\t\t.dword3.bits.width = 0x%x\n", (*ptr).dword3.bits.width); + debug_printf("\t\t.dword3.bits.height = 0x%x\n", (*ptr).dword3.bits.height); + debug_printf("\t\t.dword4.bits.min_array_element = 0x%x\n", (*ptr).dword4.bits.min_array_element); + debug_printf("\t\t.dword4.bits.depth = 0x%x\n", (*ptr).dword4.bits.depth); +} + +void +brw_dump_depthbuffer_g4x(const struct brw_depthbuffer_g4x *ptr) +{ + debug_printf("\t\t.header.bits.length = 0x%x\n", (*ptr).header.bits.length); + debug_printf("\t\t.header.bits.opcode = 0x%x\n", (*ptr).header.bits.opcode); + debug_printf("\t\t.dword1.bits.pitch = 0x%x\n", (*ptr).dword1.bits.pitch); + debug_printf("\t\t.dword1.bits.format = 0x%x\n", (*ptr).dword1.bits.format); + debug_printf("\t\t.dword1.bits.software_tiled_rendering_mode = 0x%x\n", (*ptr).dword1.bits.software_tiled_rendering_mode); + debug_printf("\t\t.dword1.bits.depth_offset_disable = 0x%x\n", (*ptr).dword1.bits.depth_offset_disable); + debug_printf("\t\t.dword1.bits.tile_walk = 0x%x\n", (*ptr).dword1.bits.tile_walk); + debug_printf("\t\t.dword1.bits.tiled_surface = 0x%x\n", (*ptr).dword1.bits.tiled_surface); + debug_printf("\t\t.dword1.bits.surface_type = 0x%x\n", (*ptr).dword1.bits.surface_type); + debug_printf("\t\t.dword2_base_addr = 0x%x\n", (*ptr).dword2_base_addr); + debug_printf("\t\t.dword3.bits.mipmap_layout = 0x%x\n", (*ptr).dword3.bits.mipmap_layout); + debug_printf("\t\t.dword3.bits.lod = 0x%x\n", (*ptr).dword3.bits.lod); + debug_printf("\t\t.dword3.bits.width = 0x%x\n", (*ptr).dword3.bits.width); + debug_printf("\t\t.dword3.bits.height = 0x%x\n", (*ptr).dword3.bits.height); + debug_printf("\t\t.dword4.bits.min_array_element = 0x%x\n", (*ptr).dword4.bits.min_array_element); + debug_printf("\t\t.dword4.bits.depth = 0x%x\n", (*ptr).dword4.bits.depth); + debug_printf("\t\t.dword5.bits.xoffset = 0x%x\n", (*ptr).dword5.bits.xoffset); + debug_printf("\t\t.dword5.bits.yoffset = 0x%x\n", (*ptr).dword5.bits.yoffset); +} + +void +brw_dump_drawrect(const struct brw_drawrect *ptr) +{ + debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); + debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); + debug_printf("\t\t.xmin = 0x%x\n", (*ptr).xmin); + debug_printf("\t\t.ymin = 0x%x\n", (*ptr).ymin); + debug_printf("\t\t.xmax = 0x%x\n", (*ptr).xmax); + debug_printf("\t\t.ymax = 0x%x\n", (*ptr).ymax); + debug_printf("\t\t.xorg = 0x%x\n", (*ptr).xorg); + debug_printf("\t\t.yorg = 0x%x\n", (*ptr).yorg); +} + +void +brw_dump_global_depth_offset_clamp(const struct brw_global_depth_offset_clamp *ptr) +{ + debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); + debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); + debug_printf("\t\t.depth_offset_clamp = %f\n", (*ptr).depth_offset_clamp); +} + +void +brw_dump_gs_unit_state(const struct brw_gs_unit_state *ptr) +{ + debug_printf("\t\t.thread0.grf_reg_count = 0x%x\n", (*ptr).thread0.grf_reg_count); + debug_printf("\t\t.thread0.kernel_start_pointer = 0x%x\n", (*ptr).thread0.kernel_start_pointer); + debug_printf("\t\t.thread1.ext_halt_exception_enable = 0x%x\n", (*ptr).thread1.ext_halt_exception_enable); + debug_printf("\t\t.thread1.sw_exception_enable = 0x%x\n", (*ptr).thread1.sw_exception_enable); + debug_printf("\t\t.thread1.mask_stack_exception_enable = 0x%x\n", (*ptr).thread1.mask_stack_exception_enable); + debug_printf("\t\t.thread1.timeout_exception_enable = 0x%x\n", (*ptr).thread1.timeout_exception_enable); + debug_printf("\t\t.thread1.illegal_op_exception_enable = 0x%x\n", (*ptr).thread1.illegal_op_exception_enable); + debug_printf("\t\t.thread1.depth_coef_urb_read_offset = 0x%x\n", (*ptr).thread1.depth_coef_urb_read_offset); + debug_printf("\t\t.thread1.floating_point_mode = 0x%x\n", (*ptr).thread1.floating_point_mode); + debug_printf("\t\t.thread1.thread_priority = 0x%x\n", (*ptr).thread1.thread_priority); + debug_printf("\t\t.thread1.binding_table_entry_count = 0x%x\n", (*ptr).thread1.binding_table_entry_count); + debug_printf("\t\t.thread1.single_program_flow = 0x%x\n", (*ptr).thread1.single_program_flow); + debug_printf("\t\t.thread2.per_thread_scratch_space = 0x%x\n", (*ptr).thread2.per_thread_scratch_space); + debug_printf("\t\t.thread2.scratch_space_base_pointer = 0x%x\n", (*ptr).thread2.scratch_space_base_pointer); + debug_printf("\t\t.thread3.dispatch_grf_start_reg = 0x%x\n", (*ptr).thread3.dispatch_grf_start_reg); + debug_printf("\t\t.thread3.urb_entry_read_offset = 0x%x\n", (*ptr).thread3.urb_entry_read_offset); + debug_printf("\t\t.thread3.urb_entry_read_length = 0x%x\n", (*ptr).thread3.urb_entry_read_length); + debug_printf("\t\t.thread3.const_urb_entry_read_offset = 0x%x\n", (*ptr).thread3.const_urb_entry_read_offset); + debug_printf("\t\t.thread3.const_urb_entry_read_length = 0x%x\n", (*ptr).thread3.const_urb_entry_read_length); + debug_printf("\t\t.thread4.rendering_enable = 0x%x\n", (*ptr).thread4.rendering_enable); + debug_printf("\t\t.thread4.stats_enable = 0x%x\n", (*ptr).thread4.stats_enable); + debug_printf("\t\t.thread4.nr_urb_entries = 0x%x\n", (*ptr).thread4.nr_urb_entries); + debug_printf("\t\t.thread4.urb_entry_allocation_size = 0x%x\n", (*ptr).thread4.urb_entry_allocation_size); + debug_printf("\t\t.thread4.max_threads = 0x%x\n", (*ptr).thread4.max_threads); + debug_printf("\t\t.gs5.sampler_count = 0x%x\n", (*ptr).gs5.sampler_count); + debug_printf("\t\t.gs5.sampler_state_pointer = 0x%x\n", (*ptr).gs5.sampler_state_pointer); + debug_printf("\t\t.gs6.max_vp_index = 0x%x\n", (*ptr).gs6.max_vp_index); + debug_printf("\t\t.gs6.svbi_post_inc_value = 0x%x\n", (*ptr).gs6.svbi_post_inc_value); + debug_printf("\t\t.gs6.svbi_post_inc_enable = 0x%x\n", (*ptr).gs6.svbi_post_inc_enable); + debug_printf("\t\t.gs6.svbi_payload = 0x%x\n", (*ptr).gs6.svbi_payload); + debug_printf("\t\t.gs6.discard_adjaceny = 0x%x\n", (*ptr).gs6.discard_adjaceny); + debug_printf("\t\t.gs6.reorder_enable = 0x%x\n", (*ptr).gs6.reorder_enable); +} + +void +brw_dump_indexbuffer(const struct brw_indexbuffer *ptr) +{ + debug_printf("\t\t.header.bits.length = 0x%x\n", (*ptr).header.bits.length); + debug_printf("\t\t.header.bits.index_format = 0x%x\n", (*ptr).header.bits.index_format); + debug_printf("\t\t.header.bits.cut_index_enable = 0x%x\n", (*ptr).header.bits.cut_index_enable); + debug_printf("\t\t.header.bits.opcode = 0x%x\n", (*ptr).header.bits.opcode); + debug_printf("\t\t.buffer_start = 0x%x\n", (*ptr).buffer_start); + debug_printf("\t\t.buffer_end = 0x%x\n", (*ptr).buffer_end); +} + +void +brw_dump_line_stipple(const struct brw_line_stipple *ptr) +{ + debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); + debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); + debug_printf("\t\t.bits0.pattern = 0x%x\n", (*ptr).bits0.pattern); + debug_printf("\t\t.bits1.repeat_count = 0x%x\n", (*ptr).bits1.repeat_count); + debug_printf("\t\t.bits1.inverse_repeat_count = 0x%x\n", (*ptr).bits1.inverse_repeat_count); +} + +void +brw_dump_mi_flush(const struct brw_mi_flush *ptr) +{ + debug_printf("\t\t.flags = 0x%x\n", (*ptr).flags); + debug_printf("\t\t.opcode = 0x%x\n", (*ptr).opcode); +} + +void +brw_dump_pipe_control(const struct brw_pipe_control *ptr) +{ + debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); + debug_printf("\t\t.header.notify_enable = 0x%x\n", (*ptr).header.notify_enable); + debug_printf("\t\t.header.texture_cache_flush_enable = 0x%x\n", (*ptr).header.texture_cache_flush_enable); + debug_printf("\t\t.header.indirect_state_pointers_disable = 0x%x\n", (*ptr).header.indirect_state_pointers_disable); + debug_printf("\t\t.header.instruction_state_cache_flush_enable = 0x%x\n", (*ptr).header.instruction_state_cache_flush_enable); + debug_printf("\t\t.header.write_cache_flush_enable = 0x%x\n", (*ptr).header.write_cache_flush_enable); + debug_printf("\t\t.header.depth_stall_enable = 0x%x\n", (*ptr).header.depth_stall_enable); + debug_printf("\t\t.header.post_sync_operation = 0x%x\n", (*ptr).header.post_sync_operation); + debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); + debug_printf("\t\t.bits1.dest_addr_type = 0x%x\n", (*ptr).bits1.dest_addr_type); + debug_printf("\t\t.bits1.dest_addr = 0x%x\n", (*ptr).bits1.dest_addr); + debug_printf("\t\t.data0 = 0x%x\n", (*ptr).data0); + debug_printf("\t\t.data1 = 0x%x\n", (*ptr).data1); +} + +void +brw_dump_pipeline_select(const struct brw_pipeline_select *ptr) +{ + debug_printf("\t\t.header.pipeline_select = 0x%x\n", (*ptr).header.pipeline_select); + debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); +} + +void +brw_dump_pipelined_state_pointers(const struct brw_pipelined_state_pointers *ptr) +{ + debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); + debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); + debug_printf("\t\t.vs.offset = 0x%x\n", (*ptr).vs.offset); + debug_printf("\t\t.gs.enable = 0x%x\n", (*ptr).gs.enable); + debug_printf("\t\t.gs.offset = 0x%x\n", (*ptr).gs.offset); + debug_printf("\t\t.clp.enable = 0x%x\n", (*ptr).clp.enable); + debug_printf("\t\t.clp.offset = 0x%x\n", (*ptr).clp.offset); + debug_printf("\t\t.sf.offset = 0x%x\n", (*ptr).sf.offset); + debug_printf("\t\t.wm.offset = 0x%x\n", (*ptr).wm.offset); + debug_printf("\t\t.cc.offset = 0x%x\n", (*ptr).cc.offset); +} + +void +brw_dump_polygon_stipple(const struct brw_polygon_stipple *ptr) +{ + debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); + debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); + debug_printf("\t\t.stipple[0] = 0x%x\n", (*ptr).stipple[0]); + debug_printf("\t\t.stipple[1] = 0x%x\n", (*ptr).stipple[1]); + debug_printf("\t\t.stipple[2] = 0x%x\n", (*ptr).stipple[2]); + debug_printf("\t\t.stipple[3] = 0x%x\n", (*ptr).stipple[3]); + debug_printf("\t\t.stipple[4] = 0x%x\n", (*ptr).stipple[4]); + debug_printf("\t\t.stipple[5] = 0x%x\n", (*ptr).stipple[5]); + debug_printf("\t\t.stipple[6] = 0x%x\n", (*ptr).stipple[6]); + debug_printf("\t\t.stipple[7] = 0x%x\n", (*ptr).stipple[7]); + debug_printf("\t\t.stipple[8] = 0x%x\n", (*ptr).stipple[8]); + debug_printf("\t\t.stipple[9] = 0x%x\n", (*ptr).stipple[9]); + debug_printf("\t\t.stipple[10] = 0x%x\n", (*ptr).stipple[10]); + debug_printf("\t\t.stipple[11] = 0x%x\n", (*ptr).stipple[11]); + debug_printf("\t\t.stipple[12] = 0x%x\n", (*ptr).stipple[12]); + debug_printf("\t\t.stipple[13] = 0x%x\n", (*ptr).stipple[13]); + debug_printf("\t\t.stipple[14] = 0x%x\n", (*ptr).stipple[14]); + debug_printf("\t\t.stipple[15] = 0x%x\n", (*ptr).stipple[15]); + debug_printf("\t\t.stipple[16] = 0x%x\n", (*ptr).stipple[16]); + debug_printf("\t\t.stipple[17] = 0x%x\n", (*ptr).stipple[17]); + debug_printf("\t\t.stipple[18] = 0x%x\n", (*ptr).stipple[18]); + debug_printf("\t\t.stipple[19] = 0x%x\n", (*ptr).stipple[19]); + debug_printf("\t\t.stipple[20] = 0x%x\n", (*ptr).stipple[20]); + debug_printf("\t\t.stipple[21] = 0x%x\n", (*ptr).stipple[21]); + debug_printf("\t\t.stipple[22] = 0x%x\n", (*ptr).stipple[22]); + debug_printf("\t\t.stipple[23] = 0x%x\n", (*ptr).stipple[23]); + debug_printf("\t\t.stipple[24] = 0x%x\n", (*ptr).stipple[24]); + debug_printf("\t\t.stipple[25] = 0x%x\n", (*ptr).stipple[25]); + debug_printf("\t\t.stipple[26] = 0x%x\n", (*ptr).stipple[26]); + debug_printf("\t\t.stipple[27] = 0x%x\n", (*ptr).stipple[27]); + debug_printf("\t\t.stipple[28] = 0x%x\n", (*ptr).stipple[28]); + debug_printf("\t\t.stipple[29] = 0x%x\n", (*ptr).stipple[29]); + debug_printf("\t\t.stipple[30] = 0x%x\n", (*ptr).stipple[30]); + debug_printf("\t\t.stipple[31] = 0x%x\n", (*ptr).stipple[31]); +} + +void +brw_dump_polygon_stipple_offset(const struct brw_polygon_stipple_offset *ptr) +{ + debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); + debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); + debug_printf("\t\t.bits0.y_offset = 0x%x\n", (*ptr).bits0.y_offset); + debug_printf("\t\t.bits0.x_offset = 0x%x\n", (*ptr).bits0.x_offset); +} + +void +brw_dump_sampler_default_color(const struct brw_sampler_default_color *ptr) +{ + debug_printf("\t\t.color[0] = %f\n", (*ptr).color[0]); + debug_printf("\t\t.color[1] = %f\n", (*ptr).color[1]); + debug_printf("\t\t.color[2] = %f\n", (*ptr).color[2]); + debug_printf("\t\t.color[3] = %f\n", (*ptr).color[3]); +} + +void +brw_dump_sampler_state(const struct brw_sampler_state *ptr) +{ + debug_printf("\t\t.ss0.shadow_function = 0x%x\n", (*ptr).ss0.shadow_function); + debug_printf("\t\t.ss0.lod_bias = 0x%x\n", (*ptr).ss0.lod_bias); + debug_printf("\t\t.ss0.min_filter = 0x%x\n", (*ptr).ss0.min_filter); + debug_printf("\t\t.ss0.mag_filter = 0x%x\n", (*ptr).ss0.mag_filter); + debug_printf("\t\t.ss0.mip_filter = 0x%x\n", (*ptr).ss0.mip_filter); + debug_printf("\t\t.ss0.base_level = 0x%x\n", (*ptr).ss0.base_level); + debug_printf("\t\t.ss0.lod_preclamp = 0x%x\n", (*ptr).ss0.lod_preclamp); + debug_printf("\t\t.ss0.default_color_mode = 0x%x\n", (*ptr).ss0.default_color_mode); + debug_printf("\t\t.ss0.disable = 0x%x\n", (*ptr).ss0.disable); + debug_printf("\t\t.ss1.r_wrap_mode = 0x%x\n", (*ptr).ss1.r_wrap_mode); + debug_printf("\t\t.ss1.t_wrap_mode = 0x%x\n", (*ptr).ss1.t_wrap_mode); + debug_printf("\t\t.ss1.s_wrap_mode = 0x%x\n", (*ptr).ss1.s_wrap_mode); + debug_printf("\t\t.ss1.max_lod = 0x%x\n", (*ptr).ss1.max_lod); + debug_printf("\t\t.ss1.min_lod = 0x%x\n", (*ptr).ss1.min_lod); + debug_printf("\t\t.ss2.default_color_pointer = 0x%x\n", (*ptr).ss2.default_color_pointer); + debug_printf("\t\t.ss3.max_aniso = 0x%x\n", (*ptr).ss3.max_aniso); + debug_printf("\t\t.ss3.chroma_key_mode = 0x%x\n", (*ptr).ss3.chroma_key_mode); + debug_printf("\t\t.ss3.chroma_key_index = 0x%x\n", (*ptr).ss3.chroma_key_index); + debug_printf("\t\t.ss3.chroma_key_enable = 0x%x\n", (*ptr).ss3.chroma_key_enable); + debug_printf("\t\t.ss3.monochrome_filter_width = 0x%x\n", (*ptr).ss3.monochrome_filter_width); + debug_printf("\t\t.ss3.monochrome_filter_height = 0x%x\n", (*ptr).ss3.monochrome_filter_height); +} + +void +brw_dump_sf_unit_state(const struct brw_sf_unit_state *ptr) +{ + debug_printf("\t\t.thread0.grf_reg_count = 0x%x\n", (*ptr).thread0.grf_reg_count); + debug_printf("\t\t.thread0.kernel_start_pointer = 0x%x\n", (*ptr).thread0.kernel_start_pointer); + debug_printf("\t\t.thread1.ext_halt_exception_enable = 0x%x\n", (*ptr).thread1.ext_halt_exception_enable); + debug_printf("\t\t.thread1.sw_exception_enable = 0x%x\n", (*ptr).thread1.sw_exception_enable); + debug_printf("\t\t.thread1.mask_stack_exception_enable = 0x%x\n", (*ptr).thread1.mask_stack_exception_enable); + debug_printf("\t\t.thread1.timeout_exception_enable = 0x%x\n", (*ptr).thread1.timeout_exception_enable); + debug_printf("\t\t.thread1.illegal_op_exception_enable = 0x%x\n", (*ptr).thread1.illegal_op_exception_enable); + debug_printf("\t\t.thread1.depth_coef_urb_read_offset = 0x%x\n", (*ptr).thread1.depth_coef_urb_read_offset); + debug_printf("\t\t.thread1.floating_point_mode = 0x%x\n", (*ptr).thread1.floating_point_mode); + debug_printf("\t\t.thread1.thread_priority = 0x%x\n", (*ptr).thread1.thread_priority); + debug_printf("\t\t.thread1.binding_table_entry_count = 0x%x\n", (*ptr).thread1.binding_table_entry_count); + debug_printf("\t\t.thread1.single_program_flow = 0x%x\n", (*ptr).thread1.single_program_flow); + debug_printf("\t\t.thread2.per_thread_scratch_space = 0x%x\n", (*ptr).thread2.per_thread_scratch_space); + debug_printf("\t\t.thread2.scratch_space_base_pointer = 0x%x\n", (*ptr).thread2.scratch_space_base_pointer); + debug_printf("\t\t.thread3.dispatch_grf_start_reg = 0x%x\n", (*ptr).thread3.dispatch_grf_start_reg); + debug_printf("\t\t.thread3.urb_entry_read_offset = 0x%x\n", (*ptr).thread3.urb_entry_read_offset); + debug_printf("\t\t.thread3.urb_entry_read_length = 0x%x\n", (*ptr).thread3.urb_entry_read_length); + debug_printf("\t\t.thread3.const_urb_entry_read_offset = 0x%x\n", (*ptr).thread3.const_urb_entry_read_offset); + debug_printf("\t\t.thread3.const_urb_entry_read_length = 0x%x\n", (*ptr).thread3.const_urb_entry_read_length); + debug_printf("\t\t.thread4.stats_enable = 0x%x\n", (*ptr).thread4.stats_enable); + debug_printf("\t\t.thread4.nr_urb_entries = 0x%x\n", (*ptr).thread4.nr_urb_entries); + debug_printf("\t\t.thread4.urb_entry_allocation_size = 0x%x\n", (*ptr).thread4.urb_entry_allocation_size); + debug_printf("\t\t.thread4.max_threads = 0x%x\n", (*ptr).thread4.max_threads); + debug_printf("\t\t.sf5.front_winding = 0x%x\n", (*ptr).sf5.front_winding); + debug_printf("\t\t.sf5.viewport_transform = 0x%x\n", (*ptr).sf5.viewport_transform); + debug_printf("\t\t.sf5.sf_viewport_state_offset = 0x%x\n", (*ptr).sf5.sf_viewport_state_offset); + debug_printf("\t\t.sf6.dest_org_vbias = 0x%x\n", (*ptr).sf6.dest_org_vbias); + debug_printf("\t\t.sf6.dest_org_hbias = 0x%x\n", (*ptr).sf6.dest_org_hbias); + debug_printf("\t\t.sf6.scissor = 0x%x\n", (*ptr).sf6.scissor); + debug_printf("\t\t.sf6.disable_2x2_trifilter = 0x%x\n", (*ptr).sf6.disable_2x2_trifilter); + debug_printf("\t\t.sf6.disable_zero_pix_trifilter = 0x%x\n", (*ptr).sf6.disable_zero_pix_trifilter); + debug_printf("\t\t.sf6.point_rast_rule = 0x%x\n", (*ptr).sf6.point_rast_rule); + debug_printf("\t\t.sf6.line_endcap_aa_region_width = 0x%x\n", (*ptr).sf6.line_endcap_aa_region_width); + debug_printf("\t\t.sf6.line_width = 0x%x\n", (*ptr).sf6.line_width); + debug_printf("\t\t.sf6.fast_scissor_disable = 0x%x\n", (*ptr).sf6.fast_scissor_disable); + debug_printf("\t\t.sf6.cull_mode = 0x%x\n", (*ptr).sf6.cull_mode); + debug_printf("\t\t.sf6.aa_enable = 0x%x\n", (*ptr).sf6.aa_enable); + debug_printf("\t\t.sf7.point_size = 0x%x\n", (*ptr).sf7.point_size); + debug_printf("\t\t.sf7.use_point_size_state = 0x%x\n", (*ptr).sf7.use_point_size_state); + debug_printf("\t\t.sf7.subpixel_precision = 0x%x\n", (*ptr).sf7.subpixel_precision); + debug_printf("\t\t.sf7.sprite_point = 0x%x\n", (*ptr).sf7.sprite_point); + debug_printf("\t\t.sf7.aa_line_distance_mode = 0x%x\n", (*ptr).sf7.aa_line_distance_mode); + debug_printf("\t\t.sf7.trifan_pv = 0x%x\n", (*ptr).sf7.trifan_pv); + debug_printf("\t\t.sf7.linestrip_pv = 0x%x\n", (*ptr).sf7.linestrip_pv); + debug_printf("\t\t.sf7.tristrip_pv = 0x%x\n", (*ptr).sf7.tristrip_pv); + debug_printf("\t\t.sf7.line_last_pixel_enable = 0x%x\n", (*ptr).sf7.line_last_pixel_enable); +} + +void +brw_dump_sf_viewport(const struct brw_sf_viewport *ptr) +{ + debug_printf("\t\t.viewport.m00 = %f\n", (*ptr).viewport.m00); + debug_printf("\t\t.viewport.m11 = %f\n", (*ptr).viewport.m11); + debug_printf("\t\t.viewport.m22 = %f\n", (*ptr).viewport.m22); + debug_printf("\t\t.viewport.m30 = %f\n", (*ptr).viewport.m30); + debug_printf("\t\t.viewport.m31 = %f\n", (*ptr).viewport.m31); + debug_printf("\t\t.viewport.m32 = %f\n", (*ptr).viewport.m32); + debug_printf("\t\t.scissor.xmin = 0x%x\n", (*ptr).scissor.xmin); + debug_printf("\t\t.scissor.ymin = 0x%x\n", (*ptr).scissor.ymin); + debug_printf("\t\t.scissor.xmax = 0x%x\n", (*ptr).scissor.xmax); + debug_printf("\t\t.scissor.ymax = 0x%x\n", (*ptr).scissor.ymax); +} + +void +brw_dump_ss0(const struct brw_ss0 *ptr) +{ + debug_printf("\t\t.shadow_function = 0x%x\n", (*ptr).shadow_function); + debug_printf("\t\t.lod_bias = 0x%x\n", (*ptr).lod_bias); + debug_printf("\t\t.min_filter = 0x%x\n", (*ptr).min_filter); + debug_printf("\t\t.mag_filter = 0x%x\n", (*ptr).mag_filter); + debug_printf("\t\t.mip_filter = 0x%x\n", (*ptr).mip_filter); + debug_printf("\t\t.base_level = 0x%x\n", (*ptr).base_level); + debug_printf("\t\t.lod_preclamp = 0x%x\n", (*ptr).lod_preclamp); + debug_printf("\t\t.default_color_mode = 0x%x\n", (*ptr).default_color_mode); + debug_printf("\t\t.disable = 0x%x\n", (*ptr).disable); +} + +void +brw_dump_ss1(const struct brw_ss1 *ptr) +{ + debug_printf("\t\t.r_wrap_mode = 0x%x\n", (*ptr).r_wrap_mode); + debug_printf("\t\t.t_wrap_mode = 0x%x\n", (*ptr).t_wrap_mode); + debug_printf("\t\t.s_wrap_mode = 0x%x\n", (*ptr).s_wrap_mode); + debug_printf("\t\t.max_lod = 0x%x\n", (*ptr).max_lod); + debug_printf("\t\t.min_lod = 0x%x\n", (*ptr).min_lod); +} + +void +brw_dump_ss2(const struct brw_ss2 *ptr) +{ + debug_printf("\t\t.default_color_pointer = 0x%x\n", (*ptr).default_color_pointer); +} + +void +brw_dump_ss3(const struct brw_ss3 *ptr) +{ + debug_printf("\t\t.max_aniso = 0x%x\n", (*ptr).max_aniso); + debug_printf("\t\t.chroma_key_mode = 0x%x\n", (*ptr).chroma_key_mode); + debug_printf("\t\t.chroma_key_index = 0x%x\n", (*ptr).chroma_key_index); + debug_printf("\t\t.chroma_key_enable = 0x%x\n", (*ptr).chroma_key_enable); + debug_printf("\t\t.monochrome_filter_width = 0x%x\n", (*ptr).monochrome_filter_width); + debug_printf("\t\t.monochrome_filter_height = 0x%x\n", (*ptr).monochrome_filter_height); +} + +void +brw_dump_state_base_address(const struct brw_state_base_address *ptr) +{ + debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); + debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); + debug_printf("\t\t.bits0.modify_enable = 0x%x\n", (*ptr).bits0.modify_enable); + debug_printf("\t\t.bits0.general_state_address = 0x%x\n", (*ptr).bits0.general_state_address); + debug_printf("\t\t.bits1.modify_enable = 0x%x\n", (*ptr).bits1.modify_enable); + debug_printf("\t\t.bits1.surface_state_address = 0x%x\n", (*ptr).bits1.surface_state_address); + debug_printf("\t\t.bits2.modify_enable = 0x%x\n", (*ptr).bits2.modify_enable); + debug_printf("\t\t.bits2.indirect_object_state_address = 0x%x\n", (*ptr).bits2.indirect_object_state_address); + debug_printf("\t\t.bits3.modify_enable = 0x%x\n", (*ptr).bits3.modify_enable); + debug_printf("\t\t.bits3.general_state_upper_bound = 0x%x\n", (*ptr).bits3.general_state_upper_bound); + debug_printf("\t\t.bits4.modify_enable = 0x%x\n", (*ptr).bits4.modify_enable); + debug_printf("\t\t.bits4.indirect_object_state_upper_bound = 0x%x\n", (*ptr).bits4.indirect_object_state_upper_bound); +} + +void +brw_dump_state_prefetch(const struct brw_state_prefetch *ptr) +{ + debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); + debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); + debug_printf("\t\t.bits0.prefetch_count = 0x%x\n", (*ptr).bits0.prefetch_count); + debug_printf("\t\t.bits0.prefetch_pointer = 0x%x\n", (*ptr).bits0.prefetch_pointer); +} + +void +brw_dump_surf_ss0(const struct brw_surf_ss0 *ptr) +{ + debug_printf("\t\t.cube_pos_z = 0x%x\n", (*ptr).cube_pos_z); + debug_printf("\t\t.cube_neg_z = 0x%x\n", (*ptr).cube_neg_z); + debug_printf("\t\t.cube_pos_y = 0x%x\n", (*ptr).cube_pos_y); + debug_printf("\t\t.cube_neg_y = 0x%x\n", (*ptr).cube_neg_y); + debug_printf("\t\t.cube_pos_x = 0x%x\n", (*ptr).cube_pos_x); + debug_printf("\t\t.cube_neg_x = 0x%x\n", (*ptr).cube_neg_x); + debug_printf("\t\t.mipmap_layout_mode = 0x%x\n", (*ptr).mipmap_layout_mode); + debug_printf("\t\t.vert_line_stride_ofs = 0x%x\n", (*ptr).vert_line_stride_ofs); + debug_printf("\t\t.vert_line_stride = 0x%x\n", (*ptr).vert_line_stride); + debug_printf("\t\t.color_blend = 0x%x\n", (*ptr).color_blend); + debug_printf("\t\t.writedisable_blue = 0x%x\n", (*ptr).writedisable_blue); + debug_printf("\t\t.writedisable_green = 0x%x\n", (*ptr).writedisable_green); + debug_printf("\t\t.writedisable_red = 0x%x\n", (*ptr).writedisable_red); + debug_printf("\t\t.writedisable_alpha = 0x%x\n", (*ptr).writedisable_alpha); + debug_printf("\t\t.surface_format = 0x%x\n", (*ptr).surface_format); + debug_printf("\t\t.data_return_format = 0x%x\n", (*ptr).data_return_format); + debug_printf("\t\t.surface_type = 0x%x\n", (*ptr).surface_type); +} + +void +brw_dump_surf_ss1(const struct brw_surf_ss1 *ptr) +{ + debug_printf("\t\t.base_addr = 0x%x\n", (*ptr).base_addr); +} + +void +brw_dump_surf_ss2(const struct brw_surf_ss2 *ptr) +{ + debug_printf("\t\t.mip_count = 0x%x\n", (*ptr).mip_count); + debug_printf("\t\t.width = 0x%x\n", (*ptr).width); + debug_printf("\t\t.height = 0x%x\n", (*ptr).height); +} + +void +brw_dump_surf_ss3(const struct brw_surf_ss3 *ptr) +{ + debug_printf("\t\t.tile_walk = 0x%x\n", (*ptr).tile_walk); + debug_printf("\t\t.tiled_surface = 0x%x\n", (*ptr).tiled_surface); + debug_printf("\t\t.pitch = 0x%x\n", (*ptr).pitch); + debug_printf("\t\t.depth = 0x%x\n", (*ptr).depth); +} + +void +brw_dump_surf_ss4(const struct brw_surf_ss4 *ptr) +{ + debug_printf("\t\t.multisample_position_palette_index = 0x%x\n", (*ptr).multisample_position_palette_index); + debug_printf("\t\t.num_multisamples = 0x%x\n", (*ptr).num_multisamples); + debug_printf("\t\t.render_target_view_extent = 0x%x\n", (*ptr).render_target_view_extent); + debug_printf("\t\t.min_array_elt = 0x%x\n", (*ptr).min_array_elt); + debug_printf("\t\t.min_lod = 0x%x\n", (*ptr).min_lod); +} + +void +brw_dump_surf_ss5(const struct brw_surf_ss5 *ptr) +{ + debug_printf("\t\t.llc_mapping = 0x%x\n", (*ptr).llc_mapping); + debug_printf("\t\t.mlc_mapping = 0x%x\n", (*ptr).mlc_mapping); + debug_printf("\t\t.gfdt = 0x%x\n", (*ptr).gfdt); + debug_printf("\t\t.gfdt_src = 0x%x\n", (*ptr).gfdt_src); + debug_printf("\t\t.y_offset = 0x%x\n", (*ptr).y_offset); + debug_printf("\t\t.x_offset = 0x%x\n", (*ptr).x_offset); +} + +void +brw_dump_surface_state(const struct brw_surface_state *ptr) +{ + debug_printf("\t\t.ss0.cube_pos_z = 0x%x\n", (*ptr).ss0.cube_pos_z); + debug_printf("\t\t.ss0.cube_neg_z = 0x%x\n", (*ptr).ss0.cube_neg_z); + debug_printf("\t\t.ss0.cube_pos_y = 0x%x\n", (*ptr).ss0.cube_pos_y); + debug_printf("\t\t.ss0.cube_neg_y = 0x%x\n", (*ptr).ss0.cube_neg_y); + debug_printf("\t\t.ss0.cube_pos_x = 0x%x\n", (*ptr).ss0.cube_pos_x); + debug_printf("\t\t.ss0.cube_neg_x = 0x%x\n", (*ptr).ss0.cube_neg_x); + debug_printf("\t\t.ss0.mipmap_layout_mode = 0x%x\n", (*ptr).ss0.mipmap_layout_mode); + debug_printf("\t\t.ss0.vert_line_stride_ofs = 0x%x\n", (*ptr).ss0.vert_line_stride_ofs); + debug_printf("\t\t.ss0.vert_line_stride = 0x%x\n", (*ptr).ss0.vert_line_stride); + debug_printf("\t\t.ss0.color_blend = 0x%x\n", (*ptr).ss0.color_blend); + debug_printf("\t\t.ss0.writedisable_blue = 0x%x\n", (*ptr).ss0.writedisable_blue); + debug_printf("\t\t.ss0.writedisable_green = 0x%x\n", (*ptr).ss0.writedisable_green); + debug_printf("\t\t.ss0.writedisable_red = 0x%x\n", (*ptr).ss0.writedisable_red); + debug_printf("\t\t.ss0.writedisable_alpha = 0x%x\n", (*ptr).ss0.writedisable_alpha); + debug_printf("\t\t.ss0.surface_format = 0x%x\n", (*ptr).ss0.surface_format); + debug_printf("\t\t.ss0.data_return_format = 0x%x\n", (*ptr).ss0.data_return_format); + debug_printf("\t\t.ss0.surface_type = 0x%x\n", (*ptr).ss0.surface_type); + debug_printf("\t\t.ss1.base_addr = 0x%x\n", (*ptr).ss1.base_addr); + debug_printf("\t\t.ss2.mip_count = 0x%x\n", (*ptr).ss2.mip_count); + debug_printf("\t\t.ss2.width = 0x%x\n", (*ptr).ss2.width); + debug_printf("\t\t.ss2.height = 0x%x\n", (*ptr).ss2.height); + debug_printf("\t\t.ss3.tile_walk = 0x%x\n", (*ptr).ss3.tile_walk); + debug_printf("\t\t.ss3.tiled_surface = 0x%x\n", (*ptr).ss3.tiled_surface); + debug_printf("\t\t.ss3.pitch = 0x%x\n", (*ptr).ss3.pitch); + debug_printf("\t\t.ss3.depth = 0x%x\n", (*ptr).ss3.depth); + debug_printf("\t\t.ss4.multisample_position_palette_index = 0x%x\n", (*ptr).ss4.multisample_position_palette_index); + debug_printf("\t\t.ss4.num_multisamples = 0x%x\n", (*ptr).ss4.num_multisamples); + debug_printf("\t\t.ss4.render_target_view_extent = 0x%x\n", (*ptr).ss4.render_target_view_extent); + debug_printf("\t\t.ss4.min_array_elt = 0x%x\n", (*ptr).ss4.min_array_elt); + debug_printf("\t\t.ss4.min_lod = 0x%x\n", (*ptr).ss4.min_lod); + debug_printf("\t\t.ss5.llc_mapping = 0x%x\n", (*ptr).ss5.llc_mapping); + debug_printf("\t\t.ss5.mlc_mapping = 0x%x\n", (*ptr).ss5.mlc_mapping); + debug_printf("\t\t.ss5.gfdt = 0x%x\n", (*ptr).ss5.gfdt); + debug_printf("\t\t.ss5.gfdt_src = 0x%x\n", (*ptr).ss5.gfdt_src); + debug_printf("\t\t.ss5.y_offset = 0x%x\n", (*ptr).ss5.y_offset); + debug_printf("\t\t.ss5.x_offset = 0x%x\n", (*ptr).ss5.x_offset); +} + +void +brw_dump_system_instruction_pointer(const struct brw_system_instruction_pointer *ptr) +{ + debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); + debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); + debug_printf("\t\t.bits0.system_instruction_pointer = 0x%x\n", (*ptr).bits0.system_instruction_pointer); +} + +void +brw_dump_urb_fence(const struct brw_urb_fence *ptr) +{ + debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); + debug_printf("\t\t.header.vs_realloc = 0x%x\n", (*ptr).header.vs_realloc); + debug_printf("\t\t.header.gs_realloc = 0x%x\n", (*ptr).header.gs_realloc); + debug_printf("\t\t.header.clp_realloc = 0x%x\n", (*ptr).header.clp_realloc); + debug_printf("\t\t.header.sf_realloc = 0x%x\n", (*ptr).header.sf_realloc); + debug_printf("\t\t.header.vfe_realloc = 0x%x\n", (*ptr).header.vfe_realloc); + debug_printf("\t\t.header.cs_realloc = 0x%x\n", (*ptr).header.cs_realloc); + debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); + debug_printf("\t\t.bits0.vs_fence = 0x%x\n", (*ptr).bits0.vs_fence); + debug_printf("\t\t.bits0.gs_fence = 0x%x\n", (*ptr).bits0.gs_fence); + debug_printf("\t\t.bits0.clp_fence = 0x%x\n", (*ptr).bits0.clp_fence); + debug_printf("\t\t.bits1.sf_fence = 0x%x\n", (*ptr).bits1.sf_fence); + debug_printf("\t\t.bits1.vf_fence = 0x%x\n", (*ptr).bits1.vf_fence); + debug_printf("\t\t.bits1.cs_fence = 0x%x\n", (*ptr).bits1.cs_fence); +} + +void +brw_dump_urb_immediate(const struct brw_urb_immediate *ptr) +{ + debug_printf("\t\t.opcode = 0x%x\n", (*ptr).opcode); + debug_printf("\t\t.offset = 0x%x\n", (*ptr).offset); + debug_printf("\t\t.swizzle_control = 0x%x\n", (*ptr).swizzle_control); + debug_printf("\t\t.allocate = 0x%x\n", (*ptr).allocate); + debug_printf("\t\t.used = 0x%x\n", (*ptr).used); + debug_printf("\t\t.complete = 0x%x\n", (*ptr).complete); + debug_printf("\t\t.response_length = 0x%x\n", (*ptr).response_length); + debug_printf("\t\t.msg_length = 0x%x\n", (*ptr).msg_length); + debug_printf("\t\t.msg_target = 0x%x\n", (*ptr).msg_target); + debug_printf("\t\t.end_of_thread = 0x%x\n", (*ptr).end_of_thread); +} + +void +brw_dump_vb_array_state(const struct brw_vb_array_state *ptr) +{ + debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); + debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); + debug_printf("\t\t.vb[0].vb0.pitch = 0x%x\n", (*ptr).vb[0].vb0.pitch); + debug_printf("\t\t.vb[0].vb0.access_type = 0x%x\n", (*ptr).vb[0].vb0.access_type); + debug_printf("\t\t.vb[0].vb0.vb_index = 0x%x\n", (*ptr).vb[0].vb0.vb_index); + debug_printf("\t\t.vb[0].start_addr = 0x%x\n", (*ptr).vb[0].start_addr); + debug_printf("\t\t.vb[0].max_index = 0x%x\n", (*ptr).vb[0].max_index); + debug_printf("\t\t.vb[0].instance_data_step_rate = 0x%x\n", (*ptr).vb[0].instance_data_step_rate); + debug_printf("\t\t.vb[1].vb0.pitch = 0x%x\n", (*ptr).vb[1].vb0.pitch); + debug_printf("\t\t.vb[1].vb0.access_type = 0x%x\n", (*ptr).vb[1].vb0.access_type); + debug_printf("\t\t.vb[1].vb0.vb_index = 0x%x\n", (*ptr).vb[1].vb0.vb_index); + debug_printf("\t\t.vb[1].start_addr = 0x%x\n", (*ptr).vb[1].start_addr); + debug_printf("\t\t.vb[1].max_index = 0x%x\n", (*ptr).vb[1].max_index); + debug_printf("\t\t.vb[1].instance_data_step_rate = 0x%x\n", (*ptr).vb[1].instance_data_step_rate); + debug_printf("\t\t.vb[2].vb0.pitch = 0x%x\n", (*ptr).vb[2].vb0.pitch); + debug_printf("\t\t.vb[2].vb0.access_type = 0x%x\n", (*ptr).vb[2].vb0.access_type); + debug_printf("\t\t.vb[2].vb0.vb_index = 0x%x\n", (*ptr).vb[2].vb0.vb_index); + debug_printf("\t\t.vb[2].start_addr = 0x%x\n", (*ptr).vb[2].start_addr); + debug_printf("\t\t.vb[2].max_index = 0x%x\n", (*ptr).vb[2].max_index); + debug_printf("\t\t.vb[2].instance_data_step_rate = 0x%x\n", (*ptr).vb[2].instance_data_step_rate); + debug_printf("\t\t.vb[3].vb0.pitch = 0x%x\n", (*ptr).vb[3].vb0.pitch); + debug_printf("\t\t.vb[3].vb0.access_type = 0x%x\n", (*ptr).vb[3].vb0.access_type); + debug_printf("\t\t.vb[3].vb0.vb_index = 0x%x\n", (*ptr).vb[3].vb0.vb_index); + debug_printf("\t\t.vb[3].start_addr = 0x%x\n", (*ptr).vb[3].start_addr); + debug_printf("\t\t.vb[3].max_index = 0x%x\n", (*ptr).vb[3].max_index); + debug_printf("\t\t.vb[3].instance_data_step_rate = 0x%x\n", (*ptr).vb[3].instance_data_step_rate); + debug_printf("\t\t.vb[4].vb0.pitch = 0x%x\n", (*ptr).vb[4].vb0.pitch); + debug_printf("\t\t.vb[4].vb0.access_type = 0x%x\n", (*ptr).vb[4].vb0.access_type); + debug_printf("\t\t.vb[4].vb0.vb_index = 0x%x\n", (*ptr).vb[4].vb0.vb_index); + debug_printf("\t\t.vb[4].start_addr = 0x%x\n", (*ptr).vb[4].start_addr); + debug_printf("\t\t.vb[4].max_index = 0x%x\n", (*ptr).vb[4].max_index); + debug_printf("\t\t.vb[4].instance_data_step_rate = 0x%x\n", (*ptr).vb[4].instance_data_step_rate); + debug_printf("\t\t.vb[5].vb0.pitch = 0x%x\n", (*ptr).vb[5].vb0.pitch); + debug_printf("\t\t.vb[5].vb0.access_type = 0x%x\n", (*ptr).vb[5].vb0.access_type); + debug_printf("\t\t.vb[5].vb0.vb_index = 0x%x\n", (*ptr).vb[5].vb0.vb_index); + debug_printf("\t\t.vb[5].start_addr = 0x%x\n", (*ptr).vb[5].start_addr); + debug_printf("\t\t.vb[5].max_index = 0x%x\n", (*ptr).vb[5].max_index); + debug_printf("\t\t.vb[5].instance_data_step_rate = 0x%x\n", (*ptr).vb[5].instance_data_step_rate); + debug_printf("\t\t.vb[6].vb0.pitch = 0x%x\n", (*ptr).vb[6].vb0.pitch); + debug_printf("\t\t.vb[6].vb0.access_type = 0x%x\n", (*ptr).vb[6].vb0.access_type); + debug_printf("\t\t.vb[6].vb0.vb_index = 0x%x\n", (*ptr).vb[6].vb0.vb_index); + debug_printf("\t\t.vb[6].start_addr = 0x%x\n", (*ptr).vb[6].start_addr); + debug_printf("\t\t.vb[6].max_index = 0x%x\n", (*ptr).vb[6].max_index); + debug_printf("\t\t.vb[6].instance_data_step_rate = 0x%x\n", (*ptr).vb[6].instance_data_step_rate); + debug_printf("\t\t.vb[7].vb0.pitch = 0x%x\n", (*ptr).vb[7].vb0.pitch); + debug_printf("\t\t.vb[7].vb0.access_type = 0x%x\n", (*ptr).vb[7].vb0.access_type); + debug_printf("\t\t.vb[7].vb0.vb_index = 0x%x\n", (*ptr).vb[7].vb0.vb_index); + debug_printf("\t\t.vb[7].start_addr = 0x%x\n", (*ptr).vb[7].start_addr); + debug_printf("\t\t.vb[7].max_index = 0x%x\n", (*ptr).vb[7].max_index); + debug_printf("\t\t.vb[7].instance_data_step_rate = 0x%x\n", (*ptr).vb[7].instance_data_step_rate); + debug_printf("\t\t.vb[8].vb0.pitch = 0x%x\n", (*ptr).vb[8].vb0.pitch); + debug_printf("\t\t.vb[8].vb0.access_type = 0x%x\n", (*ptr).vb[8].vb0.access_type); + debug_printf("\t\t.vb[8].vb0.vb_index = 0x%x\n", (*ptr).vb[8].vb0.vb_index); + debug_printf("\t\t.vb[8].start_addr = 0x%x\n", (*ptr).vb[8].start_addr); + debug_printf("\t\t.vb[8].max_index = 0x%x\n", (*ptr).vb[8].max_index); + debug_printf("\t\t.vb[8].instance_data_step_rate = 0x%x\n", (*ptr).vb[8].instance_data_step_rate); + debug_printf("\t\t.vb[9].vb0.pitch = 0x%x\n", (*ptr).vb[9].vb0.pitch); + debug_printf("\t\t.vb[9].vb0.access_type = 0x%x\n", (*ptr).vb[9].vb0.access_type); + debug_printf("\t\t.vb[9].vb0.vb_index = 0x%x\n", (*ptr).vb[9].vb0.vb_index); + debug_printf("\t\t.vb[9].start_addr = 0x%x\n", (*ptr).vb[9].start_addr); + debug_printf("\t\t.vb[9].max_index = 0x%x\n", (*ptr).vb[9].max_index); + debug_printf("\t\t.vb[9].instance_data_step_rate = 0x%x\n", (*ptr).vb[9].instance_data_step_rate); + debug_printf("\t\t.vb[10].vb0.pitch = 0x%x\n", (*ptr).vb[10].vb0.pitch); + debug_printf("\t\t.vb[10].vb0.access_type = 0x%x\n", (*ptr).vb[10].vb0.access_type); + debug_printf("\t\t.vb[10].vb0.vb_index = 0x%x\n", (*ptr).vb[10].vb0.vb_index); + debug_printf("\t\t.vb[10].start_addr = 0x%x\n", (*ptr).vb[10].start_addr); + debug_printf("\t\t.vb[10].max_index = 0x%x\n", (*ptr).vb[10].max_index); + debug_printf("\t\t.vb[10].instance_data_step_rate = 0x%x\n", (*ptr).vb[10].instance_data_step_rate); + debug_printf("\t\t.vb[11].vb0.pitch = 0x%x\n", (*ptr).vb[11].vb0.pitch); + debug_printf("\t\t.vb[11].vb0.access_type = 0x%x\n", (*ptr).vb[11].vb0.access_type); + debug_printf("\t\t.vb[11].vb0.vb_index = 0x%x\n", (*ptr).vb[11].vb0.vb_index); + debug_printf("\t\t.vb[11].start_addr = 0x%x\n", (*ptr).vb[11].start_addr); + debug_printf("\t\t.vb[11].max_index = 0x%x\n", (*ptr).vb[11].max_index); + debug_printf("\t\t.vb[11].instance_data_step_rate = 0x%x\n", (*ptr).vb[11].instance_data_step_rate); + debug_printf("\t\t.vb[12].vb0.pitch = 0x%x\n", (*ptr).vb[12].vb0.pitch); + debug_printf("\t\t.vb[12].vb0.access_type = 0x%x\n", (*ptr).vb[12].vb0.access_type); + debug_printf("\t\t.vb[12].vb0.vb_index = 0x%x\n", (*ptr).vb[12].vb0.vb_index); + debug_printf("\t\t.vb[12].start_addr = 0x%x\n", (*ptr).vb[12].start_addr); + debug_printf("\t\t.vb[12].max_index = 0x%x\n", (*ptr).vb[12].max_index); + debug_printf("\t\t.vb[12].instance_data_step_rate = 0x%x\n", (*ptr).vb[12].instance_data_step_rate); + debug_printf("\t\t.vb[13].vb0.pitch = 0x%x\n", (*ptr).vb[13].vb0.pitch); + debug_printf("\t\t.vb[13].vb0.access_type = 0x%x\n", (*ptr).vb[13].vb0.access_type); + debug_printf("\t\t.vb[13].vb0.vb_index = 0x%x\n", (*ptr).vb[13].vb0.vb_index); + debug_printf("\t\t.vb[13].start_addr = 0x%x\n", (*ptr).vb[13].start_addr); + debug_printf("\t\t.vb[13].max_index = 0x%x\n", (*ptr).vb[13].max_index); + debug_printf("\t\t.vb[13].instance_data_step_rate = 0x%x\n", (*ptr).vb[13].instance_data_step_rate); + debug_printf("\t\t.vb[14].vb0.pitch = 0x%x\n", (*ptr).vb[14].vb0.pitch); + debug_printf("\t\t.vb[14].vb0.access_type = 0x%x\n", (*ptr).vb[14].vb0.access_type); + debug_printf("\t\t.vb[14].vb0.vb_index = 0x%x\n", (*ptr).vb[14].vb0.vb_index); + debug_printf("\t\t.vb[14].start_addr = 0x%x\n", (*ptr).vb[14].start_addr); + debug_printf("\t\t.vb[14].max_index = 0x%x\n", (*ptr).vb[14].max_index); + debug_printf("\t\t.vb[14].instance_data_step_rate = 0x%x\n", (*ptr).vb[14].instance_data_step_rate); + debug_printf("\t\t.vb[15].vb0.pitch = 0x%x\n", (*ptr).vb[15].vb0.pitch); + debug_printf("\t\t.vb[15].vb0.access_type = 0x%x\n", (*ptr).vb[15].vb0.access_type); + debug_printf("\t\t.vb[15].vb0.vb_index = 0x%x\n", (*ptr).vb[15].vb0.vb_index); + debug_printf("\t\t.vb[15].start_addr = 0x%x\n", (*ptr).vb[15].start_addr); + debug_printf("\t\t.vb[15].max_index = 0x%x\n", (*ptr).vb[15].max_index); + debug_printf("\t\t.vb[15].instance_data_step_rate = 0x%x\n", (*ptr).vb[15].instance_data_step_rate); + debug_printf("\t\t.vb[16].vb0.pitch = 0x%x\n", (*ptr).vb[16].vb0.pitch); + debug_printf("\t\t.vb[16].vb0.access_type = 0x%x\n", (*ptr).vb[16].vb0.access_type); + debug_printf("\t\t.vb[16].vb0.vb_index = 0x%x\n", (*ptr).vb[16].vb0.vb_index); + debug_printf("\t\t.vb[16].start_addr = 0x%x\n", (*ptr).vb[16].start_addr); + debug_printf("\t\t.vb[16].max_index = 0x%x\n", (*ptr).vb[16].max_index); + debug_printf("\t\t.vb[16].instance_data_step_rate = 0x%x\n", (*ptr).vb[16].instance_data_step_rate); +} + +void +brw_dump_vertex_buffer_state(const struct brw_vertex_buffer_state *ptr) +{ + debug_printf("\t\t.vb0.pitch = 0x%x\n", (*ptr).vb0.pitch); + debug_printf("\t\t.vb0.access_type = 0x%x\n", (*ptr).vb0.access_type); + debug_printf("\t\t.vb0.vb_index = 0x%x\n", (*ptr).vb0.vb_index); + debug_printf("\t\t.start_addr = 0x%x\n", (*ptr).start_addr); + debug_printf("\t\t.max_index = 0x%x\n", (*ptr).max_index); + debug_printf("\t\t.instance_data_step_rate = 0x%x\n", (*ptr).instance_data_step_rate); +} + +void +brw_dump_vertex_element_packet(const struct brw_vertex_element_packet *ptr) +{ + debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); + debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); + debug_printf("\t\t.ve[0].ve0.src_offset = 0x%x\n", (*ptr).ve[0].ve0.src_offset); + debug_printf("\t\t.ve[0].ve0.src_format = 0x%x\n", (*ptr).ve[0].ve0.src_format); + debug_printf("\t\t.ve[0].ve0.valid = 0x%x\n", (*ptr).ve[0].ve0.valid); + debug_printf("\t\t.ve[0].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[0].ve0.vertex_buffer_index); + debug_printf("\t\t.ve[0].ve1.dst_offset = 0x%x\n", (*ptr).ve[0].ve1.dst_offset); + debug_printf("\t\t.ve[0].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[0].ve1.vfcomponent3); + debug_printf("\t\t.ve[0].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[0].ve1.vfcomponent2); + debug_printf("\t\t.ve[0].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[0].ve1.vfcomponent1); + debug_printf("\t\t.ve[0].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[0].ve1.vfcomponent0); + debug_printf("\t\t.ve[1].ve0.src_offset = 0x%x\n", (*ptr).ve[1].ve0.src_offset); + debug_printf("\t\t.ve[1].ve0.src_format = 0x%x\n", (*ptr).ve[1].ve0.src_format); + debug_printf("\t\t.ve[1].ve0.valid = 0x%x\n", (*ptr).ve[1].ve0.valid); + debug_printf("\t\t.ve[1].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[1].ve0.vertex_buffer_index); + debug_printf("\t\t.ve[1].ve1.dst_offset = 0x%x\n", (*ptr).ve[1].ve1.dst_offset); + debug_printf("\t\t.ve[1].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[1].ve1.vfcomponent3); + debug_printf("\t\t.ve[1].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[1].ve1.vfcomponent2); + debug_printf("\t\t.ve[1].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[1].ve1.vfcomponent1); + debug_printf("\t\t.ve[1].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[1].ve1.vfcomponent0); + debug_printf("\t\t.ve[2].ve0.src_offset = 0x%x\n", (*ptr).ve[2].ve0.src_offset); + debug_printf("\t\t.ve[2].ve0.src_format = 0x%x\n", (*ptr).ve[2].ve0.src_format); + debug_printf("\t\t.ve[2].ve0.valid = 0x%x\n", (*ptr).ve[2].ve0.valid); + debug_printf("\t\t.ve[2].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[2].ve0.vertex_buffer_index); + debug_printf("\t\t.ve[2].ve1.dst_offset = 0x%x\n", (*ptr).ve[2].ve1.dst_offset); + debug_printf("\t\t.ve[2].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[2].ve1.vfcomponent3); + debug_printf("\t\t.ve[2].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[2].ve1.vfcomponent2); + debug_printf("\t\t.ve[2].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[2].ve1.vfcomponent1); + debug_printf("\t\t.ve[2].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[2].ve1.vfcomponent0); + debug_printf("\t\t.ve[3].ve0.src_offset = 0x%x\n", (*ptr).ve[3].ve0.src_offset); + debug_printf("\t\t.ve[3].ve0.src_format = 0x%x\n", (*ptr).ve[3].ve0.src_format); + debug_printf("\t\t.ve[3].ve0.valid = 0x%x\n", (*ptr).ve[3].ve0.valid); + debug_printf("\t\t.ve[3].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[3].ve0.vertex_buffer_index); + debug_printf("\t\t.ve[3].ve1.dst_offset = 0x%x\n", (*ptr).ve[3].ve1.dst_offset); + debug_printf("\t\t.ve[3].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[3].ve1.vfcomponent3); + debug_printf("\t\t.ve[3].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[3].ve1.vfcomponent2); + debug_printf("\t\t.ve[3].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[3].ve1.vfcomponent1); + debug_printf("\t\t.ve[3].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[3].ve1.vfcomponent0); + debug_printf("\t\t.ve[4].ve0.src_offset = 0x%x\n", (*ptr).ve[4].ve0.src_offset); + debug_printf("\t\t.ve[4].ve0.src_format = 0x%x\n", (*ptr).ve[4].ve0.src_format); + debug_printf("\t\t.ve[4].ve0.valid = 0x%x\n", (*ptr).ve[4].ve0.valid); + debug_printf("\t\t.ve[4].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[4].ve0.vertex_buffer_index); + debug_printf("\t\t.ve[4].ve1.dst_offset = 0x%x\n", (*ptr).ve[4].ve1.dst_offset); + debug_printf("\t\t.ve[4].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[4].ve1.vfcomponent3); + debug_printf("\t\t.ve[4].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[4].ve1.vfcomponent2); + debug_printf("\t\t.ve[4].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[4].ve1.vfcomponent1); + debug_printf("\t\t.ve[4].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[4].ve1.vfcomponent0); + debug_printf("\t\t.ve[5].ve0.src_offset = 0x%x\n", (*ptr).ve[5].ve0.src_offset); + debug_printf("\t\t.ve[5].ve0.src_format = 0x%x\n", (*ptr).ve[5].ve0.src_format); + debug_printf("\t\t.ve[5].ve0.valid = 0x%x\n", (*ptr).ve[5].ve0.valid); + debug_printf("\t\t.ve[5].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[5].ve0.vertex_buffer_index); + debug_printf("\t\t.ve[5].ve1.dst_offset = 0x%x\n", (*ptr).ve[5].ve1.dst_offset); + debug_printf("\t\t.ve[5].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[5].ve1.vfcomponent3); + debug_printf("\t\t.ve[5].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[5].ve1.vfcomponent2); + debug_printf("\t\t.ve[5].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[5].ve1.vfcomponent1); + debug_printf("\t\t.ve[5].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[5].ve1.vfcomponent0); + debug_printf("\t\t.ve[6].ve0.src_offset = 0x%x\n", (*ptr).ve[6].ve0.src_offset); + debug_printf("\t\t.ve[6].ve0.src_format = 0x%x\n", (*ptr).ve[6].ve0.src_format); + debug_printf("\t\t.ve[6].ve0.valid = 0x%x\n", (*ptr).ve[6].ve0.valid); + debug_printf("\t\t.ve[6].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[6].ve0.vertex_buffer_index); + debug_printf("\t\t.ve[6].ve1.dst_offset = 0x%x\n", (*ptr).ve[6].ve1.dst_offset); + debug_printf("\t\t.ve[6].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[6].ve1.vfcomponent3); + debug_printf("\t\t.ve[6].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[6].ve1.vfcomponent2); + debug_printf("\t\t.ve[6].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[6].ve1.vfcomponent1); + debug_printf("\t\t.ve[6].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[6].ve1.vfcomponent0); + debug_printf("\t\t.ve[7].ve0.src_offset = 0x%x\n", (*ptr).ve[7].ve0.src_offset); + debug_printf("\t\t.ve[7].ve0.src_format = 0x%x\n", (*ptr).ve[7].ve0.src_format); + debug_printf("\t\t.ve[7].ve0.valid = 0x%x\n", (*ptr).ve[7].ve0.valid); + debug_printf("\t\t.ve[7].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[7].ve0.vertex_buffer_index); + debug_printf("\t\t.ve[7].ve1.dst_offset = 0x%x\n", (*ptr).ve[7].ve1.dst_offset); + debug_printf("\t\t.ve[7].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[7].ve1.vfcomponent3); + debug_printf("\t\t.ve[7].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[7].ve1.vfcomponent2); + debug_printf("\t\t.ve[7].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[7].ve1.vfcomponent1); + debug_printf("\t\t.ve[7].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[7].ve1.vfcomponent0); + debug_printf("\t\t.ve[8].ve0.src_offset = 0x%x\n", (*ptr).ve[8].ve0.src_offset); + debug_printf("\t\t.ve[8].ve0.src_format = 0x%x\n", (*ptr).ve[8].ve0.src_format); + debug_printf("\t\t.ve[8].ve0.valid = 0x%x\n", (*ptr).ve[8].ve0.valid); + debug_printf("\t\t.ve[8].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[8].ve0.vertex_buffer_index); + debug_printf("\t\t.ve[8].ve1.dst_offset = 0x%x\n", (*ptr).ve[8].ve1.dst_offset); + debug_printf("\t\t.ve[8].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[8].ve1.vfcomponent3); + debug_printf("\t\t.ve[8].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[8].ve1.vfcomponent2); + debug_printf("\t\t.ve[8].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[8].ve1.vfcomponent1); + debug_printf("\t\t.ve[8].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[8].ve1.vfcomponent0); + debug_printf("\t\t.ve[9].ve0.src_offset = 0x%x\n", (*ptr).ve[9].ve0.src_offset); + debug_printf("\t\t.ve[9].ve0.src_format = 0x%x\n", (*ptr).ve[9].ve0.src_format); + debug_printf("\t\t.ve[9].ve0.valid = 0x%x\n", (*ptr).ve[9].ve0.valid); + debug_printf("\t\t.ve[9].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[9].ve0.vertex_buffer_index); + debug_printf("\t\t.ve[9].ve1.dst_offset = 0x%x\n", (*ptr).ve[9].ve1.dst_offset); + debug_printf("\t\t.ve[9].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[9].ve1.vfcomponent3); + debug_printf("\t\t.ve[9].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[9].ve1.vfcomponent2); + debug_printf("\t\t.ve[9].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[9].ve1.vfcomponent1); + debug_printf("\t\t.ve[9].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[9].ve1.vfcomponent0); + debug_printf("\t\t.ve[10].ve0.src_offset = 0x%x\n", (*ptr).ve[10].ve0.src_offset); + debug_printf("\t\t.ve[10].ve0.src_format = 0x%x\n", (*ptr).ve[10].ve0.src_format); + debug_printf("\t\t.ve[10].ve0.valid = 0x%x\n", (*ptr).ve[10].ve0.valid); + debug_printf("\t\t.ve[10].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[10].ve0.vertex_buffer_index); + debug_printf("\t\t.ve[10].ve1.dst_offset = 0x%x\n", (*ptr).ve[10].ve1.dst_offset); + debug_printf("\t\t.ve[10].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[10].ve1.vfcomponent3); + debug_printf("\t\t.ve[10].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[10].ve1.vfcomponent2); + debug_printf("\t\t.ve[10].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[10].ve1.vfcomponent1); + debug_printf("\t\t.ve[10].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[10].ve1.vfcomponent0); + debug_printf("\t\t.ve[11].ve0.src_offset = 0x%x\n", (*ptr).ve[11].ve0.src_offset); + debug_printf("\t\t.ve[11].ve0.src_format = 0x%x\n", (*ptr).ve[11].ve0.src_format); + debug_printf("\t\t.ve[11].ve0.valid = 0x%x\n", (*ptr).ve[11].ve0.valid); + debug_printf("\t\t.ve[11].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[11].ve0.vertex_buffer_index); + debug_printf("\t\t.ve[11].ve1.dst_offset = 0x%x\n", (*ptr).ve[11].ve1.dst_offset); + debug_printf("\t\t.ve[11].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[11].ve1.vfcomponent3); + debug_printf("\t\t.ve[11].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[11].ve1.vfcomponent2); + debug_printf("\t\t.ve[11].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[11].ve1.vfcomponent1); + debug_printf("\t\t.ve[11].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[11].ve1.vfcomponent0); + debug_printf("\t\t.ve[12].ve0.src_offset = 0x%x\n", (*ptr).ve[12].ve0.src_offset); + debug_printf("\t\t.ve[12].ve0.src_format = 0x%x\n", (*ptr).ve[12].ve0.src_format); + debug_printf("\t\t.ve[12].ve0.valid = 0x%x\n", (*ptr).ve[12].ve0.valid); + debug_printf("\t\t.ve[12].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[12].ve0.vertex_buffer_index); + debug_printf("\t\t.ve[12].ve1.dst_offset = 0x%x\n", (*ptr).ve[12].ve1.dst_offset); + debug_printf("\t\t.ve[12].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[12].ve1.vfcomponent3); + debug_printf("\t\t.ve[12].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[12].ve1.vfcomponent2); + debug_printf("\t\t.ve[12].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[12].ve1.vfcomponent1); + debug_printf("\t\t.ve[12].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[12].ve1.vfcomponent0); + debug_printf("\t\t.ve[13].ve0.src_offset = 0x%x\n", (*ptr).ve[13].ve0.src_offset); + debug_printf("\t\t.ve[13].ve0.src_format = 0x%x\n", (*ptr).ve[13].ve0.src_format); + debug_printf("\t\t.ve[13].ve0.valid = 0x%x\n", (*ptr).ve[13].ve0.valid); + debug_printf("\t\t.ve[13].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[13].ve0.vertex_buffer_index); + debug_printf("\t\t.ve[13].ve1.dst_offset = 0x%x\n", (*ptr).ve[13].ve1.dst_offset); + debug_printf("\t\t.ve[13].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[13].ve1.vfcomponent3); + debug_printf("\t\t.ve[13].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[13].ve1.vfcomponent2); + debug_printf("\t\t.ve[13].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[13].ve1.vfcomponent1); + debug_printf("\t\t.ve[13].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[13].ve1.vfcomponent0); + debug_printf("\t\t.ve[14].ve0.src_offset = 0x%x\n", (*ptr).ve[14].ve0.src_offset); + debug_printf("\t\t.ve[14].ve0.src_format = 0x%x\n", (*ptr).ve[14].ve0.src_format); + debug_printf("\t\t.ve[14].ve0.valid = 0x%x\n", (*ptr).ve[14].ve0.valid); + debug_printf("\t\t.ve[14].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[14].ve0.vertex_buffer_index); + debug_printf("\t\t.ve[14].ve1.dst_offset = 0x%x\n", (*ptr).ve[14].ve1.dst_offset); + debug_printf("\t\t.ve[14].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[14].ve1.vfcomponent3); + debug_printf("\t\t.ve[14].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[14].ve1.vfcomponent2); + debug_printf("\t\t.ve[14].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[14].ve1.vfcomponent1); + debug_printf("\t\t.ve[14].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[14].ve1.vfcomponent0); + debug_printf("\t\t.ve[15].ve0.src_offset = 0x%x\n", (*ptr).ve[15].ve0.src_offset); + debug_printf("\t\t.ve[15].ve0.src_format = 0x%x\n", (*ptr).ve[15].ve0.src_format); + debug_printf("\t\t.ve[15].ve0.valid = 0x%x\n", (*ptr).ve[15].ve0.valid); + debug_printf("\t\t.ve[15].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[15].ve0.vertex_buffer_index); + debug_printf("\t\t.ve[15].ve1.dst_offset = 0x%x\n", (*ptr).ve[15].ve1.dst_offset); + debug_printf("\t\t.ve[15].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[15].ve1.vfcomponent3); + debug_printf("\t\t.ve[15].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[15].ve1.vfcomponent2); + debug_printf("\t\t.ve[15].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[15].ve1.vfcomponent1); + debug_printf("\t\t.ve[15].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[15].ve1.vfcomponent0); + debug_printf("\t\t.ve[16].ve0.src_offset = 0x%x\n", (*ptr).ve[16].ve0.src_offset); + debug_printf("\t\t.ve[16].ve0.src_format = 0x%x\n", (*ptr).ve[16].ve0.src_format); + debug_printf("\t\t.ve[16].ve0.valid = 0x%x\n", (*ptr).ve[16].ve0.valid); + debug_printf("\t\t.ve[16].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[16].ve0.vertex_buffer_index); + debug_printf("\t\t.ve[16].ve1.dst_offset = 0x%x\n", (*ptr).ve[16].ve1.dst_offset); + debug_printf("\t\t.ve[16].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[16].ve1.vfcomponent3); + debug_printf("\t\t.ve[16].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[16].ve1.vfcomponent2); + debug_printf("\t\t.ve[16].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[16].ve1.vfcomponent1); + debug_printf("\t\t.ve[16].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[16].ve1.vfcomponent0); + debug_printf("\t\t.ve[17].ve0.src_offset = 0x%x\n", (*ptr).ve[17].ve0.src_offset); + debug_printf("\t\t.ve[17].ve0.src_format = 0x%x\n", (*ptr).ve[17].ve0.src_format); + debug_printf("\t\t.ve[17].ve0.valid = 0x%x\n", (*ptr).ve[17].ve0.valid); + debug_printf("\t\t.ve[17].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[17].ve0.vertex_buffer_index); + debug_printf("\t\t.ve[17].ve1.dst_offset = 0x%x\n", (*ptr).ve[17].ve1.dst_offset); + debug_printf("\t\t.ve[17].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[17].ve1.vfcomponent3); + debug_printf("\t\t.ve[17].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[17].ve1.vfcomponent2); + debug_printf("\t\t.ve[17].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[17].ve1.vfcomponent1); + debug_printf("\t\t.ve[17].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[17].ve1.vfcomponent0); +} + +void +brw_dump_vertex_element_state(const struct brw_vertex_element_state *ptr) +{ + debug_printf("\t\t.ve0.src_offset = 0x%x\n", (*ptr).ve0.src_offset); + debug_printf("\t\t.ve0.src_format = 0x%x\n", (*ptr).ve0.src_format); + debug_printf("\t\t.ve0.valid = 0x%x\n", (*ptr).ve0.valid); + debug_printf("\t\t.ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve0.vertex_buffer_index); + debug_printf("\t\t.ve1.dst_offset = 0x%x\n", (*ptr).ve1.dst_offset); + debug_printf("\t\t.ve1.vfcomponent3 = 0x%x\n", (*ptr).ve1.vfcomponent3); + debug_printf("\t\t.ve1.vfcomponent2 = 0x%x\n", (*ptr).ve1.vfcomponent2); + debug_printf("\t\t.ve1.vfcomponent1 = 0x%x\n", (*ptr).ve1.vfcomponent1); + debug_printf("\t\t.ve1.vfcomponent0 = 0x%x\n", (*ptr).ve1.vfcomponent0); +} + +void +brw_dump_vf_statistics(const struct brw_vf_statistics *ptr) +{ + debug_printf("\t\t.statistics_enable = 0x%x\n", (*ptr).statistics_enable); + debug_printf("\t\t.opcode = 0x%x\n", (*ptr).opcode); +} + +void +brw_dump_vs_unit_state(const struct brw_vs_unit_state *ptr) +{ + debug_printf("\t\t.thread0.grf_reg_count = 0x%x\n", (*ptr).thread0.grf_reg_count); + debug_printf("\t\t.thread0.kernel_start_pointer = 0x%x\n", (*ptr).thread0.kernel_start_pointer); + debug_printf("\t\t.thread1.ext_halt_exception_enable = 0x%x\n", (*ptr).thread1.ext_halt_exception_enable); + debug_printf("\t\t.thread1.sw_exception_enable = 0x%x\n", (*ptr).thread1.sw_exception_enable); + debug_printf("\t\t.thread1.mask_stack_exception_enable = 0x%x\n", (*ptr).thread1.mask_stack_exception_enable); + debug_printf("\t\t.thread1.timeout_exception_enable = 0x%x\n", (*ptr).thread1.timeout_exception_enable); + debug_printf("\t\t.thread1.illegal_op_exception_enable = 0x%x\n", (*ptr).thread1.illegal_op_exception_enable); + debug_printf("\t\t.thread1.depth_coef_urb_read_offset = 0x%x\n", (*ptr).thread1.depth_coef_urb_read_offset); + debug_printf("\t\t.thread1.floating_point_mode = 0x%x\n", (*ptr).thread1.floating_point_mode); + debug_printf("\t\t.thread1.thread_priority = 0x%x\n", (*ptr).thread1.thread_priority); + debug_printf("\t\t.thread1.binding_table_entry_count = 0x%x\n", (*ptr).thread1.binding_table_entry_count); + debug_printf("\t\t.thread1.single_program_flow = 0x%x\n", (*ptr).thread1.single_program_flow); + debug_printf("\t\t.thread2.per_thread_scratch_space = 0x%x\n", (*ptr).thread2.per_thread_scratch_space); + debug_printf("\t\t.thread2.scratch_space_base_pointer = 0x%x\n", (*ptr).thread2.scratch_space_base_pointer); + debug_printf("\t\t.thread3.dispatch_grf_start_reg = 0x%x\n", (*ptr).thread3.dispatch_grf_start_reg); + debug_printf("\t\t.thread3.urb_entry_read_offset = 0x%x\n", (*ptr).thread3.urb_entry_read_offset); + debug_printf("\t\t.thread3.urb_entry_read_length = 0x%x\n", (*ptr).thread3.urb_entry_read_length); + debug_printf("\t\t.thread3.const_urb_entry_read_offset = 0x%x\n", (*ptr).thread3.const_urb_entry_read_offset); + debug_printf("\t\t.thread3.const_urb_entry_read_length = 0x%x\n", (*ptr).thread3.const_urb_entry_read_length); + debug_printf("\t\t.thread4.stats_enable = 0x%x\n", (*ptr).thread4.stats_enable); + debug_printf("\t\t.thread4.nr_urb_entries = 0x%x\n", (*ptr).thread4.nr_urb_entries); + debug_printf("\t\t.thread4.urb_entry_allocation_size = 0x%x\n", (*ptr).thread4.urb_entry_allocation_size); + debug_printf("\t\t.thread4.max_threads = 0x%x\n", (*ptr).thread4.max_threads); + debug_printf("\t\t.vs5.sampler_count = 0x%x\n", (*ptr).vs5.sampler_count); + debug_printf("\t\t.vs5.sampler_state_pointer = 0x%x\n", (*ptr).vs5.sampler_state_pointer); + debug_printf("\t\t.vs6.vs_enable = 0x%x\n", (*ptr).vs6.vs_enable); + debug_printf("\t\t.vs6.vert_cache_disable = 0x%x\n", (*ptr).vs6.vert_cache_disable); +} + +void +brw_dump_wm_unit_state(const struct brw_wm_unit_state *ptr) +{ + debug_printf("\t\t.thread0.grf_reg_count = 0x%x\n", (*ptr).thread0.grf_reg_count); + debug_printf("\t\t.thread0.kernel_start_pointer = 0x%x\n", (*ptr).thread0.kernel_start_pointer); + debug_printf("\t\t.thread1.ext_halt_exception_enable = 0x%x\n", (*ptr).thread1.ext_halt_exception_enable); + debug_printf("\t\t.thread1.sw_exception_enable = 0x%x\n", (*ptr).thread1.sw_exception_enable); + debug_printf("\t\t.thread1.mask_stack_exception_enable = 0x%x\n", (*ptr).thread1.mask_stack_exception_enable); + debug_printf("\t\t.thread1.timeout_exception_enable = 0x%x\n", (*ptr).thread1.timeout_exception_enable); + debug_printf("\t\t.thread1.illegal_op_exception_enable = 0x%x\n", (*ptr).thread1.illegal_op_exception_enable); + debug_printf("\t\t.thread1.depth_coef_urb_read_offset = 0x%x\n", (*ptr).thread1.depth_coef_urb_read_offset); + debug_printf("\t\t.thread1.floating_point_mode = 0x%x\n", (*ptr).thread1.floating_point_mode); + debug_printf("\t\t.thread1.thread_priority = 0x%x\n", (*ptr).thread1.thread_priority); + debug_printf("\t\t.thread1.binding_table_entry_count = 0x%x\n", (*ptr).thread1.binding_table_entry_count); + debug_printf("\t\t.thread1.single_program_flow = 0x%x\n", (*ptr).thread1.single_program_flow); + debug_printf("\t\t.thread2.per_thread_scratch_space = 0x%x\n", (*ptr).thread2.per_thread_scratch_space); + debug_printf("\t\t.thread2.scratch_space_base_pointer = 0x%x\n", (*ptr).thread2.scratch_space_base_pointer); + debug_printf("\t\t.thread3.dispatch_grf_start_reg = 0x%x\n", (*ptr).thread3.dispatch_grf_start_reg); + debug_printf("\t\t.thread3.urb_entry_read_offset = 0x%x\n", (*ptr).thread3.urb_entry_read_offset); + debug_printf("\t\t.thread3.urb_entry_read_length = 0x%x\n", (*ptr).thread3.urb_entry_read_length); + debug_printf("\t\t.thread3.const_urb_entry_read_offset = 0x%x\n", (*ptr).thread3.const_urb_entry_read_offset); + debug_printf("\t\t.thread3.const_urb_entry_read_length = 0x%x\n", (*ptr).thread3.const_urb_entry_read_length); + debug_printf("\t\t.wm4.stats_enable = 0x%x\n", (*ptr).wm4.stats_enable); + debug_printf("\t\t.wm4.depth_buffer_clear = 0x%x\n", (*ptr).wm4.depth_buffer_clear); + debug_printf("\t\t.wm4.sampler_count = 0x%x\n", (*ptr).wm4.sampler_count); + debug_printf("\t\t.wm4.sampler_state_pointer = 0x%x\n", (*ptr).wm4.sampler_state_pointer); + debug_printf("\t\t.wm5.enable_8_pix = 0x%x\n", (*ptr).wm5.enable_8_pix); + debug_printf("\t\t.wm5.enable_16_pix = 0x%x\n", (*ptr).wm5.enable_16_pix); + debug_printf("\t\t.wm5.enable_32_pix = 0x%x\n", (*ptr).wm5.enable_32_pix); + debug_printf("\t\t.wm5.enable_con_32_pix = 0x%x\n", (*ptr).wm5.enable_con_32_pix); + debug_printf("\t\t.wm5.enable_con_64_pix = 0x%x\n", (*ptr).wm5.enable_con_64_pix); + debug_printf("\t\t.wm5.legacy_global_depth_bias = 0x%x\n", (*ptr).wm5.legacy_global_depth_bias); + debug_printf("\t\t.wm5.line_stipple = 0x%x\n", (*ptr).wm5.line_stipple); + debug_printf("\t\t.wm5.depth_offset = 0x%x\n", (*ptr).wm5.depth_offset); + debug_printf("\t\t.wm5.polygon_stipple = 0x%x\n", (*ptr).wm5.polygon_stipple); + debug_printf("\t\t.wm5.line_aa_region_width = 0x%x\n", (*ptr).wm5.line_aa_region_width); + debug_printf("\t\t.wm5.line_endcap_aa_region_width = 0x%x\n", (*ptr).wm5.line_endcap_aa_region_width); + debug_printf("\t\t.wm5.early_depth_test = 0x%x\n", (*ptr).wm5.early_depth_test); + debug_printf("\t\t.wm5.thread_dispatch_enable = 0x%x\n", (*ptr).wm5.thread_dispatch_enable); + debug_printf("\t\t.wm5.program_uses_depth = 0x%x\n", (*ptr).wm5.program_uses_depth); + debug_printf("\t\t.wm5.program_computes_depth = 0x%x\n", (*ptr).wm5.program_computes_depth); + debug_printf("\t\t.wm5.program_uses_killpixel = 0x%x\n", (*ptr).wm5.program_uses_killpixel); + debug_printf("\t\t.wm5.legacy_line_rast = 0x%x\n", (*ptr).wm5.legacy_line_rast); + debug_printf("\t\t.wm5.transposed_urb_read_enable = 0x%x\n", (*ptr).wm5.transposed_urb_read_enable); + debug_printf("\t\t.wm5.max_threads = 0x%x\n", (*ptr).wm5.max_threads); + debug_printf("\t\t.global_depth_offset_constant = %f\n", (*ptr).global_depth_offset_constant); + debug_printf("\t\t.global_depth_offset_scale = %f\n", (*ptr).global_depth_offset_scale); + debug_printf("\t\t.wm8.grf_reg_count_1 = 0x%x\n", (*ptr).wm8.grf_reg_count_1); + debug_printf("\t\t.wm8.kernel_start_pointer_1 = 0x%x\n", (*ptr).wm8.kernel_start_pointer_1); + debug_printf("\t\t.wm9.grf_reg_count_2 = 0x%x\n", (*ptr).wm9.grf_reg_count_2); + debug_printf("\t\t.wm9.kernel_start_pointer_2 = 0x%x\n", (*ptr).wm9.kernel_start_pointer_2); + debug_printf("\t\t.wm10.grf_reg_count_3 = 0x%x\n", (*ptr).wm10.grf_reg_count_3); + debug_printf("\t\t.wm10.kernel_start_pointer_3 = 0x%x\n", (*ptr).wm10.kernel_start_pointer_3); +} + diff --git a/src/gallium/drivers/i965/brw_structs_dump.h b/src/gallium/drivers/i965/brw_structs_dump.h new file mode 100644 index 00000000000..7c02dbfe33f --- /dev/null +++ b/src/gallium/drivers/i965/brw_structs_dump.h @@ -0,0 +1,276 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + +/** + * @file + * Dump i965 data structures. + * + * Generated automatically from brw_structs.h by brw_structs_dump.py. + */ + +#ifndef BRW_STRUCTS_DUMP_H +#define BRW_STRUCTS_DUMP_H + +struct brw_3d_control; +struct brw_3d_primitive; +struct brw_aa_line_parameters; +struct brw_binding_table_pointers; +struct brw_blend_constant_color; +struct brw_cc0; +struct brw_cc1; +struct brw_cc2; +struct brw_cc3; +struct brw_cc4; +struct brw_cc5; +struct brw_cc6; +struct brw_cc7; +struct brw_cc_unit_state; +struct brw_cc_viewport; +struct brw_clip_unit_state; +struct brw_clipper_viewport; +struct brw_constant_buffer; +struct brw_cs_urb_state; +struct brw_depthbuffer; +struct brw_depthbuffer_g4x; +struct brw_drawrect; +struct brw_global_depth_offset_clamp; +struct brw_gs_unit_state; +struct brw_indexbuffer; +struct brw_line_stipple; +struct brw_mi_flush; +struct brw_pipe_control; +struct brw_pipeline_select; +struct brw_pipelined_state_pointers; +struct brw_polygon_stipple; +struct brw_polygon_stipple_offset; +struct brw_sampler_default_color; +struct brw_sampler_state; +struct brw_sf_unit_state; +struct brw_sf_viewport; +struct brw_ss0; +struct brw_ss1; +struct brw_ss2; +struct brw_ss3; +struct brw_state_base_address; +struct brw_state_prefetch; +struct brw_surf_ss0; +struct brw_surf_ss1; +struct brw_surf_ss2; +struct brw_surf_ss3; +struct brw_surf_ss4; +struct brw_surf_ss5; +struct brw_surface_state; +struct brw_system_instruction_pointer; +struct brw_urb_fence; +struct brw_urb_immediate; +struct brw_vb_array_state; +struct brw_vertex_buffer_state; +struct brw_vertex_element_packet; +struct brw_vertex_element_state; +struct brw_vf_statistics; +struct brw_vs_unit_state; +struct brw_wm_unit_state; + +void +brw_dump_3d_control(const struct brw_3d_control *ptr); + +void +brw_dump_3d_primitive(const struct brw_3d_primitive *ptr); + +void +brw_dump_aa_line_parameters(const struct brw_aa_line_parameters *ptr); + +void +brw_dump_binding_table_pointers(const struct brw_binding_table_pointers *ptr); + +void +brw_dump_blend_constant_color(const struct brw_blend_constant_color *ptr); + +void +brw_dump_cc0(const struct brw_cc0 *ptr); + +void +brw_dump_cc1(const struct brw_cc1 *ptr); + +void +brw_dump_cc2(const struct brw_cc2 *ptr); + +void +brw_dump_cc3(const struct brw_cc3 *ptr); + +void +brw_dump_cc4(const struct brw_cc4 *ptr); + +void +brw_dump_cc5(const struct brw_cc5 *ptr); + +void +brw_dump_cc6(const struct brw_cc6 *ptr); + +void +brw_dump_cc7(const struct brw_cc7 *ptr); + +void +brw_dump_cc_unit_state(const struct brw_cc_unit_state *ptr); + +void +brw_dump_cc_viewport(const struct brw_cc_viewport *ptr); + +void +brw_dump_clip_unit_state(const struct brw_clip_unit_state *ptr); + +void +brw_dump_clipper_viewport(const struct brw_clipper_viewport *ptr); + +void +brw_dump_constant_buffer(const struct brw_constant_buffer *ptr); + +void +brw_dump_cs_urb_state(const struct brw_cs_urb_state *ptr); + +void +brw_dump_depthbuffer(const struct brw_depthbuffer *ptr); + +void +brw_dump_depthbuffer_g4x(const struct brw_depthbuffer_g4x *ptr); + +void +brw_dump_drawrect(const struct brw_drawrect *ptr); + +void +brw_dump_global_depth_offset_clamp(const struct brw_global_depth_offset_clamp *ptr); + +void +brw_dump_gs_unit_state(const struct brw_gs_unit_state *ptr); + +void +brw_dump_indexbuffer(const struct brw_indexbuffer *ptr); + +void +brw_dump_line_stipple(const struct brw_line_stipple *ptr); + +void +brw_dump_mi_flush(const struct brw_mi_flush *ptr); + +void +brw_dump_pipe_control(const struct brw_pipe_control *ptr); + +void +brw_dump_pipeline_select(const struct brw_pipeline_select *ptr); + +void +brw_dump_pipelined_state_pointers(const struct brw_pipelined_state_pointers *ptr); + +void +brw_dump_polygon_stipple(const struct brw_polygon_stipple *ptr); + +void +brw_dump_polygon_stipple_offset(const struct brw_polygon_stipple_offset *ptr); + +void +brw_dump_sampler_default_color(const struct brw_sampler_default_color *ptr); + +void +brw_dump_sampler_state(const struct brw_sampler_state *ptr); + +void +brw_dump_sf_unit_state(const struct brw_sf_unit_state *ptr); + +void +brw_dump_sf_viewport(const struct brw_sf_viewport *ptr); + +void +brw_dump_ss0(const struct brw_ss0 *ptr); + +void +brw_dump_ss1(const struct brw_ss1 *ptr); + +void +brw_dump_ss2(const struct brw_ss2 *ptr); + +void +brw_dump_ss3(const struct brw_ss3 *ptr); + +void +brw_dump_state_base_address(const struct brw_state_base_address *ptr); + +void +brw_dump_state_prefetch(const struct brw_state_prefetch *ptr); + +void +brw_dump_surf_ss0(const struct brw_surf_ss0 *ptr); + +void +brw_dump_surf_ss1(const struct brw_surf_ss1 *ptr); + +void +brw_dump_surf_ss2(const struct brw_surf_ss2 *ptr); + +void +brw_dump_surf_ss3(const struct brw_surf_ss3 *ptr); + +void +brw_dump_surf_ss4(const struct brw_surf_ss4 *ptr); + +void +brw_dump_surf_ss5(const struct brw_surf_ss5 *ptr); + +void +brw_dump_surface_state(const struct brw_surface_state *ptr); + +void +brw_dump_system_instruction_pointer(const struct brw_system_instruction_pointer *ptr); + +void +brw_dump_urb_fence(const struct brw_urb_fence *ptr); + +void +brw_dump_urb_immediate(const struct brw_urb_immediate *ptr); + +void +brw_dump_vb_array_state(const struct brw_vb_array_state *ptr); + +void +brw_dump_vertex_buffer_state(const struct brw_vertex_buffer_state *ptr); + +void +brw_dump_vertex_element_packet(const struct brw_vertex_element_packet *ptr); + +void +brw_dump_vertex_element_state(const struct brw_vertex_element_state *ptr); + +void +brw_dump_vf_statistics(const struct brw_vf_statistics *ptr); + +void +brw_dump_vs_unit_state(const struct brw_vs_unit_state *ptr); + +void +brw_dump_wm_unit_state(const struct brw_wm_unit_state *ptr); + + +#endif /* BRW_STRUCTS_DUMP_H */ diff --git a/src/gallium/drivers/i965/brw_structs_dump.py b/src/gallium/drivers/i965/brw_structs_dump.py new file mode 100755 index 00000000000..6dba49ad911 --- /dev/null +++ b/src/gallium/drivers/i965/brw_structs_dump.py @@ -0,0 +1,291 @@ +#!/usr/bin/env python +''' +Generates dumpers for the i965 state strucutures using pygccxml. + +Run as + + PYTHONPATH=/path/to/pygccxml-1.0.0 python brw_structs_dump.py + +Jose Fonseca <[email protected]> +''' + +copyright = ''' +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + ''' + +import os +import sys +import re + +from pygccxml import parser +from pygccxml import declarations + +from pygccxml.declarations import algorithm +from pygccxml.declarations import decl_visitor +from pygccxml.declarations import type_traits +from pygccxml.declarations import type_visitor + + +enums = True + + +def vars_filter(variable): + name = variable.name + return not re.match('^pad\d*', name) and name != 'dword' + + +class decl_dumper_t(decl_visitor.decl_visitor_t): + + def __init__(self, stream, instance = '', decl = None): + decl_visitor.decl_visitor_t.__init__(self) + self.stream = stream + self._instance = instance + self.decl = decl + + def clone(self): + return decl_dumper_t(self.stream, self._instance, self.decl) + + def visit_class(self): + class_ = self.decl + assert self.decl.class_type in ('struct', 'union') + + for variable in class_.variables(recursive = False): + if vars_filter(variable): + dump_type(self.stream, self._instance + '.' + variable.name, variable.type) + + def visit_enumeration(self): + if enums: + self.stream.write(' switch(%s) {\n' % ("(*ptr)" + self._instance,)) + for name, value in self.decl.values: + self.stream.write(' case %s:\n' % (name,)) + self.stream.write(' debug_printf("\\t\\t%s = %s\\n");\n' % (self._instance, name)) + self.stream.write(' break;\n') + self.stream.write(' default:\n') + self.stream.write(' debug_printf("\\t\\t%s = %%i\\n", %s);\n' % (self._instance, "(*ptr)" + self._instance)) + self.stream.write(' break;\n') + self.stream.write(' }\n') + else: + self.stream.write(' debug_printf("\\t\\t%s = %%i\\n", %s);\n' % (self._instance, "(*ptr)" + self._instance)) + + +def dump_decl(stream, instance, decl): + dumper = decl_dumper_t(stream, instance, decl) + algorithm.apply_visitor(dumper, decl) + + +class type_dumper_t(type_visitor.type_visitor_t): + + def __init__(self, stream, instance, type_): + type_visitor.type_visitor_t.__init__(self) + self.stream = stream + self.instance = instance + self.type = type_ + + def clone(self): + return type_dumper_t(self.instance, self.type) + + def visit_bool(self): + self.print_instance('%i') + + def visit_char(self): + #self.print_instance('%i') + self.print_instance('0x%x') + + def visit_unsigned_char(self): + #self.print_instance('%u') + self.print_instance('0x%x') + + def visit_signed_char(self): + #self.print_instance('%i') + self.print_instance('0x%x') + + def visit_wchar(self): + self.print_instance('0x%x') + + def visit_short_int(self): + #self.print_instance('%i') + self.print_instance('0x%x') + + def visit_short_unsigned_int(self): + #self.print_instance('%u') + self.print_instance('0x%x') + + def visit_int(self): + #self.print_instance('%i') + self.print_instance('0x%x') + + def visit_unsigned_int(self): + #self.print_instance('%u') + self.print_instance('0x%x') + + def visit_long_int(self): + #self.print_instance('%li') + self.print_instance('0x%lx') + + def visit_long_unsigned_int(self): + #self.print_instance('%lu') + self.print_instance('%0xlx') + + def visit_long_long_int(self): + #self.print_instance('%lli') + self.print_instance('%0xllx') + + def visit_long_long_unsigned_int(self): + #self.print_instance('%llu') + self.print_instance('0x%llx') + + def visit_float(self): + self.print_instance('%f') + + def visit_double(self): + self.print_instance('%f') + + def visit_array(self): + for i in range(type_traits.array_size(self.type)): + dump_type(self.stream, self.instance + '[%i]' % i, type_traits.base_type(self.type)) + + def visit_pointer(self): + self.print_instance('%p') + + def visit_declarated(self): + #stream.write('decl = %r\n' % self.type.decl_string) + decl = type_traits.remove_declarated(self.type) + dump_decl(self.stream, self.instance, decl) + + def print_instance(self, format): + self.stream.write(' debug_printf("\\t\\t%s = %s\\n", %s);\n' % (self.instance, format, "(*ptr)" + self.instance)) + + + +def dump_type(stream, instance, type_): + type_ = type_traits.remove_alias(type_) + visitor = type_dumper_t(stream, instance, type_) + algorithm.apply_visitor(visitor, type_) + + +def dump_struct_interface(stream, class_, suffix = ';'): + name = class_.name + assert name.startswith('brw_'); + name = name[:4] + 'dump_' + name[4:] + stream.write('void\n') + stream.write('%s(const struct %s *ptr)%s\n' % (name, class_.name, suffix)) + + +def dump_struct_implementation(stream, decls, class_): + dump_struct_interface(stream, class_, suffix = '') + stream.write('{\n') + dump_decl(stream, '', class_) + stream.write('}\n') + stream.write('\n') + + +def dump_header(stream): + stream.write(copyright.strip() + '\n') + stream.write('\n') + stream.write('/**\n') + stream.write(' * @file\n') + stream.write(' * Dump i965 data structures.\n') + stream.write(' *\n') + stream.write(' * Generated automatically from brw_structs.h by brw_structs_dump.py.\n') + stream.write(' */\n') + stream.write('\n') + + +def dump_interfaces(decls, global_ns, names): + stream = open('brw_structs_dump.h', 'wt') + + dump_header(stream) + + stream.write('#ifndef BRW_STRUCTS_DUMP_H\n') + stream.write('#define BRW_STRUCTS_DUMP_H\n') + stream.write('\n') + + for name in names: + stream.write('struct %s;\n' % (name,)) + stream.write('\n') + + for name in names: + (class_,) = global_ns.classes(name = name) + dump_struct_interface(stream, class_) + stream.write('\n') + stream.write('\n') + + stream.write('#endif /* BRW_STRUCTS_DUMP_H */\n') + + +def dump_implementations(decls, global_ns, names): + stream = open('brw_structs_dump.c', 'wt') + + dump_header(stream) + + stream.write('#include "util/u_debug.h"\n') + stream.write('\n') + stream.write('#include "brw_types.h"\n') + stream.write('#include "brw_structs.h"\n') + stream.write('#include "brw_structs_dump.h"\n') + stream.write('\n') + + for name in names: + (class_,) = global_ns.classes(name = name) + dump_struct_implementation(stream, decls, class_) + + +def decl_filter(decl): + '''Filter the declarations we're interested in''' + name = decl.name + return name.startswith('brw_') and name not in ('brw_instruction',) + + +def main(): + + config = parser.config_t( + include_paths = [ + '../../include', + ], + compiler = 'gcc', + ) + + headers = [ + 'brw_types.h', + 'brw_structs.h', + ] + + decls = parser.parse(headers, config, parser.COMPILATION_MODE.ALL_AT_ONCE) + global_ns = declarations.get_global_namespace(decls) + + names = [] + for class_ in global_ns.classes(decl_filter): + names.append(class_.name) + names.sort() + + dump_interfaces(decls, global_ns, names) + dump_implementations(decls, global_ns, names) + + +if __name__ == '__main__': + main() diff --git a/src/gallium/drivers/i965/brw_swtnl.c b/src/gallium/drivers/i965/brw_swtnl.c new file mode 100644 index 00000000000..464013e7c40 --- /dev/null +++ b/src/gallium/drivers/i965/brw_swtnl.c @@ -0,0 +1,95 @@ + +#include "brw_context.h" +#include "brw_pipe_rast.h" + + +static GLboolean need_swtnl( struct brw_context *brw ) +{ + const struct pipe_rasterizer_state *rast = &brw->curr.rast->templ; + + /* If we don't require strict OpenGL conformance, never + * use fallbacks. If we're forcing fallbacks, always + * use fallfacks. + */ + if (brw->flags.no_swtnl) + return FALSE; + + if (brw->flags.force_swtnl) + return TRUE; + + /* Exceeding hw limits on number of VS inputs? + */ + if (brw->curr.num_vertex_elements == 0 || + brw->curr.num_vertex_elements >= BRW_VEP_MAX) { + return TRUE; + } + + /* Position array with zero stride? + * + * XXX: position isn't always at zero... + * XXX: eliminate zero-stride arrays + */ + { + int ve0_vb = brw->curr.vertex_element[0].vertex_buffer_index; + + if (brw->curr.vertex_buffer[ve0_vb].stride == 0) + return TRUE; + } + + /* XXX: short-circuit + */ + return FALSE; + + if (brw->reduced_primitive == PIPE_PRIM_TRIANGLES) { + if (rast->poly_smooth) + return TRUE; + + } + + if (brw->reduced_primitive == PIPE_PRIM_LINES || + (brw->reduced_primitive == PIPE_PRIM_TRIANGLES && + (rast->fill_cw == PIPE_POLYGON_MODE_LINE || + rast->fill_ccw == PIPE_POLYGON_MODE_LINE))) + { + /* BRW hardware will do AA lines, but they are non-conformant it + * seems. TBD whether we keep this fallback: + */ + if (rast->line_smooth) + return TRUE; + + /* XXX: was a fallback in mesa (gs doesn't get enough + * information to know when to reset stipple counter), but there + * must be a way around it. + */ + if (rast->line_stipple_enable && + (brw->reduced_primitive == PIPE_PRIM_TRIANGLES || + brw->primitive == PIPE_PRIM_LINE_LOOP || + brw->primitive == PIPE_PRIM_LINE_STRIP)) + return TRUE; + } + + + if (brw->reduced_primitive == PIPE_PRIM_POINTS || + (brw->reduced_primitive == PIPE_PRIM_TRIANGLES && + (rast->fill_cw == PIPE_POLYGON_MODE_POINT || + rast->fill_ccw == PIPE_POLYGON_MODE_POINT))) + { + if (rast->point_smooth) + return TRUE; + } + + /* BRW hardware doesn't handle CLAMP texturing correctly; + * brw_wm_sampler_state:translate_wrap_mode() treats CLAMP + * as CLAMP_TO_EDGE instead. If we're using CLAMP, and + * we want strict conformance, force the fallback. + * + * XXX: need a workaround for this. + */ + + /* Nothing stopping us from the fast path now */ + return FALSE; +} + + + + diff --git a/src/gallium/drivers/i965/brw_types.h b/src/gallium/drivers/i965/brw_types.h new file mode 100644 index 00000000000..89e08a5c804 --- /dev/null +++ b/src/gallium/drivers/i965/brw_types.h @@ -0,0 +1,21 @@ +#ifndef BRW_TYPES_H +#define BRW_TYPES_H + +#include "pipe/p_compiler.h" + +typedef uint32_t GLuint; +typedef uint8_t GLubyte; +typedef uint16_t GLushort; +typedef int32_t GLint; +typedef int8_t GLbyte; +typedef int16_t GLshort; +typedef float GLfloat; + +/* no GLenum, translate all away */ + +typedef uint8_t GLboolean; + +#define GL_FALSE FALSE +#define GL_TRUE TRUE + +#endif diff --git a/src/gallium/drivers/i965/brw_urb.c b/src/gallium/drivers/i965/brw_urb.c new file mode 100644 index 00000000000..907ec56c6ca --- /dev/null +++ b/src/gallium/drivers/i965/brw_urb.c @@ -0,0 +1,263 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + + +#include "brw_batchbuffer.h" +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "brw_debug.h" + +#define VS 0 +#define GS 1 +#define CLP 2 +#define SF 3 +#define CS 4 + +/** @file brw_urb.c + * + * Manages the division of the URB space between the various fixed-function + * units. + * + * See the Thread Initiation Management section of the GEN4 B-Spec, and + * the individual *_STATE structures for restrictions on numbers of + * entries and threads. + */ + +/* + * Generally, a unit requires a min_nr_entries based on how many entries + * it produces before the downstream unit gets unblocked and can use and + * dereference some of its handles. + * + * The SF unit preallocates a PUE at the start of thread dispatch, and only + * uses that one. So it requires one entry per thread. + * + * For CLIP, the SF unit will hold the previous primitive while the + * next is getting assembled, meaning that linestrips require 3 CLIP VUEs + * (vertices) to ensure continued processing, trifans require 4, and tristrips + * require 5. There can be 1 or 2 threads, and each has the same requirement. + * + * GS has the same requirement as CLIP, but it never handles tristrips, + * so we can lower the minimum to 4 for the POLYGONs (trifans) it produces. + * We only run it single-threaded. + * + * For VS, the number of entries may be 8, 12, 16, or 32 (or 64 on G4X). + * Each thread processes 2 preallocated VUEs (vertices) at a time, and they + * get streamed down as soon as threads processing earlier vertices get + * theirs accepted. + * + * Each unit will take the number of URB entries we give it (based on the + * entry size calculated in brw_vs_emit.c for VUEs, brw_sf_emit.c for PUEs, + * and brw_curbe.c for the CURBEs) and decide its maximum number of + * threads it can support based on that. in brw_*_state.c. + * + * XXX: Are the min_entry_size numbers useful? + * XXX: Verify min_nr_entries, esp for VS. + * XXX: Verify SF min_entry_size. + */ +static const struct urb_limits { + GLuint min_nr_entries; + GLuint preferred_nr_entries; + GLuint min_entry_size; + GLuint max_entry_size; +} limits[CS+1] = { + { 16, 32, 1, 5 }, /* vs */ + { 4, 8, 1, 5 }, /* gs */ + { 5, 10, 1, 5 }, /* clp */ + { 1, 8, 1, 12 }, /* sf */ + { 1, 4, 1, 32 } /* cs */ +}; + + +static GLboolean check_urb_layout( struct brw_context *brw ) +{ + brw->urb.vs_start = 0; + brw->urb.gs_start = brw->urb.nr_vs_entries * brw->urb.vsize; + brw->urb.clip_start = brw->urb.gs_start + brw->urb.nr_gs_entries * brw->urb.vsize; + brw->urb.sf_start = brw->urb.clip_start + brw->urb.nr_clip_entries * brw->urb.vsize; + brw->urb.cs_start = brw->urb.sf_start + brw->urb.nr_sf_entries * brw->urb.sfsize; + + return brw->urb.cs_start + brw->urb.nr_cs_entries * brw->urb.csize <= URB_SIZES(brw); +} + +/* Most minimal update, forces re-emit of URB fence packet after GS + * unit turned on/off. + */ +static int recalculate_urb_fence( struct brw_context *brw ) +{ + GLuint csize = brw->curbe.total_size; + GLuint vsize = brw->vs.prog_data->urb_entry_size; + GLuint sfsize = brw->sf.prog_data->urb_entry_size; + + if (csize < limits[CS].min_entry_size) + csize = limits[CS].min_entry_size; + + if (vsize < limits[VS].min_entry_size) + vsize = limits[VS].min_entry_size; + + if (sfsize < limits[SF].min_entry_size) + sfsize = limits[SF].min_entry_size; + + if (brw->urb.vsize < vsize || + brw->urb.sfsize < sfsize || + brw->urb.csize < csize || + (brw->urb.constrained && (brw->urb.vsize > vsize || + brw->urb.sfsize > sfsize || + brw->urb.csize > csize))) { + + + brw->urb.csize = csize; + brw->urb.sfsize = sfsize; + brw->urb.vsize = vsize; + + brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries; + brw->urb.nr_gs_entries = limits[GS].preferred_nr_entries; + brw->urb.nr_clip_entries = limits[CLP].preferred_nr_entries; + brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries; + brw->urb.nr_cs_entries = limits[CS].preferred_nr_entries; + + brw->urb.constrained = 0; + + if (BRW_IS_IGDNG(brw)) { + brw->urb.nr_vs_entries = 128; + brw->urb.nr_sf_entries = 48; + if (check_urb_layout(brw)) { + goto done; + } else { + brw->urb.constrained = 1; + brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries; + brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries; + } + } else if (BRW_IS_G4X(brw)) { + brw->urb.nr_vs_entries = 64; + if (check_urb_layout(brw)) { + goto done; + } else { + brw->urb.constrained = 1; + brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries; + } + } + + if (BRW_DEBUG & DEBUG_MIN_URB) { + brw->urb.nr_vs_entries = limits[VS].min_nr_entries; + brw->urb.nr_gs_entries = limits[GS].min_nr_entries; + brw->urb.nr_clip_entries = limits[CLP].min_nr_entries; + brw->urb.nr_sf_entries = limits[SF].min_nr_entries; + brw->urb.nr_cs_entries = limits[CS].min_nr_entries; + brw->urb.constrained = 1; + } + + if (!check_urb_layout(brw)) { + brw->urb.nr_vs_entries = limits[VS].min_nr_entries; + brw->urb.nr_gs_entries = limits[GS].min_nr_entries; + brw->urb.nr_clip_entries = limits[CLP].min_nr_entries; + brw->urb.nr_sf_entries = limits[SF].min_nr_entries; + brw->urb.nr_cs_entries = limits[CS].min_nr_entries; + + /* Mark us as operating with constrained nr_entries, so that next + * time we recalculate we'll resize the fences in the hope of + * escaping constrained mode and getting back to normal performance. + */ + brw->urb.constrained = 1; + + if (!check_urb_layout(brw)) { + /* This is impossible, given the maximal sizes of urb + * entries and the values for minimum nr of entries + * provided above. + */ + debug_printf("couldn't calculate URB layout!\n"); + exit(1); + } + + if (BRW_DEBUG & (DEBUG_URB|DEBUG_FALLBACKS)) + debug_printf("URB CONSTRAINED\n"); + } + +done: + if (BRW_DEBUG & DEBUG_URB) + debug_printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n", + brw->urb.vs_start, + brw->urb.gs_start, + brw->urb.clip_start, + brw->urb.sf_start, + brw->urb.cs_start, + URB_SIZES(brw)); + + brw->state.dirty.brw |= BRW_NEW_URB_FENCE; + } + + return 0; +} + + +const struct brw_tracked_state brw_recalculate_urb_fence = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_CURBE_OFFSETS, + .cache = (CACHE_NEW_VS_PROG | + CACHE_NEW_SF_PROG) + }, + .prepare = recalculate_urb_fence +}; + + + + + +int brw_upload_urb_fence(struct brw_context *brw) +{ + struct brw_urb_fence uf; + memset(&uf, 0, sizeof(uf)); + + uf.header.opcode = CMD_URB_FENCE; + uf.header.length = sizeof(uf)/4-2; + uf.header.vs_realloc = 1; + uf.header.gs_realloc = 1; + uf.header.clp_realloc = 1; + uf.header.sf_realloc = 1; + uf.header.vfe_realloc = 1; + uf.header.cs_realloc = 1; + + /* The ordering below is correct, not the layout in the + * instruction. + * + * There are 256/384 urb reg pairs in total. + */ + uf.bits0.vs_fence = brw->urb.gs_start; + uf.bits0.gs_fence = brw->urb.clip_start; + uf.bits0.clp_fence = brw->urb.sf_start; + uf.bits1.sf_fence = brw->urb.cs_start; + uf.bits1.cs_fence = URB_SIZES(brw); + + BRW_BATCH_STRUCT(brw, &uf); + return 0; +} diff --git a/src/gallium/drivers/i965/brw_util.c b/src/gallium/drivers/i965/brw_util.c new file mode 100644 index 00000000000..458058d668d --- /dev/null +++ b/src/gallium/drivers/i965/brw_util.c @@ -0,0 +1,38 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#include "brw_util.h" +#include "brw_defines.h" + + + + diff --git a/src/gallium/drivers/i965/brw_util.h b/src/gallium/drivers/i965/brw_util.h new file mode 100644 index 00000000000..b5f9a36e7bc --- /dev/null +++ b/src/gallium/drivers/i965/brw_util.h @@ -0,0 +1,44 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#ifndef BRW_UTIL_H +#define BRW_UTIL_H + +#include "brw_types.h" + +extern GLuint brw_count_bits( GLuint val ); +extern GLuint brw_translate_blend_factor( unsigned factor ); +extern GLuint brw_translate_blend_equation( unsigned mode ); + + + +#endif diff --git a/src/gallium/drivers/i965/brw_vs.c b/src/gallium/drivers/i965/brw_vs.c new file mode 100644 index 00000000000..e3ea5a3a135 --- /dev/null +++ b/src/gallium/drivers/i965/brw_vs.c @@ -0,0 +1,131 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#include "tgsi/tgsi_dump.h" + +#include "brw_context.h" +#include "brw_vs.h" +#include "brw_util.h" +#include "brw_state.h" +#include "brw_pipe_rast.h" + + + +static enum pipe_error do_vs_prog( struct brw_context *brw, + struct brw_vertex_shader *vp, + struct brw_vs_prog_key *key, + struct brw_winsys_buffer **bo_out) +{ + enum pipe_error ret; + GLuint program_size; + const GLuint *program; + struct brw_vs_compile c; + + memset(&c, 0, sizeof(c)); + memcpy(&c.key, key, sizeof(*key)); + + brw_init_compile(brw, &c.func); + c.vp = vp; + + c.prog_data.nr_outputs = vp->info.num_outputs; + c.prog_data.nr_inputs = vp->info.num_inputs; + + if (1) + tgsi_dump(c.vp->tokens, 0); + + /* Emit GEN4 code. + */ + brw_vs_emit(&c); + + /* get the program + */ + ret = brw_get_program(&c.func, &program, &program_size); + if (ret) + return ret; + + ret = brw_upload_cache( &brw->cache, BRW_VS_PROG, + &c.key, brw_vs_prog_key_size(&c.key), + NULL, 0, + program, program_size, + &c.prog_data, + &brw->vs.prog_data, + bo_out); + if (ret) + return ret; + + return PIPE_OK; +} + + +static enum pipe_error brw_upload_vs_prog(struct brw_context *brw) +{ + struct brw_vs_prog_key key; + struct brw_vertex_shader *vp = brw->curr.vertex_shader; + struct brw_fs_signature *sig = &brw->curr.fragment_shader->signature; + enum pipe_error ret; + + memset(&key, 0, sizeof(key)); + + key.program_string_id = vp->id; + key.nr_userclip = brw->curr.ucp.nr; + + memcpy(&key.fs_signature, sig, brw_fs_signature_size(sig)); + + + /* Make an early check for the key. + */ + if (brw_search_cache(&brw->cache, BRW_VS_PROG, + &key, brw_vs_prog_key_size(&key), + NULL, 0, + &brw->vs.prog_data, + &brw->vs.prog_bo)) + return PIPE_OK; + + ret = do_vs_prog(brw, vp, &key, &brw->vs.prog_bo); + if (ret) + return ret; + + return PIPE_OK; +} + + +/* See brw_vs.c: + */ +const struct brw_tracked_state brw_vs_prog = { + .dirty = { + .mesa = (PIPE_NEW_CLIP | + PIPE_NEW_RAST | + PIPE_NEW_FRAGMENT_SIGNATURE), + .brw = BRW_NEW_VERTEX_PROGRAM, + .cache = 0 + }, + .prepare = brw_upload_vs_prog +}; diff --git a/src/gallium/drivers/i965/brw_vs.h b/src/gallium/drivers/i965/brw_vs.h new file mode 100644 index 00000000000..944d88c84cc --- /dev/null +++ b/src/gallium/drivers/i965/brw_vs.h @@ -0,0 +1,106 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#ifndef BRW_VS_H +#define BRW_VS_H + + +#include "brw_context.h" +#include "brw_eu.h" + + +struct brw_vs_prog_key { + GLuint program_string_id; + GLuint nr_userclip:4; + GLuint pad:26; + struct brw_fs_signature fs_signature; +}; + +#define brw_vs_prog_key_size(s) (offsetof(struct brw_vs_prog_key, fs_signature) + \ + brw_fs_signature_size(&(s)->fs_signature)) + + +#define MAX_IF_DEPTH 32 +#define MAX_LOOP_DEPTH 32 + +struct brw_vs_compile { + struct brw_compile func; + struct brw_vs_prog_key key; + struct brw_vs_prog_data prog_data; + struct brw_chipset chipset; + + struct brw_vertex_shader *vp; + + GLuint nr_inputs; + GLuint nr_outputs; + GLuint nr_immediates; + GLfloat immediate[128][4]; + + GLuint overflow_grf_start; + GLuint overflow_count; + + GLuint first_tmp; + GLuint last_tmp; + + struct brw_reg r0; + struct brw_reg r1; + struct brw_reg regs[TGSI_FILE_COUNT][128]; + struct brw_reg tmp; + struct brw_reg stack; + + struct { + GLboolean used_in_src; + struct brw_reg reg; + } output_regs[128]; + + struct brw_reg userplane[6]; + + /** we may need up to 3 constants per instruction (if use_const_buffer) */ + struct { + GLint index; + struct brw_reg reg; + } current_const[3]; + + struct brw_instruction *if_inst[MAX_IF_DEPTH]; + struct brw_instruction *loop_inst[MAX_LOOP_DEPTH]; + GLuint insn; + GLuint if_depth; + GLuint loop_depth; + GLuint end_offset; + + struct brw_indirect stack_index; +}; + + +void brw_vs_emit( struct brw_vs_compile *c ); + +#endif diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c new file mode 100644 index 00000000000..8a16205d2f6 --- /dev/null +++ b/src/gallium/drivers/i965/brw_vs_emit.c @@ -0,0 +1,1654 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#include "pipe/p_shader_tokens.h" + +#include "util/u_memory.h" +#include "util/u_math.h" + +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_info.h" + +#include "brw_context.h" +#include "brw_vs.h" +#include "brw_debug.h" +#include "brw_disasm.h" + +/* Choose one of the 4 vec4's which can be packed into each 16-wide reg. + */ +static INLINE struct brw_reg brw_vec4_grf_repeat( GLuint reg, GLuint slot ) +{ + int nr = reg + slot/2; + int subnr = (slot%2) * 4; + + return stride(brw_vec4_grf(nr, subnr), 0, 4, 1); +} + + +static struct brw_reg get_tmp( struct brw_vs_compile *c ) +{ + struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0); + + if (++c->last_tmp > c->prog_data.total_grf) + c->prog_data.total_grf = c->last_tmp; + + return tmp; +} + +static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp ) +{ + if (tmp.nr == c->last_tmp-1) + c->last_tmp--; +} + +static void release_tmps( struct brw_vs_compile *c ) +{ + c->last_tmp = c->first_tmp; +} + + +static boolean is_position_output( struct brw_vs_compile *c, + unsigned vs_output ) +{ + const struct brw_vertex_shader *vs = c->vp; + unsigned semantic = vs->info.output_semantic_name[vs_output]; + unsigned index = vs->info.output_semantic_index[vs_output]; + + return (semantic == TGSI_SEMANTIC_POSITION && + index == 0); +} + + +static boolean find_output_slot( struct brw_vs_compile *c, + unsigned vs_output, + unsigned *fs_input_slot ) +{ + const struct brw_vertex_shader *vs = c->vp; + unsigned semantic = vs->info.output_semantic_name[vs_output]; + unsigned index = vs->info.output_semantic_index[vs_output]; + unsigned i; + + for (i = 0; i < c->key.fs_signature.nr_inputs; i++) { + if (c->key.fs_signature.input[i].semantic == semantic && + c->key.fs_signature.input[i].semantic_index == index) { + *fs_input_slot = i; + return TRUE; + } + } + + return FALSE; +} + + +/** + * Preallocate GRF register before code emit. + * Do things as simply as possible. Allocate and populate all regs + * ahead of time. + */ +static void brw_vs_alloc_regs( struct brw_vs_compile *c ) +{ + GLuint i, reg = 0, subreg = 0, mrf; + int attributes_in_vue; + + /* Determine whether to use a real constant buffer or use a block + * of GRF registers for constants. The later is faster but only + * works if everything fits in the GRF. + * XXX this heuristic/check may need some fine tuning... + */ + if (c->vp->info.file_max[TGSI_FILE_CONSTANT] + 1 + + c->vp->info.file_max[TGSI_FILE_IMMEDIATE] + 1 + + c->vp->info.file_max[TGSI_FILE_TEMPORARY] + 1 + 21 > BRW_MAX_GRF) + c->vp->use_const_buffer = GL_TRUE; + else { + /* XXX: immediates can go elsewhere if necessary: + */ + assert(c->vp->info.file_max[TGSI_FILE_IMMEDIATE] + 1 + + c->vp->info.file_max[TGSI_FILE_TEMPORARY] + 1 + 21 <= BRW_MAX_GRF); + + c->vp->use_const_buffer = GL_FALSE; + } + + /*printf("use_const_buffer = %d\n", c->vp->use_const_buffer);*/ + + /* r0 -- reserved as usual + */ + c->r0 = brw_vec8_grf(reg, 0); + reg++; + + /* User clip planes from curbe: + */ + if (c->key.nr_userclip) { + /* Skip over fixed planes: Or never read them into vs unit? + */ + subreg += 6; + + for (i = 0; i < c->key.nr_userclip; i++, subreg++) { + c->userplane[i] = + stride( brw_vec4_grf(reg+subreg/2, (subreg%2) * 4), 0, 4, 1); + } + + /* Deal with curbe alignment: + */ + subreg = align(subreg, 2); + /*reg += ((6 + c->key.nr_userclip + 3) / 4) * 2;*/ + } + + + /* Immediates: always in the curbe. + * + * XXX: Can try to encode some immediates as brw immediates + * XXX: Make sure ureg sets minimal immediate size and respect it + * here. + */ + for (i = 0; i < c->vp->info.immediate_count; i++, subreg++) { + c->regs[TGSI_FILE_IMMEDIATE][i] = + stride( brw_vec4_grf(reg+subreg/2, (subreg%2) * 4), 0, 4, 1); + } + c->prog_data.nr_params = c->vp->info.immediate_count * 4; + + + /* Vertex constant buffer. + * + * Constants from the buffer can be either cached in the curbe or + * loaded as needed from the actual constant buffer. + */ + if (!c->vp->use_const_buffer) { + GLuint nr_params = c->vp->info.file_max[TGSI_FILE_CONSTANT] + 1; + + for (i = 0; i < nr_params; i++, subreg++) { + c->regs[TGSI_FILE_CONSTANT][i] = + stride( brw_vec4_grf(reg+subreg/2, (subreg%2) * 4), 0, 4, 1); + } + + c->prog_data.nr_params += nr_params * 4; + } + + /* All regs allocated + */ + reg += (subreg + 1) / 2; + c->prog_data.curb_read_length = reg - 1; + + + /* Allocate input regs: + */ + c->nr_inputs = c->vp->info.num_inputs; + for (i = 0; i < c->nr_inputs; i++) { + c->regs[TGSI_FILE_INPUT][i] = brw_vec8_grf(reg, 0); + reg++; + } + + /* If there are no inputs, we'll still be reading one attribute's worth + * because it's required -- see urb_read_length setting. + */ + if (c->nr_inputs == 0) + reg++; + + + + /* Allocate outputs. The non-position outputs go straight into message regs. + */ + c->nr_outputs = c->prog_data.nr_outputs; + + if (c->chipset.is_igdng) + mrf = 8; + else + mrf = 4; + + + if (c->key.fs_signature.nr_inputs > BRW_MAX_MRF) { + c->overflow_grf_start = reg; + c->overflow_count = c->key.fs_signature.nr_inputs - BRW_MAX_MRF; + reg += c->overflow_count; + } + + /* XXX: need to access vertex output semantics here: + */ + for (i = 0; i < c->nr_outputs; i++) { + unsigned slot; + + /* XXX: Put output position in slot zero always. Clipper, etc, + * need access to this reg. + */ + if (is_position_output(c, i)) { + c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); /* copy to mrf 0 */ + reg++; + } + else if (find_output_slot(c, i, &slot)) { + + if (0 /* is_psize_output(c, i) */ ) { + /* c->psize_out.grf = reg; */ + /* c->psize_out.mrf = i; */ + } + + /* The first (16-4) outputs can go straight into the message regs. + */ + if (slot + mrf < BRW_MAX_MRF) { + c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(slot + mrf); + } + else { + int grf = c->overflow_grf_start + slot - BRW_MAX_MRF; + c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(grf, 0); + } + } + else { + c->regs[TGSI_FILE_OUTPUT][i] = brw_null_reg(); + } + } + + /* Allocate program temporaries: + */ + + for (i = 0; i < c->vp->info.file_max[TGSI_FILE_TEMPORARY]+1; i++) { + c->regs[TGSI_FILE_TEMPORARY][i] = brw_vec8_grf(reg, 0); + reg++; + } + + /* Address reg(s). Don't try to use the internal address reg until + * deref time. + */ + for (i = 0; i < c->vp->info.file_max[TGSI_FILE_ADDRESS]+1; i++) { + c->regs[TGSI_FILE_ADDRESS][i] = brw_reg(BRW_GENERAL_REGISTER_FILE, + reg, + 0, + BRW_REGISTER_TYPE_D, + BRW_VERTICAL_STRIDE_8, + BRW_WIDTH_8, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XXXX, + BRW_WRITEMASK_X); + reg++; + } + + if (c->vp->use_const_buffer) { + for (i = 0; i < 3; i++) { + c->current_const[i].index = -1; + c->current_const[i].reg = brw_vec8_grf(reg, 0); + reg++; + } + } + +#if 0 + for (i = 0; i < 128; i++) { + if (c->output_regs[i].used_in_src) { + c->output_regs[i].reg = brw_vec8_grf(reg, 0); + reg++; + } + } +#endif + + if (c->vp->has_flow_control) { + c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0); + reg += 2; + } + + /* Some opcodes need an internal temporary: + */ + c->first_tmp = reg; + c->last_tmp = reg; /* for allocation purposes */ + + /* Each input reg holds data from two vertices. The + * urb_read_length is the number of registers read from *each* + * vertex urb, so is half the amount: + */ + c->prog_data.urb_read_length = (c->nr_inputs + 1) / 2; + + /* Setting this field to 0 leads to undefined behavior according to the + * the VS_STATE docs. Our VUEs will always have at least one attribute + * sitting in them, even if it's padding. + */ + if (c->prog_data.urb_read_length == 0) + c->prog_data.urb_read_length = 1; + + /* The VS VUEs are shared by VF (outputting our inputs) and VS, so size + * them to fit the biggest thing they need to. + */ + attributes_in_vue = MAX2(c->nr_outputs, c->nr_inputs); + + if (c->chipset.is_igdng) + c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4; + else + c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4; + + c->prog_data.total_grf = reg; + + if (BRW_DEBUG & DEBUG_VS) { + debug_printf("%s NumAddrRegs %d\n", __FUNCTION__, + c->vp->info.file_max[TGSI_FILE_ADDRESS]+1); + debug_printf("%s NumTemps %d\n", __FUNCTION__, + c->vp->info.file_max[TGSI_FILE_TEMPORARY]+1); + debug_printf("%s reg = %d\n", __FUNCTION__, reg); + } +} + + +/** + * If an instruction uses a temp reg both as a src and the dest, we + * sometimes need to allocate an intermediate temporary. + */ +static void unalias1( struct brw_vs_compile *c, + struct brw_reg dst, + struct brw_reg arg0, + void (*func)( struct brw_vs_compile *, + struct brw_reg, + struct brw_reg )) +{ + if (dst.file == arg0.file && dst.nr == arg0.nr) { + struct brw_compile *p = &c->func; + struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask); + func(c, tmp, arg0); + brw_MOV(p, dst, tmp); + release_tmp(c, tmp); + } + else { + func(c, dst, arg0); + } +} + +/** + * \sa unalias2 + * Checkes if 2-operand instruction needs an intermediate temporary. + */ +static void unalias2( struct brw_vs_compile *c, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1, + void (*func)( struct brw_vs_compile *, + struct brw_reg, + struct brw_reg, + struct brw_reg )) +{ + if ((dst.file == arg0.file && dst.nr == arg0.nr) || + (dst.file == arg1.file && dst.nr == arg1.nr)) { + struct brw_compile *p = &c->func; + struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask); + func(c, tmp, arg0, arg1); + brw_MOV(p, dst, tmp); + release_tmp(c, tmp); + } + else { + func(c, dst, arg0, arg1); + } +} + +/** + * \sa unalias2 + * Checkes if 3-operand instruction needs an intermediate temporary. + */ +static void unalias3( struct brw_vs_compile *c, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1, + struct brw_reg arg2, + void (*func)( struct brw_vs_compile *, + struct brw_reg, + struct brw_reg, + struct brw_reg, + struct brw_reg )) +{ + if ((dst.file == arg0.file && dst.nr == arg0.nr) || + (dst.file == arg1.file && dst.nr == arg1.nr) || + (dst.file == arg2.file && dst.nr == arg2.nr)) { + struct brw_compile *p = &c->func; + struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask); + func(c, tmp, arg0, arg1, arg2); + brw_MOV(p, dst, tmp); + release_tmp(c, tmp); + } + else { + func(c, dst, arg0, arg1, arg2); + } +} + +static void emit_sop( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1, + GLuint cond) +{ + brw_MOV(p, dst, brw_imm_f(0.0f)); + brw_CMP(p, brw_null_reg(), cond, arg0, arg1); + brw_MOV(p, dst, brw_imm_f(1.0f)); + brw_set_predicate_control_flag_value(p, 0xff); +} + +static void emit_seq( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_EQ); +} + +static void emit_sne( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_NEQ); +} +static void emit_slt( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_L); +} + +static void emit_sle( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_LE); +} + +static void emit_sgt( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_G); +} + +static void emit_sge( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_GE); +} + +static void emit_max( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1); + brw_SEL(p, dst, arg1, arg0); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); +} + +static void emit_min( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1); + brw_SEL(p, dst, arg0, arg1); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); +} + + +static void emit_math1( struct brw_vs_compile *c, + GLuint function, + struct brw_reg dst, + struct brw_reg arg0, + GLuint precision) +{ + /* There are various odd behaviours with SEND on the simulator. In + * addition there are documented issues with the fact that the GEN4 + * processor doesn't do dependency control properly on SEND + * results. So, on balance, this kludge to get around failures + * with writemasked math results looks like it might be necessary + * whether that turns out to be a simulator bug or not: + */ + struct brw_compile *p = &c->func; + struct brw_reg tmp = dst; + GLboolean need_tmp = (dst.dw1.bits.writemask != 0xf || + dst.file != BRW_GENERAL_REGISTER_FILE); + + if (need_tmp) + tmp = get_tmp(c); + + brw_math(p, + tmp, + function, + BRW_MATH_SATURATE_NONE, + 2, + arg0, + BRW_MATH_DATA_SCALAR, + precision); + + if (need_tmp) { + brw_MOV(p, dst, tmp); + release_tmp(c, tmp); + } +} + + +static void emit_math2( struct brw_vs_compile *c, + GLuint function, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1, + GLuint precision) +{ + struct brw_compile *p = &c->func; + struct brw_reg tmp = dst; + GLboolean need_tmp = (dst.dw1.bits.writemask != 0xf || + dst.file != BRW_GENERAL_REGISTER_FILE); + + if (need_tmp) + tmp = get_tmp(c); + + brw_MOV(p, brw_message_reg(3), arg1); + + brw_math(p, + tmp, + function, + BRW_MATH_SATURATE_NONE, + 2, + arg0, + BRW_MATH_DATA_SCALAR, + precision); + + if (need_tmp) { + brw_MOV(p, dst, tmp); + release_tmp(c, tmp); + } +} + + +static void emit_exp_noalias( struct brw_vs_compile *c, + struct brw_reg dst, + struct brw_reg arg0 ) +{ + struct brw_compile *p = &c->func; + + + if (dst.dw1.bits.writemask & BRW_WRITEMASK_X) { + struct brw_reg tmp = get_tmp(c); + struct brw_reg tmp_d = retype(tmp, BRW_REGISTER_TYPE_D); + + /* tmp_d = floor(arg0.x) */ + brw_RNDD(p, tmp_d, brw_swizzle1(arg0, 0)); + + /* result[0] = 2.0 ^ tmp */ + + /* Adjust exponent for floating point: + * exp += 127 + */ + brw_ADD(p, brw_writemask(tmp_d, BRW_WRITEMASK_X), tmp_d, brw_imm_d(127)); + + /* Install exponent and sign. + * Excess drops off the edge: + */ + brw_SHL(p, brw_writemask(retype(dst, BRW_REGISTER_TYPE_D), BRW_WRITEMASK_X), + tmp_d, brw_imm_d(23)); + + release_tmp(c, tmp); + } + + if (dst.dw1.bits.writemask & BRW_WRITEMASK_Y) { + /* result[1] = arg0.x - floor(arg0.x) */ + brw_FRC(p, brw_writemask(dst, BRW_WRITEMASK_Y), brw_swizzle1(arg0, 0)); + } + + if (dst.dw1.bits.writemask & BRW_WRITEMASK_Z) { + /* As with the LOG instruction, we might be better off just + * doing a taylor expansion here, seeing as we have to do all + * the prep work. + * + * If mathbox partial precision is too low, consider also: + * result[3] = result[0] * EXP(result[1]) + */ + emit_math1(c, + BRW_MATH_FUNCTION_EXP, + brw_writemask(dst, BRW_WRITEMASK_Z), + brw_swizzle1(arg0, 0), + BRW_MATH_PRECISION_FULL); + } + + if (dst.dw1.bits.writemask & BRW_WRITEMASK_W) { + /* result[3] = 1.0; */ + brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_W), brw_imm_f(1)); + } +} + + +static void emit_log_noalias( struct brw_vs_compile *c, + struct brw_reg dst, + struct brw_reg arg0 ) +{ + struct brw_compile *p = &c->func; + struct brw_reg tmp = dst; + struct brw_reg tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD); + struct brw_reg arg0_ud = retype(arg0, BRW_REGISTER_TYPE_UD); + GLboolean need_tmp = (dst.dw1.bits.writemask != 0xf || + dst.file != BRW_GENERAL_REGISTER_FILE); + + if (need_tmp) { + tmp = get_tmp(c); + tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD); + } + + /* Perform mant = frexpf(fabsf(x), &exp), adjust exp and mnt + * according to spec: + * + * These almost look likey they could be joined up, but not really + * practical: + * + * result[0].f = (x.i & ((1<<31)-1) >> 23) - 127 + * result[1].i = (x.i & ((1<<23)-1) + (127<<23) + */ + if (dst.dw1.bits.writemask & BRW_WRITEMASK_XZ) { + brw_AND(p, + brw_writemask(tmp_ud, BRW_WRITEMASK_X), + brw_swizzle1(arg0_ud, 0), + brw_imm_ud((1U<<31)-1)); + + brw_SHR(p, + brw_writemask(tmp_ud, BRW_WRITEMASK_X), + tmp_ud, + brw_imm_ud(23)); + + brw_ADD(p, + brw_writemask(tmp, BRW_WRITEMASK_X), + retype(tmp_ud, BRW_REGISTER_TYPE_D), /* does it matter? */ + brw_imm_d(-127)); + } + + if (dst.dw1.bits.writemask & BRW_WRITEMASK_YZ) { + brw_AND(p, + brw_writemask(tmp_ud, BRW_WRITEMASK_Y), + brw_swizzle1(arg0_ud, 0), + brw_imm_ud((1<<23)-1)); + + brw_OR(p, + brw_writemask(tmp_ud, BRW_WRITEMASK_Y), + tmp_ud, + brw_imm_ud(127<<23)); + } + + if (dst.dw1.bits.writemask & BRW_WRITEMASK_Z) { + /* result[2] = result[0] + LOG2(result[1]); */ + + /* Why bother? The above is just a hint how to do this with a + * taylor series. Maybe we *should* use a taylor series as by + * the time all the above has been done it's almost certainly + * quicker than calling the mathbox, even with low precision. + * + * Options are: + * - result[0] + mathbox.LOG2(result[1]) + * - mathbox.LOG2(arg0.x) + * - result[0] + inline_taylor_approx(result[1]) + */ + emit_math1(c, + BRW_MATH_FUNCTION_LOG, + brw_writemask(tmp, BRW_WRITEMASK_Z), + brw_swizzle1(tmp, 1), + BRW_MATH_PRECISION_FULL); + + brw_ADD(p, + brw_writemask(tmp, BRW_WRITEMASK_Z), + brw_swizzle1(tmp, 2), + brw_swizzle1(tmp, 0)); + } + + if (dst.dw1.bits.writemask & BRW_WRITEMASK_W) { + /* result[3] = 1.0; */ + brw_MOV(p, brw_writemask(tmp, BRW_WRITEMASK_W), brw_imm_f(1)); + } + + if (need_tmp) { + brw_MOV(p, dst, tmp); + release_tmp(c, tmp); + } +} + + +/* Need to unalias - consider swizzles: r0 = DST r0.xxxx r1 + */ +static void emit_dst_noalias( struct brw_vs_compile *c, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1) +{ + struct brw_compile *p = &c->func; + + /* There must be a better way to do this: + */ + if (dst.dw1.bits.writemask & BRW_WRITEMASK_X) + brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_X), brw_imm_f(1.0)); + if (dst.dw1.bits.writemask & BRW_WRITEMASK_Y) + brw_MUL(p, brw_writemask(dst, BRW_WRITEMASK_Y), arg0, arg1); + if (dst.dw1.bits.writemask & BRW_WRITEMASK_Z) + brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_Z), arg0); + if (dst.dw1.bits.writemask & BRW_WRITEMASK_W) + brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_W), arg1); +} + + +static void emit_xpd( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg t, + struct brw_reg u) +{ + brw_MUL(p, brw_null_reg(), brw_swizzle(t, 1,2,0,3), brw_swizzle(u,2,0,1,3)); + brw_MAC(p, dst, negate(brw_swizzle(t, 2,0,1,3)), brw_swizzle(u,1,2,0,3)); +} + + +static void emit_lit_noalias( struct brw_vs_compile *c, + struct brw_reg dst, + struct brw_reg arg0 ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *if_insn; + struct brw_reg tmp = dst; + GLboolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE); + + if (need_tmp) + tmp = get_tmp(c); + + brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_YZ), brw_imm_f(0)); + brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_XW), brw_imm_f(1)); + + /* Need to use BRW_EXECUTE_8 and also do an 8-wide compare in order + * to get all channels active inside the IF. In the clipping code + * we run with NoMask, so it's not an option and we can use + * BRW_EXECUTE_1 for all comparisions. + */ + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,0), brw_imm_f(0)); + if_insn = brw_IF(p, BRW_EXECUTE_8); + { + brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_Y), brw_swizzle1(arg0,0)); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,1), brw_imm_f(0)); + brw_MOV(p, brw_writemask(tmp, BRW_WRITEMASK_Z), brw_swizzle1(arg0,1)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + emit_math2(c, + BRW_MATH_FUNCTION_POW, + brw_writemask(dst, BRW_WRITEMASK_Z), + brw_swizzle1(tmp, 2), + brw_swizzle1(arg0, 3), + BRW_MATH_PRECISION_PARTIAL); + } + + brw_ENDIF(p, if_insn); + + release_tmp(c, tmp); +} + +static void emit_lrp_noalias(struct brw_vs_compile *c, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1, + struct brw_reg arg2) +{ + struct brw_compile *p = &c->func; + + brw_ADD(p, dst, negate(arg0), brw_imm_f(1.0)); + brw_MUL(p, brw_null_reg(), dst, arg2); + brw_MAC(p, dst, arg0, arg1); +} + +/** 3 or 4-component vector normalization */ +static void emit_nrm( struct brw_vs_compile *c, + struct brw_reg dst, + struct brw_reg arg0, + int num_comps) +{ + struct brw_compile *p = &c->func; + struct brw_reg tmp = get_tmp(c); + + /* tmp = dot(arg0, arg0) */ + if (num_comps == 3) + brw_DP3(p, tmp, arg0, arg0); + else + brw_DP4(p, tmp, arg0, arg0); + + /* tmp = 1 / sqrt(tmp) */ + emit_math1(c, BRW_MATH_FUNCTION_RSQ, tmp, tmp, BRW_MATH_PRECISION_FULL); + + /* dst = arg0 * tmp */ + brw_MUL(p, dst, arg0, tmp); + + release_tmp(c, tmp); +} + + +static struct brw_reg +get_constant(struct brw_vs_compile *c, + GLuint argIndex, + GLuint index, + GLboolean relAddr) +{ + struct brw_compile *p = &c->func; + struct brw_reg const_reg; + struct brw_reg const2_reg; + + assert(argIndex < 3); + + if (c->current_const[argIndex].index != index || relAddr) { + struct brw_reg addrReg = c->regs[TGSI_FILE_ADDRESS][0]; + + c->current_const[argIndex].index = index; + +#if 0 + printf(" fetch const[%d] for arg %d into reg %d\n", + src.Index, argIndex, c->current_const[argIndex].reg.nr); +#endif + /* need to fetch the constant now */ + brw_dp_READ_4_vs(p, + c->current_const[argIndex].reg,/* writeback dest */ + 0, /* oword */ + relAddr, /* relative indexing? */ + addrReg, /* address register */ + 16 * index, /* byte offset */ + SURF_INDEX_VERT_CONST_BUFFER /* binding table index */ + ); + + if (relAddr) { + /* second read */ + const2_reg = get_tmp(c); + + /* use upper half of address reg for second read */ + addrReg = stride(addrReg, 0, 4, 0); + addrReg.subnr = 16; + + brw_dp_READ_4_vs(p, + const2_reg, /* writeback dest */ + 1, /* oword */ + relAddr, /* relative indexing? */ + addrReg, /* address register */ + 16 * index, /* byte offset */ + SURF_INDEX_VERT_CONST_BUFFER + ); + } + } + + const_reg = c->current_const[argIndex].reg; + + if (relAddr) { + /* merge the two Owords into the constant register */ + /* const_reg[7..4] = const2_reg[7..4] */ + brw_MOV(p, + suboffset(stride(const_reg, 0, 4, 1), 4), + suboffset(stride(const2_reg, 0, 4, 1), 4)); + release_tmp(c, const2_reg); + } + else { + /* replicate lower four floats into upper half (to get XYZWXYZW) */ + const_reg = stride(const_reg, 0, 4, 0); + const_reg.subnr = 0; + } + + return const_reg; +} + + + +/* TODO: relative addressing! + */ +static struct brw_reg get_reg( struct brw_vs_compile *c, + enum tgsi_file_type file, + GLuint index ) +{ + switch (file) { + case TGSI_FILE_TEMPORARY: + case TGSI_FILE_INPUT: + case TGSI_FILE_OUTPUT: + case TGSI_FILE_CONSTANT: + assert(c->regs[file][index].nr != 0); + return c->regs[file][index]; + + case TGSI_FILE_ADDRESS: + assert(index == 0); + return c->regs[file][index]; + + case TGSI_FILE_NULL: /* undef values */ + return brw_null_reg(); + + default: + assert(0); + return brw_null_reg(); + } +} + + +/** + * Indirect addressing: get reg[[arg] + offset]. + */ +static struct brw_reg deref( struct brw_vs_compile *c, + struct brw_reg arg, + GLint offset) +{ + struct brw_compile *p = &c->func; + struct brw_reg tmp = vec4(get_tmp(c)); + struct brw_reg addr_reg = c->regs[TGSI_FILE_ADDRESS][0]; + struct brw_reg vp_address = retype(vec1(addr_reg), BRW_REGISTER_TYPE_UW); + GLuint byte_offset = arg.nr * 32 + arg.subnr + offset * 16; + struct brw_reg indirect = brw_vec4_indirect(0,0); + + { + brw_push_insn_state(p); + brw_set_access_mode(p, BRW_ALIGN_1); + + /* This is pretty clunky - load the address register twice and + * fetch each 4-dword value in turn. There must be a way to do + * this in a single pass, but I couldn't get it to work. + */ + brw_ADD(p, brw_address_reg(0), vp_address, brw_imm_d(byte_offset)); + brw_MOV(p, tmp, indirect); + + brw_ADD(p, brw_address_reg(0), suboffset(vp_address, 8), brw_imm_d(byte_offset)); + brw_MOV(p, suboffset(tmp, 4), indirect); + + brw_pop_insn_state(p); + } + + /* NOTE: tmp not released */ + return vec8(tmp); +} + + +/** + * Get brw reg corresponding to the instruction's [argIndex] src reg. + * TODO: relative addressing! + */ +static struct brw_reg +get_src_reg( struct brw_vs_compile *c, + GLuint argIndex, + GLuint file, + GLint index, + GLboolean relAddr ) +{ + + switch (file) { + case TGSI_FILE_TEMPORARY: + case TGSI_FILE_INPUT: + case TGSI_FILE_OUTPUT: + if (relAddr) { + return deref(c, c->regs[file][0], index); + } + else { + assert(c->regs[file][index].nr != 0); + return c->regs[file][index]; + } + + case TGSI_FILE_IMMEDIATE: + return c->regs[file][index]; + + case TGSI_FILE_CONSTANT: + if (c->vp->use_const_buffer) { + return get_constant(c, argIndex, index, relAddr); + } + else if (relAddr) { + return deref(c, c->regs[TGSI_FILE_CONSTANT][0], index); + } + else { + assert(c->regs[TGSI_FILE_CONSTANT][index].nr != 0); + return c->regs[TGSI_FILE_CONSTANT][index]; + } + case TGSI_FILE_ADDRESS: + assert(index == 0); + return c->regs[file][index]; + + case TGSI_FILE_NULL: + /* this is a normal case since we loop over all three src args */ + return brw_null_reg(); + + default: + assert(0); + return brw_null_reg(); + } +} + + +static void emit_arl( struct brw_vs_compile *c, + struct brw_reg dst, + struct brw_reg arg0 ) +{ + struct brw_compile *p = &c->func; + struct brw_reg tmp = dst; + GLboolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE); + + if (need_tmp) + tmp = get_tmp(c); + + brw_RNDD(p, tmp, arg0); /* tmp = round(arg0) */ + brw_MUL(p, dst, tmp, brw_imm_d(16)); /* dst = tmp * 16 */ + + if (need_tmp) + release_tmp(c, tmp); +} + + +/** + * Return the brw reg for the given instruction's src argument. + */ +static struct brw_reg get_arg( struct brw_vs_compile *c, + const struct tgsi_full_src_register *src, + GLuint argIndex ) +{ + struct brw_reg reg; + + if (src->Register.File == TGSI_FILE_NULL) + return brw_null_reg(); + + reg = get_src_reg(c, argIndex, + src->Register.File, + src->Register.Index, + src->Register.Indirect); + + /* Convert 3-bit swizzle to 2-bit. + */ + reg.dw1.bits.swizzle = BRW_SWIZZLE4(src->Register.SwizzleX, + src->Register.SwizzleY, + src->Register.SwizzleZ, + src->Register.SwizzleW); + + reg.negate = src->Register.Negate ? 1 : 0; + + /* XXX: abs, absneg + */ + + return reg; +} + + +/** + * Get brw register for the given program dest register. + */ +static struct brw_reg get_dst( struct brw_vs_compile *c, + unsigned file, + unsigned index, + unsigned writemask ) +{ + struct brw_reg reg; + + switch (file) { + case TGSI_FILE_TEMPORARY: + case TGSI_FILE_OUTPUT: + assert(c->regs[file][index].nr != 0); + reg = c->regs[file][index]; + break; + case TGSI_FILE_ADDRESS: + assert(index == 0); + reg = c->regs[file][index]; + break; + case TGSI_FILE_NULL: + /* we may hit this for OPCODE_END, OPCODE_KIL, etc */ + reg = brw_null_reg(); + break; + default: + assert(0); + reg = brw_null_reg(); + } + + reg.dw1.bits.writemask = writemask; + + return reg; +} + + + + +/** + * Post-vertex-program processing. Send the results to the URB. + */ +static void emit_vertex_write( struct brw_vs_compile *c) +{ + struct brw_compile *p = &c->func; + struct brw_reg m0 = brw_message_reg(0); + struct brw_reg pos = c->regs[TGSI_FILE_OUTPUT][VERT_RESULT_HPOS]; + struct brw_reg ndc; + int eot; + int i; + GLuint len_vertext_header = 2; + + /* Build ndc coords */ + ndc = get_tmp(c); + /* ndc = 1.0 / pos.w */ + emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL); + /* ndc.xyz = pos * ndc */ + brw_MUL(p, brw_writemask(ndc, BRW_WRITEMASK_XYZ), pos, ndc); + + /* Update the header for point size, user clipping flags, and -ve rhw + * workaround. + */ + if (c->prog_data.writes_psiz || + c->key.nr_userclip || + c->chipset.is_965) + { + struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + GLuint i; + + brw_MOV(p, header1, brw_imm_ud(0)); + + brw_set_access_mode(p, BRW_ALIGN_16); + + if (c->prog_data.writes_psiz) { + struct brw_reg psiz = c->regs[TGSI_FILE_OUTPUT][VERT_RESULT_PSIZ]; + brw_MUL(p, brw_writemask(header1, BRW_WRITEMASK_W), brw_swizzle1(psiz, 0), brw_imm_f(1<<11)); + brw_AND(p, brw_writemask(header1, BRW_WRITEMASK_W), header1, brw_imm_ud(0x7ff<<8)); + } + + for (i = 0; i < c->key.nr_userclip; i++) { + brw_set_conditionalmod(p, BRW_CONDITIONAL_L); + brw_DP4(p, brw_null_reg(), pos, c->userplane[i]); + brw_OR(p, brw_writemask(header1, BRW_WRITEMASK_W), header1, brw_imm_ud(1<<i)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } + + /* i965 clipping workaround: + * 1) Test for -ve rhw + * 2) If set, + * set ndc = (0,0,0,0) + * set ucp[6] = 1 + * + * Later, clipping will detect ucp[6] and ensure the primitive is + * clipped against all fixed planes. + */ + if (c->chipset.is_965) { + brw_CMP(p, + vec8(brw_null_reg()), + BRW_CONDITIONAL_L, + brw_swizzle1(ndc, 3), + brw_imm_f(0)); + + brw_OR(p, brw_writemask(header1, BRW_WRITEMASK_W), header1, brw_imm_ud(1<<6)); + brw_MOV(p, ndc, brw_imm_f(0)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } + + brw_set_access_mode(p, BRW_ALIGN_1); /* why? */ + brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), header1); + brw_set_access_mode(p, BRW_ALIGN_16); + + release_tmp(c, header1); + } + else { + brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), brw_imm_ud(0)); + } + + /* Emit the (interleaved) headers for the two vertices - an 8-reg + * of zeros followed by two sets of NDC coordinates: + */ + brw_set_access_mode(p, BRW_ALIGN_1); + brw_MOV(p, offset(m0, 2), ndc); + + if (c->chipset.is_igdng) { + /* There are 20 DWs (D0-D19) in VUE vertex header on IGDNG */ + brw_MOV(p, offset(m0, 3), pos); /* a portion of vertex header */ + /* m4, m5 contain the distances from vertex to the user clip planeXXX. + * Seems it is useless for us. + * m6 is used for aligning, so that the remainder of vertex element is + * reg-aligned. + */ + brw_MOV(p, offset(m0, 7), pos); /* the remainder of vertex element */ + len_vertext_header = 6; + } else { + brw_MOV(p, offset(m0, 3), pos); + len_vertext_header = 2; + } + + eot = (c->overflow_count == 0); + + brw_urb_WRITE(p, + brw_null_reg(), /* dest */ + 0, /* starting mrf reg nr */ + c->r0, /* src */ + 0, /* allocate */ + 1, /* used */ + MIN2(c->nr_outputs + 1 + len_vertext_header, (BRW_MAX_MRF-1)), /* msg len */ + 0, /* response len */ + eot, /* eot */ + eot, /* writes complete */ + 0, /* urb destination offset */ + BRW_URB_SWIZZLE_INTERLEAVE); + + /* Not all of the vertex outputs/results fit into the MRF. + * Move the overflowed attributes from the GRF to the MRF and + * issue another brw_urb_WRITE(). + */ + for (i = 0; i < c->overflow_count; i += BRW_MAX_MRF) { + unsigned nr = MIN2(c->overflow_count - i, BRW_MAX_MRF); + GLuint j; + + eot = (i + nr >= c->overflow_count); + + /* XXX I'm not 100% sure about which MRF regs to use here. Starting + * at mrf[4] atm... + */ + for (j = 0; j < nr; j++) { + brw_MOV(p, brw_message_reg(4+j), + brw_vec8_grf(c->overflow_grf_start + i + j, 0)); + } + + brw_urb_WRITE(p, + brw_null_reg(), /* dest */ + 4, /* starting mrf reg nr */ + c->r0, /* src */ + 0, /* allocate */ + 1, /* used */ + nr+1, /* msg len */ + 0, /* response len */ + eot, /* eot */ + eot, /* writes complete */ + i-1, /* urb destination offset */ + BRW_URB_SWIZZLE_INTERLEAVE); + } +} + + +/** + * Called after code generation to resolve subroutine calls and the + * END instruction. + * \param end_inst points to brw code for END instruction + * \param last_inst points to last instruction emitted before vertex write + */ +static void +post_vs_emit( struct brw_vs_compile *c, + struct brw_instruction *end_inst, + struct brw_instruction *last_inst ) +{ + GLint offset; + + brw_resolve_cals(&c->func); + + /* patch up the END code to jump past subroutines, etc */ + offset = last_inst - end_inst; + if (offset > 1) { + brw_set_src1(end_inst, brw_imm_d(offset * 16)); + } else { + end_inst->header.opcode = BRW_OPCODE_NOP; + } +} + +static uint32_t +get_predicate(const struct tgsi_full_instruction *inst) +{ + /* XXX: disabling for now + */ +#if 0 + if (inst->dst.CondMask == COND_TR) + return BRW_PREDICATE_NONE; + + /* All of GLSL only produces predicates for COND_NE and one channel per + * vector. Fail badly if someone starts doing something else, as it might + * mean infinite looping or something. + * + * We'd like to support all the condition codes, but our hardware doesn't + * quite match the Mesa IR, which is modeled after the NV extensions. For + * those, the instruction may update the condition codes or not, then any + * later instruction may use one of those condition codes. For gen4, the + * instruction may update the flags register based on one of the condition + * codes output by the instruction, and then further instructions may + * predicate on that. We can probably support this, but it won't + * necessarily be easy. + */ +/* assert(inst->dst.CondMask == COND_NE); */ + + switch (inst->dst.CondSwizzle) { + case SWIZZLE_XXXX: + return BRW_PREDICATE_ALIGN16_REPLICATE_X; + case SWIZZLE_YYYY: + return BRW_PREDICATE_ALIGN16_REPLICATE_Y; + case SWIZZLE_ZZZZ: + return BRW_PREDICATE_ALIGN16_REPLICATE_Z; + case SWIZZLE_WWWW: + return BRW_PREDICATE_ALIGN16_REPLICATE_W; + default: + debug_printf("Unexpected predicate: 0x%08x\n", + inst->dst.CondMask); + return BRW_PREDICATE_NORMAL; + } +#else + return BRW_PREDICATE_NORMAL; +#endif +} + +static void emit_insn(struct brw_vs_compile *c, + const struct tgsi_full_instruction *inst) +{ + unsigned opcode = inst->Instruction.Opcode; + unsigned label = inst->Label.Label; + struct brw_compile *p = &c->func; + struct brw_reg args[3], dst; + GLuint i; + +#if 0 + printf("%d: ", insn); + _mesa_print_instruction(inst); +#endif + + /* Get argument regs. + */ + for (i = 0; i < 3; i++) { + args[i] = get_arg(c, &inst->Src[i], i); + } + + /* Get dest regs. Note that it is possible for a reg to be both + * dst and arg, given the static allocation of registers. So + * care needs to be taken emitting multi-operation instructions. + */ + dst = get_dst(c, + inst->Dst[0].Register.File, + inst->Dst[0].Register.Index, + inst->Dst[0].Register.WriteMask); + + /* XXX: saturate + */ + if (inst->Instruction.Saturate != TGSI_SAT_NONE) { + debug_printf("Unsupported saturate in vertex shader"); + } + + switch (opcode) { + case TGSI_OPCODE_ABS: + brw_MOV(p, dst, brw_abs(args[0])); + break; + case TGSI_OPCODE_ADD: + brw_ADD(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_COS: + emit_math1(c, BRW_MATH_FUNCTION_COS, dst, args[0], BRW_MATH_PRECISION_FULL); + break; + case TGSI_OPCODE_DP3: + brw_DP3(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_DP4: + brw_DP4(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_DPH: + brw_DPH(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_NRM: + emit_nrm(c, dst, args[0], 3); + break; + case TGSI_OPCODE_NRM4: + emit_nrm(c, dst, args[0], 4); + break; + case TGSI_OPCODE_DST: + unalias2(c, dst, args[0], args[1], emit_dst_noalias); + break; + case TGSI_OPCODE_EXP: + unalias1(c, dst, args[0], emit_exp_noalias); + break; + case TGSI_OPCODE_EX2: + emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, args[0], BRW_MATH_PRECISION_FULL); + break; + case TGSI_OPCODE_ARL: + emit_arl(c, dst, args[0]); + break; + case TGSI_OPCODE_FLR: + brw_RNDD(p, dst, args[0]); + break; + case TGSI_OPCODE_FRC: + brw_FRC(p, dst, args[0]); + break; + case TGSI_OPCODE_LOG: + unalias1(c, dst, args[0], emit_log_noalias); + break; + case TGSI_OPCODE_LG2: + emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, args[0], BRW_MATH_PRECISION_FULL); + break; + case TGSI_OPCODE_LIT: + unalias1(c, dst, args[0], emit_lit_noalias); + break; + case TGSI_OPCODE_LRP: + unalias3(c, dst, args[0], args[1], args[2], emit_lrp_noalias); + break; + case TGSI_OPCODE_MAD: + brw_MOV(p, brw_acc_reg(), args[2]); + brw_MAC(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_MAX: + emit_max(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_MIN: + emit_min(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_MOV: + brw_MOV(p, dst, args[0]); + break; + case TGSI_OPCODE_MUL: + brw_MUL(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_POW: + emit_math2(c, BRW_MATH_FUNCTION_POW, dst, args[0], args[1], BRW_MATH_PRECISION_FULL); + break; + case TGSI_OPCODE_RCP: + emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL); + break; + case TGSI_OPCODE_RSQ: + emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, + brw_swizzle(args[0], 0,0,0,0), BRW_MATH_PRECISION_FULL); + break; + case TGSI_OPCODE_SEQ: + emit_seq(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_SIN: + emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, args[0], BRW_MATH_PRECISION_FULL); + break; + case TGSI_OPCODE_SNE: + emit_sne(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_SGE: + emit_sge(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_SGT: + emit_sgt(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_SLT: + emit_slt(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_SLE: + emit_sle(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_SUB: + brw_ADD(p, dst, args[0], negate(args[1])); + break; + case TGSI_OPCODE_TRUNC: + /* round toward zero */ + brw_RNDZ(p, dst, args[0]); + break; + case TGSI_OPCODE_XPD: + emit_xpd(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_IF: + assert(c->if_depth < MAX_IF_DEPTH); + c->if_inst[c->if_depth] = brw_IF(p, BRW_EXECUTE_8); + /* Note that brw_IF smashes the predicate_control field. */ + c->if_inst[c->if_depth]->header.predicate_control = get_predicate(inst); + c->if_depth++; + break; + case TGSI_OPCODE_ELSE: + c->if_inst[c->if_depth-1] = brw_ELSE(p, c->if_inst[c->if_depth-1]); + break; + case TGSI_OPCODE_ENDIF: + assert(c->if_depth > 0); + brw_ENDIF(p, c->if_inst[--c->if_depth]); + break; + case TGSI_OPCODE_BGNLOOP: + c->loop_inst[c->loop_depth++] = brw_DO(p, BRW_EXECUTE_8); + break; + case TGSI_OPCODE_BRK: + brw_set_predicate_control(p, get_predicate(inst)); + brw_BREAK(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case TGSI_OPCODE_CONT: + brw_set_predicate_control(p, get_predicate(inst)); + brw_CONT(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case TGSI_OPCODE_ENDLOOP: + { + struct brw_instruction *inst0, *inst1; + GLuint br = 1; + + c->loop_depth--; + + if (c->chipset.is_igdng) + br = 2; + + inst0 = inst1 = brw_WHILE(p, c->loop_inst[c->loop_depth]); + /* patch all the BREAK/CONT instructions from last BEGINLOOP */ + while (inst0 > c->loop_inst[c->loop_depth]) { + inst0--; + if (inst0->header.opcode == TGSI_OPCODE_BRK) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); + inst0->bits3.if_else.pop_count = 0; + } + else if (inst0->header.opcode == TGSI_OPCODE_CONT) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0); + inst0->bits3.if_else.pop_count = 0; + } + } + } + break; + case TGSI_OPCODE_BRA: + brw_set_predicate_control(p, get_predicate(inst)); + brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case TGSI_OPCODE_CAL: + brw_set_access_mode(p, BRW_ALIGN_1); + brw_ADD(p, deref_1d(c->stack_index, 0), brw_ip_reg(), brw_imm_d(3*16)); + brw_set_access_mode(p, BRW_ALIGN_16); + brw_ADD(p, get_addr_reg(c->stack_index), + get_addr_reg(c->stack_index), brw_imm_d(4)); + brw_save_call(p, label, p->nr_insn); + brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); + break; + case TGSI_OPCODE_RET: + brw_ADD(p, get_addr_reg(c->stack_index), + get_addr_reg(c->stack_index), brw_imm_d(-4)); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_MOV(p, brw_ip_reg(), deref_1d(c->stack_index, 0)); + brw_set_access_mode(p, BRW_ALIGN_16); + break; + case TGSI_OPCODE_END: + c->end_offset = p->nr_insn; + /* this instruction will get patched later to jump past subroutine + * code, etc. + */ + brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); + break; + case TGSI_OPCODE_BGNSUB: + brw_save_label(p, p->nr_insn, p->nr_insn); + break; + case TGSI_OPCODE_ENDSUB: + /* no-op */ + break; + default: + debug_printf("Unsupported opcode %i (%s) in vertex shader", + opcode, + tgsi_get_opcode_name(opcode)); + } + + /* Set the predication update on the last instruction of the native + * instruction sequence. + * + * This would be problematic if it was set on a math instruction, + * but that shouldn't be the case with the current GLSL compiler. + */ +#if 0 + /* XXX: disabled + */ + if (inst->CondUpdate) { + struct brw_instruction *hw_insn = &p->store[p->nr_insn - 1]; + + assert(hw_insn->header.destreg__conditionalmod == 0); + hw_insn->header.destreg__conditionalmod = BRW_CONDITIONAL_NZ; + } +#endif + + release_tmps(c); +} + + +/* Emit the vertex program instructions here. + */ +void brw_vs_emit(struct brw_vs_compile *c) +{ + struct brw_compile *p = &c->func; + const struct tgsi_token *tokens = c->vp->tokens; + struct brw_instruction *end_inst, *last_inst; + struct tgsi_parse_context parse; + struct tgsi_full_instruction *inst; + + if (BRW_DEBUG & DEBUG_VS) + tgsi_dump(c->vp->tokens, 0); + + c->stack_index = brw_indirect(0, 0); + + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_access_mode(p, BRW_ALIGN_16); + + + /* Static register allocation + */ + brw_vs_alloc_regs(c); + + if (c->vp->has_flow_control) { + brw_MOV(p, get_addr_reg(c->stack_index), brw_address(c->stack)); + } + + /* Instructions + */ + tgsi_parse_init( &parse, tokens ); + while( !tgsi_parse_end_of_tokens( &parse ) ) { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + case TGSI_TOKEN_TYPE_IMMEDIATE: + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + inst = &parse.FullToken.FullInstruction; + emit_insn( c, inst ); + break; + + default: + assert( 0 ); + } + } + tgsi_parse_free( &parse ); + + end_inst = &p->store[c->end_offset]; + last_inst = &p->store[p->nr_insn]; + + /* The END instruction will be patched to jump to this code */ + emit_vertex_write(c); + + post_vs_emit(c, end_inst, last_inst); + + if (BRW_DEBUG & DEBUG_VS) { + debug_printf("vs-native:\n"); + brw_disasm(stderr, p->store, p->nr_insn); + } +} diff --git a/src/gallium/drivers/i965/brw_vs_state.c b/src/gallium/drivers/i965/brw_vs_state.c new file mode 100644 index 00000000000..dadbb622e4d --- /dev/null +++ b/src/gallium/drivers/i965/brw_vs_state.c @@ -0,0 +1,201 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#include "util/u_math.h" + + +#include "brw_debug.h" +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" + +struct brw_vs_unit_key { + unsigned int total_grf; + unsigned int urb_entry_read_length; + unsigned int curb_entry_read_length; + + unsigned int curbe_offset; + + unsigned int nr_urb_entries, urb_size; + + unsigned int nr_surfaces; +}; + +static void +vs_unit_populate_key(struct brw_context *brw, struct brw_vs_unit_key *key) +{ + memset(key, 0, sizeof(*key)); + + /* CACHE_NEW_VS_PROG */ + key->total_grf = brw->vs.prog_data->total_grf; + key->urb_entry_read_length = brw->vs.prog_data->urb_read_length; + key->curb_entry_read_length = brw->vs.prog_data->curb_read_length; + + /* BRW_NEW_URB_FENCE */ + key->nr_urb_entries = brw->urb.nr_vs_entries; + key->urb_size = brw->urb.vsize; + + /* BRW_NEW_NR_VS_SURFACES */ + key->nr_surfaces = brw->vs.nr_surfaces; + + /* PIPE_NEW_CLIP */ + if (brw->curr.ucp.nr) { + /* Note that we read in the userclip planes as well, hence + * clip_start: + */ + key->curbe_offset = brw->curbe.clip_start; + } + else { + key->curbe_offset = brw->curbe.vs_start; + } +} + +static enum pipe_error +vs_unit_create_from_key(struct brw_context *brw, + struct brw_vs_unit_key *key, + struct brw_winsys_reloc *reloc, + struct brw_winsys_buffer **bo_out) +{ + enum pipe_error ret; + struct brw_vs_unit_state vs; + int chipset_max_threads; + + memset(&vs, 0, sizeof(vs)); + + vs.thread0.kernel_start_pointer = 0; /* reloc */ + vs.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1; + vs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + /* Choosing multiple program flow means that we may get 2-vertex threads, + * which will have the channel mask for dwords 4-7 enabled in the thread, + * and those dwords will be written to the second URB handle when we + * brw_urb_WRITE() results. + */ + vs.thread1.single_program_flow = 0; + + if (BRW_IS_IGDNG(brw)) + vs.thread1.binding_table_entry_count = 0; /* hardware requirement */ + else + vs.thread1.binding_table_entry_count = key->nr_surfaces; + + vs.thread3.urb_entry_read_length = key->urb_entry_read_length; + vs.thread3.const_urb_entry_read_length = key->curb_entry_read_length; + vs.thread3.dispatch_grf_start_reg = 1; + vs.thread3.urb_entry_read_offset = 0; + vs.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; + + if (BRW_IS_IGDNG(brw)) + vs.thread4.nr_urb_entries = key->nr_urb_entries >> 2; + else + vs.thread4.nr_urb_entries = key->nr_urb_entries; + + vs.thread4.urb_entry_allocation_size = key->urb_size - 1; + + if (BRW_IS_IGDNG(brw)) + chipset_max_threads = 72; + else if (BRW_IS_G4X(brw)) + chipset_max_threads = 32; + else + chipset_max_threads = 16; + + vs.thread4.max_threads = CLAMP(key->nr_urb_entries / 2, + 1, chipset_max_threads) - 1; + + if (BRW_DEBUG & DEBUG_SINGLE_THREAD) + vs.thread4.max_threads = 0; + + /* No samplers for ARB_vp programs: + */ + /* It has to be set to 0 for IGDNG + */ + vs.vs5.sampler_count = 0; + + if (BRW_DEBUG & DEBUG_STATS) + vs.thread4.stats_enable = 1; + + /* Vertex program always enabled: + */ + vs.vs6.vs_enable = 1; + + ret = brw_upload_cache(&brw->cache, BRW_VS_UNIT, + key, sizeof(*key), + reloc, 1, + &vs, sizeof(vs), + NULL, NULL, + bo_out); + if (ret) + return ret; + + return PIPE_OK; +} + +static int prepare_vs_unit(struct brw_context *brw) +{ + struct brw_vs_unit_key key; + enum pipe_error ret; + struct brw_winsys_reloc reloc[1]; + unsigned grf_reg_count; + + vs_unit_populate_key(brw, &key); + + grf_reg_count = (align(key.total_grf, 16) / 16 - 1); + + /* Emit VS program relocation */ + make_reloc(&reloc[0], + BRW_USAGE_STATE, + grf_reg_count << 1, + offsetof(struct brw_vs_unit_state, thread0), + brw->vs.prog_bo); + + + if (brw_search_cache(&brw->cache, BRW_VS_UNIT, + &key, sizeof(key), + reloc, 1, + NULL, + &brw->vs.state_bo)) + return PIPE_OK; + + ret = vs_unit_create_from_key(brw, &key, reloc, &brw->vs.state_bo); + if (ret) + return ret; + + return PIPE_OK; +} + +const struct brw_tracked_state brw_vs_unit = { + .dirty = { + .mesa = (PIPE_NEW_CLIP), + .brw = (BRW_NEW_CURBE_OFFSETS | + BRW_NEW_NR_VS_SURFACES | + BRW_NEW_URB_FENCE), + .cache = CACHE_NEW_VS_PROG + }, + .prepare = prepare_vs_unit, +}; diff --git a/src/gallium/drivers/i965/brw_vs_surface_state.c b/src/gallium/drivers/i965/brw_vs_surface_state.c new file mode 100644 index 00000000000..177a5170d23 --- /dev/null +++ b/src/gallium/drivers/i965/brw_vs_surface_state.c @@ -0,0 +1,232 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "brw_winsys.h" + +/* XXX: disabled true constant buffer functionality + */ + + +/* Creates a new VS constant buffer reflecting the current VS program's + * constants, if needed by the VS program. + * + * Otherwise, constants go through the CURBEs using the brw_constant_buffer + * state atom. + */ +#if 0 +static struct brw_winsys_buffer * +brw_vs_update_constant_buffer(struct brw_context *brw) +{ + /* XXX: true constant buffers + */ + struct brw_vertex_program *vp = + (struct brw_vertex_program *) brw->vertex_program; + const struct gl_program_parameter_list *params = vp->program.Base.Parameters; + const int size = params->NumParameters * 4 * sizeof(GLfloat); + drm_intel_bo *const_buffer; + + /* BRW_NEW_VERTEX_PROGRAM */ + if (!vp->use_const_buffer) + return NULL; + + const_buffer = brw->sws->bo_alloc(brw->sws, + BRW_BUFFER_TYPE_SHADER_CONSTANTS, + size, 64); + + /* _NEW_PROGRAM_CONSTANTS */ + brw->sws->bo_subdata(const_buffer, 0, size, params->ParameterValues, + NULL, 0); + + return const_buffer; +} +#endif + +/** + * Update the surface state for a VS constant buffer. + * + * Sets brw->vs.surf_bo[surf] and brw->vp->const_buffer. + */ +#if 0 +static void +brw_update_vs_constant_surface( struct brw_context *brw, + GLuint surf) +{ + struct brw_surface_key key; + struct pipe_buffer *cb = brw->curr.vs_constants; + enum pipe_error ret; + + assert(surf == 0); + + /* If we're in this state update atom, we need to update VS constants, so + * free the old buffer and create a new one for the new contents. + */ + ret = brw_vs_update_constant_buffer(brw, &vp->const_buffer); + if (ret) + return ret; + + /* If there's no constant buffer, then no surface BO is needed to point at + * it. + */ + if (vp->const_buffer == NULL) { + bo_reference(brw->vs.surf_bo[surf], NULL); + return PIPE_OK; + } + + memset(&key, 0, sizeof(key)); + + key.format = PIPE_FORMAT_R32G32B32A32_FLOAT; + key.bo = vp->const_buffer; + key.depthmode = GL_NONE; + key.pitch = params->NumParameters; + key.width = params->NumParameters; + key.height = 1; + key.depth = 1; + key.cpp = 16; + + /* + printf("%s:\n", __FUNCTION__); + printf(" width %d height %d depth %d cpp %d pitch %d\n", + key.width, key.height, key.depth, key.cpp, key.pitch); + */ + + if (brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, + &key, sizeof(key), + &key.bo, key.bo ? 1 : 0, + NULL, + &brw->vs.surf_bo[surf])) + return PIPE_OK; + + ret = brw_create_constant_surface(brw, &key + &brw->vs.surf_bo[surf]); + if (ret) + return ret; + + return PIPE_OK; +} +#endif + + +/** + * Constructs the binding table for the VS surface state. + */ +static enum pipe_error +brw_vs_get_binding_table(struct brw_context *brw, + struct brw_winsys_buffer **bo_out) +{ +#if 0 + static GLuint data[BRW_VS_MAX_SURF]; /* always zero */ + struct brw_winsys_reloc reloc[BRW_VS_MAX_SURF]; + int i; + + /* Emit binding table relocations to surface state */ + for (i = 0; i < BRW_VS_MAX_SURF; i++) { + make_reloc(&reloc[i], + BRW_USAGE_STATE, + 0, + i * 4, + brw->vs.surf_bo[i]); + } + + ret = brw_cache_data( &brw->surface_cache, + BRW_SS_SURF_BIND, + NULL, 0, + reloc, nr_reloc, + data, sizeof data, + NULL, NULL, + bo_out); + if (ret) + return ret; + + FREE(data); + return PIPE_OK; +#else + return PIPE_OK; +#endif +} + +/** + * Vertex shader surfaces (constant buffer). + * + * This consumes the state updates for the constant buffer needing + * to be updated, and produces BRW_NEW_NR_VS_SURFACES for the VS unit and + * CACHE_NEW_SURF_BIND for the binding table upload. + */ +static enum pipe_error prepare_vs_surfaces(struct brw_context *brw ) +{ + enum pipe_error ret; + +#if 0 + int i; + int nr_surfaces = 0; + + brw_update_vs_constant_surface(ctx, SURF_INDEX_VERT_CONST_BUFFER); + + for (i = 0; i < BRW_VS_MAX_SURF; i++) { + if (brw->vs.surf_bo[i] != NULL) { + nr_surfaces = i + 1; + } + } + + if (brw->vs.nr_surfaces != nr_surfaces) { + brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES; + brw->vs.nr_surfaces = nr_surfaces; + } +#endif + + /* Note that we don't end up updating the bind_bo if we don't have a + * surface to be pointing at. This should be relatively harmless, as it + * just slightly increases our working set size. + */ + if (brw->vs.nr_surfaces != 0) { + ret = brw_vs_get_binding_table(brw, &brw->vs.bind_bo); + if (ret) + return ret; + } + + return PIPE_OK; +} + +const struct brw_tracked_state brw_vs_surfaces = { + .dirty = { + .mesa = (PIPE_NEW_VERTEX_CONSTANTS | + PIPE_NEW_VERTEX_SHADER), + .brw = 0, + .cache = 0 + }, + .prepare = prepare_vs_surfaces, +}; + + + diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h new file mode 100644 index 00000000000..a242e31218a --- /dev/null +++ b/src/gallium/drivers/i965/brw_winsys.h @@ -0,0 +1,309 @@ +/************************************************************************** + * + * Copyright © 2009 Jakob Bornecrantz + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef BRW_WINSYS_H +#define BRW_WINSYS_H + +#include "pipe/p_compiler.h" +#include "pipe/p_defines.h" +#include "pipe/p_refcnt.h" + +struct brw_winsys; +struct pipe_fence_handle; + +/* Not sure why the winsys needs this: + */ +#define BRW_BATCH_SIZE (32*1024) + +struct brw_winsys_screen; + +/* Need a tiny bit of information inside the abstract buffer struct: + */ +struct brw_winsys_buffer { + struct pipe_reference reference; + struct brw_winsys_screen *sws; + unsigned size; +}; + + +/* Should be possible to validate usages above against buffer creation + * types, below: + */ +enum brw_buffer_type +{ + BRW_BUFFER_TYPE_TEXTURE, + BRW_BUFFER_TYPE_SCANOUT, /**< a texture used for scanning out from */ + BRW_BUFFER_TYPE_VERTEX, + BRW_BUFFER_TYPE_CURBE, + BRW_BUFFER_TYPE_QUERY, + BRW_BUFFER_TYPE_SHADER_CONSTANTS, + BRW_BUFFER_TYPE_SHADER_SCRATCH, + BRW_BUFFER_TYPE_BATCH, + BRW_BUFFER_TYPE_GENERAL_STATE, + BRW_BUFFER_TYPE_SURFACE_STATE, + BRW_BUFFER_TYPE_PIXEL, /* image uploads, pbo's, etc */ + BRW_BUFFER_TYPE_GENERIC, /* unknown */ + BRW_BUFFER_TYPE_MAX /* Count of possible values */ +}; + + +/* Describe the usage of a particular buffer in a relocation. The DRM + * winsys will translate these back to GEM read/write domain flags. + */ +enum brw_buffer_usage { + BRW_USAGE_STATE, /* INSTRUCTION, 0 */ + BRW_USAGE_QUERY_RESULT, /* INSTRUCTION, INSTRUCTION */ + BRW_USAGE_RENDER_TARGET, /* RENDER, 0 */ + BRW_USAGE_DEPTH_BUFFER, /* RENDER, RENDER */ + BRW_USAGE_BLIT_SOURCE, /* RENDER, 0 */ + BRW_USAGE_BLIT_DEST, /* RENDER, RENDER */ + BRW_USAGE_SAMPLER, /* SAMPLER, 0 */ + BRW_USAGE_VERTEX, /* VERTEX, 0 */ + BRW_USAGE_SCRATCH, /* 0, 0 */ + BRW_USAGE_MAX +}; + +enum brw_buffer_data_type { + BRW_DATA_GS_CC_VP, + BRW_DATA_GS_CC_UNIT, + BRW_DATA_GS_WM_PROG, + BRW_DATA_GS_SAMPLER_DEFAULT_COLOR, + BRW_DATA_GS_SAMPLER, + BRW_DATA_GS_WM_UNIT, + BRW_DATA_GS_SF_PROG, + BRW_DATA_GS_SF_VP, + BRW_DATA_GS_SF_UNIT, + BRW_DATA_GS_VS_UNIT, + BRW_DATA_GS_VS_PROG, + BRW_DATA_GS_GS_UNIT, + BRW_DATA_GS_GS_PROG, + BRW_DATA_GS_CLIP_VP, + BRW_DATA_GS_CLIP_UNIT, + BRW_DATA_GS_CLIP_PROG, + BRW_DATA_SS_SURFACE, + BRW_DATA_SS_SURF_BIND, + BRW_DATA_CONSTANT_BUFFER, + BRW_DATA_BATCH_BUFFER, + BRW_DATA_OTHER, + BRW_DATA_MAX +}; + + +/* Matches the i915_drm definitions: + */ +#define BRW_TILING_NONE 0 +#define BRW_TILING_X 1 +#define BRW_TILING_Y 2 + + +/* Relocations to be applied with subdata in a call to sws->bo_subdata, below. + * + * Effectively this encodes: + * + * (unsigned *)(subdata + offset) = bo->offset + delta + */ +struct brw_winsys_reloc { + enum brw_buffer_usage usage; /* debug only */ + unsigned delta; + unsigned offset; + struct brw_winsys_buffer *bo; +}; + +static INLINE void make_reloc(struct brw_winsys_reloc *reloc, + enum brw_buffer_usage usage, + unsigned delta, + unsigned offset, + struct brw_winsys_buffer *bo) +{ + reloc->usage = usage; + reloc->delta = delta; + reloc->offset = offset; + reloc->bo = bo; /* Note - note taking a reference yet */ +} + + + +struct brw_winsys_screen { + + + /** + * Buffer functions. + */ + + /*@{*/ + /** + * Create a buffer. + */ + enum pipe_error (*bo_alloc)(struct brw_winsys_screen *sws, + enum brw_buffer_type type, + unsigned size, + unsigned alignment, + struct brw_winsys_buffer **bo_out); + + /* Destroy a buffer when our refcount goes to zero: + */ + void (*bo_destroy)(struct brw_winsys_buffer *buffer); + + /* delta -- added to b2->offset, and written into buffer + * offset -- location above value is written to within buffer + */ + enum pipe_error (*bo_emit_reloc)(struct brw_winsys_buffer *buffer, + enum brw_buffer_usage usage, + unsigned delta, + unsigned offset, + struct brw_winsys_buffer *b2); + + enum pipe_error (*bo_exec)(struct brw_winsys_buffer *buffer, + unsigned bytes_used); + + enum pipe_error (*bo_subdata)(struct brw_winsys_buffer *buffer, + enum brw_buffer_data_type data_type, + size_t offset, + size_t size, + const void *data, + const struct brw_winsys_reloc *reloc, + unsigned nr_reloc ); + + boolean (*bo_is_busy)(struct brw_winsys_buffer *buffer); + boolean (*bo_references)(struct brw_winsys_buffer *a, + struct brw_winsys_buffer *b); + + /* XXX: couldn't this be handled by returning true/false on + * bo_emit_reloc? + */ + enum pipe_error (*check_aperture_space)(struct brw_winsys_screen *iws, + struct brw_winsys_buffer **buffers, + unsigned count); + + /** + * Map a buffer. + */ + void *(*bo_map)(struct brw_winsys_buffer *buffer, + enum brw_buffer_data_type data_type, + unsigned offset, + unsigned length, + boolean write, + boolean discard, + boolean flush_explicit); + + void (*bo_flush_range)(struct brw_winsys_buffer *buffer, + unsigned offset, + unsigned length); + + /** + * Unmap a buffer. + */ + void (*bo_unmap)(struct brw_winsys_buffer *buffer); + /*@}*/ + + + /* Wait for buffer to go idle. Similar to map+unmap, but doesn't + * mark buffer contents as dirty. + */ + void (*bo_wait_idle)(struct brw_winsys_buffer *buffer); + + /** + * Destroy the winsys. + */ + void (*destroy)(struct brw_winsys_screen *iws); +}; + +static INLINE void * +bo_map_read(struct brw_winsys_screen *sws, struct brw_winsys_buffer *buf) +{ + return sws->bo_map( buf, + BRW_DATA_OTHER, + 0, buf->size, + FALSE, FALSE, FALSE ); +} + +static INLINE void +bo_reference(struct brw_winsys_buffer **ptr, struct brw_winsys_buffer *buf) +{ + struct brw_winsys_buffer *old_buf = *ptr; + + if (pipe_reference(&(*ptr)->reference, &buf->reference)) + old_buf->sws->bo_destroy(old_buf); + + *ptr = buf; +} + + +/** + * Create brw pipe_screen. + */ +struct pipe_screen *brw_create_screen(struct brw_winsys_screen *iws, unsigned pci_id); + +/** + * Create a brw pipe_context. + */ +struct pipe_context *brw_create_context(struct pipe_screen *screen); + +/** + * Get the brw_winsys buffer backing the texture. + * + * TODO UGLY + */ +struct pipe_texture; +boolean brw_texture_get_winsys_buffer(struct pipe_texture *texture, + struct brw_winsys_buffer **buffer, + unsigned *stride); + +/** + * Wrap a brw_winsys buffer with a texture blanket. + * + * TODO UGLY + */ +struct pipe_texture * +brw_texture_blanket_winsys_buffer(struct pipe_screen *screen, + const struct pipe_texture *template, + unsigned pitch, + unsigned tiling, + struct brw_winsys_buffer *buffer); + + +/************************************************************************* + * Cooperative dumping between winsys and driver. TODO: make this + * driver-only by wrapping calls to winsys->bo_subdata(). + */ + +#ifdef DEBUG +extern int BRW_DUMP; +#else +#define BRW_DUMP 0 +#endif + +#define DUMP_ASM 0x1 +#define DUMP_STATE 0x2 +#define DUMP_BATCH 0x4 + +void brw_dump_data( unsigned pci_id, + enum brw_buffer_data_type data_type, + unsigned offset, + const void *data, + size_t size ); + + +#endif diff --git a/src/gallium/drivers/i965/brw_winsys_debug.c b/src/gallium/drivers/i965/brw_winsys_debug.c new file mode 100644 index 00000000000..f8f6a539bc9 --- /dev/null +++ b/src/gallium/drivers/i965/brw_winsys_debug.c @@ -0,0 +1,87 @@ +#include "brw_winsys.h" +#include "brw_disasm.h" +#include "brw_structs_dump.h" +#include "brw_structs.h" +#include "intel_decode.h" + + +void brw_dump_data( unsigned pci_id, + enum brw_buffer_data_type data_type, + unsigned offset, + const void *data, + size_t size ) +{ + if (BRW_DUMP & DUMP_ASM) { + switch (data_type) { + case BRW_DATA_GS_WM_PROG: + case BRW_DATA_GS_SF_PROG: + case BRW_DATA_GS_VS_PROG: + case BRW_DATA_GS_GS_PROG: + case BRW_DATA_GS_CLIP_PROG: + brw_disasm( stderr, data, size / sizeof(struct brw_instruction) ); + break; + default: + break; + } + } + + if (BRW_DUMP & DUMP_STATE) { + switch (data_type) { + case BRW_DATA_GS_CC_VP: + brw_dump_cc_viewport( data ); + break; + case BRW_DATA_GS_CC_UNIT: + brw_dump_cc_unit_state( data ); + break; + case BRW_DATA_GS_SAMPLER_DEFAULT_COLOR: + brw_dump_sampler_default_color( data ); + break; + case BRW_DATA_GS_SAMPLER: + brw_dump_sampler_state( data ); + break; + case BRW_DATA_GS_WM_UNIT: + brw_dump_wm_unit_state( data ); + break; + case BRW_DATA_GS_SF_VP: + brw_dump_sf_viewport( data ); + break; + case BRW_DATA_GS_SF_UNIT: + brw_dump_sf_unit_state( data ); + break; + case BRW_DATA_GS_VS_UNIT: + brw_dump_vs_unit_state( data ); + break; + case BRW_DATA_GS_GS_UNIT: + brw_dump_gs_unit_state( data ); + break; + case BRW_DATA_GS_CLIP_VP: + brw_dump_clipper_viewport( data ); + break; + case BRW_DATA_GS_CLIP_UNIT: + brw_dump_clip_unit_state( data ); + break; + case BRW_DATA_SS_SURFACE: + brw_dump_surface_state( data ); + break; + case BRW_DATA_SS_SURF_BIND: + break; + case BRW_DATA_OTHER: + break; + case BRW_DATA_CONSTANT_BUFFER: + break; + default: + break; + } + } + + if (BRW_DUMP & DUMP_BATCH) { + switch (data_type) { + case BRW_DATA_BATCH_BUFFER: + intel_decode(data, size / 4, offset, pci_id); + break; + default: + break; + } + } +} + diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c new file mode 100644 index 00000000000..fdf820a9aae --- /dev/null +++ b/src/gallium/drivers/i965/brw_wm.c @@ -0,0 +1,319 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ +#include "tgsi/tgsi_info.h" + +#include "brw_context.h" +#include "brw_screen.h" +#include "brw_util.h" +#include "brw_wm.h" +#include "brw_state.h" +#include "brw_debug.h" +#include "brw_pipe_rast.h" + + +/** Return number of src args for given instruction */ +GLuint brw_wm_nr_args( GLuint opcode ) +{ + switch (opcode) { + case WM_FRONTFACING: + case WM_PIXELXY: + return 0; + case WM_CINTERP: + case WM_WPOSXY: + case WM_DELTAXY: + return 1; + case WM_LINTERP: + case WM_PIXELW: + return 2; + case WM_FB_WRITE: + case WM_PINTERP: + return 3; + case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TXP: + case TGSI_OPCODE_TXB: + case TGSI_OPCODE_TXD: + /* sampler arg is held as a field in the instruction, not in an + * actual register: + */ + return tgsi_get_opcode_info(opcode)->num_src - 1; + + default: + assert(opcode < MAX_OPCODE); + return tgsi_get_opcode_info(opcode)->num_src; + } +} + + +GLuint brw_wm_is_scalar_result( GLuint opcode ) +{ + switch (opcode) { + case TGSI_OPCODE_COS: + case TGSI_OPCODE_EX2: + case TGSI_OPCODE_LG2: + case TGSI_OPCODE_POW: + case TGSI_OPCODE_RCP: + case TGSI_OPCODE_RSQ: + case TGSI_OPCODE_SIN: + case TGSI_OPCODE_DP3: + case TGSI_OPCODE_DP4: + case TGSI_OPCODE_DPH: + case TGSI_OPCODE_DST: + return 1; + + default: + return 0; + } +} + + +/** + * Do GPU code generation for shaders without flow control. Shaders + * without flow control instructions can more readily be analysed for + * SSA-style optimizations. + */ +static void +brw_wm_linear_shader_emit(struct brw_context *brw, struct brw_wm_compile *c) +{ + /* Augment fragment program. Add instructions for pre- and + * post-fragment-program tasks such as interpolation and fogging. + */ + brw_wm_pass_fp(c); + + /* Translate to intermediate representation. Build register usage + * chains. + */ + brw_wm_pass0(c); + + /* Dead code removal. + */ + brw_wm_pass1(c); + + /* Register allocation. + * Divide by two because we operate on 16 pixels at a time and require + * two GRF entries for each logical shader register. + */ + c->grf_limit = BRW_WM_MAX_GRF / 2; + + brw_wm_pass2(c); + + /* how many general-purpose registers are used */ + c->prog_data.total_grf = c->max_wm_grf; + + /* Scratch space is used for register spilling */ + if (c->last_scratch) { + c->prog_data.total_scratch = c->last_scratch + 0x40; + } + else { + c->prog_data.total_scratch = 0; + } + + /* Emit GEN4 code. + */ + brw_wm_emit(c); +} + + +/** + * All Mesa program -> GPU code generation goes through this function. + * Depending on the instructions used (i.e. flow control instructions) + * we'll use one of two code generators. + */ +static enum pipe_error do_wm_prog( struct brw_context *brw, + struct brw_fragment_shader *fp, + struct brw_wm_prog_key *key, + struct brw_winsys_buffer **bo_out) +{ + enum pipe_error ret; + struct brw_wm_compile *c; + const GLuint *program; + GLuint program_size; + + if (brw->wm.compile_data == NULL) { + brw->wm.compile_data = MALLOC(sizeof(*brw->wm.compile_data)); + if (!brw->wm.compile_data) + return PIPE_ERROR_OUT_OF_MEMORY; + } + + c = brw->wm.compile_data; + memset(c, 0, sizeof *c); + + c->key = *key; + c->fp = fp; + c->env_param = NULL; /*brw->intel.ctx.FragmentProgram.Parameters;*/ + + brw_init_compile(brw, &c->func); + + /* + * Shader which use GLSL features such as flow control are handled + * differently from "simple" shaders. + */ + if (fp->has_flow_control) { + c->dispatch_width = 8; + /* XXX: GLSL support + */ + exit(1); + /* brw_wm_branching_shader_emit(brw, c); */ + } + else { + c->dispatch_width = 16; + brw_wm_linear_shader_emit(brw, c); + } + + if (BRW_DEBUG & DEBUG_WM) + debug_printf("\n"); + + /* get the program + */ + ret = brw_get_program(&c->func, &program, &program_size); + if (ret) + return ret; + + ret = brw_upload_cache( &brw->cache, BRW_WM_PROG, + &c->key, sizeof(c->key), + NULL, 0, + program, program_size, + &c->prog_data, + &brw->wm.prog_data, + bo_out ); + if (ret) + return ret; + + return PIPE_OK; +} + + + +static void brw_wm_populate_key( struct brw_context *brw, + struct brw_wm_prog_key *key ) +{ + unsigned lookup, line_aa; + unsigned i; + + memset(key, 0, sizeof(*key)); + + /* PIPE_NEW_FRAGMENT_SHADER + * PIPE_NEW_DEPTH_STENCIL_ALPHA + */ + lookup = (brw->curr.zstencil->iz_lookup | + brw->curr.fragment_shader->iz_lookup); + + + /* PIPE_NEW_RAST + * BRW_NEW_REDUCED_PRIMITIVE + */ + switch (brw->reduced_primitive) { + case PIPE_PRIM_POINTS: + line_aa = AA_NEVER; + break; + case PIPE_PRIM_LINES: + line_aa = (brw->curr.rast->templ.line_smooth ? + AA_ALWAYS : AA_NEVER); + break; + default: + line_aa = brw->curr.rast->unfilled_aa_line; + break; + } + + brw_wm_lookup_iz(line_aa, + lookup, + brw->curr.fragment_shader->uses_depth, + key); + + /* PIPE_NEW_RAST */ + key->flat_shade = brw->curr.rast->templ.flatshade; + + + /* PIPE_NEW_BOUND_TEXTURES */ + for (i = 0; i < brw->curr.num_textures; i++) { + const struct brw_texture *tex = brw_texture(brw->curr.texture[i]); + + if (tex->base.format == PIPE_FORMAT_YCBCR) + key->yuvtex_mask |= 1 << i; + + if (tex->base.format == PIPE_FORMAT_YCBCR_REV) + key->yuvtex_swap_mask |= 1 << i; + + /* XXX: shadow texture + */ + /* key->shadowtex_mask |= 1<<i; */ + } + + /* CACHE_NEW_VS_PROG */ + key->vp_nr_outputs = brw->vs.prog_data->nr_outputs; + + key->nr_cbufs = brw->curr.fb.nr_cbufs; + + key->nr_inputs = brw->curr.fragment_shader->info.num_inputs; + + /* The unique fragment program ID */ + key->program_string_id = brw->curr.fragment_shader->id; +} + + +static enum pipe_error brw_prepare_wm_prog(struct brw_context *brw) +{ + struct brw_wm_prog_key key; + struct brw_fragment_shader *fs = brw->curr.fragment_shader; + enum pipe_error ret; + + brw_wm_populate_key(brw, &key); + + /* Make an early check for the key. + */ + if (brw_search_cache(&brw->cache, BRW_WM_PROG, + &key, sizeof(key), + NULL, 0, + &brw->wm.prog_data, + &brw->wm.prog_bo)) + return PIPE_OK; + + ret = do_wm_prog(brw, fs, &key, &brw->wm.prog_bo); + if (ret) + return ret; + + return PIPE_OK; +} + + +const struct brw_tracked_state brw_wm_prog = { + .dirty = { + .mesa = (PIPE_NEW_FRAGMENT_SHADER | + PIPE_NEW_DEPTH_STENCIL_ALPHA | + PIPE_NEW_RAST | + PIPE_NEW_NR_CBUFS | + PIPE_NEW_BOUND_TEXTURES), + .brw = (BRW_NEW_WM_INPUT_DIMENSIONS | + BRW_NEW_REDUCED_PRIMITIVE), + .cache = CACHE_NEW_VS_PROG, + }, + .prepare = brw_prepare_wm_prog +}; + diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h new file mode 100644 index 00000000000..f1ca9f63696 --- /dev/null +++ b/src/gallium/drivers/i965/brw_wm.h @@ -0,0 +1,344 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#ifndef BRW_WM_H +#define BRW_WM_H + +#include "brw_context.h" +#include "brw_eu.h" + +#define SATURATE (1<<5) + +/* A big lookup table is used to figure out which and how many + * additional regs will inserted before the main payload in the WM + * program execution. These mainly relate to depth and stencil + * processing and the early-depth-test optimization. + */ +#define IZ_PS_KILL_ALPHATEST_BIT 0x1 +#define IZ_PS_COMPUTES_DEPTH_BIT 0x2 +#define IZ_DEPTH_WRITE_ENABLE_BIT 0x4 +#define IZ_DEPTH_TEST_ENABLE_BIT 0x8 +#define IZ_STENCIL_WRITE_ENABLE_BIT 0x10 +#define IZ_STENCIL_TEST_ENABLE_BIT 0x20 +#define IZ_BIT_MAX 0x40 + +#define AA_NEVER 0 +#define AA_SOMETIMES 1 +#define AA_ALWAYS 2 + +struct brw_wm_prog_key { + GLuint source_depth_reg:3; + GLuint aa_dest_stencil_reg:3; + GLuint dest_depth_reg:3; + GLuint nr_depth_regs:3; + GLuint computes_depth:1; + GLuint source_depth_to_render_target:1; + GLuint flat_shade:1; + GLuint runtime_check_aads_emit:1; + + GLuint shadowtex_mask:16; + GLuint yuvtex_mask:16; + GLuint yuvtex_swap_mask:16; /* UV swaped */ + + GLuint vp_nr_outputs:6; + GLuint nr_inputs:6; + GLuint nr_cbufs:3; + GLuint has_flow_control:1; + + GLuint program_string_id; +}; + + +/* A bit of a glossary: + * + * brw_wm_value: A computed value or program input. Values are + * constant, they are created once and are never modified. When a + * fragment program register is written or overwritten, new values are + * created fresh, preserving the rule that values are constant. + * + * brw_wm_ref: A reference to a value. Wherever a value used is by an + * instruction or as a program output, that is tracked with an + * instance of this struct. All references to a value occur after it + * is created. After the last reference, a value is dead and can be + * discarded. + * + * brw_wm_grf: Represents a physical hardware register. May be either + * empty or hold a value. Register allocation is the process of + * assigning values to grf registers. This occurs in pass2 and the + * brw_wm_grf struct is not used before that. + * + * Fragment program registers: These are time-varying constructs that + * are hard to reason about and which we translate away in pass0. A + * single fragment program register element (eg. temp[0].x) will be + * translated to one or more brw_wm_value structs, one for each time + * that temp[0].x is written to during the program. + */ + + + +/* Used in pass2 to track register allocation. + */ +struct brw_wm_grf { + struct brw_wm_value *value; + GLuint nextuse; +}; + +struct brw_wm_value { + struct brw_reg hw_reg; /* emitted to this reg, may not always be there */ + struct brw_wm_ref *lastuse; + struct brw_wm_grf *resident; + GLuint contributes_to_output:1; + GLuint spill_slot:16; /* if non-zero, spill immediately after calculation */ +}; + +struct brw_wm_ref { + struct brw_reg hw_reg; /* nr filled in in pass2, everything else, pass0 */ + struct brw_wm_value *value; + struct brw_wm_ref *prevuse; + GLuint unspill_reg:7; /* unspill to reg */ + GLuint emitted:1; + GLuint insn:24; +}; + +struct brw_wm_instruction { + struct brw_wm_value *dst[4]; + struct brw_wm_ref *src[3][4]; + GLuint opcode:8; + GLuint saturate:1; + GLuint writemask:4; + GLuint sampler:4; + GLuint tex_unit:4; /* texture/sampler unit for texture instructions */ + GLuint target:4; /* TGSI_TEXTURE_x for texture instructions, + * target binding table index for FB_WRITE + */ + GLuint eot:1; /* End of thread indicator for FB_WRITE*/ +}; + + +#define BRW_WM_MAX_INSN 2048 +#define BRW_WM_MAX_GRF 128 /* hardware limit */ +#define BRW_WM_MAX_VREG (BRW_WM_MAX_INSN * 4) +#define BRW_WM_MAX_REF (BRW_WM_MAX_INSN * 12) +#define BRW_WM_MAX_PARAM 256 +#define BRW_WM_MAX_CONST 256 +#define BRW_WM_MAX_KILLS MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS +#define BRW_WM_MAX_SUBROUTINE 16 + + +/* New opcodes to track internal operations required for WM unit. + * These are added early so that the registers used can be tracked, + * freed and reused like those of other instructions. + */ +#define MAX_OPCODE TGSI_OPCODE_LAST +#define WM_PIXELXY (MAX_OPCODE) +#define WM_DELTAXY (MAX_OPCODE + 1) +#define WM_PIXELW (MAX_OPCODE + 2) +#define WM_LINTERP (MAX_OPCODE + 3) +#define WM_PINTERP (MAX_OPCODE + 4) +#define WM_CINTERP (MAX_OPCODE + 5) +#define WM_WPOSXY (MAX_OPCODE + 6) +#define WM_FB_WRITE (MAX_OPCODE + 7) +#define WM_FRONTFACING (MAX_OPCODE + 8) +#define MAX_WM_OPCODE (MAX_OPCODE + 9) + +#define BRW_FILE_PAYLOAD (TGSI_FILE_COUNT) +#define PAYLOAD_DEPTH (PIPE_MAX_SHADER_INPUTS) /* ?? */ + +#define X 0 +#define Y 1 +#define Z 2 +#define W 3 + + +struct brw_fp_src { + unsigned file:4; + unsigned index:16; + unsigned swizzle:8; + unsigned indirect:1; + unsigned negate:1; + unsigned abs:1; +}; + +struct brw_fp_dst { + unsigned file:4; + unsigned index:16; + unsigned writemask:4; + unsigned indirect:1; + unsigned saturate:1; +}; + +struct brw_fp_instruction { + struct brw_fp_dst dst; + struct brw_fp_src src[3]; + unsigned opcode:8; + unsigned target:8; /* XXX: special usage for FB_WRITE */ + unsigned tex_unit:4; + unsigned sampler:4; + unsigned pad:8; +}; + + +struct brw_wm_compile { + struct brw_compile func; + struct brw_wm_prog_key key; + struct brw_wm_prog_data prog_data; + + struct brw_fragment_shader *fp; + + GLfloat (*env_param)[4]; + + enum { + START, + PASS2_DONE + } state; + + /* Initial pass - translate fp instructions to fp instructions, + * simplifying and adding instructions for interpolation and + * framebuffer writes. + */ + struct { + GLfloat v[4]; + unsigned nr; + } immediate[BRW_WM_MAX_CONST+3]; + GLuint nr_immediates; + + struct brw_fp_instruction fp_instructions[BRW_WM_MAX_INSN]; + GLuint nr_fp_insns; + GLuint fp_temp; + GLuint fp_interp_emitted; + GLuint fp_fragcolor_emitted; + GLuint fp_first_internal_temp; + + struct brw_fp_src fp_pixel_xy; + struct brw_fp_src fp_delta_xy; + struct brw_fp_src fp_pixel_w; + + + /* Subsequent passes using SSA representation: + */ + struct brw_wm_value vreg[BRW_WM_MAX_VREG]; + GLuint nr_vreg; + + struct brw_wm_value creg[BRW_WM_MAX_PARAM]; + GLuint nr_creg; + + struct { + struct brw_wm_value depth[4]; /* includes r0/r1 */ + struct brw_wm_value input_interp[PIPE_MAX_SHADER_INPUTS]; + } payload; + + + const struct brw_wm_ref *pass0_fp_reg[BRW_FILE_PAYLOAD+1][256][4]; + + struct brw_wm_ref undef_ref; + struct brw_wm_value undef_value; + + struct brw_wm_ref refs[BRW_WM_MAX_REF]; + GLuint nr_refs; + + struct brw_wm_instruction instruction[BRW_WM_MAX_INSN]; + GLuint nr_insns; + + struct brw_wm_grf pass2_grf[BRW_WM_MAX_GRF/2]; + + GLuint grf_limit; + GLuint max_wm_grf; + GLuint last_scratch; + + GLuint cur_inst; /**< index of current instruction */ + + GLboolean out_of_regs; /**< ran out of GRF registers? */ + + /** Mapping from Mesa registers to hardware registers */ + struct { + GLboolean inited; + struct brw_reg reg; + } wm_regs[BRW_FILE_PAYLOAD+1][256][4]; + + GLboolean used_grf[BRW_WM_MAX_GRF]; + GLuint first_free_grf; + struct brw_reg stack; + struct brw_reg emit_mask_reg; + GLuint tmp_regs[BRW_WM_MAX_GRF]; + GLuint tmp_index; + GLuint tmp_max; + GLuint subroutines[BRW_WM_MAX_SUBROUTINE]; + GLuint dispatch_width; + + /** we may need up to 3 constants per instruction (if use_const_buffer) */ + struct { + GLint index; + struct brw_reg reg; + } current_const[3]; + + GLuint error; +}; + + +GLuint brw_wm_nr_args( GLuint opcode ); +GLuint brw_wm_is_scalar_result( GLuint opcode ); + +int brw_wm_pass_fp( struct brw_wm_compile *c ); +void brw_wm_pass0( struct brw_wm_compile *c ); +void brw_wm_pass1( struct brw_wm_compile *c ); +void brw_wm_pass2( struct brw_wm_compile *c ); +void brw_wm_emit( struct brw_wm_compile *c ); + +void brw_wm_print_value( struct brw_wm_compile *c, + struct brw_wm_value *value ); + +void brw_wm_print_ref( struct brw_wm_compile *c, + struct brw_wm_ref *ref ); + +void brw_wm_print_insn( struct brw_wm_compile *c, + struct brw_wm_instruction *inst ); + +void brw_wm_print_program( struct brw_wm_compile *c, + const char *stage ); + +void brw_wm_print_fp_program( struct brw_wm_compile *c, + const char *stage ); + +void brw_wm_lookup_iz( GLuint line_aa, + GLuint lookup, + GLboolean ps_uses_depth, + struct brw_wm_prog_key *key ); + +void brw_wm_branching_shader_emit(struct brw_context *brw, struct brw_wm_compile *c); + +void emit_ddxy(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + GLboolean is_ddx, + const struct brw_reg *arg0); + +#endif diff --git a/src/gallium/drivers/i965/brw_wm_constant_buffer.c b/src/gallium/drivers/i965/brw_wm_constant_buffer.c new file mode 100644 index 00000000000..6434c6acf73 --- /dev/null +++ b/src/gallium/drivers/i965/brw_wm_constant_buffer.c @@ -0,0 +1,165 @@ +/* XXX: Constant buffers disabled + */ + + +/** + * Create the constant buffer surface. Vertex/fragment shader constants will be + * read from this buffer with Data Port Read instructions/messages. + */ +enum pipe_error +brw_create_constant_surface( struct brw_context *brw, + struct brw_surface_key *key, + struct brw_winsys_buffer **bo_out ) +{ + const GLint w = key->width - 1; + struct brw_winsys_buffer *bo; + struct brw_winsys_reloc reloc[1]; + enum pipe_error ret; + + /* Emit relocation to surface contents */ + make_reloc(&reloc[0], + BRW_USAGE_SAMPLER, + 0, + offsetof(struct brw_surface_state, ss1), + key->bo); + + + memset(&surf, 0, sizeof(surf)); + + surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; + surf.ss0.surface_type = BRW_SURFACE_BUFFER; + surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; + + surf.ss1.base_addr = 0; /* reloc */ + + surf.ss2.width = w & 0x7f; /* bits 6:0 of size or width */ + surf.ss2.height = (w >> 7) & 0x1fff; /* bits 19:7 of size or width */ + surf.ss3.depth = (w >> 20) & 0x7f; /* bits 26:20 of size or width */ + surf.ss3.pitch = (key->pitch * key->cpp) - 1; /* ignored?? */ + brw_set_surface_tiling(&surf, key->tiling); /* tiling now allowed */ + + ret = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, + key, sizeof(*key), + reloc, Elements(reloc), + &surf, sizeof(surf), + NULL, NULL, + &bo_out); + if (ret) + return ret; + + return PIPE_OK; +} + + + +/** + * Update the surface state for a WM constant buffer. + * The constant buffer will be (re)allocated here if needed. + */ +static enum pipe_error +brw_update_wm_constant_surface( struct brw_context *brw, + GLuint surf) +{ + struct brw_surface_key key; + struct brw_fragment_shader *fp = brw->curr.fragment_shader; + struct pipe_buffer *cbuf = brw->curr.fragment_constants; + int pitch = cbuf->size / (4 * sizeof(float)); + enum pipe_error ret; + + /* If we're in this state update atom, we need to update WM constants, so + * free the old buffer and create a new one for the new contents. + */ + ret = brw_wm_update_constant_buffer(brw, &fp->const_buffer); + if (ret) + return ret; + + /* If there's no constant buffer, then no surface BO is needed to point at + * it. + */ + if (cbuf == NULL) { + bo_reference(&brw->wm.surf_bo[surf], NULL); + return PIPE_OK; + } + + memset(&key, 0, sizeof(key)); + + key.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; + key.ss0.surface_type = BRW_SURFACE_BUFFER; + key.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; + + key.bo = brw_buffer(cbuf)->bo; + + key.ss2.width = (pitch-1) & 0x7f; /* bits 6:0 of size or width */ + key.ss2.height = ((pitch-1) >> 7) & 0x1fff; /* bits 19:7 of size or width */ + key.ss3.depth = ((pitch-1) >> 20) & 0x7f; /* bits 26:20 of size or width */ + key.ss3.pitch = (pitch * 4 * sizeof(float)) - 1; /* ignored?? */ + brw_set_surface_tiling(&surf, key->tiling); /* tiling now allowed */ + + + /* + printf("%s:\n", __FUNCTION__); + printf(" width %d height %d depth %d cpp %d pitch %d\n", + key.width, key.height, key.depth, key.cpp, key.pitch); + */ + + if (brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, + &key, sizeof(key), + &key.bo, 1, + NULL, + &brw->wm.surf_bo[surf])) + return PIPE_OK; + + ret = brw_create_constant_surface(brw, &key, &brw->wm.surf_bo[surf]); + if (ret) + return ret; + + brw->state.dirty.brw |= BRW_NEW_WM_SURFACES; + return PIPE_OK; +} + +/** + * Updates surface / buffer for fragment shader constant buffer, if + * one is required. + * + * This consumes the state updates for the constant buffer, and produces + * BRW_NEW_WM_SURFACES to get picked up by brw_prepare_wm_surfaces for + * inclusion in the binding table. + */ +static enum pipe_error prepare_wm_constant_surface(struct brw_context *brw ) +{ + struct brw_fragment_program *fp = + (struct brw_fragment_program *) brw->fragment_program; + GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER; + + ret = brw_wm_update_constant_buffer(brw, + &fp->const_buffer); + if (ret) + return ret; + + /* If there's no constant buffer, then no surface BO is needed to point at + * it. + */ + if (fp->const_buffer == 0) { + if (brw->wm.surf_bo[surf] != NULL) { + bo_reference(&brw->wm.surf_bo[surf], NULL); + brw->state.dirty.brw |= BRW_NEW_WM_SURFACES; + } + return PIPE_OK; + } + + ret = brw_update_wm_constant_surface(ctx, surf); + if (ret) + return ret; + + return PIPE_OK +} + +const struct brw_tracked_state brw_wm_constant_surface = { + .dirty = { + .mesa = (_NEW_PROGRAM_CONSTANTS), + .brw = (BRW_NEW_FRAGMENT_PROGRAM), + .cache = 0 + }, + .prepare = prepare_wm_constant_surface, +}; diff --git a/src/gallium/drivers/i965/brw_wm_debug.c b/src/gallium/drivers/i965/brw_wm_debug.c new file mode 100644 index 00000000000..3d11fa074cc --- /dev/null +++ b/src/gallium/drivers/i965/brw_wm_debug.c @@ -0,0 +1,256 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#include "tgsi/tgsi_info.h" + +#include "brw_context.h" +#include "brw_wm.h" + +static void print_writemask( unsigned writemask ) +{ + if (writemask != BRW_WRITEMASK_XYZW) + debug_printf(".%s%s%s%s", + (writemask & BRW_WRITEMASK_X) ? "x" : "", + (writemask & BRW_WRITEMASK_Y) ? "y" : "", + (writemask & BRW_WRITEMASK_Z) ? "z" : "", + (writemask & BRW_WRITEMASK_W) ? "w" : ""); +} + +static void print_swizzle( unsigned swizzle ) +{ + char *swz = "xyzw"; + if (swizzle != BRW_SWIZZLE_XYZW) + debug_printf(".%c%c%c%c", + swz[BRW_GET_SWZ(swizzle, X)], + swz[BRW_GET_SWZ(swizzle, Y)], + swz[BRW_GET_SWZ(swizzle, Z)], + swz[BRW_GET_SWZ(swizzle, W)]); +} + +static void print_opcode( unsigned opcode ) +{ + switch (opcode) { + case WM_PIXELXY: + debug_printf("PIXELXY"); + break; + case WM_DELTAXY: + debug_printf("DELTAXY"); + break; + case WM_PIXELW: + debug_printf("PIXELW"); + break; + case WM_WPOSXY: + debug_printf("WPOSXY"); + break; + case WM_PINTERP: + debug_printf("PINTERP"); + break; + case WM_LINTERP: + debug_printf("LINTERP"); + break; + case WM_CINTERP: + debug_printf("CINTERP"); + break; + case WM_FB_WRITE: + debug_printf("FB_WRITE"); + break; + case WM_FRONTFACING: + debug_printf("FRONTFACING"); + break; + default: + debug_printf("%s", tgsi_get_opcode_info(opcode)->mnemonic); + break; + } +} + +void brw_wm_print_value( struct brw_wm_compile *c, + struct brw_wm_value *value ) +{ + assert(value); + if (c->state >= PASS2_DONE) + brw_print_reg(value->hw_reg); + else if( value == &c->undef_value ) + debug_printf("undef"); + else if( value - c->vreg >= 0 && + value - c->vreg < BRW_WM_MAX_VREG) + debug_printf("r%d", value - c->vreg); + else if (value - c->creg >= 0 && + value - c->creg < BRW_WM_MAX_PARAM) + debug_printf("c%d", value - c->creg); + else if (value - c->payload.input_interp >= 0 && + value - c->payload.input_interp < PIPE_MAX_SHADER_INPUTS) + debug_printf("i%d", value - c->payload.input_interp); + else if (value - c->payload.depth >= 0 && + value - c->payload.depth < PIPE_MAX_SHADER_INPUTS) + debug_printf("d%d", value - c->payload.depth); + else + debug_printf("?"); +} + +void brw_wm_print_ref( struct brw_wm_compile *c, + struct brw_wm_ref *ref ) +{ + struct brw_reg hw_reg = ref->hw_reg; + + if (ref->unspill_reg) + debug_printf("UNSPILL(%x)/", ref->value->spill_slot); + + if (c->state >= PASS2_DONE) + brw_print_reg(ref->hw_reg); + else { + debug_printf("%s", hw_reg.negate ? "-" : ""); + debug_printf("%s", hw_reg.abs ? "abs/" : ""); + brw_wm_print_value(c, ref->value); + if ((hw_reg.nr&1) || hw_reg.subnr) { + debug_printf("->%d.%d", (hw_reg.nr&1), hw_reg.subnr); + } + } +} + +void brw_wm_print_insn( struct brw_wm_compile *c, + struct brw_wm_instruction *inst ) +{ + GLuint i, arg; + GLuint nr_args = brw_wm_nr_args(inst->opcode); + + debug_printf("["); + for (i = 0; i < 4; i++) { + if (inst->dst[i]) { + brw_wm_print_value(c, inst->dst[i]); + if (inst->dst[i]->spill_slot) + debug_printf("/SPILL(%x)",inst->dst[i]->spill_slot); + } + else + debug_printf("#"); + if (i < 3) + debug_printf(","); + } + debug_printf("]"); + print_writemask(inst->writemask); + + debug_printf(" = "); + print_opcode(inst->opcode); + + if (inst->saturate) + debug_printf("_SAT"); + + for (arg = 0; arg < nr_args; arg++) { + + debug_printf(" ["); + + for (i = 0; i < 4; i++) { + if (inst->src[arg][i]) { + brw_wm_print_ref(c, inst->src[arg][i]); + } + else + debug_printf("%%"); + + if (i < 3) + debug_printf(","); + else + debug_printf("]"); + } + } + debug_printf("\n"); +} + +void brw_wm_print_program( struct brw_wm_compile *c, + const char *stage ) +{ + GLuint insn; + + debug_printf("%s:\n", stage); + for (insn = 0; insn < c->nr_insns; insn++) + brw_wm_print_insn(c, &c->instruction[insn]); + debug_printf("\n"); +} + +static const char *file_strings[TGSI_FILE_COUNT+1] = { + "NULL", + "CONST", + "IN", + "OUT", + "TEMP", + "SAMPLER", + "ADDR", + "IMM", + "LOOP", + "PAYLOAD" +}; + +static void brw_wm_print_fp_insn( struct brw_wm_compile *c, + struct brw_fp_instruction *inst ) +{ + GLuint i; + GLuint nr_args = brw_wm_nr_args(inst->opcode); + + print_opcode(inst->opcode); + if (inst->dst.saturate) + debug_printf("_SAT"); + debug_printf(" "); + + if (inst->dst.indirect) + debug_printf("["); + + debug_printf("%s[%d]", + file_strings[inst->dst.file], + inst->dst.index ); + print_writemask(inst->dst.writemask); + + if (inst->dst.indirect) + debug_printf("]"); + + debug_printf(nr_args ? ", " : "\n"); + + for (i = 0; i < nr_args; i++) { + debug_printf("%s%s%s[%d]%s", + inst->src[i].negate ? "-" : "", + inst->src[i].abs ? "ABS(" : "", + file_strings[inst->src[i].file], + inst->src[i].index, + inst->src[i].abs ? ")" : ""); + print_swizzle(inst->src[i].swizzle); + debug_printf("%s", i == nr_args - 1 ? "\n" : ", "); + } +} + + +void brw_wm_print_fp_program( struct brw_wm_compile *c, + const char *stage ) +{ + GLuint insn; + + debug_printf("%s:\n", stage); + for (insn = 0; insn < c->nr_fp_insns; insn++) + brw_wm_print_fp_insn(c, &c->fp_instructions[insn]); + debug_printf("\n"); +} + diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c new file mode 100644 index 00000000000..3d162a67e7f --- /dev/null +++ b/src/gallium/drivers/i965/brw_wm_emit.c @@ -0,0 +1,1521 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#include "util/u_math.h" +#include "tgsi/tgsi_info.h" + +#include "brw_context.h" +#include "brw_wm.h" +#include "brw_debug.h" +#include "brw_disasm.h" + +/* Not quite sure how correct this is - need to understand horiz + * vs. vertical strides a little better. + */ +static INLINE struct brw_reg sechalf( struct brw_reg reg ) +{ + if (reg.vstride) + reg.nr++; + return reg; +} + +/* Payload R0: + * + * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 quads, + * corresponding to each of the 16 execution channels. + * R0.1..8 -- ? + * R1.0 -- triangle vertex 0.X + * R1.1 -- triangle vertex 0.Y + * R1.2 -- quad 0 x,y coords (2 packed uwords) + * R1.3 -- quad 1 x,y coords (2 packed uwords) + * R1.4 -- quad 2 x,y coords (2 packed uwords) + * R1.5 -- quad 3 x,y coords (2 packed uwords) + * R1.6 -- ? + * R1.7 -- ? + * R1.8 -- ? + */ + + +static void emit_pixel_xy(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask) +{ + struct brw_reg r1 = brw_vec1_grf(1, 0); + struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW); + + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + /* Calculate pixel centers by adding 1 or 0 to each of the + * micro-tile coordinates passed in r1. + */ + if (mask & BRW_WRITEMASK_X) { + brw_ADD(p, + vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)), + stride(suboffset(r1_uw, 4), 2, 4, 0), + brw_imm_v(0x10101010)); + } + + if (mask & BRW_WRITEMASK_Y) { + brw_ADD(p, + vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)), + stride(suboffset(r1_uw,5), 2, 4, 0), + brw_imm_v(0x11001100)); + } + + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); +} + + + +static void emit_delta_xy(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0) +{ + struct brw_reg r1 = brw_vec1_grf(1, 0); + + /* Calc delta X,Y by subtracting origin in r1 from the pixel + * centers. + */ + if (mask & BRW_WRITEMASK_X) { + brw_ADD(p, + dst[0], + retype(arg0[0], BRW_REGISTER_TYPE_UW), + negate(r1)); + } + + if (mask & BRW_WRITEMASK_Y) { + brw_ADD(p, + dst[1], + retype(arg0[1], BRW_REGISTER_TYPE_UW), + negate(suboffset(r1,1))); + + } +} + +static void emit_wpos_xy(struct brw_wm_compile *c, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0) +{ + struct brw_compile *p = &c->func; + + if (mask & BRW_WRITEMASK_X) { + /* X' = X */ + brw_MOV(p, + dst[0], + retype(arg0[0], BRW_REGISTER_TYPE_W)); + } + + /* XXX: is this needed any more, or is this a NOOP? + */ + if (mask & BRW_WRITEMASK_Y) { +#if 0 + /* Y' = height - 1 - Y */ + brw_ADD(p, + dst[1], + negate(retype(arg0[1], BRW_REGISTER_TYPE_W)), + brw_imm_d(c->key.drawable_height - 1)); +#else + brw_MOV(p, + dst[0], + retype(arg0[0], BRW_REGISTER_TYPE_W)); +#endif + } +} + + +static void emit_pixel_w( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *deltas) +{ + /* Don't need this if all you are doing is interpolating color, for + * instance. + */ + if (mask & BRW_WRITEMASK_W) { + struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4); + + /* Calc 1/w - just linterp wpos[3] optimized by putting the + * result straight into a message reg. + */ + brw_LINE(p, brw_null_reg(), interp3, deltas[0]); + brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]); + + /* Calc w */ + brw_math_16( p, dst[3], + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 2, brw_null_reg(), + BRW_MATH_PRECISION_FULL); + } +} + + + +static void emit_linterp( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *deltas ) +{ + struct brw_reg interp[4]; + GLuint nr = arg0[0].nr; + GLuint i; + + interp[0] = brw_vec1_grf(nr, 0); + interp[1] = brw_vec1_grf(nr, 4); + interp[2] = brw_vec1_grf(nr+1, 0); + interp[3] = brw_vec1_grf(nr+1, 4); + + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + brw_LINE(p, brw_null_reg(), interp[i], deltas[0]); + brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]); + } + } +} + + +static void emit_pinterp( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *deltas, + const struct brw_reg *w) +{ + struct brw_reg interp[4]; + GLuint nr = arg0[0].nr; + GLuint i; + + interp[0] = brw_vec1_grf(nr, 0); + interp[1] = brw_vec1_grf(nr, 4); + interp[2] = brw_vec1_grf(nr+1, 0); + interp[3] = brw_vec1_grf(nr+1, 4); + + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + brw_LINE(p, brw_null_reg(), interp[i], deltas[0]); + brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]); + } + } + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + brw_MUL(p, dst[i], dst[i], w[3]); + } + } +} + + +static void emit_cinterp( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0 ) +{ + struct brw_reg interp[4]; + GLuint nr = arg0[0].nr; + GLuint i; + + interp[0] = brw_vec1_grf(nr, 0); + interp[1] = brw_vec1_grf(nr, 4); + interp[2] = brw_vec1_grf(nr+1, 0); + interp[3] = brw_vec1_grf(nr+1, 4); + + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + brw_MOV(p, dst[i], suboffset(interp[i],3)); /* TODO: optimize away like other moves */ + } + } +} + +/* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */ +static void emit_frontfacing( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask ) +{ + struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); + GLuint i; + + if (!(mask & BRW_WRITEMASK_XYZW)) + return; + + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + brw_MOV(p, dst[i], brw_imm_f(0.0)); + } + } + + /* bit 31 is "primitive is back face", so checking < (1 << 31) gives + * us front face + */ + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31)); + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + brw_MOV(p, dst[i], brw_imm_f(1.0)); + } + } + brw_set_predicate_control_flag_value(p, 0xff); +} + +/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input + * looking like: + * + * arg0: q0.tl q0.tr q0.bl q0.br q1.tl q1.tr q1.bl q1.br + * + * and we're trying to produce: + * + * DDX DDY + * dst: (q0.tr - q0.tl) (q0.tl - q0.bl) + * (q0.tr - q0.tl) (q0.tr - q0.br) + * (q0.br - q0.bl) (q0.tl - q0.bl) + * (q0.br - q0.bl) (q0.tr - q0.br) + * (q1.tr - q1.tl) (q1.tl - q1.bl) + * (q1.tr - q1.tl) (q1.tr - q1.br) + * (q1.br - q1.bl) (q1.tl - q1.bl) + * (q1.br - q1.bl) (q1.tr - q1.br) + * + * and add two more quads if in 16-pixel dispatch mode. + * + * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result + * for each pair, and vertstride = 2 jumps us 2 elements after processing a + * pair. But for DDY, it's harder, as we want to produce the pairs swizzled + * between each other. We could probably do it like ddx and swizzle the right + * order later, but bail for now and just produce + * ((q0.tl - q0.bl)x4 (q1.tl - q1.bl)x4) + */ +void emit_ddxy(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + GLboolean is_ddx, + const struct brw_reg *arg0) +{ + int i; + struct brw_reg src0, src1; + + if (mask & SATURATE) + brw_set_saturate(p, 1); + for (i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + if (is_ddx) { + src0 = brw_reg(arg0[i].file, arg0[i].nr, 1, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_2, + BRW_WIDTH_2, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW); + src1 = brw_reg(arg0[i].file, arg0[i].nr, 0, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_2, + BRW_WIDTH_2, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW); + } else { + src0 = brw_reg(arg0[i].file, arg0[i].nr, 0, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_4, + BRW_WIDTH_4, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW); + src1 = brw_reg(arg0[i].file, arg0[i].nr, 2, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_4, + BRW_WIDTH_4, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW); + } + brw_ADD(p, dst[i], src0, negate(src1)); + } + } + if (mask & SATURATE) + brw_set_saturate(p, 0); +} + +static void emit_alu1( struct brw_compile *p, + struct brw_instruction *(*func)(struct brw_compile *, + struct brw_reg, + struct brw_reg), + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0 ) +{ + GLuint i; + + if (mask & SATURATE) + brw_set_saturate(p, 1); + + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + func(p, dst[i], arg0[i]); + } + } + + if (mask & SATURATE) + brw_set_saturate(p, 0); +} + + +static void emit_alu2( struct brw_compile *p, + struct brw_instruction *(*func)(struct brw_compile *, + struct brw_reg, + struct brw_reg, + struct brw_reg), + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1 ) +{ + GLuint i; + + if (mask & SATURATE) + brw_set_saturate(p, 1); + + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + func(p, dst[i], arg0[i], arg1[i]); + } + } + + if (mask & SATURATE) + brw_set_saturate(p, 0); +} + + +static void emit_mad( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1, + const struct brw_reg *arg2 ) +{ + GLuint i; + + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + brw_MUL(p, dst[i], arg0[i], arg1[i]); + + brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); + brw_ADD(p, dst[i], dst[i], arg2[i]); + brw_set_saturate(p, 0); + } + } +} + +static void emit_trunc( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0) +{ + GLuint i; + + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + brw_RNDZ(p, dst[i], arg0[i]); + } + } +} + +static void emit_lrp( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1, + const struct brw_reg *arg2 ) +{ + GLuint i; + + /* Uses dst as a temporary: + */ + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + /* Can I use the LINE instruction for this? + */ + brw_ADD(p, dst[i], negate(arg0[i]), brw_imm_f(1.0)); + brw_MUL(p, brw_null_reg(), dst[i], arg2[i]); + + brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); + brw_MAC(p, dst[i], arg0[i], arg1[i]); + brw_set_saturate(p, 0); + } + } +} + +static void emit_sop( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + GLuint cond, + const struct brw_reg *arg0, + const struct brw_reg *arg1 ) +{ + GLuint i; + + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + brw_MOV(p, dst[i], brw_imm_f(0)); + brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]); + brw_MOV(p, dst[i], brw_imm_f(1.0)); + brw_set_predicate_control_flag_value(p, 0xff); + } + } +} + +static void emit_slt( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1 ) +{ + emit_sop(p, dst, mask, BRW_CONDITIONAL_L, arg0, arg1); +} + +static void emit_sle( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1 ) +{ + emit_sop(p, dst, mask, BRW_CONDITIONAL_LE, arg0, arg1); +} + +static void emit_sgt( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1 ) +{ + emit_sop(p, dst, mask, BRW_CONDITIONAL_G, arg0, arg1); +} + +static void emit_sge( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1 ) +{ + emit_sop(p, dst, mask, BRW_CONDITIONAL_GE, arg0, arg1); +} + +static void emit_seq( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1 ) +{ + emit_sop(p, dst, mask, BRW_CONDITIONAL_EQ, arg0, arg1); +} + +static void emit_sne( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1 ) +{ + emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1); +} + +static void emit_cmp( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1, + const struct brw_reg *arg2 ) +{ + GLuint i; + + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); + brw_MOV(p, dst[i], arg2[i]); + brw_set_saturate(p, 0); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0)); + + brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); + brw_MOV(p, dst[i], arg1[i]); + brw_set_saturate(p, 0); + brw_set_predicate_control_flag_value(p, 0xff); + } + } +} + +static void emit_max( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1 ) +{ + GLuint i; + + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); + brw_MOV(p, dst[i], arg0[i]); + brw_set_saturate(p, 0); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]); + + brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); + brw_MOV(p, dst[i], arg1[i]); + brw_set_saturate(p, 0); + brw_set_predicate_control_flag_value(p, 0xff); + } + } +} + +static void emit_min( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1 ) +{ + GLuint i; + + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); + brw_MOV(p, dst[i], arg1[i]); + brw_set_saturate(p, 0); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]); + + brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); + brw_MOV(p, dst[i], arg0[i]); + brw_set_saturate(p, 0); + brw_set_predicate_control_flag_value(p, 0xff); + } + } +} + + +static void emit_dp3( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1 ) +{ + int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1; + + if (!(mask & BRW_WRITEMASK_XYZW)) + return; /* Do not emit dead code */ + + assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW)); + + brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); + brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]); + + brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); + brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]); + brw_set_saturate(p, 0); +} + + +static void emit_dp4( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1 ) +{ + int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1; + + if (!(mask & BRW_WRITEMASK_XYZW)) + return; /* Do not emit dead code */ + + assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW)); + + brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); + brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]); + brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]); + + brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); + brw_MAC(p, dst[dst_chan], arg0[3], arg1[3]); + brw_set_saturate(p, 0); +} + + +static void emit_dph( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1 ) +{ + const int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1; + + if (!(mask & BRW_WRITEMASK_XYZW)) + return; /* Do not emit dead code */ + + assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW)); + + brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); + brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]); + brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]); + + brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); + brw_ADD(p, dst[dst_chan], dst[dst_chan], arg1[3]); + brw_set_saturate(p, 0); +} + + +static void emit_xpd( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1 ) +{ + GLuint i; + + assert(!(mask & BRW_WRITEMASK_W) == BRW_WRITEMASK_W); + + for (i = 0 ; i < 3; i++) { + if (mask & (1<<i)) { + GLuint i2 = (i+2)%3; + GLuint i1 = (i+1)%3; + + brw_MUL(p, brw_null_reg(), negate(arg0[i2]), arg1[i1]); + + brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); + brw_MAC(p, dst[i], arg0[i1], arg1[i2]); + brw_set_saturate(p, 0); + } + } +} + + +static void emit_math1( struct brw_compile *p, + GLuint function, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0 ) +{ + int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1; + + if (!(mask & BRW_WRITEMASK_XYZW)) + return; /* Do not emit dead code */ + + assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW)); + + brw_MOV(p, brw_message_reg(2), arg0[0]); + + /* Send two messages to perform all 16 operations: + */ + brw_math_16(p, + dst[dst_chan], + function, + (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, + 2, + brw_null_reg(), + BRW_MATH_PRECISION_FULL); +} + + +static void emit_math2( struct brw_compile *p, + GLuint function, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1) +{ + int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1; + + if (!(mask & BRW_WRITEMASK_XYZW)) + return; /* Do not emit dead code */ + + assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW)); + + brw_push_insn_state(p); + + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MOV(p, brw_message_reg(2), arg0[0]); + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_MOV(p, brw_message_reg(4), sechalf(arg0[0])); + + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MOV(p, brw_message_reg(3), arg1[0]); + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_MOV(p, brw_message_reg(5), sechalf(arg1[0])); + + + /* Send two messages to perform all 16 operations: + */ + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_math(p, + dst[dst_chan], + function, + (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, + 2, + brw_null_reg(), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); + + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_math(p, + offset(dst[dst_chan],1), + function, + (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, + 4, + brw_null_reg(), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); + + brw_pop_insn_state(p); +} + + + +static void emit_tex( struct brw_wm_compile *c, + const struct brw_wm_instruction *inst, + struct brw_reg *dst, + GLuint dst_flags, + struct brw_reg *coord, + GLuint sampler) +{ + struct brw_compile *p = &c->func; + GLuint msgLength, responseLength; + GLuint i, nr; + GLuint emit; + GLuint msg_type; + GLboolean shadow = FALSE; + + /* How many input regs are there? + */ + switch (inst->target) { + case TGSI_TEXTURE_1D: + emit = BRW_WRITEMASK_X; + nr = 1; + break; + case TGSI_TEXTURE_SHADOW1D: + emit = BRW_WRITEMASK_XW; + nr = 4; + shadow = TRUE; + break; + case TGSI_TEXTURE_2D: + emit = BRW_WRITEMASK_XY; + nr = 2; + break; + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: + emit = BRW_WRITEMASK_XYW; + nr = 4; + shadow = TRUE; + break; + case TGSI_TEXTURE_3D: + case TGSI_TEXTURE_CUBE: + emit = BRW_WRITEMASK_XYZ; + nr = 3; + break; + default: + /* unexpected target */ + abort(); + } + + msgLength = 1; + + for (i = 0; i < nr; i++) { + static const GLuint swz[4] = {0,1,2,2}; + if (emit & (1<<i)) + brw_MOV(p, brw_message_reg(msgLength+1), coord[swz[i]]); + else + brw_MOV(p, brw_message_reg(msgLength+1), brw_imm_f(0)); + msgLength += 2; + } + + responseLength = 8; /* always */ + + if (BRW_IS_IGDNG(p->brw)) { + if (shadow) + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG; + else + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG; + } else { + if (shadow) + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE; + else + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE; + } + + brw_SAMPLE(p, + retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW), + 1, + retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW), + BTI_TEXTURE(inst->tex_unit), + sampler, /* sampler index */ + inst->writemask, + msg_type, + responseLength, + msgLength, + 0, + 1, + BRW_SAMPLER_SIMD_MODE_SIMD16); +} + + +static void emit_txb( struct brw_wm_compile *c, + const struct brw_wm_instruction *inst, + struct brw_reg *dst, + GLuint dst_flags, + struct brw_reg *coord, + GLuint sampler ) +{ + struct brw_compile *p = &c->func; + GLuint msgLength; + GLuint msg_type; + /* Shadow ignored for txb. + */ + switch (inst->target) { + case TGSI_TEXTURE_1D: + case TGSI_TEXTURE_SHADOW1D: + brw_MOV(p, brw_message_reg(2), coord[0]); + brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); + break; + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: + brw_MOV(p, brw_message_reg(2), coord[0]); + brw_MOV(p, brw_message_reg(4), coord[1]); + brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); + break; + case TGSI_TEXTURE_3D: + case TGSI_TEXTURE_CUBE: + brw_MOV(p, brw_message_reg(2), coord[0]); + brw_MOV(p, brw_message_reg(4), coord[1]); + brw_MOV(p, brw_message_reg(6), coord[2]); + break; + default: + /* unexpected target */ + abort(); + } + + brw_MOV(p, brw_message_reg(8), coord[3]); + msgLength = 9; + + if (BRW_IS_IGDNG(p->brw)) + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG; + else + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; + + brw_SAMPLE(p, + retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW), + 1, + retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW), + BTI_TEXTURE(inst->tex_unit), + sampler, /* sampler index */ + inst->writemask, + msg_type, + 8, /* responseLength */ + msgLength, + 0, + 1, + BRW_SAMPLER_SIMD_MODE_SIMD16); +} + + +static void emit_lit( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0 ) +{ + assert((mask & BRW_WRITEMASK_XW) == 0); + + if (mask & BRW_WRITEMASK_Y) { + brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); + brw_MOV(p, dst[1], arg0[0]); + brw_set_saturate(p, 0); + } + + if (mask & BRW_WRITEMASK_Z) { + emit_math2(p, BRW_MATH_FUNCTION_POW, + &dst[2], + BRW_WRITEMASK_X | (mask & SATURATE), + &arg0[1], + &arg0[3]); + } + + /* Ordinarily you'd use an iff statement to skip or shortcircuit + * some of the POW calculations above, but 16-wide iff statements + * seem to lock c1 hardware, so this is a nasty workaround: + */ + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0)); + { + if (mask & BRW_WRITEMASK_Y) + brw_MOV(p, dst[1], brw_imm_f(0)); + + if (mask & BRW_WRITEMASK_Z) + brw_MOV(p, dst[2], brw_imm_f(0)); + } + brw_set_predicate_control(p, BRW_PREDICATE_NONE); +} + + +/* Kill pixel - set execution mask to zero for those pixels which + * fail. + */ +static void emit_kil( struct brw_wm_compile *c, + struct brw_reg *arg0) +{ + struct brw_compile *p = &c->func; + struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); + GLuint i; + + /* XXX - usually won't need 4 compares! + */ + for (i = 0; i < 4; i++) { + brw_push_insn_state(p); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0)); + brw_set_predicate_control_flag_value(p, 0xff); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_AND(p, r0uw, brw_flag_reg(), r0uw); + brw_pop_insn_state(p); + } +} + +/* KILLP kills the pixels that are currently executing, not based on a test + * of the arguments. + */ +static void emit_killp( struct brw_wm_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); + + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); /* IMASK */ + brw_AND(p, r0uw, c->emit_mask_reg, r0uw); + brw_pop_insn_state(p); +} + +static void fire_fb_write( struct brw_wm_compile *c, + GLuint base_reg, + GLuint nr, + GLuint target, + GLuint eot ) +{ + struct brw_compile *p = &c->func; + + /* Pass through control information: + */ +/* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */ + { + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */ + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MOV(p, + brw_message_reg(base_reg + 1), + brw_vec8_grf(1, 0)); + brw_pop_insn_state(p); + } + + /* Send framebuffer write message: */ +/* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */ + brw_fb_WRITE(p, + retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW), + base_reg, + retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), + target, + nr, + 0, + eot); +} + + +static void emit_aa( struct brw_wm_compile *c, + struct brw_reg *arg1, + GLuint reg ) +{ + struct brw_compile *p = &c->func; + GLuint comp = c->key.aa_dest_stencil_reg / 2; + GLuint off = c->key.aa_dest_stencil_reg % 2; + struct brw_reg aa = offset(arg1[comp], off); + + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */ + brw_MOV(p, brw_message_reg(reg), aa); + brw_pop_insn_state(p); +} + + +/* Post-fragment-program processing. Send the results to the + * framebuffer. + * \param arg0 the fragment color + * \param arg1 the pass-through depth value + * \param arg2 the shader-computed depth value + */ +static void emit_fb_write( struct brw_wm_compile *c, + struct brw_reg *arg0, + struct brw_reg *arg1, + struct brw_reg *arg2, + GLuint target, + GLuint eot) +{ + struct brw_compile *p = &c->func; + GLuint nr = 2; + GLuint channel; + + /* Reserve a space for AA - may not be needed: + */ + if (c->key.aa_dest_stencil_reg) + nr += 1; + + /* I don't really understand how this achieves the color interleave + * (ie RGBARGBA) in the result: [Do the saturation here] + */ + { + brw_push_insn_state(p); + + for (channel = 0; channel < 4; channel++) { + /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ + /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ + + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MOV(p, + brw_message_reg(nr + channel), + arg0[channel]); + + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_MOV(p, + brw_message_reg(nr + channel + 4), + sechalf(arg0[channel])); + } + + /* skip over the regs populated above: + */ + nr += 8; + + brw_pop_insn_state(p); + } + + if (c->key.source_depth_to_render_target) + { + if (c->key.computes_depth) + brw_MOV(p, brw_message_reg(nr), arg2[2]); + else + brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */ + + nr += 2; + } + + if (c->key.dest_depth_reg) + { + GLuint comp = c->key.dest_depth_reg / 2; + GLuint off = c->key.dest_depth_reg % 2; + + if (off != 0) { + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1)); + /* 2nd half? */ + brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]); + brw_pop_insn_state(p); + } + else { + brw_MOV(p, brw_message_reg(nr), arg1[comp]); + } + nr += 2; + } + + if (!c->key.runtime_check_aads_emit) { + if (c->key.aa_dest_stencil_reg) + emit_aa(c, arg1, 2); + + fire_fb_write(c, 0, nr, target, eot); + } + else { + struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); + struct brw_reg ip = brw_ip_reg(); + struct brw_instruction *jmp; + + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); + brw_AND(p, + v1_null_ud, + get_element_ud(brw_vec8_grf(1,0), 6), + brw_imm_ud(1<<26)); + + jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)); + { + emit_aa(c, arg1, 2); + fire_fb_write(c, 0, nr, target, eot); + /* note - thread killed in subroutine */ + } + brw_land_fwd_jump(p, jmp); + + /* ELSE: Shuffle up one register to fill in the hole left for AA: + */ + fire_fb_write(c, 1, nr-1, target, eot); + } +} + + +/** + * Move a GPR to scratch memory. + */ +static void emit_spill( struct brw_wm_compile *c, + struct brw_reg reg, + GLuint slot ) +{ + struct brw_compile *p = &c->func; + + /* + mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr } + */ + brw_MOV(p, brw_message_reg(2), reg); + + /* + mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask } + send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 } + */ + brw_dp_WRITE_16(p, + retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW), + slot); +} + + +/** + * Load a GPR from scratch memory. + */ +static void emit_unspill( struct brw_wm_compile *c, + struct brw_reg reg, + GLuint slot ) +{ + struct brw_compile *p = &c->func; + + /* Slot 0 is the undef value. + */ + if (slot == 0) { + brw_MOV(p, reg, brw_imm_f(0)); + return; + } + + /* + mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask } + send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 } + */ + + brw_dp_READ_16(p, + retype(vec16(reg), BRW_REGISTER_TYPE_UW), + slot); +} + + +/** + * Retrieve up to 4 GEN4 register pairs for the given wm reg: + * Args with unspill_reg != 0 will be loaded from scratch memory. + */ +static void get_argument_regs( struct brw_wm_compile *c, + struct brw_wm_ref *arg[], + struct brw_reg *regs ) +{ + GLuint i; + + for (i = 0; i < 4; i++) { + if (arg[i]) { + if (arg[i]->unspill_reg) + emit_unspill(c, + brw_vec8_grf(arg[i]->unspill_reg, 0), + arg[i]->value->spill_slot); + + regs[i] = arg[i]->hw_reg; + } + else { + regs[i] = brw_null_reg(); + } + } +} + + +/** + * For values that have a spill_slot!=0, write those regs to scratch memory. + */ +static void spill_values( struct brw_wm_compile *c, + struct brw_wm_value *values, + GLuint nr ) +{ + GLuint i; + + for (i = 0; i < nr; i++) + if (values[i].spill_slot) + emit_spill(c, values[i].hw_reg, values[i].spill_slot); +} + + +/* Emit the fragment program instructions here. + */ +void brw_wm_emit( struct brw_wm_compile *c ) +{ + struct brw_compile *p = &c->func; + GLuint insn; + + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + + /* Check if any of the payload regs need to be spilled: + */ + spill_values(c, c->payload.depth, 4); + spill_values(c, c->creg, c->nr_creg); + spill_values(c, c->payload.input_interp, PIPE_MAX_SHADER_INPUTS); + + + for (insn = 0; insn < c->nr_insns; insn++) { + + struct brw_wm_instruction *inst = &c->instruction[insn]; + struct brw_reg args[3][4], dst[4]; + GLuint i, dst_flags; + + /* Get argument regs: + */ + for (i = 0; i < 3; i++) + get_argument_regs(c, inst->src[i], args[i]); + + /* Get dest regs: + */ + for (i = 0; i < 4; i++) + if (inst->dst[i]) + dst[i] = inst->dst[i]->hw_reg; + else + dst[i] = brw_null_reg(); + + /* Flags + */ + dst_flags = inst->writemask; + if (inst->saturate) + dst_flags |= SATURATE; + + switch (inst->opcode) { + /* Generated instructions for calculating triangle interpolants: + */ + case WM_PIXELXY: + emit_pixel_xy(p, dst, dst_flags); + break; + + case WM_DELTAXY: + emit_delta_xy(p, dst, dst_flags, args[0]); + break; + + case WM_WPOSXY: + emit_wpos_xy(c, dst, dst_flags, args[0]); + break; + + case WM_PIXELW: + emit_pixel_w(p, dst, dst_flags, args[0], args[1]); + break; + + case WM_LINTERP: + emit_linterp(p, dst, dst_flags, args[0], args[1]); + break; + + case WM_PINTERP: + emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]); + break; + + case WM_CINTERP: + emit_cinterp(p, dst, dst_flags, args[0]); + break; + + case WM_FB_WRITE: + emit_fb_write(c, args[0], args[1], args[2], inst->target, inst->eot); + break; + + case WM_FRONTFACING: + emit_frontfacing(p, dst, dst_flags); + break; + + /* Straightforward arithmetic: + */ + case TGSI_OPCODE_ADD: + emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]); + break; + + case TGSI_OPCODE_FRC: + emit_alu1(p, brw_FRC, dst, dst_flags, args[0]); + break; + + case TGSI_OPCODE_FLR: + emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]); + break; + + case TGSI_OPCODE_DDX: + emit_ddxy(p, dst, dst_flags, GL_TRUE, args[0]); + break; + + case TGSI_OPCODE_DDY: + emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]); + break; + + case TGSI_OPCODE_DP3: + emit_dp3(p, dst, dst_flags, args[0], args[1]); + break; + + case TGSI_OPCODE_DP4: + emit_dp4(p, dst, dst_flags, args[0], args[1]); + break; + + case TGSI_OPCODE_DPH: + emit_dph(p, dst, dst_flags, args[0], args[1]); + break; + + case TGSI_OPCODE_TRUNC: + emit_trunc(p, dst, dst_flags, args[0]); + break; + + case TGSI_OPCODE_LRP: + emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]); + break; + + case TGSI_OPCODE_MAD: + emit_mad(p, dst, dst_flags, args[0], args[1], args[2]); + break; + + case TGSI_OPCODE_MOV: + emit_alu1(p, brw_MOV, dst, dst_flags, args[0]); + break; + + case TGSI_OPCODE_MUL: + emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]); + break; + + case TGSI_OPCODE_XPD: + emit_xpd(p, dst, dst_flags, args[0], args[1]); + break; + + /* Higher math functions: + */ + case TGSI_OPCODE_RCP: + emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]); + break; + + case TGSI_OPCODE_RSQ: + emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]); + break; + + case TGSI_OPCODE_SIN: + emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]); + break; + + case TGSI_OPCODE_COS: + emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]); + break; + + case TGSI_OPCODE_EX2: + emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]); + break; + + case TGSI_OPCODE_LG2: + emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]); + break; + + case TGSI_OPCODE_SCS: + /* There is an scs math function, but it would need some + * fixup for 16-element execution. + */ + if (dst_flags & BRW_WRITEMASK_X) + emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|BRW_WRITEMASK_X, args[0]); + if (dst_flags & BRW_WRITEMASK_Y) + emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|BRW_WRITEMASK_X, args[0]); + break; + + case TGSI_OPCODE_POW: + emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]); + break; + + /* Comparisons: + */ + case TGSI_OPCODE_CMP: + emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]); + break; + + case TGSI_OPCODE_MAX: + emit_max(p, dst, dst_flags, args[0], args[1]); + break; + + case TGSI_OPCODE_MIN: + emit_min(p, dst, dst_flags, args[0], args[1]); + break; + + case TGSI_OPCODE_SLT: + emit_slt(p, dst, dst_flags, args[0], args[1]); + break; + + case TGSI_OPCODE_SLE: + emit_sle(p, dst, dst_flags, args[0], args[1]); + break; + case TGSI_OPCODE_SGT: + emit_sgt(p, dst, dst_flags, args[0], args[1]); + break; + case TGSI_OPCODE_SGE: + emit_sge(p, dst, dst_flags, args[0], args[1]); + break; + case TGSI_OPCODE_SEQ: + emit_seq(p, dst, dst_flags, args[0], args[1]); + break; + case TGSI_OPCODE_SNE: + emit_sne(p, dst, dst_flags, args[0], args[1]); + break; + + case TGSI_OPCODE_LIT: + emit_lit(p, dst, dst_flags, args[0]); + break; + + /* Texturing operations: + */ + case TGSI_OPCODE_TEX: + emit_tex(c, inst, dst, dst_flags, args[0], inst->sampler); + break; + + case TGSI_OPCODE_TXB: + emit_txb(c, inst, dst, dst_flags, args[0], inst->sampler); + break; + + case TGSI_OPCODE_KIL: + emit_kil(c, args[0]); + break; + + case TGSI_OPCODE_KILP: + emit_killp(c); + break; + + default: + debug_printf("Unsupported opcode %i (%s) in fragment shader\n", + inst->opcode, + tgsi_get_opcode_info(inst->opcode)->mnemonic); + } + + for (i = 0; i < 4; i++) + if (inst->dst[i] && inst->dst[i]->spill_slot) + emit_spill(c, + inst->dst[i]->hw_reg, + inst->dst[i]->spill_slot); + } + + if (BRW_DEBUG & DEBUG_WM) { + debug_printf("wm-native:\n"); + brw_disasm(stderr, p->store, p->nr_insn); + } +} diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c new file mode 100644 index 00000000000..9c5b527f897 --- /dev/null +++ b/src/gallium/drivers/i965/brw_wm_fp.c @@ -0,0 +1,1224 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#include "pipe/p_shader_tokens.h" + +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_info.h" +#include "tgsi/tgsi_util.h" + +#include "brw_wm.h" +#include "brw_util.h" +#include "brw_debug.h" + + +/*********************************************************************** + * Source regs + */ + +static struct brw_fp_src src_reg(GLuint file, GLuint idx) +{ + struct brw_fp_src reg; + reg.file = file; + reg.index = idx; + reg.swizzle = BRW_SWIZZLE_XYZW; + reg.indirect = 0; + reg.negate = 0; + reg.abs = 0; + return reg; +} + +static struct brw_fp_src src_reg_from_dst(struct brw_fp_dst dst) +{ + return src_reg(dst.file, dst.index); +} + +static struct brw_fp_src src_undef( void ) +{ + return src_reg(TGSI_FILE_NULL, 0); +} + +static GLboolean src_is_undef(struct brw_fp_src src) +{ + return src.file == TGSI_FILE_NULL; +} + +static struct brw_fp_src src_swizzle( struct brw_fp_src reg, int x, int y, int z, int w ) +{ + unsigned swz = reg.swizzle; + + reg.swizzle = ( BRW_GET_SWZ(swz, x) << 0 | + BRW_GET_SWZ(swz, y) << 2 | + BRW_GET_SWZ(swz, z) << 4 | + BRW_GET_SWZ(swz, w) << 6 ); + + return reg; +} + +static struct brw_fp_src src_scalar( struct brw_fp_src reg, int x ) +{ + return src_swizzle(reg, x, x, x, x); +} + +static struct brw_fp_src src_abs( struct brw_fp_src src ) +{ + src.negate = 0; + src.abs = 1; + return src; +} + +static struct brw_fp_src src_negate( struct brw_fp_src src ) +{ + src.negate = 1; + src.abs = 0; + return src; +} + + +static int match_or_expand_immediate( const float *v, + unsigned nr, + float *v2, + unsigned *nr2, + unsigned *swizzle ) +{ + unsigned i, j; + + *swizzle = 0; + + for (i = 0; i < nr; i++) { + boolean found = FALSE; + + for (j = 0; j < *nr2 && !found; j++) { + if (v[i] == v2[j]) { + *swizzle |= j << (i * 2); + found = TRUE; + } + } + + if (!found) { + if (*nr2 >= 4) + return FALSE; + + v2[*nr2] = v[i]; + *swizzle |= *nr2 << (i * 2); + (*nr2)++; + } + } + + return TRUE; +} + + + +/* Internally generated immediates: overkill... + */ +static struct brw_fp_src src_imm( struct brw_wm_compile *c, + const GLfloat *v, + unsigned nr) +{ + unsigned i, j; + unsigned swizzle; + + /* Could do a first pass where we examine all existing immediates + * without expanding. + */ + + for (i = 0; i < c->nr_immediates; i++) { + if (match_or_expand_immediate( v, + nr, + c->immediate[i].v, + &c->immediate[i].nr, + &swizzle )) + goto out; + } + + if (c->nr_immediates < Elements(c->immediate)) { + i = c->nr_immediates++; + if (match_or_expand_immediate( v, + nr, + c->immediate[i].v, + &c->immediate[i].nr, + &swizzle )) + goto out; + } + + c->error = 1; + return src_undef(); + +out: + /* Make sure that all referenced elements are from this immediate. + * Has the effect of making size-one immediates into scalars. + */ + for (j = nr; j < 4; j++) + swizzle |= (swizzle & 0x3) << (j * 2); + + return src_swizzle( src_reg( TGSI_FILE_IMMEDIATE, i ), + BRW_GET_SWZ(swizzle, X), + BRW_GET_SWZ(swizzle, Y), + BRW_GET_SWZ(swizzle, Z), + BRW_GET_SWZ(swizzle, W) ); +} + + + +static struct brw_fp_src src_imm1f( struct brw_wm_compile *c, + GLfloat f ) +{ + return src_imm(c, &f, 1); +} + +static struct brw_fp_src src_imm4f( struct brw_wm_compile *c, + GLfloat x, + GLfloat y, + GLfloat z, + GLfloat w) +{ + GLfloat f[4] = {x,y,z,w}; + return src_imm(c, f, 4); +} + + + +/*********************************************************************** + * Dest regs + */ + +static struct brw_fp_dst dst_reg(GLuint file, GLuint idx) +{ + struct brw_fp_dst reg; + reg.file = file; + reg.index = idx; + reg.writemask = BRW_WRITEMASK_XYZW; + reg.indirect = 0; + reg.saturate = 0; + return reg; +} + +static struct brw_fp_dst dst_mask( struct brw_fp_dst reg, int mask ) +{ + reg.writemask &= mask; + return reg; +} + +static struct brw_fp_dst dst_undef( void ) +{ + return dst_reg(TGSI_FILE_NULL, 0); +} + +static boolean dst_is_undef( struct brw_fp_dst dst ) +{ + return dst.file == TGSI_FILE_NULL; +} + +static struct brw_fp_dst dst_saturate( struct brw_fp_dst reg, boolean flag ) +{ + reg.saturate = flag; + return reg; +} + +static struct brw_fp_dst get_temp( struct brw_wm_compile *c ) +{ + int bit = ffs( ~c->fp_temp ); + + if (!bit) { + debug_printf("%s: out of temporaries\n", __FILE__); + } + + c->fp_temp |= 1<<(bit-1); + return dst_reg(TGSI_FILE_TEMPORARY, c->fp_first_internal_temp+(bit-1)); +} + + +static void release_temp( struct brw_wm_compile *c, struct brw_fp_dst temp ) +{ + c->fp_temp &= ~(1 << (temp.index - c->fp_first_internal_temp)); +} + + +/*********************************************************************** + * Instructions + */ + +static struct brw_fp_instruction *get_fp_inst(struct brw_wm_compile *c) +{ + return &c->fp_instructions[c->nr_fp_insns++]; +} + +static struct brw_fp_instruction * emit_tex_op(struct brw_wm_compile *c, + GLuint op, + struct brw_fp_dst dest, + GLuint tex_unit, + GLuint target, + GLuint sampler, + struct brw_fp_src src0, + struct brw_fp_src src1, + struct brw_fp_src src2 ) +{ + struct brw_fp_instruction *inst = get_fp_inst(c); + + if (tex_unit || target) + assert(op == TGSI_OPCODE_TXP || + op == TGSI_OPCODE_TXB || + op == TGSI_OPCODE_TEX || + op == WM_FB_WRITE); + + inst->opcode = op; + inst->dst = dest; + inst->tex_unit = tex_unit; + inst->target = target; + inst->sampler = sampler; + inst->src[0] = src0; + inst->src[1] = src1; + inst->src[2] = src2; + + return inst; +} + + +static INLINE void emit_op3(struct brw_wm_compile *c, + GLuint op, + struct brw_fp_dst dest, + struct brw_fp_src src0, + struct brw_fp_src src1, + struct brw_fp_src src2 ) +{ + emit_tex_op(c, op, dest, 0, 0, 0, src0, src1, src2); +} + + +static INLINE void emit_op2(struct brw_wm_compile *c, + GLuint op, + struct brw_fp_dst dest, + struct brw_fp_src src0, + struct brw_fp_src src1) +{ + emit_tex_op(c, op, dest, 0, 0, 0, src0, src1, src_undef()); +} + +static INLINE void emit_op1(struct brw_wm_compile *c, + GLuint op, + struct brw_fp_dst dest, + struct brw_fp_src src0) +{ + emit_tex_op(c, op, dest, 0, 0, 0, src0, src_undef(), src_undef()); +} + +static INLINE void emit_op0(struct brw_wm_compile *c, + GLuint op, + struct brw_fp_dst dest) +{ + emit_tex_op(c, op, dest, 0, 0, 0, src_undef(), src_undef(), src_undef()); +} + + + +/* Many opcodes produce the same value across all the result channels. + * We'd rather not have to support that splatting in the opcode implementations, + * and brw_wm_pass*.c wants to optimize them out by shuffling references around + * anyway. We can easily get both by emitting the opcode to one channel, and + * then MOVing it to the others, which brw_wm_pass*.c already understands. + */ +static void emit_scalar_insn(struct brw_wm_compile *c, + unsigned opcode, + struct brw_fp_dst dst, + struct brw_fp_src src0, + struct brw_fp_src src1, + struct brw_fp_src src2 ) +{ + unsigned first_chan = ffs(dst.writemask) - 1; + unsigned first_mask = 1 << first_chan; + + if (dst.writemask == 0) + return; + + emit_op3( c, opcode, + dst_mask(dst, first_mask), + src0, src1, src2 ); + + if (dst.writemask != first_mask) { + emit_op1(c, TGSI_OPCODE_MOV, + dst_mask(dst, ~first_mask), + src_scalar(src_reg_from_dst(dst), first_chan)); + } +} + + +/*********************************************************************** + * Special instructions for interpolation and other tasks + */ + +static struct brw_fp_src get_pixel_xy( struct brw_wm_compile *c ) +{ + if (src_is_undef(c->fp_pixel_xy)) { + struct brw_fp_dst pixel_xy = get_temp(c); + struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH); + + + /* Emit the out calculations, and hold onto the results. Use + * two instructions as a temporary is required. + */ + /* pixel_xy.xy = PIXELXY payload[0]; + */ + emit_op1(c, + WM_PIXELXY, + dst_mask(pixel_xy, BRW_WRITEMASK_XY), + payload_r0_depth); + + c->fp_pixel_xy = src_reg_from_dst(pixel_xy); + } + + return c->fp_pixel_xy; +} + +static struct brw_fp_src get_delta_xy( struct brw_wm_compile *c ) +{ + if (src_is_undef(c->fp_delta_xy)) { + struct brw_fp_dst delta_xy = get_temp(c); + struct brw_fp_src pixel_xy = get_pixel_xy(c); + struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH); + + /* deltas.xy = DELTAXY pixel_xy, payload[0] + */ + emit_op3(c, + WM_DELTAXY, + dst_mask(delta_xy, BRW_WRITEMASK_XY), + pixel_xy, + payload_r0_depth, + src_undef()); + + c->fp_delta_xy = src_reg_from_dst(delta_xy); + } + + return c->fp_delta_xy; +} + +static struct brw_fp_src get_pixel_w( struct brw_wm_compile *c ) +{ + if (src_is_undef(c->fp_pixel_w)) { + struct brw_fp_dst pixel_w = get_temp(c); + struct brw_fp_src deltas = get_delta_xy(c); + + /* XXX: assuming position is always first -- valid? + */ + struct brw_fp_src interp_wpos = src_reg(BRW_FILE_PAYLOAD, 0); + + /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x + */ + emit_op3(c, + WM_PIXELW, + dst_mask(pixel_w, BRW_WRITEMASK_W), + interp_wpos, + deltas, + src_undef()); + + + c->fp_pixel_w = src_reg_from_dst(pixel_w); + } + + return c->fp_pixel_w; +} + + +/*********************************************************************** + * Emit INTERP instructions ahead of first use of each attrib. + */ + +static void emit_interp( struct brw_wm_compile *c, + GLuint idx, + GLuint semantic, + GLuint interp_mode ) +{ + struct brw_fp_dst dst = dst_reg(TGSI_FILE_INPUT, idx); + struct brw_fp_src interp = src_reg(BRW_FILE_PAYLOAD, idx); + struct brw_fp_src deltas = get_delta_xy(c); + + /* Need to use PINTERP on attributes which have been + * multiplied by 1/W in the SF program, and LINTERP on those + * which have not: + */ + switch (semantic) { + case TGSI_SEMANTIC_POSITION: + /* Have to treat wpos.xy specially: + */ + emit_op1(c, + WM_WPOSXY, + dst_mask(dst, BRW_WRITEMASK_XY), + get_pixel_xy(c)); + + /* TGSI_FILE_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw + */ + emit_op2(c, + WM_LINTERP, + dst_mask(dst, BRW_WRITEMASK_ZW), + interp, + deltas); + break; + + case TGSI_SEMANTIC_COLOR: + if (c->key.flat_shade) { + emit_op1(c, + WM_CINTERP, + dst, + interp); + } + else if (interp_mode == TGSI_INTERPOLATE_LINEAR) { + emit_op2(c, + WM_LINTERP, + dst, + interp, + deltas); + } + else { + emit_op3(c, + WM_PINTERP, + dst, + interp, + deltas, + get_pixel_w(c)); + } + + break; + + case TGSI_SEMANTIC_FOG: + /* Interpolate the fog coordinate */ + emit_op3(c, + WM_PINTERP, + dst_mask(dst, BRW_WRITEMASK_X), + interp, + deltas, + get_pixel_w(c)); + + emit_op1(c, + TGSI_OPCODE_MOV, + dst_mask(dst, BRW_WRITEMASK_YZ), + src_imm1f(c, 0.0)); + + emit_op1(c, + TGSI_OPCODE_MOV, + dst_mask(dst, BRW_WRITEMASK_W), + src_imm1f(c, 1.0)); + break; + + case TGSI_SEMANTIC_FACE: + /* XXX review/test this case */ + emit_op0(c, + WM_FRONTFACING, + dst_mask(dst, BRW_WRITEMASK_X)); + + emit_op1(c, + TGSI_OPCODE_MOV, + dst_mask(dst, BRW_WRITEMASK_YZ), + src_imm1f(c, 0.0)); + + emit_op1(c, + TGSI_OPCODE_MOV, + dst_mask(dst, BRW_WRITEMASK_W), + src_imm1f(c, 1.0)); + break; + + case TGSI_SEMANTIC_PSIZE: + /* XXX review/test this case */ + emit_op3(c, + WM_PINTERP, + dst_mask(dst, BRW_WRITEMASK_XY), + interp, + deltas, + get_pixel_w(c)); + + emit_op1(c, + TGSI_OPCODE_MOV, + dst_mask(dst, BRW_WRITEMASK_Z), + src_imm1f(c, 0.0f)); + + emit_op1(c, + TGSI_OPCODE_MOV, + dst_mask(dst, BRW_WRITEMASK_W), + src_imm1f(c, 1.0f)); + break; + + default: + switch (interp_mode) { + case TGSI_INTERPOLATE_CONSTANT: + emit_op1(c, + WM_CINTERP, + dst, + interp); + break; + + case TGSI_INTERPOLATE_LINEAR: + emit_op2(c, + WM_LINTERP, + dst, + interp, + deltas); + break; + + case TGSI_INTERPOLATE_PERSPECTIVE: + emit_op3(c, + WM_PINTERP, + dst, + interp, + deltas, + get_pixel_w(c)); + break; + } + break; + } +} + + +/*********************************************************************** + * Expand various instructions here to simpler forms. + */ +static void precalc_dst( struct brw_wm_compile *c, + struct brw_fp_dst dst, + struct brw_fp_src src0, + struct brw_fp_src src1 ) +{ + if (dst.writemask & BRW_WRITEMASK_Y) { + /* dst.y = mul src0.y, src1.y + */ + emit_op2(c, + TGSI_OPCODE_MUL, + dst_mask(dst, BRW_WRITEMASK_Y), + src0, + src1); + } + + if (dst.writemask & BRW_WRITEMASK_XZ) { + /* dst.z = mov src0.zzzz + */ + emit_op1(c, + TGSI_OPCODE_MOV, + dst_mask(dst, BRW_WRITEMASK_Z), + src_scalar(src0, Z)); + + /* dst.x = imm1f(1.0) + */ + emit_op1(c, + TGSI_OPCODE_MOV, + dst_saturate(dst_mask(dst, BRW_WRITEMASK_X), 0), + src_imm1f(c, 1.0)); + } + if (dst.writemask & BRW_WRITEMASK_W) { + /* dst.w = mov src1.w + */ + emit_op1(c, + TGSI_OPCODE_MOV, + dst_mask(dst, BRW_WRITEMASK_W), + src1); + } +} + + +static void precalc_lit( struct brw_wm_compile *c, + struct brw_fp_dst dst, + struct brw_fp_src src0 ) +{ + if (dst.writemask & BRW_WRITEMASK_XW) { + /* dst.xw = imm(1.0f) + */ + emit_op1(c, + TGSI_OPCODE_MOV, + dst_saturate(dst_mask(dst, BRW_WRITEMASK_XW), 0), + src_imm1f(c, 1.0f)); + } + + if (dst.writemask & BRW_WRITEMASK_YZ) { + emit_op1(c, + TGSI_OPCODE_LIT, + dst_mask(dst, BRW_WRITEMASK_YZ), + src0); + } +} + + +/** + * Some TEX instructions require extra code, cube map coordinate + * normalization, or coordinate scaling for RECT textures, etc. + * This function emits those extra instructions and the TEX + * instruction itself. + */ +static void precalc_tex( struct brw_wm_compile *c, + struct brw_fp_dst dst, + unsigned target, + unsigned unit, + struct brw_fp_src src0, + struct brw_fp_src sampler ) +{ + struct brw_fp_src coord = src_undef(); + struct brw_fp_dst tmp = dst_undef(); + + assert(unit < BRW_MAX_TEX_UNIT); + + /* Cubemap: find longest component of coord vector and normalize + * it. + */ + if (target == TGSI_TEXTURE_CUBE) { + struct brw_fp_src tmpsrc; + + tmp = get_temp(c); + tmpsrc = src_reg_from_dst(tmp); + + /* tmp = abs(src0) */ + emit_op1(c, + TGSI_OPCODE_MOV, + tmp, + src_abs(src0)); + + /* tmp.X = MAX(tmp.X, tmp.Y) */ + emit_op2(c, TGSI_OPCODE_MAX, + dst_mask(tmp, BRW_WRITEMASK_X), + src_scalar(tmpsrc, X), + src_scalar(tmpsrc, Y)); + + /* tmp.X = MAX(tmp.X, tmp.Z) */ + emit_op2(c, TGSI_OPCODE_MAX, + dst_mask(tmp, BRW_WRITEMASK_X), + tmpsrc, + src_scalar(tmpsrc, Z)); + + /* tmp.X = 1 / tmp.X */ + emit_op1(c, TGSI_OPCODE_RCP, + dst_mask(tmp, BRW_WRITEMASK_X), + tmpsrc); + + /* tmp = src0 * tmp.xxxx */ + emit_op2(c, TGSI_OPCODE_MUL, + tmp, + src0, + src_scalar(tmpsrc, X)); + + coord = tmpsrc; + } + else if (target == TGSI_TEXTURE_RECT || + target == TGSI_TEXTURE_SHADOWRECT) { + /* XXX: need a mechanism for internally generated constants. + */ + coord = src0; + } + else { + coord = src0; + } + + /* Need to emit YUV texture conversions by hand. Probably need to + * do this here - the alternative is in brw_wm_emit.c, but the + * conversion requires allocating a temporary variable which we + * don't have the facility to do that late in the compilation. + */ + if (c->key.yuvtex_mask & (1 << unit)) { + /* convert ycbcr to RGBA */ + GLboolean swap_uv = c->key.yuvtex_swap_mask & (1<<unit); + struct brw_fp_dst tmp = get_temp(c); + struct brw_fp_src tmpsrc = src_reg_from_dst(tmp); + struct brw_fp_src C0 = src_imm4f( c, -.5, -.0625, -.5, 1.164 ); + struct brw_fp_src C1 = src_imm4f( c, 1.596, -0.813, 2.018, -.391 ); + + /* tmp = TEX ... + */ + emit_tex_op(c, + TGSI_OPCODE_TEX, + dst_saturate(tmp, dst.saturate), + unit, + target, + sampler.index, + coord, + src_undef(), + src_undef()); + + /* tmp.xyz = ADD TMP, C0 + */ + emit_op2(c, TGSI_OPCODE_ADD, + dst_mask(tmp, BRW_WRITEMASK_XYZ), + tmpsrc, + C0); + + /* YUV.y = MUL YUV.y, C0.w + */ + emit_op2(c, TGSI_OPCODE_MUL, + dst_mask(tmp, BRW_WRITEMASK_Y), + tmpsrc, + src_scalar(C0, W)); + + /* + * if (UV swaped) + * RGB.xyz = MAD YUV.zzx, C1, YUV.y + * else + * RGB.xyz = MAD YUV.xxz, C1, YUV.y + */ + + emit_op3(c, TGSI_OPCODE_MAD, + dst_mask(dst, BRW_WRITEMASK_XYZ), + ( swap_uv ? + src_swizzle(tmpsrc, Z,Z,X,X) : + src_swizzle(tmpsrc, X,X,Z,Z)), + C1, + src_scalar(tmpsrc, Y)); + + /* RGB.y = MAD YUV.z, C1.w, RGB.y + */ + emit_op3(c, + TGSI_OPCODE_MAD, + dst_mask(dst, BRW_WRITEMASK_Y), + src_scalar(tmpsrc, Z), + src_scalar(C1, W), + src_scalar(src_reg_from_dst(dst), Y)); + + release_temp(c, tmp); + } + else { + /* ordinary RGBA tex instruction */ + emit_tex_op(c, + TGSI_OPCODE_TEX, + dst, + unit, + target, + sampler.index, + coord, + src_undef(), + src_undef()); + } + + /* XXX: add GL_EXT_texture_swizzle support to gallium -- by + * generating shader varients in mesa state tracker. + */ + + /* Release this temp if we ended up allocating it: + */ + if (!dst_is_undef(tmp)) + release_temp(c, tmp); +} + + +/** + * Check if the given TXP instruction really needs the divide-by-W step. + */ +static GLboolean projtex( struct brw_wm_compile *c, + unsigned target, + struct brw_fp_src src ) +{ + /* Only try to detect the simplest cases. Could detect (later) + * cases where we are trying to emit code like RCP {1.0}, MUL x, + * {1.0}, and so on. + * + * More complex cases than this typically only arise from + * user-provided fragment programs anyway: + */ + if (target == TGSI_TEXTURE_CUBE) + return GL_FALSE; /* ut2004 gun rendering !?! */ + + if (src.file == TGSI_FILE_INPUT && + BRW_GET_SWZ(src.swizzle, W) == W && + c->fp->info.input_interpolate[src.index] != TGSI_INTERPOLATE_PERSPECTIVE) + return GL_FALSE; + + return GL_TRUE; +} + + +/** + * Emit code for TXP. + */ +static void precalc_txp( struct brw_wm_compile *c, + struct brw_fp_dst dst, + unsigned target, + unsigned unit, + struct brw_fp_src src0, + struct brw_fp_src sampler ) +{ + if (projtex(c, target, src0)) { + struct brw_fp_dst tmp = get_temp(c); + + /* tmp0.w = RCP inst.arg[0][3] + */ + emit_op1(c, + TGSI_OPCODE_RCP, + dst_mask(tmp, BRW_WRITEMASK_W), + src_scalar(src0, W)); + + /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww + */ + emit_op2(c, + TGSI_OPCODE_MUL, + dst_mask(tmp, BRW_WRITEMASK_XYZ), + src0, + src_scalar(src_reg_from_dst(tmp), W)); + + /* dst = TEX tmp0 + */ + precalc_tex(c, + dst, + target, + unit, + src_reg_from_dst(tmp), + sampler ); + + release_temp(c, tmp); + } + else + { + /* dst = TEX src0 + */ + precalc_tex(c, dst, target, unit, src0, sampler); + } +} + + +/* XXX: note this returns a src_reg. + */ +static struct brw_fp_src +find_output_by_semantic( struct brw_wm_compile *c, + unsigned semantic, + unsigned index ) +{ + const struct tgsi_shader_info *info = &c->fp->info; + unsigned i; + + for (i = 0; i < info->num_outputs; i++) + if (info->output_semantic_name[i] == semantic && + info->output_semantic_index[i] == index) + return src_reg( TGSI_FILE_OUTPUT, i ); + + /* If not found, return some arbitrary immediate value: + * + * XXX: this is a good idea but immediates are up generating extra + * curbe entries atm, as they would have in the original driver. + */ + return src_reg( TGSI_FILE_OUTPUT, 0 ); /* src_imm1f(c, 1.0); */ +} + + +static void emit_fb_write( struct brw_wm_compile *c ) +{ + struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH); + struct brw_fp_src outdepth = find_output_by_semantic(c, TGSI_SEMANTIC_POSITION, 0); + GLuint i; + + + outdepth = src_scalar(outdepth, Z); + + for (i = 0 ; i < c->key.nr_cbufs; i++) { + struct brw_fp_src outcolor; + + outcolor = find_output_by_semantic(c, TGSI_SEMANTIC_COLOR, i); + + /* Use emit_tex_op so that we can specify the inst->target + * field, which is abused to contain the FB write target and the + * EOT marker + */ + emit_tex_op(c, WM_FB_WRITE, + dst_undef(), + (i == c->key.nr_cbufs - 1), /* EOT */ + i, + 0, /* no sampler */ + outcolor, + payload_r0_depth, + outdepth); + } +} + + +static struct brw_fp_dst translate_dst( struct brw_wm_compile *c, + const struct tgsi_full_dst_register *dst, + unsigned saturate ) +{ + struct brw_fp_dst out; + + out.file = dst->Register.File; + out.index = dst->Register.Index; + out.writemask = dst->Register.WriteMask; + out.indirect = dst->Register.Indirect; + out.saturate = (saturate == TGSI_SAT_ZERO_ONE); + + if (out.indirect) { + assert(dst->Indirect.File == TGSI_FILE_ADDRESS); + assert(dst->Indirect.Index == 0); + } + + return out; +} + + +static struct brw_fp_src translate_src( struct brw_wm_compile *c, + const struct tgsi_full_src_register *src ) +{ + struct brw_fp_src out; + + out.file = src->Register.File; + out.index = src->Register.Index; + out.indirect = src->Register.Indirect; + + out.swizzle = ((src->Register.SwizzleX << 0) | + (src->Register.SwizzleY << 2) | + (src->Register.SwizzleZ << 4) | + (src->Register.SwizzleW << 6)); + + switch (tgsi_util_get_full_src_register_sign_mode( src, 0 )) { + case TGSI_UTIL_SIGN_CLEAR: + out.abs = 1; + out.negate = 0; + break; + + case TGSI_UTIL_SIGN_SET: + out.abs = 1; + out.negate = 1; + break; + + case TGSI_UTIL_SIGN_TOGGLE: + out.abs = 0; + out.negate = 1; + break; + + case TGSI_UTIL_SIGN_KEEP: + default: + out.abs = 0; + out.negate = 0; + break; + } + + if (out.indirect) { + assert(src->Indirect.File == TGSI_FILE_ADDRESS); + assert(src->Indirect.Index == 0); + } + + return out; +} + + + +static void emit_insn( struct brw_wm_compile *c, + const struct tgsi_full_instruction *inst ) +{ + unsigned opcode = inst->Instruction.Opcode; + struct brw_fp_dst dst; + struct brw_fp_src src[3]; + int i; + + dst = translate_dst( c, &inst->Dst[0], + inst->Instruction.Saturate ); + + for (i = 0; i < inst->Instruction.NumSrcRegs; i++) + src[i] = translate_src( c, &inst->Src[i] ); + + switch (opcode) { + case TGSI_OPCODE_ABS: + emit_op1(c, TGSI_OPCODE_MOV, + dst, + src_abs(src[0])); + break; + + case TGSI_OPCODE_SUB: + emit_op2(c, TGSI_OPCODE_ADD, + dst, + src[0], + src_negate(src[1])); + break; + + case TGSI_OPCODE_SCS: + emit_op1(c, TGSI_OPCODE_SCS, + dst_mask(dst, BRW_WRITEMASK_XY), + src[0]); + break; + + case TGSI_OPCODE_DST: + precalc_dst(c, dst, src[0], src[1]); + break; + + case TGSI_OPCODE_LIT: + precalc_lit(c, dst, src[0]); + break; + + case TGSI_OPCODE_TEX: + precalc_tex(c, dst, + inst->Texture.Texture, + src[1].index, /* use sampler unit for tex idx */ + src[0], /* coord */ + src[1]); /* sampler */ + break; + + case TGSI_OPCODE_TXP: + precalc_txp(c, dst, + inst->Texture.Texture, + src[1].index, /* use sampler unit for tex idx */ + src[0], /* coord */ + src[1]); /* sampler */ + break; + + case TGSI_OPCODE_TXB: + /* XXX: TXB not done + */ + precalc_tex(c, dst, + inst->Texture.Texture, + src[1].index, /* use sampler unit for tex idx*/ + src[0], + src[1]); + break; + + case TGSI_OPCODE_XPD: + emit_op2(c, TGSI_OPCODE_XPD, + dst_mask(dst, BRW_WRITEMASK_XYZ), + src[0], + src[1]); + break; + + case TGSI_OPCODE_KIL: + emit_op1(c, TGSI_OPCODE_KIL, + dst_mask(dst_undef(), 0), + src[0]); + break; + + case TGSI_OPCODE_END: + emit_fb_write(c); + break; + default: + if (!c->key.has_flow_control && + brw_wm_is_scalar_result(opcode)) + emit_scalar_insn(c, opcode, dst, src[0], src[1], src[2]); + else + emit_op3(c, opcode, dst, src[0], src[1], src[2]); + break; + } +} + +/** + * Initial pass for fragment program code generation. + * This function is used by both the GLSL and non-GLSL paths. + */ +int brw_wm_pass_fp( struct brw_wm_compile *c ) +{ + struct brw_fragment_shader *fs = c->fp; + struct tgsi_parse_context parse; + struct tgsi_full_instruction *inst; + struct tgsi_full_declaration *decl; + const float *imm; + GLuint size; + GLuint i; + + if (BRW_DEBUG & DEBUG_WM) { + debug_printf("pre-fp:\n"); + tgsi_dump(fs->tokens, 0); + } + + c->fp_pixel_xy = src_undef(); + c->fp_delta_xy = src_undef(); + c->fp_pixel_w = src_undef(); + c->nr_fp_insns = 0; + c->nr_immediates = 0; + + + /* Loop over all instructions doing assorted simplifications and + * transformations. + */ + tgsi_parse_init( &parse, fs->tokens ); + while( !tgsi_parse_end_of_tokens( &parse ) ) { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + /* Turn intput declarations into special WM_* instructions. + * + * XXX: For non-branching shaders, consider deferring variable + * initialization as late as possible to minimize register + * usage. This is how the original BRW driver worked. + * + * In a branching shader, must preamble instructions at decl + * time, as instruction order in the shader does not + * correspond to the order instructions are executed in the + * wild. + * + * This is where special instructions such as WM_CINTERP, + * WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to + * compute shader inputs from the payload registers and pixel + * position. + */ + decl = &parse.FullToken.FullDeclaration; + if( decl->Declaration.File == TGSI_FILE_INPUT ) { + unsigned first, last, mask; + unsigned attrib; + + first = decl->Range.First; + last = decl->Range.Last; + mask = decl->Declaration.UsageMask; + + for (attrib = first; attrib <= last; attrib++) { + emit_interp(c, + attrib, + decl->Semantic.Name, + decl->Declaration.Interpolate ); + } + } + + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + /* Unlike VS programs we can probably manage fine encoding + * immediate values directly into the emitted EU + * instructions, as we probably only need to reference one + * float value per instruction. Just save the data for now + * and use directly later. + */ + i = c->nr_immediates++; + imm = &parse.FullToken.FullImmediate.u[i].Float; + size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; + + if (c->nr_immediates >= BRW_WM_MAX_CONST) + return PIPE_ERROR_OUT_OF_MEMORY; + + for (i = 0; i < size; i++) + c->immediate[c->nr_immediates].v[i] = imm[i]; + + for (; i < 4; i++) + c->immediate[c->nr_immediates].v[i] = 0.0; + + c->immediate[c->nr_immediates].nr = size; + c->nr_immediates++; + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + inst = &parse.FullToken.FullInstruction; + emit_insn(c, inst); + break; + } + } + + if (BRW_DEBUG & DEBUG_WM) { + brw_wm_print_fp_program( c, "pass_fp" ); + debug_printf("\n"); + } + + return c->error; +} + diff --git a/src/gallium/drivers/i965/brw_wm_glsl.c b/src/gallium/drivers/i965/brw_wm_glsl.c new file mode 100644 index 00000000000..3b3afc39d3c --- /dev/null +++ b/src/gallium/drivers/i965/brw_wm_glsl.c @@ -0,0 +1,2032 @@ +#include "util/u_math.h" + + +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_wm.h" + + +static struct brw_reg get_dst_reg(struct brw_wm_compile *c, + const struct brw_fp_instruction *inst, + GLuint component); + + +static void +reclaim_temps(struct brw_wm_compile *c); + + +/** Mark GRF register as used. */ +static void +prealloc_grf(struct brw_wm_compile *c, int r) +{ + c->used_grf[r] = GL_TRUE; +} + + +/** Mark given GRF register as not in use. */ +static void +release_grf(struct brw_wm_compile *c, int r) +{ + /*assert(c->used_grf[r]);*/ + c->used_grf[r] = GL_FALSE; + c->first_free_grf = MIN2(c->first_free_grf, r); +} + + +/** Return index of a free GRF, mark it as used. */ +static int +alloc_grf(struct brw_wm_compile *c) +{ + GLuint r; + for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) { + if (!c->used_grf[r]) { + c->used_grf[r] = GL_TRUE; + c->first_free_grf = r + 1; /* a guess */ + return r; + } + } + + /* no free temps, try to reclaim some */ + reclaim_temps(c); + c->first_free_grf = 0; + + /* try alloc again */ + for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) { + if (!c->used_grf[r]) { + c->used_grf[r] = GL_TRUE; + c->first_free_grf = r + 1; /* a guess */ + return r; + } + } + + for (r = 0; r < BRW_WM_MAX_GRF; r++) { + assert(c->used_grf[r]); + } + + /* really, no free GRF regs found */ + if (!c->out_of_regs) { + /* print warning once per compilation */ + debug_printf("%s: ran out of registers for fragment program", __FUNCTION__); + c->out_of_regs = GL_TRUE; + } + + return -1; +} + + +/** Return number of GRF registers used */ +static int +num_grf_used(const struct brw_wm_compile *c) +{ + int r; + for (r = BRW_WM_MAX_GRF - 1; r >= 0; r--) + if (c->used_grf[r]) + return r + 1; + return 0; +} + + + +/** + * Record the mapping of a Mesa register to a hardware register. + */ +static void set_reg(struct brw_wm_compile *c, int file, int index, + int component, struct brw_reg reg) +{ + c->wm_regs[file][index][component].reg = reg; + c->wm_regs[file][index][component].inited = GL_TRUE; +} + +static struct brw_reg alloc_tmp(struct brw_wm_compile *c) +{ + struct brw_reg reg; + + /* if we need to allocate another temp, grow the tmp_regs[] array */ + if (c->tmp_index == c->tmp_max) { + int r = alloc_grf(c); + if (r < 0) { + /*printf("Out of temps in %s\n", __FUNCTION__);*/ + r = 50; /* XXX random register! */ + } + c->tmp_regs[ c->tmp_max++ ] = r; + } + + /* form the GRF register */ + reg = brw_vec8_grf(c->tmp_regs[ c->tmp_index++ ], 0); + /*printf("alloc_temp %d\n", reg.nr);*/ + assert(reg.nr < BRW_WM_MAX_GRF); + return reg; + +} + +/** + * Save current temp register info. + * There must be a matching call to release_tmps(). + */ +static int mark_tmps(struct brw_wm_compile *c) +{ + return c->tmp_index; +} + +static struct brw_reg lookup_tmp( struct brw_wm_compile *c, int index ) +{ + return brw_vec8_grf( c->tmp_regs[ index ], 0 ); +} + +static void release_tmps(struct brw_wm_compile *c, int mark) +{ + c->tmp_index = mark; +} + +/** + * Convert Mesa src register to brw register. + * + * Since we're running in SOA mode each Mesa register corresponds to four + * hardware registers. We allocate the hardware registers as needed here. + * + * \param file register file, one of PROGRAM_x + * \param index register number + * \param component src component (X=0, Y=1, Z=2, W=3) + * \param nr not used?!? + * \param neg negate value? + * \param abs take absolute value? + */ +static struct brw_reg +get_reg(struct brw_wm_compile *c, int file, int index, int component, + int nr, GLuint neg, GLuint abs) +{ + struct brw_reg reg; + switch (file) { + case TGSI_FILE_NULL: + return brw_null_reg(); + + case TGSI_FILE_CONSTANT: + case TGSI_FILE_TEMPORARY: + case TGSI_FILE_INPUT: + case TGSI_FILE_OUTPUT: + case BRW_FILE_PAYLOAD: + break; + + default: + debug_printf("%s: Unexpected file type\n", __FUNCTION__); + return brw_null_reg(); + } + + assert(index < 256); + assert(component < 4); + + /* see if we've already allocated a HW register for this Mesa register */ + if (c->wm_regs[file][index][component].inited) { + /* yes, re-use */ + reg = c->wm_regs[file][index][component].reg; + } + else { + /* no, allocate new register */ + int grf = alloc_grf(c); + /*printf("alloc grf %d for reg %d:%d.%d\n", grf, file, index, component);*/ + if (grf < 0) { + /* totally out of temps */ + grf = 51; /* XXX random register! */ + } + + reg = brw_vec8_grf(grf, 0); + /*printf("Alloc new grf %d for %d.%d\n", reg.nr, index, component);*/ + + set_reg(c, file, index, component, reg); + } + + if (neg & (1 << component)) { + reg = negate(reg); + } + if (abs) + reg = brw_abs(reg); + return reg; +} + + + + +/** + * Find first/last instruction that references each temporary register. + */ +GLboolean +_mesa_find_temp_intervals(const struct prog_instruction *instructions, + GLuint numInstructions, + GLint intBegin[MAX_PROGRAM_TEMPS], + GLint intEnd[MAX_PROGRAM_TEMPS]) +{ + struct loop_info + { + GLuint Start, End; /**< Start, end instructions of loop */ + }; + struct loop_info loopStack[MAX_LOOP_NESTING]; + GLuint loopStackDepth = 0; + GLuint i; + + for (i = 0; i < MAX_PROGRAM_TEMPS; i++){ + intBegin[i] = intEnd[i] = -1; + } + + /* Scan instructions looking for temporary registers */ + for (i = 0; i < numInstructions; i++) { + const struct prog_instruction *inst = instructions + i; + if (inst->Opcode == OPCODE_BGNLOOP) { + loopStack[loopStackDepth].Start = i; + loopStack[loopStackDepth].End = inst->BranchTarget; + loopStackDepth++; + } + else if (inst->Opcode == OPCODE_ENDLOOP) { + loopStackDepth--; + } + else if (inst->Opcode == OPCODE_CAL) { + return GL_FALSE; + } + else { + const GLuint numSrc = 3; + GLuint j; + for (j = 0; j < numSrc; j++) { + if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { + const GLuint index = inst->SrcReg[j].Index; + if (inst->SrcReg[j].RelAddr) + return GL_FALSE; + update_interval(intBegin, intEnd, index, i); + if (loopStackDepth > 0) { + /* extend temp register's interval to end of loop */ + GLuint loopEnd = loopStack[loopStackDepth - 1].End; + update_interval(intBegin, intEnd, index, loopEnd); + } + } + } + if (inst->DstReg.File == PROGRAM_TEMPORARY) { + const GLuint index = inst->DstReg.Index; + if (inst->DstReg.RelAddr) + return GL_FALSE; + update_interval(intBegin, intEnd, index, i); + if (loopStackDepth > 0) { + /* extend temp register's interval to end of loop */ + GLuint loopEnd = loopStack[loopStackDepth - 1].End; + update_interval(intBegin, intEnd, index, loopEnd); + } + } + } + } + + return GL_TRUE; +} + + +/** + * This is called if we run out of GRF registers. Examine the live intervals + * of temp regs in the program and free those which won't be used again. + */ +static void +reclaim_temps(struct brw_wm_compile *c) +{ + GLint intBegin[BRW_WM_MAX_TEMPS]; + GLint intEnd[BRW_WM_MAX_TEMPS]; + int index; + + /*printf("Reclaim temps:\n");*/ + + _mesa_find_temp_intervals(c->fp_instructions, c->nr_fp_insns, + intBegin, intEnd); + + for (index = 0; index < BRW_WM_MAX_TEMPS; index++) { + if (intEnd[index] != -1 && intEnd[index] < c->cur_inst) { + /* program temp[i] can be freed */ + int component; + /*printf(" temp[%d] is dead\n", index);*/ + for (component = 0; component < 4; component++) { + if (c->wm_regs[TGSI_FILE_TEMPORARY][index][component].inited) { + int r = c->wm_regs[TGSI_FILE_TEMPORARY][index][component].reg.nr; + release_grf(c, r); + /* + printf(" Reclaim temp %d, reg %d at inst %d\n", + index, r, c->cur_inst); + */ + c->wm_regs[TGSI_FILE_TEMPORARY][index][component].inited = GL_FALSE; + } + } + } + } +} + + + + +/** + * Preallocate registers. This sets up the Mesa to hardware register + * mapping for certain registers, such as constants (uniforms/state vars) + * and shader inputs. + */ +static void prealloc_reg(struct brw_wm_compile *c) +{ + int i, j; + struct brw_reg reg; + int urb_read_length = 0; + GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted; + GLuint reg_index = 0; + + memset(c->used_grf, GL_FALSE, sizeof(c->used_grf)); + c->first_free_grf = 0; + + for (i = 0; i < 4; i++) { + if (i < c->key.nr_depth_regs) + reg = brw_vec8_grf(i * 2, 0); + else + reg = brw_vec8_grf(0, 0); + set_reg(c, TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH, i, reg); + } + reg_index += 2 * c->key.nr_depth_regs; + + /* constants */ + { + const GLuint nr_params = c->fp->program.Base.Parameters->NumParameters; + const GLuint nr_temps = c->fp->program.Base.NumTemporaries; + + /* use a real constant buffer, or just use a section of the GRF? */ + /* XXX this heuristic may need adjustment... */ + if ((nr_params + nr_temps) * 4 + reg_index > 80) + c->fp->use_const_buffer = GL_TRUE; + else + c->fp->use_const_buffer = GL_FALSE; + /*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/ + + if (c->fp->use_const_buffer) { + /* We'll use a real constant buffer and fetch constants from + * it with a dataport read message. + */ + + /* number of float constants in CURBE */ + c->prog_data.nr_params = 0; + } + else { + const struct gl_program_parameter_list *plist = + c->fp->program.Base.Parameters; + int index = 0; + + /* number of float constants in CURBE */ + c->prog_data.nr_params = 4 * nr_params; + + /* loop over program constants (float[4]) */ + for (i = 0; i < nr_params; i++) { + /* loop over XYZW channels */ + for (j = 0; j < 4; j++, index++) { + reg = brw_vec1_grf(reg_index + index / 8, index % 8); + /* Save pointer to parameter/constant value. + * Constants will be copied in prepare_constant_buffer() + */ + c->prog_data.param[index] = &plist->ParameterValues[i][j]; + set_reg(c, TGSI_FILE_STATE_VAR, i, j, reg); + } + } + /* number of constant regs used (each reg is float[8]) */ + c->nr_creg = 2 * ((4 * nr_params + 15) / 16); + reg_index += c->nr_creg; + } + } + + /* fragment shader inputs */ + for (i = 0; i < VERT_RESULT_MAX; i++) { + int fp_input; + + if (i >= VERT_RESULT_VAR0) + fp_input = i - VERT_RESULT_VAR0 + FRAG_ATTRIB_VAR0; + else if (i <= VERT_RESULT_TEX7) + fp_input = i; + else + fp_input = -1; + + if (fp_input >= 0 && inputs & (1 << fp_input)) { + urb_read_length = reg_index; + reg = brw_vec8_grf(reg_index, 0); + for (j = 0; j < 4; j++) + set_reg(c, TGSI_FILE_PAYLOAD, fp_input, j, reg); + } + if (c->key.nr_vp_outputs > i) { + reg_index += 2; + } + } + + c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2; + c->prog_data.urb_read_length = urb_read_length; + c->prog_data.curb_read_length = c->nr_creg; + c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0); + reg_index++; + c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0); + reg_index += 2; + + /* mark GRF regs [0..reg_index-1] as in-use */ + for (i = 0; i < reg_index; i++) + prealloc_grf(c, i); + + /* Don't use GRF 126, 127. Using them seems to lead to GPU lock-ups */ + prealloc_grf(c, 126); + prealloc_grf(c, 127); + + for (i = 0; i < c->nr_fp_insns; i++) { + const struct brw_fp_instruction *inst = &c->fp_instructions[i]; + struct brw_reg dst[4]; + + switch (inst->Opcode) { + case OPCODE_TEX: + case OPCODE_TXB: + /* Allocate the channels of texture results contiguously, + * since they are written out that way by the sampler unit. + */ + for (j = 0; j < 4; j++) { + dst[j] = get_dst_reg(c, inst, j); + if (j != 0) + assert(dst[j].nr == dst[j - 1].nr + 1); + } + break; + default: + break; + } + } + + /* An instruction may reference up to three constants. + * They'll be found in these registers. + * XXX alloc these on demand! + */ + if (c->fp->use_const_buffer) { + for (i = 0; i < 3; i++) { + c->current_const[i].index = -1; + c->current_const[i].reg = brw_vec8_grf(alloc_grf(c), 0); + } + } +#if 0 + printf("USE CONST BUFFER? %d\n", c->fp->use_const_buffer); + printf("AFTER PRE_ALLOC, reg_index = %d\n", reg_index); +#endif +} + + +/** + * Check if any of the instruction's src registers are constants, uniforms, + * or statevars. If so, fetch any constants that we don't already have in + * the three GRF slots. + */ +static void fetch_constants(struct brw_wm_compile *c, + const struct brw_fp_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint i; + + /* loop over instruction src regs */ + for (i = 0; i < 3; i++) { + const struct prog_src_register *src = &inst->SrcReg[i]; + if (src->File == TGSI_FILE_IMMEDIATE || + src->File == TGSI_FILE_CONSTANT) { + c->current_const[i].index = src->Index; + +#if 0 + printf(" fetch const[%d] for arg %d into reg %d\n", + src->Index, i, c->current_const[i].reg.nr); +#endif + + /* need to fetch the constant now */ + brw_dp_READ_4(p, + c->current_const[i].reg, /* writeback dest */ + src->RelAddr, /* relative indexing? */ + 16 * src->Index, /* byte offset */ + SURF_INDEX_FRAG_CONST_BUFFER/* binding table index */ + ); + } + } +} + + +/** + * Convert Mesa dst register to brw register. + */ +static struct brw_reg get_dst_reg(struct brw_wm_compile *c, + const struct brw_fp_instruction *inst, + GLuint component) +{ + const int nr = 1; + return get_reg(c, inst->DstReg.File, inst->DstReg.Index, component, nr, + 0, 0); +} + + +static struct brw_reg +get_src_reg_const(struct brw_wm_compile *c, + const struct brw_fp_instruction *inst, + GLuint srcRegIndex, GLuint component) +{ + /* We should have already fetched the constant from the constant + * buffer in fetch_constants(). Now we just have to return a + * register description that extracts the needed component and + * smears it across all eight vector components. + */ + const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; + struct brw_reg const_reg; + + assert(component < 4); + assert(srcRegIndex < 3); + assert(c->current_const[srcRegIndex].index != -1); + const_reg = c->current_const[srcRegIndex].reg; + + /* extract desired float from the const_reg, and smear */ + const_reg = stride(const_reg, 0, 1, 0); + const_reg.subnr = component * 4; + + if (src->Negate) + const_reg = negate(const_reg); + if (src->Abs) + const_reg = brw_abs(const_reg); + +#if 0 + printf(" form const[%d].%d for arg %d, reg %d\n", + c->current_const[srcRegIndex].index, + component, + srcRegIndex, + const_reg.nr); +#endif + + return const_reg; +} + + +/** + * Convert Mesa src register to brw register. + */ +static struct brw_reg get_src_reg(struct brw_wm_compile *c, + const struct brw_fp_instruction *inst, + GLuint srcRegIndex, GLuint channel) +{ + const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; + const GLuint nr = 1; + const GLuint component = BRW_GET_SWZ(src->Swizzle, channel); + + /* Extended swizzle terms */ + if (component == SWIZZLE_ZERO) { + return brw_imm_f(0.0F); + } + else if (component == SWIZZLE_ONE) { + return brw_imm_f(1.0F); + } + + if (c->fp->use_const_buffer && + (src->File == TGSI_FILE_STATE_VAR || + src->File == TGSI_FILE_CONSTANT || + src->File == TGSI_FILE_UNIFORM)) { + return get_src_reg_const(c, inst, srcRegIndex, component); + } + else { + /* other type of source register */ + return get_reg(c, src->File, src->Index, component, nr, + src->Negate, src->Abs); + } +} + + +/** + * Same as \sa get_src_reg() but if the register is a immediate, emit + * a brw_reg encoding the immediate. + * Note that a brw instruction only allows one src operand to be a immediate. + * For instructions with more than one operand, only the second can be a + * immediate. This means that we treat some immediates as constants + * (which why TGSI_FILE_IMMEDIATE is checked in fetch_constants()). + * + */ +static struct brw_reg get_src_reg_imm(struct brw_wm_compile *c, + const struct brw_fp_instruction *inst, + GLuint srcRegIndex, GLuint channel) +{ + const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; + if (src->File == TGSI_FILE_IMMEDIATE) { + /* an immediate */ + const int component = BRW_GET_SWZ(src->Swizzle, channel); + const GLfloat *param = + c->fp->program.Base.Parameters->ParameterValues[src->Index]; + GLfloat value = param[component]; + if (src->Negate) + value = -value; + if (src->Abs) + value = FABSF(value); +#if 0 + printf(" form immed value %f for chan %d\n", value, channel); +#endif + return brw_imm_f(value); + } + else { + return get_src_reg(c, inst, srcRegIndex, channel); + } +} + + +/** + * Subroutines are minimal support for resusable instruction sequences. + * They are implemented as simply as possible to minimise overhead: there + * is no explicit support for communication between the caller and callee + * other than saving the return address in a temporary register, nor is + * there any automatic local storage. This implies that great care is + * required before attempting reentrancy or any kind of nested + * subroutine invocations. + */ +static void invoke_subroutine( struct brw_wm_compile *c, + enum _subroutine subroutine, + void (*emit)( struct brw_wm_compile * ) ) +{ + struct brw_compile *p = &c->func; + + assert( subroutine < BRW_WM_MAX_SUBROUTINE ); + + if( c->subroutines[ subroutine ] ) { + /* subroutine previously emitted: reuse existing instructions */ + + int mark = mark_tmps( c ); + struct brw_reg return_address = retype( alloc_tmp( c ), + BRW_REGISTER_TYPE_UD ); + int here = p->nr_insn; + + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_ADD( p, return_address, brw_ip_reg(), brw_imm_ud( 2 << 4 ) ); + + brw_ADD( p, brw_ip_reg(), brw_ip_reg(), + brw_imm_d( ( c->subroutines[ subroutine ] - + here - 1 ) << 4 ) ); + brw_pop_insn_state(p); + + release_tmps( c, mark ); + } else { + /* previously unused subroutine: emit, and mark for later reuse */ + + int mark = mark_tmps( c ); + struct brw_reg return_address = retype( alloc_tmp( c ), + BRW_REGISTER_TYPE_UD ); + struct brw_instruction *calc; + int base = p->nr_insn; + + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + calc = brw_ADD( p, return_address, brw_ip_reg(), brw_imm_ud( 0 ) ); + brw_pop_insn_state(p); + + c->subroutines[ subroutine ] = p->nr_insn; + + emit( c ); + + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_MOV( p, brw_ip_reg(), return_address ); + brw_pop_insn_state(p); + + brw_set_src1( calc, brw_imm_ud( ( p->nr_insn - base ) << 4 ) ); + + release_tmps( c, mark ); + } +} + +static void emit_trunc( struct brw_wm_compile *c, + const struct brw_fp_instruction *inst) +{ + int i; + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + struct brw_reg src, dst; + dst = get_dst_reg(c, inst, i); + src = get_src_reg(c, inst, 0, i); + brw_RNDZ(p, dst, src); + } + } + brw_set_saturate(p, 0); +} + +static void emit_mov( struct brw_wm_compile *c, + const struct brw_fp_instruction *inst) +{ + int i; + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + struct brw_reg src, dst; + dst = get_dst_reg(c, inst, i); + /* XXX some moves from immediate value don't work reliably!!! */ + /*src = get_src_reg_imm(c, inst, 0, i);*/ + src = get_src_reg(c, inst, 0, i); + brw_MOV(p, dst, src); + } + } + brw_set_saturate(p, 0); +} + +static void emit_pixel_xy(struct brw_wm_compile *c, + const struct brw_fp_instruction *inst) +{ + struct brw_reg r1 = brw_vec1_grf(1, 0); + struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW); + + struct brw_reg dst0, dst1; + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + + dst0 = get_dst_reg(c, inst, 0); + dst1 = get_dst_reg(c, inst, 1); + /* Calculate pixel centers by adding 1 or 0 to each of the + * micro-tile coordinates passed in r1. + */ + if (mask & WRITEMASK_X) { + brw_ADD(p, + vec8(retype(dst0, BRW_REGISTER_TYPE_UW)), + stride(suboffset(r1_uw, 4), 2, 4, 0), + brw_imm_v(0x10101010)); + } + + if (mask & WRITEMASK_Y) { + brw_ADD(p, + vec8(retype(dst1, BRW_REGISTER_TYPE_UW)), + stride(suboffset(r1_uw, 5), 2, 4, 0), + brw_imm_v(0x11001100)); + } +} + +static void emit_delta_xy(struct brw_wm_compile *c, + const struct brw_fp_instruction *inst) +{ + struct brw_reg r1 = brw_vec1_grf(1, 0); + struct brw_reg dst0, dst1, src0, src1; + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + + dst0 = get_dst_reg(c, inst, 0); + dst1 = get_dst_reg(c, inst, 1); + src0 = get_src_reg(c, inst, 0, 0); + src1 = get_src_reg(c, inst, 0, 1); + /* Calc delta X,Y by subtracting origin in r1 from the pixel + * centers. + */ + if (mask & WRITEMASK_X) { + brw_ADD(p, + dst0, + retype(src0, BRW_REGISTER_TYPE_UW), + negate(r1)); + } + + if (mask & WRITEMASK_Y) { + brw_ADD(p, + dst1, + retype(src1, BRW_REGISTER_TYPE_UW), + negate(suboffset(r1,1))); + + } +} + +static void fire_fb_write( struct brw_wm_compile *c, + GLuint base_reg, + GLuint nr, + GLuint target, + GLuint eot) +{ + struct brw_compile *p = &c->func; + /* Pass through control information: + */ + /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */ + { + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */ + brw_MOV(p, + brw_message_reg(base_reg + 1), + brw_vec8_grf(1, 0)); + brw_pop_insn_state(p); + } + /* Send framebuffer write message: */ + brw_fb_WRITE(p, + retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW), + base_reg, + retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), + target, + nr, + 0, + eot); +} + +static void emit_fb_write(struct brw_wm_compile *c, + const struct brw_fp_instruction *inst) +{ + struct brw_compile *p = &c->func; + int nr = 2; + int channel; + GLuint target, eot; + struct brw_reg src0; + + /* Reserve a space for AA - may not be needed: + */ + if (c->key.aa_dest_stencil_reg) + nr += 1; + + brw_push_insn_state(p); + for (channel = 0; channel < 4; channel++) { + src0 = get_src_reg(c, inst, 0, channel); + /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ + /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ + brw_MOV(p, brw_message_reg(nr + channel), src0); + } + /* skip over the regs populated above: */ + nr += 8; + brw_pop_insn_state(p); + + if (c->key.source_depth_to_render_target) { + if (c->key.computes_depth) { + src0 = get_src_reg(c, inst, 2, 2); + brw_MOV(p, brw_message_reg(nr), src0); + } + else { + src0 = get_src_reg(c, inst, 1, 1); + brw_MOV(p, brw_message_reg(nr), src0); + } + + nr += 2; + } + + if (c->key.dest_depth_reg) { + const GLuint comp = c->key.dest_depth_reg / 2; + const GLuint off = c->key.dest_depth_reg % 2; + + if (off != 0) { + /* XXX this code needs review/testing */ + struct brw_reg arg1_0 = get_src_reg(c, inst, 1, comp); + struct brw_reg arg1_1 = get_src_reg(c, inst, 1, comp+1); + + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + brw_MOV(p, brw_message_reg(nr), offset(arg1_0, 1)); + /* 2nd half? */ + brw_MOV(p, brw_message_reg(nr+1), arg1_1); + brw_pop_insn_state(p); + } + else + { + struct brw_reg src = get_src_reg(c, inst, 1, 1); + brw_MOV(p, brw_message_reg(nr), src); + } + nr += 2; + } + + target = inst->Aux >> 1; + eot = inst->Aux & 1; + fire_fb_write(c, 0, nr, target, eot); +} + +static void emit_pixel_w( struct brw_wm_compile *c, + const struct brw_fp_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + if (mask & WRITEMASK_W) { + struct brw_reg dst, src0, delta0, delta1; + struct brw_reg interp3; + + dst = get_dst_reg(c, inst, 3); + src0 = get_src_reg(c, inst, 0, 0); + delta0 = get_src_reg(c, inst, 1, 0); + delta1 = get_src_reg(c, inst, 1, 1); + + interp3 = brw_vec1_grf(src0.nr+1, 4); + /* Calc 1/w - just linterp wpos[3] optimized by putting the + * result straight into a message reg. + */ + brw_LINE(p, brw_null_reg(), interp3, delta0); + brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), delta1); + + /* Calc w */ + brw_math_16( p, dst, + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 2, brw_null_reg(), + BRW_MATH_PRECISION_FULL); + } +} + +static void emit_linterp(struct brw_wm_compile *c, + const struct brw_fp_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + struct brw_reg interp[4]; + struct brw_reg dst, delta0, delta1; + struct brw_reg src0; + GLuint nr, i; + + src0 = get_src_reg(c, inst, 0, 0); + delta0 = get_src_reg(c, inst, 1, 0); + delta1 = get_src_reg(c, inst, 1, 1); + nr = src0.nr; + + interp[0] = brw_vec1_grf(nr, 0); + interp[1] = brw_vec1_grf(nr, 4); + interp[2] = brw_vec1_grf(nr+1, 0); + interp[3] = brw_vec1_grf(nr+1, 4); + + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + brw_LINE(p, brw_null_reg(), interp[i], delta0); + brw_MAC(p, dst, suboffset(interp[i],1), delta1); + } + } +} + +static void emit_cinterp(struct brw_wm_compile *c, + const struct brw_fp_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + + struct brw_reg interp[4]; + struct brw_reg dst, src0; + GLuint nr, i; + + src0 = get_src_reg(c, inst, 0, 0); + nr = src0.nr; + + interp[0] = brw_vec1_grf(nr, 0); + interp[1] = brw_vec1_grf(nr, 4); + interp[2] = brw_vec1_grf(nr+1, 0); + interp[3] = brw_vec1_grf(nr+1, 4); + + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + brw_MOV(p, dst, suboffset(interp[i],3)); + } + } +} + +static void emit_pinterp(struct brw_wm_compile *c, + const struct brw_fp_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + + struct brw_reg interp[4]; + struct brw_reg dst, delta0, delta1; + struct brw_reg src0, w; + GLuint nr, i; + + src0 = get_src_reg(c, inst, 0, 0); + delta0 = get_src_reg(c, inst, 1, 0); + delta1 = get_src_reg(c, inst, 1, 1); + w = get_src_reg(c, inst, 2, 3); + nr = src0.nr; + + interp[0] = brw_vec1_grf(nr, 0); + interp[1] = brw_vec1_grf(nr, 4); + interp[2] = brw_vec1_grf(nr+1, 0); + interp[3] = brw_vec1_grf(nr+1, 4); + + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + brw_LINE(p, brw_null_reg(), interp[i], delta0); + brw_MAC(p, dst, suboffset(interp[i],1), + delta1); + brw_MUL(p, dst, dst, w); + } + } +} + +/* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */ +static void emit_frontfacing(struct brw_wm_compile *c, + const struct brw_fp_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); + struct brw_reg dst; + GLuint mask = inst->DstReg.WriteMask; + int i; + + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + brw_MOV(p, dst, brw_imm_f(0.0)); + } + } + + /* bit 31 is "primitive is back face", so checking < (1 << 31) gives + * us front face + */ + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31)); + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + brw_MOV(p, dst, brw_imm_f(1.0)); + } + } + brw_set_predicate_control_flag_value(p, 0xff); +} + +static void emit_xpd(struct brw_wm_compile *c, + const struct brw_fp_instruction *inst) +{ + int i; + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + for (i = 0; i < 4; i++) { + GLuint i2 = (i+2)%3; + GLuint i1 = (i+1)%3; + if (mask & (1<<i)) { + struct brw_reg src0, src1, dst; + dst = get_dst_reg(c, inst, i); + src0 = negate(get_src_reg(c, inst, 0, i2)); + src1 = get_src_reg_imm(c, inst, 1, i1); + brw_MUL(p, brw_null_reg(), src0, src1); + src0 = get_src_reg(c, inst, 0, i1); + src1 = get_src_reg_imm(c, inst, 1, i2); + brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); + brw_MAC(p, dst, src0, src1); + brw_set_saturate(p, 0); + } + } + brw_set_saturate(p, 0); +} + +static void emit_dp3(struct brw_wm_compile *c, + const struct brw_fp_instruction *inst) +{ + struct brw_reg src0[3], src1[3], dst; + int i; + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1; + + if (!(mask & WRITEMASK_XYZW)) + return; + + assert(is_power_of_two(mask & WRITEMASK_XYZW)); + + for (i = 0; i < 3; i++) { + src0[i] = get_src_reg(c, inst, 0, i); + src1[i] = get_src_reg_imm(c, inst, 1, i); + } + + dst = get_dst_reg(c, inst, dst_chan); + brw_MUL(p, brw_null_reg(), src0[0], src1[0]); + brw_MAC(p, brw_null_reg(), src0[1], src1[1]); + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + brw_MAC(p, dst, src0[2], src1[2]); + brw_set_saturate(p, 0); +} + +static void emit_dp4(struct brw_wm_compile *c, + const struct brw_fp_instruction *inst) +{ + struct brw_reg src0[4], src1[4], dst; + int i; + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1; + + if (!(mask & WRITEMASK_XYZW)) + return; + + assert(is_power_of_two(mask & WRITEMASK_XYZW)); + + for (i = 0; i < 4; i++) { + src0[i] = get_src_reg(c, inst, 0, i); + src1[i] = get_src_reg_imm(c, inst, 1, i); + } + dst = get_dst_reg(c, inst, dst_chan); + brw_MUL(p, brw_null_reg(), src0[0], src1[0]); + brw_MAC(p, brw_null_reg(), src0[1], src1[1]); + brw_MAC(p, brw_null_reg(), src0[2], src1[2]); + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + brw_MAC(p, dst, src0[3], src1[3]); + brw_set_saturate(p, 0); +} + +static void emit_dph(struct brw_wm_compile *c, + const struct brw_fp_instruction *inst) +{ + struct brw_reg src0[4], src1[4], dst; + int i; + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1; + + if (!(mask & WRITEMASK_XYZW)) + return; + + assert(is_power_of_two(mask & WRITEMASK_XYZW)); + + for (i = 0; i < 4; i++) { + src0[i] = get_src_reg(c, inst, 0, i); + src1[i] = get_src_reg_imm(c, inst, 1, i); + } + dst = get_dst_reg(c, inst, dst_chan); + brw_MUL(p, brw_null_reg(), src0[0], src1[0]); + brw_MAC(p, brw_null_reg(), src0[1], src1[1]); + brw_MAC(p, dst, src0[2], src1[2]); + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + brw_ADD(p, dst, dst, src1[3]); + brw_set_saturate(p, 0); +} + +/** + * Emit a scalar instruction, like RCP, RSQ, LOG, EXP. + * Note that the result of the function is smeared across the dest + * register's X, Y, Z and W channels (subject to writemasking of course). + */ +static void emit_math1(struct brw_wm_compile *c, + const struct brw_fp_instruction *inst, GLuint func) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, dst; + GLuint mask = inst->DstReg.WriteMask; + int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1; + + if (!(mask & WRITEMASK_XYZW)) + return; + + assert(is_power_of_two(mask & WRITEMASK_XYZW)); + + /* Get first component of source register */ + dst = get_dst_reg(c, inst, dst_chan); + src0 = get_src_reg(c, inst, 0, 0); + + brw_MOV(p, brw_message_reg(2), src0); + brw_math(p, + dst, + func, + (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, + 2, + brw_null_reg(), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); +} + +static void emit_rcp(struct brw_wm_compile *c, + const struct brw_fp_instruction *inst) +{ + emit_math1(c, inst, BRW_MATH_FUNCTION_INV); +} + +static void emit_rsq(struct brw_wm_compile *c, + const struct brw_fp_instruction *inst) +{ + emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ); +} + +static void emit_sin(struct brw_wm_compile *c, + const struct brw_fp_instruction *inst) +{ + emit_math1(c, inst, BRW_MATH_FUNCTION_SIN); +} + +static void emit_cos(struct brw_wm_compile *c, + const struct brw_fp_instruction *inst) +{ + emit_math1(c, inst, BRW_MATH_FUNCTION_COS); +} + +static void emit_ex2(struct brw_wm_compile *c, + const struct brw_fp_instruction *inst) +{ + emit_math1(c, inst, BRW_MATH_FUNCTION_EXP); +} + +static void emit_lg2(struct brw_wm_compile *c, + const struct brw_fp_instruction *inst) +{ + emit_math1(c, inst, BRW_MATH_FUNCTION_LOG); +} + +static void emit_add(struct brw_wm_compile *c, + const struct brw_fp_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, src1, dst; + GLuint mask = inst->DstReg.WriteMask; + int i; + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + for (i = 0 ; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, inst, 0, i); + src1 = get_src_reg_imm(c, inst, 1, i); + brw_ADD(p, dst, src0, src1); + } + } + brw_set_saturate(p, 0); +} + +static void emit_arl(struct brw_wm_compile *c, + const struct brw_fp_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, addr_reg; + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + addr_reg = brw_uw8_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_ADDRESS, 0); + src0 = get_src_reg(c, inst, 0, 0); /* channel 0 */ + brw_MOV(p, addr_reg, src0); + brw_set_saturate(p, 0); +} + + +static void emit_mul(struct brw_wm_compile *c, + const struct brw_fp_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, src1, dst; + GLuint mask = inst->DstReg.WriteMask; + int i; + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + for (i = 0 ; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, inst, 0, i); + src1 = get_src_reg_imm(c, inst, 1, i); + brw_MUL(p, dst, src0, src1); + } + } + brw_set_saturate(p, 0); +} + +static void emit_frc(struct brw_wm_compile *c, + const struct brw_fp_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, dst; + GLuint mask = inst->DstReg.WriteMask; + int i; + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + for (i = 0 ; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg_imm(c, inst, 0, i); + brw_FRC(p, dst, src0); + } + } + if (inst->SaturateMode != SATURATE_OFF) + brw_set_saturate(p, 0); +} + +static void emit_flr(struct brw_wm_compile *c, + const struct brw_fp_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, dst; + GLuint mask = inst->DstReg.WriteMask; + int i; + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + for (i = 0 ; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg_imm(c, inst, 0, i); + brw_RNDD(p, dst, src0); + } + } + brw_set_saturate(p, 0); +} + + +static void emit_min_max(struct brw_wm_compile *c, + const struct brw_fp_instruction *inst) +{ + struct brw_compile *p = &c->func; + const GLuint mask = inst->DstReg.WriteMask; + const int mark = mark_tmps(c); + int i; + brw_push_insn_state(p); + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + struct brw_reg real_dst = get_dst_reg(c, inst, i); + struct brw_reg src0 = get_src_reg(c, inst, 0, i); + struct brw_reg src1 = get_src_reg(c, inst, 1, i); + struct brw_reg dst; + /* if dst==src0 or dst==src1 we need to use a temp reg */ + GLboolean use_temp = brw_same_reg(dst, src0) || + brw_same_reg(dst, src1); + if (use_temp) + dst = alloc_tmp(c); + else + dst = real_dst; + + /* + printf(" Min/max: dst %d src0 %d src1 %d\n", + dst.nr, src0.nr, src1.nr); + */ + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + brw_MOV(p, dst, src0); + brw_set_saturate(p, 0); + + if (inst->Opcode == OPCODE_MIN) + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0); + else + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, src1, src0); + + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_MOV(p, dst, src1); + brw_set_saturate(p, 0); + brw_set_predicate_control_flag_value(p, 0xff); + if (use_temp) + brw_MOV(p, real_dst, dst); + } + } + brw_pop_insn_state(p); + release_tmps(c, mark); +} + +static void emit_pow(struct brw_wm_compile *c, + const struct brw_fp_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg dst, src0, src1; + GLuint mask = inst->DstReg.WriteMask; + int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1; + + if (!(mask & WRITEMASK_XYZW)) + return; + + assert(is_power_of_two(mask & WRITEMASK_XYZW)); + + dst = get_dst_reg(c, inst, dst_chan); + src0 = get_src_reg_imm(c, inst, 0, 0); + src1 = get_src_reg_imm(c, inst, 1, 0); + + brw_MOV(p, brw_message_reg(2), src0); + brw_MOV(p, brw_message_reg(3), src1); + + brw_math(p, + dst, + BRW_MATH_FUNCTION_POW, + (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, + 2, + brw_null_reg(), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); +} + +static void emit_lrp(struct brw_wm_compile *c, + const struct brw_fp_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + struct brw_reg dst, tmp1, tmp2, src0, src1, src2; + int i; + int mark = mark_tmps(c); + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, inst, 0, i); + + src1 = get_src_reg_imm(c, inst, 1, i); + + if (src1.nr == dst.nr) { + tmp1 = alloc_tmp(c); + brw_MOV(p, tmp1, src1); + } else + tmp1 = src1; + + src2 = get_src_reg(c, inst, 2, i); + if (src2.nr == dst.nr) { + tmp2 = alloc_tmp(c); + brw_MOV(p, tmp2, src2); + } else + tmp2 = src2; + + brw_ADD(p, dst, negate(src0), brw_imm_f(1.0)); + brw_MUL(p, brw_null_reg(), dst, tmp2); + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + brw_MAC(p, dst, src0, tmp1); + brw_set_saturate(p, 0); + } + release_tmps(c, mark); + } +} + +/** + * For GLSL shaders, this KIL will be unconditional. + * It may be contained inside an IF/ENDIF structure of course. + */ +static void emit_kil(struct brw_wm_compile *c) +{ + struct brw_compile *p = &c->func; + struct brw_reg depth = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK + brw_AND(p, depth, c->emit_mask_reg, depth); + brw_pop_insn_state(p); +} + +static void emit_mad(struct brw_wm_compile *c, + const struct brw_fp_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + struct brw_reg dst, src0, src1, src2; + int i; + + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, inst, 0, i); + src1 = get_src_reg_imm(c, inst, 1, i); + src2 = get_src_reg_imm(c, inst, 2, i); + brw_MUL(p, dst, src0, src1); + + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + brw_ADD(p, dst, dst, src2); + brw_set_saturate(p, 0); + } + } +} + +static void emit_sop(struct brw_wm_compile *c, + const struct brw_fp_instruction *inst, GLuint cond) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + struct brw_reg dst, src0, src1; + int i; + + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, inst, 0, i); + src1 = get_src_reg_imm(c, inst, 1, i); + brw_push_insn_state(p); + brw_CMP(p, brw_null_reg(), cond, src0, src1); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_MOV(p, dst, brw_imm_f(0.0)); + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_MOV(p, dst, brw_imm_f(1.0)); + brw_pop_insn_state(p); + } + } +} + +static void emit_slt(struct brw_wm_compile *c, + const struct brw_fp_instruction *inst) +{ + emit_sop(c, inst, BRW_CONDITIONAL_L); +} + +static void emit_sle(struct brw_wm_compile *c, + const struct brw_fp_instruction *inst) +{ + emit_sop(c, inst, BRW_CONDITIONAL_LE); +} + +static void emit_sgt(struct brw_wm_compile *c, + const struct brw_fp_instruction *inst) +{ + emit_sop(c, inst, BRW_CONDITIONAL_G); +} + +static void emit_sge(struct brw_wm_compile *c, + const struct brw_fp_instruction *inst) +{ + emit_sop(c, inst, BRW_CONDITIONAL_GE); +} + +static void emit_seq(struct brw_wm_compile *c, + const struct brw_fp_instruction *inst) +{ + emit_sop(c, inst, BRW_CONDITIONAL_EQ); +} + +static void emit_sne(struct brw_wm_compile *c, + const struct brw_fp_instruction *inst) +{ + emit_sop(c, inst, BRW_CONDITIONAL_NEQ); +} + +static INLINE struct brw_reg high_words( struct brw_reg reg ) +{ + return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_W ), 1 ), + 0, 8, 2 ); +} + +static INLINE struct brw_reg low_words( struct brw_reg reg ) +{ + return stride( retype( reg, BRW_REGISTER_TYPE_W ), 0, 8, 2 ); +} + +static INLINE struct brw_reg even_bytes( struct brw_reg reg ) +{ + return stride( retype( reg, BRW_REGISTER_TYPE_B ), 0, 16, 2 ); +} + +static INLINE struct brw_reg odd_bytes( struct brw_reg reg ) +{ + return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_B ), 1 ), + 0, 16, 2 ); +} + + + +static void emit_wpos_xy(struct brw_wm_compile *c, + const struct brw_fp_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + struct brw_reg src0[2], dst[2]; + + dst[0] = get_dst_reg(c, inst, 0); + dst[1] = get_dst_reg(c, inst, 1); + + src0[0] = get_src_reg(c, inst, 0, 0); + src0[1] = get_src_reg(c, inst, 0, 1); + + /* Calculate the pixel offset from window bottom left into destination + * X and Y channels. + */ + if (mask & WRITEMASK_X) { + /* X' = X */ + brw_MOV(p, + dst[0], + retype(src0[0], BRW_REGISTER_TYPE_W)); + } + + if (mask & WRITEMASK_Y) { + /* Y' = height - 1 - Y */ + brw_ADD(p, + dst[1], + negate(retype(src0[1], BRW_REGISTER_TYPE_W)), + brw_imm_d(c->key.drawable_height - 1)); + } +} + +/* TODO + BIAS on SIMD8 not working yet... + */ +static void emit_txb(struct brw_wm_compile *c, + const struct brw_fp_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg dst[4], src[4], payload_reg; + /* Note: tex_unit was already looked up through SamplerTextures[] */ + const GLuint unit = inst->tex_unit; + GLuint i; + GLuint msg_type; + + assert(unit < BRW_MAX_TEX_UNIT); + + payload_reg = get_reg(c, TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); + + for (i = 0; i < 4; i++) + dst[i] = get_dst_reg(c, inst, i); + for (i = 0; i < 4; i++) + src[i] = get_src_reg(c, inst, 0, i); + + switch (inst->tex_target) { + case TEXTURE_1D_INDEX: + brw_MOV(p, brw_message_reg(2), src[0]); /* s coord */ + brw_MOV(p, brw_message_reg(3), brw_imm_f(0)); /* t coord */ + brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); /* r coord */ + break; + case TEXTURE_2D_INDEX: + case TEXTURE_RECT_INDEX: + brw_MOV(p, brw_message_reg(2), src[0]); + brw_MOV(p, brw_message_reg(3), src[1]); + brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); + break; + case TEXTURE_3D_INDEX: + case TEXTURE_CUBE_INDEX: + brw_MOV(p, brw_message_reg(2), src[0]); + brw_MOV(p, brw_message_reg(3), src[1]); + brw_MOV(p, brw_message_reg(4), src[2]); + break; + default: + /* invalid target */ + abort(); + } + brw_MOV(p, brw_message_reg(5), src[3]); /* bias */ + brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); /* ref (unused?) */ + + if (BRW_IS_IGDNG(p->brw)) { + msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG; + } else { + /* Does it work well on SIMD8? */ + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; + } + + brw_SAMPLE(p, + retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */ + 1, /* msg_reg_nr */ + retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */ + SURF_INDEX_TEXTURE(unit), + unit, /* sampler */ + inst->DstReg.WriteMask, /* writemask */ + msg_type, /* msg_type */ + 4, /* response_length */ + 4, /* msg_length */ + 0, /* eot */ + 1, + BRW_SAMPLER_SIMD_MODE_SIMD8); +} + + +static void emit_tex(struct brw_wm_compile *c, + const struct brw_fp_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg dst[4], src[4], payload_reg; + /* Note: tex_unit was already looked up through SamplerTextures[] */ + const GLuint unit = inst->tex_unit; + GLuint msg_len; + GLuint i, nr; + GLuint emit; + GLboolean shadow = (c->key.shadowtex_mask & (1<<unit)) ? 1 : 0; + GLuint msg_type; + + assert(unit < BRW_MAX_TEX_UNIT); + + payload_reg = get_reg(c, TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); + + for (i = 0; i < 4; i++) + dst[i] = get_dst_reg(c, inst, i); + for (i = 0; i < 4; i++) + src[i] = get_src_reg(c, inst, 0, i); + + switch (inst->tex_target) { + case TEXTURE_1D_INDEX: + emit = WRITEMASK_X; + nr = 1; + break; + case TEXTURE_2D_INDEX: + case TEXTURE_RECT_INDEX: + emit = WRITEMASK_XY; + nr = 2; + break; + case TEXTURE_3D_INDEX: + case TEXTURE_CUBE_INDEX: + emit = WRITEMASK_XYZ; + nr = 3; + break; + default: + /* invalid target */ + abort(); + } + msg_len = 1; + + /* move/load S, T, R coords */ + for (i = 0; i < nr; i++) { + static const GLuint swz[4] = {0,1,2,2}; + if (emit & (1<<i)) + brw_MOV(p, brw_message_reg(msg_len+1), src[swz[i]]); + else + brw_MOV(p, brw_message_reg(msg_len+1), brw_imm_f(0)); + msg_len += 1; + } + + if (shadow) { + brw_MOV(p, brw_message_reg(5), brw_imm_f(0)); /* lod / bias */ + brw_MOV(p, brw_message_reg(6), src[2]); /* ref value / R coord */ + } + + if (BRW_IS_IGDNG(p->brw)) { + if (shadow) + msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG; + else + msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG; + } else { + /* Does it work for shadow on SIMD8 ? */ + msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE; + } + + brw_SAMPLE(p, + retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */ + 1, /* msg_reg_nr */ + retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */ + SURF_INDEX_TEXTURE(unit), + unit, /* sampler */ + inst->DstReg.WriteMask, /* writemask */ + msg_type, /* msg_type */ + 4, /* response_length */ + shadow ? 6 : 4, /* msg_length */ + 0, /* eot */ + 1, + BRW_SAMPLER_SIMD_MODE_SIMD8); + + if (shadow) + brw_MOV(p, dst[3], brw_imm_f(1.0)); +} + + +/** + * Resolve subroutine calls after code emit is done. + */ +static void post_wm_emit( struct brw_wm_compile *c ) +{ + brw_resolve_cals(&c->func); +} + +static void +get_argument_regs(struct brw_wm_compile *c, + const struct brw_fp_instruction *inst, + int index, + struct brw_reg *regs, + int mask) +{ + int i; + + for (i = 0; i < 4; i++) { + if (mask & (1 << i)) + regs[i] = get_src_reg(c, inst, index, i); + } +} + +static void brw_wm_emit_branching_shader(struct brw_context *brw, struct brw_wm_compile *c) +{ +#define MAX_IF_DEPTH 32 +#define MAX_LOOP_DEPTH 32 + struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH]; + GLuint i, if_depth = 0, loop_depth = 0; + struct brw_compile *p = &c->func; + struct brw_indirect stack_index = brw_indirect(0, 0); + + c->out_of_regs = GL_FALSE; + + prealloc_reg(c); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); + + for (i = 0; i < c->nr_fp_insns; i++) { + const struct brw_fp_instruction *inst = &c->fp_instructions[i]; + int dst_flags; + struct brw_reg args[3][4], dst[4]; + int j; + + c->cur_inst = i; + +#if 0 + debug_printf("Inst %d: ", i); + _mesa_print_instruction(inst); +#endif + + /* fetch any constants that this instruction needs */ + if (c->fp->use_const_buffer) + fetch_constants(c, inst); + + if (inst->CondUpdate) + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + else + brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); + + dst_flags = inst->DstReg.WriteMask; + if (inst->SaturateMode == SATURATE_ZERO_ONE) + dst_flags |= SATURATE; + + switch (inst->Opcode) { + case WM_PIXELXY: + emit_pixel_xy(c, inst); + break; + case WM_DELTAXY: + emit_delta_xy(c, inst); + break; + case WM_PIXELW: + emit_pixel_w(c, inst); + break; + case WM_LINTERP: + emit_linterp(c, inst); + break; + case WM_PINTERP: + emit_pinterp(c, inst); + break; + case WM_CINTERP: + emit_cinterp(c, inst); + break; + case WM_WPOSXY: + emit_wpos_xy(c, inst); + break; + case WM_FB_WRITE: + emit_fb_write(c, inst); + break; + case WM_FRONTFACING: + emit_frontfacing(c, inst); + break; + case OPCODE_ADD: + emit_add(c, inst); + break; + case OPCODE_ARL: + emit_arl(c, inst); + break; + case OPCODE_FRC: + emit_frc(c, inst); + break; + case OPCODE_FLR: + emit_flr(c, inst); + break; + case OPCODE_LRP: + emit_lrp(c, inst); + break; + case OPCODE_TRUNC: + emit_trunc(c, inst); + break; + case OPCODE_MOV: + emit_mov(c, inst); + break; + case OPCODE_DP3: + emit_dp3(c, inst); + break; + case OPCODE_DP4: + emit_dp4(c, inst); + break; + case OPCODE_XPD: + emit_xpd(c, inst); + break; + case OPCODE_DPH: + emit_dph(c, inst); + break; + case OPCODE_RCP: + emit_rcp(c, inst); + break; + case OPCODE_RSQ: + emit_rsq(c, inst); + break; + case OPCODE_SIN: + emit_sin(c, inst); + break; + case OPCODE_COS: + emit_cos(c, inst); + break; + case OPCODE_EX2: + emit_ex2(c, inst); + break; + case OPCODE_LG2: + emit_lg2(c, inst); + break; + case OPCODE_MIN: + case OPCODE_MAX: + emit_min_max(c, inst); + break; + case OPCODE_DDX: + case OPCODE_DDY: + for (j = 0; j < 4; j++) { + if (inst->DstReg.WriteMask & (1 << j)) + dst[j] = get_dst_reg(c, inst, j); + else + dst[j] = brw_null_reg(); + } + get_argument_regs(c, inst, 0, args[0], WRITEMASK_XYZW); + emit_ddxy(p, dst, dst_flags, (inst->Opcode == OPCODE_DDX), + args[0]); + break; + case OPCODE_SLT: + emit_slt(c, inst); + break; + case OPCODE_SLE: + emit_sle(c, inst); + break; + case OPCODE_SGT: + emit_sgt(c, inst); + break; + case OPCODE_SGE: + emit_sge(c, inst); + break; + case OPCODE_SEQ: + emit_seq(c, inst); + break; + case OPCODE_SNE: + emit_sne(c, inst); + break; + case OPCODE_MUL: + emit_mul(c, inst); + break; + case OPCODE_POW: + emit_pow(c, inst); + break; + case OPCODE_MAD: + emit_mad(c, inst); + break; + case OPCODE_TEX: + emit_tex(c, inst); + break; + case OPCODE_TXB: + emit_txb(c, inst); + break; + case OPCODE_KIL_NV: + emit_kil(c); + break; + case OPCODE_IF: + assert(if_depth < MAX_IF_DEPTH); + if_inst[if_depth++] = brw_IF(p, BRW_EXECUTE_8); + break; + case OPCODE_ELSE: + if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]); + break; + case OPCODE_ENDIF: + assert(if_depth > 0); + brw_ENDIF(p, if_inst[--if_depth]); + break; + case OPCODE_BGNSUB: + brw_save_label(p, inst->Comment, p->nr_insn); + break; + case OPCODE_ENDSUB: + /* no-op */ + break; + case OPCODE_CAL: + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_ADD(p, deref_1ud(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16)); + brw_set_access_mode(p, BRW_ALIGN_16); + brw_ADD(p, get_addr_reg(stack_index), + get_addr_reg(stack_index), brw_imm_d(4)); + brw_save_call(&c->func, inst->label, p->nr_insn); + brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); + brw_pop_insn_state(p); + break; + + case OPCODE_RET: + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_ADD(p, get_addr_reg(stack_index), + get_addr_reg(stack_index), brw_imm_d(-4)); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_MOV(p, brw_ip_reg(), deref_1ud(stack_index, 0)); + brw_set_access_mode(p, BRW_ALIGN_16); + brw_pop_insn_state(p); + + break; + case OPCODE_BGNLOOP: + /* XXX may need to invalidate the current_constant regs */ + loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8); + break; + case OPCODE_BRK: + brw_BREAK(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case OPCODE_CONT: + brw_CONT(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case OPCODE_ENDLOOP: + { + struct brw_instruction *inst0, *inst1; + GLuint br = 1; + + if (BRW_IS_IGDNG(brw)) + br = 2; + + loop_depth--; + inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]); + /* patch all the BREAK/CONT instructions from last BGNLOOP */ + while (inst0 > loop_inst[loop_depth]) { + inst0--; + if (inst0->header.opcode == BRW_OPCODE_BREAK) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); + inst0->bits3.if_else.pop_count = 0; + } + else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0); + inst0->bits3.if_else.pop_count = 0; + } + } + } + break; + default: + debug_printf("unsupported IR in fragment shader %d\n", + inst->Opcode); + } + + if (inst->CondUpdate) + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + else + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } + post_wm_emit(c); + + if (BRW_DEBUG & DEBUG_WM) { + debug_printf("wm-native:\n"); + brw_disasm(stderr, p->store, p->nr_insn); + } +} + +/** + * Do GPU code generation for shaders that use GLSL features such as + * flow control. Other shaders will be compiled with the + */ +void brw_wm_branching_shader_emit(struct brw_context *brw, struct brw_wm_compile *c) +{ + if (BRW_DEBUG & DEBUG_WM) { + debug_printf("%s:\n", __FUNCTION__); + } + + /* initial instruction translation/simplification */ + brw_wm_pass_fp(c); + + /* actual code generation */ + brw_wm_emit_branching_shader(brw, c); + + if (BRW_DEBUG & DEBUG_WM) { + brw_wm_print_program(c, "brw_wm_branching_shader_emit done"); + } + + c->prog_data.total_grf = num_grf_used(c); + c->prog_data.total_scratch = 0; +} diff --git a/src/gallium/drivers/i965/brw_wm_iz.c b/src/gallium/drivers/i965/brw_wm_iz.c new file mode 100644 index 00000000000..6f1e9fcc3c1 --- /dev/null +++ b/src/gallium/drivers/i965/brw_wm_iz.c @@ -0,0 +1,156 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#include "brw_wm.h" + + +#undef P /* prompted depth */ +#undef C /* computed */ +#undef N /* non-promoted? */ + +#define P 0 +#define C 1 +#define N 2 + +const struct { + GLuint mode:2; + GLuint sd_present:1; + GLuint sd_to_rt:1; + GLuint dd_present:1; + GLuint ds_present:1; +} wm_iz_table[IZ_BIT_MAX] = +{ + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { N, 1, 1, 0, 0 }, + { N, 0, 1, 0, 0 }, + { N, 0, 1, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { P, 0, 0, 0, 0 }, + { N, 1, 1, 0, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { N, 1, 1, 0, 0 }, + { N, 0, 1, 0, 0 }, + { N, 0, 1, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { P, 0, 0, 0, 0 }, + { N, 1, 1, 0, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { N, 1, 1, 0, 1 }, + { N, 0, 1, 0, 1 }, + { N, 0, 1, 0, 1 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { C, 0, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { P, 0, 0, 0, 0 }, + { N, 1, 1, 0, 1 }, + { C, 0, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { P, 0, 0, 0, 0 }, + { C, 0, 0, 0, 1 }, + { P, 0, 0, 0, 0 }, + { C, 0, 1, 0, 1 }, + { P, 0, 0, 0, 0 }, + { C, 1, 1, 0, 1 }, + { C, 0, 1, 0, 1 }, + { C, 0, 1, 0, 1 }, + { P, 0, 0, 0, 0 }, + { C, 1, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { P, 0, 0, 0, 0 }, + { C, 1, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { C, 0, 1, 1, 1 } +}; + +/** + * \param line_aa AA_NEVER, AA_ALWAYS or AA_SOMETIMES + * \param lookup bitmask of IZ_* flags + */ +void brw_wm_lookup_iz( GLuint line_aa, + GLuint lookup, + GLboolean ps_uses_depth, + struct brw_wm_prog_key *key ) +{ + GLuint reg = 2; + + assert (lookup < IZ_BIT_MAX); + + if (lookup & IZ_PS_COMPUTES_DEPTH_BIT) + key->computes_depth = 1; + + if (wm_iz_table[lookup].sd_present || ps_uses_depth) { + key->source_depth_reg = reg; + reg += 2; + } + + if (wm_iz_table[lookup].sd_to_rt) + key->source_depth_to_render_target = 1; + + if (wm_iz_table[lookup].ds_present || line_aa != AA_NEVER) { + key->aa_dest_stencil_reg = reg; + key->runtime_check_aads_emit = (!wm_iz_table[lookup].ds_present && + line_aa == AA_SOMETIMES); + reg++; + } + + if (wm_iz_table[lookup].dd_present) { + key->dest_depth_reg = reg; + reg+=2; + } + + key->nr_depth_regs = (reg+1)/2; +} + diff --git a/src/gallium/drivers/i965/brw_wm_pass0.c b/src/gallium/drivers/i965/brw_wm_pass0.c new file mode 100644 index 00000000000..0bacad2b0f0 --- /dev/null +++ b/src/gallium/drivers/i965/brw_wm_pass0.c @@ -0,0 +1,366 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#include "util/u_memory.h" +#include "util/u_math.h" + +#include "brw_debug.h" +#include "brw_wm.h" + + + +/*********************************************************************** + */ + +static struct brw_wm_ref *get_ref( struct brw_wm_compile *c ) +{ + assert(c->nr_refs < BRW_WM_MAX_REF); + return &c->refs[c->nr_refs++]; +} + +static struct brw_wm_value *get_value( struct brw_wm_compile *c) +{ + assert(c->nr_refs < BRW_WM_MAX_VREG); + return &c->vreg[c->nr_vreg++]; +} + +/** return pointer to a newly allocated instruction */ +static struct brw_wm_instruction *get_instruction( struct brw_wm_compile *c ) +{ + assert(c->nr_insns < BRW_WM_MAX_INSN); + return &c->instruction[c->nr_insns++]; +} + +/*********************************************************************** + */ + +/** Init the "undef" register */ +static void pass0_init_undef( struct brw_wm_compile *c) +{ + struct brw_wm_ref *ref = &c->undef_ref; + ref->value = &c->undef_value; + ref->hw_reg = brw_vec8_grf(0, 0); + ref->insn = 0; + ref->prevuse = NULL; +} + +/** Set a FP register to a value */ +static void pass0_set_fpreg_value( struct brw_wm_compile *c, + GLuint file, + GLuint idx, + GLuint component, + struct brw_wm_value *value ) +{ + struct brw_wm_ref *ref = get_ref(c); + ref->value = value; + ref->hw_reg = brw_vec8_grf(0, 0); + ref->insn = 0; + ref->prevuse = NULL; + c->pass0_fp_reg[file][idx][component] = ref; +} + +/** Set a FP register to a ref */ +static void pass0_set_fpreg_ref( struct brw_wm_compile *c, + GLuint file, + GLuint idx, + GLuint component, + const struct brw_wm_ref *src_ref ) +{ + c->pass0_fp_reg[file][idx][component] = src_ref; +} + +static const struct brw_wm_ref *get_param_ref( struct brw_wm_compile *c, + unsigned idx, + unsigned component) +{ + GLuint i = idx * 4 + component; + + if (i >= BRW_WM_MAX_PARAM) { + debug_printf("%s: out of params\n", __FUNCTION__); + c->prog_data.error = 1; + return NULL; + } + else { + struct brw_wm_ref *ref = get_ref(c); + + c->nr_creg = MAX2(c->nr_creg, (i+16)/16); + + /* Push the offsets into hw_reg. These will be added to the + * real register numbers once one is allocated in pass2. + */ + ref->hw_reg = brw_vec1_grf((i&8)?1:0, i%8); + ref->value = &c->creg[i/16]; + ref->insn = 0; + ref->prevuse = NULL; + + return ref; + } +} + + + + +/* Lookup our internal registers + */ +static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c, + GLuint file, + GLuint idx, + GLuint component ) +{ + const struct brw_wm_ref *ref = c->pass0_fp_reg[file][idx][component]; + + if (!ref) { + switch (file) { + case TGSI_FILE_INPUT: + case TGSI_FILE_TEMPORARY: + case TGSI_FILE_OUTPUT: + case BRW_FILE_PAYLOAD: + /* should already be done?? */ + break; + + case TGSI_FILE_CONSTANT: + ref = get_param_ref(c, + c->fp->info.immediate_count + idx, + component); + break; + + case TGSI_FILE_IMMEDIATE: + ref = get_param_ref(c, + idx, + component); + break; + + default: + assert(0); + break; + } + + c->pass0_fp_reg[file][idx][component] = ref; + } + + if (!ref) + ref = &c->undef_ref; + + return ref; +} + + + +/*********************************************************************** + * Straight translation to internal instruction format + */ + +static void pass0_set_dst( struct brw_wm_compile *c, + struct brw_wm_instruction *out, + const struct brw_fp_instruction *inst, + GLuint writemask ) +{ + const struct brw_fp_dst dst = inst->dst; + GLuint i; + + for (i = 0; i < 4; i++) { + if (writemask & (1<<i)) { + out->dst[i] = get_value(c); + pass0_set_fpreg_value(c, dst.file, dst.index, i, out->dst[i]); + } + } + + out->writemask = writemask; +} + + +static const struct brw_wm_ref *get_fp_src_reg_ref( struct brw_wm_compile *c, + struct brw_fp_src src, + GLuint i ) +{ + return pass0_get_reg(c, src.file, src.index, BRW_GET_SWZ(src.swizzle,i)); +} + + +static struct brw_wm_ref *get_new_ref( struct brw_wm_compile *c, + struct brw_fp_src src, + GLuint i, + struct brw_wm_instruction *insn) +{ + const struct brw_wm_ref *ref = get_fp_src_reg_ref(c, src, i); + struct brw_wm_ref *newref = get_ref(c); + + newref->value = ref->value; + newref->hw_reg = ref->hw_reg; + + if (insn) { + newref->insn = insn - c->instruction; + newref->prevuse = newref->value->lastuse; + newref->value->lastuse = newref; + } + + if (src.negate) + newref->hw_reg.negate ^= 1; + + if (src.abs) { + newref->hw_reg.negate = 0; + newref->hw_reg.abs = 1; + } + + return newref; +} + + +static void +translate_insn(struct brw_wm_compile *c, + const struct brw_fp_instruction *inst) +{ + struct brw_wm_instruction *out = get_instruction(c); + GLuint writemask = inst->dst.writemask; + GLuint nr_args = brw_wm_nr_args(inst->opcode); + GLuint i, j; + + /* Copy some data out of the instruction + */ + out->opcode = inst->opcode; + out->saturate = inst->dst.saturate; + out->tex_unit = inst->tex_unit; + out->target = inst->target; + + /* Nasty hack: + */ + out->eot = (inst->opcode == WM_FB_WRITE && + inst->tex_unit != 0); + + + /* Args: + */ + for (i = 0; i < nr_args; i++) { + for (j = 0; j < 4; j++) { + out->src[i][j] = get_new_ref(c, inst->src[i], j, out); + } + } + + /* Dst: + */ + pass0_set_dst(c, out, inst, writemask); +} + + + +/*********************************************************************** + * Optimize moves and swizzles away: + */ +static void pass0_precalc_mov( struct brw_wm_compile *c, + const struct brw_fp_instruction *inst ) +{ + const struct brw_fp_dst dst = inst->dst; + GLuint writemask = dst.writemask; + struct brw_wm_ref *refs[4]; + GLuint i; + + /* Get the effect of a MOV by manipulating our register table: + * First get all refs, then assign refs. This ensures that "in-place" + * swizzles such as: + * MOV t, t.xxyx + * are handled correctly. Previously, these two steps were done in + * one loop and the above case was incorrectly handled. + */ + for (i = 0; i < 4; i++) { + refs[i] = get_new_ref(c, inst->src[0], i, NULL); + } + for (i = 0; i < 4; i++) { + if (writemask & (1 << i)) { + pass0_set_fpreg_ref( c, dst.file, dst.index, i, refs[i]); + } + } +} + + +/* Initialize payload "registers". + */ +static void pass0_init_payload( struct brw_wm_compile *c ) +{ + GLuint i; + + for (i = 0; i < 4; i++) { + GLuint j = i >= c->key.nr_depth_regs ? 0 : i; + pass0_set_fpreg_value( c, BRW_FILE_PAYLOAD, PAYLOAD_DEPTH, i, + &c->payload.depth[j] ); + } + + for (i = 0; i < c->key.nr_inputs; i++) + pass0_set_fpreg_value( c, BRW_FILE_PAYLOAD, i, 0, + &c->payload.input_interp[i] ); +} + + +/*********************************************************************** + * PASS 0 + * + * Work forwards to give each calculated value a unique number. Where + * an instruction produces duplicate values (eg DP3), all are given + * the same number. + * + * Translate away swizzling and eliminate non-saturating moves. + * + * Translate instructions from our fp_instruction structs to our + * internal brw_wm_instruction representation. + */ +void brw_wm_pass0( struct brw_wm_compile *c ) +{ + GLuint insn; + + c->nr_vreg = 0; + c->nr_insns = 0; + + pass0_init_undef(c); + pass0_init_payload(c); + + for (insn = 0; insn < c->nr_fp_insns; insn++) { + const struct brw_fp_instruction *inst = &c->fp_instructions[insn]; + + /* Optimize away moves, otherwise emit translated instruction: + */ + switch (inst->opcode) { + case TGSI_OPCODE_MOV: + if (!inst->dst.saturate) { + pass0_precalc_mov(c, inst); + } + else { + translate_insn(c, inst); + } + break; + default: + translate_insn(c, inst); + break; + } + } + + if (BRW_DEBUG & DEBUG_WM) { + brw_wm_print_program(c, "pass0"); + } +} diff --git a/src/gallium/drivers/i965/brw_wm_pass1.c b/src/gallium/drivers/i965/brw_wm_pass1.c new file mode 100644 index 00000000000..005747f00ba --- /dev/null +++ b/src/gallium/drivers/i965/brw_wm_pass1.c @@ -0,0 +1,292 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#include "brw_wm.h" +#include "brw_debug.h" + + +static GLuint get_tracked_mask(struct brw_wm_compile *c, + struct brw_wm_instruction *inst) +{ + GLuint i; + for (i = 0; i < 4; i++) { + if (inst->writemask & (1<<i)) { + if (!inst->dst[i]->contributes_to_output) { + inst->writemask &= ~(1<<i); + inst->dst[i] = 0; + } + } + } + + return inst->writemask; +} + +/* Remove a reference from a value's usage chain. + */ +static void unlink_ref(struct brw_wm_ref *ref) +{ + struct brw_wm_value *value = ref->value; + + if (ref == value->lastuse) { + value->lastuse = ref->prevuse; + } + else { + struct brw_wm_ref *i = value->lastuse; + while (i->prevuse != ref) i = i->prevuse; + i->prevuse = ref->prevuse; + } +} + +static void track_arg(struct brw_wm_compile *c, + struct brw_wm_instruction *inst, + GLuint arg, + GLuint readmask) +{ + GLuint i; + + for (i = 0; i < 4; i++) { + struct brw_wm_ref *ref = inst->src[arg][i]; + if (ref) { + if (readmask & (1<<i)) { + ref->value->contributes_to_output = 1; + } + else { + unlink_ref(ref); + inst->src[arg][i] = NULL; + } + } + } +} + +static GLuint get_texcoord_mask( GLuint tex_idx ) +{ + switch (tex_idx) { + case TGSI_TEXTURE_1D: + return BRW_WRITEMASK_X; + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + return BRW_WRITEMASK_XY; + case TGSI_TEXTURE_3D: + return BRW_WRITEMASK_XYZ; + case TGSI_TEXTURE_CUBE: + return BRW_WRITEMASK_XYZ; + + case TGSI_TEXTURE_SHADOW1D: + return BRW_WRITEMASK_XZ; + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: + return BRW_WRITEMASK_XYZ; + default: + assert(0); + return 0; + } +} + + +/* Step two: Basically this is dead code elimination. + * + * Iterate backwards over instructions, noting which values + * contribute to the final result. Adjust writemasks to only + * calculate these values. + */ +void brw_wm_pass1( struct brw_wm_compile *c ) +{ + GLint insn; + + for (insn = c->nr_insns-1; insn >= 0; insn--) { + struct brw_wm_instruction *inst = &c->instruction[insn]; + GLuint writemask; + GLuint read0, read1, read2; + + if (inst->opcode == TGSI_OPCODE_KIL) { + track_arg(c, inst, 0, BRW_WRITEMASK_XYZW); /* All args contribute to final */ + continue; + } + + if (inst->opcode == WM_FB_WRITE) { + track_arg(c, inst, 0, BRW_WRITEMASK_XYZW); + track_arg(c, inst, 1, BRW_WRITEMASK_XYZW); + if (c->key.source_depth_to_render_target && + c->key.computes_depth) + track_arg(c, inst, 2, BRW_WRITEMASK_Z); + else + track_arg(c, inst, 2, 0); + continue; + } + + /* Lookup all the registers which were written by this + * instruction and get a mask of those that contribute to the output: + */ + writemask = get_tracked_mask(c, inst); + if (!writemask) { + GLuint arg; + for (arg = 0; arg < 3; arg++) + track_arg(c, inst, arg, 0); + continue; + } + + read0 = 0; + read1 = 0; + read2 = 0; + + /* Mark all inputs which contribute to the marked outputs: + */ + switch (inst->opcode) { + case TGSI_OPCODE_ABS: + case TGSI_OPCODE_FLR: + case TGSI_OPCODE_FRC: + case TGSI_OPCODE_MOV: + case TGSI_OPCODE_TRUNC: + read0 = writemask; + break; + + case TGSI_OPCODE_SUB: + case TGSI_OPCODE_SLT: + case TGSI_OPCODE_SLE: + case TGSI_OPCODE_SGE: + case TGSI_OPCODE_SGT: + case TGSI_OPCODE_SEQ: + case TGSI_OPCODE_SNE: + case TGSI_OPCODE_ADD: + case TGSI_OPCODE_MAX: + case TGSI_OPCODE_MIN: + case TGSI_OPCODE_MUL: + read0 = writemask; + read1 = writemask; + break; + + case TGSI_OPCODE_DDX: + case TGSI_OPCODE_DDY: + read0 = writemask; + break; + + case TGSI_OPCODE_MAD: + case TGSI_OPCODE_CMP: + case TGSI_OPCODE_LRP: + read0 = writemask; + read1 = writemask; + read2 = writemask; + break; + + case TGSI_OPCODE_XPD: + if (writemask & BRW_WRITEMASK_X) read0 |= BRW_WRITEMASK_YZ; + if (writemask & BRW_WRITEMASK_Y) read0 |= BRW_WRITEMASK_XZ; + if (writemask & BRW_WRITEMASK_Z) read0 |= BRW_WRITEMASK_XY; + read1 = read0; + break; + + case TGSI_OPCODE_COS: + case TGSI_OPCODE_EX2: + case TGSI_OPCODE_LG2: + case TGSI_OPCODE_RCP: + case TGSI_OPCODE_RSQ: + case TGSI_OPCODE_SIN: + case TGSI_OPCODE_SCS: + case WM_CINTERP: + case WM_PIXELXY: + read0 = BRW_WRITEMASK_X; + break; + + case TGSI_OPCODE_POW: + read0 = BRW_WRITEMASK_X; + read1 = BRW_WRITEMASK_X; + break; + + case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TXP: + read0 = get_texcoord_mask(inst->target); + break; + + case TGSI_OPCODE_TXB: + read0 = get_texcoord_mask(inst->target) | BRW_WRITEMASK_W; + break; + + case WM_WPOSXY: + read0 = writemask & BRW_WRITEMASK_XY; + break; + + case WM_DELTAXY: + read0 = writemask & BRW_WRITEMASK_XY; + read1 = BRW_WRITEMASK_X; + break; + + case WM_PIXELW: + read0 = BRW_WRITEMASK_X; + read1 = BRW_WRITEMASK_XY; + break; + + case WM_LINTERP: + read0 = BRW_WRITEMASK_X; + read1 = BRW_WRITEMASK_XY; + break; + + case WM_PINTERP: + read0 = BRW_WRITEMASK_X; /* interpolant */ + read1 = BRW_WRITEMASK_XY; /* deltas */ + read2 = BRW_WRITEMASK_W; /* pixel w */ + break; + + case TGSI_OPCODE_DP3: + read0 = BRW_WRITEMASK_XYZ; + read1 = BRW_WRITEMASK_XYZ; + break; + + case TGSI_OPCODE_DPH: + read0 = BRW_WRITEMASK_XYZ; + read1 = BRW_WRITEMASK_XYZW; + break; + + case TGSI_OPCODE_DP4: + read0 = BRW_WRITEMASK_XYZW; + read1 = BRW_WRITEMASK_XYZW; + break; + + case TGSI_OPCODE_LIT: + read0 = BRW_WRITEMASK_XYW; + break; + + case TGSI_OPCODE_DST: + case WM_FRONTFACING: + case TGSI_OPCODE_KILP: + default: + break; + } + + track_arg(c, inst, 0, read0); + track_arg(c, inst, 1, read1); + track_arg(c, inst, 2, read2); + } + + if (BRW_DEBUG & DEBUG_WM) { + brw_wm_print_program(c, "pass1"); + } +} diff --git a/src/gallium/drivers/i965/brw_wm_pass2.c b/src/gallium/drivers/i965/brw_wm_pass2.c new file mode 100644 index 00000000000..19248b45195 --- /dev/null +++ b/src/gallium/drivers/i965/brw_wm_pass2.c @@ -0,0 +1,334 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#include "brw_debug.h" +#include "brw_wm.h" + + +/* Use these to force spilling so that that functionality can be + * tested with known-good examples rather than having to construct new + * tests. + */ +#define TEST_PAYLOAD_SPILLS 0 +#define TEST_DST_SPILLS 0 + +static void spill_value(struct brw_wm_compile *c, + struct brw_wm_value *value); + +static void prealloc_reg(struct brw_wm_compile *c, + struct brw_wm_value *value, + GLuint reg) +{ + if (value->lastuse) { + /* Set nextuse to zero, it will be corrected by + * update_register_usage(). + */ + c->pass2_grf[reg].value = value; + c->pass2_grf[reg].nextuse = 0; + + value->resident = &c->pass2_grf[reg]; + value->hw_reg = brw_vec8_grf(reg*2, 0); + + if (TEST_PAYLOAD_SPILLS) + spill_value(c, value); + } +} + + +/* Initialize all the register values. Do the initial setup + * calculations for interpolants. + */ +static void init_registers( struct brw_wm_compile *c ) +{ + GLuint reg = 0; + GLuint j; + + for (j = 0; j < c->grf_limit; j++) + c->pass2_grf[j].nextuse = BRW_WM_MAX_INSN; + + /* Pre-allocate incoming payload regs: + */ + for (j = 0; j < c->key.nr_depth_regs; j++) + prealloc_reg(c, &c->payload.depth[j], reg++); + + for (j = 0; j < c->nr_creg; j++) + prealloc_reg(c, &c->creg[j], reg++); + + reg++; /* XXX: skip over position output */ + + /* XXX: currently just hope the VS outputs line up with FS inputs: + */ + for (j = 0; j < c->key.nr_inputs; j++) + prealloc_reg(c, &c->payload.input_interp[j], reg++); + + c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2; + c->prog_data.urb_read_length = (c->key.nr_inputs + 1) * 2; + c->prog_data.curb_read_length = c->nr_creg * 2; + + /* Note this allocation: + */ + c->max_wm_grf = reg * 2; +} + + +/* Update the nextuse value for each register in our file. + */ +static void update_register_usage(struct brw_wm_compile *c, + GLuint thisinsn) +{ + GLuint i; + + for (i = 1; i < c->grf_limit; i++) { + struct brw_wm_grf *grf = &c->pass2_grf[i]; + + /* Only search those which can change: + */ + if (grf->nextuse < thisinsn) { + const struct brw_wm_ref *ref = grf->value->lastuse; + + /* Has last use of value been passed? + */ + if (ref->insn < thisinsn) { + grf->value->resident = 0; + grf->value = 0; + grf->nextuse = BRW_WM_MAX_INSN; + } + else { + /* Else loop through chain to update: + */ + while (ref->prevuse && ref->prevuse->insn >= thisinsn) + ref = ref->prevuse; + + grf->nextuse = ref->insn; + } + } + } +} + + +static void spill_value(struct brw_wm_compile *c, + struct brw_wm_value *value) +{ + /* Allocate a spill slot. Note that allocations start from 0x40 - + * the first slot is reserved to mean "undef" in brw_wm_emit.c + */ + if (!value->spill_slot) { + c->last_scratch += 0x40; + value->spill_slot = c->last_scratch; + } + + /* The spill will be done in brw_wm_emit.c immediately after the + * value is calculated, so we can just take this reg without any + * further work. + */ + value->resident->value = NULL; + value->resident->nextuse = BRW_WM_MAX_INSN; + value->resident = NULL; +} + + + +/* Search for contiguous region with the most distant nearest + * member. Free regs count as very distant. + * + * TODO: implement spill-to-reg so that we can rearrange discontigous + * free regs and then spill the oldest non-free regs in sequence. + * This would mean inserting instructions in this pass. + */ +static GLuint search_contiguous_regs(struct brw_wm_compile *c, + GLuint nr, + GLuint thisinsn) +{ + struct brw_wm_grf *grf = c->pass2_grf; + GLuint furthest = 0; + GLuint reg = 0; + GLuint i, j; + + /* Start search at 1: r0 is special and can't be used or spilled. + */ + for (i = 1; i < c->grf_limit && furthest < BRW_WM_MAX_INSN; i++) { + GLuint group_nextuse = BRW_WM_MAX_INSN; + + for (j = 0; j < nr; j++) { + if (grf[i+j].nextuse < group_nextuse) + group_nextuse = grf[i+j].nextuse; + } + + if (group_nextuse > furthest) { + furthest = group_nextuse; + reg = i; + } + } + + assert(furthest != thisinsn); + + /* Any non-empty regs will need to be spilled: + */ + for (j = 0; j < nr; j++) + if (grf[reg+j].value) + spill_value(c, grf[reg+j].value); + + return reg; +} + + +static void alloc_contiguous_dest(struct brw_wm_compile *c, + struct brw_wm_value *dst[], + GLuint nr, + GLuint thisinsn) +{ + GLuint reg = search_contiguous_regs(c, nr, thisinsn); + GLuint i; + + for (i = 0; i < nr; i++) { + if (!dst[i]) { + /* Need to grab a dummy value in TEX case. Don't introduce + * it into the tracking scheme. + */ + dst[i] = &c->vreg[c->nr_vreg++]; + } + else { + assert(!dst[i]->resident); + assert(c->pass2_grf[reg+i].nextuse != thisinsn); + + c->pass2_grf[reg+i].value = dst[i]; + c->pass2_grf[reg+i].nextuse = thisinsn; + + dst[i]->resident = &c->pass2_grf[reg+i]; + } + + dst[i]->hw_reg = brw_vec8_grf((reg+i)*2, 0); + } + + if ((reg+nr)*2 > c->max_wm_grf) + c->max_wm_grf = (reg+nr) * 2; +} + + +static void load_args(struct brw_wm_compile *c, + struct brw_wm_instruction *inst) +{ + GLuint thisinsn = inst - c->instruction; + GLuint i,j; + + for (i = 0; i < 3; i++) { + for (j = 0; j < 4; j++) { + struct brw_wm_ref *ref = inst->src[i][j]; + + if (ref) { + if (!ref->value->resident) { + /* Need to bring the value in from scratch space. The code for + * this will be done in brw_wm_emit.c, here we just do the + * register allocation and mark the ref as requiring a fill. + */ + GLuint reg = search_contiguous_regs(c, 1, thisinsn); + + c->pass2_grf[reg].value = ref->value; + c->pass2_grf[reg].nextuse = thisinsn; + + ref->value->resident = &c->pass2_grf[reg]; + + /* Note that a fill is required: + */ + ref->unspill_reg = reg*2; + } + + /* Adjust the hw_reg to point at the value's current location: + */ + assert(ref->value == ref->value->resident->value); + ref->hw_reg.nr += (ref->value->resident - c->pass2_grf) * 2; + } + } + } +} + + + +/* Step 3: Work forwards once again. Perform register allocations, + * taking into account instructions like TEX which require contiguous + * result registers. Where necessary spill registers to scratch space + * and reload later. + */ +void brw_wm_pass2( struct brw_wm_compile *c ) +{ + GLuint insn; + GLuint i; + + init_registers(c); + + for (insn = 0; insn < c->nr_insns; insn++) { + struct brw_wm_instruction *inst = &c->instruction[insn]; + + /* Update registers' nextuse values: + */ + update_register_usage(c, insn); + + /* May need to unspill some args. + */ + load_args(c, inst); + + /* Allocate registers to hold results: + */ + switch (inst->opcode) { + case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TXB: + case TGSI_OPCODE_TXP: + alloc_contiguous_dest(c, inst->dst, 4, insn); + break; + + default: + for (i = 0; i < 4; i++) { + if (inst->writemask & (1<<i)) { + assert(inst->dst[i]); + alloc_contiguous_dest(c, &inst->dst[i], 1, insn); + } + } + break; + } + + if (TEST_DST_SPILLS && inst->opcode != WM_PIXELXY) { + for (i = 0; i < 4; i++) + if (inst->dst[i]) + spill_value(c, inst->dst[i]); + } + } + + if (BRW_DEBUG & DEBUG_WM) { + brw_wm_print_program(c, "pass2"); + } + + c->state = PASS2_DONE; + + if (BRW_DEBUG & DEBUG_WM) { + brw_wm_print_program(c, "pass2/done"); + } +} diff --git a/src/gallium/drivers/i965/brw_wm_sampler_state.c b/src/gallium/drivers/i965/brw_wm_sampler_state.c new file mode 100644 index 00000000000..a8bc31c9cef --- /dev/null +++ b/src/gallium/drivers/i965/brw_wm_sampler_state.c @@ -0,0 +1,229 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#include "util/u_math.h" +#include "util/u_format.h" + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "brw_screen.h" + + +/* Samplers aren't strictly wm state from the hardware's perspective, + * but that is the only situation in which we use them in this driver. + */ + + + +static enum pipe_error +upload_default_color( struct brw_context *brw, + const GLfloat *color, + struct brw_winsys_buffer **bo_out ) +{ + struct brw_sampler_default_color sdc; + enum pipe_error ret; + + COPY_4V(sdc.color, color); + + ret = brw_cache_data( &brw->cache, BRW_SAMPLER_DEFAULT_COLOR, &sdc, + NULL, 0, bo_out ); + if (ret) + return ret; + + return PIPE_OK; +} + + +struct wm_sampler_key { + int sampler_count; + struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT]; +}; + + +/** Sets up the cache key for sampler state for all texture units */ +static void +brw_wm_sampler_populate_key(struct brw_context *brw, + struct wm_sampler_key *key) +{ + int i; + + memset(key, 0, sizeof(*key)); + + key->sampler_count = MIN2(brw->curr.num_textures, + brw->curr.num_samplers); + + for (i = 0; i < key->sampler_count; i++) { + const struct brw_texture *tex = brw_texture(brw->curr.texture[i]); + const struct brw_sampler *sampler = brw->curr.sampler[i]; + struct brw_sampler_state *entry = &key->sampler[i]; + + entry->ss0 = sampler->ss0; + entry->ss1 = sampler->ss1; + entry->ss2.default_color_pointer = 0; /* reloc */ + entry->ss3 = sampler->ss3; + + /* Cube-maps on 965 and later must use the same wrap mode for all 3 + * coordinate dimensions. Futher, only CUBE and CLAMP are valid. + */ + if (tex->base.target == PIPE_TEXTURE_CUBE) { + if (FALSE && + (sampler->ss0.min_filter != BRW_MAPFILTER_NEAREST || + sampler->ss0.mag_filter != BRW_MAPFILTER_NEAREST)) { + entry->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CUBE; + entry->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE; + entry->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE; + } else { + entry->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP; + entry->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP; + entry->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP; + } + } else if (tex->base.target == PIPE_TEXTURE_1D) { + /* There's a bug in 1D texture sampling - it actually pays + * attention to the wrap_t value, though it should not. + * Override the wrap_t value here to GL_REPEAT to keep + * any nonexistent border pixels from floating in. + */ + entry->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP; + } + } +} + + +static enum pipe_error +brw_wm_sampler_update_default_colors(struct brw_context *brw) +{ + enum pipe_error ret; + int nr = MIN2(brw->curr.num_textures, + brw->curr.num_samplers); + int i; + + for (i = 0; i < nr; i++) { + const struct brw_texture *tex = brw_texture(brw->curr.texture[i]); + const struct brw_sampler *sampler = brw->curr.sampler[i]; + const float *bc; + + if (util_format_is_depth_or_stencil(tex->base.format)) { + float bordercolor[4] = { + sampler->border_color[0], + sampler->border_color[0], + sampler->border_color[0], + sampler->border_color[0] + }; + + bc = bordercolor; + } + else { + bc = sampler->border_color; + } + + /* GL specs that border color for depth textures is taken from the + * R channel, while the hardware uses A. Spam R into all the + * channels for safety. + */ + ret = upload_default_color(brw, + bc, + &brw->wm.sdc_bo[i]); + if (ret) + return ret; + } + + return PIPE_OK; +} + + + +/* All samplers must be uploaded in a single contiguous array. + */ +static int upload_wm_samplers( struct brw_context *brw ) +{ + struct wm_sampler_key key; + struct brw_winsys_reloc reloc[BRW_MAX_TEX_UNIT]; + enum pipe_error ret; + int i; + + brw_wm_sampler_update_default_colors(brw); + brw_wm_sampler_populate_key(brw, &key); + + if (brw->wm.sampler_count != key.sampler_count) { + brw->wm.sampler_count = key.sampler_count; + brw->state.dirty.cache |= CACHE_NEW_SAMPLER; + } + + if (brw->wm.sampler_count == 0) { + bo_reference(&brw->wm.sampler_bo, NULL); + return PIPE_OK; + } + + /* Emit SDC relocations */ + for (i = 0; i < key.sampler_count; i++) { + make_reloc( &reloc[i], + BRW_USAGE_SAMPLER, + 0, + i * sizeof(struct brw_sampler_state) + + offsetof(struct brw_sampler_state, ss2), + brw->wm.sdc_bo[i]); + } + + + if (brw_search_cache(&brw->cache, BRW_SAMPLER, + &key, sizeof(key), + reloc, key.sampler_count, + NULL, + &brw->wm.sampler_bo)) + return PIPE_OK; + + /* If we didnt find it in the cache, compute the state and put it in the + * cache. + */ + ret = brw_upload_cache(&brw->cache, BRW_SAMPLER, + &key, sizeof(key), + reloc, key.sampler_count, + &key.sampler, sizeof(key.sampler), + NULL, NULL, + &brw->wm.sampler_bo); + if (ret) + return ret; + + + return 0; +} + +const struct brw_tracked_state brw_wm_samplers = { + .dirty = { + .mesa = PIPE_NEW_BOUND_TEXTURES | PIPE_NEW_SAMPLERS, + .brw = 0, + .cache = 0 + }, + .prepare = upload_wm_samplers, +}; + + diff --git a/src/gallium/drivers/i965/brw_wm_state.c b/src/gallium/drivers/i965/brw_wm_state.c new file mode 100644 index 00000000000..ee970ac75bc --- /dev/null +++ b/src/gallium/drivers/i965/brw_wm_state.c @@ -0,0 +1,339 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#include "util/u_math.h" + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "brw_wm.h" +#include "brw_debug.h" +#include "brw_pipe_rast.h" + +/*********************************************************************** + * WM unit - fragment programs and rasterization + */ + +struct brw_wm_unit_key { + unsigned int total_grf, total_scratch; + unsigned int urb_entry_read_length; + unsigned int curb_entry_read_length; + unsigned int dispatch_grf_start_reg; + + unsigned int curbe_offset; + unsigned int urb_size; + + unsigned int max_threads; + + unsigned int nr_surfaces, sampler_count; + GLboolean uses_depth, computes_depth, uses_kill, has_flow_control; + GLboolean polygon_stipple, stats_wm, line_stipple, offset_enable; + GLfloat offset_units, offset_factor; +}; + +static void +wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) +{ + const struct brw_fragment_shader *fp = brw->curr.fragment_shader; + + memset(key, 0, sizeof(*key)); + + if (BRW_DEBUG & DEBUG_SINGLE_THREAD) + key->max_threads = 1; + else { + /* WM maximum threads is number of EUs times number of threads per EU. */ + if (BRW_IS_IGDNG(brw)) + key->max_threads = 12 * 6; + else if (BRW_IS_G4X(brw)) + key->max_threads = 10 * 5; + else + key->max_threads = 8 * 4; + } + + /* CACHE_NEW_WM_PROG */ + key->total_grf = brw->wm.prog_data->total_grf; + key->urb_entry_read_length = brw->wm.prog_data->urb_read_length; + key->curb_entry_read_length = brw->wm.prog_data->curb_read_length; + key->dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf; + key->total_scratch = align(brw->wm.prog_data->total_scratch, 1024); + + /* BRW_NEW_URB_FENCE */ + key->urb_size = brw->urb.vsize; + + /* BRW_NEW_CURBE_OFFSETS */ + key->curbe_offset = brw->curbe.wm_start; + + /* BRW_NEW_NR_SURFACEs */ + key->nr_surfaces = brw->wm.nr_surfaces; + + /* CACHE_NEW_SAMPLER */ + key->sampler_count = brw->wm.sampler_count; + + /* PIPE_NEW_RAST */ + key->polygon_stipple = brw->curr.rast->templ.poly_stipple_enable; + + /* PIPE_NEW_FRAGMENT_PROGRAM */ + key->uses_depth = fp->uses_depth; + key->computes_depth = fp->info.writes_z; + + /* PIPE_NEW_DEPTH_BUFFER + * + * Override for NULL depthbuffer case, required by the Pixel Shader Computed + * Depth field. + */ + if (brw->curr.fb.zsbuf == NULL) + key->computes_depth = 0; + + /* PIPE_NEW_DEPTH_STENCIL_ALPHA */ + key->uses_kill = (fp->info.uses_kill || + brw->curr.zstencil->cc3.alpha_test); + + key->has_flow_control = fp->has_flow_control; + + /* temporary sanity check assertion */ + assert(fp->has_flow_control == 0); + + /* PIPE_NEW_QUERY */ + key->stats_wm = (brw->query.stats_wm != 0); + + /* PIPE_NEW_RAST */ + key->line_stipple = brw->curr.rast->templ.line_stipple_enable; + + + key->offset_enable = (brw->curr.rast->templ.offset_cw || + brw->curr.rast->templ.offset_ccw); + + key->offset_units = brw->curr.rast->templ.offset_units; + key->offset_factor = brw->curr.rast->templ.offset_scale; +} + +/** + * Setup wm hardware state. See page 225 of Volume 2 + */ +static enum pipe_error +wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, + struct brw_winsys_reloc *reloc, + unsigned nr_reloc, + struct brw_winsys_buffer **bo_out) +{ + struct brw_wm_unit_state wm; + enum pipe_error ret; + + memset(&wm, 0, sizeof(wm)); + + wm.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1; + wm.thread0.kernel_start_pointer = 0; /* reloc */ + wm.thread1.depth_coef_urb_read_offset = 1; + wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + + if (BRW_IS_IGDNG(brw)) + wm.thread1.binding_table_entry_count = 0; /* hardware requirement */ + else + wm.thread1.binding_table_entry_count = key->nr_surfaces; + + if (key->total_scratch != 0) { + wm.thread2.scratch_space_base_pointer = 0; /* reloc */ + wm.thread2.per_thread_scratch_space = key->total_scratch / 1024 - 1; + } else { + wm.thread2.scratch_space_base_pointer = 0; + wm.thread2.per_thread_scratch_space = 0; + } + + wm.thread3.dispatch_grf_start_reg = key->dispatch_grf_start_reg; + wm.thread3.urb_entry_read_length = key->urb_entry_read_length; + wm.thread3.urb_entry_read_offset = 0; + wm.thread3.const_urb_entry_read_length = key->curb_entry_read_length; + wm.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; + + if (BRW_IS_IGDNG(brw)) + wm.wm4.sampler_count = 0; /* hardware requirement */ + else + wm.wm4.sampler_count = (key->sampler_count + 1) / 4; + + /* reloc */ + wm.wm4.sampler_state_pointer = 0; + + wm.wm5.program_uses_depth = key->uses_depth; + wm.wm5.program_computes_depth = key->computes_depth; + wm.wm5.program_uses_killpixel = key->uses_kill; + + if (key->has_flow_control) + wm.wm5.enable_8_pix = 1; + else + wm.wm5.enable_16_pix = 1; + + wm.wm5.max_threads = key->max_threads - 1; + wm.wm5.thread_dispatch_enable = 1; /* AKA: color_write */ + wm.wm5.legacy_line_rast = 0; + wm.wm5.legacy_global_depth_bias = 0; + wm.wm5.early_depth_test = 1; /* never need to disable */ + wm.wm5.line_aa_region_width = 0; + wm.wm5.line_endcap_aa_region_width = 1; + + wm.wm5.polygon_stipple = key->polygon_stipple; + + if (key->offset_enable) { + wm.wm5.depth_offset = 1; + /* Something wierd going on with legacy_global_depth_bias, + * offset_constant, scaling and MRD. This value passes glean + * but gives some odd results elsewere (eg. the + * quad-offset-units test). + */ + wm.global_depth_offset_constant = key->offset_units * 2; + + /* This is the only value that passes glean: + */ + wm.global_depth_offset_scale = key->offset_factor; + } + + wm.wm5.line_stipple = key->line_stipple; + + if ((BRW_DEBUG & DEBUG_STATS) || key->stats_wm) + wm.wm4.stats_enable = 1; + + ret = brw_upload_cache(&brw->cache, BRW_WM_UNIT, + key, sizeof(*key), + reloc, nr_reloc, + &wm, sizeof(wm), + NULL, NULL, + bo_out); + if (ret) + return ret; + + return PIPE_OK; +} + + +static enum pipe_error upload_wm_unit( struct brw_context *brw ) +{ + struct brw_wm_unit_key key; + struct brw_winsys_reloc reloc[3]; + unsigned nr_reloc = 0; + enum pipe_error ret; + unsigned grf_reg_count; + unsigned per_thread_scratch_space; + unsigned stats_enable; + unsigned sampler_count; + + wm_unit_populate_key(brw, &key); + + + /* Allocate the necessary scratch space if we haven't already. Don't + * bother reducing the allocation later, since we use scratch so + * rarely. + */ + assert(key.total_scratch <= 12 * 1024); + if (key.total_scratch) { + GLuint total = key.total_scratch * key.max_threads; + + /* Do we need a new buffer: + */ + if (brw->wm.scratch_bo && total > brw->wm.scratch_bo->size) + bo_reference(&brw->wm.scratch_bo, NULL); + + if (brw->wm.scratch_bo == NULL) { + ret = brw->sws->bo_alloc(brw->sws, + BRW_BUFFER_TYPE_SHADER_SCRATCH, + total, + 4096, + &brw->wm.scratch_bo); + if (ret) + return ret; + } + } + + + /* XXX: temporary: + */ + grf_reg_count = (align(key.total_grf, 16) / 16 - 1); + per_thread_scratch_space = key.total_scratch / 1024 - 1; + stats_enable = (BRW_DEBUG & DEBUG_STATS) || key.stats_wm; + sampler_count = BRW_IS_IGDNG(brw) ? 0 :(key.sampler_count + 1) / 4; + + /* Emit WM program relocation */ + make_reloc(&reloc[nr_reloc++], + BRW_USAGE_STATE, + grf_reg_count << 1, + offsetof(struct brw_wm_unit_state, thread0), + brw->wm.prog_bo); + + /* Emit scratch space relocation */ + if (key.total_scratch != 0) { + make_reloc(&reloc[nr_reloc++], + BRW_USAGE_SCRATCH, + per_thread_scratch_space, + offsetof(struct brw_wm_unit_state, thread2), + brw->wm.scratch_bo); + } + + /* Emit sampler state relocation */ + if (key.sampler_count != 0) { + make_reloc(&reloc[nr_reloc++], + BRW_USAGE_STATE, + stats_enable | (sampler_count << 2), + offsetof(struct brw_wm_unit_state, wm4), + brw->wm.sampler_bo); + } + + + if (brw_search_cache(&brw->cache, BRW_WM_UNIT, + &key, sizeof(key), + reloc, nr_reloc, + NULL, + &brw->wm.state_bo)) + return PIPE_OK; + + ret = wm_unit_create_from_key(brw, &key, + reloc, nr_reloc, + &brw->wm.state_bo); + if (ret) + return ret; + + return PIPE_OK; +} + +const struct brw_tracked_state brw_wm_unit = { + .dirty = { + .mesa = (PIPE_NEW_FRAGMENT_SHADER | + PIPE_NEW_DEPTH_BUFFER | + PIPE_NEW_RAST | + PIPE_NEW_DEPTH_STENCIL_ALPHA | + PIPE_NEW_QUERY), + + .brw = (BRW_NEW_CURBE_OFFSETS | + BRW_NEW_NR_WM_SURFACES), + + .cache = (CACHE_NEW_WM_PROG | + CACHE_NEW_SAMPLER) + }, + .prepare = upload_wm_unit, +}; + diff --git a/src/gallium/drivers/i965/brw_wm_surface_state.c b/src/gallium/drivers/i965/brw_wm_surface_state.c new file mode 100644 index 00000000000..f92b8198ed3 --- /dev/null +++ b/src/gallium/drivers/i965/brw_wm_surface_state.c @@ -0,0 +1,294 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#include "pipe/p_format.h" + +#include "brw_batchbuffer.h" +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "brw_screen.h" + + + + +static enum pipe_error +brw_update_texture_surface( struct brw_context *brw, + struct brw_texture *tex, + struct brw_winsys_buffer **bo_out) +{ + struct brw_winsys_reloc reloc[1]; + enum pipe_error ret; + + /* Emit relocation to surface contents */ + make_reloc(&reloc[0], + BRW_USAGE_SAMPLER, + 0, + offsetof(struct brw_surface_state, ss1), + tex->bo); + + if (brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, + &tex->ss, sizeof tex->ss, + reloc, Elements(reloc), + NULL, + bo_out)) + return PIPE_OK; + + ret = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, + &tex->ss, sizeof tex->ss, + reloc, Elements(reloc), + &tex->ss, sizeof tex->ss, + NULL, NULL, + bo_out); + if (ret) + return ret; + + return PIPE_OK; +} + + + + + + + + +/** + * Sets up a surface state structure to point at the given region. + * While it is only used for the front/back buffer currently, it should be + * usable for further buffers when doing ARB_draw_buffer support. + */ +static enum pipe_error +brw_update_render_surface(struct brw_context *brw, + struct brw_surface *surface, + struct brw_winsys_buffer **bo_out) +{ + struct brw_surf_ss0 blend_ss0 = brw->curr.blend->ss0; + struct brw_surface_state ss; + struct brw_winsys_reloc reloc[1]; + enum pipe_error ret; + + /* XXX: we will only be rendering to this surface: + */ + make_reloc(&reloc[0], + BRW_USAGE_RENDER_TARGET, + 0, + offsetof(struct brw_surface_state, ss1), + surface->bo); + + /* Surfaces are potentially shared between contexts, so can't + * scribble the in-place ss0 value in the surface. + */ + memcpy(&ss, &surface->ss, sizeof ss); + + ss.ss0.color_blend = blend_ss0.color_blend; + ss.ss0.writedisable_blue = blend_ss0.writedisable_blue; + ss.ss0.writedisable_green = blend_ss0.writedisable_green; + ss.ss0.writedisable_red = blend_ss0.writedisable_red; + ss.ss0.writedisable_alpha = blend_ss0.writedisable_alpha; + + if (brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, + &ss, sizeof(ss), + reloc, Elements(reloc), + NULL, + bo_out)) + return PIPE_OK; + + ret = brw_upload_cache(&brw->surface_cache, + BRW_SS_SURFACE, + &ss, sizeof ss, + reloc, Elements(reloc), + &ss, sizeof ss, + NULL, NULL, + bo_out); + if (ret) + return ret; + + return PIPE_OK; +} + + +/** + * Constructs the binding table for the WM surface state, which maps unit + * numbers to surface state objects. + */ +static enum pipe_error +brw_wm_get_binding_table(struct brw_context *brw, + struct brw_winsys_buffer **bo_out ) +{ + enum pipe_error ret; + struct brw_winsys_reloc reloc[BRW_WM_MAX_SURF]; + uint32_t data[BRW_WM_MAX_SURF]; + GLuint nr_relocs = 0; + GLuint data_size = brw->wm.nr_surfaces * sizeof data[0]; + int i; + + assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF); + assert(brw->wm.nr_surfaces > 0); + + /* Emit binding table relocations to surface state + */ + for (i = 0; i < brw->wm.nr_surfaces; i++) { + if (brw->wm.surf_bo[i]) { + make_reloc(&reloc[nr_relocs++], + BRW_USAGE_STATE, + 0, + i * sizeof(GLuint), + brw->wm.surf_bo[i]); + } + } + + /* Note there is no key for this search beyond the values in the + * relocation array: + */ + if (brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND, + NULL, 0, + reloc, nr_relocs, + NULL, + bo_out)) + return PIPE_OK; + + /* Upload zero data, will all be overwitten with relocation + * offsets: + */ + for (i = 0; i < brw->wm.nr_surfaces; i++) + data[i] = 0; + + ret = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND, + NULL, 0, + reloc, nr_relocs, + data, data_size, + NULL, NULL, + bo_out); + if (ret) + return ret; + + return PIPE_OK; +} + +static enum pipe_error prepare_wm_surfaces(struct brw_context *brw ) +{ + enum pipe_error ret; + int nr_surfaces = 0; + GLuint i; + + /* PIPE_NEW_COLOR_BUFFERS | PIPE_NEW_BLEND + * + * Update surfaces for drawing buffers. Mixes in colormask and + * blend state. + * + * XXX: no color buffer case + */ + for (i = 0; i < brw->curr.fb.nr_cbufs; i++) { + ret = brw_update_render_surface(brw, + brw_surface(brw->curr.fb.cbufs[i]), + &brw->wm.surf_bo[BTI_COLOR_BUF(i)]); + if (ret) + return ret; + + nr_surfaces = BTI_COLOR_BUF(i) + 1; + } + + + + /* PIPE_NEW_FRAGMENT_CONSTANTS + */ +#if 0 + if (brw->curr.fragment_constants) { + ret = brw_update_fragment_constant_surface( + brw, + brw->curr.fragment_constants, + &brw->wm.surf_bo[BTI_FRAGMENT_CONSTANTS]); + + if (ret) + return ret; + + nr_surfaces = BTI_FRAGMENT_CONSTANTS + 1; + } + else { + bo_reference(&brw->wm.surf_bo[SURF_FRAG_CONSTANTS], NULL); + } +#endif + + + /* PIPE_NEW_TEXTURE + */ + for (i = 0; i < brw->curr.num_textures; i++) { + ret = brw_update_texture_surface(brw, + brw_texture(brw->curr.texture[i]), + &brw->wm.surf_bo[BTI_TEXTURE(i)]); + if (ret) + return ret; + + nr_surfaces = BTI_TEXTURE(i) + 1; + } + + /* Clear any inactive entries: + */ + for (i = brw->curr.fb.nr_cbufs; i < BRW_MAX_DRAW_BUFFERS; i++) + bo_reference(&brw->wm.surf_bo[BTI_COLOR_BUF(i)], NULL); + + if (!brw->curr.fragment_constants) + bo_reference(&brw->wm.surf_bo[BTI_FRAGMENT_CONSTANTS], NULL); + + /* XXX: no pipe_max_textures define?? */ + for (i = brw->curr.num_textures; i < PIPE_MAX_SAMPLERS; i++) + bo_reference(&brw->wm.surf_bo[BTI_TEXTURE(i)], NULL); + + if (brw->wm.nr_surfaces != nr_surfaces) { + brw->wm.nr_surfaces = nr_surfaces; + brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES; + } + + ret = brw_wm_get_binding_table(brw, &brw->wm.bind_bo); + if (ret) + return ret; + + return PIPE_OK; +} + +const struct brw_tracked_state brw_wm_surfaces = { + .dirty = { + .mesa = (PIPE_NEW_COLOR_BUFFERS | + PIPE_NEW_BOUND_TEXTURES | + PIPE_NEW_FRAGMENT_CONSTANTS | + PIPE_NEW_BLEND), + .brw = (BRW_NEW_CONTEXT | + BRW_NEW_WM_SURFACES), + .cache = 0 + }, + .prepare = prepare_wm_surfaces, +}; + + + diff --git a/src/gallium/drivers/i965/intel_decode.c b/src/gallium/drivers/i965/intel_decode.c new file mode 100644 index 00000000000..3166958bad3 --- /dev/null +++ b/src/gallium/drivers/i965/intel_decode.c @@ -0,0 +1,1790 @@ +/* -*- c-basic-offset: 4 -*- */ +/* + * Copyright © 2007 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <[email protected]> + * + */ + +/** @file intel_decode.c + * This file contains code to print out batchbuffer contents in a + * human-readable format. + * + * The current version only supports i915 packets, and only pretty-prints a + * subset of them. The intention is for it to make just a best attempt to + * decode, but never crash in the process. + */ + +#include <stdio.h> +#include <stdarg.h> +#include <string.h> +#include <inttypes.h> + +#include "intel_decode.h" + +/*#include "intel_chipset.h"*/ +#define IS_965(x) 1 /* XXX */ +#define IS_9XX(x) 1 /* XXX */ + +#define BUFFER_FAIL(_count, _len, _name) do { \ + fprintf(out, "Buffer size too small in %s (%d < %d)\n", \ + (_name), (_count), (_len)); \ + (*failures)++; \ + return count; \ +} while (0) + +static FILE *out; +static uint32_t saved_s2 = 0, saved_s4 = 0; +static char saved_s2_set = 0, saved_s4_set = 0; + +static float +int_as_float(uint32_t intval) +{ + union intfloat { + uint32_t i; + float f; + } uval; + + uval.i = intval; + return uval.f; +} + +static void +instr_out(const uint32_t *data, uint32_t hw_offset, unsigned int index, + char *fmt, ...) +{ + va_list va; + + fprintf(out, "0x%08x: 0x%08x:%s ", hw_offset + index * 4, data[index], + index == 0 ? "" : " "); + va_start(va, fmt); + vfprintf(out, fmt, va); + va_end(va); +} + + +static int +decode_mi(const uint32_t *data, int count, uint32_t hw_offset, int *failures) +{ + unsigned int opcode; + + struct { + uint32_t opcode; + int len_mask; + int min_len; + int max_len; + char *name; + } opcodes_mi[] = { + { 0x08, 0, 1, 1, "MI_ARB_ON_OFF" }, + { 0x0a, 0, 1, 1, "MI_BATCH_BUFFER_END" }, + { 0x31, 0x3f, 2, 2, "MI_BATCH_BUFFER_START" }, + { 0x14, 0x3f, 3, 3, "MI_DISPLAY_BUFFER_INFO" }, + { 0x04, 0, 1, 1, "MI_FLUSH" }, + { 0x22, 0, 3, 3, "MI_LOAD_REGISTER_IMM" }, + { 0x13, 0x3f, 2, 2, "MI_LOAD_SCAN_LINES_EXCL" }, + { 0x12, 0x3f, 2, 2, "MI_LOAD_SCAN_LINES_INCL" }, + { 0x00, 0, 1, 1, "MI_NOOP" }, + { 0x11, 0x3f, 2, 2, "MI_OVERLAY_FLIP" }, + { 0x07, 0, 1, 1, "MI_REPORT_HEAD" }, + { 0x18, 0x3f, 2, 2, "MI_SET_CONTEXT" }, + { 0x20, 0x3f, 3, 4, "MI_STORE_DATA_IMM" }, + { 0x21, 0x3f, 3, 4, "MI_STORE_DATA_INDEX" }, + { 0x24, 0x3f, 3, 3, "MI_STORE_REGISTER_MEM" }, + { 0x02, 0, 1, 1, "MI_USER_INTERRUPT" }, + { 0x03, 0, 1, 1, "MI_WAIT_FOR_EVENT" }, + }; + + + for (opcode = 0; opcode < sizeof(opcodes_mi) / sizeof(opcodes_mi[0]); + opcode++) { + if ((data[0] & 0x1f800000) >> 23 == opcodes_mi[opcode].opcode) { + unsigned int len = 1, i; + + instr_out(data, hw_offset, 0, "%s\n", opcodes_mi[opcode].name); + if (opcodes_mi[opcode].max_len > 1) { + len = (data[0] & opcodes_mi[opcode].len_mask) + 2; + if (len < opcodes_mi[opcode].min_len || + len > opcodes_mi[opcode].max_len) + { + fprintf(out, "Bad length (%d) in %s, [%d, %d]\n", + len, opcodes_mi[opcode].name, + opcodes_mi[opcode].min_len, + opcodes_mi[opcode].max_len); + } + } + + for (i = 1; i < len; i++) { + if (i >= count) + BUFFER_FAIL(count, len, opcodes_mi[opcode].name); + instr_out(data, hw_offset, i, "dword %d\n", i); + } + + return len; + } + } + + instr_out(data, hw_offset, 0, "MI UNKNOWN\n"); + (*failures)++; + return 1; +} + +static int +decode_2d(const uint32_t *data, int count, uint32_t hw_offset, int *failures) +{ + unsigned int opcode, len; + char *format = NULL; + + struct { + uint32_t opcode; + int min_len; + int max_len; + char *name; + } opcodes_2d[] = { + { 0x40, 5, 5, "COLOR_BLT" }, + { 0x43, 6, 6, "SRC_COPY_BLT" }, + { 0x01, 8, 8, "XY_SETUP_BLT" }, + { 0x11, 9, 9, "XY_SETUP_MONO_PATTERN_SL_BLT" }, + { 0x03, 3, 3, "XY_SETUP_CLIP_BLT" }, + { 0x24, 2, 2, "XY_PIXEL_BLT" }, + { 0x25, 3, 3, "XY_SCANLINES_BLT" }, + { 0x26, 4, 4, "Y_TEXT_BLT" }, + { 0x31, 5, 134, "XY_TEXT_IMMEDIATE_BLT" }, + { 0x50, 6, 6, "XY_COLOR_BLT" }, + { 0x51, 6, 6, "XY_PAT_BLT" }, + { 0x76, 8, 8, "XY_PAT_CHROMA_BLT" }, + { 0x72, 7, 135, "XY_PAT_BLT_IMMEDIATE" }, + { 0x77, 9, 137, "XY_PAT_CHROMA_BLT_IMMEDIATE" }, + { 0x52, 9, 9, "XY_MONO_PAT_BLT" }, + { 0x59, 7, 7, "XY_MONO_PAT_FIXED_BLT" }, + { 0x53, 8, 8, "XY_SRC_COPY_BLT" }, + { 0x54, 8, 8, "XY_MONO_SRC_COPY_BLT" }, + { 0x71, 9, 137, "XY_MONO_SRC_COPY_IMMEDIATE_BLT" }, + { 0x55, 9, 9, "XY_FULL_BLT" }, + { 0x55, 9, 137, "XY_FULL_IMMEDIATE_PATTERN_BLT" }, + { 0x56, 9, 9, "XY_FULL_MONO_SRC_BLT" }, + { 0x75, 10, 138, "XY_FULL_MONO_SRC_IMMEDIATE_PATTERN_BLT" }, + { 0x57, 12, 12, "XY_FULL_MONO_PATTERN_BLT" }, + { 0x58, 12, 12, "XY_FULL_MONO_PATTERN_MONO_SRC_BLT" }, + }; + + switch ((data[0] & 0x1fc00000) >> 22) { + case 0x50: + instr_out(data, hw_offset, 0, + "XY_COLOR_BLT (rgb %sabled, alpha %sabled, dst tile %d)\n", + (data[0] & (1 << 20)) ? "en" : "dis", + (data[0] & (1 << 21)) ? "en" : "dis", + (data[0] >> 11) & 1); + + len = (data[0] & 0x000000ff) + 2; + if (len != 6) + fprintf(out, "Bad count in XY_COLOR_BLT\n"); + if (count < 6) + BUFFER_FAIL(count, len, "XY_COLOR_BLT"); + + switch ((data[1] >> 24) & 0x3) { + case 0: + format="8"; + break; + case 1: + format="565"; + break; + case 2: + format="1555"; + break; + case 3: + format="8888"; + break; + } + + instr_out(data, hw_offset, 1, "format %s, pitch %d, " + "clipping %sabled\n", format, + (short)(data[1] & 0xffff), + data[1] & (1 << 30) ? "en" : "dis"); + instr_out(data, hw_offset, 2, "(%d,%d)\n", + data[2] & 0xffff, data[2] >> 16); + instr_out(data, hw_offset, 3, "(%d,%d)\n", + data[3] & 0xffff, data[3] >> 16); + instr_out(data, hw_offset, 4, "offset 0x%08x\n", data[4]); + instr_out(data, hw_offset, 5, "color\n"); + return len; + case 0x53: + instr_out(data, hw_offset, 0, + "XY_SRC_COPY_BLT (rgb %sabled, alpha %sabled, " + "src tile %d, dst tile %d)\n", + (data[0] & (1 << 20)) ? "en" : "dis", + (data[0] & (1 << 21)) ? "en" : "dis", + (data[0] >> 15) & 1, + (data[0] >> 11) & 1); + + len = (data[0] & 0x000000ff) + 2; + if (len != 8) + fprintf(out, "Bad count in XY_SRC_COPY_BLT\n"); + if (count < 8) + BUFFER_FAIL(count, len, "XY_SRC_COPY_BLT"); + + switch ((data[1] >> 24) & 0x3) { + case 0: + format="8"; + break; + case 1: + format="565"; + break; + case 2: + format="1555"; + break; + case 3: + format="8888"; + break; + } + + instr_out(data, hw_offset, 1, "format %s, dst pitch %d, " + "clipping %sabled\n", format, + (short)(data[1] & 0xffff), + data[1] & (1 << 30) ? "en" : "dis"); + instr_out(data, hw_offset, 2, "dst (%d,%d)\n", + data[2] & 0xffff, data[2] >> 16); + instr_out(data, hw_offset, 3, "dst (%d,%d)\n", + data[3] & 0xffff, data[3] >> 16); + instr_out(data, hw_offset, 4, "dst offset 0x%08x\n", data[4]); + instr_out(data, hw_offset, 5, "src (%d,%d)\n", + data[5] & 0xffff, data[5] >> 16); + instr_out(data, hw_offset, 6, "src pitch %d\n", + (short)(data[6] & 0xffff)); + instr_out(data, hw_offset, 7, "src offset 0x%08x\n", data[7]); + return len; + } + + for (opcode = 0; opcode < sizeof(opcodes_2d) / sizeof(opcodes_2d[0]); + opcode++) { + if ((data[0] & 0x1fc00000) >> 22 == opcodes_2d[opcode].opcode) { + unsigned int i; + + len = 1; + instr_out(data, hw_offset, 0, "%s\n", opcodes_2d[opcode].name); + if (opcodes_2d[opcode].max_len > 1) { + len = (data[0] & 0x000000ff) + 2; + if (len < opcodes_2d[opcode].min_len || + len > opcodes_2d[opcode].max_len) + { + fprintf(out, "Bad count in %s\n", opcodes_2d[opcode].name); + } + } + + for (i = 1; i < len; i++) { + if (i >= count) + BUFFER_FAIL(count, len, opcodes_2d[opcode].name); + instr_out(data, hw_offset, i, "dword %d\n", i); + } + + return len; + } + } + + instr_out(data, hw_offset, 0, "2D UNKNOWN\n"); + (*failures)++; + return 1; +} + +static int +decode_3d_1c(const uint32_t *data, int count, uint32_t hw_offset, int *failures) +{ + switch ((data[0] & 0x00f80000) >> 19) { + case 0x11: + instr_out(data, hw_offset, 0, "3DSTATE_DEPTH_SUBRECTANGLE_DISALBE\n"); + return 1; + case 0x10: + instr_out(data, hw_offset, 0, "3DSTATE_SCISSOR_ENABLE\n"); + return 1; + case 0x01: + instr_out(data, hw_offset, 0, "3DSTATE_MAP_COORD_SET_I830\n"); + return 1; + case 0x0a: + instr_out(data, hw_offset, 0, "3DSTATE_MAP_CUBE_I830\n"); + return 1; + case 0x05: + instr_out(data, hw_offset, 0, "3DSTATE_MAP_TEX_STREAM_I830\n"); + return 1; + } + + instr_out(data, hw_offset, 0, "3D UNKNOWN\n"); + (*failures)++; + return 1; +} + +/** Sets the string dstname to describe the destination of the PS instruction */ +static void +i915_get_instruction_dst(const uint32_t *data, int i, char *dstname, int do_mask) +{ + uint32_t a0 = data[i]; + int dst_nr = (a0 >> 14) & 0xf; + char dstmask[8]; + char *sat; + + if (do_mask) { + if (((a0 >> 10) & 0xf) == 0xf) { + dstmask[0] = 0; + } else { + int dstmask_index = 0; + + dstmask[dstmask_index++] = '.'; + if (a0 & (1 << 10)) + dstmask[dstmask_index++] = 'x'; + if (a0 & (1 << 11)) + dstmask[dstmask_index++] = 'y'; + if (a0 & (1 << 12)) + dstmask[dstmask_index++] = 'z'; + if (a0 & (1 << 13)) + dstmask[dstmask_index++] = 'w'; + dstmask[dstmask_index++] = 0; + } + + if (a0 & (1 << 22)) + sat = ".sat"; + else + sat = ""; + } else { + dstmask[0] = 0; + sat = ""; + } + + switch ((a0 >> 19) & 0x7) { + case 0: + if (dst_nr > 15) + fprintf(out, "bad destination reg R%d\n", dst_nr); + sprintf(dstname, "R%d%s%s", dst_nr, dstmask, sat); + break; + case 4: + if (dst_nr > 0) + fprintf(out, "bad destination reg oC%d\n", dst_nr); + sprintf(dstname, "oC%s%s", dstmask, sat); + break; + case 5: + if (dst_nr > 0) + fprintf(out, "bad destination reg oD%d\n", dst_nr); + sprintf(dstname, "oD%s%s", dstmask, sat); + break; + case 6: + if (dst_nr > 2) + fprintf(out, "bad destination reg U%d\n", dst_nr); + sprintf(dstname, "U%d%s%s", dst_nr, dstmask, sat); + break; + default: + sprintf(dstname, "RESERVED"); + break; + } +} + +static char * +i915_get_channel_swizzle(uint32_t select) +{ + switch (select & 0x7) { + case 0: + return (select & 8) ? "-x" : "x"; + case 1: + return (select & 8) ? "-y" : "y"; + case 2: + return (select & 8) ? "-z" : "z"; + case 3: + return (select & 8) ? "-w" : "w"; + case 4: + return (select & 8) ? "-0" : "0"; + case 5: + return (select & 8) ? "-1" : "1"; + default: + return (select & 8) ? "-bad" : "bad"; + } +} + +static void +i915_get_instruction_src_name(uint32_t src_type, uint32_t src_nr, char *name) +{ + switch (src_type) { + case 0: + sprintf(name, "R%d", src_nr); + if (src_nr > 15) + fprintf(out, "bad src reg %s\n", name); + break; + case 1: + if (src_nr < 8) + sprintf(name, "T%d", src_nr); + else if (src_nr == 8) + sprintf(name, "DIFFUSE"); + else if (src_nr == 9) + sprintf(name, "SPECULAR"); + else if (src_nr == 10) + sprintf(name, "FOG"); + else { + fprintf(out, "bad src reg T%d\n", src_nr); + sprintf(name, "RESERVED"); + } + break; + case 2: + sprintf(name, "C%d", src_nr); + if (src_nr > 31) + fprintf(out, "bad src reg %s\n", name); + break; + case 4: + sprintf(name, "oC"); + if (src_nr > 0) + fprintf(out, "bad src reg oC%d\n", src_nr); + break; + case 5: + sprintf(name, "oD"); + if (src_nr > 0) + fprintf(out, "bad src reg oD%d\n", src_nr); + break; + case 6: + sprintf(name, "U%d", src_nr); + if (src_nr > 2) + fprintf(out, "bad src reg %s\n", name); + break; + default: + fprintf(out, "bad src reg type %d\n", src_type); + sprintf(name, "RESERVED"); + break; + } +} + +static void +i915_get_instruction_src0(const uint32_t *data, int i, char *srcname) +{ + uint32_t a0 = data[i]; + uint32_t a1 = data[i + 1]; + int src_nr = (a0 >> 2) & 0x1f; + char *swizzle_x = i915_get_channel_swizzle((a1 >> 28) & 0xf); + char *swizzle_y = i915_get_channel_swizzle((a1 >> 24) & 0xf); + char *swizzle_z = i915_get_channel_swizzle((a1 >> 20) & 0xf); + char *swizzle_w = i915_get_channel_swizzle((a1 >> 16) & 0xf); + char swizzle[100]; + + i915_get_instruction_src_name((a0 >> 7) & 0x7, src_nr, srcname); + sprintf(swizzle, ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w); + if (strcmp(swizzle, ".xyzw") != 0) + strcat(srcname, swizzle); +} + +static void +i915_get_instruction_src1(const uint32_t *data, int i, char *srcname) +{ + uint32_t a1 = data[i + 1]; + uint32_t a2 = data[i + 2]; + int src_nr = (a1 >> 8) & 0x1f; + char *swizzle_x = i915_get_channel_swizzle((a1 >> 4) & 0xf); + char *swizzle_y = i915_get_channel_swizzle((a1 >> 0) & 0xf); + char *swizzle_z = i915_get_channel_swizzle((a2 >> 28) & 0xf); + char *swizzle_w = i915_get_channel_swizzle((a2 >> 24) & 0xf); + char swizzle[100]; + + i915_get_instruction_src_name((a1 >> 13) & 0x7, src_nr, srcname); + sprintf(swizzle, ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w); + if (strcmp(swizzle, ".xyzw") != 0) + strcat(srcname, swizzle); +} + +static void +i915_get_instruction_src2(const uint32_t *data, int i, char *srcname) +{ + uint32_t a2 = data[i + 2]; + int src_nr = (a2 >> 16) & 0x1f; + char *swizzle_x = i915_get_channel_swizzle((a2 >> 12) & 0xf); + char *swizzle_y = i915_get_channel_swizzle((a2 >> 8) & 0xf); + char *swizzle_z = i915_get_channel_swizzle((a2 >> 4) & 0xf); + char *swizzle_w = i915_get_channel_swizzle((a2 >> 0) & 0xf); + char swizzle[100]; + + i915_get_instruction_src_name((a2 >> 21) & 0x7, src_nr, srcname); + sprintf(swizzle, ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w); + if (strcmp(swizzle, ".xyzw") != 0) + strcat(srcname, swizzle); +} + +static void +i915_get_instruction_addr(uint32_t src_type, uint32_t src_nr, char *name) +{ + switch (src_type) { + case 0: + sprintf(name, "R%d", src_nr); + if (src_nr > 15) + fprintf(out, "bad src reg %s\n", name); + break; + case 1: + if (src_nr < 8) + sprintf(name, "T%d", src_nr); + else if (src_nr == 8) + sprintf(name, "DIFFUSE"); + else if (src_nr == 9) + sprintf(name, "SPECULAR"); + else if (src_nr == 10) + sprintf(name, "FOG"); + else { + fprintf(out, "bad src reg T%d\n", src_nr); + sprintf(name, "RESERVED"); + } + break; + case 4: + sprintf(name, "oC"); + if (src_nr > 0) + fprintf(out, "bad src reg oC%d\n", src_nr); + break; + case 5: + sprintf(name, "oD"); + if (src_nr > 0) + fprintf(out, "bad src reg oD%d\n", src_nr); + break; + default: + fprintf(out, "bad src reg type %d\n", src_type); + sprintf(name, "RESERVED"); + break; + } +} + +static void +i915_decode_alu1(const uint32_t *data, uint32_t hw_offset, + int i, char *instr_prefix, char *op_name) +{ + char dst[100], src0[100]; + + i915_get_instruction_dst(data, i, dst, 1); + i915_get_instruction_src0(data, i, src0); + + instr_out(data, hw_offset, i++, "%s: %s %s, %s\n", instr_prefix, + op_name, dst, src0); + instr_out(data, hw_offset, i++, "%s\n", instr_prefix); + instr_out(data, hw_offset, i++, "%s\n", instr_prefix); +} + +static void +i915_decode_alu2(const uint32_t *data, uint32_t hw_offset, + int i, char *instr_prefix, char *op_name) +{ + char dst[100], src0[100], src1[100]; + + i915_get_instruction_dst(data, i, dst, 1); + i915_get_instruction_src0(data, i, src0); + i915_get_instruction_src1(data, i, src1); + + instr_out(data, hw_offset, i++, "%s: %s %s, %s, %s\n", instr_prefix, + op_name, dst, src0, src1); + instr_out(data, hw_offset, i++, "%s\n", instr_prefix); + instr_out(data, hw_offset, i++, "%s\n", instr_prefix); +} + +static void +i915_decode_alu3(const uint32_t *data, uint32_t hw_offset, + int i, char *instr_prefix, char *op_name) +{ + char dst[100], src0[100], src1[100], src2[100]; + + i915_get_instruction_dst(data, i, dst, 1); + i915_get_instruction_src0(data, i, src0); + i915_get_instruction_src1(data, i, src1); + i915_get_instruction_src2(data, i, src2); + + instr_out(data, hw_offset, i++, "%s: %s %s, %s, %s, %s\n", instr_prefix, + op_name, dst, src0, src1, src2); + instr_out(data, hw_offset, i++, "%s\n", instr_prefix); + instr_out(data, hw_offset, i++, "%s\n", instr_prefix); +} + +static void +i915_decode_tex(const uint32_t *data, uint32_t hw_offset, int i, char *instr_prefix, + char *tex_name) +{ + uint32_t t0 = data[i]; + uint32_t t1 = data[i + 1]; + char dst_name[100]; + char addr_name[100]; + int sampler_nr; + + i915_get_instruction_dst(data, i, dst_name, 0); + i915_get_instruction_addr((t1 >> 24) & 0x7, + (t1 >> 17) & 0xf, + addr_name); + sampler_nr = t0 & 0xf; + + instr_out(data, hw_offset, i++, "%s: %s %s, S%d, %s\n", instr_prefix, + tex_name, dst_name, sampler_nr, addr_name); + instr_out(data, hw_offset, i++, "%s\n", instr_prefix); + instr_out(data, hw_offset, i++, "%s\n", instr_prefix); +} + +static void +i915_decode_dcl(const uint32_t *data, uint32_t hw_offset, int i, char *instr_prefix) +{ + uint32_t d0 = data[i]; + char *sampletype; + int dcl_nr = (d0 >> 14) & 0xf; + char *dcl_x = d0 & (1 << 10) ? "x" : ""; + char *dcl_y = d0 & (1 << 11) ? "y" : ""; + char *dcl_z = d0 & (1 << 12) ? "z" : ""; + char *dcl_w = d0 & (1 << 13) ? "w" : ""; + char dcl_mask[10]; + + switch ((d0 >> 19) & 0x3) { + case 1: + sprintf(dcl_mask, ".%s%s%s%s", dcl_x, dcl_y, dcl_z, dcl_w); + if (strcmp(dcl_mask, ".") == 0) + fprintf(out, "bad (empty) dcl mask\n"); + + if (dcl_nr > 10) + fprintf(out, "bad T%d dcl register number\n", dcl_nr); + if (dcl_nr < 8) { + if (strcmp(dcl_mask, ".x") != 0 && + strcmp(dcl_mask, ".xy") != 0 && + strcmp(dcl_mask, ".xz") != 0 && + strcmp(dcl_mask, ".w") != 0 && + strcmp(dcl_mask, ".xyzw") != 0) { + fprintf(out, "bad T%d.%s dcl mask\n", dcl_nr, dcl_mask); + } + instr_out(data, hw_offset, i++, "%s: DCL T%d%s\n", instr_prefix, + dcl_nr, dcl_mask); + } else { + if (strcmp(dcl_mask, ".xz") == 0) + fprintf(out, "errataed bad dcl mask %s\n", dcl_mask); + else if (strcmp(dcl_mask, ".xw") == 0) + fprintf(out, "errataed bad dcl mask %s\n", dcl_mask); + else if (strcmp(dcl_mask, ".xzw") == 0) + fprintf(out, "errataed bad dcl mask %s\n", dcl_mask); + + if (dcl_nr == 8) { + instr_out(data, hw_offset, i++, "%s: DCL DIFFUSE%s\n", instr_prefix, + dcl_mask); + } else if (dcl_nr == 9) { + instr_out(data, hw_offset, i++, "%s: DCL SPECULAR%s\n", instr_prefix, + dcl_mask); + } else if (dcl_nr == 10) { + instr_out(data, hw_offset, i++, "%s: DCL FOG%s\n", instr_prefix, + dcl_mask); + } + } + instr_out(data, hw_offset, i++, "%s\n", instr_prefix); + instr_out(data, hw_offset, i++, "%s\n", instr_prefix); + break; + case 3: + switch ((d0 >> 22) & 0x3) { + case 0: + sampletype = "2D"; + break; + case 1: + sampletype = "CUBE"; + break; + case 2: + sampletype = "3D"; + break; + default: + sampletype = "RESERVED"; + break; + } + if (dcl_nr > 15) + fprintf(out, "bad S%d dcl register number\n", dcl_nr); + instr_out(data, hw_offset, i++, "%s: DCL S%d %s\n", instr_prefix, + dcl_nr, sampletype); + instr_out(data, hw_offset, i++, "%s\n", instr_prefix); + instr_out(data, hw_offset, i++, "%s\n", instr_prefix); + break; + default: + instr_out(data, hw_offset, i++, "%s: DCL RESERVED%d\n", instr_prefix, dcl_nr); + instr_out(data, hw_offset, i++, "%s\n", instr_prefix); + instr_out(data, hw_offset, i++, "%s\n", instr_prefix); + } +} + +static void +i915_decode_instruction(const uint32_t *data, uint32_t hw_offset, + int i, char *instr_prefix) +{ + switch ((data[i] >> 24) & 0x1f) { + case 0x0: + instr_out(data, hw_offset, i++, "%s: NOP\n", instr_prefix); + instr_out(data, hw_offset, i++, "%s\n", instr_prefix); + instr_out(data, hw_offset, i++, "%s\n", instr_prefix); + break; + case 0x01: + i915_decode_alu2(data, hw_offset, i, instr_prefix, "ADD"); + break; + case 0x02: + i915_decode_alu1(data, hw_offset, i, instr_prefix, "MOV"); + break; + case 0x03: + i915_decode_alu2(data, hw_offset, i, instr_prefix, "MUL"); + break; + case 0x04: + i915_decode_alu3(data, hw_offset, i, instr_prefix, "MAD"); + break; + case 0x05: + i915_decode_alu3(data, hw_offset, i, instr_prefix, "DP2ADD"); + break; + case 0x06: + i915_decode_alu2(data, hw_offset, i, instr_prefix, "DP3"); + break; + case 0x07: + i915_decode_alu2(data, hw_offset, i, instr_prefix, "DP4"); + break; + case 0x08: + i915_decode_alu1(data, hw_offset, i, instr_prefix, "FRC"); + break; + case 0x09: + i915_decode_alu1(data, hw_offset, i, instr_prefix, "RCP"); + break; + case 0x0a: + i915_decode_alu1(data, hw_offset, i, instr_prefix, "RSQ"); + break; + case 0x0b: + i915_decode_alu1(data, hw_offset, i, instr_prefix, "EXP"); + break; + case 0x0c: + i915_decode_alu1(data, hw_offset, i, instr_prefix, "LOG"); + break; + case 0x0d: + i915_decode_alu2(data, hw_offset, i, instr_prefix, "CMP"); + break; + case 0x0e: + i915_decode_alu2(data, hw_offset, i, instr_prefix, "MIN"); + break; + case 0x0f: + i915_decode_alu2(data, hw_offset, i, instr_prefix, "MAX"); + break; + case 0x10: + i915_decode_alu1(data, hw_offset, i, instr_prefix, "FLR"); + break; + case 0x11: + i915_decode_alu1(data, hw_offset, i, instr_prefix, "MOD"); + break; + case 0x12: + i915_decode_alu1(data, hw_offset, i, instr_prefix, "TRC"); + break; + case 0x13: + i915_decode_alu2(data, hw_offset, i, instr_prefix, "SGE"); + break; + case 0x14: + i915_decode_alu2(data, hw_offset, i, instr_prefix, "SLT"); + break; + case 0x15: + i915_decode_tex(data, hw_offset, i, instr_prefix, "TEXLD"); + break; + case 0x16: + i915_decode_tex(data, hw_offset, i, instr_prefix, "TEXLDP"); + break; + case 0x17: + i915_decode_tex(data, hw_offset, i, instr_prefix, "TEXLDB"); + break; + case 0x19: + i915_decode_dcl(data, hw_offset, i, instr_prefix); + break; + default: + instr_out(data, hw_offset, i++, "%s: unknown\n", instr_prefix); + instr_out(data, hw_offset, i++, "%s\n", instr_prefix); + instr_out(data, hw_offset, i++, "%s\n", instr_prefix); + break; + } +} + +static int +decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures, int i830) +{ + unsigned int len, i, c, opcode, word, map, sampler, instr; + char *format; + + struct { + uint32_t opcode; + int i830_only; + int min_len; + int max_len; + char *name; + } opcodes_3d_1d[] = { + { 0x8e, 0, 3, 3, "3DSTATE_BUFFER_INFO" }, + { 0x86, 0, 4, 4, "3DSTATE_CHROMA_KEY" }, + { 0x9c, 0, 1, 1, "3DSTATE_CLEAR_PARAMETERS" }, + { 0x88, 0, 2, 2, "3DSTATE_CONSTANT_BLEND_COLOR" }, + { 0x99, 0, 2, 2, "3DSTATE_DEFAULT_DIFFUSE" }, + { 0x9a, 0, 2, 2, "3DSTATE_DEFAULT_SPECULAR" }, + { 0x98, 0, 2, 2, "3DSTATE_DEFAULT_Z" }, + { 0x97, 0, 2, 2, "3DSTATE_DEPTH_OFFSET_SCALE" }, + { 0x85, 0, 2, 2, "3DSTATE_DEST_BUFFER_VARIABLES" }, + { 0x80, 0, 5, 5, "3DSTATE_DRAWING_RECTANGLE" }, + { 0x8e, 0, 3, 3, "3DSTATE_BUFFER_INFO" }, + { 0x9d, 0, 65, 65, "3DSTATE_FILTER_COEFFICIENTS_4X4" }, + { 0x9e, 0, 4, 4, "3DSTATE_MONO_FILTER" }, + { 0x89, 0, 4, 4, "3DSTATE_FOG_MODE" }, + { 0x8f, 0, 2, 16, "3DSTATE_MAP_PALLETE_LOAD_32" }, + { 0x81, 0, 3, 3, "3DSTATE_SCISSOR_RECTANGLE" }, + { 0x83, 0, 2, 2, "3DSTATE_SPAN_STIPPLE" }, + { 0x8c, 1, 2, 2, "3DSTATE_MAP_COORD_TRANSFORM_I830" }, + { 0x8b, 1, 2, 2, "3DSTATE_MAP_VERTEX_TRANSFORM_I830" }, + { 0x8d, 1, 3, 3, "3DSTATE_W_STATE_I830" }, + { 0x01, 1, 2, 2, "3DSTATE_COLOR_FACTOR_I830" }, + { 0x02, 1, 2, 2, "3DSTATE_MAP_COORD_SETBIND_I830" }, + }; + + switch ((data[0] & 0x00ff0000) >> 16) { + case 0x07: + /* This instruction is unusual. A 0 length means just 1 DWORD instead of + * 2. The 0 length is specified in one place to be unsupported, but + * stated to be required in another, and 0 length LOAD_INDIRECTs appear + * to cause no harm at least. + */ + instr_out(data, hw_offset, 0, "3DSTATE_LOAD_INDIRECT\n"); + len = (data[0] & 0x000000ff) + 1; + i = 1; + if (data[0] & (0x01 << 8)) { + if (i + 2 >= count) + BUFFER_FAIL(count, len, "3DSTATE_LOAD_INDIRECT"); + instr_out(data, hw_offset, i++, "SIS.0\n"); + instr_out(data, hw_offset, i++, "SIS.1\n"); + } + if (data[0] & (0x02 << 8)) { + if (i + 1 >= count) + BUFFER_FAIL(count, len, "3DSTATE_LOAD_INDIRECT"); + instr_out(data, hw_offset, i++, "DIS.0\n"); + } + if (data[0] & (0x04 << 8)) { + if (i + 2 >= count) + BUFFER_FAIL(count, len, "3DSTATE_LOAD_INDIRECT"); + instr_out(data, hw_offset, i++, "SSB.0\n"); + instr_out(data, hw_offset, i++, "SSB.1\n"); + } + if (data[0] & (0x08 << 8)) { + if (i + 2 >= count) + BUFFER_FAIL(count, len, "3DSTATE_LOAD_INDIRECT"); + instr_out(data, hw_offset, i++, "MSB.0\n"); + instr_out(data, hw_offset, i++, "MSB.1\n"); + } + if (data[0] & (0x10 << 8)) { + if (i + 2 >= count) + BUFFER_FAIL(count, len, "3DSTATE_LOAD_INDIRECT"); + instr_out(data, hw_offset, i++, "PSP.0\n"); + instr_out(data, hw_offset, i++, "PSP.1\n"); + } + if (data[0] & (0x20 << 8)) { + if (i + 2 >= count) + BUFFER_FAIL(count, len, "3DSTATE_LOAD_INDIRECT"); + instr_out(data, hw_offset, i++, "PSC.0\n"); + instr_out(data, hw_offset, i++, "PSC.1\n"); + } + if (len != i) { + fprintf(out, "Bad count in 3DSTATE_LOAD_INDIRECT\n"); + (*failures)++; + return len; + } + return len; + case 0x04: + instr_out(data, hw_offset, 0, "3DSTATE_LOAD_STATE_IMMEDIATE_1\n"); + len = (data[0] & 0x0000000f) + 2; + i = 1; + for (word = 0; word <= 7; word++) { + if (data[0] & (1 << (4 + word))) { + if (i >= count) + BUFFER_FAIL(count, len, "3DSTATE_LOAD_STATE_IMMEDIATE_1"); + + /* save vertex state for decode */ + if (word == 2) { + saved_s2_set = 1; + saved_s2 = data[i]; + } + if (word == 4) { + saved_s4_set = 1; + saved_s4 = data[i]; + } + + instr_out(data, hw_offset, i++, "S%d\n", word); + } + } + if (len != i) { + fprintf(out, "Bad count in 3DSTATE_LOAD_INDIRECT\n"); + (*failures)++; + } + return len; + case 0x00: + instr_out(data, hw_offset, 0, "3DSTATE_MAP_STATE\n"); + len = (data[0] & 0x0000003f) + 2; + instr_out(data, hw_offset, 1, "mask\n"); + + i = 2; + for (map = 0; map <= 15; map++) { + if (data[1] & (1 << map)) { + if (i + 3 >= count) + BUFFER_FAIL(count, len, "3DSTATE_MAP_STATE"); + instr_out(data, hw_offset, i++, "map %d MS2\n", map); + instr_out(data, hw_offset, i++, "map %d MS3\n", map); + instr_out(data, hw_offset, i++, "map %d MS4\n", map); + } + } + if (len != i) { + fprintf(out, "Bad count in 3DSTATE_MAP_STATE\n"); + (*failures)++; + return len; + } + return len; + case 0x06: + instr_out(data, hw_offset, 0, "3DSTATE_PIXEL_SHADER_CONSTANTS\n"); + len = (data[0] & 0x000000ff) + 2; + + i = 2; + for (c = 0; c <= 31; c++) { + if (data[1] & (1 << c)) { + if (i + 4 >= count) + BUFFER_FAIL(count, len, "3DSTATE_PIXEL_SHADER_CONSTANTS"); + instr_out(data, hw_offset, i, "C%d.X = %f\n", + c, int_as_float(data[i])); + i++; + instr_out(data, hw_offset, i, "C%d.Y = %f\n", + c, int_as_float(data[i])); + i++; + instr_out(data, hw_offset, i, "C%d.Z = %f\n", + c, int_as_float(data[i])); + i++; + instr_out(data, hw_offset, i, "C%d.W = %f\n", + c, int_as_float(data[i])); + i++; + } + } + if (len != i) { + fprintf(out, "Bad count in 3DSTATE_PIXEL_SHADER_CONSTANTS\n"); + (*failures)++; + } + return len; + case 0x05: + instr_out(data, hw_offset, 0, "3DSTATE_PIXEL_SHADER_PROGRAM\n"); + len = (data[0] & 0x000000ff) + 2; + if ((len - 1) % 3 != 0 || len > 370) { + fprintf(out, "Bad count in 3DSTATE_PIXEL_SHADER_PROGRAM\n"); + (*failures)++; + } + i = 1; + for (instr = 0; instr < (len - 1) / 3; instr++) { + char instr_prefix[10]; + + if (i + 3 >= count) + BUFFER_FAIL(count, len, "3DSTATE_PIXEL_SHADER_PROGRAM"); + sprintf(instr_prefix, "PS%03d", instr); + i915_decode_instruction(data, hw_offset, i, instr_prefix); + i += 3; + } + return len; + case 0x01: + if (i830) + break; + instr_out(data, hw_offset, 0, "3DSTATE_SAMPLER_STATE\n"); + instr_out(data, hw_offset, 1, "mask\n"); + len = (data[0] & 0x0000003f) + 2; + i = 2; + for (sampler = 0; sampler <= 15; sampler++) { + if (data[1] & (1 << sampler)) { + if (i + 3 >= count) + BUFFER_FAIL(count, len, "3DSTATE_SAMPLER_STATE"); + instr_out(data, hw_offset, i++, "sampler %d SS2\n", + sampler); + instr_out(data, hw_offset, i++, "sampler %d SS3\n", + sampler); + instr_out(data, hw_offset, i++, "sampler %d SS4\n", + sampler); + } + } + if (len != i) { + fprintf(out, "Bad count in 3DSTATE_SAMPLER_STATE\n"); + (*failures)++; + } + return len; + case 0x85: + len = (data[0] & 0x0000000f) + 2; + + if (len != 2) + fprintf(out, "Bad count in 3DSTATE_DEST_BUFFER_VARIABLES\n"); + if (count < 2) + BUFFER_FAIL(count, len, "3DSTATE_DEST_BUFFER_VARIABLES"); + + instr_out(data, hw_offset, 0, + "3DSTATE_DEST_BUFFER_VARIABLES\n"); + + switch ((data[1] >> 8) & 0xf) { + case 0x0: format = "g8"; break; + case 0x1: format = "x1r5g5b5"; break; + case 0x2: format = "r5g6b5"; break; + case 0x3: format = "a8r8g8b8"; break; + case 0x4: format = "ycrcb_swapy"; break; + case 0x5: format = "ycrcb_normal"; break; + case 0x6: format = "ycrcb_swapuv"; break; + case 0x7: format = "ycrcb_swapuvy"; break; + case 0x8: format = "a4r4g4b4"; break; + case 0x9: format = "a1r5g5b5"; break; + case 0xa: format = "a2r10g10b10"; break; + default: format = "BAD"; break; + } + instr_out(data, hw_offset, 1, "%s format, early Z %sabled\n", + format, + (data[1] & (1 << 31)) ? "en" : "dis"); + return len; + } + + for (opcode = 0; opcode < sizeof(opcodes_3d_1d) / sizeof(opcodes_3d_1d[0]); + opcode++) + { + if (opcodes_3d_1d[opcode].i830_only && !i830) + continue; + + if (((data[0] & 0x00ff0000) >> 16) == opcodes_3d_1d[opcode].opcode) { + len = 1; + + instr_out(data, hw_offset, 0, "%s\n", opcodes_3d_1d[opcode].name); + if (opcodes_3d_1d[opcode].max_len > 1) { + len = (data[0] & 0x0000ffff) + 2; + if (len < opcodes_3d_1d[opcode].min_len || + len > opcodes_3d_1d[opcode].max_len) + { + fprintf(out, "Bad count in %s\n", + opcodes_3d_1d[opcode].name); + (*failures)++; + } + } + + for (i = 1; i < len; i++) { + if (i >= count) + BUFFER_FAIL(count, len, opcodes_3d_1d[opcode].name); + instr_out(data, hw_offset, i, "dword %d\n", i); + } + + return len; + } + } + + instr_out(data, hw_offset, 0, "3D UNKNOWN\n"); + (*failures)++; + return 1; +} + +static int +decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset, + int *failures) +{ + char immediate = (data[0] & (1 << 23)) == 0; + unsigned int len, i; + char *primtype; + + switch ((data[0] >> 18) & 0xf) { + case 0x0: primtype = "TRILIST"; break; + case 0x1: primtype = "TRISTRIP"; break; + case 0x2: primtype = "TRISTRIP_REVERSE"; break; + case 0x3: primtype = "TRIFAN"; break; + case 0x4: primtype = "POLYGON"; break; + case 0x5: primtype = "LINELIST"; break; + case 0x6: primtype = "LINESTRIP"; break; + case 0x7: primtype = "RECTLIST"; break; + case 0x8: primtype = "POINTLIST"; break; + case 0x9: primtype = "DIB"; break; + case 0xa: primtype = "CLEAR_RECT"; break; + default: primtype = "unknown"; break; + } + + /* XXX: 3DPRIM_DIB not supported */ + if (immediate) { + len = (data[0] & 0x0003ffff) + 2; + instr_out(data, hw_offset, 0, "3DPRIMITIVE inline %s\n", primtype); + if (count < len) + BUFFER_FAIL(count, len, "3DPRIMITIVE inline"); + if (!saved_s2_set || !saved_s4_set) { + fprintf(out, "unknown vertex format\n"); + for (i = 1; i < len; i++) { + instr_out(data, hw_offset, i, + " vertex data (%f float)\n", + int_as_float(data[i])); + } + } else { + unsigned int vertex = 0; + for (i = 1; i < len;) { + unsigned int tc; + +#define VERTEX_OUT(fmt, ...) do { \ + if (i < len) \ + instr_out(data, hw_offset, i, " V%d."fmt"\n", vertex, __VA_ARGS__); \ + else \ + fprintf(out, " missing data in V%d\n", vertex); \ + i++; \ +} while (0) + + VERTEX_OUT("X = %f", int_as_float(data[i])); + VERTEX_OUT("Y = %f", int_as_float(data[i])); + switch (saved_s4 >> 6 & 0x7) { + case 0x1: + VERTEX_OUT("Z = %f", int_as_float(data[i])); + break; + case 0x2: + VERTEX_OUT("Z = %f", int_as_float(data[i])); + VERTEX_OUT("W = %f", int_as_float(data[i])); + break; + case 0x3: + break; + case 0x4: + VERTEX_OUT("W = %f", int_as_float(data[i])); + break; + default: + fprintf(out, "bad S4 position mask\n"); + } + + if (saved_s4 & (1 << 10)) { + VERTEX_OUT("color = (A=0x%02x, R=0x%02x, G=0x%02x, " + "B=0x%02x)", + data[i] >> 24, + (data[i] >> 16) & 0xff, + (data[i] >> 8) & 0xff, + data[i] & 0xff); + } + if (saved_s4 & (1 << 11)) { + VERTEX_OUT("spec = (A=0x%02x, R=0x%02x, G=0x%02x, " + "B=0x%02x)", + data[i] >> 24, + (data[i] >> 16) & 0xff, + (data[i] >> 8) & 0xff, + data[i] & 0xff); + } + if (saved_s4 & (1 << 12)) + VERTEX_OUT("width = 0x%08x)", data[i]); + + for (tc = 0; tc <= 7; tc++) { + switch ((saved_s2 >> (tc * 4)) & 0xf) { + case 0x0: + VERTEX_OUT("T%d.X = %f", tc, int_as_float(data[i])); + VERTEX_OUT("T%d.Y = %f", tc, int_as_float(data[i])); + break; + case 0x1: + VERTEX_OUT("T%d.X = %f", tc, int_as_float(data[i])); + VERTEX_OUT("T%d.Y = %f", tc, int_as_float(data[i])); + VERTEX_OUT("T%d.Z = %f", tc, int_as_float(data[i])); + break; + case 0x2: + VERTEX_OUT("T%d.X = %f", tc, int_as_float(data[i])); + VERTEX_OUT("T%d.Y = %f", tc, int_as_float(data[i])); + VERTEX_OUT("T%d.Z = %f", tc, int_as_float(data[i])); + VERTEX_OUT("T%d.W = %f", tc, int_as_float(data[i])); + break; + case 0x3: + VERTEX_OUT("T%d.X = %f", tc, int_as_float(data[i])); + break; + case 0x4: + VERTEX_OUT("T%d.XY = 0x%08x half-float", tc, data[i]); + break; + case 0x5: + VERTEX_OUT("T%d.XY = 0x%08x half-float", tc, data[i]); + VERTEX_OUT("T%d.ZW = 0x%08x half-float", tc, data[i]); + break; + case 0xf: + break; + default: + fprintf(out, "bad S2.T%d format\n", tc); + } + } + vertex++; + } + } + } else { + /* indirect vertices */ + len = data[0] & 0x0000ffff; /* index count */ + if (data[0] & (1 << 17)) { + /* random vertex access */ + if (count < (len + 1) / 2 + 1) { + BUFFER_FAIL(count, (len + 1) / 2 + 1, + "3DPRIMITIVE random indirect"); + } + instr_out(data, hw_offset, 0, + "3DPRIMITIVE random indirect %s (%d)\n", primtype, len); + if (len == 0) { + /* vertex indices continue until 0xffff is found */ + for (i = 1; i < count; i++) { + if ((data[i] & 0xffff) == 0xffff) { + instr_out(data, hw_offset, i, + " indices: (terminator)\n"); + return i; + } else if ((data[i] >> 16) == 0xffff) { + instr_out(data, hw_offset, i, + " indices: 0x%04x, " + "(terminator)\n", + data[i] & 0xffff); + return i; + } else { + instr_out(data, hw_offset, i, + " indices: 0x%04x, 0x%04x\n", + data[i] & 0xffff, data[i] >> 16); + } + } + fprintf(out, + "3DPRIMITIVE: no terminator found in index buffer\n"); + (*failures)++; + return count; + } else { + /* fixed size vertex index buffer */ + for (i = 0; i < len; i += 2) { + if (i * 2 == len - 1) { + instr_out(data, hw_offset, i, + " indices: 0x%04x\n", + data[i] & 0xffff); + } else { + instr_out(data, hw_offset, i, + " indices: 0x%04x, 0x%04x\n", + data[i] & 0xffff, data[i] >> 16); + } + } + } + return (len + 1) / 2 + 1; + } else { + /* sequential vertex access */ + if (count < 2) + BUFFER_FAIL(count, 2, "3DPRIMITIVE seq indirect"); + instr_out(data, hw_offset, 0, + "3DPRIMITIVE sequential indirect %s, %d starting from " + "%d\n", primtype, len, data[1] & 0xffff); + instr_out(data, hw_offset, 1, " start\n"); + return 2; + } + } + + return len; +} + +static int +decode_3d(const uint32_t *data, int count, uint32_t hw_offset, int *failures) +{ + unsigned int opcode; + + struct { + uint32_t opcode; + int min_len; + int max_len; + char *name; + } opcodes_3d[] = { + { 0x06, 1, 1, "3DSTATE_ANTI_ALIASING" }, + { 0x08, 1, 1, "3DSTATE_BACKFACE_STENCIL_OPS" }, + { 0x09, 1, 1, "3DSTATE_BACKFACE_STENCIL_MASKS" }, + { 0x16, 1, 1, "3DSTATE_COORD_SET_BINDINGS" }, + { 0x15, 1, 1, "3DSTATE_FOG_COLOR" }, + { 0x0b, 1, 1, "3DSTATE_INDEPENDENT_ALPHA_BLEND" }, + { 0x0d, 1, 1, "3DSTATE_MODES_4" }, + { 0x0c, 1, 1, "3DSTATE_MODES_5" }, + { 0x07, 1, 1, "3DSTATE_RASTERIZATION_RULES" }, + }; + + switch ((data[0] & 0x1f000000) >> 24) { + case 0x1f: + return decode_3d_primitive(data, count, hw_offset, failures); + case 0x1d: + return decode_3d_1d(data, count, hw_offset, failures, 0); + case 0x1c: + return decode_3d_1c(data, count, hw_offset, failures); + } + + for (opcode = 0; opcode < sizeof(opcodes_3d) / sizeof(opcodes_3d[0]); + opcode++) { + if ((data[0] & 0x1f000000) >> 24 == opcodes_3d[opcode].opcode) { + unsigned int len = 1, i; + + instr_out(data, hw_offset, 0, "%s\n", opcodes_3d[opcode].name); + if (opcodes_3d[opcode].max_len > 1) { + len = (data[0] & 0xff) + 2; + if (len < opcodes_3d[opcode].min_len || + len > opcodes_3d[opcode].max_len) + { + fprintf(out, "Bad count in %s\n", opcodes_3d[opcode].name); + } + } + + for (i = 1; i < len; i++) { + if (i >= count) + BUFFER_FAIL(count, len, opcodes_3d[opcode].name); + instr_out(data, hw_offset, i, "dword %d\n", i); + } + return len; + } + } + + instr_out(data, hw_offset, 0, "3D UNKNOWN\n"); + (*failures)++; + return 1; +} + +static const char * +get_965_surfacetype(unsigned int surfacetype) +{ + switch (surfacetype) { + case 0: return "1D"; + case 1: return "2D"; + case 2: return "3D"; + case 3: return "CUBE"; + case 4: return "BUFFER"; + case 7: return "NULL"; + default: return "unknown"; + } +} + +static const char * +get_965_depthformat(unsigned int depthformat) +{ + switch (depthformat) { + case 0: return "s8_z24float"; + case 1: return "z32float"; + case 2: return "z24s8"; + case 5: return "z16"; + default: return "unknown"; + } +} + +static const char * +get_965_element_component(uint32_t data, int component) +{ + uint32_t component_control = (data >> (16 + (3 - component) * 4)) & 0x7; + + switch (component_control) { + case 0: + return "nostore"; + case 1: + switch (component) { + case 0: return "X"; + case 1: return "Y"; + case 2: return "Z"; + case 3: return "W"; + default: return "fail"; + } + case 2: + return "0.0"; + case 3: + return "1.0"; + case 4: + return "0x1"; + case 5: + return "VID"; + default: + return "fail"; + } +} + +static const char * +get_965_prim_type(uint32_t data) +{ + uint32_t primtype = (data >> 10) & 0x1f; + + switch (primtype) { + case 0x01: return "point list"; + case 0x02: return "line list"; + case 0x03: return "line strip"; + case 0x04: return "tri list"; + case 0x05: return "tri strip"; + case 0x06: return "tri fan"; + case 0x07: return "quad list"; + case 0x08: return "quad strip"; + case 0x09: return "line list adj"; + case 0x0a: return "line strip adj"; + case 0x0b: return "tri list adj"; + case 0x0c: return "tri strip adj"; + case 0x0d: return "tri strip reverse"; + case 0x0e: return "polygon"; + case 0x0f: return "rect list"; + case 0x10: return "line loop"; + case 0x11: return "point list bf"; + case 0x12: return "line strip cont"; + case 0x13: return "line strip bf"; + case 0x14: return "line strip cont bf"; + case 0x15: return "tri fan no stipple"; + default: return "fail"; + } +} + +static int +decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures) +{ + unsigned int opcode, len; + int i; + + struct { + uint32_t opcode; + int min_len; + int max_len; + char *name; + } opcodes_3d[] = { + { 0x6000, 3, 3, "URB_FENCE" }, + { 0x6001, 2, 2, "CS_URB_STATE" }, + { 0x6002, 2, 2, "CONSTANT_BUFFER" }, + { 0x6101, 6, 6, "STATE_BASE_ADDRESS" }, + { 0x6102, 2, 2 , "STATE_SIP" }, + { 0x6104, 1, 1, "3DSTATE_PIPELINE_SELECT" }, + { 0x680b, 1, 1, "3DSTATE_VF_STATISTICS" }, + { 0x6904, 1, 1, "3DSTATE_PIPELINE_SELECT" }, + { 0x7800, 7, 7, "3DSTATE_PIPELINED_POINTERS" }, + { 0x7801, 6, 6, "3DSTATE_BINDING_TABLE_POINTERS" }, + { 0x780b, 1, 1, "3DSTATE_VF_STATISTICS" }, + { 0x7808, 5, 257, "3DSTATE_VERTEX_BUFFERS" }, + { 0x7809, 3, 256, "3DSTATE_VERTEX_ELEMENTS" }, + { 0x780a, 3, 3, "3DSTATE_INDEX_BUFFER" }, + { 0x7900, 4, 4, "3DSTATE_DRAWING_RECTANGLE" }, + { 0x7901, 5, 5, "3DSTATE_CONSTANT_COLOR" }, + { 0x7905, 5, 7, "3DSTATE_DEPTH_BUFFER" }, + { 0x7906, 2, 2, "3DSTATE_POLY_STIPPLE_OFFSET" }, + { 0x7907, 33, 33, "3DSTATE_POLY_STIPPLE_PATTERN" }, + { 0x7908, 3, 3, "3DSTATE_LINE_STIPPLE" }, + { 0x7909, 2, 2, "3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP" }, + { 0x790a, 3, 3, "3DSTATE_AA_LINE_PARAMETERS" }, + { 0x7b00, 6, 6, "3DPRIMITIVE" }, + }; + + len = (data[0] & 0x0000ffff) + 2; + + switch ((data[0] & 0xffff0000) >> 16) { + case 0x6101: + if (len != 6) + fprintf(out, "Bad count in STATE_BASE_ADDRESS\n"); + if (count < 6) + BUFFER_FAIL(count, len, "STATE_BASE_ADDRESS"); + + instr_out(data, hw_offset, 0, + "STATE_BASE_ADDRESS\n"); + + if (data[1] & 1) { + instr_out(data, hw_offset, 1, "General state at 0x%08x\n", + data[1] & ~1); + } else + instr_out(data, hw_offset, 1, "General state not updated\n"); + + if (data[2] & 1) { + instr_out(data, hw_offset, 2, "Surface state at 0x%08x\n", + data[2] & ~1); + } else + instr_out(data, hw_offset, 2, "Surface state not updated\n"); + + if (data[3] & 1) { + instr_out(data, hw_offset, 3, "Indirect state at 0x%08x\n", + data[3] & ~1); + } else + instr_out(data, hw_offset, 3, "Indirect state not updated\n"); + + if (data[4] & 1) { + instr_out(data, hw_offset, 4, "General state upper bound 0x%08x\n", + data[4] & ~1); + } else + instr_out(data, hw_offset, 4, "General state not updated\n"); + + if (data[5] & 1) { + instr_out(data, hw_offset, 5, "Indirect state upper bound 0x%08x\n", + data[5] & ~1); + } else + instr_out(data, hw_offset, 5, "Indirect state not updated\n"); + + return len; + case 0x7800: + if (len != 7) + fprintf(out, "Bad count in 3DSTATE_PIPELINED_POINTERS\n"); + if (count < 7) + BUFFER_FAIL(count, len, "3DSTATE_PIPELINED_POINTERS"); + + instr_out(data, hw_offset, 0, + "3DSTATE_PIPELINED_POINTERS\n"); + instr_out(data, hw_offset, 1, "VS state\n"); + instr_out(data, hw_offset, 2, "GS state\n"); + instr_out(data, hw_offset, 3, "Clip state\n"); + instr_out(data, hw_offset, 4, "SF state\n"); + instr_out(data, hw_offset, 5, "WM state\n"); + instr_out(data, hw_offset, 6, "CC state\n"); + return len; + case 0x7801: + if (len != 6) + fprintf(out, "Bad count in 3DSTATE_BINDING_TABLE_POINTERS\n"); + if (count < 6) + BUFFER_FAIL(count, len, "3DSTATE_BINDING_TABLE_POINTERS"); + + instr_out(data, hw_offset, 0, + "3DSTATE_BINDING_TABLE_POINTERS\n"); + instr_out(data, hw_offset, 1, "VS binding table\n"); + instr_out(data, hw_offset, 2, "GS binding table\n"); + instr_out(data, hw_offset, 3, "Clip binding table\n"); + instr_out(data, hw_offset, 4, "SF binding table\n"); + instr_out(data, hw_offset, 5, "WM binding table\n"); + + return len; + + case 0x7808: + len = (data[0] & 0xff) + 2; + if ((len - 1) % 4 != 0) + fprintf(out, "Bad count in 3DSTATE_VERTEX_BUFFERS\n"); + if (count < len) + BUFFER_FAIL(count, len, "3DSTATE_VERTEX_BUFFERS"); + instr_out(data, hw_offset, 0, "3DSTATE_VERTEX_BUFFERS\n"); + + for (i = 1; i < len;) { + instr_out(data, hw_offset, i, "buffer %d: %s, pitch %db\n", + data[i] >> 27, + data[i] & (1 << 26) ? "random" : "sequential", + data[i] & 0x07ff); + i++; + instr_out(data, hw_offset, i++, "buffer address\n"); + instr_out(data, hw_offset, i++, "max index\n"); + instr_out(data, hw_offset, i++, "mbz\n"); + } + return len; + + case 0x7809: + len = (data[0] & 0xff) + 2; + if ((len + 1) % 2 != 0) + fprintf(out, "Bad count in 3DSTATE_VERTEX_ELEMENTS\n"); + if (count < len) + BUFFER_FAIL(count, len, "3DSTATE_VERTEX_ELEMENTS"); + instr_out(data, hw_offset, 0, "3DSTATE_VERTEX_ELEMENTS\n"); + + for (i = 1; i < len;) { + instr_out(data, hw_offset, i, "buffer %d: %svalid, type 0x%04x, " + "src offset 0x%04x bytes\n", + data[i] >> 27, + data[i] & (1 << 26) ? "" : "in", + (data[i] >> 16) & 0x1ff, + data[i] & 0x07ff); + i++; + instr_out(data, hw_offset, i, "(%s, %s, %s, %s), " + "dst offset 0x%02x bytes\n", + get_965_element_component(data[i], 0), + get_965_element_component(data[i], 1), + get_965_element_component(data[i], 2), + get_965_element_component(data[i], 3), + (data[i] & 0xff) * 4); + i++; + } + return len; + + case 0x780a: + len = (data[0] & 0xff) + 2; + if (len != 3) + fprintf(out, "Bad count in 3DSTATE_INDEX_BUFFER\n"); + if (count < len) + BUFFER_FAIL(count, len, "3DSTATE_INDEX_BUFFER"); + instr_out(data, hw_offset, 0, "3DSTATE_INDEX_BUFFER\n"); + instr_out(data, hw_offset, 1, "beginning buffer address\n"); + instr_out(data, hw_offset, 2, "ending buffer address\n"); + return len; + + case 0x7900: + if (len != 4) + fprintf(out, "Bad count in 3DSTATE_DRAWING_RECTANGLE\n"); + if (count < 4) + BUFFER_FAIL(count, len, "3DSTATE_DRAWING_RECTANGLE"); + + instr_out(data, hw_offset, 0, + "3DSTATE_DRAWING_RECTANGLE\n"); + instr_out(data, hw_offset, 1, "top left: %d,%d\n", + data[1] & 0xffff, + (data[1] >> 16) & 0xffff); + instr_out(data, hw_offset, 2, "bottom right: %d,%d\n", + data[2] & 0xffff, + (data[2] >> 16) & 0xffff); + instr_out(data, hw_offset, 3, "origin: %d,%d\n", + (int)data[3] & 0xffff, + ((int)data[3] >> 16) & 0xffff); + + return len; + + case 0x7905: + if (len != 5 && len != 6) + fprintf(out, "Bad count in 3DSTATE_DEPTH_BUFFER\n"); + if (count < len) + BUFFER_FAIL(count, len, "3DSTATE_DEPTH_BUFFER"); + + instr_out(data, hw_offset, 0, + "3DSTATE_DEPTH_BUFFER\n"); + instr_out(data, hw_offset, 1, "%s, %s, pitch = %d bytes, %stiled\n", + get_965_surfacetype(data[1] >> 29), + get_965_depthformat((data[1] >> 18) & 0x7), + (data[1] & 0x0001ffff) + 1, + data[1] & (1 << 27) ? "" : "not "); + instr_out(data, hw_offset, 2, "depth offset\n"); + instr_out(data, hw_offset, 3, "%dx%d\n", + ((data[3] & 0x0007ffc0) >> 6) + 1, + ((data[3] & 0xfff80000) >> 19) + 1); + instr_out(data, hw_offset, 4, "volume depth\n"); + if (len == 6) + instr_out(data, hw_offset, 5, "\n"); + + return len; + + case 0x7b00: + len = (data[0] & 0xff) + 2; + if (len != 6) + fprintf(out, "Bad count in 3DPRIMITIVE\n"); + if (count < len) + BUFFER_FAIL(count, len, "3DPRIMITIVE"); + + instr_out(data, hw_offset, 0, + "3DPRIMITIVE: %s %s\n", + get_965_prim_type(data[0]), + (data[0] & (1 << 15)) ? "random" : "sequential"); + instr_out(data, hw_offset, 1, "vertex count\n"); + instr_out(data, hw_offset, 2, "start vertex\n"); + instr_out(data, hw_offset, 3, "instance count\n"); + instr_out(data, hw_offset, 4, "start instance\n"); + instr_out(data, hw_offset, 5, "index bias\n"); + return len; + } + + for (opcode = 0; opcode < sizeof(opcodes_3d) / sizeof(opcodes_3d[0]); + opcode++) { + if ((data[0] & 0xffff0000) >> 16 == opcodes_3d[opcode].opcode) { + unsigned int i; + len = 1; + + instr_out(data, hw_offset, 0, "%s\n", opcodes_3d[opcode].name); + if (opcodes_3d[opcode].max_len > 1) { + len = (data[0] & 0xff) + 2; + if (len < opcodes_3d[opcode].min_len || + len > opcodes_3d[opcode].max_len) + { + fprintf(out, "Bad count in %s\n", opcodes_3d[opcode].name); + } + } + + for (i = 1; i < len; i++) { + if (i >= count) + BUFFER_FAIL(count, len, opcodes_3d[opcode].name); + instr_out(data, hw_offset, i, "dword %d\n", i); + } + return len; + } + } + + instr_out(data, hw_offset, 0, "3D UNKNOWN\n"); + (*failures)++; + return 1; +} + +static int +decode_3d_i830(const uint32_t *data, int count, uint32_t hw_offset, int *failures) +{ + unsigned int opcode; + + struct { + uint32_t opcode; + int min_len; + int max_len; + char *name; + } opcodes_3d[] = { + { 0x02, 1, 1, "3DSTATE_MODES_3" }, + { 0x03, 1, 1, "3DSTATE_ENABLES_1"}, + { 0x04, 1, 1, "3DSTATE_ENABLES_2"}, + { 0x05, 1, 1, "3DSTATE_VFT0"}, + { 0x06, 1, 1, "3DSTATE_AA"}, + { 0x07, 1, 1, "3DSTATE_RASTERIZATION_RULES" }, + { 0x08, 1, 1, "3DSTATE_MODES_1" }, + { 0x09, 1, 1, "3DSTATE_STENCIL_TEST" }, + { 0x0a, 1, 1, "3DSTATE_VFT1"}, + { 0x0b, 1, 1, "3DSTATE_INDPT_ALPHA_BLEND" }, + { 0x0c, 1, 1, "3DSTATE_MODES_5" }, + { 0x0d, 1, 1, "3DSTATE_MAP_BLEND_OP" }, + { 0x0e, 1, 1, "3DSTATE_MAP_BLEND_ARG" }, + { 0x0f, 1, 1, "3DSTATE_MODES_2" }, + { 0x15, 1, 1, "3DSTATE_FOG_COLOR" }, + { 0x16, 1, 1, "3DSTATE_MODES_4" }, + }; + + switch ((data[0] & 0x1f000000) >> 24) { + case 0x1f: + return decode_3d_primitive(data, count, hw_offset, failures); + case 0x1d: + return decode_3d_1d(data, count, hw_offset, failures, 1); + case 0x1c: + return decode_3d_1c(data, count, hw_offset, failures); + } + + for (opcode = 0; opcode < sizeof(opcodes_3d) / sizeof(opcodes_3d[0]); + opcode++) { + if ((data[0] & 0x1f000000) >> 24 == opcodes_3d[opcode].opcode) { + unsigned int len = 1, i; + + instr_out(data, hw_offset, 0, "%s\n", opcodes_3d[opcode].name); + if (opcodes_3d[opcode].max_len > 1) { + len = (data[0] & 0xff) + 2; + if (len < opcodes_3d[opcode].min_len || + len > opcodes_3d[opcode].max_len) + { + fprintf(out, "Bad count in %s\n", opcodes_3d[opcode].name); + } + } + + for (i = 1; i < len; i++) { + if (i >= count) + BUFFER_FAIL(count, len, opcodes_3d[opcode].name); + instr_out(data, hw_offset, i, "dword %d\n", i); + } + return len; + } + } + + instr_out(data, hw_offset, 0, "3D UNKNOWN\n"); + (*failures)++; + return 1; +} + +/** + * Decodes an i830-i915 batch buffer, writing the output to stdout. + * + * \param data batch buffer contents + * \param count number of DWORDs to decode in the batch buffer + * \param hw_offset hardware address for the buffer + */ +int +intel_decode(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid) +{ + int index = 0; + int failures = 0; + + out = stderr; + + while (index < count) { + switch ((data[index] & 0xe0000000) >> 29) { + case 0x0: + index += decode_mi(data + index, count - index, + hw_offset + index * 4, &failures); + break; + case 0x2: + index += decode_2d(data + index, count - index, + hw_offset + index * 4, &failures); + break; + case 0x3: + if (IS_965(devid)) { + index += decode_3d_965(data + index, count - index, + hw_offset + index * 4, &failures); + } else if (IS_9XX(devid)) { + index += decode_3d(data + index, count - index, + hw_offset + index * 4, &failures); + } else { + index += decode_3d_i830(data + index, count - index, + hw_offset + index * 4, &failures); + } + break; + default: + instr_out(data, hw_offset, index, "UNKNOWN\n"); + failures++; + index++; + break; + } + fflush(out); + } + + return failures; +} + +void intel_decode_context_reset(void) +{ + saved_s2_set = 0; + saved_s4_set = 1; +} + diff --git a/src/gallium/drivers/i965/intel_decode.h b/src/gallium/drivers/i965/intel_decode.h new file mode 100644 index 00000000000..7683097b869 --- /dev/null +++ b/src/gallium/drivers/i965/intel_decode.h @@ -0,0 +1,29 @@ +/* + * Copyright © 2007 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <[email protected]> + * + */ + +int intel_decode(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid); +void intel_decode_context_reset(void); diff --git a/src/gallium/drivers/i965/intel_structs.h b/src/gallium/drivers/i965/intel_structs.h new file mode 100644 index 00000000000..522e3bd92c2 --- /dev/null +++ b/src/gallium/drivers/i965/intel_structs.h @@ -0,0 +1,132 @@ +#ifndef INTEL_STRUCTS_H +#define INTEL_STRUCTS_H + +struct br0 { + GLuint length:8; + GLuint pad0:3; + GLuint dst_tiled:1; + GLuint pad1:8; + GLuint write_rgb:1; + GLuint write_alpha:1; + GLuint opcode:7; + GLuint client:3; +}; + + +struct br13 { + GLint dest_pitch:16; + GLuint rop:8; + GLuint color_depth:2; + GLuint pad1:3; + GLuint mono_source_transparency:1; + GLuint clipping_enable:1; + GLuint pad0:1; +}; + + + +/* This is an attempt to move some of the 2D interaction in this + * driver to using structs for packets rather than a bunch of #defines + * and dwords. + */ +struct xy_color_blit { + struct br0 br0; + struct br13 br13; + + struct { + GLuint dest_x1:16; + GLuint dest_y1:16; + } dw2; + + struct { + GLuint dest_x2:16; + GLuint dest_y2:16; + } dw3; + + GLuint dest_base_addr; + GLuint color; +}; + +struct xy_src_copy_blit { + struct br0 br0; + struct br13 br13; + + struct { + GLuint dest_x1:16; + GLuint dest_y1:16; + } dw2; + + struct { + GLuint dest_x2:16; + GLuint dest_y2:16; + } dw3; + + GLuint dest_base_addr; + + struct { + GLuint src_x1:16; + GLuint src_y1:16; + } dw5; + + struct { + GLint src_pitch:16; + GLuint pad:16; + } dw6; + + GLuint src_base_addr; +}; + +struct xy_setup_blit { + struct br0 br0; + struct br13 br13; + + struct { + GLuint clip_x1:16; + GLuint clip_y1:16; + } dw2; + + struct { + GLuint clip_x2:16; + GLuint clip_y2:16; + } dw3; + + GLuint dest_base_addr; + GLuint background_color; + GLuint foreground_color; + GLuint pattern_base_addr; +}; + + +struct xy_text_immediate_blit { + struct { + GLuint length:8; + GLuint pad2:3; + GLuint dst_tiled:1; + GLuint pad1:4; + GLuint byte_packed:1; + GLuint pad0:5; + GLuint opcode:7; + GLuint client:3; + } dw0; + + struct { + GLuint dest_x1:16; + GLuint dest_y1:16; + } dw1; + + struct { + GLuint dest_x2:16; + GLuint dest_y2:16; + } dw2; + + /* Src bitmap data follows as inline dwords. + */ +}; + + +#define CLIENT_2D 0x2 +#define OPCODE_XY_SETUP_BLT 0x1 +#define OPCODE_XY_COLOR_BLT 0x50 +#define OPCODE_XY_TEXT_IMMEDIATE_BLT 0x31 + +#endif diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c index 4e700089e33..9f5b4e63236 100644 --- a/src/gallium/drivers/identity/id_context.c +++ b/src/gallium/drivers/identity/id_context.c @@ -46,17 +46,6 @@ identity_destroy(struct pipe_context *_pipe) } static void -identity_set_edgeflags(struct pipe_context *_pipe, - const unsigned *bitfield) -{ - struct identity_context *id_pipe = identity_context(_pipe); - struct pipe_context *pipe = id_pipe->pipe; - - pipe->set_edgeflags(pipe, - bitfield); -} - -static boolean identity_draw_arrays(struct pipe_context *_pipe, unsigned prim, unsigned start, @@ -65,13 +54,13 @@ identity_draw_arrays(struct pipe_context *_pipe, struct identity_context *id_pipe = identity_context(_pipe); struct pipe_context *pipe = id_pipe->pipe; - return pipe->draw_arrays(pipe, - prim, - start, - count); + pipe->draw_arrays(pipe, + prim, + start, + count); } -static boolean +static void identity_draw_elements(struct pipe_context *_pipe, struct pipe_buffer *_indexBuffer, unsigned indexSize, @@ -84,15 +73,15 @@ identity_draw_elements(struct pipe_context *_pipe, struct pipe_context *pipe = id_pipe->pipe; struct pipe_buffer *indexBuffer = id_buffer->buffer; - return pipe->draw_elements(pipe, - indexBuffer, - indexSize, - prim, - start, - count); + pipe->draw_elements(pipe, + indexBuffer, + indexSize, + prim, + start, + count); } -static boolean +static void identity_draw_range_elements(struct pipe_context *_pipe, struct pipe_buffer *_indexBuffer, unsigned indexSize, @@ -107,14 +96,14 @@ identity_draw_range_elements(struct pipe_context *_pipe, struct pipe_context *pipe = id_pipe->pipe; struct pipe_buffer *indexBuffer = id_buffer->buffer; - return pipe->draw_range_elements(pipe, - indexBuffer, - indexSize, - minIndex, - maxIndex, - mode, - start, - count); + pipe->draw_range_elements(pipe, + indexBuffer, + indexSize, + minIndex, + maxIndex, + mode, + start, + count); } static struct pipe_query * @@ -221,16 +210,29 @@ identity_create_sampler_state(struct pipe_context *_pipe, } static void -identity_bind_sampler_states(struct pipe_context *_pipe, - unsigned num, - void **samplers) +identity_bind_fragment_sampler_states(struct pipe_context *_pipe, + unsigned num_samplers, + void **samplers) +{ + struct identity_context *id_pipe = identity_context(_pipe); + struct pipe_context *pipe = id_pipe->pipe; + + pipe->bind_fragment_sampler_states(pipe, + num_samplers, + samplers); +} + +static void +identity_bind_vertex_sampler_states(struct pipe_context *_pipe, + unsigned num_samplers, + void **samplers) { struct identity_context *id_pipe = identity_context(_pipe); struct pipe_context *pipe = id_pipe->pipe; - pipe->bind_sampler_states(pipe, - num, - samplers); + pipe->bind_vertex_sampler_states(pipe, + num_samplers, + samplers); } static void @@ -480,9 +482,9 @@ identity_set_viewport_state(struct pipe_context *_pipe, } static void -identity_set_sampler_textures(struct pipe_context *_pipe, - unsigned num_textures, - struct pipe_texture **_textures) +identity_set_fragment_sampler_textures(struct pipe_context *_pipe, + unsigned num_textures, + struct pipe_texture **_textures) { struct identity_context *id_pipe = identity_context(_pipe); struct pipe_context *pipe = id_pipe->pipe; @@ -499,9 +501,34 @@ identity_set_sampler_textures(struct pipe_context *_pipe, textures = unwrapped_textures; } - pipe->set_sampler_textures(pipe, - num_textures, - textures); + pipe->set_fragment_sampler_textures(pipe, + num_textures, + textures); +} + +static void +identity_set_vertex_sampler_textures(struct pipe_context *_pipe, + unsigned num_textures, + struct pipe_texture **_textures) +{ + struct identity_context *id_pipe = identity_context(_pipe); + struct pipe_context *pipe = id_pipe->pipe; + struct pipe_texture *unwrapped_textures[PIPE_MAX_VERTEX_SAMPLERS]; + struct pipe_texture **textures = NULL; + unsigned i; + + if (_textures) { + for (i = 0; i < num_textures; i++) + unwrapped_textures[i] = identity_texture_unwrap(_textures[i]); + for (; i < PIPE_MAX_VERTEX_SAMPLERS; i++) + unwrapped_textures[i] = NULL; + + textures = unwrapped_textures; + } + + pipe->set_vertex_sampler_textures(pipe, + num_textures, + textures); } static void @@ -669,7 +696,6 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) id_pipe->base.draw = NULL; id_pipe->base.destroy = identity_destroy; - id_pipe->base.set_edgeflags = identity_set_edgeflags; id_pipe->base.draw_arrays = identity_draw_arrays; id_pipe->base.draw_elements = identity_draw_elements; id_pipe->base.draw_range_elements = identity_draw_range_elements; @@ -682,7 +708,8 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) id_pipe->base.bind_blend_state = identity_bind_blend_state; id_pipe->base.delete_blend_state = identity_delete_blend_state; id_pipe->base.create_sampler_state = identity_create_sampler_state; - id_pipe->base.bind_sampler_states = identity_bind_sampler_states; + id_pipe->base.bind_fragment_sampler_states = identity_bind_fragment_sampler_states; + id_pipe->base.bind_vertex_sampler_states = identity_bind_vertex_sampler_states; id_pipe->base.delete_sampler_state = identity_delete_sampler_state; id_pipe->base.create_rasterizer_state = identity_create_rasterizer_state; id_pipe->base.bind_rasterizer_state = identity_bind_rasterizer_state; @@ -703,7 +730,8 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) id_pipe->base.set_polygon_stipple = identity_set_polygon_stipple; id_pipe->base.set_scissor_state = identity_set_scissor_state; id_pipe->base.set_viewport_state = identity_set_viewport_state; - id_pipe->base.set_sampler_textures = identity_set_sampler_textures; + id_pipe->base.set_fragment_sampler_textures = identity_set_fragment_sampler_textures; + id_pipe->base.set_vertex_sampler_textures = identity_set_vertex_sampler_textures; id_pipe->base.set_vertex_buffers = identity_set_vertex_buffers; id_pipe->base.set_vertex_elements = identity_set_vertex_elements; id_pipe->base.surface_copy = identity_surface_copy; diff --git a/src/gallium/drivers/identity/id_objects.c b/src/gallium/drivers/identity/id_objects.c index e893e599408..bc9bc7121d5 100644 --- a/src/gallium/drivers/identity/id_objects.c +++ b/src/gallium/drivers/identity/id_objects.c @@ -180,3 +180,42 @@ identity_transfer_destroy(struct identity_transfer *id_transfer) screen->tex_transfer_destroy(id_transfer->transfer); FREE(id_transfer); } + +struct pipe_video_surface * +identity_video_surface_create(struct identity_screen *id_screen, + struct pipe_video_surface *video_surface) +{ + struct identity_video_surface *id_video_surface; + + if (!video_surface) { + goto error; + } + + assert(video_surface->screen == id_screen->screen); + + id_video_surface = CALLOC_STRUCT(identity_video_surface); + if (!id_video_surface) { + goto error; + } + + memcpy(&id_video_surface->base, + video_surface, + sizeof(struct pipe_video_surface)); + + pipe_reference_init(&id_video_surface->base.reference, 1); + id_video_surface->base.screen = &id_screen->base; + id_video_surface->video_surface = video_surface; + + return &id_video_surface->base; + +error: + pipe_video_surface_reference(&video_surface, NULL); + return NULL; +} + +void +identity_video_surface_destroy(struct identity_video_surface *id_video_surface) +{ + pipe_video_surface_reference(&id_video_surface->video_surface, NULL); + FREE(id_video_surface); +} diff --git a/src/gallium/drivers/identity/id_objects.h b/src/gallium/drivers/identity/id_objects.h index ce58faa3c7c..77cc7190798 100644 --- a/src/gallium/drivers/identity/id_objects.h +++ b/src/gallium/drivers/identity/id_objects.h @@ -31,6 +31,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_state.h" +#include "pipe/p_video_state.h" #include "id_screen.h" @@ -67,6 +68,14 @@ struct identity_transfer }; +struct identity_video_surface +{ + struct pipe_video_surface base; + + struct pipe_video_surface *video_surface; +}; + + static INLINE struct identity_buffer * identity_buffer(struct pipe_buffer *_buffer) { @@ -103,6 +112,15 @@ identity_transfer(struct pipe_transfer *_transfer) return (struct identity_transfer *)_transfer; } +static INLINE struct identity_video_surface * +identity_video_surface(struct pipe_video_surface *_video_surface) +{ + if (!_video_surface) { + return NULL; + } + (void)identity_screen(_video_surface->screen); + return (struct identity_video_surface *)_video_surface; +} static INLINE struct pipe_buffer * identity_buffer_unwrap(struct pipe_buffer *_buffer) @@ -165,5 +183,12 @@ identity_transfer_create(struct identity_texture *id_texture, void identity_transfer_destroy(struct identity_transfer *id_transfer); +struct pipe_video_surface * +identity_video_surface_create(struct identity_screen *id_screen, + struct pipe_video_surface *video_surface); + +void +identity_video_surface_destroy(struct identity_video_surface *id_video_surface); + #endif /* ID_OBJECTS_H */ diff --git a/src/gallium/drivers/identity/id_public.h b/src/gallium/drivers/identity/id_public.h index cac14cfd604..3d2862eaa01 100644 --- a/src/gallium/drivers/identity/id_public.h +++ b/src/gallium/drivers/identity/id_public.h @@ -37,4 +37,4 @@ identity_screen_create(struct pipe_screen *screen); struct pipe_context * identity_context_create(struct pipe_screen *screen, struct pipe_context *pipe); -#endif /* PT_PUBLIC_H */ +#endif /* ID_PUBLIC_H */ diff --git a/src/gallium/drivers/identity/id_screen.c b/src/gallium/drivers/identity/id_screen.c index 26439637d08..53eae3ef544 100644 --- a/src/gallium/drivers/identity/id_screen.c +++ b/src/gallium/drivers/identity/id_screen.c @@ -379,6 +379,33 @@ identity_screen_buffer_destroy(struct pipe_buffer *_buffer) identity_buffer_destroy(identity_buffer(_buffer)); } +static struct pipe_video_surface * +identity_screen_video_surface_create(struct pipe_screen *_screen, + enum pipe_video_chroma_format chroma_format, + unsigned width, + unsigned height) +{ + struct identity_screen *id_screen = identity_screen(_screen); + struct pipe_screen *screen = id_screen->screen; + struct pipe_video_surface *result; + + result = screen->video_surface_create(screen, + chroma_format, + width, + height); + + if (result) { + return identity_video_surface_create(id_screen, result); + } + return NULL; +} + +static void +identity_screen_video_surface_destroy(struct pipe_video_surface *_vsfc) +{ + identity_video_surface_destroy(identity_video_surface(_vsfc)); +} + static void identity_screen_flush_frontbuffer(struct pipe_screen *_screen, struct pipe_surface *_surface, @@ -472,6 +499,12 @@ identity_screen_create(struct pipe_screen *screen) if (screen->buffer_unmap) id_screen->base.buffer_unmap = identity_screen_buffer_unmap; id_screen->base.buffer_destroy = identity_screen_buffer_destroy; + if (screen->video_surface_create) { + id_screen->base.video_surface_create = identity_screen_video_surface_create; + } + if (screen->video_surface_destroy) { + id_screen->base.video_surface_destroy = identity_screen_video_surface_destroy; + } id_screen->base.flush_frontbuffer = identity_screen_flush_frontbuffer; id_screen->base.fence_reference = identity_screen_fence_reference; id_screen->base.fence_signalled = identity_screen_fence_signalled; diff --git a/src/gallium/drivers/llvmpipe/README b/src/gallium/drivers/llvmpipe/README index 89d08834a3c..0c3f00fd58f 100644 --- a/src/gallium/drivers/llvmpipe/README +++ b/src/gallium/drivers/llvmpipe/README @@ -51,21 +51,22 @@ Requirements - Linux - - udis86, http://udis86.sourceforge.net/ . Use my repository, which decodes - opcodes not yet supported by upstream. + - A x86 or amd64 processor. 64bit mode is preferred. - git clone git://people.freedesktop.org/~jrfonseca/udis86 - cd udis86 - ./configure --with-pic - make - sudo make install + Support for sse2 is strongly encouraged. Support for ssse3, and sse4.1 will + yield the most efficient code. The less features the CPU has the more + likely is that you ran into underperforming, buggy, or incomplete code. + + See /proc/cpuinfo to know what your CPU supports. + + - LLVM 2.5 or greater. LLVM 2.6 is preferred. - - LLVM 2.5. On Debian based distributions do: + On Debian based distributions do: aptitude install llvm-dev - There is a typo in one of the llvm-dev 2.5 headers, that causes compilation - errors in the debug build: + There is a typo in one of the llvm 2.5 headers, that may cause compilation + errors. To fix it apply the change: --- /usr/include/llvm-c/Core.h.orig 2009-08-10 15:38:54.000000000 +0100 +++ /usr/include/llvm-c/Core.h 2009-08-10 15:38:25.000000000 +0100 @@ -79,12 +80,17 @@ Requirements #endif return reinterpret_cast<T**>(Vals); - - A x86 or amd64 processor with support for sse2, sse3, and sse4.1 SIMD - instructions. This is necessary because we emit several SSE intrinsics for - convenience. See /proc/cpuinfo to know what your CPU supports. - - - scons + - scons (optional) + - udis86, http://udis86.sourceforge.net/ (optional): + + git clone git://udis86.git.sourceforge.net/gitroot/udis86/udis86 + cd udis86 + ./autogen.sh + ./configure --with-pic + make + sudo make install + Building ======== diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 3bd2e700138..3ca676647c7 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -9,6 +9,8 @@ if not env.has_key('LLVM_VERSION'): env.Tool('udis86') +env.Append(CPPPATH = ['.']) + env.CodeGenerate( target = 'lp_tile_soa.c', script = 'lp_tile_soa.py', @@ -74,21 +76,19 @@ llvmpipe = env.ConvenienceLibrary( env = env.Clone() -env.Prepend(LIBS = [llvmpipe] + auxiliaries) +env.Prepend(LIBS = [llvmpipe] + gallium) -env.Program( - target = 'lp_test_format', - source = ['lp_test_format.c', 'lp_test_main.c'], -) +tests = [ + 'format', + 'blend', + 'conv', +] -env.Program( - target = 'lp_test_blend', - source = ['lp_test_blend.c', 'lp_test_main.c'], -) - -env.Program( - target = 'lp_test_conv', - source = ['lp_test_conv.c', 'lp_test_main.c'], -) +for test in tests: + target = env.Program( + target = 'lp_test_' + test, + source = ['lp_test_' + test + '.c', 'lp_test_main.c'], + ) + env.InstallProgram(target) Export('llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c index 9c59677a741..eea6b5d6a5c 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c @@ -629,7 +629,8 @@ lp_build_abs(struct lp_build_context *bld, if(type.floating) { /* Mask out the sign bit */ LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); - LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long)1 << type.width) - 1); + unsigned long absMask = ~(1 << (type.width - 1)); + LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long) absMask)); a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); a = LLVMBuildAnd(bld->builder, a, mask, ""); a = LLVMBuildBitCast(bld->builder, a, vec_type, ""); @@ -1083,7 +1084,7 @@ lp_build_log(struct lp_build_context *bld, LLVMValueRef x) { /* log(2) */ - LLVMValueRef log2 = lp_build_const_scalar(bld->type, 1.4426950408889634); + LLVMValueRef log2 = lp_build_const_scalar(bld->type, 0.69314718055994529); return lp_build_mul(bld, log2, lp_build_exp2(bld, x)); } @@ -1095,7 +1096,7 @@ lp_build_log(struct lp_build_context *bld, /** * Generate polynomial. - * Ex: x^2 * coeffs[0] + x * coeffs[1] + coeffs[2]. + * Ex: coeffs[0] + x * coeffs[1] + x^2 * coeffs[2]. */ static LLVMValueRef lp_build_polynomial(struct lp_build_context *bld, @@ -1285,13 +1286,13 @@ lp_build_log2_approx(struct lp_build_context *bld, /* mant = (float) mantissa(x) */ mant = LLVMBuildAnd(bld->builder, i, mantmask, ""); mant = LLVMBuildOr(bld->builder, mant, one, ""); - mant = LLVMBuildSIToFP(bld->builder, mant, vec_type, ""); + mant = LLVMBuildBitCast(bld->builder, mant, vec_type, ""); logmant = lp_build_polynomial(bld, mant, lp_build_log2_polynomial, Elements(lp_build_log2_polynomial)); /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/ - logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildMul(bld->builder, mant, bld->one, ""), ""); + logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildSub(bld->builder, mant, bld->one, ""), ""); res = LLVMBuildAdd(bld->builder, logmant, logexp, ""); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_debug.c b/src/gallium/drivers/llvmpipe/lp_bld_debug.c index 59d8f492e60..39dfc51e503 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_debug.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_debug.c @@ -77,10 +77,10 @@ lp_disassemble(const void* func) while (ud_disassemble(&ud_obj)) { #ifdef PIPE_ARCH_X86 - debug_printf("%08lx: ", (unsigned long)ud_insn_off(&ud_obj)); + debug_printf("0x%08lx:\t", (unsigned long)ud_insn_off(&ud_obj)); #endif #ifdef PIPE_ARCH_X86_64 - debug_printf("%016llx: ", (unsigned long long)ud_insn_off(&ud_obj)); + debug_printf("0x%016llx:\t", (unsigned long long)ud_insn_off(&ud_obj)); #endif #if 0 @@ -115,9 +115,16 @@ lp_disassemble(const void* func) } } - if (ud_insn_off(&ud_obj) >= max_jmp_pc && ud_obj.mnemonic == UD_Iret) + if ((ud_insn_off(&ud_obj) >= max_jmp_pc && ud_obj.mnemonic == UD_Iret) || + ud_obj.mnemonic == UD_Iinvalid) break; } + +#if 0 + /* Print GDB command, useful to verify udis86 output */ + debug_printf("disassemble %p %p\n", func, (void*)(uintptr_t)ud_obj.pc); +#endif + debug_printf("\n"); #else (void)func; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c index 818c0e943e3..49dab8ab61e 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c @@ -303,8 +303,8 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, unsigned first, last, mask; unsigned attrib; - first = decl->DeclarationRange.First; - last = decl->DeclarationRange.Last; + first = decl->Range.First; + last = decl->Range.Last; mask = decl->Declaration.UsageMask; for( attrib = first; attrib <= last; ++attrib ) { diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.c b/src/gallium/drivers/llvmpipe/lp_bld_sample.c index 4d272bea87e..af70ddc6ab9 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.c @@ -59,9 +59,9 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, state->format = texture->format; state->target = texture->target; - state->pot_width = util_is_pot(texture->width[0]); - state->pot_height = util_is_pot(texture->height[0]); - state->pot_depth = util_is_pot(texture->depth[0]); + state->pot_width = util_is_pot(texture->width0); + state->pot_height = util_is_pot(texture->height0); + state->pot_depth = util_is_pot(texture->depth0); state->wrap_s = sampler->wrap_s; state->wrap_t = sampler->wrap_t; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c index 47b68b71e25..c46fef50109 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c @@ -577,7 +577,6 @@ lp_build_sample_soa(LLVMBuilderRef builder, lp_build_sample_2d_nearest_soa(&bld, s, t, width, height, stride, data_ptr, texel); break; case PIPE_TEX_FILTER_LINEAR: - case PIPE_TEX_FILTER_ANISO: if(lp_format_is_rgba8(bld.format_desc)) lp_build_sample_2d_linear_aos(&bld, s, t, width, height, stride, data_ptr, texel); else diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c index 0dea2cd4c86..61b033c9fcf 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c @@ -64,7 +64,7 @@ for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) #define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ - ((INST)->FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) + ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN))) #define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ if (IS_DST0_CHANNEL_ENABLED( INST, CHAN )) @@ -157,7 +157,7 @@ emit_fetch( unsigned index, const unsigned chan_index ) { - const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[index]; + const struct tgsi_full_src_register *reg = &inst->Src[index]; unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); LLVMValueRef res; @@ -167,9 +167,9 @@ emit_fetch( case TGSI_SWIZZLE_Z: case TGSI_SWIZZLE_W: - switch (reg->SrcRegister.File) { + switch (reg->Register.File) { case TGSI_FILE_CONSTANT: { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->SrcRegister.Index*4 + swizzle, 0); + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0); LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, ""); LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, ""); res = lp_build_broadcast_scalar(&bld->base, scalar); @@ -177,17 +177,17 @@ emit_fetch( } case TGSI_FILE_IMMEDIATE: - res = bld->immediates[reg->SrcRegister.Index][swizzle]; + res = bld->immediates[reg->Register.Index][swizzle]; assert(res); break; case TGSI_FILE_INPUT: - res = bld->inputs[reg->SrcRegister.Index][swizzle]; + res = bld->inputs[reg->Register.Index][swizzle]; assert(res); break; case TGSI_FILE_TEMPORARY: - res = bld->temps[reg->SrcRegister.Index][swizzle]; + res = bld->temps[reg->Register.Index][swizzle]; if(!res) return bld->base.undef; break; @@ -267,7 +267,7 @@ emit_store( unsigned chan_index, LLVMValueRef value) { - const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[index]; + const struct tgsi_full_dst_register *reg = &inst->Dst[index]; switch( inst->Instruction.Saturate ) { case TGSI_SAT_NONE: @@ -287,13 +287,13 @@ emit_store( assert(0); } - switch( reg->DstRegister.File ) { + switch( reg->Register.File ) { case TGSI_FILE_OUTPUT: - bld->outputs[reg->DstRegister.Index][chan_index] = value; + bld->outputs[reg->Register.Index][chan_index] = value; break; case TGSI_FILE_TEMPORARY: - bld->temps[reg->DstRegister.Index][chan_index] = value; + bld->temps[reg->Register.Index][chan_index] = value; break; case TGSI_FILE_ADDRESS: @@ -319,14 +319,14 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, boolean projected, LLVMValueRef *texel) { - const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; + const uint unit = inst->Src[1].Register.Index; LLVMValueRef lodbias; LLVMValueRef oow = NULL; LLVMValueRef coords[3]; unsigned num_coords; unsigned i; - switch (inst->InstructionExtTexture.Texture) { + switch (inst->Texture.Texture) { case TGSI_TEXTURE_1D: num_coords = 1; break; @@ -375,7 +375,7 @@ emit_kil( struct lp_build_tgsi_soa_context *bld, const struct tgsi_full_instruction *inst ) { - const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[0]; + const struct tgsi_full_src_register *reg = &inst->Src[0]; LLVMValueRef terms[NUM_CHANNELS]; LLVMValueRef mask; unsigned chan_index; @@ -423,15 +423,15 @@ indirect_temp_reference(const struct tgsi_full_instruction *inst) { uint i; for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[i]; - if (reg->SrcRegister.File == TGSI_FILE_TEMPORARY && - reg->SrcRegister.Indirect) + const struct tgsi_full_src_register *reg = &inst->Src[i]; + if (reg->Register.File == TGSI_FILE_TEMPORARY && + reg->Register.Indirect) return TRUE; } for (i = 0; i < inst->Instruction.NumDstRegs; i++) { - const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[i]; - if (reg->DstRegister.File == TGSI_FILE_TEMPORARY && - reg->DstRegister.Indirect) + const struct tgsi_full_dst_register *reg = &inst->Dst[i]; + if (reg->Register.File == TGSI_FILE_TEMPORARY && + reg->Register.Indirect) return TRUE; } return FALSE; @@ -768,7 +768,7 @@ emit_instruction( FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { src0 = emit_fetch( bld, inst, 0, chan_index ); tmp0 = lp_build_floor(&bld->base, src0); - tmp0 = lp_build_sub(&bld->base, tmp0, src0); + tmp0 = lp_build_sub(&bld->base, src0, tmp0); dst0[chan_index] = tmp0; } break; @@ -1315,7 +1315,7 @@ emit_instruction( return 0; break; - case TGSI_OPCODE_SHR: + case TGSI_OPCODE_ISHR: /* deprecated? */ assert(0); return 0; diff --git a/src/gallium/drivers/llvmpipe/lp_clear.c b/src/gallium/drivers/llvmpipe/lp_clear.c index bdcff94b9bf..08d9f2e2735 100644 --- a/src/gallium/drivers/llvmpipe/lp_clear.c +++ b/src/gallium/drivers/llvmpipe/lp_clear.c @@ -50,6 +50,7 @@ llvmpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, double depth, unsigned stencil) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + union util_color uc; unsigned cv; uint i; @@ -64,8 +65,8 @@ llvmpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) { struct pipe_surface *ps = llvmpipe->framebuffer.cbufs[i]; - util_pack_color(rgba, ps->format, &cv); - lp_tile_cache_clear(llvmpipe->cbuf_cache[i], rgba, cv); + util_pack_color(rgba, ps->format, &uc); + lp_tile_cache_clear(llvmpipe->cbuf_cache[i], rgba, uc.ui); } llvmpipe->dirty_render_cache = TRUE; } diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 57e71f3e986..37587d4f792 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -118,6 +118,11 @@ static void llvmpipe_destroy( struct pipe_context *pipe ) pipe_texture_reference(&llvmpipe->texture[i], NULL); } + for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { + lp_destroy_tex_tile_cache(llvmpipe->vertex_tex_cache[i]); + pipe_texture_reference(&llvmpipe->vertex_textures[i], NULL); + } + for (i = 0; i < Elements(llvmpipe->constants); i++) { if (llvmpipe->constants[i].buffer) { pipe_buffer_reference(&llvmpipe->constants[i].buffer, NULL); @@ -135,6 +140,7 @@ llvmpipe_is_texture_referenced( struct pipe_context *pipe, struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe ); unsigned i; + /* check if any of the bound drawing surfaces are this texture */ if(llvmpipe->dirty_render_cache) { for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) { if(llvmpipe->framebuffer.cbufs[i] && @@ -145,6 +151,18 @@ llvmpipe_is_texture_referenced( struct pipe_context *pipe, llvmpipe->framebuffer.zsbuf->texture == texture) return PIPE_REFERENCED_FOR_WRITE; } + + /* check if any of the tex_cache textures are this texture */ + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + if (llvmpipe->tex_cache[i] && + llvmpipe->tex_cache[i]->texture == texture) + return PIPE_REFERENCED_FOR_READ; + } + for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { + if (llvmpipe->vertex_tex_cache[i] && + llvmpipe->vertex_tex_cache[i]->texture == texture) + return PIPE_REFERENCED_FOR_READ; + } return PIPE_UNREFERENCED; } @@ -180,7 +198,8 @@ llvmpipe_create( struct pipe_screen *screen ) llvmpipe->pipe.delete_blend_state = llvmpipe_delete_blend_state; llvmpipe->pipe.create_sampler_state = llvmpipe_create_sampler_state; - llvmpipe->pipe.bind_sampler_states = llvmpipe_bind_sampler_states; + llvmpipe->pipe.bind_fragment_sampler_states = llvmpipe_bind_sampler_states; + llvmpipe->pipe.bind_vertex_sampler_states = llvmpipe_bind_vertex_sampler_states; llvmpipe->pipe.delete_sampler_state = llvmpipe_delete_sampler_state; llvmpipe->pipe.create_depth_stencil_alpha_state = llvmpipe_create_depth_stencil_state; @@ -205,7 +224,8 @@ llvmpipe_create( struct pipe_screen *screen ) llvmpipe->pipe.set_framebuffer_state = llvmpipe_set_framebuffer_state; llvmpipe->pipe.set_polygon_stipple = llvmpipe_set_polygon_stipple; llvmpipe->pipe.set_scissor_state = llvmpipe_set_scissor_state; - llvmpipe->pipe.set_sampler_textures = llvmpipe_set_sampler_textures; + llvmpipe->pipe.set_fragment_sampler_textures = llvmpipe_set_sampler_textures; + llvmpipe->pipe.set_vertex_sampler_textures = llvmpipe_set_vertex_sampler_textures; llvmpipe->pipe.set_viewport_state = llvmpipe_set_viewport_state; llvmpipe->pipe.set_vertex_buffers = llvmpipe_set_vertex_buffers; @@ -214,8 +234,6 @@ llvmpipe_create( struct pipe_screen *screen ) llvmpipe->pipe.draw_arrays = llvmpipe_draw_arrays; llvmpipe->pipe.draw_elements = llvmpipe_draw_elements; llvmpipe->pipe.draw_range_elements = llvmpipe_draw_range_elements; - llvmpipe->pipe.set_edgeflags = llvmpipe_set_edgeflags; - llvmpipe->pipe.clear = llvmpipe_clear; llvmpipe->pipe.flush = llvmpipe_flush; @@ -234,13 +252,15 @@ llvmpipe_create( struct pipe_screen *screen ) for (i = 0; i < PIPE_MAX_SAMPLERS; i++) llvmpipe->tex_cache[i] = lp_create_tex_tile_cache( screen ); + for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) + llvmpipe->vertex_tex_cache[i] = lp_create_tex_tile_cache(screen); /* vertex shader samplers */ - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { llvmpipe->tgsi.vert_samplers[i].base.get_samples = lp_get_samples; llvmpipe->tgsi.vert_samplers[i].processor = TGSI_PROCESSOR_VERTEX; - llvmpipe->tgsi.vert_samplers[i].cache = llvmpipe->tex_cache[i]; + llvmpipe->tgsi.vert_samplers[i].cache = llvmpipe->vertex_tex_cache[i]; llvmpipe->tgsi.vert_samplers_list[i] = &llvmpipe->tgsi.vert_samplers[i]; } @@ -260,7 +280,7 @@ llvmpipe_create( struct pipe_screen *screen ) goto fail; draw_texture_samplers(llvmpipe->draw, - PIPE_MAX_SAMPLERS, + PIPE_MAX_VERTEX_SAMPLERS, (struct tgsi_sampler **) llvmpipe->tgsi.vert_samplers_list); diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index 3ad95d0bfc2..cc4d5ad5fd9 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -55,6 +55,7 @@ struct llvmpipe_context { /** Constant state objects */ const struct pipe_blend_state *blend; const struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS]; + struct pipe_sampler_state *vertex_samplers[PIPE_MAX_VERTEX_SAMPLERS]; const struct pipe_depth_stencil_alpha_state *depth_stencil; const struct pipe_rasterizer_state *rasterizer; struct lp_fragment_shader *fs; @@ -68,12 +69,15 @@ struct llvmpipe_context { struct pipe_poly_stipple poly_stipple; struct pipe_scissor_state scissor; struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; + struct pipe_texture *vertex_textures[PIPE_MAX_VERTEX_SAMPLERS]; struct pipe_viewport_state viewport; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; unsigned num_samplers; unsigned num_textures; + unsigned num_vertex_samplers; + unsigned num_vertex_textures; unsigned num_vertex_elements; unsigned num_vertex_buffers; @@ -136,6 +140,7 @@ struct llvmpipe_context { unsigned tex_timestamp; struct llvmpipe_tex_tile_cache *tex_cache[PIPE_MAX_SAMPLERS]; + struct llvmpipe_tex_tile_cache *vertex_tex_cache[PIPE_MAX_VERTEX_SAMPLERS]; unsigned no_rast : 1; diff --git a/src/gallium/drivers/llvmpipe/lp_debug.h b/src/gallium/drivers/llvmpipe/lp_debug.h new file mode 100644 index 00000000000..74b27574942 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_debug.h @@ -0,0 +1,71 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef LP_DEBUG_H +#define LP_DEBUG_H + +#include "pipe/p_compiler.h" +#include "util/u_debug.h" + +extern void +st_print_current(void); + + +#define DEBUG_PIPE 0x1 +#define DEBUG_TGSI 0x2 +#define DEBUG_TEX 0x4 +#define DEBUG_ASM 0x8 +#define DEBUG_SETUP 0x10 +#define DEBUG_RAST 0x20 +#define DEBUG_QUERY 0x40 +#define DEBUG_SCREEN 0x80 +#define DEBUG_JIT 0x100 + +#ifdef DEBUG +extern int LP_DEBUG; +#else +#define LP_DEBUG 0 +#endif + +void st_debug_init( void ); + +static INLINE void +LP_DBG( unsigned flag, const char *fmt, ... ) +{ + if (LP_DEBUG & flag) + { + va_list args; + + va_start( args, fmt ); + debug_vprintf( fmt, args ); + va_end( args ); + } +} + + +#endif /* LP_DEBUG_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c index 0aa13a1fc67..c152b4413fc 100644 --- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c +++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c @@ -45,11 +45,11 @@ -boolean +void llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { - return llvmpipe_draw_elements(pipe, NULL, 0, mode, start, count); + llvmpipe_draw_elements(pipe, NULL, 0, mode, start, count); } @@ -58,7 +58,7 @@ llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, * Basically, map the vertex buffers (and drawing surfaces), then hand off * the drawing to the 'draw' module. */ -boolean +void llvmpipe_draw_range_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, @@ -103,7 +103,7 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe, draw_arrays(draw, mode, start, count); /* - * unmap vertex/index buffers - will cause draw module to flush + * unmap vertex/index buffers */ for (i = 0; i < lp->num_vertex_buffers; i++) { draw_set_mapped_vertex_buffer(draw, i, NULL); @@ -112,31 +112,28 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe, draw_set_mapped_element_buffer(draw, 0, NULL); } + /* + * TODO: Flush only when a user vertex/index buffer is present + * (or even better, modify draw module to do this + * internally when this condition is seen?) + */ + draw_flush(draw); /* Note: leave drawing surfaces mapped */ lp->dirty_render_cache = TRUE; - - return TRUE; } -boolean +void llvmpipe_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count) { - return llvmpipe_draw_range_elements( pipe, indexBuffer, - indexSize, - 0, 0xffffffff, - mode, start, count ); + llvmpipe_draw_range_elements( pipe, indexBuffer, + indexSize, + 0, 0xffffffff, + mode, start, count ); } - -void -llvmpipe_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags) -{ - struct llvmpipe_context *lp = llvmpipe_context(pipe); - draw_set_edgeflags(lp->draw, edgeflags); -} diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index 13535dd638e..bce3baec164 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -154,6 +154,7 @@ lp_jit_screen_init(struct llvmpipe_screen *screen) #if 0 /* For simulating less capable machines */ util_cpu_caps.has_sse3 = 0; + util_cpu_caps.has_ssse3 = 0; util_cpu_caps.has_sse4_1 = 0; #endif @@ -167,7 +168,7 @@ lp_jit_screen_init(struct llvmpipe_screen *screen) if (LLVMCreateJITCompiler(&screen->engine, screen->provider, 1, &error)) { _debug_printf("%s\n", error); LLVMDisposeMessage(error); - abort(); + assert(0); } screen->target = LLVMGetExecutionEngineTargetData(screen->engine); diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 05189274589..9b47415f003 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -27,6 +27,7 @@ #include "util/u_memory.h" +#include "util/u_format.h" #include "pipe/p_defines.h" #include "pipe/p_screen.h" @@ -35,6 +36,24 @@ #include "lp_winsys.h" #include "lp_jit.h" #include "lp_screen.h" +#include "lp_debug.h" + +#ifdef DEBUG +int LP_DEBUG = 0; + +static const struct debug_named_value lp_debug_flags[] = { + { "pipe", DEBUG_PIPE }, + { "tgsi", DEBUG_TGSI }, + { "tex", DEBUG_TEX }, + { "asm", DEBUG_ASM }, + { "setup", DEBUG_SETUP }, + { "rast", DEBUG_RAST }, + { "query", DEBUG_QUERY }, + { "screen", DEBUG_SCREEN }, + { "jit", DEBUG_JIT }, + {NULL, 0} +}; +#endif static const char * @@ -58,7 +77,9 @@ llvmpipe_get_param(struct pipe_screen *screen, int param) case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: return PIPE_MAX_SAMPLERS; case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: - return PIPE_MAX_SAMPLERS; + return PIPE_MAX_VERTEX_SAMPLERS; + case PIPE_CAP_MAX_COMBINED_SAMPLERS: + return PIPE_MAX_SAMPLERS + PIPE_MAX_VERTEX_SAMPLERS; case PIPE_CAP_NPOT_TEXTURES: return 1; case PIPE_CAP_TWO_SIDED_STENCIL: @@ -131,17 +152,17 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen, { struct llvmpipe_screen *screen = llvmpipe_screen(_screen); struct llvmpipe_winsys *winsys = screen->winsys; + const struct util_format_description *format_desc; + + format_desc = util_format_description(format); + if(!format_desc) + return FALSE; assert(target == PIPE_TEXTURE_1D || target == PIPE_TEXTURE_2D || target == PIPE_TEXTURE_3D || target == PIPE_TEXTURE_CUBE); - if(format == PIPE_FORMAT_Z16_UNORM) - return FALSE; - if(format == PIPE_FORMAT_S8_UNORM) - return FALSE; - switch(format) { case PIPE_FORMAT_DXT1_RGB: case PIPE_FORMAT_DXT1_RGBA: @@ -152,8 +173,51 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen, break; } - if(tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) - return winsys->is_displaytarget_format_supported(winsys, format); + if(tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { + if(format_desc->block.width != 1 || + format_desc->block.height != 1) + return FALSE; + + if(format_desc->layout != UTIL_FORMAT_LAYOUT_SCALAR && + format_desc->layout != UTIL_FORMAT_LAYOUT_ARITH && + format_desc->layout != UTIL_FORMAT_LAYOUT_ARRAY) + return FALSE; + + if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB && + format_desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB) + return FALSE; + } + + if(tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) { + if(!winsys->is_displaytarget_format_supported(winsys, format)) + return FALSE; + } + + if(tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) { + if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) + return FALSE; + + /* FIXME: Temporary restriction. See lp_state_fs.c. */ + if(format_desc->block.bits != 32) + return FALSE; + } + + /* FIXME: Temporary restrictions. See lp_bld_sample_soa.c */ + if(tex_usage & PIPE_TEXTURE_USAGE_SAMPLER) { + if(format_desc->block.width != 1 || + format_desc->block.height != 1) + return FALSE; + + if(format_desc->layout != UTIL_FORMAT_LAYOUT_SCALAR && + format_desc->layout != UTIL_FORMAT_LAYOUT_ARITH && + format_desc->layout != UTIL_FORMAT_LAYOUT_ARRAY) + return FALSE; + + if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB && + format_desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB && + format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) + return FALSE; + } return TRUE; } @@ -213,6 +277,10 @@ llvmpipe_create_screen(struct llvmpipe_winsys *winsys) { struct llvmpipe_screen *screen = CALLOC_STRUCT(llvmpipe_screen); +#ifdef DEBUG + LP_DEBUG = debug_get_flags_option("LP_DEBUG", lp_debug_flags, 0 ); +#endif + if (!screen) return NULL; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index ffcbc9a379f..b18f17c0cd3 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -41,6 +41,7 @@ #include "draw/draw_vertex.h" #include "pipe/p_shader_tokens.h" #include "pipe/p_thread.h" +#include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" #include "lp_bld_debug.h" @@ -166,7 +167,7 @@ shade_quads(struct llvmpipe_context *llvmpipe, assert((y % 2) == 0); depth = llvmpipe->zsbuf_map + y*llvmpipe->zsbuf_transfer->stride + - 2*x*llvmpipe->zsbuf_transfer->block.size; + 2*x*util_format_get_blocksize(llvmpipe->zsbuf_transfer->texture->format); } else depth = NULL; diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index 7b26ce61a38..3e482cb9047 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -126,6 +126,10 @@ void * llvmpipe_create_sampler_state(struct pipe_context *, const struct pipe_sampler_state *); void llvmpipe_bind_sampler_states(struct pipe_context *, unsigned, void **); +void +llvmpipe_bind_vertex_sampler_states(struct pipe_context *, + unsigned num_samplers, + void **samplers); void llvmpipe_delete_sampler_state(struct pipe_context *, void *); void * @@ -172,6 +176,11 @@ void llvmpipe_set_sampler_textures( struct pipe_context *, unsigned num, struct pipe_texture ** ); +void +llvmpipe_set_vertex_sampler_textures(struct pipe_context *, + unsigned num_textures, + struct pipe_texture **); + void llvmpipe_set_viewport_state( struct pipe_context *, const struct pipe_viewport_state * ); @@ -188,14 +197,14 @@ void llvmpipe_update_fs(struct llvmpipe_context *lp); void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ); -boolean llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, +void llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count); -boolean llvmpipe_draw_elements(struct pipe_context *pipe, +void llvmpipe_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count); -boolean +void llvmpipe_draw_range_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, @@ -204,10 +213,6 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count); void -llvmpipe_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags); - - -void llvmpipe_map_transfers(struct llvmpipe_context *lp); void diff --git a/src/gallium/drivers/llvmpipe/lp_state_blend.c b/src/gallium/drivers/llvmpipe/lp_state_blend.c index b2e75d3b14e..a94cd05ef20 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_state_blend.c @@ -34,6 +34,7 @@ #include "util/u_memory.h" #include "util/u_math.h" #include "util/u_debug_dump.h" +#include "draw/draw_context.h" #include "lp_screen.h" #include "lp_context.h" #include "lp_state.h" @@ -51,6 +52,11 @@ void llvmpipe_bind_blend_state( struct pipe_context *pipe, { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + if (llvmpipe->blend == blend) + return; + + draw_flush(llvmpipe->draw); + llvmpipe->blend = blend; llvmpipe->dirty |= LP_NEW_BLEND; @@ -69,6 +75,11 @@ void llvmpipe_set_blend_color( struct pipe_context *pipe, struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); unsigned i, j; + if(memcmp(&llvmpipe->blend_color, blend_color, sizeof *blend_color) == 0) + return; + + draw_flush(llvmpipe->draw); + memcpy(&llvmpipe->blend_color, blend_color, sizeof *blend_color); if(!llvmpipe->jit_context.blend_color) @@ -99,7 +110,12 @@ llvmpipe_bind_depth_stencil_state(struct pipe_context *pipe, { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); - llvmpipe->depth_stencil = (const struct pipe_depth_stencil_alpha_state *)depth_stencil; + if (llvmpipe->depth_stencil == depth_stencil) + return; + + draw_flush(llvmpipe->draw); + + llvmpipe->depth_stencil = depth_stencil; if(llvmpipe->depth_stencil) llvmpipe->jit_context.alpha_ref_value = llvmpipe->depth_stencil->alpha.ref_value; diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index c753b183c0c..acfd7be5f74 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -66,7 +66,7 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe) /* compute vertex layout now */ const struct lp_fragment_shader *lpfs = llvmpipe->fs; struct vertex_info *vinfo_vbuf = &llvmpipe->vertex_info_vbuf; - const uint num = draw_num_vs_outputs(llvmpipe->draw); + const uint num = draw_current_shader_outputs(llvmpipe->draw); uint i; /* Tell draw_vbuf to simply emit the whole post-xform vertex @@ -116,13 +116,13 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe) } /* this includes texcoords and varying vars */ - src = draw_find_vs_output(llvmpipe->draw, + src = draw_find_shader_output(llvmpipe->draw, lpfs->info.input_semantic_name[i], lpfs->info.input_semantic_index[i]); draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); } - llvmpipe->psize_slot = draw_find_vs_output(llvmpipe->draw, + llvmpipe->psize_slot = draw_find_shader_output(llvmpipe->draw, TGSI_SEMANTIC_PSIZE, 0); if (llvmpipe->psize_slot > 0) { draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, @@ -198,10 +198,14 @@ update_tgsi_samplers( struct llvmpipe_context *llvmpipe ) unsigned i; /* vertex shader samplers */ - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - llvmpipe->tgsi.vert_samplers[i].sampler = llvmpipe->sampler[i]; - llvmpipe->tgsi.vert_samplers[i].texture = llvmpipe->texture[i]; - llvmpipe->tgsi.frag_samplers[i].base.get_samples = lp_get_samples; + for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { + llvmpipe->tgsi.vert_samplers[i].sampler = llvmpipe->vertex_samplers[i]; + llvmpipe->tgsi.vert_samplers[i].texture = llvmpipe->vertex_textures[i]; + llvmpipe->tgsi.vert_samplers[i].base.get_samples = lp_get_samples; + } + + for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { + lp_tex_tile_cache_validate_texture( llvmpipe->vertex_tex_cache[i] ); } /* fragment shader samplers */ diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 8e2aae40afa..f2b8c362644 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -87,6 +87,7 @@ #include "lp_state.h" #include "lp_quad.h" #include "lp_tex_sample.h" +#include "lp_debug.h" static const unsigned char quad_offset_x[4] = {0, 1, 0, 1}; @@ -148,6 +149,20 @@ generate_depth(LLVMBuilderRef builder, format_desc = util_format_description(key->zsbuf_format); assert(format_desc); + /* + * Depths are expected to be between 0 and 1, even if they are stored in + * floats. Setting these bits here will ensure that the lp_build_conv() call + * below won't try to unnecessarily clamp the incoming values. + */ + if(src_type.floating) { + src_type.sign = FALSE; + src_type.norm = TRUE; + } + else { + assert(!src_type.sign); + assert(src_type.norm); + } + /* Pick the depth type. */ dst_type = lp_depth_type(format_desc, src_type.width*src_type.length); @@ -155,14 +170,11 @@ generate_depth(LLVMBuilderRef builder, assert(dst_type.width == src_type.width); assert(dst_type.length == src_type.length); -#if 1 - src = lp_build_clamped_float_to_unsigned_norm(builder, - src_type, - dst_type.width, - src); -#else lp_build_conv(builder, src_type, dst_type, &src, 1, &src, 1); -#endif + + dst_ptr = LLVMBuildBitCast(builder, + dst_ptr, + LLVMPointerType(lp_build_vec_type(dst_type), 0), ""); lp_build_depth_test(builder, &key->depth, @@ -397,59 +409,58 @@ generate_fragment(struct llvmpipe_context *lp, unsigned i; unsigned chan; -#ifdef DEBUG - tgsi_dump(shader->base.tokens, 0); - if(key->depth.enabled) { - debug_printf("depth.format = %s\n", pf_name(key->zsbuf_format)); - debug_printf("depth.func = %s\n", debug_dump_func(key->depth.func, TRUE)); - debug_printf("depth.writemask = %u\n", key->depth.writemask); - } - if(key->alpha.enabled) { - debug_printf("alpha.func = %s\n", debug_dump_func(key->alpha.func, TRUE)); - debug_printf("alpha.ref_value = %f\n", key->alpha.ref_value); - } - if(key->blend.logicop_enable) { - debug_printf("blend.logicop_func = %u\n", key->blend.logicop_func); - } - else if(key->blend.blend_enable) { - debug_printf("blend.rgb_func = %s\n", debug_dump_blend_func (key->blend.rgb_func, TRUE)); - debug_printf("rgb_src_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_src_factor, TRUE)); - debug_printf("rgb_dst_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_dst_factor, TRUE)); - debug_printf("alpha_func = %s\n", debug_dump_blend_func (key->blend.alpha_func, TRUE)); - debug_printf("alpha_src_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_src_factor, TRUE)); - debug_printf("alpha_dst_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_dst_factor, TRUE)); - } - debug_printf("blend.colormask = 0x%x\n", key->blend.colormask); - for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) { - if(key->sampler[i].format) { - debug_printf("sampler[%u] = \n", i); - debug_printf(" .format = %s\n", - pf_name(key->sampler[i].format)); - debug_printf(" .target = %s\n", - debug_dump_tex_target(key->sampler[i].target, TRUE)); - debug_printf(" .pot = %u %u %u\n", - key->sampler[i].pot_width, - key->sampler[i].pot_height, - key->sampler[i].pot_depth); - debug_printf(" .wrap = %s %s %s\n", - debug_dump_tex_wrap(key->sampler[i].wrap_s, TRUE), - debug_dump_tex_wrap(key->sampler[i].wrap_t, TRUE), - debug_dump_tex_wrap(key->sampler[i].wrap_r, TRUE)); - debug_printf(" .min_img_filter = %s\n", - debug_dump_tex_filter(key->sampler[i].min_img_filter, TRUE)); - debug_printf(" .min_mip_filter = %s\n", - debug_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE)); - debug_printf(" .mag_img_filter = %s\n", - debug_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE)); - if(key->sampler[i].compare_mode) - debug_printf(" .compare_mode = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE)); - debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords); - debug_printf(" .prefilter = %u\n", key->sampler[i].prefilter); + if (LP_DEBUG & DEBUG_JIT) { + tgsi_dump(shader->base.tokens, 0); + if(key->depth.enabled) { + debug_printf("depth.format = %s\n", pf_name(key->zsbuf_format)); + debug_printf("depth.func = %s\n", debug_dump_func(key->depth.func, TRUE)); + debug_printf("depth.writemask = %u\n", key->depth.writemask); + } + if(key->alpha.enabled) { + debug_printf("alpha.func = %s\n", debug_dump_func(key->alpha.func, TRUE)); + debug_printf("alpha.ref_value = %f\n", key->alpha.ref_value); + } + if(key->blend.logicop_enable) { + debug_printf("blend.logicop_func = %u\n", key->blend.logicop_func); + } + else if(key->blend.blend_enable) { + debug_printf("blend.rgb_func = %s\n", debug_dump_blend_func (key->blend.rgb_func, TRUE)); + debug_printf("rgb_src_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_src_factor, TRUE)); + debug_printf("rgb_dst_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_dst_factor, TRUE)); + debug_printf("alpha_func = %s\n", debug_dump_blend_func (key->blend.alpha_func, TRUE)); + debug_printf("alpha_src_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_src_factor, TRUE)); + debug_printf("alpha_dst_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_dst_factor, TRUE)); + } + debug_printf("blend.colormask = 0x%x\n", key->blend.colormask); + for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) { + if(key->sampler[i].format) { + debug_printf("sampler[%u] = \n", i); + debug_printf(" .format = %s\n", + pf_name(key->sampler[i].format)); + debug_printf(" .target = %s\n", + debug_dump_tex_target(key->sampler[i].target, TRUE)); + debug_printf(" .pot = %u %u %u\n", + key->sampler[i].pot_width, + key->sampler[i].pot_height, + key->sampler[i].pot_depth); + debug_printf(" .wrap = %s %s %s\n", + debug_dump_tex_wrap(key->sampler[i].wrap_s, TRUE), + debug_dump_tex_wrap(key->sampler[i].wrap_t, TRUE), + debug_dump_tex_wrap(key->sampler[i].wrap_r, TRUE)); + debug_printf(" .min_img_filter = %s\n", + debug_dump_tex_filter(key->sampler[i].min_img_filter, TRUE)); + debug_printf(" .min_mip_filter = %s\n", + debug_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE)); + debug_printf(" .mag_img_filter = %s\n", + debug_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE)); + if(key->sampler[i].compare_mode) + debug_printf(" .compare_mode = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE)); + debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords); + debug_printf(" .prefilter = %u\n", key->sampler[i].prefilter); + } } } -#endif - variant = CALLOC_STRUCT(lp_fragment_shader_variant); if(!variant) return NULL; @@ -588,8 +599,8 @@ generate_fragment(struct llvmpipe_context *lp, } lp_build_conv_mask(builder, fs_type, blend_type, - fs_mask, num_fs, - &blend_mask, 1); + fs_mask, num_fs, + &blend_mask, 1); /* * Blending. @@ -611,23 +622,24 @@ generate_fragment(struct llvmpipe_context *lp, * Translate the LLVM IR into machine code. */ +#ifdef DEBUG if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) { LLVMDumpValue(variant->function); - abort(); + assert(0); } +#endif LLVMRunFunctionPassManager(screen->pass, variant->function); -#ifdef DEBUG - LLVMDumpValue(variant->function); - debug_printf("\n"); -#endif + if (LP_DEBUG & DEBUG_JIT) { + LLVMDumpValue(variant->function); + debug_printf("\n"); + } variant->jit_function = (lp_jit_frag_func)LLVMGetPointerToGlobal(screen->engine, variant->function); -#ifdef DEBUG - lp_disassemble(variant->jit_function); -#endif + if (LP_DEBUG & DEBUG_ASM) + lp_disassemble(variant->jit_function); variant->next = shader->variants; shader->variants = variant; @@ -661,7 +673,12 @@ llvmpipe_bind_fs_state(struct pipe_context *pipe, void *fs) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); - llvmpipe->fs = (struct lp_fragment_shader *) fs; + if (llvmpipe->fs == fs) + return; + + draw_flush(llvmpipe->draw); + + llvmpipe->fs = fs; llvmpipe->dirty |= LP_NEW_FS; } @@ -712,8 +729,7 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe, assert(shader < PIPE_SHADER_TYPES); assert(index == 0); - if(shader == PIPE_SHADER_VERTEX) - draw_flush(llvmpipe->draw); + draw_flush(llvmpipe->draw); /* note: reference counting */ pipe_buffer_reference(&llvmpipe->constants[shader].buffer, buffer); @@ -723,7 +739,8 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe, } if(shader == PIPE_SHADER_VERTEX) { - draw_set_mapped_constant_buffer(llvmpipe->draw, data, size); + draw_set_mapped_constant_buffer(llvmpipe->draw, PIPE_SHADER_VERTEX, + data, size); } llvmpipe->dirty |= LP_NEW_CONSTANTS; diff --git a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c index 4561c6b8456..aa3b5a3f91e 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c +++ b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c @@ -41,14 +41,17 @@ llvmpipe_create_rasterizer_state(struct pipe_context *pipe, } void llvmpipe_bind_rasterizer_state(struct pipe_context *pipe, - void *setup) + void *rasterizer) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + if (llvmpipe->rasterizer == rasterizer) + return; + /* pass-through to draw module */ - draw_set_rasterizer_state(llvmpipe->draw, setup); + draw_set_rasterizer_state(llvmpipe->draw, rasterizer); - llvmpipe->rasterizer = (struct pipe_rasterizer_state *)setup; + llvmpipe->rasterizer = rasterizer; llvmpipe->dirty |= LP_NEW_RASTERIZER; } diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c index c69d90c723a..d382f9ca87e 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c +++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c @@ -78,6 +78,34 @@ llvmpipe_bind_sampler_states(struct pipe_context *pipe, void +llvmpipe_bind_vertex_sampler_states(struct pipe_context *pipe, + unsigned num_samplers, + void **samplers) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + unsigned i; + + assert(num_samplers <= PIPE_MAX_VERTEX_SAMPLERS); + + /* Check for no-op */ + if (num_samplers == llvmpipe->num_vertex_samplers && + !memcmp(llvmpipe->vertex_samplers, samplers, num_samplers * sizeof(void *))) + return; + + draw_flush(llvmpipe->draw); + + for (i = 0; i < num_samplers; ++i) + llvmpipe->vertex_samplers[i] = samplers[i]; + for (i = num_samplers; i < PIPE_MAX_VERTEX_SAMPLERS; ++i) + llvmpipe->vertex_samplers[i] = NULL; + + llvmpipe->num_vertex_samplers = num_samplers; + + llvmpipe->dirty |= LP_NEW_SAMPLER; +} + + +void llvmpipe_set_sampler_textures(struct pipe_context *pipe, unsigned num, struct pipe_texture **texture) { @@ -102,8 +130,8 @@ llvmpipe_set_sampler_textures(struct pipe_context *pipe, if(tex) { struct llvmpipe_texture *lp_tex = llvmpipe_texture(tex); struct lp_jit_texture *jit_tex = &llvmpipe->jit_context.textures[i]; - jit_tex->width = tex->width[0]; - jit_tex->height = tex->height[0]; + jit_tex->width = tex->width0; + jit_tex->height = tex->height0; jit_tex->stride = lp_tex->stride[0]; if(!lp_tex->dt) jit_tex->data = lp_tex->data; @@ -117,6 +145,37 @@ llvmpipe_set_sampler_textures(struct pipe_context *pipe, void +llvmpipe_set_vertex_sampler_textures(struct pipe_context *pipe, + unsigned num_textures, + struct pipe_texture **textures) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + uint i; + + assert(num_textures <= PIPE_MAX_VERTEX_SAMPLERS); + + /* Check for no-op */ + if (num_textures == llvmpipe->num_vertex_textures && + !memcmp(llvmpipe->vertex_textures, textures, num_textures * sizeof(struct pipe_texture *))) { + return; + } + + draw_flush(llvmpipe->draw); + + for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { + struct pipe_texture *tex = i < num_textures ? textures[i] : NULL; + + pipe_texture_reference(&llvmpipe->vertex_textures[i], tex); + lp_tex_tile_cache_set_texture(llvmpipe->vertex_tex_cache[i], tex); + } + + llvmpipe->num_vertex_textures = num_textures; + + llvmpipe->dirty |= LP_NEW_TEXTURE; +} + + +void llvmpipe_delete_sampler_state(struct pipe_context *pipe, void *sampler) { diff --git a/src/gallium/drivers/llvmpipe/lp_state_surface.c b/src/gallium/drivers/llvmpipe/lp_state_surface.c index c06ce8b75c1..e37ff04f3df 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_state_surface.c @@ -35,6 +35,8 @@ #include "draw/draw_context.h" +#include "util/u_format.h" + /** * XXX this might get moved someday @@ -49,6 +51,8 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe, struct llvmpipe_context *lp = llvmpipe_context(pipe); uint i; + draw_flush(lp->draw); + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { /* check if changing cbuf */ if (lp->framebuffer.cbufs[i] != fb->cbufs[i]) { @@ -88,8 +92,9 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe, if (lp->framebuffer.zsbuf) { int depth_bits; double mrd; - depth_bits = pf_get_component_bits(lp->framebuffer.zsbuf->format, - PIPE_FORMAT_COMP_Z); + depth_bits = util_format_get_component_bits(lp->framebuffer.zsbuf->format, + UTIL_FORMAT_COLORSPACE_ZS, + 0); if (depth_bits > 16) { mrd = 0.0000001; } diff --git a/src/gallium/drivers/llvmpipe/lp_state_vs.c b/src/gallium/drivers/llvmpipe/lp_state_vs.c index 15c30296144..884e3878e62 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_vs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_vs.c @@ -70,14 +70,18 @@ fail: void -llvmpipe_bind_vs_state(struct pipe_context *pipe, void *vs) +llvmpipe_bind_vs_state(struct pipe_context *pipe, void *_vs) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + const struct lp_vertex_shader *vs = (const struct lp_vertex_shader *)_vs; - llvmpipe->vs = (const struct lp_vertex_shader *)vs; + if (llvmpipe->vs == vs) + return; - draw_bind_vertex_shader(llvmpipe->draw, - (llvmpipe->vs ? llvmpipe->vs->draw_data : NULL)); + draw_bind_vertex_shader(llvmpipe->draw, + vs ? vs->draw_data : NULL); + + llvmpipe->vs = vs; llvmpipe->dirty |= LP_NEW_VS; } @@ -92,5 +96,6 @@ llvmpipe_delete_vs_state(struct pipe_context *pipe, void *vs) (struct lp_vertex_shader *)vs; draw_delete_vertex_shader(llvmpipe->draw, state->draw_data); + FREE( (void *)state->shader.tokens ); FREE( state ); } diff --git a/src/gallium/drivers/llvmpipe/lp_tex_cache.c b/src/gallium/drivers/llvmpipe/lp_tex_cache.c index 773e8482425..a6d9a2c1acf 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_cache.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_cache.c @@ -36,6 +36,7 @@ #include "util/u_memory.h" #include "util/u_tile.h" #include "util/u_format.h" +#include "util/u_math.h" #include "lp_context.h" #include "lp_surface.h" #include "lp_texture.h" @@ -154,7 +155,6 @@ lp_tex_tile_cache_validate_texture(struct llvmpipe_tex_tile_cache *tc) if (lpt->timestamp != tc->timestamp) { /* texture was modified, invalidate all cached tiles */ uint i; - debug_printf("INV %d %d\n", tc->timestamp, lpt->timestamp); for (i = 0; i < NUM_ENTRIES; i++) { tc->entries[i].addr.bits.invalid = 1; } @@ -270,8 +270,8 @@ lp_find_cached_tex_tile(struct llvmpipe_tex_tile_cache *tc, addr.bits.level, addr.bits.z, PIPE_TRANSFER_READ, 0, 0, - tc->texture->width[addr.bits.level], - tc->texture->height[addr.bits.level]); + u_minify(tc->texture->width0, addr.bits.level), + u_minify(tc->texture->height0, addr.bits.level)); tc->tex_trans_map = screen->transfer_map(screen, tc->tex_trans); @@ -290,7 +290,7 @@ lp_find_cached_tex_tile(struct llvmpipe_tex_tile_cache *tc, assert(0); } - util_format_read_4ub(tc->tex_trans->format, + util_format_read_4ub(tc->tex_trans->texture->format, (uint8_t *)tile->color, sizeof tile->color[0], tc->tex_trans_map, tc->tex_trans->stride, x, y, w, h); diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c index 699394c0deb..eaff6d5cdf7 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c @@ -544,7 +544,7 @@ compute_lambda(struct tgsi_sampler *tgsi_sampler, float dsdy = s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]; dsdx = fabsf(dsdx); dsdy = fabsf(dsdy); - rho = MAX2(dsdx, dsdy) * texture->width[0]; + rho = MAX2(dsdx, dsdy) * texture->width0; } if (t) { float dtdx = t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]; @@ -552,7 +552,7 @@ compute_lambda(struct tgsi_sampler *tgsi_sampler, float max; dtdx = fabsf(dtdx); dtdy = fabsf(dtdy); - max = MAX2(dtdx, dtdy) * texture->height[0]; + max = MAX2(dtdx, dtdy) * texture->height0; rho = MAX2(rho, max); } if (p) { @@ -561,7 +561,7 @@ compute_lambda(struct tgsi_sampler *tgsi_sampler, float max; dpdx = fabsf(dpdx); dpdy = fabsf(dpdy); - max = MAX2(dpdx, dpdy) * texture->depth[0]; + max = MAX2(dpdx, dpdy) * texture->depth0; rho = MAX2(rho, max); } @@ -726,9 +726,9 @@ get_texel(const struct tgsi_sampler *tgsi_sampler, const struct pipe_texture *texture = samp->texture; const struct pipe_sampler_state *sampler = samp->sampler; - if (x < 0 || x >= (int) texture->width[level] || - y < 0 || y >= (int) texture->height[level] || - z < 0 || z >= (int) texture->depth[level]) { + if (x < 0 || x >= (int) u_minify(texture->width0, level) || + y < 0 || y >= (int) u_minify(texture->height0, level) || + z < 0 || z >= (int) u_minify(texture->depth0, level)) { rgba[0][j] = sampler->border_color[0]; rgba[1][j] = sampler->border_color[1]; rgba[2][j] = sampler->border_color[2]; @@ -1085,7 +1085,7 @@ lp_get_samples_2d_common(struct tgsi_sampler *tgsi_sampler, const struct pipe_sampler_state *sampler = samp->sampler; unsigned level0, level1, j, imgFilter; int width, height; - float levelBlend = 0.0F; + float levelBlend = 0.0f; choose_mipmap_levels(tgsi_sampler, s, t, p, lodbias, @@ -1093,8 +1093,8 @@ lp_get_samples_2d_common(struct tgsi_sampler *tgsi_sampler, assert(sampler->normalized_coords); - width = texture->width[level0]; - height = texture->height[level0]; + width = u_minify(texture->width0, level0); + height = u_minify(texture->height0, level0); assert(width > 0); @@ -1131,7 +1131,6 @@ lp_get_samples_2d_common(struct tgsi_sampler *tgsi_sampler, } break; case PIPE_TEX_FILTER_LINEAR: - case PIPE_TEX_FILTER_ANISO: { int x0[4], y0[4], x1[4], y1[4]; float xw[4], yw[4]; /* weights */ @@ -1241,7 +1240,7 @@ lp_get_samples_3d(struct tgsi_sampler *tgsi_sampler, /* get/map pipe_surfaces corresponding to 3D tex slices */ unsigned level0, level1, j, imgFilter; int width, height, depth; - float levelBlend; + float levelBlend = 0.0f; const uint face = 0; choose_mipmap_levels(tgsi_sampler, s, t, p, @@ -1250,9 +1249,9 @@ lp_get_samples_3d(struct tgsi_sampler *tgsi_sampler, assert(sampler->normalized_coords); - width = texture->width[level0]; - height = texture->height[level0]; - depth = texture->depth[level0]; + width = u_minify(texture->width0, level0); + height = u_minify(texture->height0, level0); + depth = u_minify(texture->depth0, level0); assert(width > 0); assert(height > 0); @@ -1283,7 +1282,6 @@ lp_get_samples_3d(struct tgsi_sampler *tgsi_sampler, } break; case PIPE_TEX_FILTER_LINEAR: - case PIPE_TEX_FILTER_ANISO: { int x0[4], x1[4], y0[4], y1[4], z0[4], z1[4]; float xw[4], yw[4], zw[4]; /* interpolation weights */ @@ -1394,8 +1392,8 @@ lp_get_samples_rect(struct tgsi_sampler *tgsi_sampler, /* texture RECTS cannot be mipmapped */ assert(level0 == level1); - width = texture->width[level0]; - height = texture->height[level0]; + width = u_minify(texture->width0, level0); + height = u_minify(texture->height0, level0); assert(width > 0); @@ -1414,7 +1412,6 @@ lp_get_samples_rect(struct tgsi_sampler *tgsi_sampler, } break; case PIPE_TEX_FILTER_LINEAR: - case PIPE_TEX_FILTER_ANISO: { int x0[4], y0[4], x1[4], y1[4]; float xw[4], yw[4]; /* weights */ @@ -1513,8 +1510,8 @@ lp_get_samples(struct tgsi_sampler *tgsi_sampler, /* Do this elsewhere: */ - samp->xpot = util_unsigned_logbase2( samp->texture->width[0] ); - samp->ypot = util_unsigned_logbase2( samp->texture->height[0] ); + samp->xpot = util_unsigned_logbase2( samp->texture->width0 ); + samp->ypot = util_unsigned_logbase2( samp->texture->height0 ); /* Try to hook in a faster sampler. Ultimately we'll have to * code-generate these. Luckily most of this looks like it is diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index a00f2495dfc..2c135029ea2 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -34,6 +34,8 @@ #include "pipe/p_defines.h" #include "pipe/p_inlines.h" #include "pipe/internal/p_winsys_screen.h" + +#include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -48,7 +50,6 @@ /* Simple, maximally packed layout. */ - /* Conventional allocation path for non-display textures: */ static boolean @@ -57,29 +58,21 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen, { struct pipe_texture *pt = &lpt->base; unsigned level; - unsigned width = pt->width[0]; - unsigned height = pt->height[0]; - unsigned depth = pt->depth[0]; + unsigned width = pt->width0; + unsigned height = pt->height0; + unsigned depth = pt->depth0; unsigned buffer_size = 0; - pf_get_block(lpt->base.format, &lpt->base.block); - for (level = 0; level <= pt->last_level; level++) { unsigned nblocksx, nblocksy; - pt->width[level] = width; - pt->height[level] = height; - pt->depth[level] = depth; - pt->nblocksx[level] = pf_get_nblocksx(&pt->block, width); - pt->nblocksy[level] = pf_get_nblocksy(&pt->block, height); - /* Allocate storage for whole quads. This is particularly important * for depth surfaces, which are currently stored in a swizzled format. */ - nblocksx = pf_get_nblocksx(&pt->block, align(width, 2)); - nblocksy = pf_get_nblocksy(&pt->block, align(height, 2)); + nblocksx = util_format_get_nblocksx(pt->format, align(width, 2)); + nblocksy = util_format_get_nblocksy(pt->format, align(height, 2)); - lpt->stride[level] = align(nblocksx*pt->block.size, 16); + lpt->stride[level] = align(nblocksx * util_format_get_blocksize(pt->format), 16); lpt->level_offset[level] = buffer_size; @@ -87,9 +80,9 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen, ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) * lpt->stride[level]); - width = minify(width); - height = minify(height); - depth = minify(depth); + width = u_minify(width, 1); + height = u_minify(height, 1); + depth = u_minify(depth, 1); } lpt->data = align_malloc(buffer_size, 16); @@ -103,14 +96,10 @@ llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen, { struct llvmpipe_winsys *winsys = screen->winsys; - pf_get_block(lpt->base.format, &lpt->base.block); - lpt->base.nblocksx[0] = pf_get_nblocksx(&lpt->base.block, lpt->base.width[0]); - lpt->base.nblocksy[0] = pf_get_nblocksy(&lpt->base.block, lpt->base.height[0]); - lpt->dt = winsys->displaytarget_create(winsys, lpt->base.format, - lpt->base.width[0], - lpt->base.height[0], + lpt->base.width0, + lpt->base.height0, 16, &lpt->stride[0] ); @@ -172,7 +161,7 @@ llvmpipe_texture_blanket(struct pipe_screen * screen, /* Only supports one type */ if (base->target != PIPE_TEXTURE_2D || base->last_level != 0 || - base->depth[0] != 1) { + base->depth0 != 1) { return NULL; } @@ -183,8 +172,6 @@ llvmpipe_texture_blanket(struct pipe_screen * screen, lpt->base = *base; pipe_reference_init(&lpt->base.reference, 1); lpt->base.screen = screen; - lpt->base.nblocksx[0] = pf_get_nblocksx(&lpt->base.block, lpt->base.width[0]); - lpt->base.nblocksy[0] = pf_get_nblocksy(&lpt->base.block, lpt->base.height[0]); lpt->stride[0] = stride[0]; pipe_buffer_reference(&lpt->buffer, buffer); @@ -229,8 +216,8 @@ llvmpipe_get_tex_surface(struct pipe_screen *screen, pipe_reference_init(&ps->reference, 1); pipe_texture_reference(&ps->texture, pt); ps->format = pt->format; - ps->width = pt->width[level]; - ps->height = pt->height[level]; + ps->width = u_minify(pt->width0, level); + ps->height = u_minify(pt->height0, level); ps->offset = lpt->level_offset[level]; ps->usage = usage; @@ -258,11 +245,17 @@ llvmpipe_get_tex_surface(struct pipe_screen *screen, ps->level = level; ps->zslice = zslice; + /* XXX shouldn't that rather be + tex_height = align(ps->height, 2); + to account for alignment done in llvmpipe_texture_layout ? + */ if (pt->target == PIPE_TEXTURE_CUBE) { - ps->offset += face * pt->nblocksy[level] * lpt->stride[level]; + unsigned tex_height = ps->height; + ps->offset += face * util_format_get_nblocksy(pt->format, tex_height) * lpt->stride[level]; } else if (pt->target == PIPE_TEXTURE_3D) { - ps->offset += zslice * pt->nblocksy[level] * lpt->stride[level]; + unsigned tex_height = ps->height; + ps->offset += zslice * util_format_get_nblocksy(pt->format, tex_height) * lpt->stride[level]; } else { assert(face == 0); @@ -303,14 +296,10 @@ llvmpipe_get_tex_transfer(struct pipe_screen *screen, if (lpt) { struct pipe_transfer *pt = &lpt->base; pipe_texture_reference(&pt->texture, texture); - pt->format = texture->format; - pt->block = texture->block; pt->x = x; pt->y = y; pt->width = w; pt->height = h; - pt->nblocksx = texture->nblocksx[level]; - pt->nblocksy = texture->nblocksy[level]; pt->stride = lptex->stride[level]; pt->usage = usage; pt->face = face; @@ -319,11 +308,17 @@ llvmpipe_get_tex_transfer(struct pipe_screen *screen, lpt->offset = lptex->level_offset[level]; + /* XXX shouldn't that rather be + tex_height = align(u_minify(texture->height0, level), 2) + to account for alignment done in llvmpipe_texture_layout ? + */ if (texture->target == PIPE_TEXTURE_CUBE) { - lpt->offset += face * pt->nblocksy * pt->stride; + unsigned tex_height = u_minify(texture->height0, level); + lpt->offset += face * util_format_get_nblocksy(texture->format, tex_height) * pt->stride; } else if (texture->target == PIPE_TEXTURE_3D) { - lpt->offset += zslice * pt->nblocksy * pt->stride; + unsigned tex_height = u_minify(texture->height0, level); + lpt->offset += zslice * util_format_get_nblocksy(texture->format, tex_height) * pt->stride; } else { assert(face == 0); @@ -355,9 +350,11 @@ llvmpipe_transfer_map( struct pipe_screen *_screen, struct llvmpipe_screen *screen = llvmpipe_screen(_screen); ubyte *map, *xfer_map; struct llvmpipe_texture *lpt; + enum pipe_format format; assert(transfer->texture); lpt = llvmpipe_texture(transfer->texture); + format = lpt->base.format; if(lpt->dt) { struct llvmpipe_winsys *winsys = screen->winsys; @@ -382,8 +379,8 @@ llvmpipe_transfer_map( struct pipe_screen *_screen, } xfer_map = map + llvmpipe_transfer(transfer)->offset + - transfer->y / transfer->block.height * transfer->stride + - transfer->x / transfer->block.width * transfer->block.size; + transfer->y / util_format_get_blockheight(format) * transfer->stride + + transfer->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); /*printf("map = %p xfer map = %p\n", map, xfer_map);*/ return xfer_map; } diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.c b/src/gallium/drivers/llvmpipe/lp_tile_cache.c index ec3e002d628..7a1ecf5107b 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_cache.c +++ b/src/gallium/drivers/llvmpipe/lp_tile_cache.c @@ -252,13 +252,13 @@ lp_flush_tile_cache(struct llvmpipe_tile_cache *tc) case LP_TILE_STATUS_CLEAR: /* Actually clear the tiles which were flagged as being in a * clear state. */ - util_fill_rect(tc->transfer_map, &pt->block, pt->stride, + util_fill_rect(tc->transfer_map, pt->texture->format, pt->stride, x, y, w, h, tc->clear_val); break; case LP_TILE_STATUS_DEFINED: - lp_tile_write_4ub(pt->format, + lp_tile_write_4ub(pt->texture->format, tile->color, tc->transfer_map, pt->stride, x, y, w, h); @@ -291,6 +291,11 @@ lp_get_cached_tile(struct llvmpipe_tile_cache *tc, assert(tc->surface); assert(tc->transfer); + if(!tc->transfer_map) + lp_tile_cache_map_transfers(tc); + + assert(tc->transfer_map); + switch(tile->status) { case LP_TILE_STATUS_CLEAR: /* don't get tile from framebuffer, just clear it */ @@ -306,7 +311,7 @@ lp_get_cached_tile(struct llvmpipe_tile_cache *tc, y &= ~(TILE_SIZE - 1); if (!pipe_clip_tile(x, y, &w, &h, tc->transfer)) - lp_tile_read_4ub(pt->format, + lp_tile_read_4ub(pt->texture->format, tile->color, tc->transfer_map, tc->transfer->stride, x, y, w, h); diff --git a/src/gallium/drivers/nouveau/nouveau_push.h b/src/gallium/drivers/nouveau/nouveau_push.h deleted file mode 100644 index 9c235080a55..00000000000 --- a/src/gallium/drivers/nouveau/nouveau_push.h +++ /dev/null @@ -1,93 +0,0 @@ -#ifndef __NOUVEAU_PUSH_H__ -#define __NOUVEAU_PUSH_H__ - -#include "nouveau/nouveau_winsys.h" - -#ifndef NOUVEAU_PUSH_CONTEXT -#error undefined push context -#endif - -#define OUT_RING(data) do { \ - NOUVEAU_PUSH_CONTEXT(pc); \ - (*pc->base.channel->pushbuf->cur++) = (data); \ -} while(0) - -#define OUT_RINGp(src,size) do { \ - NOUVEAU_PUSH_CONTEXT(pc); \ - memcpy(pc->base.channel->pushbuf->cur, (src), (size) * 4); \ - pc->base.channel->pushbuf->cur += (size); \ -} while(0) - -#define OUT_RINGf(data) do { \ - union { float v; uint32_t u; } c; \ - c.v = (data); \ - OUT_RING(c.u); \ -} while(0) - -#define BEGIN_RING(obj,mthd,size) do { \ - NOUVEAU_PUSH_CONTEXT(pc); \ - struct nouveau_channel *chan = pc->base.channel; \ - if (chan->pushbuf->remaining < ((size) + 1)) \ - nouveau_pushbuf_flush(chan, ((size) + 1)); \ - OUT_RING((pc->obj->subc << 13) | ((size) << 18) | (mthd)); \ - chan->pushbuf->remaining -= ((size) + 1); \ -} while(0) - -#define BEGIN_RING_NI(obj,mthd,size) do { \ - BEGIN_RING(obj, (mthd) | 0x40000000, (size)); \ -} while(0) - -static inline void -DO_FIRE_RING(struct nouveau_channel *chan, struct pipe_fence_handle **fence) -{ - nouveau_pushbuf_flush(chan, 0); - if (fence) - *fence = NULL; -} - -#define FIRE_RING(fence) do { \ - NOUVEAU_PUSH_CONTEXT(pc); \ - DO_FIRE_RING(pc->base.channel, fence); \ -} while(0) - -#define OUT_RELOC(bo,data,flags,vor,tor) do { \ - NOUVEAU_PUSH_CONTEXT(pc); \ - struct nouveau_channel *chan = pc->base.channel; \ - nouveau_pushbuf_emit_reloc(chan, chan->pushbuf->cur++, nouveau_bo(bo), \ - (data), 0, (flags), (vor), (tor)); \ -} while(0) - -/* Raw data + flags depending on FB/TT buffer */ -#define OUT_RELOCd(bo,data,flags,vor,tor) do { \ - OUT_RELOC((bo), (data), (flags) | NOUVEAU_BO_OR, (vor), (tor)); \ -} while(0) - -/* FB/TT object handle */ -#define OUT_RELOCo(bo,flags) do { \ - OUT_RELOC((bo), 0, (flags) | NOUVEAU_BO_OR, \ - pc->base.channel->vram->handle, \ - pc->base.channel->gart->handle); \ -} while(0) - -/* Low 32-bits of offset */ -#define OUT_RELOCl(bo,delta,flags) do { \ - OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_LOW, 0, 0); \ -} while(0) - -/* High 32-bits of offset */ -#define OUT_RELOCh(bo,delta,flags) do { \ - OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_HIGH, 0, 0); \ -} while(0) - -/* A reloc which'll recombine into a NV_DMA_METHOD packet header */ -#define OUT_RELOCm(bo, flags, obj, mthd, size) do { \ - NOUVEAU_PUSH_CONTEXT(pc); \ - struct nouveau_channel *chan = pc->base.channel; \ - if (chan->pushbuf->remaining < ((size) + 1)) \ - nouveau_pushbuf_flush(chan, ((size) + 1)); \ - OUT_RELOCd((bo), (pc->obj->subc << 13) | ((size) << 18) | (mthd), \ - (flags), 0, 0); \ - chan->pushbuf->remaining -= ((size) + 1); \ -} while(0) - -#endif diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c index e4cf91c005c..7ebc94ed6c7 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.c +++ b/src/gallium/drivers/nouveau/nouveau_screen.c @@ -31,7 +31,7 @@ nouveau_screen_bo_skel(struct pipe_screen *pscreen, struct nouveau_bo *bo, unsigned alignment, unsigned usage, unsigned size) { struct pipe_buffer *pb; - + pb = CALLOC(1, sizeof(struct pipe_buffer)+sizeof(struct nouveau_bo *)); if (!pb) { nouveau_bo_ref(NULL, &bo); @@ -127,8 +127,18 @@ nouveau_screen_bo_map(struct pipe_screen *pscreen, struct pipe_buffer *pb, unsigned usage) { struct nouveau_bo *bo = nouveau_bo(pb); + struct nouveau_screen *nscreen = nouveau_screen(pscreen); int ret; + if (nscreen->pre_pipebuffer_map_callback) { + ret = nscreen->pre_pipebuffer_map_callback(pscreen, pb, usage); + if (ret) { + debug_printf("pre_pipebuffer_map_callback failed %d\n", + ret); + return NULL; + } + } + ret = nouveau_bo_map(bo, nouveau_screen_map_flags(usage)); if (ret) { debug_printf("map failed: %d\n", ret); @@ -143,11 +153,22 @@ nouveau_screen_bo_map_range(struct pipe_screen *pscreen, struct pipe_buffer *pb, unsigned offset, unsigned length, unsigned usage) { struct nouveau_bo *bo = nouveau_bo(pb); + struct nouveau_screen *nscreen = nouveau_screen(pscreen); uint32_t flags = nouveau_screen_map_flags(usage); int ret; + if (nscreen->pre_pipebuffer_map_callback) { + ret = nscreen->pre_pipebuffer_map_callback(pscreen, pb, usage); + if (ret) { + debug_printf("pre_pipebuffer_map_callback failed %d\n", + ret); + return NULL; + } + } + ret = nouveau_bo_map_range(bo, offset, length, flags); if (ret) { + nouveau_bo_unmap(bo); if (!(flags & NOUVEAU_BO_NOWAIT) || ret != -EBUSY) debug_printf("map_range failed: %d\n", ret); return NULL; @@ -239,5 +260,6 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev) void nouveau_screen_fini(struct nouveau_screen *screen) { + nouveau_channel_free(&screen->channel); } diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h index ebfc67ad1c1..a7927d88dfc 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.h +++ b/src/gallium/drivers/nouveau/nouveau_screen.h @@ -5,6 +5,9 @@ struct nouveau_screen { struct pipe_screen base; struct nouveau_device *device; struct nouveau_channel *channel; + + int (*pre_pipebuffer_map_callback) (struct pipe_screen *pscreen, + struct pipe_buffer *pb, unsigned usage); }; static inline struct nouveau_screen * diff --git a/src/gallium/drivers/nouveau/nouveau_stateobj.h b/src/gallium/drivers/nouveau/nouveau_stateobj.h index b595405357f..e844f6abb3d 100644 --- a/src/gallium/drivers/nouveau/nouveau_stateobj.h +++ b/src/gallium/drivers/nouveau/nouveau_stateobj.h @@ -3,41 +3,95 @@ #include "util/u_debug.h" +#ifdef DEBUG +#define DEBUG_NOUVEAU_STATEOBJ +#endif /* DEBUG */ + struct nouveau_stateobj_reloc { struct nouveau_bo *bo; - unsigned offset; - unsigned packet; + struct nouveau_grobj *gr; + uint32_t push_offset; + uint32_t mthd; - unsigned data; + uint32_t data; unsigned flags; unsigned vor; unsigned tor; }; +struct nouveau_stateobj_start { + struct nouveau_grobj *gr; + uint32_t mthd; + uint32_t size; + unsigned offset; +}; + struct nouveau_stateobj { struct pipe_reference reference; - unsigned *push; + struct nouveau_stateobj_start *start; struct nouveau_stateobj_reloc *reloc; - unsigned *cur; - unsigned cur_packet; + /* Common memory pool for data. */ + uint32_t *pool; + unsigned pool_cur; + +#ifdef DEBUG_NOUVEAU_STATEOBJ + unsigned start_alloc; + unsigned reloc_alloc; + unsigned pool_alloc; +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + + unsigned total; /* includes begin_ring */ + unsigned cur; /* excludes begin_ring, offset from "cur_start" */ + unsigned cur_start; unsigned cur_reloc; }; +static INLINE void +so_dump(struct nouveau_stateobj *so) +{ + unsigned i, nr, total = 0; + + for (i = 0; i < so->cur_start; i++) { + if (so->start[i].gr->subc > -1) + debug_printf("+0x%04x: 0x%08x\n", total++, + (so->start[i].size << 18) | (so->start[i].gr->subc << 13) + | so->start[i].mthd); + else + debug_printf("+0x%04x: 0x%08x\n", total++, + (so->start[i].size << 18) | so->start[i].mthd); + for (nr = 0; nr < so->start[i].size; nr++, total++) + debug_printf("+0x%04x: 0x%08x\n", total, + so->pool[so->start[i].offset + nr]); + } +} + static INLINE struct nouveau_stateobj * -so_new(unsigned push, unsigned reloc) +so_new(unsigned start, unsigned push, unsigned reloc) { struct nouveau_stateobj *so; so = MALLOC(sizeof(struct nouveau_stateobj)); pipe_reference_init(&so->reference, 1); - so->push = MALLOC(sizeof(unsigned) * push); - so->reloc = MALLOC(sizeof(struct nouveau_stateobj_reloc) * reloc); + so->total = so->cur = so->cur_start = so->cur_reloc = 0; + +#ifdef DEBUG_NOUVEAU_STATEOBJ + so->start_alloc = start; + so->reloc_alloc = reloc; + so->pool_alloc = push; +#endif /* DEBUG_NOUVEAU_STATEOBJ */ - so->cur = so->push; - so->cur_reloc = so->cur_packet = 0; + so->start = MALLOC(start * sizeof(struct nouveau_stateobj_start)); + so->reloc = MALLOC(reloc * sizeof(struct nouveau_stateobj_reloc)); + so->pool = MALLOC(push * sizeof(uint32_t)); + so->pool_cur = 0; + + if (!so->start || !so->reloc || !so->pool) { + debug_printf("malloc failed\n"); + assert(0); + } return so; } @@ -48,62 +102,128 @@ so_ref(struct nouveau_stateobj *ref, struct nouveau_stateobj **pso) struct nouveau_stateobj *so = *pso; int i; - if (pipe_reference((struct pipe_reference**)pso, &ref->reference)) { - free(so->push); + if (pipe_reference(&(*pso)->reference, &ref->reference)) { + FREE(so->start); for (i = 0; i < so->cur_reloc; i++) nouveau_bo_ref(NULL, &so->reloc[i].bo); - free(so->reloc); - free(so); + FREE(so->reloc); + FREE(so->pool); + FREE(so); } + *pso = ref; } static INLINE void -so_data(struct nouveau_stateobj *so, unsigned data) +so_data(struct nouveau_stateobj *so, uint32_t data) { - (*so->cur++) = (data); - so->cur_packet += 4; +#ifdef DEBUG_NOUVEAU_STATEOBJ + if (so->cur >= so->start[so->cur_start - 1].size) { + debug_printf("exceeding specified size\n"); + assert(0); + } +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + + so->pool[so->start[so->cur_start - 1].offset + so->cur++] = data; } static INLINE void -so_datap(struct nouveau_stateobj *so, unsigned *data, unsigned size) +so_datap(struct nouveau_stateobj *so, uint32_t *data, unsigned size) { - so->cur_packet += (4 * size); +#ifdef DEBUG_NOUVEAU_STATEOBJ + if ((so->cur + size) > so->start[so->cur_start - 1].size) { + debug_printf("exceeding specified size\n"); + assert(0); + } +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + while (size--) - (*so->cur++) = (*data++); + so->pool[so->start[so->cur_start - 1].offset + so->cur++] = + *data++; } static INLINE void so_method(struct nouveau_stateobj *so, struct nouveau_grobj *gr, unsigned mthd, unsigned size) { - so->cur_packet = (gr->subc << 13) | (1 << 18) | (mthd - 4); - so_data(so, (gr->subc << 13) | (size << 18) | mthd); + struct nouveau_stateobj_start *start; + +#ifdef DEBUG_NOUVEAU_STATEOBJ + if (so->start_alloc <= so->cur_start) { + debug_printf("exceeding num_start size\n"); + assert(0); + } else +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + start = so->start; + +#ifdef DEBUG_NOUVEAU_STATEOBJ + if (so->cur_start > 0 && start[so->cur_start - 1].size > so->cur) { + debug_printf("previous so_method was not filled\n"); + assert(0); + } +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + + so->start = start; + start[so->cur_start].gr = gr; + start[so->cur_start].mthd = mthd; + start[so->cur_start].size = size; + +#ifdef DEBUG_NOUVEAU_STATEOBJ + if (so->pool_alloc < (size + so->pool_cur)) { + debug_printf("exceeding num_pool size\n"); + assert(0); + } +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + + start[so->cur_start].offset = so->pool_cur; + so->pool_cur += size; + + so->cur_start++; + /* The 1 is for *this* begin_ring. */ + so->total += so->cur + 1; + so->cur = 0; } static INLINE void so_reloc(struct nouveau_stateobj *so, struct nouveau_bo *bo, unsigned data, unsigned flags, unsigned vor, unsigned tor) { - struct nouveau_stateobj_reloc *r = &so->reloc[so->cur_reloc++]; - - r->bo = NULL; - nouveau_bo_ref(bo, &r->bo); - r->offset = so->cur - so->push; - r->packet = so->cur_packet; - r->data = data; - r->flags = flags; - r->vor = vor; - r->tor = tor; + struct nouveau_stateobj_reloc *r; + +#ifdef DEBUG_NOUVEAU_STATEOBJ + if (so->reloc_alloc <= so->cur_reloc) { + debug_printf("exceeding num_reloc size\n"); + assert(0); + } else +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + r = so->reloc; + + so->reloc = r; + r[so->cur_reloc].bo = NULL; + nouveau_bo_ref(bo, &(r[so->cur_reloc].bo)); + r[so->cur_reloc].gr = so->start[so->cur_start-1].gr; + r[so->cur_reloc].push_offset = so->total + so->cur; + r[so->cur_reloc].data = data; + r[so->cur_reloc].flags = flags; + r[so->cur_reloc].mthd = so->start[so->cur_start-1].mthd + + (so->cur << 2); + r[so->cur_reloc].vor = vor; + r[so->cur_reloc].tor = tor; + so_data(so, data); + so->cur_reloc++; } -static INLINE void -so_dump(struct nouveau_stateobj *so) +/* Determine if this buffer object is referenced by this state object. */ +static INLINE boolean +so_bo_is_reloc(struct nouveau_stateobj *so, struct nouveau_bo *bo) { - unsigned i, nr = so->cur - so->push; + int i; + + for (i = 0; i < so->cur_reloc; i++) + if (so->reloc[i].bo == bo) + return true; - for (i = 0; i < nr; i++) - debug_printf("+0x%04x: 0x%08x\n", i, so->push[i]); + return false; } static INLINE void @@ -111,48 +231,95 @@ so_emit(struct nouveau_channel *chan, struct nouveau_stateobj *so) { struct nouveau_pushbuf *pb = chan->pushbuf; unsigned nr, i; + int ret = 0; - nr = so->cur - so->push; - if (pb->remaining < nr) - nouveau_pushbuf_flush(chan, nr); - pb->remaining -= nr; +#ifdef DEBUG_NOUVEAU_STATEOBJ + if (so->start[so->cur_start - 1].size > so->cur) { + debug_printf("emit: previous so_method was not filled\n"); + assert(0); + } +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + + /* We cannot update total in case we so_emit again. */ + nr = so->total + so->cur; + + /* This will flush if we need space. + * We don't actually need the marker. + */ + if ((ret = nouveau_pushbuf_marker_emit(chan, nr, so->cur_reloc))) { + debug_printf("so_emit failed marker emit with error %d\n", ret); + assert(0); + } + + /* Submit data. This will ensure proper binding of objects. */ + for (i = 0; i < so->cur_start; i++) { + BEGIN_RING(chan, so->start[i].gr, so->start[i].mthd, so->start[i].size); + OUT_RINGp(chan, &(so->pool[so->start[i].offset]), so->start[i].size); + } - memcpy(pb->cur, so->push, nr * 4); for (i = 0; i < so->cur_reloc; i++) { struct nouveau_stateobj_reloc *r = &so->reloc[i]; - nouveau_pushbuf_emit_reloc(chan, pb->cur + r->offset, - r->bo, r->data, 0, r->flags, - r->vor, r->tor); + if ((ret = nouveau_pushbuf_emit_reloc(chan, pb->cur - nr + + r->push_offset, r->bo, r->data, + 0, r->flags, r->vor, r->tor))) { + debug_printf("so_emit failed reloc with error %d\n", ret); + assert(0); + } } - pb->cur += nr; } static INLINE void so_emit_reloc_markers(struct nouveau_channel *chan, struct nouveau_stateobj *so) { struct nouveau_pushbuf *pb = chan->pushbuf; + struct nouveau_grobj *gr = NULL; unsigned i; + int ret = 0; if (!so) return; - i = so->cur_reloc << 1; - if (pb->remaining < i) - nouveau_pushbuf_flush(chan, i); - pb->remaining -= i; - + /* If we need to flush in flush notify, then we have a problem anyway. */ for (i = 0; i < so->cur_reloc; i++) { struct nouveau_stateobj_reloc *r = &so->reloc[i]; - nouveau_pushbuf_emit_reloc(chan, pb->cur++, r->bo, r->packet, 0, - (r->flags & (NOUVEAU_BO_VRAM | - NOUVEAU_BO_GART | - NOUVEAU_BO_RDWR)) | - NOUVEAU_BO_DUMMY, 0, 0); - nouveau_pushbuf_emit_reloc(chan, pb->cur++, r->bo, r->data, 0, - r->flags | NOUVEAU_BO_DUMMY, - r->vor, r->tor); +#ifdef DEBUG_NOUVEAU_STATEOBJ + if (r->mthd & 0x40000000) { + debug_printf("error: NI mthd 0x%08X\n", r->mthd); + continue; + } +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + + /* The object needs to be bound and the system must know the + * subchannel is being used. Otherwise it will discard it. + */ + if (gr != r->gr) { + BEGIN_RING(chan, r->gr, 0x100, 1); + OUT_RING(chan, 0); + gr = r->gr; + } + + /* Some relocs really don't like to be hammered, + * NOUVEAU_BO_DUMMY makes sure it only + * happens when needed. + */ + ret = OUT_RELOC(chan, r->bo, (r->gr->subc << 13) | (1<< 18) | + r->mthd, (r->flags & (NOUVEAU_BO_VRAM | NOUVEAU_BO_GART + | NOUVEAU_BO_RDWR)) | NOUVEAU_BO_DUMMY, 0, 0); + if (ret) { + debug_printf("OUT_RELOC failed %d\n", ret); + assert(0); + } + + ret = OUT_RELOC(chan, r->bo, r->data, r->flags | + NOUVEAU_BO_DUMMY, r->vor, r->tor); + if (ret) { + debug_printf("OUT_RELOC failed %d\n", ret); + assert(0); + } + + pb->remaining -= 2; } } diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h index 42c77e5e778..4c3e08a43f5 100644 --- a/src/gallium/drivers/nouveau/nouveau_winsys.h +++ b/src/gallium/drivers/nouveau/nouveau_winsys.h @@ -23,6 +23,9 @@ #define NOUVEAU_BUFFER_USAGE_ZETA (1 << 17) #define NOUVEAU_BUFFER_USAGE_TRANSFER (1 << 18) +/* use along with GPU_WRITE for 2D-only writes */ +#define NOUVEAU_BUFFER_USAGE_NO_RENDER (1 << 19) + extern struct pipe_screen * nv04_screen_create(struct pipe_winsys *ws, struct nouveau_device *); diff --git a/src/gallium/drivers/nv04/nv04_context.c b/src/gallium/drivers/nv04/nv04_context.c index 10d984ace9b..edd96859cf8 100644 --- a/src/gallium/drivers/nv04/nv04_context.c +++ b/src/gallium/drivers/nv04/nv04_context.c @@ -10,10 +10,14 @@ nv04_flush(struct pipe_context *pipe, unsigned flags, struct pipe_fence_handle **fence) { struct nv04_context *nv04 = nv04_context(pipe); + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; draw_flush(nv04->draw); - FIRE_RING(fence); + FIRE_RING(chan); + if (fence) + *fence = NULL; } static void @@ -27,40 +31,39 @@ nv04_destroy(struct pipe_context *pipe) FREE(nv04); } -static void -nv04_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield) -{ -} - static boolean nv04_init_hwctx(struct nv04_context *nv04) { + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; + // requires a valid handle -// BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_NOTIFY, 1); +// BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_NOTIFY, 1); // OUT_RING(0); - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_NOP, 1); - OUT_RING(0); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_NOP, 1); + OUT_RING(chan, 0); - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1); - OUT_RING(0x40182800); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1); + OUT_RING(chan, 0x40182800); // OUT_RING(1<<20/*no cull*/); - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_BLEND, 1); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_BLEND, 1); // OUT_RING(0x24|(1<<6)|(1<<8)); - OUT_RING(0x120001a4); - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_FORMAT, 1); - OUT_RING(0x332213a1); - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_FILTER, 1); - OUT_RING(0x11001010); - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_COLORKEY, 1); - OUT_RING(0x0); -// BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 1); + OUT_RING(chan, 0x120001a4); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_FORMAT, 1); + OUT_RING(chan, 0x332213a1); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_FILTER, 1); + OUT_RING(chan, 0x11001010); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_COLORKEY, 1); + OUT_RING(chan, 0x0); +// BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 1); // OUT_RING(SCREEN_OFFSET); - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR, 1); - OUT_RING(0xff000000); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_FOGCOLOR, 1); + OUT_RING(chan, 0xff000000); - FIRE_RING (NULL); + FIRE_RING (chan); return TRUE; } @@ -83,7 +86,6 @@ nv04_create(struct pipe_screen *pscreen, unsigned pctx_id) nv04->pipe.winsys = ws; nv04->pipe.screen = pscreen; nv04->pipe.destroy = nv04_destroy; - nv04->pipe.set_edgeflags = nv04_set_edgeflags; nv04->pipe.draw_arrays = nv04_draw_arrays; nv04->pipe.draw_elements = nv04_draw_elements; nv04->pipe.clear = nv04_clear; diff --git a/src/gallium/drivers/nv04/nv04_context.h b/src/gallium/drivers/nv04/nv04_context.h index 55326c787a8..fe3b527423c 100644 --- a/src/gallium/drivers/nv04/nv04_context.h +++ b/src/gallium/drivers/nv04/nv04_context.h @@ -15,10 +15,6 @@ #include "nouveau/nouveau_gldefs.h" #include "nouveau/nouveau_context.h" -#define NOUVEAU_PUSH_CONTEXT(ctx) \ - struct nv04_screen *ctx = nv04->screen -#include "nouveau/nouveau_push.h" - #include "nv04_state.h" #define NOUVEAU_ERR(fmt, args...) \ @@ -141,9 +137,9 @@ extern void nv04_emit_hw_state(struct nv04_context *nv04); extern void nv04_state_tex_update(struct nv04_context *nv04); /* nv04_vbo.c */ -extern boolean nv04_draw_arrays(struct pipe_context *, unsigned mode, +extern void nv04_draw_arrays(struct pipe_context *, unsigned mode, unsigned start, unsigned count); -extern boolean nv04_draw_elements( struct pipe_context *pipe, +extern void nv04_draw_elements( struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned prim, unsigned start, unsigned count); diff --git a/src/gallium/drivers/nv04/nv04_fragtex.c b/src/gallium/drivers/nv04/nv04_fragtex.c index 21f990fd536..c152b52119a 100644 --- a/src/gallium/drivers/nv04/nv04_fragtex.c +++ b/src/gallium/drivers/nv04/nv04_fragtex.c @@ -4,7 +4,7 @@ #define _(m,tf) \ { \ PIPE_FORMAT_##m, \ - NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_##tf, \ + NV04_TEXTURED_TRIANGLE_FORMAT_COLOR_##tf, \ } struct nv04_texture_format { @@ -53,14 +53,14 @@ nv04_fragtex_build(struct nv04_context *nv04, int unit) return; } - nv04->fragtex.format = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_CORNER - | NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_CORNER + nv04->fragtex.format = NV04_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_CORNER + | NV04_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_CORNER | nv04_fragtex_format(pt->format) - | ( (pt->last_level + 1) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_MIPMAP_LEVELS_SHIFT ) - | ( log2i(pt->width[0]) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U_SHIFT ) - | ( log2i(pt->height[0]) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V_SHIFT ) - | NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE - | NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP_TO_EDGE + | ( (pt->last_level + 1) << NV04_TEXTURED_TRIANGLE_FORMAT_MIPMAP_LEVELS_SHIFT ) + | ( log2i(pt->width0) << NV04_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U_SHIFT ) + | ( log2i(pt->height0) << NV04_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V_SHIFT ) + | NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE + | NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP_TO_EDGE ; } diff --git a/src/gallium/drivers/nv04/nv04_miptree.c b/src/gallium/drivers/nv04/nv04_miptree.c index 93f752faec9..e0a6948aeb4 100644 --- a/src/gallium/drivers/nv04/nv04_miptree.c +++ b/src/gallium/drivers/nv04/nv04_miptree.c @@ -1,6 +1,7 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" +#include "util/u_math.h" #include "nv04_context.h" #include "nv04_screen.h" @@ -9,31 +10,22 @@ static void nv04_miptree_layout(struct nv04_miptree *nv04mt) { struct pipe_texture *pt = &nv04mt->base; - uint width = pt->width[0], height = pt->height[0]; uint offset = 0; int nr_faces, l; nr_faces = 1; for (l = 0; l <= pt->last_level; l++) { - pt->width[l] = width; - pt->height[l] = height; - - pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); - pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); - - nv04mt->level[l].pitch = pt->width[0]; + nv04mt->level[l].pitch = pt->width0; nv04mt->level[l].pitch = (nv04mt->level[l].pitch + 63) & ~63; - - width = MAX2(1, width >> 1); - height = MAX2(1, height >> 1); } for (l = 0; l <= pt->last_level; l++) { - nv04mt->level[l].image_offset = CALLOC(nr_faces, sizeof(unsigned)); - offset += nv04mt->level[l].pitch * pt->height[l]; + /* XXX guess was obviously missing */ + nv04mt->level[l].image_offset[0] = offset; + offset += nv04mt->level[l].pitch * u_minify(pt->height0, l); } nv04mt->total_size = offset; @@ -63,7 +55,7 @@ nv04_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) FREE(mt); return NULL; } - + mt->bo = nouveau_bo(mt->buffer); return &mt->base; } @@ -75,7 +67,7 @@ nv04_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, /* Only supports 2D, non-mipmapped textures for the moment */ if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 || - pt->depth[0] != 1) + pt->depth0 != 1) return NULL; mt = CALLOC_STRUCT(nv04_miptree); @@ -89,6 +81,7 @@ nv04_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, mt->level[0].image_offset = CALLOC(1, sizeof(unsigned)); pipe_buffer_reference(&mt->buffer, pb); + mt->bo = nouveau_bo(mt->buffer); return &mt->base; } @@ -120,8 +113,8 @@ nv04_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; pipe_texture_reference(&ns->base.texture, pt); ns->base.format = pt->format; - ns->base.width = pt->width[level]; - ns->base.height = pt->height[level]; + ns->base.width = u_minify(pt->width0, level); + ns->base.height = u_minify(pt->height0, level); ns->base.usage = flags; pipe_reference_init(&ns->base.reference, 1); ns->base.face = face; @@ -129,7 +122,7 @@ nv04_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, ns->base.zslice = zslice; ns->pitch = nv04mt->level[level].pitch; - ns->base.offset = nv04mt->level[level].image_offset; + ns->base.offset = nv04mt->level[level].image_offset[0]; return &ns->base; } diff --git a/src/gallium/drivers/nv04/nv04_prim_vbuf.c b/src/gallium/drivers/nv04/nv04_prim_vbuf.c index f6458232ae5..0b795ea2430 100644 --- a/src/gallium/drivers/nv04/nv04_prim_vbuf.c +++ b/src/gallium/drivers/nv04/nv04_prim_vbuf.c @@ -93,33 +93,45 @@ nv04_vbuf_render_set_primitive( struct vbuf_render *render, static INLINE void nv04_2triangles(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5) { - BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xA),49); - OUT_RINGp(buffer + VERTEX_SIZE * v0,8); - OUT_RINGp(buffer + VERTEX_SIZE * v1,8); - OUT_RINGp(buffer + VERTEX_SIZE * v2,8); - OUT_RINGp(buffer + VERTEX_SIZE * v3,8); - OUT_RINGp(buffer + VERTEX_SIZE * v4,8); - OUT_RINGp(buffer + VERTEX_SIZE * v5,8); - OUT_RING(0xFEDCBA); + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; + + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xA), 49); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v0,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v1,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v2,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v3,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v4,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v5,8); + OUT_RING(chan, 0xFEDCBA); } static INLINE void nv04_1triangle(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2) { - BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xD),25); - OUT_RINGp(buffer + VERTEX_SIZE * v0,8); - OUT_RINGp(buffer + VERTEX_SIZE * v1,8); - OUT_RINGp(buffer + VERTEX_SIZE * v2,8); - OUT_RING(0xFED); + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; + + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xD), 25); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v0,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v1,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v2,8); + OUT_RING(chan, 0xFED); } static INLINE void nv04_1quad(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2, ushort v3) { - BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xC),33); - OUT_RINGp(buffer + VERTEX_SIZE * v0,8); - OUT_RINGp(buffer + VERTEX_SIZE * v1,8); - OUT_RINGp(buffer + VERTEX_SIZE * v2,8); - OUT_RINGp(buffer + VERTEX_SIZE * v3,8); - OUT_RING(0xFECEDC); + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; + + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xC), 33); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v0,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v1,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v2,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v3,8); + OUT_RING(chan, 0xFECEDC); } static void nv04_vbuf_render_triangles_elts(struct nv04_vbuf_render * render, const ushort * indices, uint nr_indices) @@ -156,7 +168,10 @@ static void nv04_vbuf_render_tri_strip_elts(struct nv04_vbuf_render* render, con { const uint32_t striptbl[]={0x321210,0x543432,0x765654,0x987876,0xBA9A98,0xDCBCBA,0xFEDEDC}; unsigned char* buffer = render->buffer; - struct nv04_context* nv04 = render->nv04; + struct nv04_context *nv04 = render->nv04; + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; int i,j; for(i = 0; i<nr_indices; i+=14) @@ -166,15 +181,15 @@ static void nv04_vbuf_render_tri_strip_elts(struct nv04_vbuf_render* render, con if (numvert<3) break; - BEGIN_RING( fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), numvert*8 ); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), numvert*8); for(j = 0; j<numvert; j++) - OUT_RINGp( buffer + VERTEX_SIZE * indices [i+j], 8 ); + OUT_RINGp(chan, buffer + VERTEX_SIZE * indices [i+j], 8 ); - BEGIN_RING_NI( fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE(0), (numtri+1)/2 ); + BEGIN_RING_NI(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE(0), (numtri+1)/2 ); for(j = 0; j<numtri/2; j++ ) - OUT_RING(striptbl[j]); + OUT_RING(chan, striptbl[j]); if (numtri%2) - OUT_RING(striptbl[numtri/2]&0xFFF); + OUT_RING(chan, striptbl[numtri/2]&0xFFF); } } @@ -182,11 +197,14 @@ static void nv04_vbuf_render_tri_fan_elts(struct nv04_vbuf_render* render, const { const uint32_t fantbl[]={0x320210,0x540430,0x760650,0x980870,0xBA0A90,0xDC0CB0,0xFE0ED0}; unsigned char* buffer = render->buffer; - struct nv04_context* nv04 = render->nv04; + struct nv04_context *nv04 = render->nv04; + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; int i,j; - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), 8); - OUT_RINGp(buffer + VERTEX_SIZE * indices[0], 8); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), 8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * indices[0], 8); for(i = 1; i<nr_indices; i+=14) { @@ -195,16 +213,16 @@ static void nv04_vbuf_render_tri_fan_elts(struct nv04_vbuf_render* render, const if (numvert < 3) break; - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x1), numvert*8); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x1), numvert*8); for(j=0;j<numvert;j++) - OUT_RINGp( buffer + VERTEX_SIZE * indices[ i+j ], 8 ); + OUT_RINGp(chan, buffer + VERTEX_SIZE * indices[ i+j ], 8 ); - BEGIN_RING_NI(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE(0), (numtri+1)/2); + BEGIN_RING_NI(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE(0), (numtri+1)/2); for(j = 0; j<numtri/2; j++) - OUT_RING(fantbl[j]); + OUT_RING(chan, fantbl[j]); if (numtri%2) - OUT_RING(fantbl[numtri/2]&0xFFF); + OUT_RING(chan, fantbl[numtri/2]&0xFFF); } } diff --git a/src/gallium/drivers/nv04/nv04_screen.c b/src/gallium/drivers/nv04/nv04_screen.c index 170ce3eb7e5..7c5b6e8229a 100644 --- a/src/gallium/drivers/nv04/nv04_screen.c +++ b/src/gallium/drivers/nv04/nv04_screen.c @@ -119,6 +119,8 @@ nv04_screen_destroy(struct pipe_screen *pscreen) nouveau_grobj_free(&screen->fahrenheit); nv04_surface_2d_takedown(&screen->eng2d); + nouveau_screen_fini(&screen->base); + FREE(pscreen); } @@ -163,10 +165,10 @@ nv04_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) fahrenheit_class = 0; sub3d_class = 0; } else if (dev->chipset >= 0x10) { - fahrenheit_class = NV10_DX5_TEXTURED_TRIANGLE; + fahrenheit_class = NV10_TEXTURED_TRIANGLE; sub3d_class = NV10_CONTEXT_SURFACES_3D; } else { - fahrenheit_class=NV04_DX5_TEXTURED_TRIANGLE; + fahrenheit_class=NV04_TEXTURED_TRIANGLE; sub3d_class = NV04_CONTEXT_SURFACES_3D; } diff --git a/src/gallium/drivers/nv04/nv04_state.c b/src/gallium/drivers/nv04/nv04_state.c index d356ebd8b36..e3dc4c5bf44 100644 --- a/src/gallium/drivers/nv04/nv04_state.c +++ b/src/gallium/drivers/nv04/nv04_state.c @@ -50,28 +50,28 @@ wrap_mode(unsigned wrap) { switch (wrap) { case PIPE_TEX_WRAP_REPEAT: - ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_REPEAT; + ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_REPEAT; break; case PIPE_TEX_WRAP_MIRROR_REPEAT: - ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_MIRRORED_REPEAT; + ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_MIRRORED_REPEAT; break; case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE; + ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE; break; case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_BORDER; + ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_BORDER; break; case PIPE_TEX_WRAP_CLAMP: - ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP; + ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP; break; case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: case PIPE_TEX_WRAP_MIRROR_CLAMP: default: NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); - ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP; + ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP; } - return ret >> NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT; + return ret >> NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT; } static void * @@ -84,20 +84,20 @@ nv04_sampler_state_create(struct pipe_context *pipe, ss = MALLOC(sizeof(struct nv04_sampler_state)); - ss->format = ((wrap_mode(cso->wrap_s) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT) | - (wrap_mode(cso->wrap_t) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_SHIFT)); + ss->format = ((wrap_mode(cso->wrap_s) << NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT) | + (wrap_mode(cso->wrap_t) << NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_SHIFT)); if (cso->max_anisotropy > 1.0) { - filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MINIFY_ENABLE | NV04_DX5_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MAGNIFY_ENABLE; + filter |= NV04_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MINIFY_ENABLE | NV04_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MAGNIFY_ENABLE; } switch (cso->mag_img_filter) { case PIPE_TEX_FILTER_LINEAR: - filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_LINEAR; + filter |= NV04_TEXTURED_TRIANGLE_FILTER_MAGNIFY_LINEAR; break; case PIPE_TEX_FILTER_NEAREST: default: - filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_NEAREST; + filter |= NV04_TEXTURED_TRIANGLE_FILTER_MAGNIFY_NEAREST; break; } @@ -105,14 +105,14 @@ nv04_sampler_state_create(struct pipe_context *pipe, case PIPE_TEX_FILTER_LINEAR: switch (cso->min_mip_filter) { case PIPE_TEX_MIPFILTER_NEAREST: - filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST; + filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST; break; case PIPE_TEX_MIPFILTER_LINEAR: - filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR; + filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR; break; case PIPE_TEX_MIPFILTER_NONE: default: - filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR; + filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR; break; } break; @@ -120,14 +120,14 @@ nv04_sampler_state_create(struct pipe_context *pipe, default: switch (cso->min_mip_filter) { case PIPE_TEX_MIPFILTER_NEAREST: - filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST; + filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST; break; case PIPE_TEX_MIPFILTER_LINEAR: - filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR; + filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR; break; case PIPE_TEX_MIPFILTER_NONE: default: - filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST; + filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST; break; } break; @@ -181,7 +181,7 @@ nv04_rasterizer_state_create(struct pipe_context *pipe, */ rs = MALLOC(sizeof(struct nv04_rasterizer_state)); - rs->blend = cso->flatshade ? NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_FLAT : NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_GOURAUD; + rs->blend = cso->flatshade ? NV04_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_FLAT : NV04_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_GOURAUD; return (void *)rs; } @@ -229,16 +229,16 @@ nv04_depth_stencil_alpha_state_create(struct pipe_context *pipe, hw = MALLOC(sizeof(struct nv04_depth_stencil_alpha_state)); hw->control = float_to_ubyte(cso->alpha.ref_value); - hw->control |= ( nv04_compare_func(cso->alpha.func) << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC_SHIFT ); - hw->control |= cso->alpha.enabled ? NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_TEST_ENABLE : 0; - hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ORIGIN; - hw->control |= cso->depth.enabled ? (1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_ENABLE_SHIFT) : 0; - hw->control |= ( nv04_compare_func(cso->depth.func)<< NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FUNC_SHIFT ); - hw->control |= 1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_SHIFT; // no culling, handled by the draw module - hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_DITHER_ENABLE; - hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_PERSPECTIVE_ENABLE; - hw->control |= cso->depth.writemask ? (1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_WRITE_ENABLE_SHIFT) : 0; - hw->control |= 1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FORMAT_SHIFT; // integer zbuffer format + hw->control |= ( nv04_compare_func(cso->alpha.func) << NV04_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC_SHIFT ); + hw->control |= cso->alpha.enabled ? NV04_TEXTURED_TRIANGLE_CONTROL_ALPHA_ENABLE : 0; + hw->control |= NV04_TEXTURED_TRIANGLE_CONTROL_ORIGIN; + hw->control |= cso->depth.enabled ? NV04_TEXTURED_TRIANGLE_CONTROL_Z_ENABLE : 0; + hw->control |= ( nv04_compare_func(cso->depth.func)<< NV04_TEXTURED_TRIANGLE_CONTROL_Z_FUNC_SHIFT ); + hw->control |= 1 << NV04_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_SHIFT; // no culling, handled by the draw module + hw->control |= NV04_TEXTURED_TRIANGLE_CONTROL_DITHER_ENABLE; + hw->control |= NV04_TEXTURED_TRIANGLE_CONTROL_Z_PERSPECTIVE_ENABLE; + hw->control |= cso->depth.writemask ? NV04_TEXTURED_TRIANGLE_CONTROL_Z_WRITE : 0; + hw->control |= 1 << NV04_TEXTURED_TRIANGLE_CONTROL_Z_FORMAT_SHIFT; // integer zbuffer format return (void *)hw; } @@ -377,7 +377,7 @@ nv04_set_scissor_state(struct pipe_context *pipe, /* struct nv04_context *nv04 = nv04_context(pipe); // XXX - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_SCISSOR_HORIZ, 2); + BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_SCISSOR_HORIZ, 2); OUT_RING (((s->maxx - s->minx) << 16) | s->minx); OUT_RING (((s->maxy - s->miny) << 16) | s->miny);*/ } @@ -425,9 +425,9 @@ nv04_init_state_functions(struct nv04_context *nv04) nv04->pipe.delete_blend_state = nv04_blend_state_delete; nv04->pipe.create_sampler_state = nv04_sampler_state_create; - nv04->pipe.bind_sampler_states = nv04_sampler_state_bind; + nv04->pipe.bind_fragment_sampler_states = nv04_sampler_state_bind; nv04->pipe.delete_sampler_state = nv04_sampler_state_delete; - nv04->pipe.set_sampler_textures = nv04_set_sampler_texture; + nv04->pipe.set_fragment_sampler_textures = nv04_set_sampler_texture; nv04->pipe.create_rasterizer_state = nv04_rasterizer_state_create; nv04->pipe.bind_rasterizer_state = nv04_rasterizer_state_bind; diff --git a/src/gallium/drivers/nv04/nv04_state.h b/src/gallium/drivers/nv04/nv04_state.h index 399f750dbe7..81d1d2ebaa9 100644 --- a/src/gallium/drivers/nv04/nv04_state.h +++ b/src/gallium/drivers/nv04/nv04_state.h @@ -31,6 +31,7 @@ struct nv04_rasterizer_state { struct nv04_miptree { struct pipe_texture base; + struct nouveau_bo *bo; struct pipe_buffer *buffer; uint total_size; diff --git a/src/gallium/drivers/nv04/nv04_state_emit.c b/src/gallium/drivers/nv04/nv04_state_emit.c index eb2c1c57c67..b8d6dc560f0 100644 --- a/src/gallium/drivers/nv04/nv04_state_emit.c +++ b/src/gallium/drivers/nv04/nv04_state_emit.c @@ -57,13 +57,19 @@ static uint32_t nv04_blend_func(uint32_t f) static void nv04_emit_control(struct nv04_context* nv04) { uint32_t control = nv04->dsa->control; + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1); - OUT_RING(control); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1); + OUT_RING(chan, control); } static void nv04_emit_blend(struct nv04_context* nv04) { + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; uint32_t blend; blend=0x4; // texture MODULATE_ALPHA @@ -75,19 +81,23 @@ static void nv04_emit_blend(struct nv04_context* nv04) blend|=(nv04_blend_func(nv04->blend->b_src)<<24); blend|=(nv04_blend_func(nv04->blend->b_dst)<<28); - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_BLEND, 1); - OUT_RING(blend); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_BLEND, 1); + OUT_RING(chan, blend); } static void nv04_emit_sampler(struct nv04_context *nv04, int unit) { struct nv04_miptree *nv04mt = nv04->tex_miptree[unit]; struct pipe_texture *pt = &nv04mt->base; - - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 3); - OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - OUT_RELOCd(nv04mt->buffer, (nv04->fragtex.format | nv04->sampler[unit]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); - OUT_RING(nv04->sampler[unit]->filter); + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; + struct nouveau_bo *bo = nouveau_bo(nv04mt->buffer); + + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 3); + OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCd(chan, bo, (nv04->fragtex.format | nv04->sampler[unit]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); + OUT_RING(chan, nv04->sampler[unit]->filter); } static void nv04_state_emit_framebuffer(struct nv04_context* nv04) @@ -97,6 +107,10 @@ static void nv04_state_emit_framebuffer(struct nv04_context* nv04) uint32_t rt_format, w, h; int colour_format = 0, zeta_format = 0; struct nv04_miptree *nv04mt = 0; + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *context_surfaces_3d = screen->context_surfaces_3d; + struct nouveau_bo *bo; w = fb->cbufs[0]->width; h = fb->cbufs[0]->height; @@ -128,24 +142,29 @@ static void nv04_state_emit_framebuffer(struct nv04_context* nv04) assert(0); } - BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_FORMAT, 1); - OUT_RING(rt_format); + BEGIN_RING(chan, context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_FORMAT, 1); + OUT_RING(chan, rt_format); nv04mt = (struct nv04_miptree *)rt->base.texture; + bo = nouveau_bo(nv04mt->buffer); /* FIXME pitches have to be aligned ! */ - BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2); - OUT_RING(rt->pitch|(zeta->pitch<<16)); - OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2); + OUT_RING(chan, rt->pitch|(zeta->pitch<<16)); + OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); if (fb->zsbuf) { nv04mt = (struct nv04_miptree *)zeta->base.texture; - BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1); - OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1); + OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); } } void nv04_emit_hw_state(struct nv04_context *nv04) { + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; + struct nouveau_grobj *context_surfaces_3d = screen->context_surfaces_3d; int i; if (nv04->dirty & NV04_NEW_VERTPROG) { @@ -163,8 +182,8 @@ nv04_emit_hw_state(struct nv04_context *nv04) if (nv04->dirty & NV04_NEW_CONTROL) { nv04->dirty &= ~NV04_NEW_CONTROL; - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1); - OUT_RING(nv04->dsa->control); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1); + OUT_RING(chan, nv04->dsa->control); } if (nv04->dirty & NV04_NEW_BLEND) { @@ -205,12 +224,12 @@ nv04_emit_hw_state(struct nv04_context *nv04) unsigned rt_pitch = ((struct nv04_surface *)nv04->rt)->pitch; unsigned zeta_pitch = ((struct nv04_surface *)nv04->zeta)->pitch; - BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2); - OUT_RING(rt_pitch|(zeta_pitch<<16)); - OUT_RELOCl(nv04->rt, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2); + OUT_RING(chan, rt_pitch|(zeta_pitch<<16)); + OUT_RELOCl(chan, nouveau_bo(nv04->rt), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); if (nv04->zeta) { - BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1); - OUT_RELOCl(nv04->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1); + OUT_RELOCl(chan, nouveau_bo(nv04->zeta), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); } /* Texture images */ @@ -218,9 +237,10 @@ nv04_emit_hw_state(struct nv04_context *nv04) if (!(nv04->fp_samplers & (1 << i))) continue; struct nv04_miptree *nv04mt = nv04->tex_miptree[i]; - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 2); - OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - OUT_RELOCd(nv04mt->buffer, (nv04->fragtex.format | nv04->sampler[i]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); + struct nouveau_bo *bo = nouveau_bo(nv04mt->buffer); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 2); + OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCd(chan, bo, (nv04->fragtex.format | nv04->sampler[i]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); } } diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.c b/src/gallium/drivers/nv04/nv04_surface_2d.c index 8be134b83dd..b24a9cee5ae 100644 --- a/src/gallium/drivers/nv04/nv04_surface_2d.c +++ b/src/gallium/drivers/nv04/nv04_surface_2d.c @@ -1,5 +1,6 @@ #include "pipe/p_context.h" #include "pipe/p_format.h" +#include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -76,7 +77,7 @@ nv04_scaled_image_format(enum pipe_format format) } static INLINE unsigned -nv04_swizzle_bits(unsigned x, unsigned y) +nv04_swizzle_bits_square(unsigned x, unsigned y) { unsigned u = (x & 0x001) << 0 | (x & 0x002) << 1 | @@ -106,6 +107,15 @@ nv04_swizzle_bits(unsigned x, unsigned y) return v | u; } +/* rectangular swizzled textures are linear concatenations of swizzled square tiles */ +static INLINE unsigned +nv04_swizzle_bits(unsigned x, unsigned y, unsigned w, unsigned h) +{ + unsigned s = MIN2(w, h); + unsigned m = s - 1; + return (((x | y) & ~m) * s) | nv04_swizzle_bits_square(x & m, y & m); +} + static int nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx, struct pipe_surface *dst, int dx, int dy, @@ -133,6 +143,9 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx, assert(sub_w == w || util_is_pot(sub_w)); assert(sub_h == h || util_is_pot(sub_h)); + MARK_RING (chan, 8 + ((w+sub_w)/sub_w)*((h+sub_h)/sub_h)*17, 2 + + ((w+sub_w)/sub_w)*((h+sub_h)/sub_h)*2); + BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_DMA_IMAGE, 1); OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); @@ -154,20 +167,19 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx, for (x = 0; x < w; x += sub_w) { sub_w = MIN2(sub_w, w - x); - /* Must be 64-byte aligned */ - assert(!((dst->offset + nv04_swizzle_bits(dx+x, dy+y) * dst->texture->block.size) & 63)); + assert(!(dst->offset & 63)); BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_OFFSET, 1); - OUT_RELOCl(chan, dst_bo, dst->offset + nv04_swizzle_bits(dx+x, dy+y) * dst->texture->block.size, + OUT_RELOCl(chan, dst_bo, dst->offset, NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION, 9); OUT_RING (chan, NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_TRUNCATE); OUT_RING (chan, nv04_scaled_image_format(src->format)); OUT_RING (chan, NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY); - OUT_RING (chan, 0); + OUT_RING (chan, (x + dx) | ((y + dy) << NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_Y_SHIFT)); OUT_RING (chan, sub_h << NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H_SHIFT | sub_w); - OUT_RING (chan, 0); + OUT_RING (chan, (x + dx) | ((y + dy) << NV04_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_Y_SHIFT)); OUT_RING (chan, sub_h << NV04_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_H_SHIFT | sub_w); OUT_RING (chan, 1 << 20); OUT_RING (chan, 1 << 20); @@ -177,7 +189,7 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx, OUT_RING (chan, src_pitch | NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CENTER | NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_POINT_SAMPLE); - OUT_RELOCl(chan, src_bo, src->offset + (sy+y) * src_pitch + (sx+x) * src->texture->block.size, + OUT_RELOCl(chan, src_bo, src->offset + (sy+y) * src_pitch + (sx+x) * util_format_get_blocksize(src->texture->format), NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); OUT_RING (chan, 0); } @@ -198,11 +210,11 @@ nv04_surface_copy_m2mf(struct nv04_surface_2d *ctx, unsigned src_pitch = ((struct nv04_surface *)src)->pitch; unsigned dst_pitch = ((struct nv04_surface *)dst)->pitch; unsigned dst_offset = dst->offset + dy * dst_pitch + - dx * dst->texture->block.size; + dx * util_format_get_blocksize(dst->texture->format); unsigned src_offset = src->offset + sy * src_pitch + - sx * src->texture->block.size; + sx * util_format_get_blocksize(src->texture->format); - WAIT_RING (chan, 3 + ((h / 2047) + 1) * 9); + MARK_RING (chan, 3 + ((h / 2047) + 1) * 9, 2 + ((h / 2047) + 1) * 2); BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_IN, 2); OUT_RELOCo(chan, src_bo, NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); @@ -219,7 +231,7 @@ nv04_surface_copy_m2mf(struct nv04_surface_2d *ctx, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_WR); OUT_RING (chan, src_pitch); OUT_RING (chan, dst_pitch); - OUT_RING (chan, w * src->texture->block.size); + OUT_RING (chan, w * util_format_get_blocksize(src->texture->format)); OUT_RING (chan, count); OUT_RING (chan, 0x0101); OUT_RING (chan, 0); @@ -250,7 +262,7 @@ nv04_surface_copy_blit(struct nv04_surface_2d *ctx, struct pipe_surface *dst, if (format < 0) return 1; - WAIT_RING (chan, 12); + MARK_RING (chan, 12, 4); BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2); OUT_RELOCo(chan, src_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); @@ -315,7 +327,7 @@ nv04_surface_fill(struct nv04_surface_2d *ctx, struct pipe_surface *dst, gdirect_format = nv04_rect_format(dst->format); assert(gdirect_format >= 0); - WAIT_RING (chan, 16); + MARK_RING (chan, 16, 4); BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2); OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); @@ -487,3 +499,49 @@ nv04_surface_2d_init(struct nouveau_screen *screen) ctx->fill = nv04_surface_fill; return ctx; } + +struct nv04_surface* +nv04_surface_wrap_for_render(struct pipe_screen *pscreen, struct nv04_surface_2d* eng2d, struct nv04_surface* ns) +{ + int temp_flags; + + // printf("creating temp, flags is %i!\n", flags); + + if(ns->base.usage & PIPE_BUFFER_USAGE_DISCARD) + { + temp_flags = ns->base.usage | PIPE_BUFFER_USAGE_GPU_READ; + ns->base.usage = PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER | PIPE_BUFFER_USAGE_DISCARD; + } + else + { + temp_flags = ns->base.usage | PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE; + ns->base.usage = PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER | PIPE_BUFFER_USAGE_GPU_READ; + } + + struct nv40_screen* screen = (struct nv40_screen*)pscreen; + ns->base.usage = PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE; + + struct pipe_texture templ; + memset(&templ, 0, sizeof(templ)); + templ.format = ns->base.texture->format; + templ.target = PIPE_TEXTURE_2D; + templ.width0 = ns->base.width; + templ.height0 = ns->base.height; + templ.depth0 = 1; + templ.last_level = 0; + + // TODO: this is probably wrong and we should specifically handle multisampling somehow once it is implemented + templ.nr_samples = ns->base.texture->nr_samples; + + templ.tex_usage = ns->base.texture->tex_usage | PIPE_TEXTURE_USAGE_RENDER_TARGET; + + struct pipe_texture* temp_tex = pscreen->texture_create(pscreen, &templ); + struct nv04_surface* temp_ns = (struct nv04_surface*)pscreen->get_tex_surface(pscreen, temp_tex, 0, 0, 0, temp_flags); + temp_ns->backing = ns; + + if(ns->base.usage & PIPE_BUFFER_USAGE_GPU_READ) + eng2d->copy(eng2d, &temp_ns->backing->base, 0, 0, &ns->base, 0, 0, ns->base.width, ns->base.height); + + return temp_ns; +} + diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.h b/src/gallium/drivers/nv04/nv04_surface_2d.h index 02b3f56ba8b..ce696a11a39 100644 --- a/src/gallium/drivers/nv04/nv04_surface_2d.h +++ b/src/gallium/drivers/nv04/nv04_surface_2d.h @@ -4,6 +4,7 @@ struct nv04_surface { struct pipe_surface base; unsigned pitch; + struct nv04_surface* backing; }; struct nv04_surface_2d { @@ -30,4 +31,7 @@ nv04_surface_2d_init(struct nouveau_screen *screen); void nv04_surface_2d_takedown(struct nv04_surface_2d **); +struct nv04_surface* +nv04_surface_wrap_for_render(struct pipe_screen *pscreen, struct nv04_surface_2d* eng2d, struct nv04_surface* ns); + #endif diff --git a/src/gallium/drivers/nv04/nv04_transfer.c b/src/gallium/drivers/nv04/nv04_transfer.c index 6618660743d..2dd2e146a8f 100644 --- a/src/gallium/drivers/nv04/nv04_transfer.c +++ b/src/gallium/drivers/nv04/nv04_transfer.c @@ -1,7 +1,9 @@ #include <pipe/p_state.h> #include <pipe/p_defines.h> #include <pipe/p_inlines.h> +#include <util/u_format.h> #include <util/u_memory.h> +#include <util/u_math.h> #include <nouveau/nouveau_winsys.h> #include "nv04_context.h" #include "nv04_screen.h" @@ -10,22 +12,19 @@ struct nv04_transfer { struct pipe_transfer base; struct pipe_surface *surface; - bool direct; + boolean direct; }; static void -nv04_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, +nv04_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height, struct pipe_texture *template) { memset(template, 0, sizeof(struct pipe_texture)); template->target = pt->target; template->format = pt->format; - template->width[0] = pt->width[level]; - template->height[0] = pt->height[level]; - template->depth[0] = 1; - template->block = pt->block; - template->nblocksx[0] = pt->nblocksx[level]; - template->nblocksy[0] = pt->nblocksx[level]; + template->width0 = width; + template->height0 = height; + template->depth0 = 1; template->last_level = 0; template->nr_samples = pt->nr_samples; @@ -48,14 +47,10 @@ nv04_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; pipe_texture_reference(&tx->base.texture, pt); - tx->base.format = pt->format; tx->base.x = x; tx->base.y = y; tx->base.width = w; tx->base.height = h; - tx->base.block = pt->block; - tx->base.nblocksx = pt->nblocksx[level]; - tx->base.nblocksy = pt->nblocksy[level]; tx->base.stride = mt->level[level].pitch; tx->base.usage = usage; tx->base.face = face; @@ -76,7 +71,7 @@ nv04_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->direct = false; - nv04_compatible_transfer_tex(pt, level, &tx_tex_template); + nv04_compatible_transfer_tex(pt, w, h, &tx_tex_template); tx_tex = pscreen->texture_create(pscreen, &tx_tex_template); if (!tx_tex) @@ -85,6 +80,8 @@ nv04_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; } + tx->base.stride = ((struct nv04_miptree*)tx_tex)->level[0].pitch; + tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, face, level, zslice, pipe_transfer_buffer_flags(&tx->base)); @@ -110,8 +107,8 @@ nv04_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, /* TODO: Check if SIFM can un-swizzle */ nvscreen->eng2d->copy(nvscreen->eng2d, tx->surface, 0, 0, - src, 0, 0, - src->width, src->height); + src, x, y, + w, h); pipe_surface_reference(&src, NULL); } @@ -135,9 +132,9 @@ nv04_transfer_del(struct pipe_transfer *ptx) /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ nvscreen->eng2d->copy(nvscreen->eng2d, - dst, 0, 0, + dst, tx->base.x, tx->base.y, tx->surface, 0, 0, - dst->width, dst->height); + tx->base.width, tx->base.height); pipe_surface_reference(&dst, NULL); } @@ -156,8 +153,10 @@ nv04_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) void *map = pipe_buffer_map(pscreen, mt->buffer, pipe_transfer_buffer_flags(ptx)); - return map + ns->base.offset + - ptx->y * ns->pitch + ptx->x * ptx->block.size; + if(!tx->direct) + return map + ns->base.offset; + else + return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); } static void diff --git a/src/gallium/drivers/nv04/nv04_vbo.c b/src/gallium/drivers/nv04/nv04_vbo.c index e3167814f2b..34847718145 100644 --- a/src/gallium/drivers/nv04/nv04_vbo.c +++ b/src/gallium/drivers/nv04/nv04_vbo.c @@ -9,7 +9,7 @@ #include "nouveau/nouveau_channel.h" #include "nouveau/nouveau_pushbuf.h" -boolean nv04_draw_elements( struct pipe_context *pipe, +void nv04_draw_elements( struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned prim, unsigned start, unsigned count) @@ -45,7 +45,7 @@ boolean nv04_draw_elements( struct pipe_context *pipe, draw_set_mapped_element_buffer(draw, 0, NULL); } - draw_set_mapped_constant_buffer(draw, + draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, nv04->constbuf[PIPE_SHADER_VERTEX], nv04->constbuf_nr[PIPE_SHADER_VERTEX]); @@ -65,15 +65,13 @@ boolean nv04_draw_elements( struct pipe_context *pipe, pipe_buffer_unmap(pscreen, indexBuffer); draw_set_mapped_element_buffer(draw, 0, NULL); } - - return TRUE; } -boolean nv04_draw_arrays( struct pipe_context *pipe, - unsigned prim, unsigned start, unsigned count) +void nv04_draw_arrays( struct pipe_context *pipe, + unsigned prim, unsigned start, unsigned count) { printf("coucou in draw arrays\n"); - return nv04_draw_elements(pipe, NULL, 0, prim, start, count); + nv04_draw_elements(pipe, NULL, 0, prim, start, count); } diff --git a/src/gallium/drivers/nv10/nv10_context.c b/src/gallium/drivers/nv10/nv10_context.c index 65a22b175e1..1ecb73d06e8 100644 --- a/src/gallium/drivers/nv10/nv10_context.c +++ b/src/gallium/drivers/nv10/nv10_context.c @@ -10,10 +10,14 @@ nv10_flush(struct pipe_context *pipe, unsigned flags, struct pipe_fence_handle **fence) { struct nv10_context *nv10 = nv10_context(pipe); + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; draw_flush(nv10->draw); - FIRE_RING(fence); + FIRE_RING(chan); + if (fence) + *fence = NULL; } static void @@ -31,230 +35,226 @@ static void nv10_init_hwctx(struct nv10_context *nv10) { struct nv10_screen *screen = nv10->screen; struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; int i; float projectionmatrix[16]; - BEGIN_RING(celsius, NV10TCL_DMA_NOTIFY, 1); - OUT_RING (screen->sync->handle); - BEGIN_RING(celsius, NV10TCL_DMA_IN_MEMORY0, 2); - OUT_RING (chan->vram->handle); - OUT_RING (chan->gart->handle); - BEGIN_RING(celsius, NV10TCL_DMA_IN_MEMORY2, 2); - OUT_RING (chan->vram->handle); - OUT_RING (chan->vram->handle); + BEGIN_RING(chan, celsius, NV10TCL_DMA_NOTIFY, 1); + OUT_RING (chan, screen->sync->handle); + BEGIN_RING(chan, celsius, NV10TCL_DMA_IN_MEMORY0, 2); + OUT_RING (chan, chan->vram->handle); + OUT_RING (chan, chan->gart->handle); + BEGIN_RING(chan, celsius, NV10TCL_DMA_IN_MEMORY2, 2); + OUT_RING (chan, chan->vram->handle); + OUT_RING (chan, chan->vram->handle); - BEGIN_RING(celsius, NV10TCL_NOP, 1); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_NOP, 1); + OUT_RING (chan, 0); - BEGIN_RING(celsius, NV10TCL_RT_HORIZ, 2); - OUT_RING (0); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_RT_HORIZ, 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); - BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 1); - OUT_RING ((0x7ff<<16)|0x800); - BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_VERT(0), 1); - OUT_RING ((0x7ff<<16)|0x800); + BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 1); + OUT_RING (chan, (0x7ff<<16)|0x800); + BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_VERT(0), 1); + OUT_RING (chan, (0x7ff<<16)|0x800); for (i=1;i<8;i++) { - BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(i), 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_VERT(i), 1); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(i), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_VERT(i), 1); + OUT_RING (chan, 0); } - BEGIN_RING(celsius, 0x290, 1); - OUT_RING ((0x10<<16)|1); - BEGIN_RING(celsius, 0x3f4, 1); - OUT_RING (0); + BEGIN_RING(chan, celsius, 0x290, 1); + OUT_RING (chan, (0x10<<16)|1); + BEGIN_RING(chan, celsius, 0x3f4, 1); + OUT_RING (chan, 0); - BEGIN_RING(celsius, NV10TCL_NOP, 1); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_NOP, 1); + OUT_RING (chan, 0); if (nv10->screen->celsius->grclass != NV10TCL) { /* For nv11, nv17 */ - BEGIN_RING(celsius, 0x120, 3); - OUT_RING (0); - OUT_RING (1); - OUT_RING (2); + BEGIN_RING(chan, celsius, 0x120, 3); + OUT_RING (chan, 0); + OUT_RING (chan, 1); + OUT_RING (chan, 2); - BEGIN_RING(celsius, NV10TCL_NOP, 1); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_NOP, 1); + OUT_RING (chan, 0); } - BEGIN_RING(celsius, NV10TCL_NOP, 1); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_NOP, 1); + OUT_RING (chan, 0); /* Set state */ - BEGIN_RING(celsius, NV10TCL_FOG_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_FUNC, 2); - OUT_RING (0x207); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_TX_ENABLE(0), 2); - OUT_RING (0); - OUT_RING (0); - - BEGIN_RING(celsius, NV10TCL_RC_IN_ALPHA(0), 12); - OUT_RING (0x30141010); - OUT_RING (0); - OUT_RING (0x20040000); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0x00000c00); - OUT_RING (0); - OUT_RING (0x00000c00); - OUT_RING (0x18000000); - OUT_RING (0x300e0300); - OUT_RING (0x0c091c80); - - BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_DITHER_ENABLE, 2); - OUT_RING (1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_VERTEX_WEIGHT_ENABLE, 2); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_SRC, 4); - OUT_RING (1); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0x8006); - BEGIN_RING(celsius, NV10TCL_STENCIL_MASK, 8); - OUT_RING (0xff); - OUT_RING (0x207); - OUT_RING (0); - OUT_RING (0xff); - OUT_RING (0x1e00); - OUT_RING (0x1e00); - OUT_RING (0x1e00); - OUT_RING (0x1d01); - BEGIN_RING(celsius, NV10TCL_NORMALIZE_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_FOG_ENABLE, 2); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_LIGHT_MODEL, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_COLOR_CONTROL, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_ENABLED_LIGHTS, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_POLYGON_OFFSET_POINT_ENABLE, 3); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_DEPTH_FUNC, 1); - OUT_RING (0x201); - BEGIN_RING(celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_DEPTH_TEST_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_POLYGON_OFFSET_FACTOR, 2); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_POINT_SIZE, 1); - OUT_RING (8); - BEGIN_RING(celsius, NV10TCL_POINT_PARAMETERS_ENABLE, 2); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_LINE_WIDTH, 1); - OUT_RING (8); - BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_POLYGON_MODE_FRONT, 2); - OUT_RING (0x1b02); - OUT_RING (0x1b02); - BEGIN_RING(celsius, NV10TCL_CULL_FACE, 2); - OUT_RING (0x405); - OUT_RING (0x901); - BEGIN_RING(celsius, NV10TCL_POLYGON_SMOOTH_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_CULL_FACE_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_TX_GEN_S(0), 8); + BEGIN_RING(chan, celsius, NV10TCL_FOG_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_FUNC, 2); + OUT_RING (chan, 0x207); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_TX_ENABLE(0), 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + + BEGIN_RING(chan, celsius, NV10TCL_RC_IN_ALPHA(0), 12); + OUT_RING (chan, 0x30141010); + OUT_RING (chan, 0); + OUT_RING (chan, 0x20040000); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0x00000c00); + OUT_RING (chan, 0); + OUT_RING (chan, 0x00000c00); + OUT_RING (chan, 0x18000000); + OUT_RING (chan, 0x300e0300); + OUT_RING (chan, 0x0c091c80); + + BEGIN_RING(chan, celsius, NV10TCL_BLEND_FUNC_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_DITHER_ENABLE, 2); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_WEIGHT_ENABLE, 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_BLEND_FUNC_SRC, 4); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0x8006); + BEGIN_RING(chan, celsius, NV10TCL_STENCIL_MASK, 8); + OUT_RING (chan, 0xff); + OUT_RING (chan, 0x207); + OUT_RING (chan, 0); + OUT_RING (chan, 0xff); + OUT_RING (chan, 0x1e00); + OUT_RING (chan, 0x1e00); + OUT_RING (chan, 0x1e00); + OUT_RING (chan, 0x1d01); + BEGIN_RING(chan, celsius, NV10TCL_NORMALIZE_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_FOG_ENABLE, 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_LIGHT_MODEL, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_COLOR_CONTROL, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_ENABLED_LIGHTS, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_POLYGON_OFFSET_POINT_ENABLE, 3); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_DEPTH_FUNC, 1); + OUT_RING (chan, 0x201); + BEGIN_RING(chan, celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_DEPTH_TEST_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_POLYGON_OFFSET_FACTOR, 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_POINT_SIZE, 1); + OUT_RING (chan, 8); + BEGIN_RING(chan, celsius, NV10TCL_POINT_PARAMETERS_ENABLE, 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_LINE_WIDTH, 1); + OUT_RING (chan, 8); + BEGIN_RING(chan, celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_POLYGON_MODE_FRONT, 2); + OUT_RING (chan, 0x1b02); + OUT_RING (chan, 0x1b02); + BEGIN_RING(chan, celsius, NV10TCL_CULL_FACE, 2); + OUT_RING (chan, 0x405); + OUT_RING (chan, 0x901); + BEGIN_RING(chan, celsius, NV10TCL_POLYGON_SMOOTH_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_CULL_FACE_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_TX_GEN_S(0), 8); for (i=0;i<8;i++) { - OUT_RING (0); + OUT_RING (chan, 0); } - BEGIN_RING(celsius, NV10TCL_FOG_EQUATION_CONSTANT, 3); - OUT_RING (0x3fc00000); /* -1.50 */ - OUT_RING (0xbdb8aa0a); /* -0.09 */ - OUT_RING (0); /* 0.00 */ + BEGIN_RING(chan, celsius, NV10TCL_FOG_EQUATION_CONSTANT, 3); + OUT_RING (chan, 0x3fc00000); /* -1.50 */ + OUT_RING (chan, 0xbdb8aa0a); /* -0.09 */ + OUT_RING (chan, 0); /* 0.00 */ - BEGIN_RING(celsius, NV10TCL_NOP, 1); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_NOP, 1); + OUT_RING (chan, 0); - BEGIN_RING(celsius, NV10TCL_FOG_MODE, 2); - OUT_RING (0x802); - OUT_RING (2); + BEGIN_RING(chan, celsius, NV10TCL_FOG_MODE, 2); + OUT_RING (chan, 0x802); + OUT_RING (chan, 2); /* for some reason VIEW_MATRIX_ENABLE need to be 6 instead of 4 when * using texturing, except when using the texture matrix */ - BEGIN_RING(celsius, NV10TCL_VIEW_MATRIX_ENABLE, 1); - OUT_RING (6); - BEGIN_RING(celsius, NV10TCL_COLOR_MASK, 1); - OUT_RING (0x01010101); + BEGIN_RING(chan, celsius, NV10TCL_VIEW_MATRIX_ENABLE, 1); + OUT_RING (chan, 6); + BEGIN_RING(chan, celsius, NV10TCL_COLOR_MASK, 1); + OUT_RING (chan, 0x01010101); /* Set vertex component */ - BEGIN_RING(celsius, NV10TCL_VERTEX_COL_4F_R, 4); - OUT_RINGf (1.0); - OUT_RINGf (1.0); - OUT_RINGf (1.0); - OUT_RINGf (1.0); - BEGIN_RING(celsius, NV10TCL_VERTEX_COL2_3F_R, 3); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_VERTEX_NOR_3F_X, 3); - OUT_RING (0); - OUT_RING (0); - OUT_RINGf (1.0); - BEGIN_RING(celsius, NV10TCL_VERTEX_TX0_4F_S, 4); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (1.0); - BEGIN_RING(celsius, NV10TCL_VERTEX_TX1_4F_S, 4); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (1.0); - BEGIN_RING(celsius, NV10TCL_VERTEX_FOG_1F, 1); - OUT_RINGf (0.0); - BEGIN_RING(celsius, NV10TCL_EDGEFLAG_ENABLE, 1); - OUT_RING (1); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_COL_4F_R, 4); + OUT_RINGf (chan, 1.0); + OUT_RINGf (chan, 1.0); + OUT_RINGf (chan, 1.0); + OUT_RINGf (chan, 1.0); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_COL2_3F_R, 3); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_NOR_3F_X, 3); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RINGf (chan, 1.0); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_TX0_4F_S, 4); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 1.0); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_TX1_4F_S, 4); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 1.0); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_FOG_1F, 1); + OUT_RINGf (chan, 0.0); + BEGIN_RING(chan, celsius, NV10TCL_EDGEFLAG_ENABLE, 1); + OUT_RING (chan, 1); memset(projectionmatrix, 0, sizeof(projectionmatrix)); - BEGIN_RING(celsius, NV10TCL_PROJECTION_MATRIX(0), 16); + BEGIN_RING(chan, celsius, NV10TCL_PROJECTION_MATRIX(0), 16); projectionmatrix[0*4+0] = 1.0; projectionmatrix[1*4+1] = 1.0; projectionmatrix[2*4+2] = 1.0; projectionmatrix[3*4+3] = 1.0; for (i=0;i<16;i++) { - OUT_RINGf (projectionmatrix[i]); + OUT_RINGf (chan, projectionmatrix[i]); } - BEGIN_RING(celsius, NV10TCL_DEPTH_RANGE_NEAR, 2); - OUT_RING (0.0); - OUT_RINGf (16777216.0); - - BEGIN_RING(celsius, NV10TCL_VIEWPORT_TRANSLATE_X, 4); - OUT_RINGf (-2048.0); - OUT_RINGf (-2048.0); - OUT_RINGf (16777215.0 * 0.5); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_DEPTH_RANGE_NEAR, 2); + OUT_RING (chan, 0.0); + OUT_RINGf (chan, 16777216.0); - FIRE_RING (NULL); -} + BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_TRANSLATE_X, 4); + OUT_RINGf (chan, -2048.0); + OUT_RINGf (chan, -2048.0); + OUT_RINGf (chan, 16777215.0 * 0.5); + OUT_RING (chan, 0); -static void -nv10_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield) -{ + FIRE_RING (chan); } struct pipe_context * @@ -276,7 +276,6 @@ nv10_create(struct pipe_screen *pscreen, unsigned pctx_id) nv10->pipe.winsys = ws; nv10->pipe.screen = pscreen; nv10->pipe.destroy = nv10_destroy; - nv10->pipe.set_edgeflags = nv10_set_edgeflags; nv10->pipe.draw_arrays = nv10_draw_arrays; nv10->pipe.draw_elements = nv10_draw_elements; nv10->pipe.clear = nv10_clear; diff --git a/src/gallium/drivers/nv10/nv10_context.h b/src/gallium/drivers/nv10/nv10_context.h index 36a6aa7a74e..ab4b825487d 100644 --- a/src/gallium/drivers/nv10/nv10_context.h +++ b/src/gallium/drivers/nv10/nv10_context.h @@ -15,10 +15,6 @@ #include "nouveau/nouveau_gldefs.h" #include "nouveau/nouveau_context.h" -#define NOUVEAU_PUSH_CONTEXT(ctx) \ - struct nv10_screen *ctx = nv10->screen -#include "nouveau/nouveau_push.h" - #include "nv10_state.h" #define NOUVEAU_ERR(fmt, args...) \ @@ -144,9 +140,9 @@ extern void nv10_emit_hw_state(struct nv10_context *nv10); extern void nv10_state_tex_update(struct nv10_context *nv10); /* nv10_vbo.c */ -extern boolean nv10_draw_arrays(struct pipe_context *, unsigned mode, +extern void nv10_draw_arrays(struct pipe_context *, unsigned mode, unsigned start, unsigned count); -extern boolean nv10_draw_elements( struct pipe_context *pipe, +extern void nv10_draw_elements( struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned prim, unsigned start, unsigned count); diff --git a/src/gallium/drivers/nv10/nv10_fragtex.c b/src/gallium/drivers/nv10/nv10_fragtex.c index 27f2f875847..c1f7ccb9ab6 100644 --- a/src/gallium/drivers/nv10/nv10_fragtex.c +++ b/src/gallium/drivers/nv10/nv10_fragtex.c @@ -52,6 +52,9 @@ nv10_fragtex_build(struct nv10_context *nv10, int unit) struct nv10_miptree *nv10mt = nv10->tex_miptree[unit]; struct pipe_texture *pt = &nv10mt->base; struct nv10_texture_format *tf; + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; uint32_t txf, txs, txp; tf = nv10_fragtex_format(pt->format); @@ -62,9 +65,9 @@ nv10_fragtex_build(struct nv10_context *nv10, int unit) txf = tf->format << 8; txf |= (pt->last_level + 1) << 16; - txf |= log2i(pt->width[0]) << 20; - txf |= log2i(pt->height[0]) << 24; - txf |= log2i(pt->depth[0]) << 28; + txf |= log2i(pt->width0) << 20; + txf |= log2i(pt->height0) << 24; + txf |= log2i(pt->depth0) << 28; txf |= 8; switch (pt->target) { @@ -82,15 +85,15 @@ nv10_fragtex_build(struct nv10_context *nv10, int unit) return; } - BEGIN_RING(celsius, NV10TCL_TX_OFFSET(unit), 8); - OUT_RELOCl(nv10mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - OUT_RELOCd(nv10mt->buffer,txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); - OUT_RING (ps->wrap); - OUT_RING (0x40000000); /* enable */ - OUT_RING (txs); - OUT_RING (ps->filt | 0x2000 /* magic */); - OUT_RING ((pt->width[0] << 16) | pt->height[0]); - OUT_RING (ps->bcol); + BEGIN_RING(chan, celsius, NV10TCL_TX_OFFSET(unit), 8); + OUT_RELOCl(chan, nouveau_bo(nv10mt->buffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCd(chan, nouveau_bo(nv10mt->buffer),txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); + OUT_RING (chan, ps->wrap); + OUT_RING (chan, 0x40000000); /* enable */ + OUT_RING (chan, txs); + OUT_RING (chan, ps->filt | 0x2000 /* magic */); + OUT_RING (chan, (pt->width0 << 16) | pt->height0); + OUT_RING (chan, ps->bcol); #endif } @@ -99,6 +102,9 @@ nv10_fragtex_bind(struct nv10_context *nv10) { #if 0 struct nv10_fragment_program *fp = nv10->fragprog.active; + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; unsigned samplers, unit; samplers = nv10->fp_samplers & ~fp->samplers; @@ -106,8 +112,8 @@ nv10_fragtex_bind(struct nv10_context *nv10) unit = ffs(samplers) - 1; samplers &= ~(1 << unit); - BEGIN_RING(celsius, NV10TCL_TX_ENABLE(unit), 1); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_TX_ENABLE(unit), 1); + OUT_RING (chan, 0); } samplers = nv10->dirty_samplers & fp->samplers; diff --git a/src/gallium/drivers/nv10/nv10_miptree.c b/src/gallium/drivers/nv10/nv10_miptree.c index 34e3c2ebd77..908482ad854 100644 --- a/src/gallium/drivers/nv10/nv10_miptree.c +++ b/src/gallium/drivers/nv10/nv10_miptree.c @@ -1,6 +1,8 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" +#include "util/u_format.h" +#include "util/u_math.h" #include "nv10_context.h" #include "nv10_screen.h" @@ -10,7 +12,7 @@ nv10_miptree_layout(struct nv10_miptree *nv10mt) { struct pipe_texture *pt = &nv10mt->base; boolean swizzled = FALSE; - uint width = pt->width[0], height = pt->height[0]; + uint width = pt->width0; uint offset = 0; int nr_faces, l, f; @@ -21,29 +23,23 @@ nv10_miptree_layout(struct nv10_miptree *nv10mt) } for (l = 0; l <= pt->last_level; l++) { - pt->width[l] = width; - pt->height[l] = height; - pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); - pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); - if (swizzled) - nv10mt->level[l].pitch = pt->nblocksx[l] * pt->block.size; + nv10mt->level[l].pitch = util_format_get_stride(pt->format, width); else - nv10mt->level[l].pitch = pt->nblocksx[0] * pt->block.size; + nv10mt->level[l].pitch = util_format_get_stride(pt->format, pt->width0); nv10mt->level[l].pitch = (nv10mt->level[l].pitch + 63) & ~63; nv10mt->level[l].image_offset = CALLOC(nr_faces, sizeof(unsigned)); - width = MAX2(1, width >> 1); - height = MAX2(1, height >> 1); + width = u_minify(width, 1); } for (f = 0; f < nr_faces; f++) { for (l = 0; l <= pt->last_level; l++) { nv10mt->level[l].image_offset[f] = offset; - offset += nv10mt->level[l].pitch * pt->height[l]; + offset += nv10mt->level[l].pitch * u_minify(pt->height0, l); } } @@ -58,7 +54,7 @@ nv10_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, /* Only supports 2D, non-mipmapped textures for the moment */ if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 || - pt->depth[0] != 1) + pt->depth0 != 1) return NULL; mt = CALLOC_STRUCT(nv10_miptree); @@ -72,6 +68,7 @@ nv10_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, mt->level[0].image_offset = CALLOC(1, sizeof(unsigned)); pipe_buffer_reference(&mt->buffer, pb); + mt->bo = nouveau_bo(mt->buffer); return &mt->base; } @@ -95,6 +92,7 @@ nv10_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt) FREE(mt); return NULL; } + mt->bo = nouveau_bo(mt->buffer); return &mt->base; } @@ -133,8 +131,8 @@ nv10_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt, return NULL; pipe_texture_reference(&ns->base.texture, pt); ns->base.format = pt->format; - ns->base.width = pt->width[level]; - ns->base.height = pt->height[level]; + ns->base.width = u_minify(pt->width0, level); + ns->base.height = u_minify(pt->height0, level); ns->base.usage = flags; pipe_reference_init(&ns->base.reference, 1); ns->base.face = face; diff --git a/src/gallium/drivers/nv10/nv10_prim_vbuf.c b/src/gallium/drivers/nv10/nv10_prim_vbuf.c index 7ba9777a222..c5dbe43dbc8 100644 --- a/src/gallium/drivers/nv10/nv10_prim_vbuf.c +++ b/src/gallium/drivers/nv10/nv10_prim_vbuf.c @@ -67,12 +67,15 @@ struct nv10_vbuf_render { void nv10_vtxbuf_bind( struct nv10_context* nv10 ) { + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; int i; for(i = 0; i < 8; i++) { - BEGIN_RING(celsius, NV10TCL_VTXBUF_ADDRESS(i), 1); - OUT_RING(0/*nv10->vtxbuf*/); - BEGIN_RING(celsius, NV10TCL_VTXFMT(i), 1); - OUT_RING(0/*XXX*/); + BEGIN_RING(chan, celsius, NV10TCL_VTXBUF_ADDRESS(i), 1); + OUT_RING(chan, 0/*nv10->vtxbuf*/); + BEGIN_RING(chan, celsius, NV10TCL_VTXFMT(i), 1); + OUT_RING(chan, 0/*XXX*/); } } @@ -163,19 +166,22 @@ nv10_vbuf_render_draw( struct vbuf_render *render, { struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render); struct nv10_context *nv10 = nv10_render->nv10; + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; int push, i; nv10_emit_hw_state(nv10); - BEGIN_RING(celsius, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1); - OUT_RELOCl(nv10_render->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1); + OUT_RELOCl(chan, nouveau_bo(nv10_render->buffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - BEGIN_RING(celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); - OUT_RING(nv10_render->hwprim); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); + OUT_RING(chan, nv10_render->hwprim); if (nr_indices & 1) { - BEGIN_RING(celsius, NV10TCL_VB_ELEMENT_U32, 1); - OUT_RING (indices[0]); + BEGIN_RING(chan, celsius, NV10TCL_VB_ELEMENT_U32, 1); + OUT_RING (chan, indices[0]); indices++; nr_indices--; } @@ -183,16 +189,16 @@ nv10_vbuf_render_draw( struct vbuf_render *render, // XXX too big/small ? check the size push = MIN2(nr_indices, 1200 * 2); - BEGIN_RING_NI(celsius, NV10TCL_VB_ELEMENT_U16, push >> 1); + BEGIN_RING_NI(chan, celsius, NV10TCL_VB_ELEMENT_U16, push >> 1); for (i = 0; i < push; i+=2) - OUT_RING((indices[i+1] << 16) | indices[i]); + OUT_RING(chan, (indices[i+1] << 16) | indices[i]); nr_indices -= push; indices += push; } - BEGIN_RING(celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); + OUT_RING (chan, 0); } diff --git a/src/gallium/drivers/nv10/nv10_screen.c b/src/gallium/drivers/nv10/nv10_screen.c index ee5901e743e..69a6dab866a 100644 --- a/src/gallium/drivers/nv10/nv10_screen.c +++ b/src/gallium/drivers/nv10/nv10_screen.c @@ -115,6 +115,9 @@ nv10_screen_destroy(struct pipe_screen *pscreen) nouveau_notifier_free(&screen->sync); nouveau_grobj_free(&screen->celsius); + nv04_surface_2d_takedown(&screen->eng2d); + + nouveau_screen_fini(&screen->base); FREE(pscreen); } @@ -177,7 +180,6 @@ nv10_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) NOUVEAU_ERR("Error creating 3D object: %d\n", ret); return FALSE; } - BIND_RING(chan, screen->celsius, 7); /* 2D engine setup */ screen->eng2d = nv04_surface_2d_init(&screen->base); diff --git a/src/gallium/drivers/nv10/nv10_state.c b/src/gallium/drivers/nv10/nv10_state.c index 9b38219b996..ffc6be3c401 100644 --- a/src/gallium/drivers/nv10/nv10_state.c +++ b/src/gallium/drivers/nv10/nv10_state.c @@ -553,9 +553,9 @@ nv10_init_state_functions(struct nv10_context *nv10) nv10->pipe.delete_blend_state = nv10_blend_state_delete; nv10->pipe.create_sampler_state = nv10_sampler_state_create; - nv10->pipe.bind_sampler_states = nv10_sampler_state_bind; + nv10->pipe.bind_fragment_sampler_states = nv10_sampler_state_bind; nv10->pipe.delete_sampler_state = nv10_sampler_state_delete; - nv10->pipe.set_sampler_textures = nv10_set_sampler_texture; + nv10->pipe.set_fragment_sampler_textures = nv10_set_sampler_texture; nv10->pipe.create_rasterizer_state = nv10_rasterizer_state_create; nv10->pipe.bind_rasterizer_state = nv10_rasterizer_state_bind; diff --git a/src/gallium/drivers/nv10/nv10_state.h b/src/gallium/drivers/nv10/nv10_state.h index 3a3fd0d4f4f..2524ac02e29 100644 --- a/src/gallium/drivers/nv10/nv10_state.h +++ b/src/gallium/drivers/nv10/nv10_state.h @@ -126,6 +126,7 @@ struct nv10_depth_stencil_alpha_state { struct nv10_miptree { struct pipe_texture base; + struct nouveau_bo *bo; struct pipe_buffer *buffer; uint total_size; diff --git a/src/gallium/drivers/nv10/nv10_state_emit.c b/src/gallium/drivers/nv10/nv10_state_emit.c index 2577ab73b56..30a596ca604 100644 --- a/src/gallium/drivers/nv10/nv10_state_emit.c +++ b/src/gallium/drivers/nv10/nv10_state_emit.c @@ -4,25 +4,32 @@ static void nv10_state_emit_blend(struct nv10_context* nv10) { struct nv10_blend_state *b = nv10->blend; + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; - BEGIN_RING(celsius, NV10TCL_DITHER_ENABLE, 1); - OUT_RING (b->d_enable); + BEGIN_RING(chan, celsius, NV10TCL_DITHER_ENABLE, 1); + OUT_RING (chan, b->d_enable); - BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_ENABLE, 3); - OUT_RING (b->b_enable); - OUT_RING (b->b_srcfunc); - OUT_RING (b->b_dstfunc); + BEGIN_RING(chan, celsius, NV10TCL_BLEND_FUNC_ENABLE, 3); + OUT_RING (chan, b->b_enable); + OUT_RING (chan, b->b_srcfunc); + OUT_RING (chan, b->b_dstfunc); - BEGIN_RING(celsius, NV10TCL_COLOR_MASK, 1); - OUT_RING (b->c_mask); + BEGIN_RING(chan, celsius, NV10TCL_COLOR_MASK, 1); + OUT_RING (chan, b->c_mask); } static void nv10_state_emit_blend_color(struct nv10_context* nv10) { struct pipe_blend_color *c = nv10->blend_color; + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; - BEGIN_RING(celsius, NV10TCL_BLEND_COLOR, 1); - OUT_RING ((float_to_ubyte(c->color[3]) << 24)| + BEGIN_RING(chan, celsius, NV10TCL_BLEND_COLOR, 1); + OUT_RING (chan, + (float_to_ubyte(c->color[3]) << 24)| (float_to_ubyte(c->color[0]) << 16)| (float_to_ubyte(c->color[1]) << 8) | (float_to_ubyte(c->color[2]) << 0)); @@ -31,60 +38,66 @@ static void nv10_state_emit_blend_color(struct nv10_context* nv10) static void nv10_state_emit_rast(struct nv10_context* nv10) { struct nv10_rasterizer_state *r = nv10->rast; + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; - BEGIN_RING(celsius, NV10TCL_SHADE_MODEL, 2); - OUT_RING (r->shade_model); - OUT_RING (r->line_width); + BEGIN_RING(chan, celsius, NV10TCL_SHADE_MODEL, 2); + OUT_RING (chan, r->shade_model); + OUT_RING (chan, r->line_width); - BEGIN_RING(celsius, NV10TCL_POINT_SIZE, 1); - OUT_RING (r->point_size); + BEGIN_RING(chan, celsius, NV10TCL_POINT_SIZE, 1); + OUT_RING (chan, r->point_size); - BEGIN_RING(celsius, NV10TCL_POLYGON_MODE_FRONT, 2); - OUT_RING (r->poly_mode_front); - OUT_RING (r->poly_mode_back); + BEGIN_RING(chan, celsius, NV10TCL_POLYGON_MODE_FRONT, 2); + OUT_RING (chan, r->poly_mode_front); + OUT_RING (chan, r->poly_mode_back); - BEGIN_RING(celsius, NV10TCL_CULL_FACE, 2); - OUT_RING (r->cull_face); - OUT_RING (r->front_face); + BEGIN_RING(chan, celsius, NV10TCL_CULL_FACE, 2); + OUT_RING (chan, r->cull_face); + OUT_RING (chan, r->front_face); - BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 2); - OUT_RING (r->line_smooth_en); - OUT_RING (r->poly_smooth_en); + BEGIN_RING(chan, celsius, NV10TCL_LINE_SMOOTH_ENABLE, 2); + OUT_RING (chan, r->line_smooth_en); + OUT_RING (chan, r->poly_smooth_en); - BEGIN_RING(celsius, NV10TCL_CULL_FACE_ENABLE, 1); - OUT_RING (r->cull_face_en); + BEGIN_RING(chan, celsius, NV10TCL_CULL_FACE_ENABLE, 1); + OUT_RING (chan, r->cull_face_en); } static void nv10_state_emit_dsa(struct nv10_context* nv10) { struct nv10_depth_stencil_alpha_state *d = nv10->dsa; + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; - BEGIN_RING(celsius, NV10TCL_DEPTH_FUNC, 1); - OUT_RING (d->depth.func); + BEGIN_RING(chan, celsius, NV10TCL_DEPTH_FUNC, 1); + OUT_RING (chan, d->depth.func); - BEGIN_RING(celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1); - OUT_RING (d->depth.write_enable); + BEGIN_RING(chan, celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1); + OUT_RING (chan, d->depth.write_enable); - BEGIN_RING(celsius, NV10TCL_DEPTH_TEST_ENABLE, 1); - OUT_RING (d->depth.test_enable); + BEGIN_RING(chan, celsius, NV10TCL_DEPTH_TEST_ENABLE, 1); + OUT_RING (chan, d->depth.test_enable); #if 0 - BEGIN_RING(celsius, NV10TCL_STENCIL_ENABLE, 1); - OUT_RING (d->stencil.enable); - BEGIN_RING(celsius, NV10TCL_STENCIL_MASK, 7); - OUT_RINGp ((uint32_t *)&(d->stencil.wmask), 7); + BEGIN_RING(chan, celsius, NV10TCL_STENCIL_ENABLE, 1); + OUT_RING (chan, d->stencil.enable); + BEGIN_RING(chan, celsius, NV10TCL_STENCIL_MASK, 7); + OUT_RINGp (chan, (uint32_t *)&(d->stencil.wmask), 7); #endif - BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1); - OUT_RING (d->alpha.enabled); + BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1); + OUT_RING (chan, d->alpha.enabled); - BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_FUNC, 1); - OUT_RING (d->alpha.func); + BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_FUNC, 1); + OUT_RING (chan, d->alpha.func); - BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_REF, 1); - OUT_RING (d->alpha.ref); + BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_REF, 1); + OUT_RING (chan, d->alpha.ref); } static void nv10_state_emit_viewport(struct nv10_context* nv10) @@ -108,6 +121,10 @@ static void nv10_state_emit_framebuffer(struct nv10_context* nv10) int colour_format = 0, zeta_format = 0; struct nv10_miptree *nv10mt = 0; + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; + w = fb->cbufs[0]->width; h = fb->cbufs[0]->height; colour_format = fb->cbufs[0]->format; @@ -144,11 +161,11 @@ static void nv10_state_emit_framebuffer(struct nv10_context* nv10) } if (zeta) { - BEGIN_RING(celsius, NV10TCL_RT_PITCH, 1); - OUT_RING (rt->pitch | (zeta->pitch << 16)); + BEGIN_RING(chan, celsius, NV10TCL_RT_PITCH, 1); + OUT_RING (chan, rt->pitch | (zeta->pitch << 16)); } else { - BEGIN_RING(celsius, NV10TCL_RT_PITCH, 1); - OUT_RING (rt->pitch | (rt->pitch << 16)); + BEGIN_RING(chan, celsius, NV10TCL_RT_PITCH, 1); + OUT_RING (chan, rt->pitch | (rt->pitch << 16)); } nv10mt = (struct nv10_miptree *)rt->base.texture; @@ -160,13 +177,13 @@ static void nv10_state_emit_framebuffer(struct nv10_context* nv10) nv10->zeta = nv10mt->buffer; } - BEGIN_RING(celsius, NV10TCL_RT_HORIZ, 3); - OUT_RING ((w << 16) | 0); - OUT_RING ((h << 16) | 0); - OUT_RING (rt_format); - BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 2); - OUT_RING (((w - 1) << 16) | 0 | 0x08000800); - OUT_RING (((h - 1) << 16) | 0 | 0x08000800); + BEGIN_RING(chan, celsius, NV10TCL_RT_HORIZ, 3); + OUT_RING (chan, (w << 16) | 0); + OUT_RING (chan, (h << 16) | 0); + OUT_RING (chan, rt_format); + BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 2); + OUT_RING (chan, ((w - 1) << 16) | 0 | 0x08000800); + OUT_RING (chan, ((h - 1) << 16) | 0 | 0x08000800); } static void nv10_vertex_layout(struct nv10_context *nv10) @@ -201,6 +218,10 @@ static void nv10_vertex_layout(struct nv10_context *nv10) void nv10_emit_hw_state(struct nv10_context *nv10) { + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; + struct nouveau_bo *rt_bo; int i; if (nv10->dirty & NV10_NEW_VERTPROG) { @@ -269,38 +290,41 @@ nv10_emit_hw_state(struct nv10_context *nv10) */ /* Render target */ + rt_bo = nouveau_bo(nv10->rt[0]); // XXX figre out who's who for NV10TCL_DMA_* and fill accordingly -// BEGIN_RING(celsius, NV10TCL_DMA_COLOR0, 1); -// OUT_RELOCo(nv10->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(celsius, NV10TCL_COLOR_OFFSET, 1); - OUT_RELOCl(nv10->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); +// BEGIN_RING(chan, celsius, NV10TCL_DMA_COLOR0, 1); +// OUT_RELOCo(chan, rt_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, celsius, NV10TCL_COLOR_OFFSET, 1); + OUT_RELOCl(chan, rt_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); if (nv10->zeta) { + struct nouveau_bo *zeta_bo = nouveau_bo(nv10->zeta); // XXX -// BEGIN_RING(celsius, NV10TCL_DMA_ZETA, 1); -// OUT_RELOCo(nv10->zeta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(celsius, NV10TCL_ZETA_OFFSET, 1); - OUT_RELOCl(nv10->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); +// BEGIN_RING(chan, celsius, NV10TCL_DMA_ZETA, 1); +// OUT_RELOCo(chan, zeta_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, celsius, NV10TCL_ZETA_OFFSET, 1); + OUT_RELOCl(chan, zeta_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); /* XXX for when we allocate LMA on nv17 */ -/* BEGIN_RING(celsius, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1); - OUT_RELOCl(nv10->zeta + lma_offset);*/ +/* BEGIN_RING(chan, celsius, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1); + OUT_RELOCl(chan, nouveau_bo(nv10->zeta + lma_offset));*/ } /* Vertex buffer */ - BEGIN_RING(celsius, NV10TCL_DMA_VTXBUF0, 1); - OUT_RELOCo(nv10->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(celsius, NV10TCL_COLOR_OFFSET, 1); - OUT_RELOCl(nv10->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, celsius, NV10TCL_DMA_VTXBUF0, 1); + OUT_RELOCo(chan, rt_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, celsius, NV10TCL_COLOR_OFFSET, 1); + OUT_RELOCl(chan, rt_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); /* Texture images */ for (i = 0; i < 2; i++) { if (!(nv10->fp_samplers & (1 << i))) continue; - BEGIN_RING(celsius, NV10TCL_TX_OFFSET(i), 1); - OUT_RELOCl(nv10->tex[i].buffer, 0, NOUVEAU_BO_VRAM | + struct nouveau_bo *bo = nouveau_bo(nv10->tex[i].buffer); + BEGIN_RING(chan, celsius, NV10TCL_TX_OFFSET(i), 1); + OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - BEGIN_RING(celsius, NV10TCL_TX_FORMAT(i), 1); - OUT_RELOCd(nv10->tex[i].buffer, nv10->tex[i].format, + BEGIN_RING(chan, celsius, NV10TCL_TX_FORMAT(i), 1); + OUT_RELOCd(chan, bo, nv10->tex[i].format, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_OR, NV10TCL_TX_FORMAT_DMA0, NV10TCL_TX_FORMAT_DMA1); diff --git a/src/gallium/drivers/nv10/nv10_transfer.c b/src/gallium/drivers/nv10/nv10_transfer.c index 8feb85e4bda..eb04af9782e 100644 --- a/src/gallium/drivers/nv10/nv10_transfer.c +++ b/src/gallium/drivers/nv10/nv10_transfer.c @@ -1,7 +1,9 @@ #include <pipe/p_state.h> #include <pipe/p_defines.h> #include <pipe/p_inlines.h> +#include <util/u_format.h> #include <util/u_memory.h> +#include <util/u_math.h> #include <nouveau/nouveau_winsys.h> #include "nv10_context.h" #include "nv10_screen.h" @@ -10,22 +12,19 @@ struct nv10_transfer { struct pipe_transfer base; struct pipe_surface *surface; - bool direct; + boolean direct; }; static void -nv10_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, +nv10_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height, struct pipe_texture *template) { memset(template, 0, sizeof(struct pipe_texture)); template->target = pt->target; template->format = pt->format; - template->width[0] = pt->width[level]; - template->height[0] = pt->height[level]; - template->depth[0] = 1; - template->block = pt->block; - template->nblocksx[0] = pt->nblocksx[level]; - template->nblocksy[0] = pt->nblocksx[level]; + template->width0 = width; + template->height0 = height; + template->depth0 = 1; template->last_level = 0; template->nr_samples = pt->nr_samples; @@ -48,14 +47,10 @@ nv10_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; pipe_texture_reference(&tx->base.texture, pt); - tx->base.format = pt->format; tx->base.x = x; tx->base.y = y; tx->base.width = w; tx->base.height = h; - tx->base.block = pt->block; - tx->base.nblocksx = pt->nblocksx[level]; - tx->base.nblocksy = pt->nblocksy[level]; tx->base.stride = mt->level[level].pitch; tx->base.usage = usage; tx->base.face = face; @@ -76,7 +71,7 @@ nv10_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->direct = false; - nv10_compatible_transfer_tex(pt, level, &tx_tex_template); + nv10_compatible_transfer_tex(pt, w, h, &tx_tex_template); tx_tex = pscreen->texture_create(pscreen, &tx_tex_template); if (!tx_tex) @@ -85,6 +80,8 @@ nv10_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; } + tx->base.stride = ((struct nv10_miptree*)tx_tex)->level[0].pitch; + tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, face, level, zslice, pipe_transfer_buffer_flags(&tx->base)); @@ -110,8 +107,8 @@ nv10_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, /* TODO: Check if SIFM can un-swizzle */ nvscreen->eng2d->copy(nvscreen->eng2d, tx->surface, 0, 0, - src, 0, 0, - src->width, src->height); + src, x, y, + w, h); pipe_surface_reference(&src, NULL); } @@ -135,9 +132,9 @@ nv10_transfer_del(struct pipe_transfer *ptx) /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ nvscreen->eng2d->copy(nvscreen->eng2d, - dst, 0, 0, + dst, tx->base.x, tx->base.y, tx->surface, 0, 0, - dst->width, dst->height); + tx->base.width, tx->base.height); pipe_surface_reference(&dst, NULL); } @@ -156,8 +153,10 @@ nv10_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) void *map = pipe_buffer_map(pscreen, mt->buffer, pipe_transfer_buffer_flags(ptx)); - return map + ns->base.offset + - ptx->y * ns->pitch + ptx->x * ptx->block.size; + if(!tx->direct) + return map + ns->base.offset; + else + return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); } static void diff --git a/src/gallium/drivers/nv10/nv10_vbo.c b/src/gallium/drivers/nv10/nv10_vbo.c index 441a4f75f3f..9180c72c9b0 100644 --- a/src/gallium/drivers/nv10/nv10_vbo.c +++ b/src/gallium/drivers/nv10/nv10_vbo.c @@ -9,7 +9,7 @@ #include "nouveau/nouveau_channel.h" #include "nouveau/nouveau_pushbuf.h" -boolean nv10_draw_elements( struct pipe_context *pipe, +void nv10_draw_elements( struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned prim, unsigned start, unsigned count) @@ -45,6 +45,7 @@ boolean nv10_draw_elements( struct pipe_context *pipe, } draw_set_mapped_constant_buffer(draw, + PIPE_SHADER_VERTEX, nv10->constbuf[PIPE_SHADER_VERTEX], nv10->constbuf_nr[PIPE_SHADER_VERTEX]); @@ -64,14 +65,12 @@ boolean nv10_draw_elements( struct pipe_context *pipe, pipe_buffer_unmap(pscreen, indexBuffer); draw_set_mapped_element_buffer(draw, 0, NULL); } - - return TRUE; } -boolean nv10_draw_arrays( struct pipe_context *pipe, - unsigned prim, unsigned start, unsigned count) +void nv10_draw_arrays( struct pipe_context *pipe, + unsigned prim, unsigned start, unsigned count) { - return nv10_draw_elements(pipe, NULL, 0, prim, start, count); + nv10_draw_elements(pipe, NULL, 0, prim, start, count); } diff --git a/src/gallium/drivers/nv20/nv20_context.c b/src/gallium/drivers/nv20/nv20_context.c index 276db8b57b6..1dba7248877 100644 --- a/src/gallium/drivers/nv20/nv20_context.c +++ b/src/gallium/drivers/nv20/nv20_context.c @@ -10,10 +10,14 @@ nv20_flush(struct pipe_context *pipe, unsigned flags, struct pipe_fence_handle **fence) { struct nv20_context *nv20 = nv20_context(pipe); + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; draw_flush(nv20->draw); - FIRE_RING(fence); + FIRE_RING(chan); + if (fence) + *fence = NULL; } static void @@ -31,353 +35,352 @@ static void nv20_init_hwctx(struct nv20_context *nv20) { struct nv20_screen *screen = nv20->screen; struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; int i; float projectionmatrix[16]; - const boolean is_nv25tcl = (nv20->screen->kelvin->grclass == NV25TCL); + const boolean is_nv25tcl = (kelvin->grclass == NV25TCL); - BEGIN_RING(kelvin, NV20TCL_DMA_NOTIFY, 1); - OUT_RING (screen->sync->handle); - BEGIN_RING(kelvin, NV20TCL_DMA_TEXTURE0, 2); - OUT_RING (chan->vram->handle); - OUT_RING (chan->gart->handle); /* TEXTURE1 */ - BEGIN_RING(kelvin, NV20TCL_DMA_COLOR, 2); - OUT_RING (chan->vram->handle); - OUT_RING (chan->vram->handle); /* ZETA */ + BEGIN_RING(chan, kelvin, NV20TCL_DMA_NOTIFY, 1); + OUT_RING (chan, screen->sync->handle); + BEGIN_RING(chan, kelvin, NV20TCL_DMA_TEXTURE0, 2); + OUT_RING (chan, chan->vram->handle); + OUT_RING (chan, chan->gart->handle); /* TEXTURE1 */ + BEGIN_RING(chan, kelvin, NV20TCL_DMA_COLOR, 2); + OUT_RING (chan, chan->vram->handle); + OUT_RING (chan, chan->vram->handle); /* ZETA */ - BEGIN_RING(kelvin, NV20TCL_DMA_QUERY, 1); - OUT_RING (0); /* renouveau: beef0351, unique */ + BEGIN_RING(chan, kelvin, NV20TCL_DMA_QUERY, 1); + OUT_RING (chan, 0); /* renouveau: beef0351, unique */ - BEGIN_RING(kelvin, NV20TCL_RT_HORIZ, 2); - OUT_RING (0); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV20TCL_RT_HORIZ, 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 1); - OUT_RING ((0xfff << 16) | 0x0); - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_VERT(0), 1); - OUT_RING ((0xfff << 16) | 0x0); + BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 1); + OUT_RING (chan, (0xfff << 16) | 0x0); + BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_VERT(0), 1); + OUT_RING (chan, (0xfff << 16) | 0x0); for (i = 1; i < NV20TCL_VIEWPORT_CLIP_HORIZ__SIZE; i++) { - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(i), 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_VERT(i), 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(i), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_VERT(i), 1); + OUT_RING (chan, 0); } - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_MODE, 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_MODE, 1); + OUT_RING (chan, 0); - BEGIN_RING(kelvin, 0x17e0, 3); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (1.0); + BEGIN_RING(chan, kelvin, 0x17e0, 3); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 1.0); if (is_nv25tcl) { - BEGIN_RING(kelvin, NV20TCL_TX_RCOMP, 1); - OUT_RING (NV20TCL_TX_RCOMP_LEQUAL | 0xdb0); + BEGIN_RING(chan, kelvin, NV20TCL_TX_RCOMP, 1); + OUT_RING (chan, NV20TCL_TX_RCOMP_LEQUAL | 0xdb0); } else { - BEGIN_RING(kelvin, 0x1e68, 1); - OUT_RING (0x4b800000); /* 16777216.000000 */ - BEGIN_RING(kelvin, NV20TCL_TX_RCOMP, 1); - OUT_RING (NV20TCL_TX_RCOMP_LEQUAL); + BEGIN_RING(chan, kelvin, 0x1e68, 1); + OUT_RING (chan, 0x4b800000); /* 16777216.000000 */ + BEGIN_RING(chan, kelvin, NV20TCL_TX_RCOMP, 1); + OUT_RING (chan, NV20TCL_TX_RCOMP_LEQUAL); } - BEGIN_RING(kelvin, 0x290, 1); - OUT_RING ((0x10 << 16) | 1); - BEGIN_RING(kelvin, 0x9fc, 1); - OUT_RING (0); - BEGIN_RING(kelvin, 0x1d80, 1); - OUT_RING (1); - BEGIN_RING(kelvin, 0x9f8, 1); - OUT_RING (4); - BEGIN_RING(kelvin, 0x17ec, 3); - OUT_RINGf (0.0); - OUT_RINGf (1.0); - OUT_RINGf (0.0); + BEGIN_RING(chan, kelvin, 0x290, 1); + OUT_RING (chan, (0x10 << 16) | 1); + BEGIN_RING(chan, kelvin, 0x9fc, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, 0x1d80, 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, kelvin, 0x9f8, 1); + OUT_RING (chan, 4); + BEGIN_RING(chan, kelvin, 0x17ec, 3); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 1.0); + OUT_RINGf (chan, 0.0); if (is_nv25tcl) { - BEGIN_RING(kelvin, 0x1d88, 1); - OUT_RING (3); + BEGIN_RING(chan, kelvin, 0x1d88, 1); + OUT_RING (chan, 3); - BEGIN_RING(kelvin, NV25TCL_DMA_IN_MEMORY9, 1); - OUT_RING (chan->vram->handle); - BEGIN_RING(kelvin, NV25TCL_DMA_IN_MEMORY8, 1); - OUT_RING (chan->vram->handle); + BEGIN_RING(chan, kelvin, NV25TCL_DMA_IN_MEMORY9, 1); + OUT_RING (chan, chan->vram->handle); + BEGIN_RING(chan, kelvin, NV25TCL_DMA_IN_MEMORY8, 1); + OUT_RING (chan, chan->vram->handle); } - BEGIN_RING(kelvin, NV20TCL_DMA_FENCE, 1); - OUT_RING (0); /* renouveau: beef1e10 */ + BEGIN_RING(chan, kelvin, NV20TCL_DMA_FENCE, 1); + OUT_RING (chan, 0); /* renouveau: beef1e10 */ - BEGIN_RING(kelvin, 0x1e98, 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, 0x1e98, 1); + OUT_RING (chan, 0); #if 0 if (is_nv25tcl) { - BEGIN_RING(NvSub3D, NV25TCL_DMA_IN_MEMORY4, 2); - OUT_RING (NvDmaTT); /* renouveau: beef0202 */ - OUT_RING (NvDmaFB); /* renouveau: beef0201 */ + BEGIN_RING(chan, NvSub3D, NV25TCL_DMA_IN_MEMORY4, 2); + OUT_RING (chan, NvDmaTT); /* renouveau: beef0202 */ + OUT_RING (chan, NvDmaFB); /* renouveau: beef0201 */ - BEGIN_RING(NvSub3D, NV20TCL_DMA_TEXTURE1, 1); - OUT_RING (NvDmaTT); /* renouveau: beef0202 */ + BEGIN_RING(chan, NvSub3D, NV20TCL_DMA_TEXTURE1, 1); + OUT_RING (chan, NvDmaTT); /* renouveau: beef0202 */ } #endif - BEGIN_RING(kelvin, NV20TCL_NOTIFY, 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV20TCL_NOTIFY, 1); + OUT_RING (chan, 0); - BEGIN_RING(kelvin, 0x120, 3); - OUT_RING (0); - OUT_RING (1); - OUT_RING (2); + BEGIN_RING(chan, kelvin, 0x120, 3); + OUT_RING (chan, 0); + OUT_RING (chan, 1); + OUT_RING (chan, 2); /* error: ILLEGAL_MTHD, PROTECTION_FAULT - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_TRANSLATE_X, 4); - OUT_RINGf (0.0); - OUT_RINGf (512.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); + BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_TRANSLATE_X, 4); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 512.0); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 0.0); */ if (is_nv25tcl) { - BEGIN_RING(kelvin, 0x022c, 2); - OUT_RING (0x280); - OUT_RING (0x07d28000); + BEGIN_RING(chan, kelvin, 0x022c, 2); + OUT_RING (chan, 0x280); + OUT_RING (chan, 0x07d28000); } /* * illegal method, protection fault - BEGIN_RING(NvSub3D, 0x1c2c, 1); - OUT_RING (0); */ + BEGIN_RING(chan, NvSub3D, 0x1c2c, 1); + OUT_RING (chan, 0); */ if (is_nv25tcl) { - BEGIN_RING(kelvin, 0x1da4, 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, 0x1da4, 1); + OUT_RING (chan, 0); } /* * crashes with illegal method, protection fault - BEGIN_RING(NvSub3D, 0x1c18, 1); - OUT_RING (0x200); */ + BEGIN_RING(chan, NvSub3D, 0x1c18, 1); + OUT_RING (chan, 0x200); */ - BEGIN_RING(kelvin, NV20TCL_RT_HORIZ, 2); - OUT_RING ((0 << 16) | 0); - OUT_RING ((0 << 16) | 0); + BEGIN_RING(chan, kelvin, NV20TCL_RT_HORIZ, 2); + OUT_RING (chan, (0 << 16) | 0); + OUT_RING (chan, (0 << 16) | 0); /* *** Set state *** */ - BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_FUNC, 2); - OUT_RING (NV20TCL_ALPHA_FUNC_FUNC_ALWAYS); - OUT_RING (0); /* NV20TCL_ALPHA_FUNC_REF */ + BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_FUNC, 2); + OUT_RING (chan, NV20TCL_ALPHA_FUNC_FUNC_ALWAYS); + OUT_RING (chan, 0); /* NV20TCL_ALPHA_FUNC_REF */ for (i = 0; i < NV20TCL_TX_ENABLE__SIZE; ++i) { - BEGIN_RING(kelvin, NV20TCL_TX_ENABLE(i), 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV20TCL_TX_ENABLE(i), 1); + OUT_RING (chan, 0); } - BEGIN_RING(kelvin, NV20TCL_TX_SHADER_OP, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_TX_SHADER_CULL_MODE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_RC_IN_ALPHA(0), 4); - OUT_RING (0x30d410d0); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_RC_OUT_RGB(0), 4); - OUT_RING (0x00000c00); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_RC_ENABLE, 1); - OUT_RING (0x00011101); - BEGIN_RING(kelvin, NV20TCL_RC_FINAL0, 2); - OUT_RING (0x130e0300); - OUT_RING (0x0c091c80); - BEGIN_RING(kelvin, NV20TCL_RC_OUT_ALPHA(0), 4); - OUT_RING (0x00000c00); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_RC_IN_RGB(0), 4); - OUT_RING (0x20c400c0); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_RC_COLOR0, 2); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_RC_CONSTANT_COLOR0(0), 4); - OUT_RING (0x035125a0); - OUT_RING (0); - OUT_RING (0x40002000); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_MULTISAMPLE_CONTROL, 1); - OUT_RING (0xffff0000); - - BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_DITHER_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_STENCIL_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_SRC, 4); - OUT_RING (NV20TCL_BLEND_FUNC_SRC_ONE); - OUT_RING (NV20TCL_BLEND_FUNC_DST_ZERO); - OUT_RING (0); /* NV20TCL_BLEND_COLOR */ - OUT_RING (NV20TCL_BLEND_EQUATION_FUNC_ADD); - BEGIN_RING(kelvin, NV20TCL_STENCIL_MASK, 7); - OUT_RING (0xff); - OUT_RING (NV20TCL_STENCIL_FUNC_FUNC_ALWAYS); - OUT_RING (0); /* NV20TCL_STENCIL_FUNC_REF */ - OUT_RING (0xff); /* NV20TCL_STENCIL_FUNC_MASK */ - OUT_RING (NV20TCL_STENCIL_OP_FAIL_KEEP); - OUT_RING (NV20TCL_STENCIL_OP_ZFAIL_KEEP); - OUT_RING (NV20TCL_STENCIL_OP_ZPASS_KEEP); - - BEGIN_RING(kelvin, NV20TCL_COLOR_LOGIC_OP_ENABLE, 2); - OUT_RING (0); - OUT_RING (NV20TCL_COLOR_LOGIC_OP_OP_COPY); - BEGIN_RING(kelvin, 0x17cc, 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV20TCL_TX_SHADER_OP, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_TX_SHADER_CULL_MODE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_RC_IN_ALPHA(0), 4); + OUT_RING (chan, 0x30d410d0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_RC_OUT_RGB(0), 4); + OUT_RING (chan, 0x00000c00); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_RC_ENABLE, 1); + OUT_RING (chan, 0x00011101); + BEGIN_RING(chan, kelvin, NV20TCL_RC_FINAL0, 2); + OUT_RING (chan, 0x130e0300); + OUT_RING (chan, 0x0c091c80); + BEGIN_RING(chan, kelvin, NV20TCL_RC_OUT_ALPHA(0), 4); + OUT_RING (chan, 0x00000c00); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_RC_IN_RGB(0), 4); + OUT_RING (chan, 0x20c400c0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_RC_COLOR0, 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_RC_CONSTANT_COLOR0(0), 4); + OUT_RING (chan, 0x035125a0); + OUT_RING (chan, 0); + OUT_RING (chan, 0x40002000); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_MULTISAMPLE_CONTROL, 1); + OUT_RING (chan, 0xffff0000); + + BEGIN_RING(chan, kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_DITHER_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_STENCIL_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_BLEND_FUNC_SRC, 4); + OUT_RING (chan, NV20TCL_BLEND_FUNC_SRC_ONE); + OUT_RING (chan, NV20TCL_BLEND_FUNC_DST_ZERO); + OUT_RING (chan, 0); /* NV20TCL_BLEND_COLOR */ + OUT_RING (chan, NV20TCL_BLEND_EQUATION_FUNC_ADD); + BEGIN_RING(chan, kelvin, NV20TCL_STENCIL_MASK, 7); + OUT_RING (chan, 0xff); + OUT_RING (chan, NV20TCL_STENCIL_FUNC_FUNC_ALWAYS); + OUT_RING (chan, 0); /* NV20TCL_STENCIL_FUNC_REF */ + OUT_RING (chan, 0xff); /* NV20TCL_STENCIL_FUNC_MASK */ + OUT_RING (chan, NV20TCL_STENCIL_OP_FAIL_KEEP); + OUT_RING (chan, NV20TCL_STENCIL_OP_ZFAIL_KEEP); + OUT_RING (chan, NV20TCL_STENCIL_OP_ZPASS_KEEP); + + BEGIN_RING(chan, kelvin, NV20TCL_COLOR_LOGIC_OP_ENABLE, 2); + OUT_RING (chan, 0); + OUT_RING (chan, NV20TCL_COLOR_LOGIC_OP_OP_COPY); + BEGIN_RING(chan, kelvin, 0x17cc, 1); + OUT_RING (chan, 0); if (is_nv25tcl) { - BEGIN_RING(kelvin, 0x1d84, 1); - OUT_RING (1); + BEGIN_RING(chan, kelvin, 0x1d84, 1); + OUT_RING (chan, 1); } - BEGIN_RING(kelvin, NV20TCL_LIGHTING_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_LIGHT_CONTROL, 1); - OUT_RING (0x00020000); - BEGIN_RING(kelvin, NV20TCL_SEPARATE_SPECULAR_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_LIGHT_MODEL_TWO_SIDE_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_ENABLED_LIGHTS, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_NORMALIZE_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_POLYGON_STIPPLE_PATTERN(0), + BEGIN_RING(chan, kelvin, NV20TCL_LIGHTING_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_LIGHT_CONTROL, 1); + OUT_RING (chan, 0x00020000); + BEGIN_RING(chan, kelvin, NV20TCL_SEPARATE_SPECULAR_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_LIGHT_MODEL_TWO_SIDE_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_ENABLED_LIGHTS, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_NORMALIZE_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_STIPPLE_PATTERN(0), NV20TCL_POLYGON_STIPPLE_PATTERN__SIZE); for (i = 0; i < NV20TCL_POLYGON_STIPPLE_PATTERN__SIZE; ++i) { - OUT_RING(0xffffffff); + OUT_RING(chan, 0xffffffff); } - BEGIN_RING(kelvin, NV20TCL_POLYGON_OFFSET_POINT_ENABLE, 3); - OUT_RING (0); - OUT_RING (0); /* NV20TCL.POLYGON_OFFSET_LINE_ENABLE */ - OUT_RING (0); /* NV20TCL.POLYGON_OFFSET_FILL_ENABLE */ - BEGIN_RING(kelvin, NV20TCL_DEPTH_FUNC, 1); - OUT_RING (NV20TCL_DEPTH_FUNC_LESS); - BEGIN_RING(kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_POLYGON_OFFSET_FACTOR, 2); - OUT_RINGf (0.0); - OUT_RINGf (0.0); /* NV20TCL.POLYGON_OFFSET_UNITS */ - BEGIN_RING(kelvin, NV20TCL_DEPTH_UNK17D8, 1); - OUT_RING (1); + BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_OFFSET_POINT_ENABLE, 3); + OUT_RING (chan, 0); + OUT_RING (chan, 0); /* NV20TCL.POLYGON_OFFSET_LINE_ENABLE */ + OUT_RING (chan, 0); /* NV20TCL.POLYGON_OFFSET_FILL_ENABLE */ + BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_FUNC, 1); + OUT_RING (chan, NV20TCL_DEPTH_FUNC_LESS); + BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_OFFSET_FACTOR, 2); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 0.0); /* NV20TCL.POLYGON_OFFSET_UNITS */ + BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_UNK17D8, 1); + OUT_RING (chan, 1); if (!is_nv25tcl) { - BEGIN_RING(kelvin, 0x1d84, 1); - OUT_RING (3); + BEGIN_RING(chan, kelvin, 0x1d84, 1); + OUT_RING (chan, 3); } - BEGIN_RING(kelvin, NV20TCL_POINT_SIZE, 1); + BEGIN_RING(chan, kelvin, NV20TCL_POINT_SIZE, 1); if (!is_nv25tcl) { - OUT_RING (8); + OUT_RING (chan, 8); } else { - OUT_RINGf (1.0); + OUT_RINGf (chan, 1.0); } if (!is_nv25tcl) { - BEGIN_RING(kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 2); - OUT_RING (0); - OUT_RING (0); /* NV20TCL.POINT_SMOOTH_ENABLE */ + BEGIN_RING(chan, kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); /* NV20TCL.POINT_SMOOTH_ENABLE */ } else { - BEGIN_RING(kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, 0x0a1c, 1); - OUT_RING (0x800); + BEGIN_RING(chan, kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, 0x0a1c, 1); + OUT_RING (chan, 0x800); } - BEGIN_RING(kelvin, NV20TCL_LINE_WIDTH, 1); - OUT_RING (8); - BEGIN_RING(kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_POLYGON_MODE_FRONT, 2); - OUT_RING (NV20TCL_POLYGON_MODE_FRONT_FILL); - OUT_RING (NV20TCL_POLYGON_MODE_BACK_FILL); - BEGIN_RING(kelvin, NV20TCL_CULL_FACE, 2); - OUT_RING (NV20TCL_CULL_FACE_BACK); - OUT_RING (NV20TCL_FRONT_FACE_CCW); - BEGIN_RING(kelvin, NV20TCL_POLYGON_SMOOTH_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_CULL_FACE_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_SHADE_MODEL, 1); - OUT_RING (NV20TCL_SHADE_MODEL_SMOOTH); - BEGIN_RING(kelvin, NV20TCL_POLYGON_STIPPLE_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_TX_GEN_S(0), 4 * NV20TCL_TX_GEN_S__SIZE); + BEGIN_RING(chan, kelvin, NV20TCL_LINE_WIDTH, 1); + OUT_RING (chan, 8); + BEGIN_RING(chan, kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_MODE_FRONT, 2); + OUT_RING (chan, NV20TCL_POLYGON_MODE_FRONT_FILL); + OUT_RING (chan, NV20TCL_POLYGON_MODE_BACK_FILL); + BEGIN_RING(chan, kelvin, NV20TCL_CULL_FACE, 2); + OUT_RING (chan, NV20TCL_CULL_FACE_BACK); + OUT_RING (chan, NV20TCL_FRONT_FACE_CCW); + BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_SMOOTH_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_CULL_FACE_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_SHADE_MODEL, 1); + OUT_RING (chan, NV20TCL_SHADE_MODEL_SMOOTH); + BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_STIPPLE_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_TX_GEN_S(0), 4 * NV20TCL_TX_GEN_S__SIZE); for (i=0; i < 4 * NV20TCL_TX_GEN_S__SIZE; ++i) { - OUT_RING(0); + OUT_RING(chan, 0); } - BEGIN_RING(kelvin, NV20TCL_FOG_EQUATION_CONSTANT, 3); - OUT_RINGf (1.5); - OUT_RINGf (-0.090168); /* NV20TCL.FOG_EQUATION_LINEAR */ - OUT_RINGf (0.0); /* NV20TCL.FOG_EQUATION_QUADRATIC */ - BEGIN_RING(kelvin, NV20TCL_FOG_MODE, 2); - OUT_RING (NV20TCL_FOG_MODE_EXP_2); - OUT_RING (NV20TCL_FOG_COORD_DIST_COORD_FOG); - BEGIN_RING(kelvin, NV20TCL_FOG_ENABLE, 2); - OUT_RING (0); - OUT_RING (0); /* NV20TCL.FOG_COLOR */ - BEGIN_RING(kelvin, NV20TCL_ENGINE, 1); - OUT_RING (NV20TCL_ENGINE_FIXED); + BEGIN_RING(chan, kelvin, NV20TCL_FOG_EQUATION_CONSTANT, 3); + OUT_RINGf (chan, 1.5); + OUT_RINGf (chan, -0.090168); /* NV20TCL.FOG_EQUATION_LINEAR */ + OUT_RINGf (chan, 0.0); /* NV20TCL.FOG_EQUATION_QUADRATIC */ + BEGIN_RING(chan, kelvin, NV20TCL_FOG_MODE, 2); + OUT_RING (chan, NV20TCL_FOG_MODE_EXP_2); + OUT_RING (chan, NV20TCL_FOG_COORD_DIST_COORD_FOG); + BEGIN_RING(chan, kelvin, NV20TCL_FOG_ENABLE, 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); /* NV20TCL.FOG_COLOR */ + BEGIN_RING(chan, kelvin, NV20TCL_ENGINE, 1); + OUT_RING (chan, NV20TCL_ENGINE_FIXED); for (i = 0; i < NV20TCL_TX_MATRIX_ENABLE__SIZE; ++i) { - BEGIN_RING(kelvin, NV20TCL_TX_MATRIX_ENABLE(i), 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV20TCL_TX_MATRIX_ENABLE(i), 1); + OUT_RING (chan, 0); } - BEGIN_RING(kelvin, NV20TCL_VTX_ATTR_4F_X(1), 4 * 15); - OUT_RINGf(1.0); OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(1.0); - OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(1.0); OUT_RINGf(1.0); - OUT_RINGf(1.0); OUT_RINGf(1.0); OUT_RINGf(1.0); OUT_RINGf(1.0); + BEGIN_RING(chan, kelvin, NV20TCL_VTX_ATTR_4F_X(1), 4 * 15); + OUT_RINGf(chan, 1.0); OUT_RINGf(chan, 0.0); OUT_RINGf(chan, 0.0); OUT_RINGf(chan, 1.0); + OUT_RINGf(chan, 0.0); OUT_RINGf(chan, 0.0); OUT_RINGf(chan, 1.0); OUT_RINGf(chan, 1.0); + OUT_RINGf(chan, 1.0); OUT_RINGf(chan, 1.0); OUT_RINGf(chan, 1.0); OUT_RINGf(chan, 1.0); for (i = 4; i < 16; ++i) { - OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(1.0); + OUT_RINGf(chan, 0.0); + OUT_RINGf(chan, 0.0); + OUT_RINGf(chan, 0.0); + OUT_RINGf(chan, 1.0); } - BEGIN_RING(kelvin, NV20TCL_EDGEFLAG_ENABLE, 1); - OUT_RING (1); - BEGIN_RING(kelvin, NV20TCL_COLOR_MASK, 1); - OUT_RING (0x00010101); - BEGIN_RING(kelvin, NV20TCL_CLEAR_VALUE, 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV20TCL_EDGEFLAG_ENABLE, 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, kelvin, NV20TCL_COLOR_MASK, 1); + OUT_RING (chan, 0x00010101); + BEGIN_RING(chan, kelvin, NV20TCL_CLEAR_VALUE, 1); + OUT_RING (chan, 0); memset(projectionmatrix, 0, sizeof(projectionmatrix)); projectionmatrix[0*4+0] = 1.0; projectionmatrix[1*4+1] = 1.0; projectionmatrix[2*4+2] = 16777215.0; projectionmatrix[3*4+3] = 1.0; - BEGIN_RING(kelvin, NV20TCL_PROJECTION_MATRIX(0), 16); + BEGIN_RING(chan, kelvin, NV20TCL_PROJECTION_MATRIX(0), 16); for (i = 0; i < 16; i++) { - OUT_RINGf (projectionmatrix[i]); + OUT_RINGf (chan, projectionmatrix[i]); } - BEGIN_RING(kelvin, NV20TCL_DEPTH_RANGE_NEAR, 2); - OUT_RINGf (0.0); - OUT_RINGf (16777216.0); /* [0, 1] scaled approx to [0, 2^24] */ - - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_TRANSLATE_X, 4); - OUT_RINGf (0.0); /* x-offset, w/2 + 1.031250 */ - OUT_RINGf (0.0); /* y-offset, h/2 + 0.030762 */ - OUT_RINGf (0.0); - OUT_RINGf (16777215.0); + BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_RANGE_NEAR, 2); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 16777216.0); /* [0, 1] scaled approx to [0, 2^24] */ - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_SCALE_X, 4); - OUT_RINGf (0.0); /* no effect?, w/2 */ - OUT_RINGf (0.0); /* no effect?, h/2 */ - OUT_RINGf (16777215.0 * 0.5); - OUT_RINGf (65535.0); + BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_TRANSLATE_X, 4); + OUT_RINGf (chan, 0.0); /* x-offset, w/2 + 1.031250 */ + OUT_RINGf (chan, 0.0); /* y-offset, h/2 + 0.030762 */ + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 16777215.0); - FIRE_RING (NULL); -} + BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_SCALE_X, 4); + OUT_RINGf (chan, 0.0); /* no effect?, w/2 */ + OUT_RINGf (chan, 0.0); /* no effect?, h/2 */ + OUT_RINGf (chan, 16777215.0 * 0.5); + OUT_RINGf (chan, 65535.0); -static void -nv20_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield) -{ + FIRE_RING (chan); } struct pipe_context * @@ -399,7 +402,6 @@ nv20_create(struct pipe_screen *pscreen, unsigned pctx_id) nv20->pipe.winsys = ws; nv20->pipe.screen = pscreen; nv20->pipe.destroy = nv20_destroy; - nv20->pipe.set_edgeflags = nv20_set_edgeflags; nv20->pipe.draw_arrays = nv20_draw_arrays; nv20->pipe.draw_elements = nv20_draw_elements; nv20->pipe.clear = nv20_clear; diff --git a/src/gallium/drivers/nv20/nv20_context.h b/src/gallium/drivers/nv20/nv20_context.h index a4eaa956608..c7dfadaa311 100644 --- a/src/gallium/drivers/nv20/nv20_context.h +++ b/src/gallium/drivers/nv20/nv20_context.h @@ -15,10 +15,6 @@ #include "nouveau/nouveau_gldefs.h" #include "nouveau/nouveau_context.h" -#define NOUVEAU_PUSH_CONTEXT(ctx) \ - struct nv20_screen *ctx = nv20->screen -#include "nouveau/nouveau_push.h" - #include "nv20_state.h" #define NOUVEAU_ERR(fmt, args...) \ @@ -143,9 +139,9 @@ extern void nv20_emit_hw_state(struct nv20_context *nv20); extern void nv20_state_tex_update(struct nv20_context *nv20); /* nv20_vbo.c */ -extern boolean nv20_draw_arrays(struct pipe_context *, unsigned mode, +extern void nv20_draw_arrays(struct pipe_context *, unsigned mode, unsigned start, unsigned count); -extern boolean nv20_draw_elements( struct pipe_context *pipe, +extern void nv20_draw_elements( struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned prim, unsigned start, unsigned count); diff --git a/src/gallium/drivers/nv20/nv20_fragtex.c b/src/gallium/drivers/nv20/nv20_fragtex.c index 495a7be9127..dedbec73f39 100644 --- a/src/gallium/drivers/nv20/nv20_fragtex.c +++ b/src/gallium/drivers/nv20/nv20_fragtex.c @@ -52,6 +52,9 @@ nv20_fragtex_build(struct nv20_context *nv20, int unit) struct nv20_miptree *nv20mt = nv20->tex_miptree[unit]; struct pipe_texture *pt = &nv20mt->base; struct nv20_texture_format *tf; + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; uint32_t txf, txs, txp; tf = nv20_fragtex_format(pt->format); @@ -62,9 +65,9 @@ nv20_fragtex_build(struct nv20_context *nv20, int unit) txf = tf->format << 8; txf |= (pt->last_level + 1) << 16; - txf |= log2i(pt->width[0]) << 20; - txf |= log2i(pt->height[0]) << 24; - txf |= log2i(pt->depth[0]) << 28; + txf |= log2i(pt->width0) << 20; + txf |= log2i(pt->height0) << 24; + txf |= log2i(pt->depth0) << 28; txf |= 8; switch (pt->target) { @@ -82,15 +85,15 @@ nv20_fragtex_build(struct nv20_context *nv20, int unit) return; } - BEGIN_RING(kelvin, NV10TCL_TX_OFFSET(unit), 8); - OUT_RELOCl(nv20mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - OUT_RELOCd(nv20mt->buffer,txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); - OUT_RING (ps->wrap); - OUT_RING (0x40000000); /* enable */ - OUT_RING (txs); - OUT_RING (ps->filt | 0x2000 /* magic */); - OUT_RING ((pt->width[0] << 16) | pt->height[0]); - OUT_RING (ps->bcol); + BEGIN_RING(chan, kelvin, NV10TCL_TX_OFFSET(unit), 8); + OUT_RELOCl(chan, nouveau_bo(nv20mt->buffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCd(chan, nouveau_bo(nv20mt->buffer),txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); + OUT_RING (chan, ps->wrap); + OUT_RING (chan, 0x40000000); /* enable */ + OUT_RING (chan, txs); + OUT_RING (chan, ps->filt | 0x2000 /* magic */); + OUT_RING (chan, (pt->width0 << 16) | pt->height0); + OUT_RING (chan, ps->bcol); #endif } @@ -99,6 +102,9 @@ nv20_fragtex_bind(struct nv20_context *nv20) { #if 0 struct nv20_fragment_program *fp = nv20->fragprog.active; + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; unsigned samplers, unit; samplers = nv20->fp_samplers & ~fp->samplers; @@ -106,8 +112,8 @@ nv20_fragtex_bind(struct nv20_context *nv20) unit = ffs(samplers) - 1; samplers &= ~(1 << unit); - BEGIN_RING(kelvin, NV10TCL_TX_ENABLE(unit), 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV10TCL_TX_ENABLE(unit), 1); + OUT_RING (chan, 0); } samplers = nv20->dirty_samplers & fp->samplers; diff --git a/src/gallium/drivers/nv20/nv20_miptree.c b/src/gallium/drivers/nv20/nv20_miptree.c index 185fbf53e0f..8f7538e7f57 100644 --- a/src/gallium/drivers/nv20/nv20_miptree.c +++ b/src/gallium/drivers/nv20/nv20_miptree.c @@ -1,15 +1,18 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" +#include "util/u_format.h" +#include "util/u_math.h" #include "nv20_context.h" #include "nv20_screen.h" +#include "../nv04/nv04_surface_2d.h" static void nv20_miptree_layout(struct nv20_miptree *nv20mt) { struct pipe_texture *pt = &nv20mt->base; - uint width = pt->width[0], height = pt->height[0]; + uint width = pt->width0; uint offset = 0; int nr_faces, l, f; uint wide_pitch = pt->tex_usage & (PIPE_TEXTURE_USAGE_SAMPLER | @@ -25,21 +28,15 @@ nv20_miptree_layout(struct nv20_miptree *nv20mt) } for (l = 0; l <= pt->last_level; l++) { - pt->width[l] = width; - pt->height[l] = height; - pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); - pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); - if (wide_pitch && (pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) - nv20mt->level[l].pitch = align(pt->width[0] * pt->block.size, 64); + nv20mt->level[l].pitch = align(util_format_get_stride(pt->format, pt->width0), 64); else - nv20mt->level[l].pitch = pt->width[l] * pt->block.size; + nv20mt->level[l].pitch = util_format_get_stride(pt->format, width); nv20mt->level[l].image_offset = CALLOC(nr_faces, sizeof(unsigned)); - width = MAX2(1, width >> 1); - height = MAX2(1, height >> 1); + width = u_minify(width, 1); } for (f = 0; f < nr_faces; f++) { @@ -47,14 +44,14 @@ nv20_miptree_layout(struct nv20_miptree *nv20mt) nv20mt->level[l].image_offset[f] = offset; if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR) && - pt->width[l + 1] > 1 && pt->height[l + 1] > 1) - offset += align(nv20mt->level[l].pitch * pt->height[l], 64); + u_minify(pt->width0, l + 1) > 1 && u_minify(pt->height0, l + 1) > 1) + offset += align(nv20mt->level[l].pitch * u_minify(pt->height0, l), 64); else - offset += nv20mt->level[l].pitch * pt->height[l]; + offset += nv20mt->level[l].pitch * u_minify(pt->height0, l); } nv20mt->level[l].image_offset[f] = offset; - offset += nv20mt->level[l].pitch * pt->height[l]; + offset += nv20mt->level[l].pitch * u_minify(pt->height0, l); } nv20mt->total_size = offset; @@ -68,7 +65,7 @@ nv20_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, /* Only supports 2D, non-mipmapped textures for the moment */ if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 || - pt->depth[0] != 1) + pt->depth0 != 1) return NULL; mt = CALLOC_STRUCT(nv20_miptree); @@ -82,6 +79,7 @@ nv20_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, mt->level[0].image_offset = CALLOC(1, sizeof(unsigned)); pipe_buffer_reference(&mt->buffer, pb); + mt->bo = nouveau_bo(mt->buffer); return &mt->base; } @@ -100,8 +98,8 @@ nv20_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt) mt->base.screen = screen; /* Swizzled textures must be POT */ - if (pt->width[0] & (pt->width[0] - 1) || - pt->height[0] & (pt->height[0] - 1)) + if (pt->width0 & (pt->width0 - 1) || + pt->height0 & (pt->height0 - 1)) mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; else if (pt->tex_usage & (PIPE_TEXTURE_USAGE_PRIMARY | @@ -130,6 +128,12 @@ nv20_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt) if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE; + /* apparently we can't render to swizzled surfaces smaller than 64 bytes, so make them linear. + * If the user did not ask for a render target, they can still render to it, but it will cost them an extra copy. + * This also happens for small mipmaps of large textures. */ + if (pt->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET && util_format_get_stride(pt->format, pt->width0) < 64) + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + nv20_miptree_layout(mt); mt->buffer = screen->buffer_create(screen, 256, buf_usage, mt->total_size); @@ -137,6 +141,7 @@ nv20_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt) FREE(mt); return NULL; } + mt->bo = nouveau_bo(mt->buffer); return &mt->base; } @@ -167,8 +172,8 @@ nv20_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt, return NULL; pipe_texture_reference(&ns->base.texture, pt); ns->base.format = pt->format; - ns->base.width = pt->width[level]; - ns->base.height = pt->height[level]; + ns->base.width = u_minify(pt->width0, level); + ns->base.height = u_minify(pt->height0, level); ns->base.usage = flags; pipe_reference_init(&ns->base.reference, 1); ns->base.face = face; @@ -185,12 +190,27 @@ nv20_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt, ns->base.offset = nv20mt->level[level].image_offset[0]; } + /* create a linear temporary that we can render into if necessary. + * Note that ns->pitch is always a multiple of 64 for linear surfaces and swizzled surfaces are POT, so + * ns->pitch & 63 is equivalent to (ns->pitch < 64 && swizzled)*/ + if((ns->pitch & 63) && (ns->base.usage & (PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER)) == PIPE_BUFFER_USAGE_GPU_WRITE) + return &nv04_surface_wrap_for_render(screen, ((struct nv20_screen*)screen)->eng2d, ns)->base; + return &ns->base; } static void nv20_miptree_surface_destroy(struct pipe_surface *ps) { + struct nv04_surface* ns = (struct nv04_surface*)ps; + if(ns->backing) + { + struct nv20_screen* screen = (struct nv20_screen*)ps->texture->screen; + if(ns->backing->base.usage & PIPE_BUFFER_USAGE_GPU_WRITE) + screen->eng2d->copy(screen->eng2d, &ns->backing->base, 0, 0, ps, 0, 0, ns->base.width, ns->base.height); + nv20_miptree_surface_destroy(&ns->backing->base); + } + pipe_texture_reference(&ps->texture, NULL); FREE(ps); } diff --git a/src/gallium/drivers/nv20/nv20_prim_vbuf.c b/src/gallium/drivers/nv20/nv20_prim_vbuf.c index ddfcdb8057a..2e145672da1 100644 --- a/src/gallium/drivers/nv20/nv20_prim_vbuf.c +++ b/src/gallium/drivers/nv20/nv20_prim_vbuf.c @@ -81,12 +81,15 @@ nv20_vbuf_render(struct vbuf_render *render) void nv20_vtxbuf_bind( struct nv20_context* nv20 ) { #if 0 + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; int i; for(i = 0; i < NV20TCL_VTXBUF_ADDRESS__SIZE; i++) { - BEGIN_RING(kelvin, NV20TCL_VTXBUF_ADDRESS(i), 1); - OUT_RING(0/*nv20->vtxbuf*/); - BEGIN_RING(kelvin, NV20TCL_VTXFMT(i) ,1); - OUT_RING(0/*XXX*/); + BEGIN_RING(chan, kelvin, NV20TCL_VTXBUF_ADDRESS(i), 1); + OUT_RING(chan, 0/*nv20->vtxbuf*/); + BEGIN_RING(chan, kelvin, NV20TCL_VTXFMT(i) ,1); + OUT_RING(chan, 0/*XXX*/); } #endif } @@ -202,6 +205,9 @@ nv20__vtxhwformat(unsigned stride, unsigned fields, unsigned type) static unsigned nv20__emit_format(struct nv20_context *nv20, enum attrib_emit type, int hwattr) { + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; uint32_t hwfmt = 0; unsigned fields; @@ -231,8 +237,8 @@ nv20__emit_format(struct nv20_context *nv20, enum attrib_emit type, int hwattr) return 0; } - BEGIN_RING(kelvin, NV20TCL_VTXFMT(hwattr), 1); - OUT_RING(hwfmt); + BEGIN_RING(chan, kelvin, NV20TCL_VTXFMT(hwattr), 1); + OUT_RING(chan, hwfmt); return fields; } @@ -262,6 +268,9 @@ nv20__draw_mbuffer(struct nv20_vbuf_render *nv20_render, uint nr_indices) { struct nv20_context *nv20 = nv20_render->nv20; + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; struct vertex_info *vinfo = &nv20->vertex_info; unsigned nr_fields; int max_push; @@ -270,29 +279,29 @@ nv20__draw_mbuffer(struct nv20_vbuf_render *nv20_render, nr_fields = nv20__emit_vertex_array_format(nv20); - BEGIN_RING(kelvin, NV20TCL_VERTEX_BEGIN_END, 1); - OUT_RING(nv20_render->hwprim); + BEGIN_RING(chan, kelvin, NV20TCL_VERTEX_BEGIN_END, 1); + OUT_RING(chan, nv20_render->hwprim); max_push = 1200 / nr_fields; while (nr_indices) { int i; int push = MIN2(nr_indices, max_push); - BEGIN_RING_NI(kelvin, NV20TCL_VERTEX_DATA, push * nr_fields); + BEGIN_RING_NI(chan, kelvin, NV20TCL_VERTEX_DATA, push * nr_fields); for (i = 0; i < push; i++) { /* XXX: fixme to handle other than floats? */ int f = nr_fields; float *attrv = (float*)&data[indices[i] * vsz]; while (f-- > 0) - OUT_RINGf(*attrv++); + OUT_RINGf(chan, *attrv++); } nr_indices -= push; indices += push; } - BEGIN_RING(kelvin, NV20TCL_VERTEX_BEGIN_END, 1); - OUT_RING(NV20TCL_VERTEX_BEGIN_END_STOP); + BEGIN_RING(chan, kelvin, NV20TCL_VERTEX_BEGIN_END, 1); + OUT_RING(chan, NV20TCL_VERTEX_BEGIN_END_STOP); } static void @@ -301,20 +310,23 @@ nv20__draw_pbuffer(struct nv20_vbuf_render *nv20_render, uint nr_indices) { struct nv20_context *nv20 = nv20_render->nv20; + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; int push, i; NOUVEAU_ERR("nv20__draw_pbuffer: this path is broken.\n"); - BEGIN_RING(kelvin, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1); - OUT_RELOCl(nv20_render->pbuffer, 0, + BEGIN_RING(chan, kelvin, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1); + OUT_RELOCl(chan, nouveau_bo(nv20_render->pbuffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - BEGIN_RING(kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); - OUT_RING(nv20_render->hwprim); + BEGIN_RING(chan, kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); + OUT_RING(chan, nv20_render->hwprim); if (nr_indices & 1) { - BEGIN_RING(kelvin, NV10TCL_VB_ELEMENT_U32, 1); - OUT_RING (indices[0]); + BEGIN_RING(chan, kelvin, NV10TCL_VB_ELEMENT_U32, 1); + OUT_RING (chan, indices[0]); indices++; nr_indices--; } @@ -322,16 +334,16 @@ nv20__draw_pbuffer(struct nv20_vbuf_render *nv20_render, // XXX too big/small ? check the size push = MIN2(nr_indices, 1200 * 2); - BEGIN_RING_NI(kelvin, NV10TCL_VB_ELEMENT_U16, push >> 1); + BEGIN_RING_NI(chan, kelvin, NV10TCL_VB_ELEMENT_U16, push >> 1); for (i = 0; i < push; i+=2) - OUT_RING((indices[i+1] << 16) | indices[i]); + OUT_RING(chan, (indices[i+1] << 16) | indices[i]); nr_indices -= push; indices += push; } - BEGIN_RING(kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); + OUT_RING (chan, 0); } static void diff --git a/src/gallium/drivers/nv20/nv20_screen.c b/src/gallium/drivers/nv20/nv20_screen.c index 4eeacd1afd5..d091335063b 100644 --- a/src/gallium/drivers/nv20/nv20_screen.c +++ b/src/gallium/drivers/nv20/nv20_screen.c @@ -115,6 +115,9 @@ nv20_screen_destroy(struct pipe_screen *pscreen) nouveau_notifier_free(&screen->sync); nouveau_grobj_free(&screen->kelvin); + nv04_surface_2d_takedown(&screen->eng2d); + + nouveau_screen_fini(&screen->base); FREE(pscreen); } @@ -173,7 +176,6 @@ nv20_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) NOUVEAU_ERR("Error creating 3D object: %d\n", ret); return FALSE; } - BIND_RING(chan, screen->kelvin, 7); /* 2D engine setup */ screen->eng2d = nv04_surface_2d_init(&screen->base); diff --git a/src/gallium/drivers/nv20/nv20_state.c b/src/gallium/drivers/nv20/nv20_state.c index ed4084980f2..3a82e63423d 100644 --- a/src/gallium/drivers/nv20/nv20_state.c +++ b/src/gallium/drivers/nv20/nv20_state.c @@ -546,9 +546,9 @@ nv20_init_state_functions(struct nv20_context *nv20) nv20->pipe.delete_blend_state = nv20_blend_state_delete; nv20->pipe.create_sampler_state = nv20_sampler_state_create; - nv20->pipe.bind_sampler_states = nv20_sampler_state_bind; + nv20->pipe.bind_fragment_sampler_states = nv20_sampler_state_bind; nv20->pipe.delete_sampler_state = nv20_sampler_state_delete; - nv20->pipe.set_sampler_textures = nv20_set_sampler_texture; + nv20->pipe.set_fragment_sampler_textures = nv20_set_sampler_texture; nv20->pipe.create_rasterizer_state = nv20_rasterizer_state_create; nv20->pipe.bind_rasterizer_state = nv20_rasterizer_state_bind; diff --git a/src/gallium/drivers/nv20/nv20_state.h b/src/gallium/drivers/nv20/nv20_state.h index 34f402fdcbf..dde41065685 100644 --- a/src/gallium/drivers/nv20/nv20_state.h +++ b/src/gallium/drivers/nv20/nv20_state.h @@ -126,6 +126,7 @@ struct nv20_depth_stencil_alpha_state { struct nv20_miptree { struct pipe_texture base; + struct nouveau_bo *bo; struct pipe_buffer *buffer; uint total_size; diff --git a/src/gallium/drivers/nv20/nv20_state_emit.c b/src/gallium/drivers/nv20/nv20_state_emit.c index 0122b1c2cdb..6bbd1fdae9d 100644 --- a/src/gallium/drivers/nv20/nv20_state_emit.c +++ b/src/gallium/drivers/nv20/nv20_state_emit.c @@ -5,27 +5,34 @@ static void nv20_state_emit_blend(struct nv20_context* nv20) { struct nv20_blend_state *b = nv20->blend; + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; - BEGIN_RING(kelvin, NV20TCL_DITHER_ENABLE, 1); - OUT_RING (b->d_enable); + BEGIN_RING(chan, kelvin, NV20TCL_DITHER_ENABLE, 1); + OUT_RING (chan, b->d_enable); - BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1); - OUT_RING (b->b_enable); + BEGIN_RING(chan, kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1); + OUT_RING (chan, b->b_enable); - BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_SRC, 2); - OUT_RING (b->b_srcfunc); - OUT_RING (b->b_dstfunc); + BEGIN_RING(chan, kelvin, NV20TCL_BLEND_FUNC_SRC, 2); + OUT_RING (chan, b->b_srcfunc); + OUT_RING (chan, b->b_dstfunc); - BEGIN_RING(kelvin, NV20TCL_COLOR_MASK, 1); - OUT_RING (b->c_mask); + BEGIN_RING(chan, kelvin, NV20TCL_COLOR_MASK, 1); + OUT_RING (chan, b->c_mask); } static void nv20_state_emit_blend_color(struct nv20_context* nv20) { struct pipe_blend_color *c = nv20->blend_color; + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; - BEGIN_RING(kelvin, NV20TCL_BLEND_COLOR, 1); - OUT_RING ((float_to_ubyte(c->color[3]) << 24)| + BEGIN_RING(chan, kelvin, NV20TCL_BLEND_COLOR, 1); + OUT_RING (chan, + (float_to_ubyte(c->color[3]) << 24)| (float_to_ubyte(c->color[0]) << 16)| (float_to_ubyte(c->color[1]) << 8) | (float_to_ubyte(c->color[2]) << 0)); @@ -34,63 +41,69 @@ static void nv20_state_emit_blend_color(struct nv20_context* nv20) static void nv20_state_emit_rast(struct nv20_context* nv20) { struct nv20_rasterizer_state *r = nv20->rast; + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; - BEGIN_RING(kelvin, NV20TCL_SHADE_MODEL, 2); - OUT_RING (r->shade_model); - OUT_RING (r->line_width); + BEGIN_RING(chan, kelvin, NV20TCL_SHADE_MODEL, 2); + OUT_RING (chan, r->shade_model); + OUT_RING (chan, r->line_width); - BEGIN_RING(kelvin, NV20TCL_POINT_SIZE, 1); - OUT_RING (r->point_size); + BEGIN_RING(chan, kelvin, NV20TCL_POINT_SIZE, 1); + OUT_RING (chan, r->point_size); - BEGIN_RING(kelvin, NV20TCL_POLYGON_MODE_FRONT, 2); - OUT_RING (r->poly_mode_front); - OUT_RING (r->poly_mode_back); + BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_MODE_FRONT, 2); + OUT_RING (chan, r->poly_mode_front); + OUT_RING (chan, r->poly_mode_back); - BEGIN_RING(kelvin, NV20TCL_CULL_FACE, 2); - OUT_RING (r->cull_face); - OUT_RING (r->front_face); + BEGIN_RING(chan, kelvin, NV20TCL_CULL_FACE, 2); + OUT_RING (chan, r->cull_face); + OUT_RING (chan, r->front_face); - BEGIN_RING(kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 2); - OUT_RING (r->line_smooth_en); - OUT_RING (r->poly_smooth_en); + BEGIN_RING(chan, kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 2); + OUT_RING (chan, r->line_smooth_en); + OUT_RING (chan, r->poly_smooth_en); - BEGIN_RING(kelvin, NV20TCL_CULL_FACE_ENABLE, 1); - OUT_RING (r->cull_face_en); + BEGIN_RING(chan, kelvin, NV20TCL_CULL_FACE_ENABLE, 1); + OUT_RING (chan, r->cull_face_en); } static void nv20_state_emit_dsa(struct nv20_context* nv20) { struct nv20_depth_stencil_alpha_state *d = nv20->dsa; + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; - BEGIN_RING(kelvin, NV20TCL_DEPTH_FUNC, 1); - OUT_RING (d->depth.func); + BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_FUNC, 1); + OUT_RING (chan, d->depth.func); - BEGIN_RING(kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1); - OUT_RING (d->depth.write_enable); + BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1); + OUT_RING (chan, d->depth.write_enable); - BEGIN_RING(kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1); - OUT_RING (d->depth.test_enable); + BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1); + OUT_RING (chan, d->depth.test_enable); - BEGIN_RING(kelvin, NV20TCL_DEPTH_UNK17D8, 1); - OUT_RING (1); + BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_UNK17D8, 1); + OUT_RING (chan, 1); #if 0 - BEGIN_RING(kelvin, NV20TCL_STENCIL_ENABLE, 1); - OUT_RING (d->stencil.enable); - BEGIN_RING(kelvin, NV20TCL_STENCIL_MASK, 7); - OUT_RINGp ((uint32_t *)&(d->stencil.wmask), 7); + BEGIN_RING(chan, kelvin, NV20TCL_STENCIL_ENABLE, 1); + OUT_RING (chan, d->stencil.enable); + BEGIN_RING(chan, kelvin, NV20TCL_STENCIL_MASK, 7); + OUT_RINGp (chan, (uint32_t *)&(d->stencil.wmask), 7); #endif - BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1); - OUT_RING (d->alpha.enabled); + BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1); + OUT_RING (chan, d->alpha.enabled); - BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_FUNC, 1); - OUT_RING (d->alpha.func); + BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_FUNC, 1); + OUT_RING (chan, d->alpha.func); - BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_REF, 1); - OUT_RING (d->alpha.ref); + BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_REF, 1); + OUT_RING (chan, d->alpha.ref); } static void nv20_state_emit_viewport(struct nv20_context* nv20) @@ -101,9 +114,13 @@ static void nv20_state_emit_scissor(struct nv20_context* nv20) { /* NV20TCL_SCISSOR_* is probably a software method */ /* struct pipe_scissor_state *s = nv20->scissor; - BEGIN_RING(kelvin, NV20TCL_SCISSOR_HORIZ, 2); - OUT_RING (((s->maxx - s->minx) << 16) | s->minx); - OUT_RING (((s->maxy - s->miny) << 16) | s->miny);*/ + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; + + BEGIN_RING(chan, kelvin, NV20TCL_SCISSOR_HORIZ, 2); + OUT_RING (chan, ((s->maxx - s->minx) << 16) | s->minx); + OUT_RING (chan, ((s->maxy - s->miny) << 16) | s->miny);*/ } static void nv20_state_emit_framebuffer(struct nv20_context* nv20) @@ -113,6 +130,9 @@ static void nv20_state_emit_framebuffer(struct nv20_context* nv20) uint32_t rt_format, w, h; int colour_format = 0, zeta_format = 0; struct nv20_miptree *nv20mt = 0; + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; w = fb->cbufs[0]->width; h = fb->cbufs[0]->height; @@ -150,11 +170,11 @@ static void nv20_state_emit_framebuffer(struct nv20_context* nv20) } if (zeta) { - BEGIN_RING(kelvin, NV20TCL_RT_PITCH, 1); - OUT_RING (rt->pitch | (zeta->pitch << 16)); + BEGIN_RING(chan, kelvin, NV20TCL_RT_PITCH, 1); + OUT_RING (chan, rt->pitch | (zeta->pitch << 16)); } else { - BEGIN_RING(kelvin, NV20TCL_RT_PITCH, 1); - OUT_RING (rt->pitch | (rt->pitch << 16)); + BEGIN_RING(chan, kelvin, NV20TCL_RT_PITCH, 1); + OUT_RING (chan, rt->pitch | (rt->pitch << 16)); } nv20mt = (struct nv20_miptree *)rt->base.texture; @@ -166,13 +186,13 @@ static void nv20_state_emit_framebuffer(struct nv20_context* nv20) nv20->zeta = nv20mt->buffer; } - BEGIN_RING(kelvin, NV20TCL_RT_HORIZ, 3); - OUT_RING ((w << 16) | 0); - OUT_RING ((h << 16) | 0); /*NV20TCL_RT_VERT */ - OUT_RING (rt_format); /* NV20TCL_RT_FORMAT */ - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 2); - OUT_RING (((w - 1) << 16) | 0); - OUT_RING (((h - 1) << 16) | 0); + BEGIN_RING(chan, kelvin, NV20TCL_RT_HORIZ, 3); + OUT_RING (chan, (w << 16) | 0); + OUT_RING (chan, (h << 16) | 0); /*NV20TCL_RT_VERT */ + OUT_RING (chan, rt_format); /* NV20TCL_RT_FORMAT */ + BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 2); + OUT_RING (chan, ((w - 1) << 16) | 0); + OUT_RING (chan, ((h - 1) << 16) | 0); } static void nv20_vertex_layout(struct nv20_context *nv20) @@ -228,7 +248,7 @@ static void nv20_vertex_layout(struct nv20_context *nv20) } /* always do position */ { - src = draw_find_vs_output(dc, TGSI_SEMANTIC_POSITION, 0); + src = draw_find_shader_output(dc, TGSI_SEMANTIC_POSITION, 0); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR, src); vinfo->hwfmt[0] |= (1 << 0); } @@ -237,19 +257,19 @@ static void nv20_vertex_layout(struct nv20_context *nv20) for (i = 4; i < 6; i++) { if (!generics[i]) continue; - src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i); + src = draw_find_shader_output(dc, TGSI_SEMANTIC_GENERIC, i); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); vinfo->hwfmt[0] |= (1 << (i - 3)); } if (colors[0]) { - src = draw_find_vs_output(dc, TGSI_SEMANTIC_COLOR, 0); + src = draw_find_shader_output(dc, TGSI_SEMANTIC_COLOR, 0); draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); vinfo->hwfmt[0] |= (1 << 3); } if (colors[1]) { - src = draw_find_vs_output(dc, TGSI_SEMANTIC_COLOR, 1); + src = draw_find_shader_output(dc, TGSI_SEMANTIC_COLOR, 1); draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); vinfo->hwfmt[0] |= (1 << 4); } @@ -258,7 +278,7 @@ static void nv20_vertex_layout(struct nv20_context *nv20) for (i = 6; i < 10; i++) { if (!generics[i]) continue; - src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i); + src = draw_find_shader_output(dc, TGSI_SEMANTIC_GENERIC, i); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); vinfo->hwfmt[0] |= (1 << (i - 1)); } @@ -267,7 +287,7 @@ static void nv20_vertex_layout(struct nv20_context *nv20) for (i = 0; i < 4; i++) { if (!generics[i]) continue; - src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i); + src = draw_find_shader_output(dc, TGSI_SEMANTIC_GENERIC, i); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); vinfo->hwfmt[0] |= (1 << (i + 9)); } @@ -276,13 +296,13 @@ static void nv20_vertex_layout(struct nv20_context *nv20) for (i = 10; i < 12; i++) { if (!generics[i]) continue; - src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i); + src = draw_find_shader_output(dc, TGSI_SEMANTIC_GENERIC, i); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); vinfo->hwfmt[0] |= (1 << (i + 3)); } if (fog) { - src = draw_find_vs_output(dc, TGSI_SEMANTIC_FOG, 0); + src = draw_find_shader_output(dc, TGSI_SEMANTIC_FOG, 0); draw_emit_vertex_attr(vinfo, EMIT_1F, INTERP_PERSPECTIVE, src); vinfo->hwfmt[0] |= (1 << 15); } @@ -293,6 +313,10 @@ static void nv20_vertex_layout(struct nv20_context *nv20) void nv20_emit_hw_state(struct nv20_context *nv20) { + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; + struct nouveau_bo *rt_bo; int i; if (nv20->dirty & NV20_NEW_VERTPROG) { @@ -361,36 +385,39 @@ nv20_emit_hw_state(struct nv20_context *nv20) */ /* Render target */ - BEGIN_RING(kelvin, NV20TCL_DMA_COLOR, 1); - OUT_RELOCo(nv20->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(kelvin, NV20TCL_COLOR_OFFSET, 1); - OUT_RELOCl(nv20->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + rt_bo = nouveau_bo(nv20->rt[0]); + BEGIN_RING(chan, kelvin, NV20TCL_DMA_COLOR, 1); + OUT_RELOCo(chan, rt_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, kelvin, NV20TCL_COLOR_OFFSET, 1); + OUT_RELOCl(chan, rt_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); if (nv20->zeta) { - BEGIN_RING(kelvin, NV20TCL_DMA_ZETA, 1); - OUT_RELOCo(nv20->zeta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(kelvin, NV20TCL_ZETA_OFFSET, 1); - OUT_RELOCl(nv20->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + struct nouveau_bo *zeta_bo = nouveau_bo(nv20->zeta); + BEGIN_RING(chan, kelvin, NV20TCL_DMA_ZETA, 1); + OUT_RELOCo(chan, zeta_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, kelvin, NV20TCL_ZETA_OFFSET, 1); + OUT_RELOCl(chan, zeta_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); /* XXX for when we allocate LMA on nv17 */ -/* BEGIN_RING(kelvin, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1); - OUT_RELOCl(nv20->zeta + lma_offset);*/ +/* BEGIN_RING(chan, kelvin, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1); + OUT_RELOCl(chan, nouveau_bo(nv20->zeta + lma_offset));*/ } /* Vertex buffer */ - BEGIN_RING(kelvin, NV20TCL_DMA_VTXBUF0, 1); - OUT_RELOCo(nv20->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(kelvin, NV20TCL_COLOR_OFFSET, 1); - OUT_RELOCl(nv20->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, kelvin, NV20TCL_DMA_VTXBUF0, 1); + OUT_RELOCo(chan, rt_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, kelvin, NV20TCL_COLOR_OFFSET, 1); + OUT_RELOCl(chan, rt_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); /* Texture images */ for (i = 0; i < 2; i++) { if (!(nv20->fp_samplers & (1 << i))) continue; - BEGIN_RING(kelvin, NV20TCL_TX_OFFSET(i), 1); - OUT_RELOCl(nv20->tex[i].buffer, 0, NOUVEAU_BO_VRAM | + struct nouveau_bo *bo = nouveau_bo(nv20->tex[i].buffer); + BEGIN_RING(chan, kelvin, NV20TCL_TX_OFFSET(i), 1); + OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - BEGIN_RING(kelvin, NV20TCL_TX_FORMAT(i), 1); - OUT_RELOCd(nv20->tex[i].buffer, nv20->tex[i].format, + BEGIN_RING(chan, kelvin, NV20TCL_TX_FORMAT(i), 1); + OUT_RELOCd(chan, bo, nv20->tex[i].format, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_OR, NV20TCL_TX_FORMAT_DMA0, NV20TCL_TX_FORMAT_DMA1); diff --git a/src/gallium/drivers/nv20/nv20_transfer.c b/src/gallium/drivers/nv20/nv20_transfer.c index 81b4f1a9177..699773e8e6f 100644 --- a/src/gallium/drivers/nv20/nv20_transfer.c +++ b/src/gallium/drivers/nv20/nv20_transfer.c @@ -1,7 +1,9 @@ #include <pipe/p_state.h> #include <pipe/p_defines.h> #include <pipe/p_inlines.h> +#include <util/u_format.h> #include <util/u_memory.h> +#include <util/u_math.h> #include <nouveau/nouveau_winsys.h> #include "nv20_context.h" #include "nv20_screen.h" @@ -10,22 +12,19 @@ struct nv20_transfer { struct pipe_transfer base; struct pipe_surface *surface; - bool direct; + boolean direct; }; static void -nv20_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, +nv20_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height, struct pipe_texture *template) { memset(template, 0, sizeof(struct pipe_texture)); template->target = pt->target; template->format = pt->format; - template->width[0] = pt->width[level]; - template->height[0] = pt->height[level]; - template->depth[0] = 1; - template->block = pt->block; - template->nblocksx[0] = pt->nblocksx[level]; - template->nblocksy[0] = pt->nblocksx[level]; + template->width0 = width; + template->height0 = height; + template->depth0 = 1; template->last_level = 0; template->nr_samples = pt->nr_samples; @@ -48,14 +47,10 @@ nv20_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; pipe_texture_reference(&tx->base.texture, pt); - tx->base.format = pt->format; tx->base.x = x; tx->base.y = y; tx->base.width = w; tx->base.height = h; - tx->base.block = pt->block; - tx->base.nblocksx = pt->nblocksx[level]; - tx->base.nblocksy = pt->nblocksy[level]; tx->base.stride = mt->level[level].pitch; tx->base.usage = usage; tx->base.face = face; @@ -76,7 +71,7 @@ nv20_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->direct = false; - nv20_compatible_transfer_tex(pt, level, &tx_tex_template); + nv20_compatible_transfer_tex(pt, w, h, &tx_tex_template); tx_tex = pscreen->texture_create(pscreen, &tx_tex_template); if (!tx_tex) @@ -85,6 +80,8 @@ nv20_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; } + tx->base.stride = ((struct nv20_miptree*)tx_tex)->level[0].pitch; + tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, face, level, zslice, pipe_transfer_buffer_flags(&tx->base)); @@ -110,8 +107,8 @@ nv20_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, /* TODO: Check if SIFM can un-swizzle */ nvscreen->eng2d->copy(nvscreen->eng2d, tx->surface, 0, 0, - src, 0, 0, - src->width, src->height); + src, x, y, + w, h); pipe_surface_reference(&src, NULL); } @@ -131,13 +128,13 @@ nv20_transfer_del(struct pipe_transfer *ptx) dst = pscreen->get_tex_surface(pscreen, ptx->texture, ptx->face, ptx->level, ptx->zslice, - PIPE_BUFFER_USAGE_GPU_WRITE); + PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER); /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ nvscreen->eng2d->copy(nvscreen->eng2d, - dst, 0, 0, + dst, tx->base.x, tx->base.y, tx->surface, 0, 0, - dst->width, dst->height); + tx->base.width, tx->base.height); pipe_surface_reference(&dst, NULL); } @@ -156,8 +153,10 @@ nv20_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) void *map = pipe_buffer_map(pscreen, mt->buffer, pipe_transfer_buffer_flags(ptx)); - return map + ns->base.offset + - ptx->y * ns->pitch + ptx->x * ptx->block.size; + if(!tx->direct) + return map + ns->base.offset; + else + return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); } static void diff --git a/src/gallium/drivers/nv20/nv20_vbo.c b/src/gallium/drivers/nv20/nv20_vbo.c index 84d7db6c5e2..52991a0d856 100644 --- a/src/gallium/drivers/nv20/nv20_vbo.c +++ b/src/gallium/drivers/nv20/nv20_vbo.c @@ -9,7 +9,7 @@ #include "nouveau/nouveau_channel.h" #include "nouveau/nouveau_pushbuf.h" -boolean nv20_draw_elements( struct pipe_context *pipe, +void nv20_draw_elements( struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned prim, unsigned start, unsigned count) @@ -45,7 +45,7 @@ boolean nv20_draw_elements( struct pipe_context *pipe, draw_set_mapped_element_buffer(draw, 0, NULL); } - draw_set_mapped_constant_buffer(draw, + draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, nv20->constbuf[PIPE_SHADER_VERTEX], nv20->constbuf_nr[PIPE_SHADER_VERTEX]); @@ -67,13 +67,12 @@ boolean nv20_draw_elements( struct pipe_context *pipe, } draw_flush(nv20->draw); - return TRUE; } -boolean nv20_draw_arrays( struct pipe_context *pipe, +void nv20_draw_arrays( struct pipe_context *pipe, unsigned prim, unsigned start, unsigned count) { - return nv20_draw_elements(pipe, NULL, 0, prim, start, count); + nv20_draw_elements(pipe, NULL, 0, prim, start, count); } diff --git a/src/gallium/drivers/nv20/nv20_vertprog.c b/src/gallium/drivers/nv20/nv20_vertprog.c index 388245ecb04..7886c2af7e6 100644 --- a/src/gallium/drivers/nv20/nv20_vertprog.c +++ b/src/gallium/drivers/nv20/nv20_vertprog.c @@ -253,32 +253,32 @@ static INLINE struct nv20_sreg tgsi_src(struct nv20_vpc *vpc, const struct tgsi_full_src_register *fsrc) { struct nv20_sreg src; - switch (fsrc->SrcRegister.File) { + switch (fsrc->Register.File) { case TGSI_FILE_INPUT: - src = nv20_sr(NV30SR_INPUT, fsrc->SrcRegister.Index); + src = nv20_sr(NV30SR_INPUT, fsrc->Register.Index); break; case TGSI_FILE_CONSTANT: - src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0); + src = constant(vpc, fsrc->Register.Index, 0, 0, 0, 0); break; case TGSI_FILE_IMMEDIATE: - src = vpc->imm[fsrc->SrcRegister.Index]; + src = vpc->imm[fsrc->Register.Index]; break; case TGSI_FILE_TEMPORARY: - if (vpc->high_temp < fsrc->SrcRegister.Index) - vpc->high_temp = fsrc->SrcRegister.Index; - src = nv20_sr(NV30SR_TEMP, fsrc->SrcRegister.Index); + if (vpc->high_temp < fsrc->Register.Index) + vpc->high_temp = fsrc->Register.Index; + src = nv20_sr(NV30SR_TEMP, fsrc->Register.Index); break; default: NOUVEAU_ERR("bad src file\n"); break; } - src.abs = fsrc->SrcRegisterExtMod.Absolute; - src.negate = fsrc->SrcRegister.Negate; - src.swz[0] = fsrc->SrcRegister.SwizzleX; - src.swz[1] = fsrc->SrcRegister.SwizzleY; - src.swz[2] = fsrc->SrcRegister.SwizzleZ; - src.swz[3] = fsrc->SrcRegister.SwizzleW; + src.abs = fsrc->Register.Absolute; + src.negate = fsrc->Register.Negate; + src.swz[0] = fsrc->Register.SwizzleX; + src.swz[1] = fsrc->Register.SwizzleY; + src.swz[2] = fsrc->Register.SwizzleZ; + src.swz[3] = fsrc->Register.SwizzleW; return src; } @@ -286,14 +286,14 @@ static INLINE struct nv20_sreg tgsi_dst(struct nv20_vpc *vpc, const struct tgsi_full_dst_register *fdst) { struct nv20_sreg dst; - switch (fdst->DstRegister.File) { + switch (fdst->Register.File) { case TGSI_FILE_OUTPUT: dst = nv20_sr(NV30SR_OUTPUT, - vpc->output_map[fdst->DstRegister.Index]); + vpc->output_map[fdst->Register.Index]); break; case TGSI_FILE_TEMPORARY: - dst = nv20_sr(NV30SR_TEMP, fdst->DstRegister.Index); + dst = nv20_sr(NV30SR_TEMP, fdst->Register.Index); if (vpc->high_temp < dst.index) vpc->high_temp = dst.index; break; @@ -334,8 +334,8 @@ nv20_vertprog_parse_instruction(struct nv20_vpc *vpc, for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fsrc; - fsrc = &finst->FullSrcRegisters[i]; - if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { + fsrc = &finst->Src[i]; + if (fsrc->Register.File == TGSI_FILE_TEMPORARY) { src[i] = tgsi_src(vpc, fsrc); } } @@ -343,11 +343,11 @@ nv20_vertprog_parse_instruction(struct nv20_vpc *vpc, for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fsrc; - fsrc = &finst->FullSrcRegisters[i]; - switch (fsrc->SrcRegister.File) { + fsrc = &finst->Src[i]; + switch (fsrc->Register.File) { case TGSI_FILE_INPUT: - if (ai == -1 || ai == fsrc->SrcRegister.Index) { - ai = fsrc->SrcRegister.Index; + if (ai == -1 || ai == fsrc->Register.Index) { + ai = fsrc->Register.Index; src[i] = tgsi_src(vpc, fsrc); } else { src[i] = temp(vpc); @@ -360,8 +360,8 @@ nv20_vertprog_parse_instruction(struct nv20_vpc *vpc, */ case TGSI_FILE_CONSTANT: case TGSI_FILE_IMMEDIATE: - if (ci == -1 || ci == fsrc->SrcRegister.Index) { - ci = fsrc->SrcRegister.Index; + if (ci == -1 || ci == fsrc->Register.Index) { + ci = fsrc->Register.Index; src[i] = tgsi_src(vpc, fsrc); } else { src[i] = temp(vpc); @@ -378,8 +378,8 @@ nv20_vertprog_parse_instruction(struct nv20_vpc *vpc, } } - dst = tgsi_dst(vpc, &finst->FullDstRegisters[0]); - mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); + dst = tgsi_dst(vpc, &finst->Dst[0]); + mask = tgsi_mask(finst->Dst[0].Register.WriteMask); switch (finst->Instruction.Opcode) { case TGSI_OPCODE_ABS: @@ -490,15 +490,15 @@ nv20_vertprog_parse_decl_output(struct nv20_vpc *vpc, { int hw; - switch (fdec->Semantic.SemanticName) { + switch (fdec->Semantic.Name) { case TGSI_SEMANTIC_POSITION: hw = NV30_VP_INST_DEST_POS; break; case TGSI_SEMANTIC_COLOR: - if (fdec->Semantic.SemanticIndex == 0) { + if (fdec->Semantic.Index == 0) { hw = NV30_VP_INST_DEST_COL0; } else - if (fdec->Semantic.SemanticIndex == 1) { + if (fdec->Semantic.Index == 1) { hw = NV30_VP_INST_DEST_COL1; } else { NOUVEAU_ERR("bad colour semantic index\n"); @@ -506,10 +506,10 @@ nv20_vertprog_parse_decl_output(struct nv20_vpc *vpc, } break; case TGSI_SEMANTIC_BCOLOR: - if (fdec->Semantic.SemanticIndex == 0) { + if (fdec->Semantic.Index == 0) { hw = NV30_VP_INST_DEST_BFC0; } else - if (fdec->Semantic.SemanticIndex == 1) { + if (fdec->Semantic.Index == 1) { hw = NV30_VP_INST_DEST_BFC1; } else { NOUVEAU_ERR("bad bcolour semantic index\n"); @@ -523,19 +523,22 @@ nv20_vertprog_parse_decl_output(struct nv20_vpc *vpc, hw = NV30_VP_INST_DEST_PSZ; break; case TGSI_SEMANTIC_GENERIC: - if (fdec->Semantic.SemanticIndex <= 7) { - hw = NV30_VP_INST_DEST_TC(fdec->Semantic.SemanticIndex); + if (fdec->Semantic.Index <= 7) { + hw = NV30_VP_INST_DEST_TC(fdec->Semantic.Index); } else { NOUVEAU_ERR("bad generic semantic index\n"); return FALSE; } break; + case TGSI_SEMANTIC_EDGEFLAG: + NOUVEAU_ERR("cannot handle edgeflag output\n"); + return FALSE; default: NOUVEAU_ERR("bad output semantic\n"); return FALSE; } - vpc->output_map[fdec->DeclarationRange.First] = hw; + vpc->output_map[fdec->Range.First] = hw; return TRUE; } diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c index d8300fd69f6..54572e9ab3a 100644 --- a/src/gallium/drivers/nv30/nv30_context.c +++ b/src/gallium/drivers/nv30/nv30_context.c @@ -10,21 +10,32 @@ nv30_flush(struct pipe_context *pipe, unsigned flags, struct pipe_fence_handle **fence) { struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_screen *screen = nv30->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *rankine = screen->rankine; if (flags & PIPE_FLUSH_TEXTURE_CACHE) { - BEGIN_RING(rankine, 0x1fd8, 1); - OUT_RING (2); - BEGIN_RING(rankine, 0x1fd8, 1); - OUT_RING (1); + BEGIN_RING(chan, rankine, 0x1fd8, 1); + OUT_RING (chan, 2); + BEGIN_RING(chan, rankine, 0x1fd8, 1); + OUT_RING (chan, 1); } - FIRE_RING(fence); + FIRE_RING(chan); + if (fence) + *fence = NULL; } static void nv30_destroy(struct pipe_context *pipe) { struct nv30_context *nv30 = nv30_context(pipe); + unsigned i; + + for (i = 0; i < NV30_STATE_MAX; i++) { + if (nv30->state.hw[i]) + so_ref(NULL, &nv30->state.hw[i]); + } if (nv30->draw) draw_destroy(nv30->draw); @@ -58,6 +69,9 @@ nv30_create(struct pipe_screen *pscreen, unsigned pctx_id) nv30->pipe.is_texture_referenced = nouveau_is_texture_referenced; nv30->pipe.is_buffer_referenced = nouveau_is_buffer_referenced; + screen->base.channel->user_private = nv30; + screen->base.channel->flush_notify = nv30_state_flush_notify; + nv30_init_query_functions(nv30); nv30_init_surface_functions(nv30); nv30_init_state_functions(nv30); diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index 8d49366dfcb..e59449287b5 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -14,10 +14,6 @@ #include "nouveau/nouveau_winsys.h" #include "nouveau/nouveau_gldefs.h" #include "nouveau/nouveau_context.h" - -#define NOUVEAU_PUSH_CONTEXT(ctx) \ - struct nv30_screen *ctx = nv30->screen -#include "nouveau/nouveau_push.h" #include "nouveau/nouveau_stateobj.h" #include "nv30_state.h" @@ -144,7 +140,6 @@ struct nv30_context { unsigned vtxbuf_nr; struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS]; unsigned vtxelt_nr; - const unsigned *edgeflags; }; static INLINE struct nv30_context * @@ -184,6 +179,7 @@ extern void nv30_fragtex_bind(struct nv30_context *); /* nv30_state.c and friends */ extern boolean nv30_state_validate(struct nv30_context *nv30); extern void nv30_state_emit(struct nv30_context *nv30); +extern void nv30_state_flush_notify(struct nouveau_channel *chan); extern struct nv30_state_entry nv30_state_rasterizer; extern struct nv30_state_entry nv30_state_scissor; extern struct nv30_state_entry nv30_state_stipple; @@ -198,9 +194,9 @@ extern struct nv30_state_entry nv30_state_fragtex; extern struct nv30_state_entry nv30_state_vbo; /* nv30_vbo.c */ -extern boolean nv30_draw_arrays(struct pipe_context *, unsigned mode, +extern void nv30_draw_arrays(struct pipe_context *, unsigned mode, unsigned start, unsigned count); -extern boolean nv30_draw_elements(struct pipe_context *pipe, +extern void nv30_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c index 0ce702d6f84..2d565cb631b 100644 --- a/src/gallium/drivers/nv30/nv30_fragprog.c +++ b/src/gallium/drivers/nv30/nv30_fragprog.c @@ -237,20 +237,20 @@ tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc) { struct nv30_sreg src; - switch (fsrc->SrcRegister.File) { + switch (fsrc->Register.File) { case TGSI_FILE_INPUT: src = nv30_sr(NV30SR_INPUT, - fpc->attrib_map[fsrc->SrcRegister.Index]); + fpc->attrib_map[fsrc->Register.Index]); break; case TGSI_FILE_CONSTANT: - src = constant(fpc, fsrc->SrcRegister.Index, NULL); + src = constant(fpc, fsrc->Register.Index, NULL); break; case TGSI_FILE_IMMEDIATE: - assert(fsrc->SrcRegister.Index < fpc->nr_imm); - src = fpc->imm[fsrc->SrcRegister.Index]; + assert(fsrc->Register.Index < fpc->nr_imm); + src = fpc->imm[fsrc->Register.Index]; break; case TGSI_FILE_TEMPORARY: - src = nv30_sr(NV30SR_TEMP, fsrc->SrcRegister.Index + 1); + src = nv30_sr(NV30SR_TEMP, fsrc->Register.Index + 1); if (fpc->high_temp < src.index) fpc->high_temp = src.index; break; @@ -258,7 +258,7 @@ tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc) * Luckily fragprog results are just temp regs.. */ case TGSI_FILE_OUTPUT: - if (fsrc->SrcRegister.Index == fpc->colour_id) + if (fsrc->Register.Index == fpc->colour_id) return nv30_sr(NV30SR_OUTPUT, 0); else return nv30_sr(NV30SR_OUTPUT, 1); @@ -268,12 +268,12 @@ tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc) break; } - src.abs = fsrc->SrcRegisterExtMod.Absolute; - src.negate = fsrc->SrcRegister.Negate; - src.swz[0] = fsrc->SrcRegister.SwizzleX; - src.swz[1] = fsrc->SrcRegister.SwizzleY; - src.swz[2] = fsrc->SrcRegister.SwizzleZ; - src.swz[3] = fsrc->SrcRegister.SwizzleW; + src.abs = fsrc->Register.Absolute; + src.negate = fsrc->Register.Negate; + src.swz[0] = fsrc->Register.SwizzleX; + src.swz[1] = fsrc->Register.SwizzleY; + src.swz[2] = fsrc->Register.SwizzleZ; + src.swz[3] = fsrc->Register.SwizzleW; return src; } @@ -281,22 +281,22 @@ static INLINE struct nv30_sreg tgsi_dst(struct nv30_fpc *fpc, const struct tgsi_full_dst_register *fdst) { int idx; - switch (fdst->DstRegister.File) { + switch (fdst->Register.File) { case TGSI_FILE_OUTPUT: - if (fdst->DstRegister.Index == fpc->colour_id) + if (fdst->Register.Index == fpc->colour_id) return nv30_sr(NV30SR_OUTPUT, 0); else return nv30_sr(NV30SR_OUTPUT, 1); break; case TGSI_FILE_TEMPORARY: - idx = fdst->DstRegister.Index + 1; + idx = fdst->Register.Index + 1; if (fpc->high_temp < idx) fpc->high_temp = idx; return nv30_sr(NV30SR_TEMP, idx); case TGSI_FILE_NULL: return nv30_sr(NV30SR_NONE, 0); default: - NOUVEAU_ERR("bad dst file %d\n", fdst->DstRegister.File); + NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File); return nv30_sr(NV30SR_NONE, 0); } } @@ -363,8 +363,8 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fsrc; - fsrc = &finst->FullSrcRegisters[i]; - if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { + fsrc = &finst->Src[i]; + if (fsrc->Register.File == TGSI_FILE_TEMPORARY) { src[i] = tgsi_src(fpc, fsrc); } } @@ -372,9 +372,9 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fsrc; - fsrc = &finst->FullSrcRegisters[i]; + fsrc = &finst->Src[i]; - switch (fsrc->SrcRegister.File) { + switch (fsrc->Register.File) { case TGSI_FILE_INPUT: case TGSI_FILE_CONSTANT: case TGSI_FILE_TEMPORARY: @@ -385,14 +385,14 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, break; } - switch (fsrc->SrcRegister.File) { + switch (fsrc->Register.File) { case TGSI_FILE_INPUT: - if (ai == -1 || ai == fsrc->SrcRegister.Index) { - ai = fsrc->SrcRegister.Index; + if (ai == -1 || ai == fsrc->Register.Index) { + ai = fsrc->Register.Index; src[i] = tgsi_src(fpc, fsrc); } else { NOUVEAU_MSG("extra src attr %d\n", - fsrc->SrcRegister.Index); + fsrc->Register.Index); src[i] = temp(fpc); arith(fpc, 0, MOV, src[i], MASK_ALL, tgsi_src(fpc, fsrc), none, none); @@ -400,8 +400,8 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, break; case TGSI_FILE_CONSTANT: case TGSI_FILE_IMMEDIATE: - if (ci == -1 || ci == fsrc->SrcRegister.Index) { - ci = fsrc->SrcRegister.Index; + if (ci == -1 || ci == fsrc->Register.Index) { + ci = fsrc->Register.Index; src[i] = tgsi_src(fpc, fsrc); } else { src[i] = temp(fpc); @@ -413,7 +413,7 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, /* handled above */ break; case TGSI_FILE_SAMPLER: - unit = fsrc->SrcRegister.Index; + unit = fsrc->Register.Index; break; case TGSI_FILE_OUTPUT: break; @@ -423,8 +423,8 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, } } - dst = tgsi_dst(fpc, &finst->FullDstRegisters[0]); - mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); + dst = tgsi_dst(fpc, &finst->Dst[0]); + mask = tgsi_mask(finst->Dst[0].Register.WriteMask); sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE); switch (finst->Instruction.Opcode) { @@ -435,10 +435,11 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, arith(fpc, sat, ADD, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_CMP: - tmp = temp(fpc); - arith(fpc, sat, MOV, dst, mask, src[2], none, none); + tmp = nv30_sr(NV30SR_NONE, 0); tmp.cc_update = 1; arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none); + dst.cc_test = NV30_VP_INST_COND_GE; + arith(fpc, sat, MOV, dst, mask, src[2], none, none); dst.cc_test = NV30_VP_INST_COND_LT; arith(fpc, sat, MOV, dst, mask, src[1], none, none); break; @@ -517,13 +518,28 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, arith(fpc, sat, RSQ, dst, mask, abs(swz(src[0], X, X, X, X)), none, none); break; case TGSI_OPCODE_SCS: - if (mask & MASK_X) { - arith(fpc, sat, COS, dst, MASK_X, - swz(src[0], X, X, X, X), none, none); + /* avoid overwriting the source */ + if(src[0].swz[SWZ_X] != SWZ_X) + { + if (mask & MASK_X) { + arith(fpc, sat, COS, dst, MASK_X, + swz(src[0], X, X, X, X), none, none); + } + if (mask & MASK_Y) { + arith(fpc, sat, SIN, dst, MASK_Y, + swz(src[0], X, X, X, X), none, none); + } } - if (mask & MASK_Y) { - arith(fpc, sat, SIN, dst, MASK_Y, - swz(src[0], X, X, X, X), none, none); + else + { + if (mask & MASK_Y) { + arith(fpc, sat, SIN, dst, MASK_Y, + swz(src[0], X, X, X, X), none, none); + } + if (mask & MASK_X) { + arith(fpc, sat, COS, dst, MASK_X, + swz(src[0], X, X, X, X), none, none); + } } break; case TGSI_OPCODE_SIN: @@ -572,15 +588,15 @@ nv30_fragprog_parse_decl_attrib(struct nv30_fpc *fpc, { int hw; - switch (fdec->Semantic.SemanticName) { + switch (fdec->Semantic.Name) { case TGSI_SEMANTIC_POSITION: hw = NV30_FP_OP_INPUT_SRC_POSITION; break; case TGSI_SEMANTIC_COLOR: - if (fdec->Semantic.SemanticIndex == 0) { + if (fdec->Semantic.Index == 0) { hw = NV30_FP_OP_INPUT_SRC_COL0; } else - if (fdec->Semantic.SemanticIndex == 1) { + if (fdec->Semantic.Index == 1) { hw = NV30_FP_OP_INPUT_SRC_COL1; } else { NOUVEAU_ERR("bad colour semantic index\n"); @@ -591,9 +607,9 @@ nv30_fragprog_parse_decl_attrib(struct nv30_fpc *fpc, hw = NV30_FP_OP_INPUT_SRC_FOGC; break; case TGSI_SEMANTIC_GENERIC: - if (fdec->Semantic.SemanticIndex <= 7) { + if (fdec->Semantic.Index <= 7) { hw = NV30_FP_OP_INPUT_SRC_TC(fdec->Semantic. - SemanticIndex); + Index); } else { NOUVEAU_ERR("bad generic semantic index\n"); return FALSE; @@ -604,7 +620,7 @@ nv30_fragprog_parse_decl_attrib(struct nv30_fpc *fpc, return FALSE; } - fpc->attrib_map[fdec->DeclarationRange.First] = hw; + fpc->attrib_map[fdec->Range.First] = hw; return TRUE; } @@ -612,12 +628,12 @@ static boolean nv30_fragprog_parse_decl_output(struct nv30_fpc *fpc, const struct tgsi_full_declaration *fdec) { - switch (fdec->Semantic.SemanticName) { + switch (fdec->Semantic.Name) { case TGSI_SEMANTIC_POSITION: - fpc->depth_id = fdec->DeclarationRange.First; + fpc->depth_id = fdec->Range.First; break; case TGSI_SEMANTIC_COLOR: - fpc->colour_id = fdec->DeclarationRange.First; + fpc->colour_id = fdec->Range.First; break; default: NOUVEAU_ERR("bad output semantic\n"); @@ -653,9 +669,9 @@ nv30_fragprog_prepare(struct nv30_fpc *fpc) goto out_err; break; /*case TGSI_FILE_TEMPORARY: - if (fdec->DeclarationRange.Last > high_temp) { + if (fdec->Range.Last > high_temp) { high_temp = - fdec->DeclarationRange.Last; + fdec->Range.Last; } break;*/ default: @@ -821,7 +837,7 @@ nv30_fragprog_validate(struct nv30_context *nv30) fp->buffer = pscreen->buffer_create(pscreen, 0x100, 0, fp->insn_len * 4); nv30_fragprog_upload(nv30, fp); - so = so_new(8, 1); + so = so_new(4, 4, 1); so_method(so, nv30->screen->rankine, NV34TCL_FP_ACTIVE_PROGRAM, 1); so_reloc (so, nouveau_bo(fp->buffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW | @@ -870,6 +886,12 @@ void nv30_fragprog_destroy(struct nv30_context *nv30, struct nv30_fragment_program *fp) { + if (fp->buffer) + pipe_buffer_reference(&fp->buffer, NULL); + + if (fp->so) + so_ref(NULL, &fp->so); + if (fp->insn_len) FREE(fp->insn); } diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c index dca760cae62..98935678911 100644 --- a/src/gallium/drivers/nv30/nv30_fragtex.c +++ b/src/gallium/drivers/nv30/nv30_fragtex.c @@ -74,9 +74,9 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit) txf = tf->format; txf |= ((pt->last_level>0) ? NV34TCL_TX_FORMAT_MIPMAP : 0); - txf |= log2i(pt->width[0]) << NV34TCL_TX_FORMAT_BASE_SIZE_U_SHIFT; - txf |= log2i(pt->height[0]) << NV34TCL_TX_FORMAT_BASE_SIZE_V_SHIFT; - txf |= log2i(pt->depth[0]) << NV34TCL_TX_FORMAT_BASE_SIZE_W_SHIFT; + txf |= log2i(pt->width0) << NV34TCL_TX_FORMAT_BASE_SIZE_U_SHIFT; + txf |= log2i(pt->height0) << NV34TCL_TX_FORMAT_BASE_SIZE_V_SHIFT; + txf |= log2i(pt->depth0) << NV34TCL_TX_FORMAT_BASE_SIZE_W_SHIFT; txf |= NV34TCL_TX_FORMAT_NO_BORDER | 0x10000; switch (pt->target) { @@ -106,7 +106,7 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit) txs = tf->swizzle; - so = so_new(16, 2); + so = so_new(1, 8, 2); so_method(so, nv30->screen->rankine, NV34TCL_TX_OFFSET(unit), 8); so_reloc (so, bo, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0); so_reloc (so, bo, txf, tex_flags | NOUVEAU_BO_OR, @@ -115,8 +115,8 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit) so_data (so, NV34TCL_TX_ENABLE_ENABLE | ps->en); so_data (so, txs); so_data (so, ps->filt | 0x2000 /*voodoo*/); - so_data (so, (pt->width[0] << NV34TCL_TX_NPOT_SIZE_W_SHIFT) | - pt->height[0]); + so_data (so, (pt->width0 << NV34TCL_TX_NPOT_SIZE_W_SHIFT) | + pt->height0); so_data (so, ps->bcol); return so; @@ -135,7 +135,7 @@ nv30_fragtex_validate(struct nv30_context *nv30) unit = ffs(samplers) - 1; samplers &= ~(1 << unit); - so = so_new(2, 0); + so = so_new(1, 1, 0); so_method(so, nv30->screen->rankine, NV34TCL_TX_ENABLE(unit), 1); so_data (so, 0); so_ref(so, &nv30->state.hw[NV30_STATE_FRAGTEX0 + unit]); diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c index 280696d4503..8fbba38e78f 100644 --- a/src/gallium/drivers/nv30/nv30_miptree.c +++ b/src/gallium/drivers/nv30/nv30_miptree.c @@ -1,14 +1,17 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" +#include "util/u_format.h" +#include "util/u_math.h" #include "nv30_context.h" +#include "../nv04/nv04_surface_2d.h" static void nv30_miptree_layout(struct nv30_miptree *nv30mt) { struct pipe_texture *pt = &nv30mt->base; - uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0]; + uint width = pt->width0; uint offset = 0; int nr_faces, l, f; uint wide_pitch = pt->tex_usage & (PIPE_TEXTURE_USAGE_SAMPLER | @@ -21,29 +24,21 @@ nv30_miptree_layout(struct nv30_miptree *nv30mt) nr_faces = 6; } else if (pt->target == PIPE_TEXTURE_3D) { - nr_faces = pt->depth[0]; + nr_faces = pt->depth0; } else { nr_faces = 1; } for (l = 0; l <= pt->last_level; l++) { - pt->width[l] = width; - pt->height[l] = height; - pt->depth[l] = depth; - pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); - pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); - if (wide_pitch && (pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) - nv30mt->level[l].pitch = align(pt->width[0] * pt->block.size, 64); + nv30mt->level[l].pitch = align(util_format_get_stride(pt->format, pt->width0), 64); else - nv30mt->level[l].pitch = pt->width[l] * pt->block.size; + nv30mt->level[l].pitch = util_format_get_stride(pt->format, width); nv30mt->level[l].image_offset = CALLOC(nr_faces, sizeof(unsigned)); - width = MAX2(1, width >> 1); - height = MAX2(1, height >> 1); - depth = MAX2(1, depth >> 1); + width = u_minify(width, 1); } for (f = 0; f < nr_faces; f++) { @@ -51,14 +46,14 @@ nv30_miptree_layout(struct nv30_miptree *nv30mt) nv30mt->level[l].image_offset[f] = offset; if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR) && - pt->width[l + 1] > 1 && pt->height[l + 1] > 1) - offset += align(nv30mt->level[l].pitch * pt->height[l], 64); + u_minify(pt->width0, l + 1) > 1 && u_minify(pt->height0, l + 1) > 1) + offset += align(nv30mt->level[l].pitch * u_minify(pt->height0, l), 64); else - offset += nv30mt->level[l].pitch * pt->height[l]; + offset += nv30mt->level[l].pitch * u_minify(pt->height0, l); } nv30mt->level[l].image_offset[f] = offset; - offset += nv30mt->level[l].pitch * pt->height[l]; + offset += nv30mt->level[l].pitch * u_minify(pt->height0, l); } nv30mt->total_size = offset; @@ -79,8 +74,8 @@ nv30_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) mt->base.screen = pscreen; /* Swizzled textures must be POT */ - if (pt->width[0] & (pt->width[0] - 1) || - pt->height[0] & (pt->height[0] - 1)) + if (pt->width0 & (pt->width0 - 1) || + pt->height0 & (pt->height0 - 1)) mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; else if (pt->tex_usage & (PIPE_TEXTURE_USAGE_PRIMARY | @@ -114,6 +109,12 @@ nv30_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE; + /* apparently we can't render to swizzled surfaces smaller than 64 bytes, so make them linear. + * If the user did not ask for a render target, they can still render to it, but it will cost them an extra copy. + * This also happens for small mipmaps of large textures. */ + if (pt->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET && util_format_get_stride(pt->format, pt->width0) < 64) + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + nv30_miptree_layout(mt); mt->buffer = pscreen->buffer_create(pscreen, 256, buf_usage, @@ -122,6 +123,7 @@ nv30_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) FREE(mt); return NULL; } + mt->bo = nouveau_bo(mt->buffer); return &mt->base; } @@ -134,7 +136,7 @@ nv30_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, /* Only supports 2D, non-mipmapped textures for the moment */ if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 || - pt->depth[0] != 1) + pt->depth0 != 1) return NULL; mt = CALLOC_STRUCT(nv30_miptree); @@ -151,6 +153,7 @@ nv30_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; pipe_buffer_reference(&mt->buffer, pb); + mt->bo = nouveau_bo(mt->buffer); return &mt->base; } @@ -182,8 +185,8 @@ nv30_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; pipe_texture_reference(&ns->base.texture, pt); ns->base.format = pt->format; - ns->base.width = pt->width[level]; - ns->base.height = pt->height[level]; + ns->base.width = u_minify(pt->width0, level); + ns->base.height = u_minify(pt->height0, level); ns->base.usage = flags; pipe_reference_init(&ns->base.reference, 1); ns->base.face = face; @@ -200,12 +203,27 @@ nv30_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, ns->base.offset = nv30mt->level[level].image_offset[0]; } + /* create a linear temporary that we can render into if necessary. + * Note that ns->pitch is always a multiple of 64 for linear surfaces and swizzled surfaces are POT, so + * ns->pitch & 63 is equivalent to (ns->pitch < 64 && swizzled)*/ + if((ns->pitch & 63) && (ns->base.usage & (PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER)) == PIPE_BUFFER_USAGE_GPU_WRITE) + return &nv04_surface_wrap_for_render(pscreen, ((struct nv30_screen*)pscreen)->eng2d, ns)->base; + return &ns->base; } static void nv30_miptree_surface_del(struct pipe_surface *ps) { + struct nv04_surface* ns = (struct nv04_surface*)ps; + if(ns->backing) + { + struct nv30_screen* screen = (struct nv30_screen*)ps->texture->screen; + if(ns->backing->base.usage & PIPE_BUFFER_USAGE_GPU_WRITE) + screen->eng2d->copy(screen->eng2d, &ns->backing->base, 0, 0, ps, 0, 0, ns->base.width, ns->base.height); + nv30_miptree_surface_del(&ns->backing->base); + } + pipe_texture_reference(&ps->texture, NULL); FREE(ps); } diff --git a/src/gallium/drivers/nv30/nv30_query.c b/src/gallium/drivers/nv30/nv30_query.c index 1d1c8a484e1..e27e9ccbf60 100644 --- a/src/gallium/drivers/nv30/nv30_query.c +++ b/src/gallium/drivers/nv30/nv30_query.c @@ -41,6 +41,9 @@ nv30_query_begin(struct pipe_context *pipe, struct pipe_query *pq) { struct nv30_context *nv30 = nv30_context(pipe); struct nv30_query *q = nv30_query(pq); + struct nv30_screen *screen = nv30->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *rankine = screen->rankine; assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER); @@ -57,10 +60,10 @@ nv30_query_begin(struct pipe_context *pipe, struct pipe_query *pq) assert(0); nouveau_notifier_reset(nv30->screen->query, q->object->start); - BEGIN_RING(rankine, NV34TCL_QUERY_RESET, 1); - OUT_RING (1); - BEGIN_RING(rankine, NV34TCL_QUERY_UNK17CC, 1); - OUT_RING (1); + BEGIN_RING(chan, rankine, NV34TCL_QUERY_RESET, 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, rankine, NV34TCL_QUERY_UNK17CC, 1); + OUT_RING (chan, 1); q->ready = FALSE; } @@ -69,12 +72,15 @@ static void nv30_query_end(struct pipe_context *pipe, struct pipe_query *pq) { struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_screen *screen = nv30->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *rankine = screen->rankine; struct nv30_query *q = nv30_query(pq); - BEGIN_RING(rankine, NV34TCL_QUERY_GET, 1); - OUT_RING ((0x01 << NV34TCL_QUERY_GET_UNK24_SHIFT) | + BEGIN_RING(chan, rankine, NV34TCL_QUERY_GET, 1); + OUT_RING (chan, (0x01 << NV34TCL_QUERY_GET_UNK24_SHIFT) | ((q->object->start * 32) << NV34TCL_QUERY_GET_OFFSET_SHIFT)); - FIRE_RING(NULL); + FIRE_RING(chan); } static boolean diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c index 7cd36902eb4..9ed48178dc2 100644 --- a/src/gallium/drivers/nv30/nv30_screen.c +++ b/src/gallium/drivers/nv30/nv30_screen.c @@ -156,6 +156,12 @@ static void nv30_screen_destroy(struct pipe_screen *pscreen) { struct nv30_screen *screen = nv30_screen(pscreen); + unsigned i; + + for (i = 0; i < NV30_STATE_MAX; i++) { + if (screen->state[i]) + so_ref(NULL, &screen->state[i]); + } nouveau_resource_free(&screen->vp_exec_heap); nouveau_resource_free(&screen->vp_data_heap); @@ -163,6 +169,9 @@ nv30_screen_destroy(struct pipe_screen *pscreen) nouveau_notifier_free(&screen->query); nouveau_notifier_free(&screen->sync); nouveau_grobj_free(&screen->rankine); + nv04_surface_2d_takedown(&screen->eng2d); + + nouveau_screen_fini(&screen->base); FREE(pscreen); } @@ -224,7 +233,6 @@ nv30_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) NOUVEAU_ERR("Error creating 3D object: %d\n", ret); return FALSE; } - BIND_RING(chan, screen->rankine, 7); /* 2D engine setup */ screen->eng2d = nv04_surface_2d_init(&screen->base); @@ -261,7 +269,7 @@ nv30_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) } /* Static rankine initialisation */ - so = so_new(128, 0); + so = so_new(36, 60, 0); so_method(so, screen->rankine, NV34TCL_DMA_NOTIFY, 1); so_data (so, screen->sync->handle); so_method(so, screen->rankine, NV34TCL_DMA_TEXTURE0, 2); diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index b91e972c123..a80dfb04880 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -14,7 +14,7 @@ nv30_blend_state_create(struct pipe_context *pipe, struct nv30_context *nv30 = nv30_context(pipe); struct nouveau_grobj *rankine = nv30->screen->rankine; struct nv30_blend_state *bso = CALLOC(1, sizeof(*bso)); - struct nouveau_stateobj *so = so_new(16, 0); + struct nouveau_stateobj *so = so_new(5, 8, 0); if (cso->blend_enable) { so_method(so, rankine, NV34TCL_BLEND_FUNC_ENABLE, 3); @@ -300,7 +300,7 @@ nv30_rasterizer_state_create(struct pipe_context *pipe, { struct nv30_context *nv30 = nv30_context(pipe); struct nv30_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso)); - struct nouveau_stateobj *so = so_new(32, 0); + struct nouveau_stateobj *so = so_new(9, 19, 0); struct nouveau_grobj *rankine = nv30->screen->rankine; /*XXX: ignored: @@ -435,7 +435,7 @@ nv30_depth_stencil_alpha_state_create(struct pipe_context *pipe, { struct nv30_context *nv30 = nv30_context(pipe); struct nv30_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso)); - struct nouveau_stateobj *so = so_new(32, 0); + struct nouveau_stateobj *so = so_new(5, 21, 0); struct nouveau_grobj *rankine = nv30->screen->rankine; so_method(so, rankine, NV34TCL_DEPTH_FUNC, 3); @@ -672,16 +672,6 @@ nv30_set_vertex_elements(struct pipe_context *pipe, unsigned count, /*nv30->draw_dirty |= NV30_NEW_ARRAYS;*/ } -static void -nv30_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield) -{ - struct nv30_context *nv30 = nv30_context(pipe); - - nv30->edgeflags = bitfield; - nv30->dirty |= NV30_NEW_ARRAYS; - /*nv30->draw_dirty |= NV30_NEW_ARRAYS;*/ -} - void nv30_init_state_functions(struct nv30_context *nv30) { @@ -690,9 +680,9 @@ nv30_init_state_functions(struct nv30_context *nv30) nv30->pipe.delete_blend_state = nv30_blend_state_delete; nv30->pipe.create_sampler_state = nv30_sampler_state_create; - nv30->pipe.bind_sampler_states = nv30_sampler_state_bind; + nv30->pipe.bind_fragment_sampler_states = nv30_sampler_state_bind; nv30->pipe.delete_sampler_state = nv30_sampler_state_delete; - nv30->pipe.set_sampler_textures = nv30_set_sampler_texture; + nv30->pipe.set_fragment_sampler_textures = nv30_set_sampler_texture; nv30->pipe.create_rasterizer_state = nv30_rasterizer_state_create; nv30->pipe.bind_rasterizer_state = nv30_rasterizer_state_bind; @@ -721,7 +711,6 @@ nv30_init_state_functions(struct nv30_context *nv30) nv30->pipe.set_scissor_state = nv30_set_scissor_state; nv30->pipe.set_viewport_state = nv30_set_viewport_state; - nv30->pipe.set_edgeflags = nv30_set_edgeflags; nv30->pipe.set_vertex_buffers = nv30_set_vertex_buffers; nv30->pipe.set_vertex_elements = nv30_set_vertex_elements; } diff --git a/src/gallium/drivers/nv30/nv30_state.h b/src/gallium/drivers/nv30/nv30_state.h index e6f23bf1667..e42e872de75 100644 --- a/src/gallium/drivers/nv30/nv30_state.h +++ b/src/gallium/drivers/nv30/nv30_state.h @@ -72,6 +72,7 @@ struct nv30_fragment_program { struct nv30_miptree { struct pipe_texture base; + struct nouveau_bo *bo; struct pipe_buffer *buffer; uint total_size; diff --git a/src/gallium/drivers/nv30/nv30_state_blend.c b/src/gallium/drivers/nv30/nv30_state_blend.c index 64cf9ae93a0..c36d58c040c 100644 --- a/src/gallium/drivers/nv30/nv30_state_blend.c +++ b/src/gallium/drivers/nv30/nv30_state_blend.c @@ -18,7 +18,7 @@ struct nv30_state_entry nv30_state_blend = { static boolean nv30_state_blend_colour_validate(struct nv30_context *nv30) { - struct nouveau_stateobj *so = so_new(2, 0); + struct nouveau_stateobj *so = so_new(1, 1, 0); struct pipe_blend_color *bcol = &nv30->blend_colour; so_method(so, nv30->screen->rankine, NV34TCL_BLEND_COLOR, 1); diff --git a/src/gallium/drivers/nv30/nv30_state_emit.c b/src/gallium/drivers/nv30/nv30_state_emit.c index 621b8846c8e..ac52d946f02 100644 --- a/src/gallium/drivers/nv30/nv30_state_emit.c +++ b/src/gallium/drivers/nv30/nv30_state_emit.c @@ -41,7 +41,7 @@ nv30_state_emit(struct nv30_context *nv30) struct nouveau_channel *chan = nv30->screen->base.channel; struct nv30_state *state = &nv30->state; struct nv30_screen *screen = nv30->screen; - unsigned i, samplers; + unsigned i; uint64_t states; if (nv30->pctx_id != screen->cur_pctx) { @@ -63,6 +63,14 @@ nv30_state_emit(struct nv30_context *nv30) } state->dirty = 0; +} + +void +nv30_state_flush_notify(struct nouveau_channel *chan) +{ + struct nv30_context *nv30 = chan->user_private; + struct nv30_state *state = &nv30->state; + unsigned i, samplers; so_emit_reloc_markers(chan, state->hw[NV30_STATE_FB]); for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) { diff --git a/src/gallium/drivers/nv30/nv30_state_fb.c b/src/gallium/drivers/nv30/nv30_state_fb.c index 6f6d1740d6e..2ed2ea55e84 100644 --- a/src/gallium/drivers/nv30/nv30_state_fb.c +++ b/src/gallium/drivers/nv30/nv30_state_fb.c @@ -10,7 +10,7 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30) struct nv04_surface *rt[2], *zeta = NULL; uint32_t rt_enable = 0, rt_format = 0; int i, colour_format = 0, zeta_format = 0, depth_only = 0; - struct nouveau_stateobj *so = so_new(64, 10); + struct nouveau_stateobj *so = so_new(12, 18, 10); unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; unsigned w = fb->width; unsigned h = fb->height; diff --git a/src/gallium/drivers/nv30/nv30_state_scissor.c b/src/gallium/drivers/nv30/nv30_state_scissor.c index 3ac7a8471ea..ba61a9e24a4 100644 --- a/src/gallium/drivers/nv30/nv30_state_scissor.c +++ b/src/gallium/drivers/nv30/nv30_state_scissor.c @@ -12,7 +12,7 @@ nv30_state_scissor_validate(struct nv30_context *nv30) return FALSE; nv30->state.scissor_enabled = rast->scissor; - so = so_new(3, 0); + so = so_new(1, 2, 0); so_method(so, nv30->screen->rankine, NV34TCL_SCISSOR_HORIZ, 2); if (nv30->state.scissor_enabled) { so_data (so, ((s->maxx - s->minx) << 16) | s->minx); diff --git a/src/gallium/drivers/nv30/nv30_state_stipple.c b/src/gallium/drivers/nv30/nv30_state_stipple.c index d0c791ac082..ed520a4f439 100644 --- a/src/gallium/drivers/nv30/nv30_state_stipple.c +++ b/src/gallium/drivers/nv30/nv30_state_stipple.c @@ -14,14 +14,14 @@ nv30_state_stipple_validate(struct nv30_context *nv30) if (rast->poly_stipple_enable) { unsigned i; - so = so_new(35, 0); + so = so_new(2, 33, 0); so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); so_data (so, 1); so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_PATTERN(0), 32); for (i = 0; i < 32; i++) so_data(so, nv30->stipple[i]); } else { - so = so_new(2, 0); + so = so_new(1, 1, 0); so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); so_data (so, 0); } diff --git a/src/gallium/drivers/nv30/nv30_state_viewport.c b/src/gallium/drivers/nv30/nv30_state_viewport.c index c3eb413dac6..2d7781292bd 100644 --- a/src/gallium/drivers/nv30/nv30_state_viewport.c +++ b/src/gallium/drivers/nv30/nv30_state_viewport.c @@ -19,7 +19,7 @@ nv30_state_viewport_validate(struct nv30_context *nv30) return FALSE; nv30->state.viewport_bypass = bypass; - so = so_new(11, 0); + so = so_new(3, 10, 0); if (!bypass) { so_method(so, nv30->screen->rankine, NV34TCL_VIEWPORT_TRANSLATE_X, 8); diff --git a/src/gallium/drivers/nv30/nv30_transfer.c b/src/gallium/drivers/nv30/nv30_transfer.c index 98011decf7c..65598991c68 100644 --- a/src/gallium/drivers/nv30/nv30_transfer.c +++ b/src/gallium/drivers/nv30/nv30_transfer.c @@ -1,7 +1,9 @@ #include <pipe/p_state.h> #include <pipe/p_defines.h> #include <pipe/p_inlines.h> +#include <util/u_format.h> #include <util/u_memory.h> +#include <util/u_math.h> #include <nouveau/nouveau_winsys.h> #include "nv30_context.h" #include "nv30_screen.h" @@ -10,22 +12,19 @@ struct nv30_transfer { struct pipe_transfer base; struct pipe_surface *surface; - bool direct; + boolean direct; }; static void -nv30_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, +nv30_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height, struct pipe_texture *template) { memset(template, 0, sizeof(struct pipe_texture)); template->target = pt->target; template->format = pt->format; - template->width[0] = pt->width[level]; - template->height[0] = pt->height[level]; - template->depth[0] = 1; - template->block = pt->block; - template->nblocksx[0] = pt->nblocksx[level]; - template->nblocksy[0] = pt->nblocksx[level]; + template->width0 = width; + template->height0 = height; + template->depth0 = 1; template->last_level = 0; template->nr_samples = pt->nr_samples; @@ -48,14 +47,10 @@ nv30_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; pipe_texture_reference(&tx->base.texture, pt); - tx->base.format = pt->format; tx->base.x = x; tx->base.y = y; tx->base.width = w; tx->base.height = h; - tx->base.block = pt->block; - tx->base.nblocksx = pt->nblocksx[level]; - tx->base.nblocksy = pt->nblocksy[level]; tx->base.stride = mt->level[level].pitch; tx->base.usage = usage; tx->base.face = face; @@ -76,7 +71,7 @@ nv30_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->direct = false; - nv30_compatible_transfer_tex(pt, level, &tx_tex_template); + nv30_compatible_transfer_tex(pt, w, h, &tx_tex_template); tx_tex = pscreen->texture_create(pscreen, &tx_tex_template); if (!tx_tex) @@ -85,6 +80,8 @@ nv30_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; } + tx->base.stride = ((struct nv30_miptree*)tx_tex)->level[0].pitch; + tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, 0, 0, 0, pipe_transfer_buffer_flags(&tx->base)); @@ -110,8 +107,8 @@ nv30_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, /* TODO: Check if SIFM can un-swizzle */ nvscreen->eng2d->copy(nvscreen->eng2d, tx->surface, 0, 0, - src, 0, 0, - src->width, src->height); + src, x, y, + w, h); pipe_surface_reference(&src, NULL); } @@ -131,13 +128,13 @@ nv30_transfer_del(struct pipe_transfer *ptx) dst = pscreen->get_tex_surface(pscreen, ptx->texture, ptx->face, ptx->level, ptx->zslice, - PIPE_BUFFER_USAGE_GPU_WRITE); + PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER); /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ nvscreen->eng2d->copy(nvscreen->eng2d, - dst, 0, 0, + dst, tx->base.x, tx->base.y, tx->surface, 0, 0, - dst->width, dst->height); + tx->base.width, tx->base.height); pipe_surface_reference(&dst, NULL); } @@ -156,8 +153,10 @@ nv30_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) void *map = pipe_buffer_map(pscreen, mt->buffer, pipe_transfer_buffer_flags(ptx)); - return map + ns->base.offset + - ptx->y * ns->pitch + ptx->x * ptx->block.size; + if(!tx->direct) + return map + ns->base.offset; + else + return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); } static void diff --git a/src/gallium/drivers/nv30/nv30_vbo.c b/src/gallium/drivers/nv30/nv30_vbo.c index 189656ec817..1c5db03ea24 100644 --- a/src/gallium/drivers/nv30/nv30_vbo.c +++ b/src/gallium/drivers/nv30/nv30_vbo.c @@ -163,19 +163,21 @@ nv30_vbo_static_attrib(struct nv30_context *nv30, struct nouveau_stateobj *so, return TRUE; } -boolean +void nv30_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { struct nv30_context *nv30 = nv30_context(pipe); - struct nouveau_channel *chan = nv30->screen->base.channel; + struct nv30_screen *screen = nv30->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *rankine = screen->rankine; unsigned restart = 0; nv30_vbo_set_idxbuf(nv30, NULL, 0); if (FORCE_SWTNL || !nv30_state_validate(nv30)) { /*return nv30_draw_elements_swtnl(pipe, NULL, 0, mode, start, count);*/ - return FALSE; + return; } while (count) { @@ -186,17 +188,17 @@ nv30_draw_arrays(struct pipe_context *pipe, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256, mode, start, count, &restart); if (!vc) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); nr = (vc & 0xff); if (nr) { - BEGIN_RING(rankine, NV34TCL_VB_VERTEX_BATCH, 1); - OUT_RING (((nr - 1) << 24) | start); + BEGIN_RING(chan, rankine, NV34TCL_VB_VERTEX_BATCH, 1); + OUT_RING (chan, ((nr - 1) << 24) | start); start += nr; } @@ -206,15 +208,15 @@ nv30_draw_arrays(struct pipe_context *pipe, nr -= push; - BEGIN_RING_NI(rankine, NV34TCL_VB_VERTEX_BATCH, push); + BEGIN_RING_NI(chan, rankine, NV34TCL_VB_VERTEX_BATCH, push); while (push--) { - OUT_RING(((0x100 - 1) << 24) | start); + OUT_RING(chan, ((0x100 - 1) << 24) | start); start += 0x100; } } - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, 0); count -= vc; start = restart; @@ -228,7 +230,9 @@ static INLINE void nv30_draw_elements_u08(struct nv30_context *nv30, void *ib, unsigned mode, unsigned start, unsigned count) { - struct nouveau_channel *chan = nv30->screen->base.channel; + struct nv30_screen *screen = nv30->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *rankine = screen->rankine; while (count) { uint8_t *elts = (uint8_t *)ib + start; @@ -239,17 +243,17 @@ nv30_draw_elements_u08(struct nv30_context *nv30, void *ib, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2, mode, start, count, &restart); if (vc == 0) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } count -= vc; - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); if (vc & 1) { - BEGIN_RING(rankine, NV34TCL_VB_ELEMENT_U32, 1); - OUT_RING (elts[0]); + BEGIN_RING(chan, rankine, NV34TCL_VB_ELEMENT_U32, 1); + OUT_RING (chan, elts[0]); elts++; vc--; } @@ -258,16 +262,16 @@ nv30_draw_elements_u08(struct nv30_context *nv30, void *ib, push = MIN2(vc, 2047 * 2); - BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U16, push >> 1); + BEGIN_RING_NI(chan, rankine, NV34TCL_VB_ELEMENT_U16, push >> 1); for (i = 0; i < push; i+=2) - OUT_RING((elts[i+1] << 16) | elts[i]); + OUT_RING(chan, (elts[i+1] << 16) | elts[i]); vc -= push; elts += push; } - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, 0); start = restart; } @@ -277,7 +281,9 @@ static INLINE void nv30_draw_elements_u16(struct nv30_context *nv30, void *ib, unsigned mode, unsigned start, unsigned count) { - struct nouveau_channel *chan = nv30->screen->base.channel; + struct nv30_screen *screen = nv30->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *rankine = screen->rankine; while (count) { uint16_t *elts = (uint16_t *)ib + start; @@ -288,17 +294,17 @@ nv30_draw_elements_u16(struct nv30_context *nv30, void *ib, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2, mode, start, count, &restart); if (vc == 0) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } count -= vc; - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); if (vc & 1) { - BEGIN_RING(rankine, NV34TCL_VB_ELEMENT_U32, 1); - OUT_RING (elts[0]); + BEGIN_RING(chan, rankine, NV34TCL_VB_ELEMENT_U32, 1); + OUT_RING (chan, elts[0]); elts++; vc--; } @@ -307,16 +313,16 @@ nv30_draw_elements_u16(struct nv30_context *nv30, void *ib, push = MIN2(vc, 2047 * 2); - BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U16, push >> 1); + BEGIN_RING_NI(chan, rankine, NV34TCL_VB_ELEMENT_U16, push >> 1); for (i = 0; i < push; i+=2) - OUT_RING((elts[i+1] << 16) | elts[i]); + OUT_RING(chan, (elts[i+1] << 16) | elts[i]); vc -= push; elts += push; } - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, 0); start = restart; } @@ -326,7 +332,9 @@ static INLINE void nv30_draw_elements_u32(struct nv30_context *nv30, void *ib, unsigned mode, unsigned start, unsigned count) { - struct nouveau_channel *chan = nv30->screen->base.channel; + struct nv30_screen *screen = nv30->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *rankine = screen->rankine; while (count) { uint32_t *elts = (uint32_t *)ib + start; @@ -337,32 +345,32 @@ nv30_draw_elements_u32(struct nv30_context *nv30, void *ib, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 5, 1, mode, start, count, &restart); if (vc == 0) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } count -= vc; - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); while (vc) { push = MIN2(vc, 2047); - BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U32, push); - OUT_RINGp (elts, push); + BEGIN_RING_NI(chan, rankine, NV34TCL_VB_ELEMENT_U32, push); + OUT_RINGp (chan, elts, push); vc -= push; elts += push; } - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, 0); start = restart; } } -static boolean +static void nv30_draw_elements_inline(struct pipe_context *pipe, struct pipe_buffer *ib, unsigned ib_size, unsigned mode, unsigned start, unsigned count) @@ -393,15 +401,16 @@ nv30_draw_elements_inline(struct pipe_context *pipe, } pipe_buffer_unmap(pscreen, ib); - return TRUE; } -static boolean +static void nv30_draw_elements_vbo(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { struct nv30_context *nv30 = nv30_context(pipe); - struct nouveau_channel *chan = nv30->screen->base.channel; + struct nv30_screen *screen = nv30->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *rankine = screen->rankine; unsigned restart = 0; while (count) { @@ -412,17 +421,17 @@ nv30_draw_elements_vbo(struct pipe_context *pipe, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256, mode, start, count, &restart); if (!vc) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); nr = (vc & 0xff); if (nr) { - BEGIN_RING(rankine, NV34TCL_VB_INDEX_BATCH, 1); - OUT_RING (((nr - 1) << 24) | start); + BEGIN_RING(chan, rankine, NV34TCL_VB_INDEX_BATCH, 1); + OUT_RING (chan, ((nr - 1) << 24) | start); start += nr; } @@ -432,24 +441,22 @@ nv30_draw_elements_vbo(struct pipe_context *pipe, nr -= push; - BEGIN_RING_NI(rankine, NV34TCL_VB_INDEX_BATCH, push); + BEGIN_RING_NI(chan, rankine, NV34TCL_VB_INDEX_BATCH, push); while (push--) { - OUT_RING(((0x100 - 1) << 24) | start); + OUT_RING(chan, ((0x100 - 1) << 24) | start); start += 0x100; } } - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, 0); count -= vc; start = restart; } - - return TRUE; } -boolean +void nv30_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count) @@ -461,7 +468,7 @@ nv30_draw_elements(struct pipe_context *pipe, if (FORCE_SWTNL || !nv30_state_validate(nv30)) { /*return nv30_draw_elements_swtnl(pipe, NULL, 0, mode, start, count);*/ - return FALSE; + return; } if (idxbuf) { @@ -472,7 +479,6 @@ nv30_draw_elements(struct pipe_context *pipe, } pipe->flush(pipe, 0, NULL); - return TRUE; } static boolean @@ -485,14 +491,9 @@ nv30_vbo_validate(struct nv30_context *nv30) unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; int hw; - if (nv30->edgeflags) { - /*nv30->fallback_swtnl |= NV30_NEW_ARRAYS;*/ - return FALSE; - } - - vtxbuf = so_new(20, 18); + vtxbuf = so_new(3, 17, 18); so_method(vtxbuf, rankine, NV34TCL_VTXBUF_ADDRESS(0), nv30->vtxelt_nr); - vtxfmt = so_new(17, 0); + vtxfmt = so_new(1, 16, 0); so_method(vtxfmt, rankine, NV34TCL_VTXFMT(0), nv30->vtxelt_nr); for (hw = 0; hw < nv30->vtxelt_nr; hw++) { @@ -505,7 +506,7 @@ nv30_vbo_validate(struct nv30_context *nv30) if (!vb->stride) { if (!sattr) - sattr = so_new(16 * 5, 0); + sattr = so_new(16, 16 * 4, 0); if (nv30_vbo_static_attrib(nv30, sattr, hw, ve, vb)) { so_data(vtxbuf, 0); diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c index 14a5c0260d0..e77a5be3f23 100644 --- a/src/gallium/drivers/nv30/nv30_vertprog.c +++ b/src/gallium/drivers/nv30/nv30_vertprog.c @@ -253,32 +253,32 @@ static INLINE struct nv30_sreg tgsi_src(struct nv30_vpc *vpc, const struct tgsi_full_src_register *fsrc) { struct nv30_sreg src; - switch (fsrc->SrcRegister.File) { + switch (fsrc->Register.File) { case TGSI_FILE_INPUT: - src = nv30_sr(NV30SR_INPUT, fsrc->SrcRegister.Index); + src = nv30_sr(NV30SR_INPUT, fsrc->Register.Index); break; case TGSI_FILE_CONSTANT: - src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0); + src = constant(vpc, fsrc->Register.Index, 0, 0, 0, 0); break; case TGSI_FILE_IMMEDIATE: - src = vpc->imm[fsrc->SrcRegister.Index]; + src = vpc->imm[fsrc->Register.Index]; break; case TGSI_FILE_TEMPORARY: - if (vpc->high_temp < fsrc->SrcRegister.Index) - vpc->high_temp = fsrc->SrcRegister.Index; - src = nv30_sr(NV30SR_TEMP, fsrc->SrcRegister.Index); + if (vpc->high_temp < fsrc->Register.Index) + vpc->high_temp = fsrc->Register.Index; + src = nv30_sr(NV30SR_TEMP, fsrc->Register.Index); break; default: NOUVEAU_ERR("bad src file\n"); break; } - src.abs = fsrc->SrcRegisterExtMod.Absolute; - src.negate = fsrc->SrcRegister.Negate; - src.swz[0] = fsrc->SrcRegister.SwizzleX; - src.swz[1] = fsrc->SrcRegister.SwizzleY; - src.swz[2] = fsrc->SrcRegister.SwizzleZ; - src.swz[3] = fsrc->SrcRegister.SwizzleW; + src.abs = fsrc->Register.Absolute; + src.negate = fsrc->Register.Negate; + src.swz[0] = fsrc->Register.SwizzleX; + src.swz[1] = fsrc->Register.SwizzleY; + src.swz[2] = fsrc->Register.SwizzleZ; + src.swz[3] = fsrc->Register.SwizzleW; return src; } @@ -286,14 +286,14 @@ static INLINE struct nv30_sreg tgsi_dst(struct nv30_vpc *vpc, const struct tgsi_full_dst_register *fdst) { struct nv30_sreg dst; - switch (fdst->DstRegister.File) { + switch (fdst->Register.File) { case TGSI_FILE_OUTPUT: dst = nv30_sr(NV30SR_OUTPUT, - vpc->output_map[fdst->DstRegister.Index]); + vpc->output_map[fdst->Register.Index]); break; case TGSI_FILE_TEMPORARY: - dst = nv30_sr(NV30SR_TEMP, fdst->DstRegister.Index); + dst = nv30_sr(NV30SR_TEMP, fdst->Register.Index); if (vpc->high_temp < dst.index) vpc->high_temp = dst.index; break; @@ -334,8 +334,8 @@ nv30_vertprog_parse_instruction(struct nv30_vpc *vpc, for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fsrc; - fsrc = &finst->FullSrcRegisters[i]; - if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { + fsrc = &finst->Src[i]; + if (fsrc->Register.File == TGSI_FILE_TEMPORARY) { src[i] = tgsi_src(vpc, fsrc); } } @@ -343,11 +343,11 @@ nv30_vertprog_parse_instruction(struct nv30_vpc *vpc, for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fsrc; - fsrc = &finst->FullSrcRegisters[i]; - switch (fsrc->SrcRegister.File) { + fsrc = &finst->Src[i]; + switch (fsrc->Register.File) { case TGSI_FILE_INPUT: - if (ai == -1 || ai == fsrc->SrcRegister.Index) { - ai = fsrc->SrcRegister.Index; + if (ai == -1 || ai == fsrc->Register.Index) { + ai = fsrc->Register.Index; src[i] = tgsi_src(vpc, fsrc); } else { src[i] = temp(vpc); @@ -360,8 +360,8 @@ nv30_vertprog_parse_instruction(struct nv30_vpc *vpc, */ case TGSI_FILE_CONSTANT: case TGSI_FILE_IMMEDIATE: - if (ci == -1 || ci == fsrc->SrcRegister.Index) { - ci = fsrc->SrcRegister.Index; + if (ci == -1 || ci == fsrc->Register.Index) { + ci = fsrc->Register.Index; src[i] = tgsi_src(vpc, fsrc); } else { src[i] = temp(vpc); @@ -378,8 +378,8 @@ nv30_vertprog_parse_instruction(struct nv30_vpc *vpc, } } - dst = tgsi_dst(vpc, &finst->FullDstRegisters[0]); - mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); + dst = tgsi_dst(vpc, &finst->Dst[0]); + mask = tgsi_mask(finst->Dst[0].Register.WriteMask); switch (finst->Instruction.Opcode) { case TGSI_OPCODE_ABS: @@ -490,15 +490,15 @@ nv30_vertprog_parse_decl_output(struct nv30_vpc *vpc, { int hw; - switch (fdec->Semantic.SemanticName) { + switch (fdec->Semantic.Name) { case TGSI_SEMANTIC_POSITION: hw = NV30_VP_INST_DEST_POS; break; case TGSI_SEMANTIC_COLOR: - if (fdec->Semantic.SemanticIndex == 0) { + if (fdec->Semantic.Index == 0) { hw = NV30_VP_INST_DEST_COL0; } else - if (fdec->Semantic.SemanticIndex == 1) { + if (fdec->Semantic.Index == 1) { hw = NV30_VP_INST_DEST_COL1; } else { NOUVEAU_ERR("bad colour semantic index\n"); @@ -506,10 +506,10 @@ nv30_vertprog_parse_decl_output(struct nv30_vpc *vpc, } break; case TGSI_SEMANTIC_BCOLOR: - if (fdec->Semantic.SemanticIndex == 0) { + if (fdec->Semantic.Index == 0) { hw = NV30_VP_INST_DEST_BFC0; } else - if (fdec->Semantic.SemanticIndex == 1) { + if (fdec->Semantic.Index == 1) { hw = NV30_VP_INST_DEST_BFC1; } else { NOUVEAU_ERR("bad bcolour semantic index\n"); @@ -523,19 +523,22 @@ nv30_vertprog_parse_decl_output(struct nv30_vpc *vpc, hw = NV30_VP_INST_DEST_PSZ; break; case TGSI_SEMANTIC_GENERIC: - if (fdec->Semantic.SemanticIndex <= 7) { - hw = NV30_VP_INST_DEST_TC(fdec->Semantic.SemanticIndex); + if (fdec->Semantic.Index <= 7) { + hw = NV30_VP_INST_DEST_TC(fdec->Semantic.Index); } else { NOUVEAU_ERR("bad generic semantic index\n"); return FALSE; } break; + case TGSI_SEMANTIC_EDGEFLAG: + NOUVEAU_ERR("cannot handle edgeflag output\n"); + return FALSE; default: NOUVEAU_ERR("bad output semantic\n"); return FALSE; } - vpc->output_map[fdec->DeclarationRange.First] = hw; + vpc->output_map[fdec->Range.First] = hw; return TRUE; } @@ -647,7 +650,9 @@ static boolean nv30_vertprog_validate(struct nv30_context *nv30) { struct pipe_screen *pscreen = nv30->pipe.screen; - struct nouveau_grobj *rankine = nv30->screen->rankine; + struct nv30_screen *screen = nv30->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *rankine = screen->rankine; struct nv30_vertex_program *vp; struct pipe_buffer *constbuf; boolean upload_code = FALSE, upload_data = FALSE; @@ -681,7 +686,7 @@ nv30_vertprog_validate(struct nv30_context *nv30) assert(0); } - so = so_new(2, 0); + so = so_new(1, 1, 0); so_method(so, rankine, NV34TCL_VP_START_FROM_ID, 1); so_data (so, vp->exec->start); so_ref(so, &vp->so); @@ -767,9 +772,9 @@ nv30_vertprog_validate(struct nv30_context *nv30) 4 * sizeof(float)); } - BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_CONST_ID, 5); - OUT_RING (i + vp->data->start); - OUT_RINGp ((uint32_t *)vpd->value, 4); + BEGIN_RING(chan, rankine, NV34TCL_VP_UPLOAD_CONST_ID, 5); + OUT_RING (chan, i + vp->data->start); + OUT_RINGp (chan, (uint32_t *)vpd->value, 4); } if (constbuf) @@ -785,11 +790,11 @@ nv30_vertprog_validate(struct nv30_context *nv30) vp->insns[i].data[2], vp->insns[i].data[3]); } #endif - BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_FROM_ID, 1); - OUT_RING (vp->exec->start); + BEGIN_RING(chan, rankine, NV34TCL_VP_UPLOAD_FROM_ID, 1); + OUT_RING (chan, vp->exec->start); for (i = 0; i < vp->nr_insns; i++) { - BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_INST(0), 4); - OUT_RINGp (vp->insns[i].data, 4); + BEGIN_RING(chan, rankine, NV34TCL_VP_UPLOAD_INST(0), 4); + OUT_RINGp (chan, vp->insns[i].data, 4); } } diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c index 7f008274a4e..f79ae4db84e 100644 --- a/src/gallium/drivers/nv40/nv40_context.c +++ b/src/gallium/drivers/nv40/nv40_context.c @@ -10,21 +10,32 @@ nv40_flush(struct pipe_context *pipe, unsigned flags, struct pipe_fence_handle **fence) { struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; if (flags & PIPE_FLUSH_TEXTURE_CACHE) { - BEGIN_RING(curie, 0x1fd8, 1); - OUT_RING (2); - BEGIN_RING(curie, 0x1fd8, 1); - OUT_RING (1); + BEGIN_RING(chan, curie, 0x1fd8, 1); + OUT_RING (chan, 2); + BEGIN_RING(chan, curie, 0x1fd8, 1); + OUT_RING (chan, 1); } - FIRE_RING(fence); + FIRE_RING(chan); + if (fence) + *fence = NULL; } static void nv40_destroy(struct pipe_context *pipe) { struct nv40_context *nv40 = nv40_context(pipe); + unsigned i; + + for (i = 0; i < NV40_STATE_MAX; i++) { + if (nv40->state.hw[i]) + so_ref(NULL, &nv40->state.hw[i]); + } if (nv40->draw) draw_destroy(nv40->draw); @@ -58,6 +69,9 @@ nv40_create(struct pipe_screen *pscreen, unsigned pctx_id) nv40->pipe.is_texture_referenced = nouveau_is_texture_referenced; nv40->pipe.is_buffer_referenced = nouveau_is_buffer_referenced; + screen->base.channel->user_private = nv40; + screen->base.channel->flush_notify = nv40_state_flush_notify; + nv40_init_query_functions(nv40); nv40_init_surface_functions(nv40); nv40_init_state_functions(nv40); diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index a3d594167aa..e219bb537ac 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -14,10 +14,6 @@ #include "nouveau/nouveau_winsys.h" #include "nouveau/nouveau_gldefs.h" #include "nouveau/nouveau_context.h" - -#define NOUVEAU_PUSH_CONTEXT(ctx) \ - struct nv40_screen *ctx = nv40->screen -#include "nouveau/nouveau_push.h" #include "nouveau/nouveau_stateobj.h" #include "nv40_state.h" @@ -159,7 +155,6 @@ struct nv40_context { unsigned vtxbuf_nr; struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS]; unsigned vtxelt_nr; - const unsigned *edgeflags; }; static INLINE struct nv40_context * @@ -184,7 +179,7 @@ extern void nv40_screen_init_miptree_functions(struct pipe_screen *pscreen); /* nv40_draw.c */ extern struct draw_stage *nv40_draw_render_stage(struct nv40_context *nv40); -extern boolean nv40_draw_elements_swtnl(struct pipe_context *pipe, +extern void nv40_draw_elements_swtnl(struct pipe_context *pipe, struct pipe_buffer *idxbuf, unsigned ib_size, unsigned mode, unsigned start, unsigned count); @@ -204,6 +199,7 @@ extern void nv40_fragtex_bind(struct nv40_context *); extern boolean nv40_state_validate(struct nv40_context *nv40); extern boolean nv40_state_validate_swtnl(struct nv40_context *nv40); extern void nv40_state_emit(struct nv40_context *nv40); +extern void nv40_state_flush_notify(struct nouveau_channel *chan); extern struct nv40_state_entry nv40_state_rasterizer; extern struct nv40_state_entry nv40_state_scissor; extern struct nv40_state_entry nv40_state_stipple; @@ -219,9 +215,9 @@ extern struct nv40_state_entry nv40_state_vbo; extern struct nv40_state_entry nv40_state_vtxfmt; /* nv40_vbo.c */ -extern boolean nv40_draw_arrays(struct pipe_context *, unsigned mode, +extern void nv40_draw_arrays(struct pipe_context *, unsigned mode, unsigned start, unsigned count); -extern boolean nv40_draw_elements(struct pipe_context *pipe, +extern void nv40_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c index b2f19ecb699..d826f8c2f5f 100644 --- a/src/gallium/drivers/nv40/nv40_draw.c +++ b/src/gallium/drivers/nv40/nv40_draw.c @@ -31,6 +31,9 @@ nv40_render_stage(struct draw_stage *stage) static INLINE void nv40_render_vertex(struct nv40_context *nv40, const struct vertex_header *v) { + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; unsigned i; for (i = 0; i < nv40->swtnl.nr_attribs; i++) { @@ -41,30 +44,30 @@ nv40_render_vertex(struct nv40_context *nv40, const struct vertex_header *v) case EMIT_OMIT: break; case EMIT_1F: - BEGIN_RING(curie, NV40TCL_VTX_ATTR_1F(hw), 1); - OUT_RING (fui(v->data[idx][0])); + BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_1F(hw), 1); + OUT_RING (chan, fui(v->data[idx][0])); break; case EMIT_2F: - BEGIN_RING(curie, NV40TCL_VTX_ATTR_2F_X(hw), 2); - OUT_RING (fui(v->data[idx][0])); - OUT_RING (fui(v->data[idx][1])); + BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_2F_X(hw), 2); + OUT_RING (chan, fui(v->data[idx][0])); + OUT_RING (chan, fui(v->data[idx][1])); break; case EMIT_3F: - BEGIN_RING(curie, NV40TCL_VTX_ATTR_3F_X(hw), 3); - OUT_RING (fui(v->data[idx][0])); - OUT_RING (fui(v->data[idx][1])); - OUT_RING (fui(v->data[idx][2])); + BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_3F_X(hw), 3); + OUT_RING (chan, fui(v->data[idx][0])); + OUT_RING (chan, fui(v->data[idx][1])); + OUT_RING (chan, fui(v->data[idx][2])); break; case EMIT_4F: - BEGIN_RING(curie, NV40TCL_VTX_ATTR_4F_X(hw), 4); - OUT_RING (fui(v->data[idx][0])); - OUT_RING (fui(v->data[idx][1])); - OUT_RING (fui(v->data[idx][2])); - OUT_RING (fui(v->data[idx][3])); + BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_4F_X(hw), 4); + OUT_RING (chan, fui(v->data[idx][0])); + OUT_RING (chan, fui(v->data[idx][1])); + OUT_RING (chan, fui(v->data[idx][2])); + OUT_RING (chan, fui(v->data[idx][3])); break; case EMIT_4UB: - BEGIN_RING(curie, NV40TCL_VTX_ATTR_4UB(hw), 1); - OUT_RING (pack_ub4(float_to_ubyte(v->data[idx][0]), + BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_4UB(hw), 1); + OUT_RING (chan, pack_ub4(float_to_ubyte(v->data[idx][0]), float_to_ubyte(v->data[idx][1]), float_to_ubyte(v->data[idx][2]), float_to_ubyte(v->data[idx][3]))); @@ -82,7 +85,11 @@ nv40_render_prim(struct draw_stage *stage, struct prim_header *prim, { struct nv40_render_stage *rs = nv40_render_stage(stage); struct nv40_context *nv40 = rs->nv40; - struct nouveau_pushbuf *pb = nv40->screen->base.channel->pushbuf; + + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_pushbuf *pb = chan->pushbuf; + struct nouveau_grobj *curie = screen->curie; unsigned i; /* Ensure there's room for 4xfloat32 + potentially 3 begin/end */ @@ -91,19 +98,19 @@ nv40_render_prim(struct draw_stage *stage, struct prim_header *prim, NOUVEAU_ERR("AIII, missed flush\n"); assert(0); } - FIRE_RING(NULL); + FIRE_RING(chan); nv40_state_emit(nv40); } /* Switch primitive modes if necessary */ if (rs->prim != mode) { if (rs->prim != NV40TCL_BEGIN_END_STOP) { - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (NV40TCL_BEGIN_END_STOP); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, NV40TCL_BEGIN_END_STOP); } - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (mode); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, mode); rs->prim = mode; } @@ -115,8 +122,8 @@ nv40_render_prim(struct draw_stage *stage, struct prim_header *prim, * off the primitive now. */ if (pb->remaining < ((count * 20) + 6)) { - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (NV40TCL_BEGIN_END_STOP); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, NV40TCL_BEGIN_END_STOP); rs->prim = NV40TCL_BEGIN_END_STOP; } } @@ -144,10 +151,13 @@ nv40_render_flush(struct draw_stage *draw, unsigned flags) { struct nv40_render_stage *rs = nv40_render_stage(draw); struct nv40_context *nv40 = rs->nv40; + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; if (rs->prim != NV40TCL_BEGIN_END_STOP) { - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (NV40TCL_BEGIN_END_STOP); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, NV40TCL_BEGIN_END_STOP); rs->prim = NV40TCL_BEGIN_END_STOP; } } @@ -226,7 +236,7 @@ nv40_draw_render_stage(struct nv40_context *nv40) return &render->stage; } -boolean +void nv40_draw_elements_swtnl(struct pipe_context *pipe, struct pipe_buffer *idxbuf, unsigned idxbuf_size, unsigned mode, unsigned start, unsigned count) @@ -237,7 +247,7 @@ nv40_draw_elements_swtnl(struct pipe_context *pipe, void *map; if (!nv40_state_validate_swtnl(nv40)) - return FALSE; + return; nv40->state.dirty &= ~(1ULL << NV40_STATE_VTXBUF); nv40_state_emit(nv40); @@ -261,7 +271,8 @@ nv40_draw_elements_swtnl(struct pipe_context *pipe, map = pipe_buffer_map(pscreen, nv40->constbuf[PIPE_SHADER_VERTEX], PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_constant_buffer(nv40->draw, map, nr); + draw_set_mapped_constant_buffer(nv40->draw, PIPE_SHADER_VERTEX, + map, nr); } draw_arrays(nv40->draw, mode, start, count); @@ -277,15 +288,13 @@ nv40_draw_elements_swtnl(struct pipe_context *pipe, draw_flush(nv40->draw); pipe->flush(pipe, 0, NULL); - - return TRUE; } static INLINE void emit_attrib(struct nv40_context *nv40, unsigned hw, unsigned emit, unsigned semantic, unsigned index) { - unsigned draw_out = draw_find_vs_output(nv40->draw, semantic, index); + unsigned draw_out = draw_find_shader_output(nv40->draw, semantic, index); unsigned a = nv40->swtnl.nr_attribs++; nv40->swtnl.hw[a] = hw; diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index 99277506fc2..1237066c398 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -149,7 +149,7 @@ emit_src(struct nv40_fpc *fpc, int pos, struct nv40_sreg src) sizeof(uint32_t) * 4); } - sr |= (NV40_FP_REG_TYPE_CONST << NV40_FP_REG_TYPE_SHIFT); + sr |= (NV40_FP_REG_TYPE_CONST << NV40_FP_REG_TYPE_SHIFT); break; case NV40SR_NONE: sr |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT); @@ -255,50 +255,50 @@ tgsi_src(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc) { struct nv40_sreg src; - switch (fsrc->SrcRegister.File) { + switch (fsrc->Register.File) { case TGSI_FILE_INPUT: src = nv40_sr(NV40SR_INPUT, - fpc->attrib_map[fsrc->SrcRegister.Index]); + fpc->attrib_map[fsrc->Register.Index]); break; case TGSI_FILE_CONSTANT: - src = constant(fpc, fsrc->SrcRegister.Index, NULL); + src = constant(fpc, fsrc->Register.Index, NULL); break; case TGSI_FILE_IMMEDIATE: - assert(fsrc->SrcRegister.Index < fpc->nr_imm); - src = fpc->imm[fsrc->SrcRegister.Index]; + assert(fsrc->Register.Index < fpc->nr_imm); + src = fpc->imm[fsrc->Register.Index]; break; case TGSI_FILE_TEMPORARY: - src = fpc->r_temp[fsrc->SrcRegister.Index]; + src = fpc->r_temp[fsrc->Register.Index]; break; /* NV40 fragprog result regs are just temps, so this is simple */ case TGSI_FILE_OUTPUT: - src = fpc->r_result[fsrc->SrcRegister.Index]; + src = fpc->r_result[fsrc->Register.Index]; break; default: NOUVEAU_ERR("bad src file\n"); break; } - src.abs = fsrc->SrcRegisterExtMod.Absolute; - src.negate = fsrc->SrcRegister.Negate; - src.swz[0] = fsrc->SrcRegister.SwizzleX; - src.swz[1] = fsrc->SrcRegister.SwizzleY; - src.swz[2] = fsrc->SrcRegister.SwizzleZ; - src.swz[3] = fsrc->SrcRegister.SwizzleW; + src.abs = fsrc->Register.Absolute; + src.negate = fsrc->Register.Negate; + src.swz[0] = fsrc->Register.SwizzleX; + src.swz[1] = fsrc->Register.SwizzleY; + src.swz[2] = fsrc->Register.SwizzleZ; + src.swz[3] = fsrc->Register.SwizzleW; return src; } static INLINE struct nv40_sreg tgsi_dst(struct nv40_fpc *fpc, const struct tgsi_full_dst_register *fdst) { - switch (fdst->DstRegister.File) { + switch (fdst->Register.File) { case TGSI_FILE_OUTPUT: - return fpc->r_result[fdst->DstRegister.Index]; + return fpc->r_result[fdst->Register.Index]; case TGSI_FILE_TEMPORARY: - return fpc->r_temp[fdst->DstRegister.Index]; + return fpc->r_temp[fdst->Register.Index]; case TGSI_FILE_NULL: return nv40_sr(NV40SR_NONE, 0); default: - NOUVEAU_ERR("bad dst file %d\n", fdst->DstRegister.File); + NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File); return nv40_sr(NV40SR_NONE, 0); } } @@ -364,8 +364,8 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fsrc; - fsrc = &finst->FullSrcRegisters[i]; - if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { + fsrc = &finst->Src[i]; + if (fsrc->Register.File == TGSI_FILE_TEMPORARY) { src[i] = tgsi_src(fpc, fsrc); } } @@ -373,9 +373,9 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fsrc; - fsrc = &finst->FullSrcRegisters[i]; + fsrc = &finst->Src[i]; - switch (fsrc->SrcRegister.File) { + switch (fsrc->Register.File) { case TGSI_FILE_INPUT: case TGSI_FILE_CONSTANT: case TGSI_FILE_TEMPORARY: @@ -386,10 +386,10 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, break; } - switch (fsrc->SrcRegister.File) { + switch (fsrc->Register.File) { case TGSI_FILE_INPUT: - if (ai == -1 || ai == fsrc->SrcRegister.Index) { - ai = fsrc->SrcRegister.Index; + if (ai == -1 || ai == fsrc->Register.Index) { + ai = fsrc->Register.Index; src[i] = tgsi_src(fpc, fsrc); } else { src[i] = temp(fpc); @@ -399,8 +399,8 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, break; case TGSI_FILE_CONSTANT: if ((ci == -1 && ii == -1) || - ci == fsrc->SrcRegister.Index) { - ci = fsrc->SrcRegister.Index; + ci == fsrc->Register.Index) { + ci = fsrc->Register.Index; src[i] = tgsi_src(fpc, fsrc); } else { src[i] = temp(fpc); @@ -410,8 +410,8 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, break; case TGSI_FILE_IMMEDIATE: if ((ci == -1 && ii == -1) || - ii == fsrc->SrcRegister.Index) { - ii = fsrc->SrcRegister.Index; + ii == fsrc->Register.Index) { + ii = fsrc->Register.Index; src[i] = tgsi_src(fpc, fsrc); } else { src[i] = temp(fpc); @@ -423,7 +423,7 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, /* handled above */ break; case TGSI_FILE_SAMPLER: - unit = fsrc->SrcRegister.Index; + unit = fsrc->Register.Index; break; case TGSI_FILE_OUTPUT: break; @@ -433,8 +433,8 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, } } - dst = tgsi_dst(fpc, &finst->FullDstRegisters[0]); - mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); + dst = tgsi_dst(fpc, &finst->Dst[0]); + mask = tgsi_mask(finst->Dst[0].Register.WriteMask); sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE); switch (finst->Instruction.Opcode) { @@ -445,10 +445,11 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, arith(fpc, sat, ADD, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_CMP: - tmp = temp(fpc); - arith(fpc, sat, MOV, dst, mask, src[2], none, none); + tmp = nv40_sr(NV40SR_NONE, 0); tmp.cc_update = 1; arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none); + dst.cc_test = NV40_VP_INST_COND_GE; + arith(fpc, sat, MOV, dst, mask, src[2], none, none); dst.cc_test = NV40_VP_INST_COND_LT; arith(fpc, sat, MOV, dst, mask, src[1], none, none); break; @@ -573,13 +574,28 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, neg(swz(tmp, X, X, X, X)), none, none); break; case TGSI_OPCODE_SCS: - if (mask & MASK_X) { - arith(fpc, sat, COS, dst, MASK_X, - swz(src[0], X, X, X, X), none, none); + /* avoid overwriting the source */ + if(src[0].swz[SWZ_X] != SWZ_X) + { + if (mask & MASK_X) { + arith(fpc, sat, COS, dst, MASK_X, + swz(src[0], X, X, X, X), none, none); + } + if (mask & MASK_Y) { + arith(fpc, sat, SIN, dst, MASK_Y, + swz(src[0], X, X, X, X), none, none); + } } - if (mask & MASK_Y) { - arith(fpc, sat, SIN, dst, MASK_Y, - swz(src[0], X, X, X, X), none, none); + else + { + if (mask & MASK_Y) { + arith(fpc, sat, SIN, dst, MASK_Y, + swz(src[0], X, X, X, X), none, none); + } + if (mask & MASK_X) { + arith(fpc, sat, COS, dst, MASK_X, + swz(src[0], X, X, X, X), none, none); + } } break; case TGSI_OPCODE_SEQ: @@ -644,15 +660,15 @@ nv40_fragprog_parse_decl_attrib(struct nv40_fpc *fpc, { int hw; - switch (fdec->Semantic.SemanticName) { + switch (fdec->Semantic.Name) { case TGSI_SEMANTIC_POSITION: hw = NV40_FP_OP_INPUT_SRC_POSITION; break; case TGSI_SEMANTIC_COLOR: - if (fdec->Semantic.SemanticIndex == 0) { + if (fdec->Semantic.Index == 0) { hw = NV40_FP_OP_INPUT_SRC_COL0; } else - if (fdec->Semantic.SemanticIndex == 1) { + if (fdec->Semantic.Index == 1) { hw = NV40_FP_OP_INPUT_SRC_COL1; } else { NOUVEAU_ERR("bad colour semantic index\n"); @@ -663,9 +679,9 @@ nv40_fragprog_parse_decl_attrib(struct nv40_fpc *fpc, hw = NV40_FP_OP_INPUT_SRC_FOGC; break; case TGSI_SEMANTIC_GENERIC: - if (fdec->Semantic.SemanticIndex <= 7) { + if (fdec->Semantic.Index <= 7) { hw = NV40_FP_OP_INPUT_SRC_TC(fdec->Semantic. - SemanticIndex); + Index); } else { NOUVEAU_ERR("bad generic semantic index\n"); return FALSE; @@ -676,7 +692,7 @@ nv40_fragprog_parse_decl_attrib(struct nv40_fpc *fpc, return FALSE; } - fpc->attrib_map[fdec->DeclarationRange.First] = hw; + fpc->attrib_map[fdec->Range.First] = hw; return TRUE; } @@ -684,15 +700,15 @@ static boolean nv40_fragprog_parse_decl_output(struct nv40_fpc *fpc, const struct tgsi_full_declaration *fdec) { - unsigned idx = fdec->DeclarationRange.First; + unsigned idx = fdec->Range.First; unsigned hw; - switch (fdec->Semantic.SemanticName) { + switch (fdec->Semantic.Name) { case TGSI_SEMANTIC_POSITION: hw = 1; break; case TGSI_SEMANTIC_COLOR: - switch (fdec->Semantic.SemanticIndex) { + switch (fdec->Semantic.Index) { case 0: hw = 0; break; case 1: hw = 2; break; case 2: hw = 3; break; @@ -738,9 +754,9 @@ nv40_fragprog_prepare(struct nv40_fpc *fpc) goto out_err; break; case TGSI_FILE_TEMPORARY: - if (fdec->DeclarationRange.Last > high_temp) { + if (fdec->Range.Last > high_temp) { high_temp = - fdec->DeclarationRange.Last; + fdec->Range.Last; } break; default: @@ -752,7 +768,7 @@ nv40_fragprog_prepare(struct nv40_fpc *fpc) { struct tgsi_full_immediate *imm; float vals[4]; - + imm = &p.FullToken.FullImmediate; assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); assert(fpc->nr_imm < MAX_IMM); @@ -836,7 +852,7 @@ nv40_fragprog_translate(struct nv40_context *nv40, fp->insn[fpc->inst_offset + 1] = 0x00000000; fp->insn[fpc->inst_offset + 2] = 0x00000000; fp->insn[fpc->inst_offset + 3] = 0x00000000; - + fp->translated = TRUE; out_err: tgsi_parse_free(&parse); @@ -903,7 +919,7 @@ nv40_fragprog_validate(struct nv40_context *nv40) fp->buffer = pscreen->buffer_create(pscreen, 0x100, 0, fp->insn_len * 4); nv40_fragprog_upload(nv40, fp); - so = so_new(4, 1); + so = so_new(2, 2, 1); so_method(so, nv40->screen->curie, NV40TCL_FP_ADDRESS, 1); so_reloc (so, nouveau_bo(fp->buffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW | @@ -917,7 +933,7 @@ nv40_fragprog_validate(struct nv40_context *nv40) update_constants: if (fp->nr_consts) { float *map; - + map = pipe_buffer_map(pscreen, constbuf, PIPE_BUFFER_USAGE_CPU_READ); for (i = 0; i < fp->nr_consts; i++) { @@ -948,6 +964,12 @@ void nv40_fragprog_destroy(struct nv40_context *nv40, struct nv40_fragment_program *fp) { + if (fp->buffer) + pipe_buffer_reference(&fp->buffer, NULL); + + if (fp->so) + so_ref(NULL, &fp->so); + if (fp->insn_len) FREE(fp->insn); } diff --git a/src/gallium/drivers/nv40/nv40_fragtex.c b/src/gallium/drivers/nv40/nv40_fragtex.c index e2ec57564d1..aad9198210f 100644 --- a/src/gallium/drivers/nv40/nv40_fragtex.c +++ b/src/gallium/drivers/nv40/nv40_fragtex.c @@ -108,7 +108,7 @@ nv40_fragtex_build(struct nv40_context *nv40, int unit) txs = tf->swizzle; - so = so_new(16, 2); + so = so_new(2, 9, 2); so_method(so, nv40->screen->curie, NV40TCL_TEX_OFFSET(unit), 8); so_reloc (so, bo, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0); so_reloc (so, bo, txf, tex_flags | NOUVEAU_BO_OR, @@ -117,11 +117,11 @@ nv40_fragtex_build(struct nv40_context *nv40, int unit) so_data (so, NV40TCL_TEX_ENABLE_ENABLE | ps->en); so_data (so, txs); so_data (so, ps->filt | tf->sign | 0x2000 /*voodoo*/); - so_data (so, (pt->width[0] << NV40TCL_TEX_SIZE0_W_SHIFT) | - pt->height[0]); + so_data (so, (pt->width0 << NV40TCL_TEX_SIZE0_W_SHIFT) | + pt->height0); so_data (so, ps->bcol); so_method(so, nv40->screen->curie, NV40TCL_TEX_SIZE1(unit), 1); - so_data (so, (pt->depth[0] << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) | txp); + so_data (so, (pt->depth0 << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) | txp); return so; } @@ -139,7 +139,7 @@ nv40_fragtex_validate(struct nv40_context *nv40) unit = ffs(samplers) - 1; samplers &= ~(1 << unit); - so = so_new(2, 0); + so = so_new(1, 1, 0); so_method(so, nv40->screen->curie, NV40TCL_TEX_ENABLE(unit), 1); so_data (so, 0); so_ref(so, &nv40->state.hw[NV40_STATE_FRAGTEX0 + unit]); diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c index 465dd3b0693..89bd155ff49 100644 --- a/src/gallium/drivers/nv40/nv40_miptree.c +++ b/src/gallium/drivers/nv40/nv40_miptree.c @@ -1,14 +1,19 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" +#include "util/u_format.h" +#include "util/u_math.h" #include "nv40_context.h" +#include "../nv04/nv04_surface_2d.h" + + static void nv40_miptree_layout(struct nv40_miptree *mt) { struct pipe_texture *pt = &mt->base; - uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0]; + uint width = pt->width0; uint offset = 0; int nr_faces, l, f; uint wide_pitch = pt->tex_usage & (PIPE_TEXTURE_USAGE_SAMPLER | @@ -21,29 +26,21 @@ nv40_miptree_layout(struct nv40_miptree *mt) nr_faces = 6; } else if (pt->target == PIPE_TEXTURE_3D) { - nr_faces = pt->depth[0]; + nr_faces = pt->depth0; } else { nr_faces = 1; } for (l = 0; l <= pt->last_level; l++) { - pt->width[l] = width; - pt->height[l] = height; - pt->depth[l] = depth; - pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); - pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); - if (wide_pitch && (pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) - mt->level[l].pitch = align(pt->width[0] * pt->block.size, 64); + mt->level[l].pitch = align(util_format_get_stride(pt->format, pt->width0), 64); else - mt->level[l].pitch = pt->width[l] * pt->block.size; + mt->level[l].pitch = util_format_get_stride(pt->format, width); mt->level[l].image_offset = CALLOC(nr_faces, sizeof(unsigned)); - width = MAX2(1, width >> 1); - height = MAX2(1, height >> 1); - depth = MAX2(1, depth >> 1); + width = u_minify(width, 1); } for (f = 0; f < nr_faces; f++) { @@ -51,14 +48,14 @@ nv40_miptree_layout(struct nv40_miptree *mt) mt->level[l].image_offset[f] = offset; if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR) && - pt->width[l + 1] > 1 && pt->height[l + 1] > 1) - offset += align(mt->level[l].pitch * pt->height[l], 64); + u_minify(pt->width0, l + 1) > 1 && u_minify(pt->height0, l + 1) > 1) + offset += align(mt->level[l].pitch * u_minify(pt->height0, l), 64); else - offset += mt->level[l].pitch * pt->height[l]; + offset += mt->level[l].pitch * u_minify(pt->height0, l); } mt->level[l].image_offset[f] = offset; - offset += mt->level[l].pitch * pt->height[l]; + offset += mt->level[l].pitch * u_minify(pt->height0, l); } mt->total_size = offset; @@ -79,8 +76,8 @@ nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) mt->base.screen = pscreen; /* Swizzled textures must be POT */ - if (pt->width[0] & (pt->width[0] - 1) || - pt->height[0] & (pt->height[0] - 1)) + if (pt->width0 & (pt->width0 - 1) || + pt->height0 & (pt->height0 - 1)) mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; else if (pt->tex_usage & (PIPE_TEXTURE_USAGE_PRIMARY | @@ -109,6 +106,12 @@ nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE; + /* apparently we can't render to swizzled surfaces smaller than 64 bytes, so make them linear. + * If the user did not ask for a render target, they can still render to it, but it will cost them an extra copy. + * This also happens for small mipmaps of large textures. */ + if (pt->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET && util_format_get_stride(pt->format, pt->width0) < 64) + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + nv40_miptree_layout(mt); mt->buffer = pscreen->buffer_create(pscreen, 256, buf_usage, mt->total_size); @@ -116,7 +119,7 @@ nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) FREE(mt); return NULL; } - + mt->bo = nouveau_bo(mt->buffer); return &mt->base; } @@ -128,7 +131,7 @@ nv40_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, /* Only supports 2D, non-mipmapped textures for the moment */ if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 || - pt->depth[0] != 1) + pt->depth0 != 1) return NULL; mt = CALLOC_STRUCT(nv40_miptree); @@ -145,6 +148,7 @@ nv40_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; pipe_buffer_reference(&mt->buffer, pb); + mt->bo = nouveau_bo(mt->buffer); return &mt->base; } @@ -176,8 +180,8 @@ nv40_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; pipe_texture_reference(&ns->base.texture, pt); ns->base.format = pt->format; - ns->base.width = pt->width[level]; - ns->base.height = pt->height[level]; + ns->base.width = u_minify(pt->width0, level); + ns->base.height = u_minify(pt->height0, level); ns->base.usage = flags; pipe_reference_init(&ns->base.reference, 1); ns->base.face = face; @@ -194,12 +198,27 @@ nv40_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, ns->base.offset = mt->level[level].image_offset[0]; } + /* create a linear temporary that we can render into if necessary. + * Note that ns->pitch is always a multiple of 64 for linear surfaces and swizzled surfaces are POT, so + * ns->pitch & 63 is equivalent to (ns->pitch < 64 && swizzled)*/ + if((ns->pitch & 63) && (ns->base.usage & (PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER)) == PIPE_BUFFER_USAGE_GPU_WRITE) + return &nv04_surface_wrap_for_render(pscreen, ((struct nv40_screen*)pscreen)->eng2d, ns)->base; + return &ns->base; } static void nv40_miptree_surface_del(struct pipe_surface *ps) { + struct nv04_surface* ns = (struct nv04_surface*)ps; + if(ns->backing) + { + struct nv40_screen* screen = (struct nv40_screen*)ps->texture->screen; + if(ns->backing->base.usage & PIPE_BUFFER_USAGE_GPU_WRITE) + screen->eng2d->copy(screen->eng2d, &ns->backing->base, 0, 0, ps, 0, 0, ns->base.width, ns->base.height); + nv40_miptree_surface_del(&ns->backing->base); + } + pipe_texture_reference(&ps->texture, NULL); FREE(ps); } diff --git a/src/gallium/drivers/nv40/nv40_query.c b/src/gallium/drivers/nv40/nv40_query.c index 7874aedd428..8ed4a67dd03 100644 --- a/src/gallium/drivers/nv40/nv40_query.c +++ b/src/gallium/drivers/nv40/nv40_query.c @@ -41,6 +41,9 @@ nv40_query_begin(struct pipe_context *pipe, struct pipe_query *pq) { struct nv40_context *nv40 = nv40_context(pipe); struct nv40_query *q = nv40_query(pq); + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER); @@ -57,10 +60,10 @@ nv40_query_begin(struct pipe_context *pipe, struct pipe_query *pq) assert(0); nouveau_notifier_reset(nv40->screen->query, q->object->start); - BEGIN_RING(curie, NV40TCL_QUERY_RESET, 1); - OUT_RING (1); - BEGIN_RING(curie, NV40TCL_QUERY_UNK17CC, 1); - OUT_RING (1); + BEGIN_RING(chan, curie, NV40TCL_QUERY_RESET, 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, curie, NV40TCL_QUERY_UNK17CC, 1); + OUT_RING (chan, 1); q->ready = FALSE; } @@ -70,11 +73,14 @@ nv40_query_end(struct pipe_context *pipe, struct pipe_query *pq) { struct nv40_context *nv40 = nv40_context(pipe); struct nv40_query *q = nv40_query(pq); + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; - BEGIN_RING(curie, NV40TCL_QUERY_GET, 1); - OUT_RING ((0x01 << NV40TCL_QUERY_GET_UNK24_SHIFT) | + BEGIN_RING(chan, curie, NV40TCL_QUERY_GET, 1); + OUT_RING (chan, (0x01 << NV40TCL_QUERY_GET_UNK24_SHIFT) | ((q->object->start * 32) << NV40TCL_QUERY_GET_OFFSET_SHIFT)); - FIRE_RING(NULL); + FIRE_RING(chan); } static boolean diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c index bd13dfddd1c..9e55e5a089c 100644 --- a/src/gallium/drivers/nv40/nv40_screen.c +++ b/src/gallium/drivers/nv40/nv40_screen.c @@ -140,6 +140,12 @@ static void nv40_screen_destroy(struct pipe_screen *pscreen) { struct nv40_screen *screen = nv40_screen(pscreen); + unsigned i; + + for (i = 0; i < NV40_STATE_MAX; i++) { + if (screen->state[i]) + so_ref(NULL, &screen->state[i]); + } nouveau_resource_free(&screen->vp_exec_heap); nouveau_resource_free(&screen->vp_data_heap); @@ -147,6 +153,7 @@ nv40_screen_destroy(struct pipe_screen *pscreen) nouveau_notifier_free(&screen->query); nouveau_notifier_free(&screen->sync); nouveau_grobj_free(&screen->curie); + nv04_surface_2d_takedown(&screen->eng2d); nouveau_screen_fini(&screen->base); @@ -208,7 +215,6 @@ nv40_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) NOUVEAU_ERR("Error creating 3D object: %d\n", ret); return FALSE; } - BIND_RING(chan, screen->curie, 7); /* 2D engine setup */ screen->eng2d = nv04_surface_2d_init(&screen->base); @@ -245,7 +251,7 @@ nv40_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) } /* Static curie initialisation */ - so = so_new(128, 0); + so = so_new(16, 25, 0); so_method(so, screen->curie, NV40TCL_DMA_NOTIFY, 1); so_data (so, screen->sync->handle); so_method(so, screen->curie, NV40TCL_DMA_TEXTURE0, 2); diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c index c3ee4d23453..ed0ca9e02c3 100644 --- a/src/gallium/drivers/nv40/nv40_state.c +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -16,7 +16,7 @@ nv40_blend_state_create(struct pipe_context *pipe, struct nv40_context *nv40 = nv40_context(pipe); struct nouveau_grobj *curie = nv40->screen->curie; struct nv40_blend_state *bso = CALLOC(1, sizeof(*bso)); - struct nouveau_stateobj *so = so_new(16, 0); + struct nouveau_stateobj *so = so_new(5, 8, 0); if (cso->blend_enable) { so_method(so, curie, NV40TCL_BLEND_ENABLE, 3); @@ -310,7 +310,7 @@ nv40_rasterizer_state_create(struct pipe_context *pipe, { struct nv40_context *nv40 = nv40_context(pipe); struct nv40_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso)); - struct nouveau_stateobj *so = so_new(32, 0); + struct nouveau_stateobj *so = so_new(8, 18, 0); struct nouveau_grobj *curie = nv40->screen->curie; /*XXX: ignored: @@ -445,7 +445,7 @@ nv40_depth_stencil_alpha_state_create(struct pipe_context *pipe, { struct nv40_context *nv40 = nv40_context(pipe); struct nv40_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso)); - struct nouveau_stateobj *so = so_new(32, 0); + struct nouveau_stateobj *so = so_new(4, 21, 0); struct nouveau_grobj *curie = nv40->screen->curie; so_method(so, curie, NV40TCL_DEPTH_FUNC, 3); @@ -687,16 +687,6 @@ nv40_set_vertex_elements(struct pipe_context *pipe, unsigned count, nv40->draw_dirty |= NV40_NEW_ARRAYS; } -static void -nv40_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield) -{ - struct nv40_context *nv40 = nv40_context(pipe); - - nv40->edgeflags = bitfield; - nv40->dirty |= NV40_NEW_ARRAYS; - nv40->draw_dirty |= NV40_NEW_ARRAYS; -} - void nv40_init_state_functions(struct nv40_context *nv40) { @@ -705,9 +695,9 @@ nv40_init_state_functions(struct nv40_context *nv40) nv40->pipe.delete_blend_state = nv40_blend_state_delete; nv40->pipe.create_sampler_state = nv40_sampler_state_create; - nv40->pipe.bind_sampler_states = nv40_sampler_state_bind; + nv40->pipe.bind_fragment_sampler_states = nv40_sampler_state_bind; nv40->pipe.delete_sampler_state = nv40_sampler_state_delete; - nv40->pipe.set_sampler_textures = nv40_set_sampler_texture; + nv40->pipe.set_fragment_sampler_textures = nv40_set_sampler_texture; nv40->pipe.create_rasterizer_state = nv40_rasterizer_state_create; nv40->pipe.bind_rasterizer_state = nv40_rasterizer_state_bind; @@ -736,7 +726,6 @@ nv40_init_state_functions(struct nv40_context *nv40) nv40->pipe.set_scissor_state = nv40_set_scissor_state; nv40->pipe.set_viewport_state = nv40_set_viewport_state; - nv40->pipe.set_edgeflags = nv40_set_edgeflags; nv40->pipe.set_vertex_buffers = nv40_set_vertex_buffers; nv40->pipe.set_vertex_elements = nv40_set_vertex_elements; } diff --git a/src/gallium/drivers/nv40/nv40_state.h b/src/gallium/drivers/nv40/nv40_state.h index 8a9d8c8fdf6..192074e7471 100644 --- a/src/gallium/drivers/nv40/nv40_state.h +++ b/src/gallium/drivers/nv40/nv40_state.h @@ -75,6 +75,7 @@ struct nv40_fragment_program { struct nv40_miptree { struct pipe_texture base; + struct nouveau_bo *bo; struct pipe_buffer *buffer; uint total_size; diff --git a/src/gallium/drivers/nv40/nv40_state_blend.c b/src/gallium/drivers/nv40/nv40_state_blend.c index 8cd05ce66ef..3ff00a37f66 100644 --- a/src/gallium/drivers/nv40/nv40_state_blend.c +++ b/src/gallium/drivers/nv40/nv40_state_blend.c @@ -18,7 +18,7 @@ struct nv40_state_entry nv40_state_blend = { static boolean nv40_state_blend_colour_validate(struct nv40_context *nv40) { - struct nouveau_stateobj *so = so_new(2, 0); + struct nouveau_stateobj *so = so_new(1, 1, 0); struct pipe_blend_color *bcol = &nv40->blend_colour; so_method(so, nv40->screen->curie, NV40TCL_BLEND_COLOR, 1); diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c index 198692965dc..13fe854915b 100644 --- a/src/gallium/drivers/nv40/nv40_state_emit.c +++ b/src/gallium/drivers/nv40/nv40_state_emit.c @@ -54,10 +54,11 @@ nv40_state_do_validate(struct nv40_context *nv40, void nv40_state_emit(struct nv40_context *nv40) { - struct nouveau_channel *chan = nv40->screen->base.channel; struct nv40_state *state = &nv40->state; struct nv40_screen *screen = nv40->screen; - unsigned i, samplers; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; + unsigned i; uint64_t states; if (nv40->pctx_id != screen->cur_pctx) { @@ -80,13 +81,21 @@ nv40_state_emit(struct nv40_context *nv40) if (state->dirty & ((1ULL << NV40_STATE_FRAGPROG) | (1ULL << NV40_STATE_FRAGTEX0))) { - BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1); - OUT_RING (2); - BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1); - OUT_RING (1); + BEGIN_RING(chan, curie, NV40TCL_TEX_CACHE_CTL, 1); + OUT_RING (chan, 2); + BEGIN_RING(chan, curie, NV40TCL_TEX_CACHE_CTL, 1); + OUT_RING (chan, 1); } state->dirty = 0; +} + +void +nv40_state_flush_notify(struct nouveau_channel *chan) +{ + struct nv40_context *nv40 = chan->user_private; + struct nv40_state *state = &nv40->state; + unsigned i, samplers; so_emit_reloc_markers(chan, state->hw[NV40_STATE_FB]); for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) { @@ -160,7 +169,6 @@ nv40_state_validate_swtnl(struct nv40_context *nv40) draw_set_viewport_state(draw, &nv40->viewport); if (nv40->draw_dirty & NV40_NEW_ARRAYS) { - draw_set_edgeflags(draw, nv40->edgeflags); draw_set_vertex_buffers(draw, nv40->vtxbuf_nr, nv40->vtxbuf); draw_set_vertex_elements(draw, nv40->vtxelt_nr, nv40->vtxelt); } diff --git a/src/gallium/drivers/nv40/nv40_state_fb.c b/src/gallium/drivers/nv40/nv40_state_fb.c index 1c7a7cd64f0..a58fe9ddb19 100644 --- a/src/gallium/drivers/nv40/nv40_state_fb.c +++ b/src/gallium/drivers/nv40/nv40_state_fb.c @@ -19,7 +19,7 @@ nv40_state_framebuffer_validate(struct nv40_context *nv40) struct nv04_surface *rt[4], *zeta; uint32_t rt_enable, rt_format; int i, colour_format = 0, zeta_format = 0; - struct nouveau_stateobj *so = so_new(64, 10); + struct nouveau_stateobj *so = so_new(18, 24, 10); unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; unsigned w = fb->width; unsigned h = fb->height; diff --git a/src/gallium/drivers/nv40/nv40_state_scissor.c b/src/gallium/drivers/nv40/nv40_state_scissor.c index cf58d33906a..753a505e934 100644 --- a/src/gallium/drivers/nv40/nv40_state_scissor.c +++ b/src/gallium/drivers/nv40/nv40_state_scissor.c @@ -12,7 +12,7 @@ nv40_state_scissor_validate(struct nv40_context *nv40) return FALSE; nv40->state.scissor_enabled = rast->scissor; - so = so_new(3, 0); + so = so_new(1, 2, 0); so_method(so, nv40->screen->curie, NV40TCL_SCISSOR_HORIZ, 2); if (nv40->state.scissor_enabled) { so_data (so, ((s->maxx - s->minx) << 16) | s->minx); diff --git a/src/gallium/drivers/nv40/nv40_state_stipple.c b/src/gallium/drivers/nv40/nv40_state_stipple.c index b51024ad9b2..2b371ebfec0 100644 --- a/src/gallium/drivers/nv40/nv40_state_stipple.c +++ b/src/gallium/drivers/nv40/nv40_state_stipple.c @@ -14,14 +14,14 @@ nv40_state_stipple_validate(struct nv40_context *nv40) if (rast->poly_stipple_enable) { unsigned i; - so = so_new(35, 0); + so = so_new(2, 33, 0); so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); so_data (so, 1); so_method(so, curie, NV40TCL_POLYGON_STIPPLE_PATTERN(0), 32); for (i = 0; i < 32; i++) so_data(so, nv40->stipple[i]); } else { - so = so_new(2, 0); + so = so_new(1, 1, 0); so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); so_data (so, 0); } diff --git a/src/gallium/drivers/nv40/nv40_state_viewport.c b/src/gallium/drivers/nv40/nv40_state_viewport.c index 665d2d5fcac..9919ba1d0b0 100644 --- a/src/gallium/drivers/nv40/nv40_state_viewport.c +++ b/src/gallium/drivers/nv40/nv40_state_viewport.c @@ -19,7 +19,7 @@ nv40_state_viewport_validate(struct nv40_context *nv40) return FALSE; nv40->state.viewport_bypass = bypass; - so = so_new(11, 0); + so = so_new(2, 9, 0); if (!bypass) { so_method(so, nv40->screen->curie, NV40TCL_VIEWPORT_TRANSLATE_X, 8); diff --git a/src/gallium/drivers/nv40/nv40_transfer.c b/src/gallium/drivers/nv40/nv40_transfer.c index 92caee6f382..791ee6823d3 100644 --- a/src/gallium/drivers/nv40/nv40_transfer.c +++ b/src/gallium/drivers/nv40/nv40_transfer.c @@ -1,7 +1,9 @@ #include <pipe/p_state.h> #include <pipe/p_defines.h> #include <pipe/p_inlines.h> +#include <util/u_format.h> #include <util/u_memory.h> +#include <util/u_math.h> #include <nouveau/nouveau_winsys.h> #include "nv40_context.h" #include "nv40_screen.h" @@ -10,22 +12,19 @@ struct nv40_transfer { struct pipe_transfer base; struct pipe_surface *surface; - bool direct; + boolean direct; }; static void -nv40_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, +nv40_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height, struct pipe_texture *template) { memset(template, 0, sizeof(struct pipe_texture)); template->target = pt->target; template->format = pt->format; - template->width[0] = pt->width[level]; - template->height[0] = pt->height[level]; - template->depth[0] = 1; - template->block = pt->block; - template->nblocksx[0] = pt->nblocksx[level]; - template->nblocksy[0] = pt->nblocksx[level]; + template->width0 = width; + template->height0 = height; + template->depth0 = 1; template->last_level = 0; template->nr_samples = pt->nr_samples; @@ -48,14 +47,10 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; pipe_texture_reference(&tx->base.texture, pt); - tx->base.format = pt->format; tx->base.x = x; tx->base.y = y; tx->base.width = w; tx->base.height = h; - tx->base.block = pt->block; - tx->base.nblocksx = pt->nblocksx[level]; - tx->base.nblocksy = pt->nblocksy[level]; tx->base.stride = mt->level[level].pitch; tx->base.usage = usage; tx->base.face = face; @@ -76,7 +71,7 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->direct = false; - nv40_compatible_transfer_tex(pt, level, &tx_tex_template); + nv40_compatible_transfer_tex(pt, w, h, &tx_tex_template); tx_tex = pscreen->texture_create(pscreen, &tx_tex_template); if (!tx_tex) @@ -85,6 +80,8 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; } + tx->base.stride = ((struct nv40_miptree*)tx_tex)->level[0].pitch; + tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, 0, 0, 0, pipe_transfer_buffer_flags(&tx->base)); @@ -110,8 +107,8 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, /* TODO: Check if SIFM can un-swizzle */ nvscreen->eng2d->copy(nvscreen->eng2d, tx->surface, 0, 0, - src, 0, 0, - src->width, src->height); + src, x, y, + w, h); pipe_surface_reference(&src, NULL); } @@ -131,13 +128,13 @@ nv40_transfer_del(struct pipe_transfer *ptx) dst = pscreen->get_tex_surface(pscreen, ptx->texture, ptx->face, ptx->level, ptx->zslice, - PIPE_BUFFER_USAGE_GPU_WRITE); + PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER); /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ nvscreen->eng2d->copy(nvscreen->eng2d, - dst, 0, 0, + dst, tx->base.x, tx->base.y, tx->surface, 0, 0, - dst->width, dst->height); + tx->base.width, tx->base.height); pipe_surface_reference(&dst, NULL); } @@ -156,8 +153,10 @@ nv40_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) void *map = pipe_buffer_map(pscreen, mt->buffer, pipe_transfer_buffer_flags(ptx)); - return map + ns->base.offset + - ptx->y * ns->pitch + ptx->x * ptx->block.size; + if(!tx->direct) + return map + ns->base.offset; + else + return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); } static void diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c index b2753b8e2e0..a777898f688 100644 --- a/src/gallium/drivers/nv40/nv40_vbo.c +++ b/src/gallium/drivers/nv40/nv40_vbo.c @@ -164,18 +164,21 @@ nv40_vbo_static_attrib(struct nv40_context *nv40, struct nouveau_stateobj *so, return TRUE; } -boolean +void nv40_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { struct nv40_context *nv40 = nv40_context(pipe); - struct nouveau_channel *chan = nv40->screen->base.channel; + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; unsigned restart; nv40_vbo_set_idxbuf(nv40, NULL, 0); if (FORCE_SWTNL || !nv40_state_validate(nv40)) { - return nv40_draw_elements_swtnl(pipe, NULL, 0, - mode, start, count); + nv40_draw_elements_swtnl(pipe, NULL, 0, + mode, start, count); + return; } while (count) { @@ -186,17 +189,17 @@ nv40_draw_arrays(struct pipe_context *pipe, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256, mode, start, count, &restart); if (!vc) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); nr = (vc & 0xff); if (nr) { - BEGIN_RING(curie, NV40TCL_VB_VERTEX_BATCH, 1); - OUT_RING (((nr - 1) << 24) | start); + BEGIN_RING(chan, curie, NV40TCL_VB_VERTEX_BATCH, 1); + OUT_RING (chan, ((nr - 1) << 24) | start); start += nr; } @@ -206,29 +209,30 @@ nv40_draw_arrays(struct pipe_context *pipe, nr -= push; - BEGIN_RING_NI(curie, NV40TCL_VB_VERTEX_BATCH, push); + BEGIN_RING_NI(chan, curie, NV40TCL_VB_VERTEX_BATCH, push); while (push--) { - OUT_RING(((0x100 - 1) << 24) | start); + OUT_RING(chan, ((0x100 - 1) << 24) | start); start += 0x100; } } - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, 0); count -= vc; start = restart; } pipe->flush(pipe, 0, NULL); - return TRUE; } static INLINE void nv40_draw_elements_u08(struct nv40_context *nv40, void *ib, unsigned mode, unsigned start, unsigned count) { - struct nouveau_channel *chan = nv40->screen->base.channel; + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; while (count) { uint8_t *elts = (uint8_t *)ib + start; @@ -239,17 +243,17 @@ nv40_draw_elements_u08(struct nv40_context *nv40, void *ib, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2, mode, start, count, &restart); if (vc == 0) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } count -= vc; - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); if (vc & 1) { - BEGIN_RING(curie, NV40TCL_VB_ELEMENT_U32, 1); - OUT_RING (elts[0]); + BEGIN_RING(chan, curie, NV40TCL_VB_ELEMENT_U32, 1); + OUT_RING (chan, elts[0]); elts++; vc--; } @@ -258,16 +262,16 @@ nv40_draw_elements_u08(struct nv40_context *nv40, void *ib, push = MIN2(vc, 2047 * 2); - BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1); + BEGIN_RING_NI(chan, curie, NV40TCL_VB_ELEMENT_U16, push >> 1); for (i = 0; i < push; i+=2) - OUT_RING((elts[i+1] << 16) | elts[i]); + OUT_RING(chan, (elts[i+1] << 16) | elts[i]); vc -= push; elts += push; } - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, 0); start = restart; } @@ -277,7 +281,9 @@ static INLINE void nv40_draw_elements_u16(struct nv40_context *nv40, void *ib, unsigned mode, unsigned start, unsigned count) { - struct nouveau_channel *chan = nv40->screen->base.channel; + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; while (count) { uint16_t *elts = (uint16_t *)ib + start; @@ -288,17 +294,17 @@ nv40_draw_elements_u16(struct nv40_context *nv40, void *ib, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2, mode, start, count, &restart); if (vc == 0) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } count -= vc; - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); if (vc & 1) { - BEGIN_RING(curie, NV40TCL_VB_ELEMENT_U32, 1); - OUT_RING (elts[0]); + BEGIN_RING(chan, curie, NV40TCL_VB_ELEMENT_U32, 1); + OUT_RING (chan, elts[0]); elts++; vc--; } @@ -307,16 +313,16 @@ nv40_draw_elements_u16(struct nv40_context *nv40, void *ib, push = MIN2(vc, 2047 * 2); - BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1); + BEGIN_RING_NI(chan, curie, NV40TCL_VB_ELEMENT_U16, push >> 1); for (i = 0; i < push; i+=2) - OUT_RING((elts[i+1] << 16) | elts[i]); + OUT_RING(chan, (elts[i+1] << 16) | elts[i]); vc -= push; elts += push; } - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, 0); start = restart; } @@ -326,7 +332,9 @@ static INLINE void nv40_draw_elements_u32(struct nv40_context *nv40, void *ib, unsigned mode, unsigned start, unsigned count) { - struct nouveau_channel *chan = nv40->screen->base.channel; + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; while (count) { uint32_t *elts = (uint32_t *)ib + start; @@ -337,32 +345,32 @@ nv40_draw_elements_u32(struct nv40_context *nv40, void *ib, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 5, 1, mode, start, count, &restart); if (vc == 0) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } count -= vc; - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); while (vc) { push = MIN2(vc, 2047); - BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U32, push); - OUT_RINGp (elts, push); + BEGIN_RING_NI(chan, curie, NV40TCL_VB_ELEMENT_U32, push); + OUT_RINGp (chan, elts, push); vc -= push; elts += push; } - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, 0); start = restart; } } -static boolean +static void nv40_draw_elements_inline(struct pipe_context *pipe, struct pipe_buffer *ib, unsigned ib_size, unsigned mode, unsigned start, unsigned count) @@ -393,15 +401,16 @@ nv40_draw_elements_inline(struct pipe_context *pipe, } pipe_buffer_unmap(pscreen, ib); - return TRUE; } -static boolean +static void nv40_draw_elements_vbo(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { struct nv40_context *nv40 = nv40_context(pipe); - struct nouveau_channel *chan = nv40->screen->base.channel; + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; unsigned restart; while (count) { @@ -412,17 +421,17 @@ nv40_draw_elements_vbo(struct pipe_context *pipe, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256, mode, start, count, &restart); if (!vc) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); nr = (vc & 0xff); if (nr) { - BEGIN_RING(curie, NV40TCL_VB_INDEX_BATCH, 1); - OUT_RING (((nr - 1) << 24) | start); + BEGIN_RING(chan, curie, NV40TCL_VB_INDEX_BATCH, 1); + OUT_RING (chan, ((nr - 1) << 24) | start); start += nr; } @@ -432,24 +441,22 @@ nv40_draw_elements_vbo(struct pipe_context *pipe, nr -= push; - BEGIN_RING_NI(curie, NV40TCL_VB_INDEX_BATCH, push); + BEGIN_RING_NI(chan, curie, NV40TCL_VB_INDEX_BATCH, push); while (push--) { - OUT_RING(((0x100 - 1) << 24) | start); + OUT_RING(chan, ((0x100 - 1) << 24) | start); start += 0x100; } } - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, 0); count -= vc; start = restart; } - - return TRUE; } -boolean +void nv40_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count) @@ -459,8 +466,9 @@ nv40_draw_elements(struct pipe_context *pipe, idxbuf = nv40_vbo_set_idxbuf(nv40, indexBuffer, indexSize); if (FORCE_SWTNL || !nv40_state_validate(nv40)) { - return nv40_draw_elements_swtnl(pipe, NULL, 0, - mode, start, count); + nv40_draw_elements_swtnl(pipe, NULL, 0, + mode, start, count); + return; } if (idxbuf) { @@ -471,7 +479,6 @@ nv40_draw_elements(struct pipe_context *pipe, } pipe->flush(pipe, 0, NULL); - return TRUE; } static boolean @@ -484,14 +491,9 @@ nv40_vbo_validate(struct nv40_context *nv40) unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; int hw; - if (nv40->edgeflags) { - nv40->fallback_swtnl |= NV40_NEW_ARRAYS; - return FALSE; - } - - vtxbuf = so_new(20, 18); + vtxbuf = so_new(3, 17, 18); so_method(vtxbuf, curie, NV40TCL_VTXBUF_ADDRESS(0), nv40->vtxelt_nr); - vtxfmt = so_new(17, 0); + vtxfmt = so_new(1, 16, 0); so_method(vtxfmt, curie, NV40TCL_VTXFMT(0), nv40->vtxelt_nr); for (hw = 0; hw < nv40->vtxelt_nr; hw++) { @@ -504,7 +506,7 @@ nv40_vbo_validate(struct nv40_context *nv40) if (!vb->stride) { if (!sattr) - sattr = so_new(16 * 5, 0); + sattr = so_new(16, 16 * 4, 0); if (nv40_vbo_static_attrib(nv40, sattr, hw, ve, vb)) { so_data(vtxbuf, 0); diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c index 31dae2457fd..8d80fcad38e 100644 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -295,30 +295,30 @@ static INLINE struct nv40_sreg tgsi_src(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc) { struct nv40_sreg src; - switch (fsrc->SrcRegister.File) { + switch (fsrc->Register.File) { case TGSI_FILE_INPUT: - src = nv40_sr(NV40SR_INPUT, fsrc->SrcRegister.Index); + src = nv40_sr(NV40SR_INPUT, fsrc->Register.Index); break; case TGSI_FILE_CONSTANT: - src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0); + src = constant(vpc, fsrc->Register.Index, 0, 0, 0, 0); break; case TGSI_FILE_IMMEDIATE: - src = vpc->imm[fsrc->SrcRegister.Index]; + src = vpc->imm[fsrc->Register.Index]; break; case TGSI_FILE_TEMPORARY: - src = vpc->r_temp[fsrc->SrcRegister.Index]; + src = vpc->r_temp[fsrc->Register.Index]; break; default: NOUVEAU_ERR("bad src file\n"); break; } - src.abs = fsrc->SrcRegisterExtMod.Absolute; - src.negate = fsrc->SrcRegister.Negate; - src.swz[0] = fsrc->SrcRegister.SwizzleX; - src.swz[1] = fsrc->SrcRegister.SwizzleY; - src.swz[2] = fsrc->SrcRegister.SwizzleZ; - src.swz[3] = fsrc->SrcRegister.SwizzleW; + src.abs = fsrc->Register.Absolute; + src.negate = fsrc->Register.Negate; + src.swz[0] = fsrc->Register.SwizzleX; + src.swz[1] = fsrc->Register.SwizzleY; + src.swz[2] = fsrc->Register.SwizzleZ; + src.swz[3] = fsrc->Register.SwizzleW; return src; } @@ -326,15 +326,15 @@ static INLINE struct nv40_sreg tgsi_dst(struct nv40_vpc *vpc, const struct tgsi_full_dst_register *fdst) { struct nv40_sreg dst; - switch (fdst->DstRegister.File) { + switch (fdst->Register.File) { case TGSI_FILE_OUTPUT: - dst = vpc->r_result[fdst->DstRegister.Index]; + dst = vpc->r_result[fdst->Register.Index]; break; case TGSI_FILE_TEMPORARY: - dst = vpc->r_temp[fdst->DstRegister.Index]; + dst = vpc->r_temp[fdst->Register.Index]; break; case TGSI_FILE_ADDRESS: - dst = vpc->r_address[fdst->DstRegister.Index]; + dst = vpc->r_address[fdst->Register.Index]; break; default: NOUVEAU_ERR("bad dst file\n"); @@ -405,8 +405,8 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fsrc; - fsrc = &finst->FullSrcRegisters[i]; - if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { + fsrc = &finst->Src[i]; + if (fsrc->Register.File == TGSI_FILE_TEMPORARY) { src[i] = tgsi_src(vpc, fsrc); } } @@ -414,9 +414,9 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fsrc; - fsrc = &finst->FullSrcRegisters[i]; + fsrc = &finst->Src[i]; - switch (fsrc->SrcRegister.File) { + switch (fsrc->Register.File) { case TGSI_FILE_INPUT: case TGSI_FILE_CONSTANT: case TGSI_FILE_TEMPORARY: @@ -427,10 +427,10 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, break; } - switch (fsrc->SrcRegister.File) { + switch (fsrc->Register.File) { case TGSI_FILE_INPUT: - if (ai == -1 || ai == fsrc->SrcRegister.Index) { - ai = fsrc->SrcRegister.Index; + if (ai == -1 || ai == fsrc->Register.Index) { + ai = fsrc->Register.Index; src[i] = tgsi_src(vpc, fsrc); } else { src[i] = temp(vpc); @@ -440,8 +440,8 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, break; case TGSI_FILE_CONSTANT: if ((ci == -1 && ii == -1) || - ci == fsrc->SrcRegister.Index) { - ci = fsrc->SrcRegister.Index; + ci == fsrc->Register.Index) { + ci = fsrc->Register.Index; src[i] = tgsi_src(vpc, fsrc); } else { src[i] = temp(vpc); @@ -451,8 +451,8 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, break; case TGSI_FILE_IMMEDIATE: if ((ci == -1 && ii == -1) || - ii == fsrc->SrcRegister.Index) { - ii = fsrc->SrcRegister.Index; + ii == fsrc->Register.Index) { + ii = fsrc->Register.Index; src[i] = tgsi_src(vpc, fsrc); } else { src[i] = temp(vpc); @@ -469,8 +469,8 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, } } - dst = tgsi_dst(vpc, &finst->FullDstRegisters[0]); - mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); + dst = tgsi_dst(vpc, &finst->Dst[0]); + mask = tgsi_mask(finst->Dst[0].Register.WriteMask); switch (finst->Instruction.Opcode) { case TGSI_OPCODE_ABS: @@ -577,19 +577,19 @@ static boolean nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc, const struct tgsi_full_declaration *fdec) { - unsigned idx = fdec->DeclarationRange.First; + unsigned idx = fdec->Range.First; int hw; - switch (fdec->Semantic.SemanticName) { + switch (fdec->Semantic.Name) { case TGSI_SEMANTIC_POSITION: hw = NV40_VP_INST_DEST_POS; vpc->hpos_idx = idx; break; case TGSI_SEMANTIC_COLOR: - if (fdec->Semantic.SemanticIndex == 0) { + if (fdec->Semantic.Index == 0) { hw = NV40_VP_INST_DEST_COL0; } else - if (fdec->Semantic.SemanticIndex == 1) { + if (fdec->Semantic.Index == 1) { hw = NV40_VP_INST_DEST_COL1; } else { NOUVEAU_ERR("bad colour semantic index\n"); @@ -597,10 +597,10 @@ nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc, } break; case TGSI_SEMANTIC_BCOLOR: - if (fdec->Semantic.SemanticIndex == 0) { + if (fdec->Semantic.Index == 0) { hw = NV40_VP_INST_DEST_BFC0; } else - if (fdec->Semantic.SemanticIndex == 1) { + if (fdec->Semantic.Index == 1) { hw = NV40_VP_INST_DEST_BFC1; } else { NOUVEAU_ERR("bad bcolour semantic index\n"); @@ -614,13 +614,17 @@ nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc, hw = NV40_VP_INST_DEST_PSZ; break; case TGSI_SEMANTIC_GENERIC: - if (fdec->Semantic.SemanticIndex <= 7) { - hw = NV40_VP_INST_DEST_TC(fdec->Semantic.SemanticIndex); + if (fdec->Semantic.Index <= 7) { + hw = NV40_VP_INST_DEST_TC(fdec->Semantic.Index); } else { NOUVEAU_ERR("bad generic semantic index\n"); return FALSE; } break; + case TGSI_SEMANTIC_EDGEFLAG: + /* not really an error just a fallback */ + NOUVEAU_ERR("cannot handle edgeflag output\n"); + return FALSE; default: NOUVEAU_ERR("bad output semantic\n"); return FALSE; @@ -652,16 +656,16 @@ nv40_vertprog_prepare(struct nv40_vpc *vpc) fdec = &p.FullToken.FullDeclaration; switch (fdec->Declaration.File) { case TGSI_FILE_TEMPORARY: - if (fdec->DeclarationRange.Last > high_temp) { + if (fdec->Range.Last > high_temp) { high_temp = - fdec->DeclarationRange.Last; + fdec->Range.Last; } break; #if 0 /* this would be nice.. except gallium doesn't track it */ case TGSI_FILE_ADDRESS: - if (fdec->DeclarationRange.Last > high_addr) { + if (fdec->Range.Last > high_addr) { high_addr = - fdec->DeclarationRange.Last; + fdec->Range.Last; } break; #endif @@ -681,11 +685,11 @@ nv40_vertprog_prepare(struct nv40_vpc *vpc) const struct tgsi_full_dst_register *fdst; finst = &p.FullToken.FullInstruction; - fdst = &finst->FullDstRegisters[0]; + fdst = &finst->Dst[0]; - if (fdst->DstRegister.File == TGSI_FILE_ADDRESS) { - if (fdst->DstRegister.Index > high_addr) - high_addr = fdst->DstRegister.Index; + if (fdst->Register.File == TGSI_FILE_ADDRESS) { + if (fdst->Register.Index > high_addr) + high_addr = fdst->Register.Index; } } @@ -830,7 +834,9 @@ static boolean nv40_vertprog_validate(struct nv40_context *nv40) { struct pipe_screen *pscreen = nv40->pipe.screen; - struct nouveau_grobj *curie = nv40->screen->curie; + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; struct nv40_vertex_program *vp; struct pipe_buffer *constbuf; boolean upload_code = FALSE, upload_data = FALSE; @@ -880,7 +886,7 @@ check_gpu_resources: assert(0); } - so = so_new(7, 0); + so = so_new(3, 4, 0); so_method(so, curie, NV40TCL_VP_START_FROM_ID, 1); so_data (so, vp->exec->start); so_method(so, curie, NV40TCL_VP_ATTRIB_EN, 2); @@ -970,9 +976,9 @@ check_gpu_resources: 4 * sizeof(float)); } - BEGIN_RING(curie, NV40TCL_VP_UPLOAD_CONST_ID, 5); - OUT_RING (i + vp->data->start); - OUT_RINGp ((uint32_t *)vpd->value, 4); + BEGIN_RING(chan, curie, NV40TCL_VP_UPLOAD_CONST_ID, 5); + OUT_RING (chan, i + vp->data->start); + OUT_RINGp (chan, (uint32_t *)vpd->value, 4); } if (constbuf) @@ -989,11 +995,11 @@ check_gpu_resources: NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[3]); } #endif - BEGIN_RING(curie, NV40TCL_VP_UPLOAD_FROM_ID, 1); - OUT_RING (vp->exec->start); + BEGIN_RING(chan, curie, NV40TCL_VP_UPLOAD_FROM_ID, 1); + OUT_RING (chan, vp->exec->start); for (i = 0; i < vp->nr_insns; i++) { - BEGIN_RING(curie, NV40TCL_VP_UPLOAD_INST(0), 4); - OUT_RINGp (vp->insns[i].data, 4); + BEGIN_RING(chan, curie, NV40TCL_VP_UPLOAD_INST(0), 4); + OUT_RINGp (chan, vp->insns[i].data, 4); } } diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index 219e7a78623..5997456e4c9 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -43,16 +43,44 @@ nv50_destroy(struct pipe_context *pipe) { struct nv50_context *nv50 = nv50_context(pipe); + if (nv50->state.fb) + so_ref(NULL, &nv50->state.fb); + if (nv50->state.blend) + so_ref(NULL, &nv50->state.blend); + if (nv50->state.blend_colour) + so_ref(NULL, &nv50->state.blend_colour); + if (nv50->state.zsa) + so_ref(NULL, &nv50->state.zsa); + if (nv50->state.rast) + so_ref(NULL, &nv50->state.rast); + if (nv50->state.stipple) + so_ref(NULL, &nv50->state.stipple); + if (nv50->state.scissor) + so_ref(NULL, &nv50->state.scissor); + if (nv50->state.viewport) + so_ref(NULL, &nv50->state.viewport); + if (nv50->state.tsc_upload) + so_ref(NULL, &nv50->state.tsc_upload); + if (nv50->state.tic_upload) + so_ref(NULL, &nv50->state.tic_upload); + if (nv50->state.vertprog) + so_ref(NULL, &nv50->state.vertprog); + if (nv50->state.fragprog) + so_ref(NULL, &nv50->state.fragprog); + if (nv50->state.programs) + so_ref(NULL, &nv50->state.programs); + if (nv50->state.vtxfmt) + so_ref(NULL, &nv50->state.vtxfmt); + if (nv50->state.vtxbuf) + so_ref(NULL, &nv50->state.vtxbuf); + if (nv50->state.vtxattr) + so_ref(NULL, &nv50->state.vtxattr); + draw_destroy(nv50->draw); FREE(nv50); } -static void -nv50_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield) -{ -} - struct pipe_context * nv50_create(struct pipe_screen *pscreen, unsigned pctx_id) { @@ -71,7 +99,6 @@ nv50_create(struct pipe_screen *pscreen, unsigned pctx_id) nv50->pipe.destroy = nv50_destroy; - nv50->pipe.set_edgeflags = nv50_set_edgeflags; nv50->pipe.draw_arrays = nv50_draw_arrays; nv50->pipe.draw_elements = nv50_draw_elements; nv50->pipe.clear = nv50_clear; diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 4b0f0622953..cbd4c3ff86d 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -65,7 +65,7 @@ struct nv50_rasterizer_stateobj { }; struct nv50_sampler_stateobj { - bool normalized; + boolean normalized; unsigned tsc[8]; }; @@ -126,7 +126,7 @@ struct nv50_state { unsigned viewport_bypass; struct nouveau_stateobj *tsc_upload; struct nouveau_stateobj *tic_upload; - unsigned miptree_nr; + unsigned miptree_nr[PIPE_SHADER_TYPES]; struct nouveau_stateobj *vertprog; struct nouveau_stateobj *fragprog; struct nouveau_stateobj *programs; @@ -162,10 +162,10 @@ struct nv50_context { unsigned vtxbuf_nr; struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS]; unsigned vtxelt_nr; - struct nv50_sampler_stateobj *sampler[PIPE_MAX_SAMPLERS]; - unsigned sampler_nr; - struct nv50_miptree *miptree[PIPE_MAX_SAMPLERS]; - unsigned miptree_nr; + struct nv50_sampler_stateobj *sampler[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS]; + unsigned sampler_nr[PIPE_SHADER_TYPES]; + struct nv50_miptree *miptree[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS]; + unsigned miptree_nr[PIPE_SHADER_TYPES]; uint16_t vbo_fifo; }; @@ -191,9 +191,9 @@ nv50_surface_do_copy(struct nv50_screen *screen, struct pipe_surface *dst, extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *nv50); /* nv50_vbo.c */ -extern boolean nv50_draw_arrays(struct pipe_context *, unsigned mode, +extern void nv50_draw_arrays(struct pipe_context *, unsigned mode, unsigned start, unsigned count); -extern boolean nv50_draw_elements(struct pipe_context *pipe, +extern void nv50_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, @@ -218,7 +218,7 @@ extern void nv50_state_flush_notify(struct nouveau_channel *chan); extern void nv50_so_init_sifc(struct nv50_context *nv50, struct nouveau_stateobj *so, struct nouveau_bo *bo, unsigned reloc, - unsigned size); + unsigned offset, unsigned size); /* nv50_tex.c */ extern void nv50_tex_validate(struct nv50_context *); diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index 9c20c5cc282..3f1edf0a139 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -23,6 +23,7 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" +#include "util/u_format.h" #include "nv50_context.h" @@ -55,14 +56,28 @@ get_tile_mode(unsigned ny, unsigned d) return tile_mode | 0x10; } +static INLINE unsigned +get_zslice_offset(unsigned tile_mode, unsigned z, unsigned pitch, unsigned nb_h) +{ + unsigned tile_h = get_tile_height(tile_mode); + unsigned tile_d = get_tile_depth(tile_mode); + + /* pitch_2d == to next slice within this volume-tile */ + /* pitch_3d == size (in bytes) of a volume-tile */ + unsigned pitch_2d = tile_h * 64; + unsigned pitch_3d = tile_d * align(nb_h, tile_h) * pitch; + + return (z % tile_d) * pitch_2d + (z / tile_d) * pitch_3d; +} + static struct pipe_texture * nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp) { struct nouveau_device *dev = nouveau_screen(pscreen)->device; struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree); struct pipe_texture *pt = &mt->base.base; - unsigned width = tmp->width[0], height = tmp->height[0]; - unsigned depth = tmp->depth[0], image_alignment; + unsigned width = tmp->width0, height = tmp->height0; + unsigned depth = tmp->depth0, image_alignment; uint32_t tile_flags; int ret, i, l; @@ -91,20 +106,15 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp) for (l = 0; l <= pt->last_level; l++) { struct nv50_miptree_level *lvl = &mt->level[l]; - - pt->width[l] = width; - pt->height[l] = height; - pt->depth[l] = depth; - pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); - pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); + unsigned nblocksy = util_format_get_nblocksy(pt->format, height); lvl->image_offset = CALLOC(mt->image_nr, sizeof(int)); - lvl->pitch = align(pt->nblocksx[l] * pt->block.size, 64); - lvl->tile_mode = get_tile_mode(pt->nblocksy[l], depth); + lvl->pitch = align(util_format_get_stride(pt->format, width), 64); + lvl->tile_mode = get_tile_mode(nblocksy, depth); - width = MAX2(1, width >> 1); - height = MAX2(1, height >> 1); - depth = MAX2(1, depth >> 1); + width = u_minify(width, 1); + height = u_minify(height, 1); + depth = u_minify(depth, 1); } image_alignment = get_tile_height(mt->level[0].tile_mode) * 64; @@ -121,8 +131,8 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp) unsigned tile_d = get_tile_depth(lvl->tile_mode); size = lvl->pitch; - size *= align(pt->nblocksy[l], tile_h); - size *= align(pt->depth[l], tile_d); + size *= align(util_format_get_nblocksy(pt->format, u_minify(pt->height0, l)), tile_h); + size *= align(u_minify(pt->depth0, l), tile_d); lvl->image_offset[i] = mt->total_size; @@ -135,6 +145,8 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp) mt->level[0].tile_mode, tile_flags, &mt->base.bo); if (ret) { + for (l = 0; l < pt->last_level; ++l) + FREE(mt->level[l].image_offset); FREE(mt); return NULL; } @@ -151,7 +163,7 @@ nv50_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, /* Only supports 2D, non-mipmapped textures for the moment */ if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 || - pt->depth[0] != 1) + pt->depth0 != 1) return NULL; mt = CALLOC_STRUCT(nv50_miptree); @@ -174,6 +186,10 @@ static void nv50_miptree_destroy(struct pipe_texture *pt) { struct nv50_miptree *mt = nv50_miptree(pt); + unsigned l; + + for (l = 0; l < pt->last_level; ++l) + FREE(mt->level[l].image_offset); nouveau_bo_ref(NULL, &mt->base.bo); FREE(mt); @@ -187,23 +203,18 @@ nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, struct nv50_miptree *mt = nv50_miptree(pt); struct nv50_miptree_level *lvl = &mt->level[level]; struct pipe_surface *ps; - int img; + unsigned img = 0; if (pt->target == PIPE_TEXTURE_CUBE) img = face; - else - if (pt->target == PIPE_TEXTURE_3D) - img = zslice; - else - img = 0; ps = CALLOC_STRUCT(pipe_surface); if (!ps) return NULL; pipe_texture_reference(&ps->texture, pt); ps->format = pt->format; - ps->width = pt->width[level]; - ps->height = pt->height[level]; + ps->width = u_minify(pt->width0, level); + ps->height = u_minify(pt->height0, level); ps->usage = flags; pipe_reference_init(&ps->reference, 1); ps->face = face; @@ -211,6 +222,12 @@ nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, ps->zslice = zslice; ps->offset = lvl->image_offset[img]; + if (pt->target == PIPE_TEXTURE_3D) { + unsigned nb_h = util_format_get_nblocksy(pt->format, ps->height); + ps->offset += get_zslice_offset(lvl->tile_mode, zslice, + lvl->pitch, nb_h); + } + return ps; } diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index bf50982dd16..af0759e5038 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -98,9 +98,17 @@ struct nv50_reg { #define NV50_MOD_ABS 2 #define NV50_MOD_SAT 4 -/* arbitrary limits */ -#define MAX_IF_DEPTH 4 -#define MAX_LOOP_DEPTH 4 +/* STACK: Conditionals and loops have to use the (per warp) stack. + * Stack entries consist of an entry type (divergent path, join at), + * a mask indicating the active threads of the warp, and an address. + * MPs can store 12 stack entries internally, if we need more (and + * we probably do), we have to create a stack buffer in VRAM. + */ +/* impose low limits for now */ +#define NV50_MAX_COND_NESTING 4 +#define NV50_MAX_LOOP_NESTING 3 + +#define JOIN_ON(e) e; pc->p->exec_tail->inst[1] |= 2 struct nv50_pc { struct nv50_program *p; @@ -119,10 +127,11 @@ struct nv50_pc { struct nv50_reg *param; int param_nr; struct nv50_reg *immd; - float *immd_buf; + uint32_t *immd_buf; int immd_nr; struct nv50_reg **addr; int addr_nr; + uint8_t addr_alloc; /* set bit indicates used for TGSI_FILE_ADDRESS */ struct nv50_reg *temp_temp[16]; unsigned temp_temp_nr; @@ -131,23 +140,30 @@ struct nv50_pc { struct nv50_reg *r_brdc; struct nv50_reg *r_dst[4]; + struct nv50_reg reg_instances[16]; + unsigned reg_instance_nr; + unsigned interp_mode[32]; /* perspective interpolation registers */ struct nv50_reg *iv_p; struct nv50_reg *iv_c; - struct nv50_program_exec *if_cond; - struct nv50_program_exec *if_insn[MAX_IF_DEPTH]; - struct nv50_program_exec *br_join[MAX_IF_DEPTH]; - struct nv50_program_exec *br_loop[MAX_LOOP_DEPTH]; /* for BRK branch */ + struct nv50_program_exec *if_insn[NV50_MAX_COND_NESTING]; + struct nv50_program_exec *if_join[NV50_MAX_COND_NESTING]; + struct nv50_program_exec *loop_brka[NV50_MAX_LOOP_NESTING]; int if_lvl, loop_lvl; - unsigned loop_pos[MAX_LOOP_DEPTH]; + unsigned loop_pos[NV50_MAX_LOOP_NESTING]; + + unsigned *insn_pos; /* actual program offset of each TGSI insn */ + boolean in_subroutine; /* current instruction and total number of insns */ unsigned insn_cur; unsigned insn_nr; boolean allow32; + + uint8_t edgeflag_out; }; static INLINE void @@ -176,8 +192,7 @@ terminate_mbb(struct nv50_pc *pc) /* remove records of temporary address register values */ for (i = 0; i < NV50_SU_MAX_ADDR; ++i) - if (pc->r_addr[i].index < 0) - pc->r_addr[i].rhw = -1; + pc->r_addr[i].rhw = -1; } static void @@ -229,6 +244,21 @@ alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg) assert(0); } +static INLINE struct nv50_reg * +reg_instance(struct nv50_pc *pc, struct nv50_reg *reg) +{ + struct nv50_reg *ri; + + assert(pc->reg_instance_nr < 16); + ri = &pc->reg_instances[pc->reg_instance_nr++]; + if (reg) { + alloc_reg(pc, reg); + *ri = *reg; + reg->mod = 0; + } + return ri; +} + /* XXX: For shaders that aren't executed linearly (e.g. shaders that * contain loops), we need to assign all hw regs to TGSI TEMPs early, * lest we risk temp_temps overwriting regs alloc'd "later". @@ -255,22 +285,6 @@ alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst) return NULL; } -/* Assign the hw of the discarded temporary register src - * to the tgsi register dst and free src. - */ -static void -assimilate_temp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) -{ - assert(src->index == -1 && src->hw != -1); - - if (dst->hw != -1) - pc->r_temp[dst->hw] = NULL; - pc->r_temp[src->hw] = dst; - dst->hw = src->hw; - - FREE(src); -} - /* release the hardware resource held by r */ static void release_hw(struct nv50_pc *pc, struct nv50_reg *r) @@ -342,25 +356,34 @@ static void kill_temp_temp(struct nv50_pc *pc) { int i; - + for (i = 0; i < pc->temp_temp_nr; i++) free_temp(pc, pc->temp_temp[i]); pc->temp_temp_nr = 0; } static int -ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w) +ctor_immd_4u32(struct nv50_pc *pc, + uint32_t x, uint32_t y, uint32_t z, uint32_t w) { - pc->immd_buf = REALLOC(pc->immd_buf, (pc->immd_nr * 4 * sizeof(float)), - (pc->immd_nr + 1) * 4 * sizeof(float)); + unsigned size = pc->immd_nr * 4 * sizeof(uint32_t); + + pc->immd_buf = REALLOC(pc->immd_buf, size, size + 4 * sizeof(uint32_t)); + pc->immd_buf[(pc->immd_nr * 4) + 0] = x; pc->immd_buf[(pc->immd_nr * 4) + 1] = y; pc->immd_buf[(pc->immd_nr * 4) + 2] = z; pc->immd_buf[(pc->immd_nr * 4) + 3] = w; - + return pc->immd_nr++; } +static INLINE int +ctor_immd_4f32(struct nv50_pc *pc, float x, float y, float z, float w) +{ + return ctor_immd_4u32(pc, fui(x), fui(y), fui(z), fui(w)); +} + static struct nv50_reg * alloc_immd(struct nv50_pc *pc, float f) { @@ -368,11 +391,11 @@ alloc_immd(struct nv50_pc *pc, float f) unsigned hw; for (hw = 0; hw < pc->immd_nr * 4; hw++) - if (pc->immd_buf[hw] == f) + if (pc->immd_buf[hw] == fui(f)) break; if (hw == pc->immd_nr * 4) - hw = ctor_immd(pc, f, -f, 0.5 * f, 0) * 4; + hw = ctor_immd_4f32(pc, f, -f, 0.5 * f, 0) * 4; ctor_reg(r, P_IMMD, -1, hw); return r; @@ -418,10 +441,19 @@ is_immd(struct nv50_program_exec *e) return FALSE; } +static boolean +is_join(struct nv50_program_exec *e) +{ + if (is_long(e) && (e->inst[1] & 3) == 2) + return TRUE; + return FALSE; +} + static INLINE void set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx, struct nv50_program_exec *e) { + assert(!is_immd(e)); set_long(pc, e); e->inst[1] &= ~((0x1f << 7) | (0x3 << 12)); e->inst[1] |= (pred << 7) | (idx << 12); @@ -464,22 +496,15 @@ set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e) static INLINE void set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e) { - unsigned val; - float f = pc->immd_buf[imm->hw]; - - if (imm->mod & NV50_MOD_ABS) - f = fabsf(f); - val = fui((imm->mod & NV50_MOD_NEG) ? -f : f); - set_long(pc, e); - /*XXX: can't be predicated - bits overlap.. catch cases where both - * are required and avoid them. */ + /* XXX: can't be predicated - bits overlap; cases where both + * are required should be avoided by using pc->allow32 */ set_pred(pc, 0, 0, e); set_pred_wr(pc, 0, 0, e); e->inst[1] |= 0x00000002 | 0x00000001; - e->inst[0] |= (val & 0x3f) << 16; - e->inst[1] |= (val >> 6) << 2; + e->inst[0] |= (pc->immd_buf[imm->hw] & 0x3f) << 16; + e->inst[1] |= (pc->immd_buf[imm->hw] >> 6) << 2; } static INLINE void @@ -511,21 +536,24 @@ emit_add_addr_imm(struct nv50_pc *pc, struct nv50_reg *dst, static struct nv50_reg * alloc_addr(struct nv50_pc *pc, struct nv50_reg *ref) { - int i; struct nv50_reg *a_tgsi = NULL, *a = NULL; + int i; + uint8_t avail = ~pc->addr_alloc; if (!ref) { - /* allocate for TGSI address reg */ - for (i = 0; i < NV50_SU_MAX_ADDR; ++i) { - if (pc->r_addr[i].index >= 0) - continue; - if (pc->r_addr[i].rhw >= 0 && - pc->r_addr[i].acc == pc->insn_cur) - continue; + /* allocate for TGSI_FILE_ADDRESS */ + while (avail) { + i = ffs(avail) - 1; - pc->r_addr[i].rhw = -1; - pc->r_addr[i].index = i; - return &pc->r_addr[i]; + if (pc->r_addr[i].rhw < 0 || + pc->r_addr[i].acc != pc->insn_cur) { + pc->addr_alloc |= (1 << i); + + pc->r_addr[i].rhw = -1; + pc->r_addr[i].index = i; + return &pc->r_addr[i]; + } + avail &= ~(1 << i); } assert(0); return NULL; @@ -533,15 +561,16 @@ alloc_addr(struct nv50_pc *pc, struct nv50_reg *ref) /* Allocate and set an address reg so we can access 'ref'. * - * If and r_addr has index < 0, it is not reserved for TGSI, - * and index will be the negative of the TGSI addr index the - * value in rhw is relative to, or -256 if rhw is an offset - * from 0. If rhw < 0, the reg has not been initialized. + * If and r_addr->index will be -1 or the hw index the value + * value in rhw is relative to. If rhw < 0, the reg has not + * been initialized or is in use for TGSI_FILE_ADDRESS. */ - for (i = NV50_SU_MAX_ADDR - 1; i >= 0; --i) { - if (pc->r_addr[i].index >= 0) /* occupied for TGSI */ - continue; - if (pc->r_addr[i].rhw < 0) { /* unused */ + while (avail) { /* only consider regs that are not TGSI */ + i = ffs(avail) - 1; + avail &= ~(1 << i); + + if ((!a || a->rhw >= 0) && pc->r_addr[i].rhw < 0) { + /* prefer an usused reg with low hw index */ a = &pc->r_addr[i]; continue; } @@ -551,8 +580,8 @@ alloc_addr(struct nv50_pc *pc, struct nv50_reg *ref) if (ref->hw - pc->r_addr[i].rhw >= 128) continue; - if ((ref->acc >= 0 && pc->r_addr[i].index == -256) || - (ref->acc < 0 && -pc->r_addr[i].index == ref->index)) { + if ((ref->acc >= 0 && pc->r_addr[i].index < 0) || + (ref->acc < 0 && pc->r_addr[i].index == ref->index)) { pc->r_addr[i].acc = pc->insn_cur; return &pc->r_addr[i]; } @@ -566,7 +595,7 @@ alloc_addr(struct nv50_pc *pc, struct nv50_reg *ref) a->rhw = ref->hw & ~0x7f; a->acc = pc->insn_cur; - a->index = a_tgsi ? -ref->index : -256; + a->index = a_tgsi ? ref->index : -1; return a; } @@ -624,6 +653,7 @@ set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s, e->inst[1] |= (((src->type == P_IMMD) ? 0 : 1) << 22); } +/* Never apply nv50_reg::mod in emit_mov, or carefully check the code !!! */ static void emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { @@ -644,7 +674,7 @@ emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) if (src->type == P_IMMD || src->type == P_CONST) { set_long(pc, e); set_data(pc, src, 0x7f, 9, e); - e->inst[1] |= 0x20000000; /* src0 const? */ + e->inst[1] |= 0x20000000; /* mov from c[] */ } else { if (src->type == P_ATTR) { set_long(pc, e); @@ -659,9 +689,9 @@ emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) if (is_long(e) && !is_immd(e)) { e->inst[1] |= 0x04000000; /* 32-bit */ - e->inst[1] |= 0x0000c000; /* "subsubop" 0x3 */ + e->inst[1] |= 0x0000c000; /* 32-bit c[] load / lane mask 0:1 */ if (!(e->inst[1] & 0x20000000)) - e->inst[1] |= 0x00030000; /* "subsubop" 0xf */ + e->inst[1] |= 0x00030000; /* lane mask 2:3 */ } else e->inst[0] |= 0x00008000; @@ -676,6 +706,45 @@ emit_mov_immdval(struct nv50_pc *pc, struct nv50_reg *dst, float f) FREE(imm); } +/* Assign the hw of the discarded temporary register src + * to the tgsi register dst and free src. + */ +static void +assimilate_temp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + assert(src->index == -1 && src->hw != -1); + + if (pc->if_lvl || pc->loop_lvl || + (dst->type != P_TEMP) || + (src->hw < pc->result_nr * 4 && + pc->p->type == PIPE_SHADER_FRAGMENT) || + pc->p->info.opcode_count[TGSI_OPCODE_CAL] || + pc->p->info.opcode_count[TGSI_OPCODE_BRA]) { + + emit_mov(pc, dst, src); + free_temp(pc, src); + return; + } + + if (dst->hw != -1) + pc->r_temp[dst->hw] = NULL; + pc->r_temp[src->hw] = dst; + dst->hw = src->hw; + + FREE(src); +} + +static void +emit_nop(struct nv50_pc *pc) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0xf0000000; + set_long(pc, e); + e->inst[1] = 0xe0000000; + emit(pc, e); +} + static boolean check_swap_src_0_1(struct nv50_pc *pc, struct nv50_reg **s0, struct nv50_reg **s1) @@ -795,6 +864,33 @@ set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) } static void +emit_mov_from_pred(struct nv50_pc *pc, struct nv50_reg *dst, int pred) +{ + struct nv50_program_exec *e = exec(pc); + + assert(dst->type == P_TEMP); + e->inst[1] = 0x20000000 | (pred << 12); + set_long(pc, e); + set_dst(pc, dst, e); + + emit(pc, e); +} + +static void +emit_mov_to_pred(struct nv50_pc *pc, int pred, struct nv50_reg *src) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0x000001fc; + e->inst[1] = 0xa0000008; + set_long(pc, e); + set_pred_wr(pc, 1, pred, e); + set_src_0_restricted(pc, src, e); + + emit(pc, e); +} + +static void emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1) { @@ -809,7 +905,7 @@ emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, set_dst(pc, dst, e); set_src_0(pc, src0, e); if (src1->type == P_IMMD && !is_long(e)) { - if (src0->mod & NV50_MOD_NEG) + if (src0->mod ^ src1->mod) e->inst[0] |= 0x00008000; set_immd(pc, src1, e); } else { @@ -898,7 +994,6 @@ static INLINE void emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1) { - assert(src0 != src1); src1->mod ^= NV50_MOD_NEG; emit_add(pc, dst, src0, src1); src1->mod ^= NV50_MOD_NEG; @@ -921,6 +1016,8 @@ emit_bitop2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, op != TGSI_OPCODE_XOR) assert(!"invalid bit op"); + assert(!(src0->mod | src1->mod)); + if (src1->type == P_IMMD && src0->type == P_TEMP && pc->allow32) { set_immd(pc, src1, e); if (op == TGSI_OPCODE_OR) @@ -967,12 +1064,19 @@ static INLINE void emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1, struct nv50_reg *src2) { - assert(src2 != src0 && src2 != src1); src2->mod ^= NV50_MOD_NEG; emit_mad(pc, dst, src0, src1, src2); src2->mod ^= NV50_MOD_NEG; } +#define NV50_FLOP_RCP 0 +#define NV50_FLOP_RSQ 2 +#define NV50_FLOP_LG2 3 +#define NV50_FLOP_SIN 4 +#define NV50_FLOP_COS 5 +#define NV50_FLOP_EX2 6 + +/* rcp, rsqrt, lg2 support neg and abs */ static void emit_flop(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, struct nv50_reg *src) @@ -980,17 +1084,20 @@ emit_flop(struct nv50_pc *pc, unsigned sub, struct nv50_program_exec *e = exec(pc); e->inst[0] |= 0x90000000; - if (sub) { + if (sub || src->mod) { set_long(pc, e); e->inst[1] |= (sub << 29); } set_dst(pc, dst, e); + set_src_0_restricted(pc, src, e); - if (sub == 0 || sub == 2) - set_src_0_restricted(pc, src, e); - else - set_src_0(pc, src, e); + assert(!src->mod || sub < 4); + + if (src->mod & NV50_MOD_NEG) + e->inst[1] |= 0x04000000; + if (src->mod & NV50_MOD_ABS) + e->inst[1] |= 0x00100000; emit(pc, e); } @@ -1007,6 +1114,11 @@ emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) set_long(pc, e); e->inst[1] |= (6 << 29) | 0x00004000; + if (src->mod & NV50_MOD_NEG) + e->inst[1] |= 0x04000000; + if (src->mod & NV50_MOD_ABS) + e->inst[1] |= 0x00100000; + emit(pc, e); } @@ -1022,6 +1134,11 @@ emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) set_long(pc, e); e->inst[1] |= (6 << 29); + if (src->mod & NV50_MOD_NEG) + e->inst[1] |= 0x04000000; + if (src->mod & NV50_MOD_ABS) + e->inst[1] |= 0x00100000; + emit(pc, e); } @@ -1120,7 +1237,6 @@ emit_set(struct nv50_pc *pc, unsigned ccode, struct nv50_reg *dst, int wp, set_src_1(pc, src1, e); emit(pc, e); - pc->if_cond = pc->p->exec_tail; /* record for OPCODE_IF */ /* cvt.f32.u32/s32 (?) if we didn't only write the predicate */ if (rdst) @@ -1157,10 +1273,10 @@ emit_pow(struct nv50_pc *pc, struct nv50_reg *dst, { struct nv50_reg *temp = alloc_temp(pc, NULL); - emit_flop(pc, 3, temp, v); + emit_flop(pc, NV50_FLOP_LG2, temp, v); emit_mul(pc, temp, temp, e); emit_preex2(pc, temp, temp); - emit_flop(pc, 6, dst, temp); + emit_flop(pc, NV50_FLOP_EX2, dst, temp); free_temp(pc, temp); } @@ -1242,24 +1358,115 @@ emit_kil(struct nv50_pc *pc, struct nv50_reg *src) { struct nv50_program_exec *e; const int r_pred = 1; - unsigned cvn = CVT_F32_F32; - - if (src->mod & NV50_MOD_NEG) - cvn |= CVT_NEG; - /* write predicate reg */ - emit_cvt(pc, NULL, src, r_pred, CVTOP_RN, cvn); - /* conditional discard */ e = exec(pc); - e->inst[0] = 0x00000002; + e->inst[0] = 0x00000002; /* discard */ + set_long(pc, e); /* sets cond code to ALWAYS */ + + if (src) { + unsigned cvn = CVT_F32_F32; + + set_pred(pc, 0x1 /* cc = LT */, r_pred, e); + + if (src->mod & NV50_MOD_NEG) + cvn |= CVT_NEG; + /* write predicate reg */ + emit_cvt(pc, NULL, src, r_pred, CVTOP_RN, cvn); + } + + emit(pc, e); +} + +static struct nv50_program_exec * +emit_control_flow(struct nv50_pc *pc, unsigned op, int pred, unsigned cc) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = (op << 28) | 2; set_long(pc, e); - set_pred(pc, 0x1 /* LT */, r_pred, e); + if (pred >= 0) + set_pred(pc, cc, pred, e); + emit(pc, e); + return e; +} + +static INLINE struct nv50_program_exec * +emit_breakaddr(struct nv50_pc *pc) +{ + return emit_control_flow(pc, 0x4, -1, 0); +} + +static INLINE void +emit_break(struct nv50_pc *pc, int pred, unsigned cc) +{ + emit_control_flow(pc, 0x5, pred, cc); +} + +static INLINE struct nv50_program_exec * +emit_joinat(struct nv50_pc *pc) +{ + return emit_control_flow(pc, 0xa, -1, 0); +} + +static INLINE struct nv50_program_exec * +emit_branch(struct nv50_pc *pc, int pred, unsigned cc) +{ + return emit_control_flow(pc, 0x1, pred, cc); +} + +static INLINE struct nv50_program_exec * +emit_call(struct nv50_pc *pc, int pred, unsigned cc) +{ + return emit_control_flow(pc, 0x2, pred, cc); +} + +static INLINE void +emit_ret(struct nv50_pc *pc, int pred, unsigned cc) +{ + emit_control_flow(pc, 0x3, pred, cc); +} + +#define QOP_ADD 0 +#define QOP_SUBR 1 +#define QOP_SUB 2 +#define QOP_MOV_SRC1 3 + +/* For a quad of threads / top left, top right, bottom left, bottom right + * pixels, do a different operation, and take src0 from a specific thread. + */ +static void +emit_quadop(struct nv50_pc *pc, struct nv50_reg *dst, int wp, int lane_src0, + struct nv50_reg *src0, struct nv50_reg *src1, ubyte qop) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0xc0000000; + e->inst[1] = 0x80000000; + set_long(pc, e); + e->inst[0] |= lane_src0 << 16; + set_src_0(pc, src0, e); + set_src_2(pc, src1, e); + + if (wp >= 0) + set_pred_wr(pc, 1, wp, e); + + if (dst) + set_dst(pc, dst, e); + else { + e->inst[0] |= 0x000001fc; + e->inst[1] |= 0x00000008; + } + + e->inst[0] |= (qop & 3) << 20; + e->inst[1] |= (qop >> 2) << 22; + + emit(pc, e); } static void load_cube_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4], - struct nv50_reg **src, boolean proj) + struct nv50_reg **src, unsigned arg, boolean proj) { int mod[3] = { src[0]->mod, src[1]->mod, src[2]->mod }; @@ -1276,7 +1483,11 @@ load_cube_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4], if (proj && 0 /* looks more correct without this */) emit_mul(pc, t[2], t[2], src[3]); - emit_flop(pc, 0, t[2], t[2]); + else + if (arg == 4) /* there is no textureProj(samplerCubeShadow) */ + emit_mov(pc, t[3], src[3]); + + emit_flop(pc, NV50_FLOP_RCP, t[2], t[2]); emit_mul(pc, t[0], src[0], t[2]); emit_mul(pc, t[1], src[1], t[2]); @@ -1284,89 +1495,221 @@ load_cube_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4], } static void -emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, - struct nv50_reg **src, unsigned unit, unsigned type, boolean proj) +load_proj_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4], + struct nv50_reg **src, unsigned dim, unsigned arg) { - struct nv50_reg *t[4]; - struct nv50_program_exec *e; + unsigned c, mode; + + if (src[0]->type == P_TEMP && src[0]->rhw != -1) { + mode = pc->interp_mode[src[0]->index] | INTERP_PERSPECTIVE; + + t[3]->rhw = src[3]->rhw; + emit_interp(pc, t[3], NULL, (mode & INTERP_CENTROID)); + emit_flop(pc, NV50_FLOP_RCP, t[3], t[3]); - unsigned c, mode, dim; + for (c = 0; c < dim; ++c) { + t[c]->rhw = src[c]->rhw; + emit_interp(pc, t[c], t[3], mode); + } + if (arg != dim) { /* depth reference value */ + t[dim]->rhw = src[2]->rhw; + emit_interp(pc, t[dim], t[3], mode); + } + } else { + /* XXX: for some reason the blob sometimes uses MAD + * (mad f32 $rX $rY $rZ neg $r63) + */ + emit_flop(pc, NV50_FLOP_RCP, t[3], src[3]); + for (c = 0; c < dim; ++c) + emit_mul(pc, t[c], src[c], t[3]); + if (arg != dim) /* depth reference value */ + emit_mul(pc, t[dim], src[2], t[3]); + } +} +static INLINE void +get_tex_dim(unsigned type, unsigned *dim, unsigned *arg) +{ switch (type) { case TGSI_TEXTURE_1D: - dim = 1; + *arg = *dim = 1; + break; + case TGSI_TEXTURE_SHADOW1D: + *dim = 1; + *arg = 2; break; case TGSI_TEXTURE_UNKNOWN: case TGSI_TEXTURE_2D: - case TGSI_TEXTURE_SHADOW1D: /* XXX: x, z */ case TGSI_TEXTURE_RECT: - dim = 2; + *arg = *dim = 2; + break; + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: + *dim = 2; + *arg = 3; break; case TGSI_TEXTURE_3D: case TGSI_TEXTURE_CUBE: - case TGSI_TEXTURE_SHADOW2D: - case TGSI_TEXTURE_SHADOWRECT: /* XXX */ - dim = 3; + *dim = *arg = 3; break; default: assert(0); break; } +} - /* some cards need t[0]'s hw index to be a multiple of 4 */ - alloc_temp4(pc, t, 0); +/* We shouldn't execute TEXLOD if any of the pixels in a quad have + * different LOD values, so branch off groups of equal LOD. + */ +static void +emit_texlod_sequence(struct nv50_pc *pc, struct nv50_reg *tlod, + struct nv50_reg *src, struct nv50_program_exec *tex) +{ + struct nv50_program_exec *join_at; + unsigned i, target = pc->p->exec_size + 9 * 2; - if (type == TGSI_TEXTURE_CUBE) { - load_cube_tex_coords(pc, t, src, proj); - } else - if (proj) { - if (src[0]->type == P_TEMP && src[0]->rhw != -1) { - mode = pc->interp_mode[src[0]->index]; - - t[3]->rhw = src[3]->rhw; - emit_interp(pc, t[3], NULL, (mode & INTERP_CENTROID)); - emit_flop(pc, 0, t[3], t[3]); - - for (c = 0; c < dim; c++) { - t[c]->rhw = src[c]->rhw; - emit_interp(pc, t[c], t[3], - (mode | INTERP_PERSPECTIVE)); - } - } else { - emit_flop(pc, 0, t[3], src[3]); - for (c = 0; c < dim; c++) - emit_mul(pc, t[c], src[c], t[3]); + if (pc->p->type != PIPE_SHADER_FRAGMENT) { + emit(pc, tex); + return; + } + pc->allow32 = FALSE; - /* XXX: for some reason the blob sometimes uses MAD: - * emit_mad(pc, t[c], src[0][c], t[3], t[3]) - * pc->p->exec_tail->inst[1] |= 0x080fc000; - */ + /* Subtract lod of each pixel from lod of top left pixel, jump + * texlod insn if result is 0, then repeat for 2 other pixels. + */ + join_at = emit_joinat(pc); + emit_quadop(pc, NULL, 0, 0, tlod, tlod, 0x55); + emit_branch(pc, 0, 2)->param.index = target; + + for (i = 1; i < 4; ++i) { + emit_quadop(pc, NULL, 0, i, tlod, tlod, 0x55); + emit_branch(pc, 0, 2)->param.index = target; + } + + emit_mov(pc, tlod, src); /* target */ + emit(pc, tex); /* texlod */ + + join_at->param.index = target + 2 * 2; + JOIN_ON(emit_nop(pc)); /* join _after_ tex */ +} + +static void +emit_texbias_sequence(struct nv50_pc *pc, struct nv50_reg *t[4], unsigned arg, + struct nv50_program_exec *tex) +{ + struct nv50_program_exec *e; + struct nv50_reg imm_1248, *t123[4][4], *r_bits = alloc_temp(pc, NULL); + int r_pred = 0; + unsigned n, c, i, cc[4] = { 0x0a, 0x13, 0x11, 0x10 }; + + pc->allow32 = FALSE; + ctor_reg(&imm_1248, P_IMMD, -1, ctor_immd_4u32(pc, 1, 2, 4, 8) * 4); + + /* Subtract bias value of thread i from bias values of each thread, + * store result in r_pred, and set bit i in r_bits if result was 0. + */ + assert(arg < 4); + for (i = 0; i < 4; ++i, ++imm_1248.hw) { + emit_quadop(pc, NULL, r_pred, i, t[arg], t[arg], 0x55); + emit_mov(pc, r_bits, &imm_1248); + set_pred(pc, 2, r_pred, pc->p->exec_tail); + } + emit_mov_to_pred(pc, r_pred, r_bits); + + /* The lanes of a quad are now grouped by the bit in r_pred they have + * set. Put the input values for TEX into a new register set for each + * group and execute TEX only for a specific group. + * We cannot use the same register set for each group because we need + * the derivatives, which are implicitly calculated, to be correct. + */ + for (i = 1; i < 4; ++i) { + alloc_temp4(pc, t123[i], 0); + + for (c = 0; c <= arg; ++c) + emit_mov(pc, t123[i][c], t[c]); + + *(e = exec(pc)) = *(tex); + e->inst[0] &= ~0x01fc; + set_dst(pc, t123[i][0], e); + set_pred(pc, cc[i], r_pred, e); + emit(pc, e); + } + /* finally TEX on the original regs (where we kept the input) */ + set_pred(pc, cc[0], r_pred, tex); + emit(pc, tex); + + /* put the 3 * n other results into regs for lane 0 */ + n = popcnt4(((e->inst[0] >> 25) & 0x3) | ((e->inst[1] >> 12) & 0xc)); + for (i = 1; i < 4; ++i) { + for (c = 0; c < n; ++c) { + emit_mov(pc, t[c], t123[i][c]); + set_pred(pc, cc[i], r_pred, pc->p->exec_tail); } - } else { - for (c = 0; c < dim; c++) - emit_mov(pc, t[c], src[c]); + free_temp4(pc, t123[i]); } + emit_nop(pc); + free_temp(pc, r_bits); +} + +static void +emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, + struct nv50_reg **src, unsigned unit, unsigned type, + boolean proj, int bias_lod) +{ + struct nv50_reg *t[4]; + struct nv50_program_exec *e; + unsigned c, dim, arg; + + /* t[i] must be within a single 128 bit super-reg */ + alloc_temp4(pc, t, 0); + e = exec(pc); + e->inst[0] = 0xf0000000; set_long(pc, e); - e->inst[0] |= 0xf0000000; - e->inst[1] |= 0x00000004; set_dst(pc, t[0], e); - e->inst[0] |= (unit << 9); - if (dim == 2) - e->inst[0] |= 0x00400000; - else - if (dim == 3) { - e->inst[0] |= 0x00800000; - if (type == TGSI_TEXTURE_CUBE) - e->inst[0] |= 0x08000000; + /* TIC and TSC binding indices (TSC is ignored as TSC_LINKED = TRUE): */ + e->inst[0] |= (unit << 9) /* | (unit << 17) */; + + /* live flag (don't set if TEX results affect input to another TEX): */ + /* e->inst[0] |= 0x00000004; */ + + get_tex_dim(type, &dim, &arg); + + if (type == TGSI_TEXTURE_CUBE) { + e->inst[0] |= 0x08000000; + load_cube_tex_coords(pc, t, src, arg, proj); + } else + if (proj) + load_proj_tex_coords(pc, t, src, dim, arg); + else { + for (c = 0; c < dim; c++) + emit_mov(pc, t[c], src[c]); + if (arg != dim) /* depth reference value (always src.z here) */ + emit_mov(pc, t[dim], src[2]); } e->inst[0] |= (mask & 0x3) << 25; e->inst[1] |= (mask & 0xc) << 12; - emit(pc, e); + if (!bias_lod) { + e->inst[0] |= (arg - 1) << 22; + emit(pc, e); + } else + if (bias_lod < 0) { + assert(pc->p->type == PIPE_SHADER_FRAGMENT); + e->inst[0] |= arg << 22; + e->inst[1] |= 0x20000000; /* texbias */ + emit_mov(pc, t[arg], src[3]); + emit_texbias_sequence(pc, t, arg, e); + } else { + e->inst[0] |= arg << 22; + e->inst[1] |= 0x40000000; /* texlod */ + emit_mov(pc, t[arg], src[3]); + emit_texlod_sequence(pc, t[arg], src[3], e); + } + #if 1 c = 0; if (mask & 1) emit_mov(pc, dst[0], t[c++]); @@ -1389,46 +1732,14 @@ emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, } static void -emit_branch(struct nv50_pc *pc, int pred, unsigned cc, - struct nv50_program_exec **join) -{ - struct nv50_program_exec *e = exec(pc); - - if (join) { - set_long(pc, e); - e->inst[0] |= 0xa0000002; - emit(pc, e); - *join = e; - e = exec(pc); - } - - set_long(pc, e); - e->inst[0] |= 0x10000002; - if (pred >= 0) - set_pred(pc, cc, pred, e); - emit(pc, e); -} - -static void -emit_nop(struct nv50_pc *pc) -{ - struct nv50_program_exec *e = exec(pc); - - e->inst[0] = 0xf0000000; - set_long(pc, e); - e->inst[1] = 0xe0000000; - emit(pc, e); -} - -static void emit_ddx(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { struct nv50_program_exec *e = exec(pc); assert(src->type == P_TEMP); - e->inst[0] = 0xc0140000; - e->inst[1] = 0x89800000; + e->inst[0] = (src->mod & NV50_MOD_NEG) ? 0xc0240000 : 0xc0140000; + e->inst[1] = (src->mod & NV50_MOD_NEG) ? 0x86400000 : 0x89800000; set_long(pc, e); set_dst(pc, dst, e); set_src_0(pc, src, e); @@ -1444,11 +1755,8 @@ emit_ddy(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) assert(src->type == P_TEMP); - if (!(src->mod & NV50_MOD_NEG)) /* ! double negation */ - emit_neg(pc, src, src); - - e->inst[0] = 0xc0150000; - e->inst[1] = 0x8a400000; + e->inst[0] = (src->mod & NV50_MOD_NEG) ? 0xc0250000 : 0xc0150000; + e->inst[1] = (src->mod & NV50_MOD_NEG) ? 0x85800000 : 0x8a400000; set_long(pc, e); set_dst(pc, dst, e); set_src_0(pc, src, e); @@ -1509,47 +1817,35 @@ convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e) static boolean negate_supported(const struct tgsi_full_instruction *insn, int i) { - int s; - switch (insn->Instruction.Opcode) { + case TGSI_OPCODE_ADD: + case TGSI_OPCODE_COS: + case TGSI_OPCODE_DDX: case TGSI_OPCODE_DDY: case TGSI_OPCODE_DP3: case TGSI_OPCODE_DP4: - case TGSI_OPCODE_MUL: + case TGSI_OPCODE_EX2: case TGSI_OPCODE_KIL: - case TGSI_OPCODE_ADD: - case TGSI_OPCODE_SUB: + case TGSI_OPCODE_LG2: case TGSI_OPCODE_MAD: - break; + case TGSI_OPCODE_MUL: case TGSI_OPCODE_POW: - if (i == 1) - break; - return FALSE; + case TGSI_OPCODE_RCP: + case TGSI_OPCODE_RSQ: /* ignored, RSQ = rsqrt(abs(src.x)) */ + case TGSI_OPCODE_SCS: + case TGSI_OPCODE_SIN: + case TGSI_OPCODE_SUB: + return TRUE; default: return FALSE; } - - /* Watch out for possible multiple uses of an nv50_reg, we - * can't use nv50_reg::neg in these cases. - */ - for (s = 0; s < insn->Instruction.NumSrcRegs; ++s) { - if (s == i) - continue; - if ((insn->FullSrcRegisters[s].SrcRegister.Index == - insn->FullSrcRegisters[i].SrcRegister.Index) && - (insn->FullSrcRegisters[s].SrcRegister.File == - insn->FullSrcRegisters[i].SrcRegister.File)) - return FALSE; - } - - return TRUE; } /* Return a read mask for source registers deduced from opcode & write mask. */ static unsigned nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c) { - unsigned x, mask = insn->FullDstRegisters[0].DstRegister.WriteMask; + unsigned x, mask = insn->Dst[0].Register.WriteMask; switch (insn->Instruction.Opcode) { case TGSI_OPCODE_COS: @@ -1564,30 +1860,40 @@ nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c) case TGSI_OPCODE_DST: return mask & (c ? 0xa : 0x6); case TGSI_OPCODE_EX2: + case TGSI_OPCODE_EXP: case TGSI_OPCODE_LG2: + case TGSI_OPCODE_LOG: case TGSI_OPCODE_POW: case TGSI_OPCODE_RCP: case TGSI_OPCODE_RSQ: case TGSI_OPCODE_SCS: return 0x1; + case TGSI_OPCODE_IF: + return 0x1; case TGSI_OPCODE_LIT: return 0xb; case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TXB: + case TGSI_OPCODE_TXL: case TGSI_OPCODE_TXP: { - const struct tgsi_instruction_ext_texture *tex; + const struct tgsi_instruction_texture *tex; - assert(insn->Instruction.Extended); - tex = &insn->InstructionExtTexture; + assert(insn->Instruction.Texture); + tex = &insn->Texture; mask = 0x7; - if (insn->Instruction.Opcode == TGSI_OPCODE_TXP) - mask |= 0x8; + if (insn->Instruction.Opcode != TGSI_OPCODE_TEX && + insn->Instruction.Opcode != TGSI_OPCODE_TXD) + mask |= 0x8; /* bias, lod or proj */ switch (tex->Texture) { case TGSI_TEXTURE_1D: mask &= 0x9; break; + case TGSI_TEXTURE_SHADOW1D: + mask &= 0x5; + break; case TGSI_TEXTURE_2D: mask &= 0xb; break; @@ -1612,17 +1918,17 @@ nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c) static struct nv50_reg * tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) { - switch (dst->DstRegister.File) { + switch (dst->Register.File) { case TGSI_FILE_TEMPORARY: - return &pc->temp[dst->DstRegister.Index * 4 + c]; + return &pc->temp[dst->Register.Index * 4 + c]; case TGSI_FILE_OUTPUT: - return &pc->result[dst->DstRegister.Index * 4 + c]; + return &pc->result[dst->Register.Index * 4 + c]; case TGSI_FILE_ADDRESS: { - struct nv50_reg *r = pc->addr[dst->DstRegister.Index * 4 + c]; + struct nv50_reg *r = pc->addr[dst->Register.Index * 4 + c]; if (!r) { r = alloc_addr(pc, NULL); - pc->addr[dst->DstRegister.Index * 4 + c] = r; + pc->addr[dst->Register.Index * 4 + c] = r; } assert(r); return r; @@ -1644,8 +1950,8 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, struct nv50_reg *temp; unsigned sgn, c, swz; - if (src->SrcRegister.File != TGSI_FILE_CONSTANT) - assert(!src->SrcRegister.Indirect); + if (src->Register.File != TGSI_FILE_CONSTANT) + assert(!src->Register.Indirect); sgn = tgsi_util_get_full_src_register_sign_mode(src, chan); @@ -1655,36 +1961,36 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, case TGSI_SWIZZLE_Y: case TGSI_SWIZZLE_Z: case TGSI_SWIZZLE_W: - switch (src->SrcRegister.File) { + switch (src->Register.File) { case TGSI_FILE_INPUT: - r = &pc->attr[src->SrcRegister.Index * 4 + c]; + r = &pc->attr[src->Register.Index * 4 + c]; break; case TGSI_FILE_TEMPORARY: - r = &pc->temp[src->SrcRegister.Index * 4 + c]; + r = &pc->temp[src->Register.Index * 4 + c]; break; case TGSI_FILE_CONSTANT: - if (!src->SrcRegister.Indirect) { - r = &pc->param[src->SrcRegister.Index * 4 + c]; + if (!src->Register.Indirect) { + r = &pc->param[src->Register.Index * 4 + c]; break; } /* Indicate indirection by setting r->acc < 0 and * use the index field to select the address reg. */ - r = MALLOC_STRUCT(nv50_reg); + r = reg_instance(pc, NULL); swz = tgsi_util_get_src_register_swizzle( - &src->SrcRegisterInd, 0); + &src->Indirect, 0); ctor_reg(r, P_CONST, - src->SrcRegisterInd.Index * 4 + swz, - src->SrcRegister.Index * 4 + c); + src->Indirect.Index * 4 + swz, + src->Register.Index * 4 + c); r->acc = -1; break; case TGSI_FILE_IMMEDIATE: - r = &pc->immd[src->SrcRegister.Index * 4 + c]; + r = &pc->immd[src->Register.Index * 4 + c]; break; case TGSI_FILE_SAMPLER: break; case TGSI_FILE_ADDRESS: - r = pc->addr[src->SrcRegister.Index * 4 + c]; + r = pc->addr[src->Register.Index * 4 + c]; assert(r); break; default: @@ -1724,6 +2030,8 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, break; } + if (r && r->acc >= 0 && r != temp) + return reg_instance(pc, r); return r; } @@ -1776,9 +2084,13 @@ nv50_tgsi_dst_revdep(unsigned op, int s, int c) assert(0); return 0x0; } + case TGSI_OPCODE_EXP: + case TGSI_OPCODE_LOG: case TGSI_OPCODE_LIT: case TGSI_OPCODE_SCS: case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TXB: + case TGSI_OPCODE_TXL: case TGSI_OPCODE_TXP: /* these take care of dangerous swizzles themselves */ return 0x0; @@ -1814,34 +2126,51 @@ nv50_kill_branch(struct nv50_pc *pc) if (pc->if_insn[lvl]->next != pc->p->exec_tail) return FALSE; + if (is_immd(pc->p->exec_tail)) + return FALSE; /* if ccode == 'true', the BRA is from an ELSE and the predicate * reg may no longer be valid, since we currently always use $p0 */ if (has_pred(pc->if_insn[lvl], 0xf)) return FALSE; - assert(pc->if_insn[lvl] && pc->br_join[lvl]); + assert(pc->if_insn[lvl] && pc->if_join[lvl]); - /* We'll use the exec allocated for JOIN_AT (as we can't easily - * update prev's next); if exec_tail is BRK, update the pointer. + /* We'll use the exec allocated for JOIN_AT (we can't easily + * access nv50_program_exec's prev). */ - if (pc->loop_lvl && pc->br_loop[pc->loop_lvl - 1] == pc->p->exec_tail) - pc->br_loop[pc->loop_lvl - 1] = pc->br_join[lvl]; - pc->p->exec_size -= 4; /* remove JOIN_AT and BRA */ - *pc->br_join[lvl] = *pc->p->exec_tail; + *pc->if_join[lvl] = *pc->p->exec_tail; FREE(pc->if_insn[lvl]); FREE(pc->p->exec_tail); - pc->p->exec_tail = pc->br_join[lvl]; + pc->p->exec_tail = pc->if_join[lvl]; pc->p->exec_tail->next = NULL; set_pred(pc, 0xd, 0, pc->p->exec_tail); return TRUE; } +static void +nv50_fp_move_results(struct nv50_pc *pc) +{ + struct nv50_reg reg; + unsigned i; + + ctor_reg(®, P_TEMP, -1, -1); + + for (i = 0; i < pc->result_nr * 4; ++i) { + if (pc->result[i].rhw < 0 || pc->result[i].hw < 0) + continue; + if (pc->result[i].rhw != pc->result[i].hw) { + reg.hw = pc->result[i].rhw; + emit_mov(pc, ®, &pc->result[i]); + } + } +} + static boolean nv50_program_tx_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *inst) @@ -1850,29 +2179,29 @@ nv50_program_tx_insn(struct nv50_pc *pc, unsigned mask, sat, unit; int i, c; - mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + mask = inst->Dst[0].Register.WriteMask; sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE; memset(src, 0, sizeof(src)); for (c = 0; c < 4; c++) { if ((mask & (1 << c)) && !pc->r_dst[c]) - dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]); + dst[c] = tgsi_dst(pc, c, &inst->Dst[0]); else dst[c] = pc->r_dst[c]; rdst[c] = dst[c]; } for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *fs = &inst->FullSrcRegisters[i]; + const struct tgsi_full_src_register *fs = &inst->Src[i]; unsigned src_mask; boolean neg_supp; src_mask = nv50_tgsi_src_mask(inst, i); neg_supp = negate_supported(inst, i); - if (fs->SrcRegister.File == TGSI_FILE_SAMPLER) - unit = fs->SrcRegister.Index; + if (fs->Register.File == TGSI_FILE_SAMPLER) + unit = fs->Register.Index; for (c = 0; c < 4; c++) if (src_mask & (1 << c)) @@ -1928,13 +2257,25 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_arl(pc, dst[0], temp, 4); break; case TGSI_OPCODE_BGNLOOP: + pc->loop_brka[pc->loop_lvl] = emit_breakaddr(pc); pc->loop_pos[pc->loop_lvl++] = pc->p->exec_size; terminate_mbb(pc); break; + case TGSI_OPCODE_BGNSUB: + assert(!pc->in_subroutine); + pc->in_subroutine = TRUE; + /* probably not necessary, but align to 8 byte boundary */ + if (!is_long(pc->p->exec_tail)) + convert_to_long(pc, pc->p->exec_tail); + break; case TGSI_OPCODE_BRK: - emit_branch(pc, -1, 0, NULL); assert(pc->loop_lvl > 0); - pc->br_loop[pc->loop_lvl - 1] = pc->p->exec_tail; + emit_break(pc, -1, 0); + break; + case TGSI_OPCODE_CAL: + assert(inst->Label.Label < pc->insn_nr); + emit_call(pc, -1, 0)->param.index = inst->Label.Label; + /* replaced by actual offset in nv50_program_fixup_insns */ break; case TGSI_OPCODE_CEIL: for (c = 0; c < 4; c++) { @@ -1956,17 +2297,22 @@ nv50_program_tx_insn(struct nv50_pc *pc, set_pred(pc, 0x6, 1, pc->p->exec_tail); /* @NSF */ } break; + case TGSI_OPCODE_CONT: + assert(pc->loop_lvl > 0); + emit_branch(pc, -1, 0)->param.index = + pc->loop_pos[pc->loop_lvl - 1]; + break; case TGSI_OPCODE_COS: if (mask & 8) { emit_precossin(pc, temp, src[0][3]); - emit_flop(pc, 5, dst[3], temp); + emit_flop(pc, NV50_FLOP_COS, dst[3], temp); if (!(mask &= 7)) break; if (temp == dst[3]) temp = brdc = temp_temp(pc); } emit_precossin(pc, temp, src[0][0]); - emit_flop(pc, 5, brdc, temp); + emit_flop(pc, NV50_FLOP_COS, brdc, temp); break; case TGSI_OPCODE_DDX: for (c = 0; c < 4; c++) { @@ -2010,7 +2356,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_mov_immdval(pc, dst[0], 1.0f); break; case TGSI_OPCODE_ELSE: - emit_branch(pc, -1, 0, NULL); + emit_branch(pc, -1, 0); pc->if_insn[--pc->if_lvl]->param.index = pc->p->exec_size; pc->if_insn[pc->if_lvl++] = pc->p->exec_tail; terminate_mbb(pc); @@ -2022,26 +2368,56 @@ nv50_program_tx_insn(struct nv50_pc *pc, if (nv50_kill_branch(pc) == TRUE) break; - if (pc->br_join[pc->if_lvl]) { - pc->br_join[pc->if_lvl]->param.index = pc->p->exec_size; - pc->br_join[pc->if_lvl] = NULL; + if (pc->if_join[pc->if_lvl]) { + pc->if_join[pc->if_lvl]->param.index = pc->p->exec_size; + pc->if_join[pc->if_lvl] = NULL; } terminate_mbb(pc); /* emit a NOP as join point, we could set it on the next * one, but would have to make sure it is long and !immd */ - emit_nop(pc); - pc->p->exec_tail->inst[1] |= 2; + JOIN_ON(emit_nop(pc)); break; case TGSI_OPCODE_ENDLOOP: - emit_branch(pc, -1, 0, NULL); - pc->p->exec_tail->param.index = pc->loop_pos[--pc->loop_lvl]; - pc->br_loop[pc->loop_lvl]->param.index = pc->p->exec_size; + emit_branch(pc, -1, 0)->param.index = + pc->loop_pos[--pc->loop_lvl]; + pc->loop_brka[pc->loop_lvl]->param.index = pc->p->exec_size; terminate_mbb(pc); break; + case TGSI_OPCODE_ENDSUB: + assert(pc->in_subroutine); + pc->in_subroutine = FALSE; + break; case TGSI_OPCODE_EX2: emit_preex2(pc, temp, src[0][0]); - emit_flop(pc, 6, brdc, temp); + emit_flop(pc, NV50_FLOP_EX2, brdc, temp); + break; + case TGSI_OPCODE_EXP: + { + struct nv50_reg *t[2]; + + assert(!temp); + t[0] = temp_temp(pc); + t[1] = temp_temp(pc); + + if (mask & 0x6) + emit_mov(pc, t[0], src[0][0]); + if (mask & 0x3) + emit_flr(pc, t[1], src[0][0]); + + if (mask & (1 << 1)) + emit_sub(pc, dst[1], t[0], t[1]); + if (mask & (1 << 0)) { + emit_preex2(pc, t[1], t[1]); + emit_flop(pc, NV50_FLOP_EX2, dst[0], t[1]); + } + if (mask & (1 << 2)) { + emit_preex2(pc, t[0], t[0]); + emit_flop(pc, NV50_FLOP_EX2, dst[2], t[0]); + } + if (mask & (1 << 3)) + emit_mov_immdval(pc, dst[3], 1.0f); + } break; case TGSI_OPCODE_FLR: for (c = 0; c < 4; c++) { @@ -2060,26 +2436,56 @@ nv50_program_tx_insn(struct nv50_pc *pc, } break; case TGSI_OPCODE_IF: - /* emitting a join_at may not be necessary */ - assert(pc->if_lvl < MAX_IF_DEPTH); - /* set_pred_wr(pc, 1, 0, pc->if_cond); */ + assert(pc->if_lvl < NV50_MAX_COND_NESTING); emit_cvt(pc, NULL, src[0][0], 0, CVTOP_ABS | CVTOP_RN, CVT_F32_F32); - emit_branch(pc, 0, 2, &pc->br_join[pc->if_lvl]); - pc->if_insn[pc->if_lvl++] = pc->p->exec_tail; + pc->if_join[pc->if_lvl] = emit_joinat(pc); + pc->if_insn[pc->if_lvl++] = emit_branch(pc, 0, 2);; terminate_mbb(pc); break; case TGSI_OPCODE_KIL: + assert(src[0][0] && src[0][1] && src[0][2] && src[0][3]); emit_kil(pc, src[0][0]); emit_kil(pc, src[0][1]); emit_kil(pc, src[0][2]); emit_kil(pc, src[0][3]); break; + case TGSI_OPCODE_KILP: + emit_kil(pc, NULL); + break; case TGSI_OPCODE_LIT: emit_lit(pc, &dst[0], mask, &src[0][0]); break; case TGSI_OPCODE_LG2: - emit_flop(pc, 3, brdc, src[0][0]); + emit_flop(pc, NV50_FLOP_LG2, brdc, src[0][0]); + break; + case TGSI_OPCODE_LOG: + { + struct nv50_reg *t[2]; + + t[0] = temp_temp(pc); + if (mask & (1 << 1)) + t[1] = temp_temp(pc); + else + t[1] = t[0]; + + emit_abs(pc, t[0], src[0][0]); + emit_flop(pc, NV50_FLOP_LG2, t[1], t[0]); + if (mask & (1 << 2)) + emit_mov(pc, dst[2], t[1]); + emit_flr(pc, t[1], t[1]); + if (mask & (1 << 0)) + emit_mov(pc, dst[0], t[1]); + if (mask & (1 << 1)) { + t[1]->mod = NV50_MOD_NEG; + emit_preex2(pc, t[1], t[1]); + t[1]->mod = 0; + emit_flop(pc, NV50_FLOP_EX2, t[1], t[1]); + emit_mul(pc, dst[1], t[0], t[1]); + } + if (mask & (1 << 3)) + emit_mov_immdval(pc, dst[3], 1.0f); + } break; case TGSI_OPCODE_LRP: temp = temp_temp(pc); @@ -2129,19 +2535,25 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_pow(pc, brdc, src[0][0], src[1][0]); break; case TGSI_OPCODE_RCP: - emit_flop(pc, 0, brdc, src[0][0]); + emit_flop(pc, NV50_FLOP_RCP, brdc, src[0][0]); + break; + case TGSI_OPCODE_RET: + if (pc->p->type == PIPE_SHADER_FRAGMENT && !pc->in_subroutine) + nv50_fp_move_results(pc); + emit_ret(pc, -1, 0); break; case TGSI_OPCODE_RSQ: - emit_flop(pc, 2, brdc, src[0][0]); + src[0][0]->mod |= NV50_MOD_ABS; + emit_flop(pc, NV50_FLOP_RSQ, brdc, src[0][0]); break; case TGSI_OPCODE_SCS: temp = temp_temp(pc); if (mask & 3) emit_precossin(pc, temp, src[0][0]); if (mask & (1 << 0)) - emit_flop(pc, 5, dst[0], temp); + emit_flop(pc, NV50_FLOP_COS, dst[0], temp); if (mask & (1 << 1)) - emit_flop(pc, 4, dst[1], temp); + emit_flop(pc, NV50_FLOP_SIN, dst[1], temp); if (mask & (1 << 2)) emit_mov_immdval(pc, dst[2], 0.0); if (mask & (1 << 3)) @@ -2150,14 +2562,14 @@ nv50_program_tx_insn(struct nv50_pc *pc, case TGSI_OPCODE_SIN: if (mask & 8) { emit_precossin(pc, temp, src[0][3]); - emit_flop(pc, 4, dst[3], temp); + emit_flop(pc, NV50_FLOP_SIN, dst[3], temp); if (!(mask &= 7)) break; if (temp == dst[3]) temp = brdc = temp_temp(pc); } emit_precossin(pc, temp, src[0][0]); - emit_flop(pc, 4, brdc, temp); + emit_flop(pc, NV50_FLOP_SIN, brdc, temp); break; case TGSI_OPCODE_SLT: case TGSI_OPCODE_SGE: @@ -2181,11 +2593,19 @@ nv50_program_tx_insn(struct nv50_pc *pc, break; case TGSI_OPCODE_TEX: emit_tex(pc, dst, mask, src[0], unit, - inst->InstructionExtTexture.Texture, FALSE); + inst->Texture.Texture, FALSE, 0); + break; + case TGSI_OPCODE_TXB: + emit_tex(pc, dst, mask, src[0], unit, + inst->Texture.Texture, FALSE, -1); + break; + case TGSI_OPCODE_TXL: + emit_tex(pc, dst, mask, src[0], unit, + inst->Texture.Texture, FALSE, 1); break; case TGSI_OPCODE_TXP: emit_tex(pc, dst, mask, src[0], unit, - inst->InstructionExtTexture.Texture, TRUE); + inst->Texture.Texture, TRUE, 0); break; case TGSI_OPCODE_TRUNC: for (c = 0; c < 4; c++) { @@ -2213,6 +2633,17 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_mov_immdval(pc, dst[3], 1.0); break; case TGSI_OPCODE_END: + if (pc->p->type == PIPE_SHADER_FRAGMENT) + nv50_fp_move_results(pc); + + /* last insn must be long so it can have the exit bit set */ + if (!is_long(pc->p->exec_tail)) + convert_to_long(pc, pc->p->exec_tail); + else + if (is_immd(pc->p->exec_tail) || is_join(pc->p->exec_tail)) + emit_nop(pc); + + pc->p->exec_tail->inst[1] |= 1; /* set exit bit */ break; default: NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode); @@ -2239,20 +2670,9 @@ nv50_program_tx_insn(struct nv50_pc *pc, } } - for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - for (c = 0; c < 4; c++) { - if (!src[i][c]) - continue; - src[i][c]->mod = 0; - if (src[i][c]->index == -1 && src[i][c]->type == P_IMMD) - FREE(src[i][c]); - else - if (src[i][c]->acc < 0 && src[i][c]->type == P_CONST) - FREE(src[i][c]); /* indirect constant */ - } - } - kill_temp_temp(pc); + pc->reg_instance_nr = 0; + return TRUE; } @@ -2264,14 +2684,20 @@ prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn) const struct tgsi_dst_register *dst; unsigned i, c, k, mask; - dst = &insn->FullDstRegisters[0].DstRegister; + dst = &insn->Dst[0].Register; mask = dst->WriteMask; if (dst->File == TGSI_FILE_TEMPORARY) - reg = pc->temp; + reg = pc->temp; else - if (dst->File == TGSI_FILE_OUTPUT) - reg = pc->result; + if (dst->File == TGSI_FILE_OUTPUT) { + reg = pc->result; + + if (insn->Instruction.Opcode == TGSI_OPCODE_MOV && + dst->Index == pc->edgeflag_out && + insn->Src[0].Register.File == TGSI_FILE_INPUT) + pc->p->cfg.edgeflag_in = insn->Src[0].Register.Index; + } if (reg) { for (c = 0; c < 4; c++) { @@ -2282,12 +2708,12 @@ prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn) } for (i = 0; i < insn->Instruction.NumSrcRegs; i++) { - src = &insn->FullSrcRegisters[i]; + src = &insn->Src[i]; - if (src->SrcRegister.File == TGSI_FILE_TEMPORARY) + if (src->Register.File == TGSI_FILE_TEMPORARY) reg = pc->temp; else - if (src->SrcRegister.File == TGSI_FILE_INPUT) + if (src->Register.File == TGSI_FILE_INPUT) reg = pc->attr; else continue; @@ -2299,7 +2725,7 @@ prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn) continue; k = tgsi_util_get_full_src_register_swizzle(src, c); - reg[src->SrcRegister.Index * 4 + k].acc = pc->insn_nr; + reg[src->Register.Index * 4 + k].acc = pc->insn_nr; } } } @@ -2359,13 +2785,13 @@ static struct nv50_reg * tgsi_broadcast_dst(struct nv50_pc *pc, const struct tgsi_full_dst_register *fd, unsigned mask) { - if (fd->DstRegister.File == TGSI_FILE_TEMPORARY) { - int c = ffs(~mask & fd->DstRegister.WriteMask); + if (fd->Register.File == TGSI_FILE_TEMPORARY) { + int c = ffs(~mask & fd->Register.WriteMask); if (c) return tgsi_dst(pc, c - 1, fd); } else { - int c = ffs(fd->DstRegister.WriteMask) - 1; - if ((1 << c) == fd->DstRegister.WriteMask) + int c = ffs(fd->Register.WriteMask) - 1; + if ((1 << c) == fd->Register.WriteMask) return tgsi_dst(pc, c, fd); } @@ -2379,7 +2805,7 @@ static unsigned nv50_tgsi_scan_swizzle(const struct tgsi_full_instruction *insn, unsigned rdep[4]) { - const struct tgsi_full_dst_register *fd = &insn->FullDstRegisters[0]; + const struct tgsi_full_dst_register *fd = &insn->Dst[0]; const struct tgsi_full_src_register *fs; unsigned i, deqs = 0; @@ -2390,9 +2816,9 @@ nv50_tgsi_scan_swizzle(const struct tgsi_full_instruction *insn, unsigned chn, mask = nv50_tgsi_src_mask(insn, i); boolean neg_supp = negate_supported(insn, i); - fs = &insn->FullSrcRegisters[i]; - if (fs->SrcRegister.File != fd->DstRegister.File || - fs->SrcRegister.Index != fd->DstRegister.Index) + fs = &insn->Src[i]; + if (fs->Register.File != fd->Register.File || + fs->Register.Index != fd->Register.Index) continue; for (chn = 0; chn < 4; ++chn) { @@ -2403,7 +2829,7 @@ nv50_tgsi_scan_swizzle(const struct tgsi_full_instruction *insn, c = tgsi_util_get_full_src_register_swizzle(fs, chn); s = tgsi_util_get_full_src_register_sign_mode(fs, chn); - if (!(fd->DstRegister.WriteMask & (1 << c))) + if (!(fd->Register.WriteMask & (1 << c))) continue; /* no danger if src is copied to TEMP first */ @@ -2427,7 +2853,7 @@ nv50_tgsi_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) const struct tgsi_full_dst_register *fd; unsigned i, deqs, rdep[4], m[4]; - fd = &tok->FullInstruction.FullDstRegisters[0]; + fd = &tok->FullInstruction.Dst[0]; deqs = nv50_tgsi_scan_swizzle(&insn, rdep); if (is_scalar_op(insn.Instruction.Opcode)) { @@ -2438,7 +2864,7 @@ nv50_tgsi_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } pc->r_brdc = NULL; - if (!deqs) + if (!deqs || (!rdep[0] && !rdep[1] && !rdep[2] && !rdep[3])) return nv50_program_tx_insn(pc, &insn); deqs = nv50_revdep_reorder(m, rdep); @@ -2446,10 +2872,10 @@ nv50_tgsi_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) for (i = 0; i < 4; ++i) { assert(pc->r_dst[m[i]] == NULL); - insn.FullDstRegisters[0].DstRegister.WriteMask = - fd->DstRegister.WriteMask & (1 << m[i]); + insn.Dst[0].Register.WriteMask = + fd->Register.WriteMask & (1 << m[i]); - if (!insn.FullDstRegisters[0].DstRegister.WriteMask) + if (!insn.Dst[0].Register.WriteMask) continue; if (deqs & (1 << i)) @@ -2489,7 +2915,7 @@ load_interpolant(struct nv50_pc *pc, struct nv50_reg *reg) iv->rhw = popcnt4(pc->p->cfg.regs[1] >> 24) - 1; emit_interp(pc, iv, NULL, mode & INTERP_CENTROID); - emit_flop(pc, 0, iv, iv); + emit_flop(pc, NV50_FLOP_RCP, iv, iv); /* XXX: when loading interpolants dynamically, move these * to the program head, or make sure it can't be skipped. @@ -2535,10 +2961,10 @@ nv50_program_tx_prep(struct nv50_pc *pc) const struct tgsi_full_immediate *imm = &tp.FullToken.FullImmediate; - ctor_immd(pc, imm->u[0].Float, - imm->u[1].Float, - imm->u[2].Float, - imm->u[3].Float); + ctor_immd_4f32(pc, imm->u[0].Float, + imm->u[1].Float, + imm->u[2].Float, + imm->u[3].Float); } break; case TGSI_TOKEN_TYPE_DECLARATION: @@ -2547,8 +2973,8 @@ nv50_program_tx_prep(struct nv50_pc *pc) unsigned si, last, first, mode; d = &tp.FullToken.FullDeclaration; - first = d->DeclarationRange.First; - last = d->DeclarationRange.Last; + first = d->Range.First; + last = d->Range.Last; switch (d->Declaration.File) { case TGSI_FILE_TEMPORARY: @@ -2558,8 +2984,8 @@ nv50_program_tx_prep(struct nv50_pc *pc) p->type == PIPE_SHADER_FRAGMENT) break; - si = d->Semantic.SemanticIndex; - switch (d->Semantic.SemanticName) { + si = d->Semantic.Index; + switch (d->Semantic.Name) { case TGSI_SEMANTIC_BCOLOR: p->cfg.two_side[si].hw = first; if (p->cfg.io_nr > first) @@ -2570,6 +2996,9 @@ nv50_program_tx_prep(struct nv50_pc *pc) if (p->cfg.io_nr > first) p->cfg.io_nr = first; break; + case TGSI_SEMANTIC_EDGEFLAG: + pc->edgeflag_out = first; + break; /* case TGSI_SEMANTIC_CLIP_DISTANCE: p->cfg.clpd = MIN2(p->cfg.clpd, first); @@ -2637,7 +3066,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) for (i = 0, rid = 0; i < pc->result_nr; ++i) { p->cfg.io[i].hw = rid; - p->cfg.io[i].id_vp = i; + p->cfg.io[i].id = i; for (c = 0; c < 4; ++c) { int n = i * 4 + c; @@ -2669,14 +3098,12 @@ nv50_program_tx_prep(struct nv50_pc *pc) * the lower hardware IDs, so sort them: */ for (i = 0; i < pc->attr_nr; i++) { - if (pc->interp_mode[i] == INTERP_FLAT) { - p->cfg.io[m].id_vp = i + base; - p->cfg.io[m++].id_fp = i; - } else { + if (pc->interp_mode[i] == INTERP_FLAT) + p->cfg.io[m++].id = i; + else { if (!(pc->interp_mode[i] & INTERP_PERSPECTIVE)) p->cfg.io[n].linear = TRUE; - p->cfg.io[n].id_vp = i + base; - p->cfg.io[n++].id_fp = i; + p->cfg.io[n++].id = i; } } @@ -2688,7 +3115,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) for (n = 0; n < pc->attr_nr; ++n) { p->cfg.io[n].hw = rid = aid; - i = p->cfg.io[n].id_fp; + i = p->cfg.io[n].id; if (p->info.input_semantic_name[n] == TGSI_SEMANTIC_FACE) { @@ -2728,8 +3155,8 @@ nv50_program_tx_prep(struct nv50_pc *pc) for (i = 0; i < pc->attr_nr; i++) { ubyte si, sn; - sn = p->info.input_semantic_name[p->cfg.io[i].id_fp]; - si = p->info.input_semantic_index[p->cfg.io[i].id_fp]; + sn = p->info.input_semantic_name[p->cfg.io[i].id]; + si = p->info.input_semantic_index[p->cfg.io[i].id]; if (sn == TGSI_SEMANTIC_COLOR) { p->cfg.two_side[si] = p->cfg.io[i]; @@ -2820,6 +3247,8 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p) p->cfg.two_side[0].hw = 0x40; p->cfg.two_side[1].hw = 0x40; + p->cfg.edgeflag_in = pc->edgeflag_out = 0xff; + switch (p->type) { case PIPE_SHADER_VERTEX: p->cfg.psiz = 0x40; @@ -2894,27 +3323,9 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p) } static void -nv50_fp_move_results(struct nv50_pc *pc) -{ - struct nv50_reg reg; - unsigned i; - - ctor_reg(®, P_TEMP, -1, -1); - - for (i = 0; i < pc->result_nr * 4; ++i) { - if (pc->result[i].rhw < 0 || pc->result[i].hw < 0) - continue; - if (pc->result[i].rhw != pc->result[i].hw) { - reg.hw = pc->result[i].rhw; - emit_mov(pc, ®, &pc->result[i]); - } - } -} - -static void nv50_program_fixup_insns(struct nv50_pc *pc) { - struct nv50_program_exec *e, *prev = NULL, **bra_list; + struct nv50_program_exec *e, **bra_list; unsigned i, n, pos; bra_list = CALLOC(pc->p->exec_size, sizeof(struct nv50_program_exec *)); @@ -2934,27 +3345,24 @@ nv50_program_fixup_insns(struct nv50_pc *pc) for (i = 0; i < n; ++i) if (bra_list[i]->param.index >= pos) bra_list[i]->param.index += 1; + for (i = 0; i < pc->insn_nr; ++i) + if (pc->insn_pos[i] >= pos) + pc->insn_pos[i] += 1; convert_to_long(pc, e); ++pos; } - if (e->next) - prev = e; } - assert(!is_immd(pc->p->exec_head)); - assert(!is_immd(pc->p->exec_tail)); + FREE(bra_list); - /* last instruction must be long so it can have the end bit set */ - if (!is_long(pc->p->exec_tail)) { - convert_to_long(pc, pc->p->exec_tail); - if (prev) - convert_to_long(pc, prev); - } - assert(!(pc->p->exec_tail->inst[1] & 2)); - /* set the end-bit */ - pc->p->exec_tail->inst[1] |= 1; + if (!pc->p->info.opcode_count[TGSI_OPCODE_CAL]) + return; - FREE(bra_list); + /* fill in CALL offsets */ + for (e = pc->p->exec_head; e; e = e->next) { + if ((e->inst[0] & 2) && (e->inst[0] >> 28) == 0x2) + e->param.index = pc->insn_pos[e->param.index]; + } } static boolean @@ -2976,19 +3384,20 @@ nv50_program_tx(struct nv50_program *p) if (ret == FALSE) goto out_cleanup; + pc->insn_pos = MALLOC(pc->insn_nr * sizeof(unsigned)); + tgsi_parse_init(&parse, pc->p->pipe.tokens); while (!tgsi_parse_end_of_tokens(&parse)) { const union tgsi_full_token *tok = &parse.FullToken; - /* don't allow half insn/immd on first and last instruction */ + /* previously allow32 was FALSE for first & last instruction */ pc->allow32 = TRUE; - if (pc->insn_cur == 0 || pc->insn_cur + 2 == pc->insn_nr) - pc->allow32 = FALSE; tgsi_parse_token(&parse); switch (tok->Token.Type) { case TGSI_TOKEN_TYPE_INSTRUCTION: + pc->insn_pos[pc->insn_cur] = pc->p->exec_size; ++pc->insn_cur; ret = nv50_tgsi_insn(pc, tok); if (ret == FALSE) @@ -2999,9 +3408,6 @@ nv50_program_tx(struct nv50_program *p) } } - if (pc->p->type == PIPE_SHADER_FRAGMENT) - nv50_fp_move_results(pc); - nv50_program_fixup_insns(pc); p->param_nr = pc->param_nr * 4; @@ -3025,7 +3431,7 @@ nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) } static void -nv50_program_upload_data(struct nv50_context *nv50, float *map, +nv50_program_upload_data(struct nv50_context *nv50, uint32_t *map, unsigned start, unsigned count, unsigned cbuf) { struct nouveau_channel *chan = nv50->screen->base.channel; @@ -3073,8 +3479,8 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) if (p->param_nr) { unsigned cb; - float *map = pipe_buffer_map(pscreen, nv50->constbuf[p->type], - PIPE_BUFFER_USAGE_CPU_READ); + uint32_t *map = pipe_buffer_map(pscreen, nv50->constbuf[p->type], + PIPE_BUFFER_USAGE_CPU_READ); if (p->type == PIPE_SHADER_VERTEX) cb = NV50_CB_PVP; @@ -3173,7 +3579,7 @@ nv50_vertprog_validate(struct nv50_context *nv50) nv50_program_validate_data(nv50, p); nv50_program_validate_code(nv50, p); - so = so_new(13, 2); + so = so_new(5, 8, 2); so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2); so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); @@ -3209,7 +3615,7 @@ nv50_fragprog_validate(struct nv50_context *nv50) nv50_program_validate_data(nv50, p); nv50_program_validate_code(nv50, p); - so = so_new(64, 2); + so = so_new(6, 7, 2); so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2); so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); @@ -3219,7 +3625,7 @@ nv50_fragprog_validate(struct nv50_context *nv50) so_data (so, p->cfg.high_temp); so_method(so, tesla, NV50TCL_FP_RESULT_COUNT, 1); so_data (so, p->cfg.high_result); - so_method(so, tesla, NV50TCL_FP_CTRL_UNK19A8, 1); + so_method(so, tesla, NV50TCL_FP_CONTROL, 1); so_data (so, p->cfg.regs[2]); so_method(so, tesla, NV50TCL_FP_CTRL_UNK196C, 1); so_data (so, p->cfg.regs[3]); @@ -3236,15 +3642,15 @@ nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base) struct nv50_program *vp = nv50->vertprog; unsigned i, c, m = base; - /* XXX: This can't work correctly in all cases yet, we either - * have to create TGSI_SEMANTIC_PNTC or sprite_coord_mode has - * to be per FP input instead of per VP output + /* XXX: this might not work correctly in all cases yet - we'll + * just assume that an FP generic input that is not written in + * the VP is PointCoord. */ memset(pntc, 0, 8 * sizeof(uint32_t)); for (i = 0; i < fp->cfg.io_nr; i++) { uint8_t sn, si; - uint8_t j = fp->cfg.io[i].id_vp, k = fp->cfg.io[i].id_fp; + uint8_t j, k = fp->cfg.io[i].id; unsigned n = popcnt4(fp->cfg.io[i].mask); if (fp->info.input_semantic_name[k] != TGSI_SEMANTIC_GENERIC) { @@ -3252,10 +3658,16 @@ nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base) continue; } - sn = vp->info.input_semantic_name[j]; - si = vp->info.input_semantic_index[j]; + for (j = 0; j < vp->info.num_outputs; ++j) { + sn = vp->info.output_semantic_name[j]; + si = vp->info.output_semantic_index[j]; + + if (sn == fp->info.input_semantic_name[k] && + si == fp->info.input_semantic_index[k]) + break; + } - if (j < fp->cfg.io_nr && sn == TGSI_SEMANTIC_GENERIC) { + if (j < vp->info.num_outputs) { ubyte mode = nv50->rasterizer->pipe.sprite_coord_mode[si]; @@ -3343,20 +3755,24 @@ nv50_linkage_validate(struct nv50_context *nv50) reg[0] += m - 4; /* adjust FFC0 id */ reg[4] |= m << 8; /* set mid where 'normal' FP inputs start */ - i = 0; - if (fp->info.input_semantic_name[0] == TGSI_SEMANTIC_POSITION) - i = 1; - for (; i < fp->cfg.io_nr; i++) { - ubyte sn = fp->info.input_semantic_name[fp->cfg.io[i].id_fp]; - ubyte si = fp->info.input_semantic_index[fp->cfg.io[i].id_fp]; - - n = fp->cfg.io[i].id_vp; - if (n >= vp->cfg.io_nr || - vp->info.output_semantic_name[n] != sn || - vp->info.output_semantic_index[n] != si) - vpo = &dummy; - else - vpo = &vp->cfg.io[n]; + for (i = 0; i < fp->cfg.io_nr; i++) { + ubyte sn = fp->info.input_semantic_name[fp->cfg.io[i].id]; + ubyte si = fp->info.input_semantic_index[fp->cfg.io[i].id]; + + /* position must be mapped first */ + assert(i == 0 || sn != TGSI_SEMANTIC_POSITION); + + /* maybe even remove these from cfg.io */ + if (sn == TGSI_SEMANTIC_POSITION || sn == TGSI_SEMANTIC_FACE) + continue; + + /* VP outputs and vp->cfg.io are in the same order */ + for (n = 0; n < vp->info.num_outputs; ++n) { + if (vp->info.output_semantic_name[n] == sn && + vp->info.output_semantic_index[n] == si) + break; + } + vpo = (n < vp->info.num_outputs) ? &vp->cfg.io[n] : &dummy; m = nv50_sreg4_map(map, m, lin, &fp->cfg.io[i], vpo); } @@ -3367,7 +3783,7 @@ nv50_linkage_validate(struct nv50_context *nv50) } /* now fill the stateobj */ - so = so_new(64, 0); + so = so_new(6, 58, 0); n = (m + 3) / 4; so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1); @@ -3381,7 +3797,7 @@ nv50_linkage_validate(struct nv50_context *nv50) so_method(so, tesla, NV50TCL_FP_INTERPOLANT_CTRL, 1); so_data (so, reg[4]); - so_method(so, tesla, 0x1540, 4); + so_method(so, tesla, NV50TCL_NOPERSPECTIVE_BITMAP(0), 4); so_datap (so, lin, 4); if (nv50->rasterizer->pipe.point_sprite) { diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h index d78dee083f1..461fec1d89c 100644 --- a/src/gallium/drivers/nv50/nv50_program.h +++ b/src/gallium/drivers/nv50/nv50_program.h @@ -17,8 +17,7 @@ struct nv50_program_exec { struct nv50_sreg4 { uint8_t hw; - uint8_t id_vp; - uint8_t id_fp; + uint8_t id; /* tgsi index, nv50 needs them sorted: flat ones last */ uint8_t mask; boolean linear; @@ -38,7 +37,7 @@ struct nv50_program { struct nouveau_bo *bo; - float *immd; + uint32_t *immd; unsigned immd_nr; unsigned param_nr; @@ -59,6 +58,7 @@ struct nv50_program { /* VP only */ uint8_t clpd, clpd_nr; uint8_t psiz; + uint8_t edgeflag_in; } cfg; }; diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c index 5305c93d59a..5d9e18218ae 100644 --- a/src/gallium/drivers/nv50/nv50_query.c +++ b/src/gallium/drivers/nv50/nv50_query.c @@ -77,9 +77,9 @@ nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq) struct nouveau_grobj *tesla = nv50->screen->tesla; struct nv50_query *q = nv50_query(pq); - BEGIN_RING(chan, tesla, 0x1530, 1); + BEGIN_RING(chan, tesla, NV50TCL_SAMPLECNT_RESET, 1); OUT_RING (chan, 1); - BEGIN_RING(chan, tesla, 0x1514, 1); + BEGIN_RING(chan, tesla, NV50TCL_SAMPLECNT_ENABLE, 1); OUT_RING (chan, 1); q->ready = FALSE; @@ -93,7 +93,7 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq) struct nouveau_grobj *tesla = nv50->screen->tesla; struct nv50_query *q = nv50_query(pq); - WAIT_RING (chan, 5); + MARK_RING (chan, 5, 2); /* flush on lack of space or relocs */ BEGIN_RING(chan, tesla, NV50TCL_QUERY_ADDRESS_HIGH, 4); OUT_RELOCh(chan, q->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); OUT_RELOCl(chan, q->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index e1b2f11239a..28e2b35deaa 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -76,6 +76,7 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen, case PIPE_FORMAT_DXT3_RGBA: case PIPE_FORMAT_DXT5_RGBA: case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: case PIPE_FORMAT_Z32_FLOAT: case PIPE_FORMAT_R16G16B16A16_SNORM: case PIPE_FORMAT_R16G16B16A16_UNORM: @@ -97,6 +98,10 @@ nv50_screen_get_param(struct pipe_screen *pscreen, int param) switch (param) { case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: return 32; + case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: + return 32; + case PIPE_CAP_MAX_COMBINED_SAMPLERS: + return 64; case PIPE_CAP_NPOT_TEXTURES: return 1; case PIPE_CAP_TWO_SIDED_STENCIL: @@ -122,10 +127,8 @@ nv50_screen_get_param(struct pipe_screen *pscreen, int param) case PIPE_CAP_TEXTURE_MIRROR_CLAMP: case PIPE_CAP_TEXTURE_MIRROR_REPEAT: return 1; - case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: - return 0; case PIPE_CAP_TGSI_CONT_SUPPORTED: - return 0; + return 1; case PIPE_CAP_BLEND_EQUATION_SEPARATE: return 1; case NOUVEAU_CAP_HW_VTXBUF: @@ -162,6 +165,21 @@ static void nv50_screen_destroy(struct pipe_screen *pscreen) { struct nv50_screen *screen = nv50_screen(pscreen); + unsigned i; + + for (i = 0; i < 2; i++) { + if (screen->constbuf_parm[i]) + nouveau_bo_ref(NULL, &screen->constbuf_parm[i]); + } + + if (screen->constbuf_misc[0]) + nouveau_bo_ref(NULL, &screen->constbuf_misc[0]); + if (screen->tic) + nouveau_bo_ref(NULL, &screen->tic); + if (screen->tsc) + nouveau_bo_ref(NULL, &screen->tsc); + if (screen->static_init) + so_ref(NULL, &screen->static_init); nouveau_notifier_free(&screen->sync); nouveau_grobj_free(&screen->tesla); @@ -171,6 +189,28 @@ nv50_screen_destroy(struct pipe_screen *pscreen) FREE(screen); } +static int +nv50_pre_pipebuffer_map(struct pipe_screen *pscreen, struct pipe_buffer *pb, + unsigned usage) +{ + struct nv50_screen *screen = nv50_screen(pscreen); + struct nv50_context *ctx = screen->cur_ctx; + + if (!(pb->usage & PIPE_BUFFER_USAGE_VERTEX)) + return 0; + + /* Our vtxbuf got mapped, it can no longer be considered part of current + * state, remove it to avoid emitting reloc markers. + */ + if (ctx && ctx->state.vtxbuf && so_bo_is_reloc(ctx->state.vtxbuf, + nouveau_bo(pb))) { + so_ref(NULL, &ctx->state.vtxbuf); + ctx->dirty |= NV50_NEW_ARRAYS; + } + + return 0; +} + struct pipe_screen * nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) { @@ -198,6 +238,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) pscreen->get_param = nv50_screen_get_param; pscreen->get_paramf = nv50_screen_get_paramf; pscreen->is_format_supported = nv50_screen_is_format_supported; + screen->base.pre_pipebuffer_map_callback = nv50_pre_pipebuffer_map; nv50_screen_init_miptree_functions(pscreen); nv50_transfer_init_screen_functions(pscreen); @@ -210,7 +251,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) nv50_screen_destroy(pscreen); return NULL; } - BIND_RING(chan, screen->m2mf, 1); /* 2D object */ ret = nouveau_grobj_alloc(chan, 0xbeef502d, NV50_2D, &screen->eng2d); @@ -219,7 +259,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) nv50_screen_destroy(pscreen); return NULL; } - BIND_RING(chan, screen->eng2d, 2); /* 3D object */ switch (chipset & 0xf0) { @@ -228,8 +267,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) break; case 0x80: case 0x90: - /* this stupid name should be corrected. */ - tesla_class = NV54TCL; + tesla_class = NV84TCL; break; case 0xa0: switch (chipset) { @@ -239,7 +277,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) tesla_class = NVA0TCL; break; default: - tesla_class = 0x8597; + tesla_class = NVA8TCL; break; } break; @@ -256,7 +294,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) nv50_screen_destroy(pscreen); return NULL; } - BIND_RING(chan, screen->tesla, 3); /* Sync notifier */ ret = nouveau_notifier_alloc(chan, 0xbeef0301, 1, &screen->sync); @@ -267,7 +304,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) } /* Static M2MF init */ - so = so_new(32, 0); + so = so_new(1, 3, 0); so_method(so, screen->m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 3); so_data (so, screen->sync->handle); so_data (so, chan->vram->handle); @@ -276,7 +313,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_ref (NULL, &so); /* Static 2D init */ - so = so_new(64, 0); + so = so_new(4, 7, 0); so_method(so, screen->eng2d, NV50_2D_DMA_NOTIFY, 4); so_data (so, screen->sync->handle); so_data (so, chan->vram->handle); @@ -284,7 +321,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_data (so, chan->vram->handle); so_method(so, screen->eng2d, NV50_2D_OPERATION, 1); so_data (so, NV50_2D_OPERATION_SRCCOPY); - so_method(so, screen->eng2d, 0x0290, 1); + so_method(so, screen->eng2d, NV50_2D_CLIP_ENABLE, 1); so_data (so, 0); so_method(so, screen->eng2d, 0x0888, 1); so_data (so, 1); @@ -292,33 +329,35 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_ref(NULL, &so); /* Static tesla init */ - so = so_new(256, 20); + so = so_new(40, 84, 20); - so_method(so, screen->tesla, 0x1558, 1); - so_data (so, 1); + so_method(so, screen->tesla, NV50TCL_COND_MODE, 1); + so_data (so, NV50TCL_COND_MODE_ALWAYS); so_method(so, screen->tesla, NV50TCL_DMA_NOTIFY, 1); so_data (so, screen->sync->handle); - so_method(so, screen->tesla, NV50TCL_DMA_UNK0(0), - NV50TCL_DMA_UNK0__SIZE); - for (i = 0; i < NV50TCL_DMA_UNK0__SIZE; i++) + so_method(so, screen->tesla, NV50TCL_DMA_ZETA, 11); + for (i = 0; i < 11; i++) so_data(so, chan->vram->handle); - so_method(so, screen->tesla, NV50TCL_DMA_UNK1(0), - NV50TCL_DMA_UNK1__SIZE); - for (i = 0; i < NV50TCL_DMA_UNK1__SIZE; i++) + so_method(so, screen->tesla, NV50TCL_DMA_COLOR(0), + NV50TCL_DMA_COLOR__SIZE); + for (i = 0; i < NV50TCL_DMA_COLOR__SIZE; i++) so_data(so, chan->vram->handle); - so_method(so, screen->tesla, 0x121c, 1); + so_method(so, screen->tesla, NV50TCL_RT_CONTROL, 1); so_data (so, 1); /* activate all 32 lanes (threads) in a warp */ - so_method(so, screen->tesla, 0x19a0, 1); + so_method(so, screen->tesla, NV50TCL_WARP_HALVES, 1); so_data (so, 0x2); so_method(so, screen->tesla, 0x1400, 1); so_data (so, 0xf); - so_method(so, screen->tesla, 0x13bc, 1); + /* max TIC (bits 4:8) & TSC (ignored) bindings, per program type */ + so_method(so, screen->tesla, NV50TCL_TEX_LIMITS(0), 1); + so_data (so, 0x54); + so_method(so, screen->tesla, NV50TCL_TEX_LIMITS(2), 1); so_data (so, 0x54); /* origin is top left (set to 1 for bottom left) */ - so_method(so, screen->tesla, 0x13ac, 1); + so_method(so, screen->tesla, NV50TCL_Y_ORIGIN_BOTTOM, 1); so_data (so, 0); so_method(so, screen->tesla, NV50TCL_VP_REG_ALLOC_RESULT, 1); so_data (so, 8); @@ -354,7 +393,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) // B = buffer ID (maybe more than 1 byte) // N = CB index used in shader instruction // P = program type (0 = VP, 2 = GP, 3 = FP) - so_method(so, screen->tesla, 0x1694, 1); + so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); so_data (so, 0x000BBNP1); */ @@ -387,7 +426,8 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); so_data (so, 0x00000131 | (NV50_CB_PFP << 12)); - ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 64*8*4, &screen->tic); + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, PIPE_SHADER_TYPES*32*32, + &screen->tic); if (ret) { nv50_screen_destroy(pscreen); return NULL; @@ -398,9 +438,10 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); - so_data (so, 0x000007ff); + so_data (so, PIPE_SHADER_TYPES * 32 - 1); - ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 64*8*4, &screen->tsc); + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, PIPE_SHADER_TYPES*32*32, + &screen->tsc); if (ret) { nv50_screen_destroy(pscreen); return NULL; @@ -411,27 +452,31 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); - so_data (so, 0x00000000); + so_data (so, 0x00000000); /* ignored if TSC_LINKED (0x1234) = 1 */ /* Vertex array limits - max them out */ for (i = 0; i < 16; i++) { - so_method(so, screen->tesla, NV50TCL_UNK1080_OFFSET_HIGH(i), 2); + so_method(so, screen->tesla, NV50TCL_VERTEX_ARRAY_LIMIT_HIGH(i), 2); so_data (so, 0x000000ff); so_data (so, 0xffffffff); } - so_method(so, screen->tesla, NV50TCL_DEPTH_RANGE_NEAR, 2); + so_method(so, screen->tesla, NV50TCL_DEPTH_RANGE_NEAR(0), 2); so_data (so, fui(0.0)); so_data (so, fui(1.0)); - so_method(so, screen->tesla, 0x1234, 1); + /* no dynamic combination of TIC & TSC entries => only BIND_TIC used */ + so_method(so, screen->tesla, NV50TCL_LINKED_TSC, 1); so_data (so, 1); /* activate first scissor rectangle */ - so_method(so, screen->tesla, NV50TCL_SCISSOR_ENABLE, 1); + so_method(so, screen->tesla, NV50TCL_SCISSOR_ENABLE(0), 1); so_data (so, 1); + so_method(so, screen->tesla, NV50TCL_EDGEFLAG_ENABLE, 1); + so_data (so, 1); /* default edgeflag to TRUE */ + so_emit(chan, so); so_ref (so, &screen->static_init); so_ref (NULL, &so); diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h index 61e24a5b571..a038a4e3c2a 100644 --- a/src/gallium/drivers/nv50/nv50_screen.h +++ b/src/gallium/drivers/nv50/nv50_screen.h @@ -2,6 +2,7 @@ #define __NV50_SCREEN_H__ #include "nouveau/nouveau_screen.h" +#include "nv50_context.h" struct nv50_screen { struct nouveau_screen base; @@ -9,6 +10,7 @@ struct nv50_screen { struct nouveau_winsys *nvws; unsigned cur_pctx; + struct nv50_context *cur_ctx; struct nouveau_grobj *tesla; struct nouveau_grobj *eng2d; diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index ffaa5e29d1c..1f67df814b1 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -35,7 +35,7 @@ static void * nv50_blend_state_create(struct pipe_context *pipe, const struct pipe_blend_state *cso) { - struct nouveau_stateobj *so = so_new(64, 0); + struct nouveau_stateobj *so = so_new(5, 24, 0); struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla; struct nv50_blend_stateobj *bso = CALLOC_STRUCT(nv50_blend_stateobj); unsigned cmask = 0, i; @@ -146,7 +146,6 @@ nv50_sampler_state_create(struct pipe_context *pipe, (wrap_mode(cso->wrap_r) << 6)); switch (cso->mag_img_filter) { - case PIPE_TEX_FILTER_ANISO: case PIPE_TEX_FILTER_LINEAR: tsc[1] |= NV50TSC_1_1_MAGF_LINEAR; break; @@ -157,7 +156,6 @@ nv50_sampler_state_create(struct pipe_context *pipe, } switch (cso->min_img_filter) { - case PIPE_TEX_FILTER_ANISO: case PIPE_TEX_FILTER_LINEAR: tsc[1] |= NV50TSC_1_1_MINF_LINEAR; break; @@ -196,8 +194,9 @@ nv50_sampler_state_create(struct pipe_context *pipe, } if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - tsc[0] |= (1 << 8); - tsc[0] |= (nvgl_comparison_op(cso->compare_func) & 0x7); + /* XXX: must be deactivated for non-shadow textures */ + tsc[0] |= (1 << 9); + tsc[0] |= (nvgl_comparison_op(cso->compare_func) & 0x7) << 10; } limit = CLAMP(cso->lod_bias, -16.0, 15.0); @@ -215,46 +214,71 @@ nv50_sampler_state_create(struct pipe_context *pipe, return (void *)sso; } -static void -nv50_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) +static INLINE void +nv50_sampler_state_bind(struct pipe_context *pipe, unsigned type, + unsigned nr, void **sampler) { struct nv50_context *nv50 = nv50_context(pipe); - int i; - nv50->sampler_nr = nr; - for (i = 0; i < nv50->sampler_nr; i++) - nv50->sampler[i] = sampler[i]; + memcpy(nv50->sampler[type], sampler, nr * sizeof(void *)); + nv50->sampler_nr[type] = nr; nv50->dirty |= NV50_NEW_SAMPLER; } static void +nv50_vp_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **s) +{ + nv50_sampler_state_bind(pipe, PIPE_SHADER_VERTEX, nr, s); +} + +static void +nv50_fp_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **s) +{ + nv50_sampler_state_bind(pipe, PIPE_SHADER_FRAGMENT, nr, s); +} + +static void nv50_sampler_state_delete(struct pipe_context *pipe, void *hwcso) { FREE(hwcso); } -static void -nv50_set_sampler_texture(struct pipe_context *pipe, unsigned nr, - struct pipe_texture **pt) +static INLINE void +nv50_set_sampler_texture(struct pipe_context *pipe, unsigned type, + unsigned nr, struct pipe_texture **pt) { struct nv50_context *nv50 = nv50_context(pipe); - int i; + unsigned i; for (i = 0; i < nr; i++) - pipe_texture_reference((void *)&nv50->miptree[i], pt[i]); - for (i = nr; i < nv50->miptree_nr; i++) - pipe_texture_reference((void *)&nv50->miptree[i], NULL); + pipe_texture_reference((void *)&nv50->miptree[type][i], pt[i]); + for (i = nr; i < nv50->miptree_nr[type]; i++) + pipe_texture_reference((void *)&nv50->miptree[type][i], NULL); - nv50->miptree_nr = nr; + nv50->miptree_nr[type] = nr; nv50->dirty |= NV50_NEW_TEXTURE; } +static void +nv50_set_vp_sampler_textures(struct pipe_context *pipe, + unsigned nr, struct pipe_texture **pt) +{ + nv50_set_sampler_texture(pipe, PIPE_SHADER_VERTEX, nr, pt); +} + +static void +nv50_set_fp_sampler_textures(struct pipe_context *pipe, + unsigned nr, struct pipe_texture **pt) +{ + nv50_set_sampler_texture(pipe, PIPE_SHADER_FRAGMENT, nr, pt); +} + static void * nv50_rasterizer_state_create(struct pipe_context *pipe, const struct pipe_rasterizer_state *cso) { - struct nouveau_stateobj *so = so_new(64, 0); + struct nouveau_stateobj *so = so_new(15, 21, 0); struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla; struct nv50_rasterizer_stateobj *rso = CALLOC_STRUCT(nv50_rasterizer_stateobj); @@ -269,7 +293,7 @@ nv50_rasterizer_state_create(struct pipe_context *pipe, so_method(so, tesla, NV50TCL_SHADE_MODEL, 1); so_data (so, cso->flatshade ? NV50TCL_SHADE_MODEL_FLAT : NV50TCL_SHADE_MODEL_SMOOTH); - so_method(so, tesla, 0x1684, 1); + so_method(so, tesla, NV50TCL_PROVOKING_VERTEX_LAST, 1); so_data (so, cso->flatshade_first ? 0 : 1); so_method(so, tesla, NV50TCL_VERTEX_TWO_SIDE_ENABLE, 1); @@ -366,7 +390,7 @@ nv50_rasterizer_state_create(struct pipe_context *pipe, so_method(so, tesla, NV50TCL_POLYGON_OFFSET_FACTOR, 1); so_data (so, fui(cso->offset_scale)); so_method(so, tesla, NV50TCL_POLYGON_OFFSET_UNITS, 1); - so_data (so, fui(cso->offset_units)); + so_data (so, fui(cso->offset_units * 2.0f)); } rso->pipe = *cso; @@ -399,7 +423,7 @@ nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe, { struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla; struct nv50_zsa_stateobj *zsa = CALLOC_STRUCT(nv50_zsa_stateobj); - struct nouveau_stateobj *so = so_new(64, 0); + struct nouveau_stateobj *so = so_new(8, 22, 0); so_method(so, tesla, NV50TCL_DEPTH_WRITE_ENABLE, 1); so_data (so, cso->depth.writemask ? 1 : 0); @@ -413,9 +437,8 @@ nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe, so_data (so, 0); } - /* XXX: keep hex values until header is updated (names reversed) */ if (cso->stencil[0].enabled) { - so_method(so, tesla, 0x1380, 8); + so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 8); so_data (so, 1); so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op)); so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); @@ -425,23 +448,23 @@ nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe, so_data (so, cso->stencil[0].writemask); so_data (so, cso->stencil[0].valuemask); } else { - so_method(so, tesla, 0x1380, 1); + so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 1); so_data (so, 0); } if (cso->stencil[1].enabled) { - so_method(so, tesla, 0x1594, 5); + so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 5); so_data (so, 1); so_data (so, nvgl_stencil_op(cso->stencil[1].fail_op)); so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); so_data (so, nvgl_comparison_op(cso->stencil[1].func)); - so_method(so, tesla, 0x0f54, 3); + so_method(so, tesla, NV50TCL_STENCIL_BACK_FUNC_REF, 3); so_data (so, cso->stencil[1].ref_value); so_data (so, cso->stencil[1].writemask); so_data (so, cso->stencil[1].valuemask); } else { - so_method(so, tesla, 0x1594, 1); + so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 1); so_data (so, 0); } @@ -648,9 +671,11 @@ nv50_init_state_functions(struct nv50_context *nv50) nv50->pipe.delete_blend_state = nv50_blend_state_delete; nv50->pipe.create_sampler_state = nv50_sampler_state_create; - nv50->pipe.bind_sampler_states = nv50_sampler_state_bind; nv50->pipe.delete_sampler_state = nv50_sampler_state_delete; - nv50->pipe.set_sampler_textures = nv50_set_sampler_texture; + nv50->pipe.bind_fragment_sampler_states = nv50_fp_sampler_state_bind; + nv50->pipe.bind_vertex_sampler_states = nv50_vp_sampler_state_bind; + nv50->pipe.set_fragment_sampler_textures = nv50_set_fp_sampler_textures; + nv50->pipe.set_vertex_sampler_textures = nv50_set_vp_sampler_textures; nv50->pipe.create_rasterizer_state = nv50_rasterizer_state_create; nv50->pipe.bind_rasterizer_state = nv50_rasterizer_state_bind; diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 799d2758fee..f83232f43cf 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -33,7 +33,7 @@ static void nv50_state_validate_fb(struct nv50_context *nv50) { struct nouveau_grobj *tesla = nv50->screen->tesla; - struct nouveau_stateobj *so = so_new(128, 18); + struct nouveau_stateobj *so = so_new(32, 79, 18); struct pipe_framebuffer_state *fb = &nv50->framebuffer; unsigned i, w, h, gw = 0; @@ -41,7 +41,7 @@ nv50_state_validate_fb(struct nv50_context *nv50) * FP result 0 always goes to RT[0], bits 4 - 6 are ignored. * Ambiguous assignment results in no rendering (no DATA_ERROR). */ - so_method(so, tesla, 0x121c, 1); + so_method(so, tesla, NV50TCL_RT_CONTROL, 1); so_data (so, fb->nr_cbufs | (0 << 4) | (1 << 7) | (2 << 10) | (3 << 13) | (4 << 16) | (5 << 19) | (6 << 22) | (7 << 25)); @@ -87,7 +87,7 @@ nv50_state_validate_fb(struct nv50_context *nv50) level[fb->cbufs[i]->level].tile_mode << 4); so_data(so, 0x00000000); - so_method(so, tesla, 0x1224, 1); + so_method(so, tesla, NV50TCL_RT_ARRAY_MODE, 1); so_data (so, 1); } @@ -124,22 +124,22 @@ nv50_state_validate_fb(struct nv50_context *nv50) level[fb->zsbuf->level].tile_mode << 4); so_data(so, 0x00000000); - so_method(so, tesla, 0x1538, 1); + so_method(so, tesla, NV50TCL_ZETA_ENABLE, 1); so_data (so, 1); so_method(so, tesla, NV50TCL_ZETA_HORIZ, 3); so_data (so, fb->zsbuf->width); so_data (so, fb->zsbuf->height); so_data (so, 0x00010001); } else { - so_method(so, tesla, 0x1538, 1); + so_method(so, tesla, NV50TCL_ZETA_ENABLE, 1); so_data (so, 0); } - so_method(so, tesla, NV50TCL_VIEWPORT_HORIZ, 2); + so_method(so, tesla, NV50TCL_VIEWPORT_HORIZ(0), 2); so_data (so, w << 16); so_data (so, h << 16); /* set window lower left corner */ - so_method(so, tesla, NV50TCL_WINDOW_LEFT, 2); + so_method(so, tesla, NV50TCL_WINDOW_OFFSET_X, 2); so_data (so, 0); so_data (so, 0); /* set screen scissor rectangle */ @@ -156,11 +156,38 @@ nv50_state_validate_fb(struct nv50_context *nv50) } static void +nv50_validate_samplers(struct nv50_context *nv50, struct nouveau_stateobj *so, + unsigned p) +{ + struct nouveau_grobj *eng2d = nv50->screen->eng2d; + unsigned i, j, dw = nv50->sampler_nr[p] * 8; + + if (!dw) + return; + nv50_so_init_sifc(nv50, so, nv50->screen->tsc, NOUVEAU_BO_VRAM, + p * (32 * 8 * 4), dw * 4); + + so_method(so, eng2d, NV50_2D_SIFC_DATA | (2 << 29), dw); + + for (i = 0; i < nv50->sampler_nr[p]; ++i) { + if (nv50->sampler[p][i]) + so_datap(so, nv50->sampler[p][i]->tsc, 8); + else { + for (j = 0; j < 8; ++j) /* you get punished */ + so_data(so, 0); /* ... for leaving holes */ + } + } +} + +static void nv50_state_emit(struct nv50_context *nv50) { struct nv50_screen *screen = nv50->screen; struct nouveau_channel *chan = screen->base.channel; + /* I don't want to copy headers from the winsys. */ + screen->cur_ctx = nv50; + if (nv50->pctx_id != screen->cur_pctx) { if (nv50->state.fb) nv50->state.dirty |= NV50_NEW_FRAMEBUFFER; @@ -201,7 +228,8 @@ nv50_state_emit(struct nv50_context *nv50) so_emit(chan, nv50->state.vertprog); if (nv50->state.dirty & NV50_NEW_FRAGPROG) so_emit(chan, nv50->state.fragprog); - if (nv50->state.dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG)) + if (nv50->state.dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG | + NV50_NEW_RASTERIZER)) so_emit(chan, nv50->state.programs); if (nv50->state.dirty & NV50_NEW_RASTERIZER) so_emit(chan, nv50->state.rast); @@ -245,7 +273,6 @@ boolean nv50_state_validate(struct nv50_context *nv50) { struct nouveau_grobj *tesla = nv50->screen->tesla; - struct nouveau_grobj *eng2d = nv50->screen->eng2d; struct nouveau_stateobj *so; unsigned i; @@ -264,14 +291,15 @@ nv50_state_validate(struct nv50_context *nv50) if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_FRAGPROG_CB)) nv50_fragprog_validate(nv50); - if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG)) + if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG | + NV50_NEW_RASTERIZER)) nv50_linkage_validate(nv50); if (nv50->dirty & NV50_NEW_RASTERIZER) so_ref(nv50->rasterizer->so, &nv50->state.rast); if (nv50->dirty & NV50_NEW_BLEND_COLOUR) { - so = so_new(5, 0); + so = so_new(1, 4, 0); so_method(so, tesla, NV50TCL_BLEND_COLOR(0), 4); so_data (so, fui(nv50->blend_colour.color[0])); so_data (so, fui(nv50->blend_colour.color[1])); @@ -282,10 +310,10 @@ nv50_state_validate(struct nv50_context *nv50) } if (nv50->dirty & NV50_NEW_STIPPLE) { - so = so_new(33, 0); + so = so_new(1, 32, 0); so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_PATTERN(0), 32); for (i = 0; i < 32; i++) - so_data(so, nv50->stipple.stipple[i]); + so_data(so, util_bswap32(nv50->stipple.stipple[i])); so_ref(so, &nv50->state.stipple); so_ref(NULL, &so); } @@ -299,8 +327,8 @@ nv50_state_validate(struct nv50_context *nv50) goto scissor_uptodate; nv50->state.scissor_enabled = rast->scissor; - so = so_new(3, 0); - so_method(so, tesla, NV50TCL_SCISSOR_HORIZ, 2); + so = so_new(1, 2, 0); + so_method(so, tesla, NV50TCL_SCISSOR_HORIZ(0), 2); if (nv50->state.scissor_enabled) { so_data(so, (s->maxx << 16) | s->minx); so_data(so, (s->maxy << 16) | s->miny); @@ -328,13 +356,13 @@ scissor_uptodate: goto viewport_uptodate; nv50->state.viewport_bypass = bypass; - so = so_new(14, 0); + so = so_new(5, 9, 0); if (!bypass) { - so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE(0), 3); + so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE_X(0), 3); so_data (so, fui(nv50->viewport.translate[0])); so_data (so, fui(nv50->viewport.translate[1])); so_data (so, fui(nv50->viewport.translate[2])); - so_method(so, tesla, NV50TCL_VIEWPORT_SCALE(0), 3); + so_method(so, tesla, NV50TCL_VIEWPORT_SCALE_X(0), 3); so_data (so, fui(nv50->viewport.scale[0])); so_data (so, fui(nv50->viewport.scale[1])); so_data (so, fui(nv50->viewport.scale[2])); @@ -367,22 +395,17 @@ scissor_uptodate: viewport_uptodate: if (nv50->dirty & NV50_NEW_SAMPLER) { - unsigned i; + unsigned nr = 0; - so = so_new(nv50->sampler_nr * 9 + 23 + 4, 2); + for (i = 0; i < PIPE_SHADER_TYPES; ++i) + nr += nv50->sampler_nr[i]; - nv50_so_init_sifc(nv50, so, nv50->screen->tsc, NOUVEAU_BO_VRAM, - nv50->sampler_nr * 8 * 4); + so = so_new(1+ 5 * PIPE_SHADER_TYPES, 1+ 19 * PIPE_SHADER_TYPES + + nr * 8, PIPE_SHADER_TYPES * 2); - for (i = 0; i < nv50->sampler_nr; i++) { - if (!nv50->sampler[i]) - continue; - so_method(so, eng2d, NV50_2D_SIFC_DATA | (2 << 29), 8); - so_datap (so, nv50->sampler[i]->tsc, 8); - } + nv50_validate_samplers(nv50, so, PIPE_SHADER_VERTEX); + nv50_validate_samplers(nv50, so, PIPE_SHADER_FRAGMENT); - so_method(so, tesla, 0x1440, 1); /* sync SIFC */ - so_data (so, 0); so_method(so, tesla, 0x1334, 1); /* flush TSC */ so_data (so, 0); @@ -405,10 +428,13 @@ viewport_uptodate: void nv50_so_init_sifc(struct nv50_context *nv50, struct nouveau_stateobj *so, - struct nouveau_bo *bo, unsigned reloc, unsigned size) + struct nouveau_bo *bo, unsigned reloc, + unsigned offset, unsigned size) { struct nouveau_grobj *eng2d = nv50->screen->eng2d; + reloc |= NOUVEAU_BO_WR; + so_method(so, eng2d, NV50_2D_DST_FORMAT, 2); so_data (so, NV50_2D_DST_FORMAT_R8_UNORM); so_data (so, 1); @@ -416,9 +442,9 @@ void nv50_so_init_sifc(struct nv50_context *nv50, so_data (so, 262144); so_data (so, 65536); so_data (so, 1); - so_reloc (so, bo, 0, reloc | NOUVEAU_BO_WR | NOUVEAU_BO_HIGH, 0, 0); - so_reloc (so, bo, 0, reloc | NOUVEAU_BO_WR | NOUVEAU_BO_LOW, 0, 0); - so_method(so, eng2d, NV50_2D_SIFC_UNK0800, 2); + so_reloc (so, bo, offset, reloc | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, bo, offset, reloc | NOUVEAU_BO_LOW, 0, 0); + so_method(so, eng2d, NV50_2D_SIFC_BITMAP_ENABLE, 2); so_data (so, 0); so_data (so, NV50_2D_SIFC_FORMAT_R8_UNORM); so_method(so, eng2d, NV50_2D_SIFC_WIDTH, 10); diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index 6bf6f773b0c..6378132979e 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -62,6 +62,7 @@ nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst) return 1; if (!bo->tile_flags) { + MARK_RING (chan, 9, 2); /* flush on lack of space or relocs */ BEGIN_RING(chan, eng2d, mthd, 2); OUT_RING (chan, format); OUT_RING (chan, 1); @@ -72,6 +73,7 @@ nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst) OUT_RELOCh(chan, bo, ps->offset, flags); OUT_RELOCl(chan, bo, ps->offset, flags); } else { + MARK_RING (chan, 11, 2); /* flush on lack of space or relocs */ BEGIN_RING(chan, eng2d, mthd, 5); OUT_RING (chan, format); OUT_RING (chan, 0); @@ -174,11 +176,11 @@ nv50_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, if (ret) return; - BEGIN_RING(chan, eng2d, 0x0580, 3); - OUT_RING (chan, 4); + BEGIN_RING(chan, eng2d, NV50_2D_DRAW_SHAPE, 3); + OUT_RING (chan, NV50_2D_DRAW_SHAPE_RECTANGLES); OUT_RING (chan, format); OUT_RING (chan, value); - BEGIN_RING(chan, eng2d, NV50_2D_RECT_X1, 4); + BEGIN_RING(chan, eng2d, NV50_2D_DRAW_POINT32_X(0), 4); OUT_RING (chan, destx); OUT_RING (chan, desty); OUT_RING (chan, width); diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c index 2813f544770..bef548b7286 100644 --- a/src/gallium/drivers/nv50/nv50_tex.c +++ b/src/gallium/drivers/nv50/nv50_tex.c @@ -25,6 +25,8 @@ #include "nouveau/nouveau_stateobj.h" +#include "util/u_format.h" + #define _MIXED(pf, t0, t1, t2, t3, cr, cg, cb, ca, f) \ { \ PIPE_FORMAT_##pf, \ @@ -68,6 +70,7 @@ static const struct nv50_texture_format nv50_tex_format_list[] = _(DXT5_RGBA, UNORM, C0, C1, C2, C3, DXT5), _MIXED(Z24S8_UNORM, UINT, UNORM, UINT, UINT, C1, C1, C1, ONE, 24_8), + _MIXED(S8Z24_UNORM, UNORM, UINT, UINT, UINT, C0, C0, C0, ONE, 8_24), _(R16G16B16A16_SNORM, UNORM, C0, C1, C2, C3, 16_16_16_16), _(R16G16B16A16_UNORM, SNORM, C0, C1, C2, C3, 16_16_16_16), @@ -85,10 +88,11 @@ static const struct nv50_texture_format nv50_tex_format_list[] = static int nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so, - struct nv50_miptree *mt, int unit) + struct nv50_miptree *mt, int unit, unsigned p) { unsigned i; uint32_t mode; + const struct util_format_description *desc; for (i = 0; i < NV50_TEX_FORMAT_LIST_SIZE; i++) if (nv50_tex_format_list[i].pf == mt->base.base.format) @@ -96,7 +100,7 @@ nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so, if (i == NV50_TEX_FORMAT_LIST_SIZE) return 1; - if (nv50->sampler[unit]->normalized) + if (nv50->sampler[p][unit]->normalized) mode = 0x50001000 | (1 << 31); else { mode = 0x50001000 | (7 << 14); @@ -106,7 +110,10 @@ nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so, mode |= ((mt->base.bo->tile_mode & 0x0f) << 22) | ((mt->base.bo->tile_mode & 0xf0) << 21); - if (pf_type(mt->base.base.format) == PIPE_FORMAT_TYPE_SRGB) + desc = util_format_description(mt->base.base.format); + assert(desc); + + if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) mode |= 0x0400; switch (mt->base.base.target) { @@ -131,57 +138,89 @@ nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so, NOUVEAU_BO_RD, 0, 0); so_data (so, mode); so_data (so, 0x00300000); - so_data (so, mt->base.base.width[0] | (1 << 31)); + so_data (so, mt->base.base.width0 | (1 << 31)); so_data (so, (mt->base.base.last_level << 28) | - (mt->base.base.depth[0] << 16) | mt->base.base.height[0]); + (mt->base.base.depth0 << 16) | mt->base.base.height0); so_data (so, 0x03000000); so_data (so, mt->base.base.last_level << 4); return 0; } -void -nv50_tex_validate(struct nv50_context *nv50) +#ifndef NV50TCL_BIND_TIC +#define NV50TCL_BIND_TIC(n) (0x1448 + 8 * n) +#endif + +static boolean +nv50_validate_textures(struct nv50_context *nv50, struct nouveau_stateobj *so, + unsigned p) { + static const unsigned p_remap[PIPE_SHADER_TYPES] = { 0, 2 }; + struct nouveau_grobj *eng2d = nv50->screen->eng2d; struct nouveau_grobj *tesla = nv50->screen->tesla; - struct nouveau_stateobj *so; - unsigned i, unit, push; - - push = MAX2(nv50->miptree_nr, nv50->state.miptree_nr) * 2 + 23 + 6; - so = so_new(nv50->miptree_nr * 9 + push, nv50->miptree_nr * 2 + 2); + unsigned unit, j, p_hw = p_remap[p]; nv50_so_init_sifc(nv50, so, nv50->screen->tic, NOUVEAU_BO_VRAM, - nv50->miptree_nr * 8 * 4); - - for (i = 0, unit = 0; unit < nv50->miptree_nr; ++unit) { - struct nv50_miptree *mt = nv50->miptree[unit]; + p * (32 * 8 * 4), nv50->miptree_nr[p] * 8 * 4); - if (!mt) - continue; + for (unit = 0; unit < nv50->miptree_nr[p]; ++unit) { + struct nv50_miptree *mt = nv50->miptree[p][unit]; so_method(so, eng2d, NV50_2D_SIFC_DATA | (2 << 29), 8); - if (nv50_tex_construct(nv50, so, mt, unit)) { - NOUVEAU_ERR("failed tex validate\n"); - so_ref(NULL, &so); - return; + if (mt) { + if (nv50_tex_construct(nv50, so, mt, unit, p)) + return FALSE; + /* Set TEX insn $t src binding $unit in program type p + * to TIC, TSC entry (32 * p + unit), mark valid (1). + */ + so_method(so, tesla, NV50TCL_BIND_TIC(p_hw), 1); + so_data (so, ((32 * p + unit) << 9) | (unit << 1) | 1); + } else { + for (j = 0; j < 8; ++j) + so_data(so, 0); + so_method(so, tesla, NV50TCL_BIND_TIC(p_hw), 1); + so_data (so, (unit << 1) | 0); } + } + + for (; unit < nv50->state.miptree_nr[p]; unit++) { + /* Make other bindings invalid. */ + so_method(so, tesla, NV50TCL_BIND_TIC(p_hw), 1); + so_data (so, (unit << 1) | 0); + } - so_method(so, tesla, NV50TCL_SET_SAMPLER_TEX, 1); - so_data (so, (i++ << NV50TCL_SET_SAMPLER_TEX_TIC_SHIFT) | - (unit << NV50TCL_SET_SAMPLER_TEX_SAMPLER_SHIFT) | - NV50TCL_SET_SAMPLER_TEX_VALID); + nv50->state.miptree_nr[p] = nv50->miptree_nr[p]; + return TRUE; +} + +void +nv50_tex_validate(struct nv50_context *nv50) +{ + struct nouveau_stateobj *so; + struct nouveau_grobj *tesla = nv50->screen->tesla; + unsigned p, start, push, nrlc; + + for (nrlc = 0, start = 0, push = 0, p = 0; p < PIPE_SHADER_TYPES; ++p) { + start += MAX2(nv50->miptree_nr[p], nv50->state.miptree_nr[p]); + push += MAX2(nv50->miptree_nr[p], nv50->state.miptree_nr[p]); + nrlc += nv50->miptree_nr[p]; } + start = start * 2 + 4 * PIPE_SHADER_TYPES + 2; + push = push * 9 + 19 * PIPE_SHADER_TYPES + 2; + nrlc = nrlc * 2 + 2 * PIPE_SHADER_TYPES; + + so = so_new(start, push, nrlc); - for (; unit < nv50->state.miptree_nr; unit++) { - so_method(so, tesla, NV50TCL_SET_SAMPLER_TEX, 1); - so_data (so, - (unit << NV50TCL_SET_SAMPLER_TEX_SAMPLER_SHIFT) | 0); + if (nv50_validate_textures(nv50, so, PIPE_SHADER_VERTEX) == FALSE || + nv50_validate_textures(nv50, so, PIPE_SHADER_FRAGMENT) == FALSE) { + so_ref(NULL, &so); + + NOUVEAU_ERR("failed tex validate\n"); + return; } /* not sure if the following really do what I think: */ - so_method(so, tesla, 0x1440, 1); /* sync SIFC */ - so_data (so, 0); so_method(so, tesla, 0x1330, 1); /* flush TIC */ so_data (so, 0); so_method(so, tesla, 0x1338, 1); /* flush texture caches */ @@ -189,6 +228,4 @@ nv50_tex_validate(struct nv50_context *nv50) so_ref(so, &nv50->state.tic_upload); so_ref(NULL, &so); - nv50->state.miptree_nr = nv50->miptree_nr; } - diff --git a/src/gallium/drivers/nv50/nv50_texture.h b/src/gallium/drivers/nv50/nv50_texture.h index d531e611327..b870302019a 100644 --- a/src/gallium/drivers/nv50/nv50_texture.h +++ b/src/gallium/drivers/nv50/nv50_texture.h @@ -82,6 +82,7 @@ #define NV50TIC_0_0_FMT_RGTC1 0x00000027 #define NV50TIC_0_0_FMT_RGTC2 0x00000028 #define NV50TIC_0_0_FMT_24_8 0x00000029 +#define NV50TIC_0_0_FMT_8_24 0x0000002a #define NV50TIC_0_0_FMT_32_DEPTH 0x0000002f #define NV50TIC_0_0_FMT_32_8 0x00000030 diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c index ea61357aaa6..a2f1db2914c 100644 --- a/src/gallium/drivers/nv50/nv50_transfer.c +++ b/src/gallium/drivers/nv50/nv50_transfer.c @@ -1,6 +1,8 @@ #include "pipe/p_context.h" #include "pipe/p_inlines.h" +#include "util/u_format.h" +#include "util/u_math.h" #include "nv50_context.h" @@ -15,16 +17,19 @@ struct nv50_transfer { int level_depth; int level_x; int level_y; + int level_z; + unsigned nblocksx; + unsigned nblocksy; }; static void nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, struct nouveau_bo *src_bo, unsigned src_offset, int src_pitch, unsigned src_tile_mode, - int sx, int sy, int sw, int sh, int sd, + int sx, int sy, int sz, int sw, int sh, int sd, struct nouveau_bo *dst_bo, unsigned dst_offset, int dst_pitch, unsigned dst_tile_mode, - int dx, int dy, int dw, int dh, int dd, + int dx, int dy, int dz, int dw, int dh, int dd, int cpp, int width, int height, unsigned src_reloc, unsigned dst_reloc) { @@ -42,7 +47,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_IN, 1); OUT_RING (chan, 1); BEGIN_RING(chan, m2mf, - NV50_MEMORY_TO_MEMORY_FORMAT_PITCH_IN, 1); + NV04_MEMORY_TO_MEMORY_FORMAT_PITCH_IN, 1); OUT_RING (chan, src_pitch); src_offset += (sy * src_pitch) + (sx * cpp); } else { @@ -53,7 +58,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, OUT_RING (chan, sw * cpp); OUT_RING (chan, sh); OUT_RING (chan, sd); - OUT_RING (chan, 0); + OUT_RING (chan, sz); /* copying only 1 zslice per call */ } if (!dst_bo->tile_flags) { @@ -61,7 +66,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_OUT, 1); OUT_RING (chan, 1); BEGIN_RING(chan, m2mf, - NV50_MEMORY_TO_MEMORY_FORMAT_PITCH_OUT, 1); + NV04_MEMORY_TO_MEMORY_FORMAT_PITCH_OUT, 1); OUT_RING (chan, dst_pitch); dst_offset += (dy * dst_pitch) + (dx * cpp); } else { @@ -72,19 +77,19 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, OUT_RING (chan, dw * cpp); OUT_RING (chan, dh); OUT_RING (chan, dd); - OUT_RING (chan, 0); + OUT_RING (chan, dz); /* copying only 1 zslice per call */ } while (height) { int line_count = height > 2047 ? 2047 : height; - WAIT_RING (chan, 15); + MARK_RING (chan, 15, 4); /* flush on lack of space or relocs */ BEGIN_RING(chan, m2mf, NV50_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN_HIGH, 2); OUT_RELOCh(chan, src_bo, src_offset, src_reloc); OUT_RELOCh(chan, dst_bo, dst_offset, dst_reloc); BEGIN_RING(chan, m2mf, - NV50_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 2); + NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 2); OUT_RELOCl(chan, src_bo, src_offset, src_reloc); OUT_RELOCl(chan, dst_bo, dst_offset, dst_reloc); if (src_bo->tile_flags) { @@ -102,7 +107,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, dst_offset += (line_count * dst_pitch); } BEGIN_RING(chan, m2mf, - NV50_MEMORY_TO_MEMORY_FORMAT_LINE_LENGTH_IN, 4); + NV04_MEMORY_TO_MEMORY_FORMAT_LINE_LENGTH_IN, 4); OUT_RING (chan, width * cpp); OUT_RING (chan, line_count); OUT_RING (chan, 0x00000101); @@ -115,20 +120,6 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, } } -static INLINE unsigned -get_zslice_offset(unsigned tile_mode, unsigned z, unsigned pitch, unsigned ny) -{ - unsigned tile_h = get_tile_height(tile_mode); - unsigned tile_d = get_tile_depth(tile_mode); - - /* pitch_2d == to next slice within this volume-tile */ - /* pitch_3d == to next slice in next 2D array of blocks */ - unsigned pitch_2d = tile_h * 64; - unsigned pitch_3d = tile_d * align(ny, tile_h) * pitch; - - return (z % tile_d) * pitch_2d + (z / tile_d) * pitch_3d; -} - static struct pipe_transfer * nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, unsigned face, unsigned level, unsigned zslice, @@ -150,56 +141,43 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; pipe_texture_reference(&tx->base.texture, pt); - tx->base.format = pt->format; + tx->nblocksx = util_format_get_nblocksx(pt->format, u_minify(pt->width0, level)); + tx->nblocksy = util_format_get_nblocksy(pt->format, u_minify(pt->height0, level)); tx->base.width = w; tx->base.height = h; - tx->base.block = pt->block; - if (!pt->nblocksx[level]) { - tx->base.nblocksx = pf_get_nblocksx(&pt->block, - pt->width[level]); - tx->base.nblocksy = pf_get_nblocksy(&pt->block, - pt->height[level]); - } else { - tx->base.nblocksx = pt->nblocksx[level]; - tx->base.nblocksy = pt->nblocksy[level]; - } - tx->base.stride = tx->base.nblocksx * pt->block.size; + tx->base.stride = tx->nblocksx * util_format_get_blocksize(pt->format); tx->base.usage = usage; tx->level_pitch = lvl->pitch; - tx->level_width = mt->base.base.width[level]; - tx->level_height = mt->base.base.height[level]; - tx->level_depth = mt->base.base.depth[level]; + tx->level_width = u_minify(mt->base.base.width0, level); + tx->level_height = u_minify(mt->base.base.height0, level); + tx->level_depth = u_minify(mt->base.base.depth0, level); tx->level_offset = lvl->image_offset[image]; tx->level_tiling = lvl->tile_mode; - tx->level_x = pf_get_nblocksx(&tx->base.block, x); - tx->level_y = pf_get_nblocksy(&tx->base.block, y); + tx->level_z = zslice; + tx->level_x = util_format_get_nblocksx(pt->format, x); + tx->level_y = util_format_get_nblocksy(pt->format, y); ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, - tx->base.nblocksy * tx->base.stride, &tx->bo); + tx->nblocksy * tx->base.stride, &tx->bo); if (ret) { FREE(tx); return NULL; } - if (pt->target == PIPE_TEXTURE_3D) - tx->level_offset += get_zslice_offset(lvl->tile_mode, zslice, - lvl->pitch, - tx->base.nblocksy); - if (usage & PIPE_TRANSFER_READ) { - nx = pf_get_nblocksx(&tx->base.block, tx->base.width); - ny = pf_get_nblocksy(&tx->base.block, tx->base.height); + nx = util_format_get_nblocksx(pt->format, tx->base.width); + ny = util_format_get_nblocksy(pt->format, tx->base.height); nv50_transfer_rect_m2mf(pscreen, mt->base.bo, tx->level_offset, tx->level_pitch, tx->level_tiling, - x, y, - tx->base.nblocksx, tx->base.nblocksy, + x, y, zslice, + tx->nblocksx, tx->nblocksy, tx->level_depth, tx->bo, 0, tx->base.stride, tx->bo->tile_mode, - 0, 0, - tx->base.nblocksx, tx->base.nblocksy, 1, - tx->base.block.size, nx, ny, + 0, 0, 0, + tx->nblocksx, tx->nblocksy, 1, + util_format_get_blocksize(pt->format), nx, ny, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART, NOUVEAU_BO_GART); } @@ -212,23 +190,24 @@ nv50_transfer_del(struct pipe_transfer *ptx) { struct nv50_transfer *tx = (struct nv50_transfer *)ptx; struct nv50_miptree *mt = nv50_miptree(ptx->texture); + struct pipe_texture *pt = ptx->texture; - unsigned nx = pf_get_nblocksx(&tx->base.block, tx->base.width); - unsigned ny = pf_get_nblocksy(&tx->base.block, tx->base.height); + unsigned nx = util_format_get_nblocksx(pt->format, tx->base.width); + unsigned ny = util_format_get_nblocksy(pt->format, tx->base.height); if (ptx->usage & PIPE_TRANSFER_WRITE) { - struct pipe_screen *pscreen = ptx->texture->screen; + struct pipe_screen *pscreen = pt->screen; nv50_transfer_rect_m2mf(pscreen, tx->bo, 0, tx->base.stride, tx->bo->tile_mode, - 0, 0, - tx->base.nblocksx, tx->base.nblocksy, 1, + 0, 0, 0, + tx->nblocksx, tx->nblocksy, 1, mt->base.bo, tx->level_offset, tx->level_pitch, tx->level_tiling, - tx->level_x, tx->level_y, - tx->base.nblocksx, tx->base.nblocksy, + tx->level_x, tx->level_y, tx->level_z, + tx->nblocksx, tx->nblocksy, tx->level_depth, - tx->base.block.size, nx, ny, + util_format_get_blocksize(pt->format), nx, ny, NOUVEAU_BO_GART, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART); } @@ -287,7 +266,7 @@ nv50_upload_sifc(struct nv50_context *nv50, reloc |= NOUVEAU_BO_WR; - WAIT_RING (chan, 32); + MARK_RING (chan, 32, 2); /* flush on lack of space or relocs */ if (bo->tile_flags) { BEGIN_RING(chan, eng2d, NV50_2D_DST_FORMAT, 5); @@ -312,7 +291,7 @@ nv50_upload_sifc(struct nv50_context *nv50, /* NV50_2D_OPERATION_SRCCOPY assumed already set */ - BEGIN_RING(chan, eng2d, NV50_2D_SIFC_UNK0800, 2); + BEGIN_RING(chan, eng2d, NV50_2D_SIFC_BITMAP_ENABLE, 2); OUT_RING (chan, 0); OUT_RING (chan, src_format); BEGIN_RING(chan, eng2d, NV50_2D_SIFC_WIDTH, 10); @@ -355,6 +334,6 @@ nv50_upload_sifc(struct nv50_context *nv50, src += src_pitch; } - BEGIN_RING(chan, tesla, 0x1440, 1); + BEGIN_RING(chan, tesla, NV50TCL_CODE_CB_FLUSH, 1); OUT_RING (chan, 0); } diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index db54380241f..f2e510fba61 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -24,6 +24,8 @@ #include "pipe/p_state.h" #include "pipe/p_inlines.h" +#include "util/u_format.h" + #include "nv50_context.h" static boolean @@ -62,18 +64,25 @@ nv50_prim(unsigned mode) } static INLINE uint32_t -nv50_vbo_type_to_hw(unsigned type) +nv50_vbo_type_to_hw(enum pipe_format format) { - switch (type) { - case PIPE_FORMAT_TYPE_FLOAT: + const struct util_format_description *desc; + + desc = util_format_description(format); + assert(desc); + + switch (desc->channel[0].type) { + case UTIL_FORMAT_TYPE_FLOAT: return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_FLOAT; - case PIPE_FORMAT_TYPE_UNORM: - return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UNORM; - case PIPE_FORMAT_TYPE_SNORM: - return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SNORM; - case PIPE_FORMAT_TYPE_USCALED: + case UTIL_FORMAT_TYPE_UNSIGNED: + if (desc->channel[0].normalized) { + return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UNORM; + } return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_USCALED; - case PIPE_FORMAT_TYPE_SSCALED: + case UTIL_FORMAT_TYPE_SIGNED: + if (desc->channel[0].normalized) { + return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SNORM; + } return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SSCALED; /* case PIPE_FORMAT_TYPE_UINT: @@ -90,19 +99,19 @@ nv50_vbo_size_to_hw(unsigned size, unsigned nr_c) { static const uint32_t hw_values[] = { 0, 0, 0, 0, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_8, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_8_8, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_8_8_8, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_8_8_8_8, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_16, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_16_16, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_16_16_16, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_16_16_16_16, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8_8, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16_16, 0, 0, 0, 0, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_32, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_32_32, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_32_32_32, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_32_32_32_32 }; + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32_32 }; /* we'd also have R11G11B10 and R10G10B10A2 */ @@ -120,9 +129,15 @@ nv50_vbo_vtxelt_to_hw(struct pipe_vertex_element *ve) { uint32_t hw_type, hw_size; enum pipe_format pf = ve->src_format; - unsigned size = pf_size_x(pf) << pf_exp2(pf); + const struct util_format_description *desc; + unsigned size; + + desc = util_format_description(pf); + assert(desc); - hw_type = nv50_vbo_type_to_hw(pf_type(pf)); + size = util_format_get_component_bits(pf, UTIL_FORMAT_COLORSPACE_RGB, 0); + + hw_type = nv50_vbo_type_to_hw(pf); hw_size = nv50_vbo_size_to_hw(size, ve->nr_components); if (!hw_type || !hw_size) { @@ -131,13 +146,13 @@ nv50_vbo_vtxelt_to_hw(struct pipe_vertex_element *ve) return 0x24e80000; } - if (pf_swizzle_x(pf) == 2) /* BGRA */ + if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_Z) /* BGRA */ hw_size |= (1 << 31); /* no real swizzle bits :-( */ return (hw_type | hw_size); } -boolean +void nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { @@ -167,7 +182,9 @@ nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); OUT_RING (chan, 0); - return ret; + /* XXX: not sure what to do if ret != TRUE: flush and retry? + */ + assert(ret); } static INLINE boolean @@ -183,7 +200,7 @@ nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map, return nv50_push_elements_u08(nv50, map, count); if (count & 1) { - BEGIN_RING(chan, tesla, 0x15e8, 1); + BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1); OUT_RING (chan, map[0]); map++; count--; @@ -193,7 +210,7 @@ nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map, unsigned nr = count > 2046 ? 2046 : count; int i; - BEGIN_RING(chan, tesla, 0x400015f0, nr >> 1); + BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x40000000, nr >> 1); for (i = 0; i < nr; i += 2) OUT_RING (chan, (map[i + 1] << 16) | map[i]); @@ -216,7 +233,7 @@ nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map, return nv50_push_elements_u16(nv50, map, count); if (count & 1) { - BEGIN_RING(chan, tesla, 0x15e8, 1); + BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1); OUT_RING (chan, map[0]); map++; count--; @@ -226,7 +243,7 @@ nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map, unsigned nr = count > 2046 ? 2046 : count; int i; - BEGIN_RING(chan, tesla, 0x400015f0, nr >> 1); + BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x40000000, nr >> 1); for (i = 0; i < nr; i += 2) OUT_RING (chan, (map[i + 1] << 16) | map[i]); @@ -251,7 +268,7 @@ nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map, while (count) { unsigned nr = count > 2047 ? 2047 : count; - BEGIN_RING(chan, tesla, 0x400015e8, nr); + BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32 | 0x40000000, nr); OUT_RINGp (chan, map, nr); count -= nr; @@ -260,7 +277,7 @@ nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map, return TRUE; } -boolean +void nv50_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count) @@ -302,8 +319,10 @@ nv50_draw_elements(struct pipe_context *pipe, OUT_RING (chan, 0); pipe_buffer_unmap(pscreen, indexBuffer); - - return ret; + + /* XXX: what to do if ret != TRUE? Flush and retry? + */ + assert(ret); } static INLINE boolean @@ -319,9 +338,13 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib, float *v; int ret; enum pipe_format pf = ve->src_format; + const struct util_format_description *desc; - if ((pf_type(pf) != PIPE_FORMAT_TYPE_FLOAT) || - (pf_size_x(pf) << pf_exp2(pf)) != 32) + desc = util_format_description(pf); + assert(desc); + + if ((desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT) || + util_format_get_component_bits(pf, UTIL_FORMAT_COLORSPACE_RGB, 0) != 32) return FALSE; ret = nouveau_bo_map(bo, NOUVEAU_BO_RD); @@ -331,7 +354,7 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib, so = *pso; if (!so) - *pso = so = so_new(nv50->vtxelt_nr * 5, 0); + *pso = so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 4, 0); switch (ve->nr_components) { case 4: @@ -353,6 +376,10 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib, so_data (so, fui(v[1])); break; case 1: + if (attrib == nv50->vertprog->cfg.edgeflag_in) { + so_method(so, tesla, NV50TCL_EDGEFLAG_ENABLE, 1); + so_data (so, v[0] ? 1 : 0); + } so_method(so, tesla, NV50TCL_VTX_ATTR_1F(attrib), 1); so_data (so, fui(v[0])); break; @@ -382,11 +409,14 @@ nv50_vbo_validate(struct nv50_context *nv50) !(nv50->vtxbuf[i].buffer->usage & PIPE_BUFFER_USAGE_VERTEX)) nv50->vbo_fifo = 0xffff; + if (nv50->vertprog->cfg.edgeflag_in < 16) + nv50->vbo_fifo = 0xffff; /* vertprog can't set edgeflag */ + n_ve = MAX2(nv50->vtxelt_nr, nv50->state.vtxelt_nr); vtxattr = NULL; - vtxbuf = so_new(n_ve * 7, nv50->vtxelt_nr * 4); - vtxfmt = so_new(n_ve + 1, 0); + vtxbuf = so_new(n_ve * 2, n_ve * 5, nv50->vtxelt_nr * 4); + vtxfmt = so_new(1, n_ve, 0); so_method(vtxfmt, tesla, NV50TCL_VERTEX_ARRAY_ATTRIB(0), n_ve); for (i = 0; i < nv50->vtxelt_nr; i++) { @@ -426,7 +456,7 @@ nv50_vbo_validate(struct nv50_context *nv50) NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); /* vertex array limits */ - so_method(vtxbuf, tesla, 0x1080 + (i * 8), 2); + so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_LIMIT_HIGH(i), 2); so_reloc (vtxbuf, bo, vb->buffer->size - 1, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); @@ -460,6 +490,9 @@ struct nv50_vbo_emitctx unsigned nr_ve; unsigned vtx_dwords; unsigned vtx_max; + + float edgeflag; + unsigned ve_edgeflag; }; static INLINE void @@ -603,13 +636,17 @@ emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit, if (nv50_map_vbufs(nv50) == FALSE) return FALSE; + emit->ve_edgeflag = nv50->vertprog->cfg.edgeflag_in; + + emit->edgeflag = 0.5f; emit->nr_ve = 0; emit->vtx_dwords = 0; for (i = 0; i < nv50->vtxelt_nr; ++i) { struct pipe_vertex_element *ve; struct pipe_vertex_buffer *vb; - unsigned n, type, size; + unsigned n, size; + const struct util_format_description *desc; ve = &nv50->vtxelt[i]; vb = &nv50->vtxbuf[ve->vertex_buffer_index]; @@ -621,8 +658,11 @@ emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit, emit->map[n] = nouveau_bo(vb->buffer)->map + (start * vb->stride + ve->src_offset); - type = pf_type(ve->src_format); - size = pf_size_x(ve->src_format) << pf_exp2(ve->src_format); + desc = util_format_description(ve->src_format); + assert(desc); + + size = util_format_get_component_bits( + ve->src_format, UTIL_FORMAT_COLORSPACE_RGB, 0); assert(ve->nr_components > 0 && ve->nr_components <= 4); @@ -664,10 +704,31 @@ emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit, } emit->vtx_max = 512 / emit->vtx_dwords; + if (emit->ve_edgeflag < 16) + emit->vtx_max = 1; return TRUE; } +static INLINE void +set_edgeflag(struct nouveau_channel *chan, + struct nouveau_grobj *tesla, + struct nv50_vbo_emitctx *emit, uint32_t index) +{ + unsigned i = emit->ve_edgeflag; + + if (i < 16) { + float f = *((float *)(emit->map[i] + index * emit->stride[i])); + + if (emit->edgeflag != f) { + emit->edgeflag = f; + + BEGIN_RING(chan, tesla, 0x15e4, 1); + OUT_RING (chan, f ? 1 : 0); + } + } +} + static boolean nv50_push_arrays(struct nv50_context *nv50, unsigned start, unsigned count) { @@ -682,6 +743,8 @@ nv50_push_arrays(struct nv50_context *nv50, unsigned start, unsigned count) unsigned i, dw, nr = MIN2(count, emit.vtx_max); dw = nr * emit.vtx_dwords; + set_edgeflag(chan, tesla, &emit, 0); /* nr will be 1 */ + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); for (i = 0; i < nr; ++i) emit_vtx_next(chan, &emit); @@ -707,6 +770,8 @@ nv50_push_elements_u32(struct nv50_context *nv50, uint32_t *map, unsigned count) unsigned i, dw, nr = MIN2(count, emit.vtx_max); dw = nr * emit.vtx_dwords; + set_edgeflag(chan, tesla, &emit, *map); + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); for (i = 0; i < nr; ++i) emit_vtx(chan, &emit, *map++); @@ -732,6 +797,8 @@ nv50_push_elements_u16(struct nv50_context *nv50, uint16_t *map, unsigned count) unsigned i, dw, nr = MIN2(count, emit.vtx_max); dw = nr * emit.vtx_dwords; + set_edgeflag(chan, tesla, &emit, *map); + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); for (i = 0; i < nr; ++i) emit_vtx(chan, &emit, *map++); @@ -757,6 +824,8 @@ nv50_push_elements_u08(struct nv50_context *nv50, uint8_t *map, unsigned count) unsigned i, dw, nr = MIN2(count, emit.vtx_max); dw = nr * emit.vtx_dwords; + set_edgeflag(chan, tesla, &emit, *map); + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); for (i = 0; i < nr; ++i) emit_vtx(chan, &emit, *map++); diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile index 121b65063f3..afddcb161fa 100644 --- a/src/gallium/drivers/r300/Makefile +++ b/src/gallium/drivers/r300/Makefile @@ -4,8 +4,8 @@ include $(TOP)/configs/current LIBNAME = r300 C_SOURCES = \ + r300_blit.c \ r300_chipset.c \ - r300_clear.c \ r300_context.c \ r300_debug.c \ r300_emit.c \ @@ -17,13 +17,13 @@ C_SOURCES = \ r300_state.c \ r300_state_derived.c \ r300_state_invariant.c \ - r300_vbo.c \ r300_vs.c \ r300_texture.c \ r300_tgsi_to_rc.c LIBRARY_INCLUDES = \ - -I$(TOP)/src/mesa/drivers/dri/r300/compiler + -I$(TOP)/src/mesa/drivers/dri/r300/compiler \ + -I$(TOP)/src/gallium/winsys/drm/radeon/core COMPILER_ARCHIVE = $(TOP)/src/mesa/drivers/dri/r300/compiler/libr300compiler.a diff --git a/src/gallium/drivers/r300/SConscript b/src/gallium/drivers/r300/SConscript index 97989040d2e..183aa17f9b3 100644 --- a/src/gallium/drivers/r300/SConscript +++ b/src/gallium/drivers/r300/SConscript @@ -4,13 +4,18 @@ r300compiler = SConscript('#/src/mesa/drivers/dri/r300/compiler/SConscript') env = env.Clone() # add the paths for r300compiler -env.Append(CPPPATH = ['#/src/mesa/drivers/dri/r300/compiler', '#/include', '#/src/mesa']) +env.Append(CPPPATH = [ + '#/src/mesa/drivers/dri/r300/compiler', + '#/src/gallium/winsys/drm/radeon/core', + '#/include', + '#/src/mesa', +]) r300 = env.ConvenienceLibrary( target = 'r300', source = [ + 'r300_blit.c', 'r300_chipset.c', - 'r300_clear.c', 'r300_context.c', 'r300_debug.c', 'r300_emit.c', diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c new file mode 100644 index 00000000000..ffe066d5369 --- /dev/null +++ b/src/gallium/drivers/r300/r300_blit.c @@ -0,0 +1,130 @@ +/* + * Copyright 2009 Marek Olšák <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "r300_blit.h" +#include "r300_context.h" + +#include "util/u_rect.h" + +static void r300_blitter_save_states(struct r300_context* r300) +{ + util_blitter_save_blend(r300->blitter, r300->blend_state); + util_blitter_save_depth_stencil_alpha(r300->blitter, r300->dsa_state); + util_blitter_save_rasterizer(r300->blitter, r300->rs_state); + util_blitter_save_fragment_shader(r300->blitter, r300->fs); + util_blitter_save_vertex_shader(r300->blitter, r300->vs); +} + +/* Clear currently bound buffers. */ +void r300_clear(struct pipe_context* pipe, + unsigned buffers, + const float* rgba, + double depth, + unsigned stencil) +{ + /* XXX Implement fastfill. + * + * If fastfill is enabled, a few facts should be considered: + * + * 1) Zbuffer must be micro-tiled and whole microtiles must be + * written. + * + * 2) ZB_DEPTHCLEARVALUE is used to clear a zbuffer and Z Mask must be + * equal to 0. + * + * 3) RB3D_COLOR_CLEAR_VALUE is used to clear a colorbuffer and + * RB3D_COLOR_CHANNEL_MASK must be equal to 0. + * + * 4) ZB_CB_CLEAR can be used to make the ZB units help in clearing + * the colorbuffer. The color clear value is supplied through both + * RB3D_COLOR_CLEAR_VALUE and ZB_DEPTHCLEARVALUE, and the colorbuffer + * must be set in ZB_DEPTHOFFSET and ZB_DEPTHPITCH in addition to + * RB3D_COLOROFFSET and RB3D_COLORPITCH. It's obvious that the zbuffer + * will not be cleared and multiple render targets cannot be cleared + * this way either. + * + * 5) For 16-bit integer buffering, compression causes a hung with one or + * two samples and should not be used. + * + * 6) Fastfill must not be used if reading of compressed Z data is disabled + * and writing of compressed Z data is enabled (RD/WR_COMP_ENABLE), + * i.e. it cannot be used to compress the zbuffer. + * (what the hell does that mean and how does it fit in clearing + * the buffers?) + * + * - Marek + */ + + struct r300_context* r300 = r300_context(pipe); + + r300_blitter_save_states(r300); + + util_blitter_clear(r300->blitter, + r300->framebuffer_state.width, + r300->framebuffer_state.height, + r300->framebuffer_state.nr_cbufs, + buffers, rgba, depth, stencil); +} + +/* Copy a block of pixels from one surface to another. */ +void r300_surface_copy(struct pipe_context* pipe, + struct pipe_surface* dst, + unsigned dstx, unsigned dsty, + struct pipe_surface* src, + unsigned srcx, unsigned srcy, + unsigned width, unsigned height) +{ + struct r300_context* r300 = r300_context(pipe); + + /* Yeah we have to save all those states to ensure this blitter operation + * is really transparent. The states will be restored by the blitter once + * copying is done. */ + r300_blitter_save_states(r300); + util_blitter_save_framebuffer(r300->blitter, &r300->framebuffer_state); + + util_blitter_save_fragment_sampler_states( + r300->blitter, r300->sampler_count, (void**)r300->sampler_states); + + util_blitter_save_fragment_sampler_textures( + r300->blitter, r300->texture_count, + (struct pipe_texture**)r300->textures); + + /* Do a copy */ + util_blitter_copy(r300->blitter, + dst, dstx, dsty, src, srcx, srcy, width, height, TRUE); +} + +/* Fill a region of a surface with a constant value. */ +void r300_surface_fill(struct pipe_context* pipe, + struct pipe_surface* dst, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height, + unsigned value) +{ + struct r300_context* r300 = r300_context(pipe); + + r300_blitter_save_states(r300); + util_blitter_save_framebuffer(r300->blitter, &r300->framebuffer_state); + + util_blitter_fill(r300->blitter, + dst, dstx, dsty, width, height, value); +} diff --git a/src/gallium/drivers/r300/r300_clear.h b/src/gallium/drivers/r300/r300_blit.h index b8fcdf273c7..029e4f98e7d 100644 --- a/src/gallium/drivers/r300/r300_clear.h +++ b/src/gallium/drivers/r300/r300_blit.h @@ -1,5 +1,5 @@ /* - * Copyright 2008 Corbin Simpson <[email protected]> + * Copyright 2008 Marek Olšák <[email protected]> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -20,10 +20,11 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef R300_CLEAR_H -#define R300_CLEAR_H +#ifndef R300_BLIT_H +#define R300_BLIT_H struct pipe_context; +struct pipe_surface; void r300_clear(struct pipe_context* pipe, unsigned buffers, @@ -31,4 +32,17 @@ void r300_clear(struct pipe_context* pipe, double depth, unsigned stencil); -#endif /* R300_CLEAR_H */ +void r300_surface_copy(struct pipe_context* pipe, + struct pipe_surface* dst, + unsigned dstx, unsigned dsty, + struct pipe_surface* src, + unsigned srcx, unsigned srcy, + unsigned width, unsigned height); + +void r300_surface_fill(struct pipe_context* pipe, + struct pipe_surface* dst, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height, + unsigned value); + +#endif /* R300_BLIT_H */ diff --git a/src/gallium/drivers/r300/r300_clear.c b/src/gallium/drivers/r300/r300_clear.c deleted file mode 100644 index 02d6d504fc0..00000000000 --- a/src/gallium/drivers/r300/r300_clear.c +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright 2008 Corbin Simpson <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -#include "r300_clear.h" -#include "r300_context.h" - -#include "util/u_clear.h" - -/* Clears currently bound buffers. */ -void r300_clear(struct pipe_context* pipe, - unsigned buffers, - const float* rgba, - double depth, - unsigned stencil) -{ - /* XXX we can and should do one clear if both color and zs are set */ - util_clear(pipe, &r300_context(pipe)->framebuffer_state, - buffers, rgba, depth, stencil); -} diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index ae23329b83f..d5c2d63d393 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -28,7 +28,7 @@ #include "util/u_memory.h" #include "util/u_simple_list.h" -#include "r300_clear.h" +#include "r300_blit.h" #include "r300_context.h" #include "r300_flush.h" #include "r300_query.h" @@ -36,6 +36,7 @@ #include "r300_screen.h" #include "r300_state_derived.h" #include "r300_state_invariant.h" +#include "r300_texture.h" #include "r300_winsys.h" static enum pipe_error r300_clear_hash_table(void* key, void* value, @@ -51,6 +52,8 @@ static void r300_destroy_context(struct pipe_context* context) struct r300_context* r300 = r300_context(context); struct r300_query* query, * temp; + util_blitter_destroy(r300->blitter); + util_hash_table_foreach(r300->shader_hash_table, r300_clear_hash_table, NULL); util_hash_table_destroy(r300->shader_hash_table); @@ -69,6 +72,7 @@ static void r300_destroy_context(struct pipe_context* context) FREE(r300->blend_color_state); FREE(r300->rs_block); FREE(r300->scissor_state); + FREE(r300->vertex_info); FREE(r300->viewport_state); FREE(r300); } @@ -104,7 +108,7 @@ static void r300_flush_cb(void *data) } struct pipe_context* r300_create_context(struct pipe_screen* screen, - struct r300_winsys* r300_winsys) + struct radeon_winsys* radeon_winsys) { struct r300_context* r300 = CALLOC_STRUCT(r300_context); struct r300_screen* r300screen = r300_screen(screen); @@ -112,9 +116,9 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, if (!r300) return NULL; - r300->winsys = r300_winsys; + r300->winsys = radeon_winsys; - r300->context.winsys = (struct pipe_winsys*)r300_winsys; + r300->context.winsys = (struct pipe_winsys*)radeon_winsys; r300->context.screen = screen; r300_init_debug(r300); @@ -122,16 +126,27 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->context.destroy = r300_destroy_context; r300->context.clear = r300_clear; + r300->context.surface_copy = r300_surface_copy; + r300->context.surface_fill = r300_surface_fill; - if (r300screen->caps->has_tcl) - { + if (r300screen->caps->has_tcl) { r300->context.draw_arrays = r300_draw_arrays; r300->context.draw_elements = r300_draw_elements; r300->context.draw_range_elements = r300_draw_range_elements; - } - else - { - assert(0); + } else { + r300->context.draw_arrays = r300_swtcl_draw_arrays; + r300->context.draw_elements = r300_draw_elements; + r300->context.draw_range_elements = r300_swtcl_draw_range_elements; + + /* Create a Draw. This is used for SW TCL. */ + r300->draw = draw_create(); + /* Enable our renderer. */ + draw_set_rasterize_stage(r300->draw, r300_draw_stage(r300)); + /* Enable Draw's clipping. */ + draw_set_driver_clipping(r300->draw, FALSE); + /* Force Draw to never do viewport transform, since we can do + * transform in hardware, always. */ + draw_set_viewport_state(r300->draw, &r300_viewport_identity); } r300->context.is_texture_referenced = r300_is_texture_referenced; @@ -143,18 +158,9 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->blend_color_state = CALLOC_STRUCT(r300_blend_color_state); r300->rs_block = CALLOC_STRUCT(r300_rs_block); r300->scissor_state = CALLOC_STRUCT(r300_scissor_state); + r300->vertex_info = CALLOC_STRUCT(r300_vertex_info); r300->viewport_state = CALLOC_STRUCT(r300_viewport_state); - /* Create a Draw. This is used for vert collation and SW TCL. */ - r300->draw = draw_create(); - /* Enable our renderer. */ - draw_set_rasterize_stage(r300->draw, r300_draw_stage(r300)); - /* Disable Draw's clipping if TCL is present. */ - draw_set_driver_clipping(r300->draw, r300_screen(screen)->caps->has_tcl); - /* Force Draw to never do viewport transform, since (again) we can do - * transform in hardware, always. */ - draw_set_viewport_state(r300->draw, &r300_viewport_identity); - /* Open up the OQ BO. */ r300->oqbo = screen->buffer_create(screen, 4096, PIPE_BUFFER_USAGE_VERTEX, 4096); @@ -173,5 +179,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->winsys->set_flush_cb(r300->winsys, r300_flush_cb, r300); r300->dirty_state = R300_NEW_KITCHEN_SINK; r300->dirty_hw++; + + r300->blitter = util_blitter_create(&r300->context); + return &r300->context; } diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index f954ba7f9aa..232530b7dc4 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -25,6 +25,8 @@ #include "draw/draw_vertex.h" +#include "util/u_blitter.h" + #include "pipe/p_context.h" #include "pipe/p_inlines.h" @@ -89,14 +91,28 @@ struct r300_rs_block { }; struct r300_sampler_state { + struct pipe_sampler_state state; + uint32_t filter0; /* R300_TX_FILTER0: 0x4400 */ uint32_t filter1; /* R300_TX_FILTER1: 0x4440 */ uint32_t border_color; /* R300_TX_BORDER_COLOR: 0x45c0 */ + + /* Min/max LOD must be clamped to [0, last_level], thus + * it's dependent on a currently bound texture */ + unsigned min_lod, max_lod; +}; + +struct r300_scissor_regs { + uint32_t top_left; /* R300_SC_SCISSORS_TL: 0x43e0 */ + uint32_t bottom_right; /* R300_SC_SCISSORS_BR: 0x43e4 */ + + /* Whether everything is culled by scissoring. */ + boolean empty_area; }; struct r300_scissor_state { - uint32_t scissor_top_left; /* R300_SC_SCISSORS_TL: 0x43e0 */ - uint32_t scissor_bottom_right; /* R300_SC_SCISSORS_BR: 0x43e4 */ + struct r300_scissor_regs framebuffer; + struct r300_scissor_regs scissor; }; struct r300_texture_state { @@ -219,11 +235,6 @@ struct r300_texture { struct r300_vertex_info { /* Parent class */ struct vertex_info vinfo; - /* Map of vertex attributes into PVS memory for HW TCL, - * or GA memory for SW TCL. */ - int vs_tab[16]; - /* Map of rasterizer attributes from GB through RS to US. */ - int fs_tab[16]; /* R300_VAP_PROG_STREAK_CNTL_[0-7] */ uint32_t vap_prog_stream_cntl[8]; @@ -238,9 +249,11 @@ struct r300_context { struct pipe_context context; /* The interface to the windowing system, etc. */ - struct r300_winsys* winsys; + struct radeon_winsys* winsys; /* Draw module. Used mostly for SW TCL. */ struct draw_context* draw; + /* Accelerated blit support. */ + struct blitter_context* blitter; /* Vertex buffer for rendering. */ struct pipe_buffer* vbo; diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index 5342488d0d4..d142fee0502 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -26,7 +26,8 @@ #include "util/u_math.h" #include "r300_reg.h" -#include "r300_winsys.h" + +#include "radeon_winsys.h" /* Yes, I know macros are ugly. However, they are much prettier than the code * that they neatly hide away, and don't have the cost of function setup,so @@ -50,11 +51,11 @@ #define CS_LOCALS(context) \ struct r300_context* const cs_context_copy = (context); \ - struct r300_winsys* cs_winsys = cs_context_copy->winsys; \ - int cs_count = 0 + struct radeon_winsys* cs_winsys = cs_context_copy->winsys; \ + int cs_count = 0; #define CHECK_CS(size) \ - cs_winsys->check_cs(cs_winsys, (size)) + assert(cs_winsys->check_cs(cs_winsys, (size))) #define BEGIN_CS(size) do { \ CHECK_CS(size); \ @@ -114,6 +115,15 @@ cs_count -= 3; \ } while (0) +#define OUT_CS_RELOC_NO_OFFSET(bo, rd, wd, flags) do { \ + DBG(cs_context_copy, DBG_CS, "r300: writing relocation for buffer %p, " \ + "domains (%d, %d, %d)\n", \ + bo, rd, wd, flags); \ + assert(bo); \ + cs_winsys->write_cs_reloc(cs_winsys, bo, rd, wd, flags); \ + cs_count -= 2; \ +} while (0) + #define END_CS do { \ if (VERY_VERBOSE_CS) { \ DBG(cs_context_copy, DBG_CS, "r300: END_CS in %s (%s:%d)\n", __FUNCTION__, \ diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index b44c7bdbb3d..1dc9216a7b2 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -1,5 +1,6 @@ /* * Copyright 2008 Corbin Simpson <[email protected]> + * Copyright 2009 Marek Olšák <[email protected]> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -22,6 +23,7 @@ /* r300_emit: Functions for emitting state. */ +#include "util/u_format.h" #include "util/u_math.h" #include "r300_context.h" @@ -40,9 +42,16 @@ void r300_emit_blend_state(struct r300_context* r300, CS_LOCALS(r300); BEGIN_CS(8); OUT_CS_REG_SEQ(R300_RB3D_CBLEND, 3); - OUT_CS(blend->blend_control); - OUT_CS(blend->alpha_blend_control); - OUT_CS(blend->color_channel_mask); + if (r300->framebuffer_state.nr_cbufs) { + OUT_CS(blend->blend_control); + OUT_CS(blend->alpha_blend_control); + OUT_CS(blend->color_channel_mask); + } else { + OUT_CS(0); + OUT_CS(0); + OUT_CS(0); + /* XXX also disable fastfill here once it's supported */ + } OUT_CS_REG(R300_RB3D_ROPCNTL, blend->rop); OUT_CS_REG(R300_RB3D_DITHER_CTL, blend->dither); END_CS; @@ -129,7 +138,9 @@ static const float * get_shader_constant( struct rc_constant * constant, struct r300_constant_buffer * externals) { - static const float zero[4] = { 0.0, 0.0, 0.0, 0.0 }; + static float vec[4] = { 0.0, 0.0, 0.0, 1.0 }; + struct pipe_texture *tex; + switch(constant->Type) { case RC_CONSTANT_EXTERNAL: return externals->constants[constant->u.External]; @@ -137,11 +148,38 @@ static const float * get_shader_constant( case RC_CONSTANT_IMMEDIATE: return constant->u.Immediate; + case RC_CONSTANT_STATE: + switch (constant->u.State[0]) { + /* Factor for converting rectangle coords to + * normalized coords. Should only show up on non-r500. */ + case RC_STATE_R300_TEXRECT_FACTOR: + tex = &r300->textures[constant->u.State[1]]->tex; + vec[0] = 1.0 / tex->width0; + vec[1] = 1.0 / tex->height0; + break; + + /* Texture compare-fail value. */ + /* XXX Since Gallium doesn't support GL_ARB_shadow_ambient, + * this is always (0,0,0,0). */ + case RC_STATE_SHADOW_AMBIENT: + vec[3] = 0; + break; + + default: + debug_printf("r300: Implementation error: " + "Unknown RC_CONSTANT type %d\n", constant->u.State[0]); + } + break; + default: - debug_printf("r300: Implementation error: Unhandled constant type %i\n", - constant->Type); - return zero; + debug_printf("r300: Implementation error: " + "Unhandled constant type %d\n", constant->Type); } + + /* This should either be (0, 0, 0, 1), which should be a relatively safe + * RGBA or STRQ value, or it could be one of the RC_CONSTANT_STATE + * state factors. */ + return vec; } /* Convert a normal single-precision float into the 7.16 format @@ -254,7 +292,7 @@ void r500_emit_fragment_program_code(struct r300_context* r300, BEGIN_CS(13 + ((code->inst_end + 1) * 6)); - OUT_CS_REG(R500_US_CONFIG, 0); + OUT_CS_REG(R500_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO); OUT_CS_REG(R500_US_PIXSIZE, code->max_temp_idx); OUT_CS_REG(R500_US_CODE_RANGE, R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(code->inst_end)); @@ -308,7 +346,13 @@ void r300_emit_fb_state(struct r300_context* r300, int i; CS_LOCALS(r300); - BEGIN_CS((10 * fb->nr_cbufs) + (fb->zsbuf ? 10 : 0) + 4); + /* Shouldn't fail unless there is a bug in the state tracker. */ + assert(fb->nr_cbufs <= 4); + + BEGIN_CS((10 * fb->nr_cbufs) + (2 * (4 - fb->nr_cbufs)) + + (fb->zsbuf ? 10 : 0) + 6); + + /* Flush and free renderbuffer caches. */ OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS | R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); @@ -316,6 +360,10 @@ void r300_emit_fb_state(struct r300_context* r300, R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); + /* Set the number of colorbuffers. */ + OUT_CS_REG(R300_RB3D_CCTL, R300_RB3D_CCTL_NUM_MULTIWRITES(fb->nr_cbufs)); + + /* Set up colorbuffers. */ for (i = 0; i < fb->nr_cbufs; i++) { surf = fb->cbufs[i]; tex = (struct r300_texture*)surf->texture; @@ -333,6 +381,12 @@ void r300_emit_fb_state(struct r300_context* r300, r300_translate_out_fmt(surf->format)); } + /* Disable unused colorbuffers. */ + for (; i < 4; i++) { + OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), R300_US_OUT_FMT_UNUSED); + } + + /* Set up a zbuffer. */ if (fb->zsbuf) { surf = fb->zsbuf; tex = (struct r300_texture*)surf->texture; @@ -360,8 +414,6 @@ static void r300_emit_query_start(struct r300_context *r300) if (!query) return; - /* XXX This will almost certainly not return good results - * for overlapping queries. */ BEGIN_CS(4); if (caps->family == CHIP_FAMILY_RV530) { OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); @@ -543,24 +595,36 @@ void r300_emit_rs_block_state(struct r300_context* r300, END_CS; } -void r300_emit_scissor_state(struct r300_context* r300, - struct r300_scissor_state* scissor) +static void r300_emit_scissor_regs(struct r300_context* r300, + struct r300_scissor_regs* scissor) { CS_LOCALS(r300); BEGIN_CS(3); OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); - OUT_CS(scissor->scissor_top_left); - OUT_CS(scissor->scissor_bottom_right); + OUT_CS(scissor->top_left); + OUT_CS(scissor->bottom_right); END_CS; } +void r300_emit_scissor_state(struct r300_context* r300, + struct r300_scissor_state* scissor) +{ + if (r300->rs_state->rs.scissor) { + r300_emit_scissor_regs(r300, &scissor->scissor); + } else { + r300_emit_scissor_regs(r300, &scissor->framebuffer); + } +} + void r300_emit_texture(struct r300_context* r300, struct r300_sampler_state* sampler, struct r300_texture* tex, unsigned offset) { uint32_t filter0 = sampler->filter0; + uint32_t format0 = tex->state.format0; + unsigned min_level, max_level; CS_LOCALS(r300); /* to emulate 1D textures through 2D ones correctly */ @@ -569,13 +633,20 @@ void r300_emit_texture(struct r300_context* r300, filter0 |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE); } + /* determine min/max levels */ + /* the MAX_MIP level is the largest (finest) one */ + max_level = MIN2(sampler->max_lod, tex->tex.last_level); + min_level = MIN2(sampler->min_lod, max_level); + format0 |= R300_TX_NUM_LEVELS(max_level); + filter0 |= R300_TX_MAX_MIP_LEVEL(min_level); + BEGIN_CS(16); OUT_CS_REG(R300_TX_FILTER0_0 + (offset * 4), filter0 | (offset << 28)); OUT_CS_REG(R300_TX_FILTER1_0 + (offset * 4), sampler->filter1); OUT_CS_REG(R300_TX_BORDER_COLOR_0 + (offset * 4), sampler->border_color); - OUT_CS_REG(R300_TX_FORMAT0_0 + (offset * 4), tex->state.format0); + OUT_CS_REG(R300_TX_FORMAT0_0 + (offset * 4), format0); OUT_CS_REG(R300_TX_FORMAT1_0 + (offset * 4), tex->state.format1); OUT_CS_REG(R300_TX_FORMAT2_0 + (offset * 4), tex->state.format2); OUT_CS_REG_SEQ(R300_TX_OFFSET_0 + (offset * 4), 1); @@ -584,50 +655,68 @@ void r300_emit_texture(struct r300_context* r300, END_CS; } -/* XXX I can't read this and that's not good */ -void r300_emit_aos(struct r300_context* r300, unsigned offset) +static boolean r300_validate_aos(struct r300_context *r300) { struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; struct pipe_vertex_element *velem = r300->vertex_element; - CS_LOCALS(r300); int i; - unsigned aos_count = r300->vertex_element_count; + /* Check if formats and strides are aligned to the size of DWORD. */ + for (i = 0; i < r300->vertex_element_count; i++) { + if (vbuf[velem[i].vertex_buffer_index].stride % 4 != 0 || + util_format_get_blocksize(velem[i].src_format) % 4 != 0) { + return FALSE; + } + } + return TRUE; +} + +void r300_emit_aos(struct r300_context* r300, unsigned offset) +{ + struct pipe_vertex_buffer *vb1, *vb2, *vbuf = r300->vertex_buffer; + struct pipe_vertex_element *velem = r300->vertex_element; + int i; + unsigned size1, size2, aos_count = r300->vertex_element_count; unsigned packet_size = (aos_count * 3 + 1) / 2; + CS_LOCALS(r300); + + /* XXX Move this checking to a more approriate place. */ + if (!r300_validate_aos(r300)) { + /* XXX We should fallback using Draw. */ + assert(0); + } + BEGIN_CS(2 + packet_size + aos_count * 2); OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size); OUT_CS(aos_count); + for (i = 0; i < aos_count - 1; i += 2) { - int buf_num1 = velem[i].vertex_buffer_index; - int buf_num2 = velem[i+1].vertex_buffer_index; - assert(vbuf[buf_num1].stride % 4 == 0 && pf_get_size(velem[i].src_format) % 4 == 0); - assert(vbuf[buf_num2].stride % 4 == 0 && pf_get_size(velem[i+1].src_format) % 4 == 0); - OUT_CS((pf_get_size(velem[i].src_format) >> 2) | (vbuf[buf_num1].stride << 6) | - (pf_get_size(velem[i+1].src_format) << 14) | (vbuf[buf_num2].stride << 22)); - OUT_CS(vbuf[buf_num1].buffer_offset + velem[i].src_offset + - offset * vbuf[buf_num1].stride); - OUT_CS(vbuf[buf_num2].buffer_offset + velem[i+1].src_offset + - offset * vbuf[buf_num2].stride); + vb1 = &vbuf[velem[i].vertex_buffer_index]; + vb2 = &vbuf[velem[i+1].vertex_buffer_index]; + size1 = util_format_get_blocksize(velem[i].src_format); + size2 = util_format_get_blocksize(velem[i+1].src_format); + + OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride) | + R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(vb2->stride)); + OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride); + OUT_CS(vb2->buffer_offset + velem[i+1].src_offset + offset * vb2->stride); } + if (aos_count & 1) { - int buf_num = velem[i].vertex_buffer_index; - assert(vbuf[buf_num].stride % 4 == 0 && pf_get_size(velem[i].src_format) % 4 == 0); - OUT_CS((pf_get_size(velem[i].src_format) >> 2) | (vbuf[buf_num].stride << 6)); - OUT_CS(vbuf[buf_num].buffer_offset + velem[i].src_offset + - offset * vbuf[buf_num].stride); + vb1 = &vbuf[velem[i].vertex_buffer_index]; + size1 = util_format_get_blocksize(velem[i].src_format); + + OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride)); + OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride); } - /* XXX bare CS reloc */ for (i = 0; i < aos_count; i++) { - cs_winsys->write_cs_reloc(cs_winsys, - vbuf[velem[i].vertex_buffer_index].buffer, - RADEON_GEM_DOMAIN_GTT, - 0, - 0); - cs_count -= 2; + OUT_CS_RELOC_NO_OFFSET(vbuf[velem[i].vertex_buffer_index].buffer, + RADEON_GEM_DOMAIN_GTT, 0, 0); } END_CS; } + #if 0 void r300_emit_draw_packet(struct r300_context* r300) { @@ -690,12 +779,22 @@ void r300_emit_vertex_format_state(struct r300_context* r300) END_CS; } + void r300_emit_vertex_program_code(struct r300_context* r300, struct r300_vertex_program_code* code) { int i; struct r300_screen* r300screen = r300_screen(r300->context.screen); unsigned instruction_count = code->length / 4; + + int vtx_mem_size = r300screen->caps->is_r500 ? 128 : 72; + int input_count = MAX2(util_bitcount(code->InputsRead), 1); + int output_count = MAX2(util_bitcount(code->OutputsWritten), 1); + int temp_count = MAX2(code->num_temporaries, 1); + int pvs_num_slots = MIN3(vtx_mem_size / input_count, + vtx_mem_size / output_count, 10); + int pvs_num_controllers = MIN2(vtx_mem_size / temp_count, 6); + CS_LOCALS(r300); if (!r300screen->caps->has_tcl) { @@ -708,8 +807,7 @@ void r300_emit_vertex_program_code(struct r300_context* r300, /* R300_VAP_PVS_CODE_CNTL_0 * R300_VAP_PVS_CONST_CNTL * R300_VAP_PVS_CODE_CNTL_1 - * See the r5xx docs for instructions on how to use these. - * XXX these could be optimized to select better values... */ + * See the r5xx docs for instructions on how to use these. */ OUT_CS_REG_SEQ(R300_VAP_PVS_CODE_CNTL_0, 3); OUT_CS(R300_PVS_FIRST_INST(0) | R300_PVS_XYZW_VALID_INST(instruction_count - 1) | @@ -722,10 +820,11 @@ void r300_emit_vertex_program_code(struct r300_context* r300, for (i = 0; i < code->length; i++) OUT_CS(code->body.d[i]); - OUT_CS_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(10) | - R300_PVS_NUM_CNTLRS(5) | + OUT_CS_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(pvs_num_slots) | + R300_PVS_NUM_CNTLRS(pvs_num_controllers) | R300_PVS_NUM_FPUS(r300screen->caps->num_vert_fpus) | - R300_PVS_VF_MAX_VTX_NUM(12)); + R300_PVS_VF_MAX_VTX_NUM(12) | + (r300screen->caps->is_r500 ? R500_TCL_STATE_OPTIMIZATION : 0)); END_CS; } @@ -790,13 +889,33 @@ void r300_emit_viewport_state(struct r300_context* r300, END_CS; } +void r300_emit_texture_count(struct r300_context* r300) +{ + uint32_t tx_enable = 0; + int i; + CS_LOCALS(r300); + + /* Notice that texture_count and sampler_count are just sizes + * of the respective arrays. We still have to check for the individual + * elements. */ + for (i = 0; i < MIN2(r300->sampler_count, r300->texture_count); i++) { + if (r300->textures[i]) { + tx_enable |= 1 << i; + } + } + + BEGIN_CS(2); + OUT_CS_REG(R300_TX_ENABLE, tx_enable); + END_CS; + +} + void r300_flush_textures(struct r300_context* r300) { CS_LOCALS(r300); - BEGIN_CS(4); + BEGIN_CS(2); OUT_CS_REG(R300_TX_INVALTAGS, 0); - OUT_CS_REG(R300_TX_ENABLE, (1 << r300->texture_count) - 1); END_CS; } @@ -821,10 +940,17 @@ void r300_emit_dirty_state(struct r300_context* r300) return; } + /* Check size of CS. */ + /* Make sure we have at least 8*1024 spare dwords. */ + /* XXX It would be nice to know the number of dwords we really need to + * XXX emit. */ + if (!r300->winsys->check_cs(r300->winsys, 8*1024)) { + r300->context.flush(&r300->context, 0, NULL); + } + /* Clean out BOs. */ r300->winsys->reset_bos(r300->winsys); - /* XXX check size */ validate: /* Color buffers... */ for (i = 0; i < r300->framebuffer_state.nr_cbufs; i++) { @@ -911,18 +1037,20 @@ validate: if (r300->dirty_state & R300_NEW_FRAGMENT_SHADER) { if (r300screen->caps->is_r500) { - r500_emit_fragment_program_code(r300, &r300->fs->code); + r500_emit_fragment_program_code(r300, &r300->fs->shader->code); } else { - r300_emit_fragment_program_code(r300, &r300->fs->code); + r300_emit_fragment_program_code(r300, &r300->fs->shader->code); } r300->dirty_state &= ~R300_NEW_FRAGMENT_SHADER; } if (r300->dirty_state & R300_NEW_FRAGMENT_SHADER_CONSTANTS) { if (r300screen->caps->is_r500) { - r500_emit_fs_constant_buffer(r300, &r300->fs->code.constants); + r500_emit_fs_constant_buffer(r300, + &r300->fs->shader->code.constants); } else { - r300_emit_fs_constant_buffer(r300, &r300->fs->code.constants); + r300_emit_fs_constant_buffer(r300, + &r300->fs->shader->code.constants); } r300->dirty_state &= ~R300_NEW_FRAGMENT_SHADER_CONSTANTS; } @@ -950,6 +1078,8 @@ validate: /* Samplers and textures are tracked separately but emitted together. */ if (r300->dirty_state & (R300_ANY_NEW_SAMPLERS | R300_ANY_NEW_TEXTURES)) { + r300_emit_texture_count(r300); + for (i = 0; i < MIN2(r300->sampler_count, r300->texture_count); i++) { if (r300->dirty_state & ((R300_NEW_SAMPLER << i) | (R300_NEW_TEXTURE << i))) { diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 7c83c5166de..3797d3d332a 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -92,6 +92,8 @@ void r300_emit_vertex_shader(struct r300_context* r300, void r300_emit_viewport_state(struct r300_context* r300, struct r300_viewport_state* viewport); +void r300_emit_texture_count(struct r300_context* r300); + void r300_flush_textures(struct r300_context* r300); /* Emit all dirty state. */ diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index 29ddc84c411..4e1b61ca409 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -1,6 +1,7 @@ /* * Copyright 2008 Corbin Simpson <[email protected]> * Joakim Sindholt <[email protected]> + * Copyright 2009 Marek Olšák <[email protected]> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -21,6 +22,9 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include "util/u_math.h" +#include "util/u_memory.h" + #include "tgsi/tgsi_dump.h" #include "r300_context.h" @@ -31,6 +35,40 @@ #include "radeon_code.h" #include "radeon_compiler.h" +/* Convert info about FS input semantics to r300_shader_semantics. */ +void r300_shader_read_fs_inputs(struct tgsi_shader_info* info, + struct r300_shader_semantics* fs_inputs) +{ + int i; + unsigned index; + + r300_shader_semantics_reset(fs_inputs); + + for (i = 0; i < info->num_inputs; i++) { + index = info->input_semantic_index[i]; + + switch (info->input_semantic_name[i]) { + case TGSI_SEMANTIC_COLOR: + assert(index <= ATTR_COLOR_COUNT); + fs_inputs->color[index] = i; + break; + + case TGSI_SEMANTIC_GENERIC: + assert(index <= ATTR_GENERIC_COUNT); + fs_inputs->generic[index] = i; + break; + + case TGSI_SEMANTIC_FOG: + assert(index == 0); + fs_inputs->fog = i; + break; + + default: + assert(0); + } + } +} + static void find_output_registers(struct r300_fragment_program_compiler * compiler, struct r300_fragment_shader * fs) { @@ -58,61 +96,65 @@ static void allocate_hardware_inputs( void (*allocate)(void * data, unsigned input, unsigned hwreg), void * mydata) { - struct tgsi_shader_info* info = &((struct r300_fragment_shader*)c->UserData)->info; - int total_colors = 0; - int colors = 0; - int total_generic = 0; - int generic = 0; - int i; - - for (i = 0; i < info->num_inputs; i++) { - switch (info->input_semantic_name[i]) { - case TGSI_SEMANTIC_COLOR: - total_colors++; - break; - case TGSI_SEMANTIC_FOG: - case TGSI_SEMANTIC_GENERIC: - total_generic++; - break; + struct r300_shader_semantics* inputs = + (struct r300_shader_semantics*)c->UserData; + int i, reg = 0; + + /* Allocate input registers. */ + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + if (inputs->color[i] != ATTR_UNUSED) { + allocate(mydata, inputs->color[i], reg++); } } + for (i = 0; i < ATTR_GENERIC_COUNT; i++) { + if (inputs->generic[i] != ATTR_UNUSED) { + allocate(mydata, inputs->generic[i], reg++); + } + } + if (inputs->fog != ATTR_UNUSED) { + allocate(mydata, inputs->fog, reg++); + } +} - for(i = 0; i < info->num_inputs; i++) { - switch (info->input_semantic_name[i]) { - case TGSI_SEMANTIC_COLOR: - allocate(mydata, i, colors); - colors++; - break; - case TGSI_SEMANTIC_FOG: - case TGSI_SEMANTIC_GENERIC: - allocate(mydata, i, total_colors + generic); - generic++; - break; +static void get_compare_state( + struct r300_context* r300, + struct r300_fragment_program_external_state* state, + unsigned shadow_samplers) +{ + memset(state, 0, sizeof(*state)); + + for (int i = 0; i < r300->sampler_count; i++) { + struct r300_sampler_state* s = r300->sampler_states[i]; + + if (s && s->state.compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + /* XXX Gallium doesn't provide us with any information regarding + * this mode, so we are screwed. I'm setting 0 = LUMINANCE. */ + state->unit[i].depth_texture_mode = 0; + + /* Fortunately, no need to translate this. */ + state->unit[i].texture_compare_func = s->state.compare_func; } } } -void r300_translate_fragment_shader(struct r300_context* r300, - struct r300_fragment_shader* fs) +static void r300_translate_fragment_shader( + struct r300_context* r300, + struct r300_fragment_shader_code* shader) { + struct r300_fragment_shader* fs = r300->fs; struct r300_fragment_program_compiler compiler; struct tgsi_to_rc ttr; + /* Setup the compiler. */ memset(&compiler, 0, sizeof(compiler)); rc_init(&compiler.Base); compiler.Base.Debug = DBG_ON(r300, DBG_FP); - compiler.code = &fs->code; + compiler.code = &shader->code; + compiler.state = shader->compare_state; compiler.is_r500 = r300_screen(r300->context.screen)->caps->is_r500; compiler.AllocateHwInputs = &allocate_hardware_inputs; - compiler.UserData = fs; - - /* TODO: Program compilation depends on texture compare modes, - * which are sampler state. Therefore, programs need to be recompiled - * depending on this state as in the classic Mesa driver. - * - * This is not yet handled correctly. - */ + compiler.UserData = &fs->inputs; find_output_registers(&compiler, fs); @@ -127,15 +169,64 @@ void r300_translate_fragment_shader(struct r300_context* r300, r300_tgsi_to_rc(&ttr, fs->state.tokens); + fs->shadow_samplers = compiler.Base.Program.ShadowSamplers; + /* Invoke the compiler */ r3xx_compile_fragment_program(&compiler); if (compiler.Base.Error) { /* XXX failover maybe? */ DBG(r300, DBG_FP, "r300: Error compiling fragment program: %s\n", compiler.Base.ErrorMsg); + assert(0); } /* And, finally... */ rc_destroy(&compiler.Base); - fs->translated = TRUE; +} + +boolean r300_pick_fragment_shader(struct r300_context* r300) +{ + struct r300_fragment_shader* fs = r300->fs; + struct r300_fragment_program_external_state state; + struct r300_fragment_shader_code* ptr; + + if (!fs->first) { + /* Build the fragment shader for the first time. */ + fs->first = fs->shader = CALLOC_STRUCT(r300_fragment_shader_code); + + /* BTW shadow samplers will be known after the first translation, + * therefore we set ~0, which means it should look at all sampler + * states. This choice doesn't have any impact on the correctness. */ + get_compare_state(r300, &fs->shader->compare_state, ~0); + r300_translate_fragment_shader(r300, fs->shader); + return TRUE; + + } else if (fs->shadow_samplers) { + get_compare_state(r300, &state, fs->shadow_samplers); + + /* Check if the currently-bound shader has been compiled + * with the texture-compare state we need. */ + if (memcmp(&fs->shader->compare_state, &state, sizeof(state)) != 0) { + /* Search for the right shader. */ + ptr = fs->first; + while (ptr) { + if (memcmp(&ptr->compare_state, &state, sizeof(state)) == 0) { + fs->shader = ptr; + return TRUE; + } + ptr = ptr->next; + } + + /* Not found, gotta compile a new one. */ + ptr = CALLOC_STRUCT(r300_fragment_shader_code); + ptr->next = fs->first; + fs->first = fs->shader = ptr; + + ptr->compare_state = state; + r300_translate_fragment_shader(r300, ptr); + return TRUE; + } + } + + return FALSE; } diff --git a/src/gallium/drivers/r300/r300_fs.h b/src/gallium/drivers/r300/r300_fs.h index e831c30301b..40ce874353c 100644 --- a/src/gallium/drivers/r300/r300_fs.h +++ b/src/gallium/drivers/r300/r300_fs.h @@ -1,6 +1,7 @@ /* * Copyright 2008 Corbin Simpson <[email protected]> * Joakim Sindholt <[email protected]> + * Copyright 2009 Marek Olšák <[email protected]> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -25,31 +26,46 @@ #define R300_FS_H #include "pipe/p_state.h" - #include "tgsi/tgsi_scan.h" - #include "radeon_code.h" +#include "r300_shader_semantics.h" + +struct r300_fragment_shader_code { + struct r300_fragment_program_external_state compare_state; + struct rX00_fragment_program_code code; + + struct r300_fragment_shader_code* next; +}; struct r300_fragment_shader { /* Parent class */ struct pipe_shader_state state; + struct tgsi_shader_info info; + struct r300_shader_semantics inputs; - /* Has this shader been translated yet? */ - boolean translated; + /* Bits 0-15: TRUE if it's a shadow sampler, FALSE otherwise. */ + unsigned shadow_samplers; - /* Compiled code */ - struct rX00_fragment_program_code code; + /* Currently-bound fragment shader. */ + struct r300_fragment_shader_code* shader; + + /* List of the same shaders compiled with different texture-compare + * states. */ + struct r300_fragment_shader_code* first; }; +void r300_shader_read_fs_inputs(struct tgsi_shader_info* info, + struct r300_shader_semantics* fs_inputs); -void r300_translate_fragment_shader(struct r300_context* r300, - struct r300_fragment_shader* fs); +/* Return TRUE if the shader was switched and should be re-emitted. */ +boolean r300_pick_fragment_shader(struct r300_context* r300); -static inline boolean r300_fragment_shader_writes_depth(struct r300_fragment_shader *fs) +static INLINE boolean r300_fragment_shader_writes_depth(struct r300_fragment_shader *fs) { if (!fs) - return FALSE; - return (fs->code.writes_depth) ? TRUE : FALSE; + return FALSE; + return (fs->shader->code.writes_depth) ? TRUE : FALSE; } + #endif /* R300_FS_H */ diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index 744ea6568d3..0aa1da07f8b 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -661,20 +661,20 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_GB_SUPER_TILE_B (1 << 15) # define R300_GB_SUBPIXEL_1_12 (0 << 16) # define R300_GB_SUBPIXEL_1_16 (1 << 16) -# define GB_TILE_CONFIG_QUADS_PER_RAS_4 (0 << 17) -# define GB_TILE_CONFIG_QUADS_PER_RAS_8 (1 << 17) -# define GB_TILE_CONFIG_QUADS_PER_RAS_16 (2 << 17) -# define GB_TILE_CONFIG_QUADS_PER_RAS_32 (3 << 17) -# define GB_TILE_CONFIG_BB_SCAN_INTERCEPT (0 << 19) -# define GB_TILE_CONFIG_BB_SCAN_BOUND_BOX (1 << 19) -# define GB_TILE_CONFIG_ALT_SCAN_EN_LR (0 << 20) -# define GB_TILE_CONFIG_ALT_SCAN_EN_LRL (1 << 20) -# define GB_TILE_CONFIG_ALT_OFFSET (0 << 21) -# define GB_TILE_CONFIG_SUBPRECISION (0 << 22) -# define GB_TILE_CONFIG_ALT_TILING_DEF (0 << 23) -# define GB_TILE_CONFIG_ALT_TILING_3_2 (1 << 23) -# define GB_TILE_CONFIG_Z_EXTENDED_24_1 (0 << 24) -# define GB_TILE_CONFIG_Z_EXTENDED_S25_1 (1 << 24) +# define R300_GB_TILE_CONFIG_QUADS_PER_RAS_4 (0 << 17) +# define R300_GB_TILE_CONFIG_QUADS_PER_RAS_8 (1 << 17) +# define R300_GB_TILE_CONFIG_QUADS_PER_RAS_16 (2 << 17) +# define R300_GB_TILE_CONFIG_QUADS_PER_RAS_32 (3 << 17) +# define R300_GB_TILE_CONFIG_BB_SCAN_INTERCEPT (0 << 19) +# define R300_GB_TILE_CONFIG_BB_SCAN_BOUND_BOX (1 << 19) +# define R300_GB_TILE_CONFIG_ALT_SCAN_EN_LR (0 << 20) +# define R300_GB_TILE_CONFIG_ALT_SCAN_EN_LRL (1 << 20) +# define R300_GB_TILE_CONFIG_ALT_OFFSET (0 << 21) +# define R300_GB_TILE_CONFIG_SUBPRECISION (0 << 22) +# define R300_GB_TILE_CONFIG_ALT_TILING_DEF (0 << 23) +# define R300_GB_TILE_CONFIG_ALT_TILING_3_2 (1 << 23) +# define R300_GB_TILE_CONFIG_Z_EXTENDED_24_1 (0 << 24) +# define R300_GB_TILE_CONFIG_Z_EXTENDED_S25_1 (1 << 24) /* Specifies the sizes of the various FIFO`s in the sc/rs/us. This register must be the first one written */ #define R300_GB_FIFO_SIZE 0x4024 @@ -700,9 +700,9 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_OFIFO_HIGHWATER_SHIFT 22 /* two bits only */ # define R300_CUBE_FIFO_HIGHWATER_COL_SHIFT 24 -#define GB_Z_PEQ_CONFIG 0x4028 -# define GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_4_4 (0 << 0) -# define GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8 (1 << 0) +#define R300_GB_Z_PEQ_CONFIG 0x4028 +# define R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_4_4 (0 << 0) +# define R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8 (1 << 0) /* Specifies various polygon specific selects (fog, depth, perspective). */ #define R300_GB_SELECT 0x401c @@ -725,39 +725,39 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* Specifies the graphics pipeline configuration for antialiasing. */ #define R300_GB_AA_CONFIG 0x4020 -# define GB_AA_CONFIG_AA_DISABLE (0 << 0) -# define GB_AA_CONFIG_AA_ENABLE (1 << 0) -# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2 (0 << 1) -# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3 (1 << 1) -# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4 (2 << 1) -# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6 (3 << 1) +# define R300_GB_AA_CONFIG_AA_DISABLE (0 << 0) +# define R300_GB_AA_CONFIG_AA_ENABLE (1 << 0) +# define R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2 (0 << 1) +# define R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3 (1 << 1) +# define R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4 (2 << 1) +# define R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6 (3 << 1) /* Selects which of 4 pipes are active. */ -#define GB_PIPE_SELECT 0x402c -# define GB_PIPE_SELECT_PIPE0_ID_SHIFT 0 -# define GB_PIPE_SELECT_PIPE1_ID_SHIFT 2 -# define GB_PIPE_SELECT_PIPE2_ID_SHIFT 4 -# define GB_PIPE_SELECT_PIPE3_ID_SHIFT 6 -# define GB_PIPE_SELECT_PIPE_MASK_SHIFT 8 -# define GB_PIPE_SELECT_MAX_PIPE 12 -# define GB_PIPE_SELECT_BAD_PIPES 14 -# define GB_PIPE_SELECT_CONFIG_PIPES 18 +#define R300_GB_PIPE_SELECT 0x402c +# define R300_GB_PIPE_SELECT_PIPE0_ID_SHIFT 0 +# define R300_GB_PIPE_SELECT_PIPE1_ID_SHIFT 2 +# define R300_GB_PIPE_SELECT_PIPE2_ID_SHIFT 4 +# define R300_GB_PIPE_SELECT_PIPE3_ID_SHIFT 6 +# define R300_GB_PIPE_SELECT_PIPE_MASK_SHIFT 8 +# define R300_GB_PIPE_SELECT_MAX_PIPE 12 +# define R300_GB_PIPE_SELECT_BAD_PIPES 14 +# define R300_GB_PIPE_SELECT_CONFIG_PIPES 18 /* Specifies the sizes of the various FIFO`s in the sc/rs. */ -#define GB_FIFO_SIZE1 0x4070 +#define R300_GB_FIFO_SIZE1 0x4070 /* High water mark for SC input fifo */ -# define GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_SHIFT 0 -# define GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_MASK 0x0000003f +# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_SHIFT 0 +# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_MASK 0x0000003f /* High water mark for SC input fifo (B) */ -# define GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_SHIFT 6 -# define GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_MASK 0x00000fc0 +# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_SHIFT 6 +# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_MASK 0x00000fc0 /* High water mark for RS colors' fifo */ -# define GB_FIFO_SIZE1_SC_HIGHWATER_COL_SHIFT 12 -# define GB_FIFO_SIZE1_SC_HIGHWATER_COL_MASK 0x0003f000 +# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_COL_SHIFT 12 +# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_COL_MASK 0x0003f000 /* High water mark for RS textures' fifo */ -# define GB_FIFO_SIZE1_SC_HIGHWATER_TEX_SHIFT 18 -# define GB_FIFO_SIZE1_SC_HIGHWATER_TEX_MASK 0x00fc0000 +# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_TEX_SHIFT 18 +# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_TEX_MASK 0x00fc0000 /* This table specifies the source location and format for up to 16 texture * addresses (i[0]:i[15]) and four colors (c[0]:c[3]) @@ -1293,7 +1293,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R500_RS_INST_TEX_ID(x) ((x) << 0) #define R500_RS_INST_TEX_CN_WRITE (1 << 4) #define R500_RS_INST_TEX_ADDR_SHIFT 5 -# define R500_RS_INST_TEX_ADDR(x) ((x) << 0) +# define R500_RS_INST_TEX_ADDR(x) ((x) << 5) #define R500_RS_INST_COL_ID_SHIFT 12 # define R500_RS_INST_COL_ID(x) ((x) << 12) #define R500_RS_INST_COL_CN_NO_WRITE (0 << 16) @@ -1463,6 +1463,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_TX_MIN_FILTER_MIP_NEAREST (1 << 13) # define R300_TX_MIN_FILTER_MIP_LINEAR (2 << 13) # define R300_TX_MIN_FILTER_MIP_MASK (3 << 13) +# define R300_TX_MAX_MIP_LEVEL_SHIFT 17 +# define R300_TX_MAX_MIP_LEVEL_MASK (0xf << 17) # define R300_TX_MAX_ANISO_1_TO_1 (0 << 21) # define R300_TX_MAX_ANISO_2_TO_1 (1 << 21) # define R300_TX_MAX_ANISO_4_TO_1 (2 << 21) @@ -1471,6 +1473,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_TX_MAX_ANISO_MASK (7 << 21) # define R300_TX_WRAP_S(x) ((x) << 0) # define R300_TX_WRAP_T(x) ((x) << 3) +# define R300_TX_MAX_MIP_LEVEL(x) ((x) << 17) #define R300_TX_FILTER1_0 0x4440 # define R300_CHROMA_KEY_MODE_DISABLE 0 @@ -1500,8 +1503,6 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_TX_HEIGHTMASK_MASK (2047 << 11) # define R300_TX_DEPTHMASK_SHIFT 22 # define R300_TX_DEPTHMASK_MASK (0xf << 22) -# define R300_TX_MAX_MIP_LEVEL_SHIFT 26 -# define R300_TX_MAX_MIP_LEVEL_MASK (0xf << 26) # define R300_TX_SIZE_PROJECTED (1 << 30) # define R300_TX_PITCH_EN (1 << 31) # define R300_TX_WIDTH(x) ((x) << 0) @@ -2144,6 +2145,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* Unpipelined. */ #define R300_RB3D_CCTL 0x4e00 +# define R300_RB3D_CCTL_NUM_MULTIWRITES(x) (MAX2(((x)-1), 0) << 5) # define R300_RB3D_CCTL_NUM_MULTIWRITES_1_BUFFER (0 << 5) # define R300_RB3D_CCTL_NUM_MULTIWRITES_2_BUFFERS (1 << 5) # define R300_RB3D_CCTL_NUM_MULTIWRITES_3_BUFFERS (2 << 5) @@ -3292,6 +3294,11 @@ enum { */ #define R300_PACKET3_3D_LOAD_VBPNTR 0x00002F00 +# define R300_VBPNTR_SIZE0(x) ((x) >> 2) +# define R300_VBPNTR_STRIDE0(x) (((x) >> 2) << 8) +# define R300_VBPNTR_SIZE1(x) (((x) >> 2) << 16) +# define R300_VBPNTR_STRIDE1(x) (((x) >> 2) << 24) + #define R300_PACKET3_INDX_BUFFER 0x00003300 # define R300_INDX_BUFFER_DST_SHIFT 0 # define R300_INDX_BUFFER_SKIP_SHIFT 16 diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 62e1456ed36..a4ac9ad9a7b 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -37,7 +37,6 @@ #include "r300_reg.h" #include "r300_render.h" #include "r300_state_derived.h" -#include "r300_vbo.h" /* r300_render: Vertex and index buffer primitive emission. */ #define R300_MAX_VBO_SIZE (1024 * 1024) @@ -70,14 +69,67 @@ uint32_t r300_translate_primitive(unsigned prim) } } +static boolean r300_nothing_to_draw(struct r300_context *r300) +{ + return r300->rs_state->rs.scissor && + r300->scissor_state->scissor.empty_area; +} + +static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300, + unsigned mode) +{ + uint32_t color_control = r300->rs_state->color_control; + + /* By default (see r300_state.c:r300_create_rs_state) color_control is + * initialized to provoking the first vertex. + * + * Triangle fans must be reduced to the second vertex, not the first, in + * Gallium flatshade-first mode, as per the GL spec. + * (http://www.opengl.org/registry/specs/ARB/provoking_vertex.txt) + * + * Quads never provoke correctly in flatshade-first mode. The first + * vertex is never considered as provoking, so only the second, third, + * and fourth vertices can be selected, and both "third" and "last" modes + * select the fourth vertex. This is probably due to D3D lacking quads. + * + * Similarly, polygons reduce to the first, not the last, vertex, when in + * "last" mode, and all other modes start from the second vertex. + * + * ~ C. + */ + + if (r300->rs_state->rs.flatshade_first) { + switch (mode) { + case PIPE_PRIM_TRIANGLE_FAN: + color_control |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_SECOND; + break; + case PIPE_PRIM_QUADS: + case PIPE_PRIM_QUAD_STRIP: + case PIPE_PRIM_POLYGON: + color_control |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST; + break; + default: + color_control |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_FIRST; + break; + } + } else { + color_control |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST; + } + + return color_control; +} + static void r300_emit_draw_arrays(struct r300_context *r300, unsigned mode, unsigned count) { CS_LOCALS(r300); - BEGIN_CS(4); - OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count); + BEGIN_CS(8); + OUT_CS_REG(R300_GA_COLOR_CONTROL, + r300_provoking_vertex_fixes(r300, mode)); + OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0); + OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count - 1); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) | r300_translate_primitive(mode)); @@ -102,7 +154,10 @@ static void r300_emit_draw_elements(struct r300_context *r300, assert((start * indexSize) % 4 == 0); assert(offset_dwords == 0); - BEGIN_CS(10); + BEGIN_CS(14); + OUT_CS_REG(R300_GA_COLOR_CONTROL, + r300_provoking_vertex_fixes(r300, mode)); + OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, minIndex); OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, maxIndex); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0); if (indexSize == 4) { @@ -158,7 +213,7 @@ validate: } /* This is the fast-path drawing & emission for HW TCL. */ -boolean r300_draw_range_elements(struct pipe_context* pipe, +void r300_draw_range_elements(struct pipe_context* pipe, struct pipe_buffer* indexBuffer, unsigned indexSize, unsigned minIndex, @@ -170,22 +225,34 @@ boolean r300_draw_range_elements(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); if (!u_trim_pipe_prim(mode, &count)) { - return FALSE; + return; } if (count > 65535) { - return FALSE; + /* XXX: use aux/indices functions to split this into smaller + * primitives. + */ + return; + } + + if (r300_nothing_to_draw(r300)) { + return; } r300_update_derived_state(r300); if (!r300_setup_vertex_buffers(r300)) { - return FALSE; + return; } - setup_vertex_attributes(r300); + if (!r300->winsys->add_buffer(r300->winsys, indexBuffer, + RADEON_GEM_DOMAIN_GTT, 0)) { + return; + } - setup_index_buffer(r300, indexBuffer, indexSize); + if (!r300->winsys->validate(r300->winsys)) { + return; + } r300_emit_dirty_state(r300); @@ -193,48 +260,49 @@ boolean r300_draw_range_elements(struct pipe_context* pipe, r300_emit_draw_elements(r300, indexBuffer, indexSize, minIndex, maxIndex, mode, start, count); - - return TRUE; } /* Simple helpers for context setup. Should probably be moved to util. */ -boolean r300_draw_elements(struct pipe_context* pipe, - struct pipe_buffer* indexBuffer, - unsigned indexSize, unsigned mode, - unsigned start, unsigned count) +void r300_draw_elements(struct pipe_context* pipe, + struct pipe_buffer* indexBuffer, + unsigned indexSize, unsigned mode, + unsigned start, unsigned count) { - return pipe->draw_range_elements(pipe, indexBuffer, indexSize, 0, ~0, - mode, start, count); + pipe->draw_range_elements(pipe, indexBuffer, indexSize, 0, ~0, + mode, start, count); } -boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode, +void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, unsigned start, unsigned count) { struct r300_context* r300 = r300_context(pipe); if (!u_trim_pipe_prim(mode, &count)) { - return FALSE; + return; } if (count > 65535) { - return FALSE; + /* XXX: driver needs to handle this -- use the functions in + * aux/indices to split this into several smaller primitives. + */ + return; + } + + if (r300_nothing_to_draw(r300)) { + return; } r300_update_derived_state(r300); if (!r300_setup_vertex_buffers(r300)) { - return FALSE; + return; } - setup_vertex_attributes(r300); - r300_emit_dirty_state(r300); r300_emit_aos(r300, start); r300_emit_draw_arrays(r300, mode, count); - - return TRUE; } /**************************************************************************** @@ -243,7 +311,7 @@ boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode, ***************************************************************************/ /* SW TCL arrays, using Draw. */ -boolean r300_swtcl_draw_arrays(struct pipe_context* pipe, +void r300_swtcl_draw_arrays(struct pipe_context* pipe, unsigned mode, unsigned start, unsigned count) @@ -252,7 +320,11 @@ boolean r300_swtcl_draw_arrays(struct pipe_context* pipe, int i; if (!u_trim_pipe_prim(mode, &count)) { - return FALSE; + return; + } + + if (r300_nothing_to_draw(r300)) { + return; } for (i = 0; i < r300->vertex_buffer_count; i++) { @@ -265,8 +337,9 @@ boolean r300_swtcl_draw_arrays(struct pipe_context* pipe, draw_set_mapped_element_buffer(r300->draw, 0, NULL); draw_set_mapped_constant_buffer(r300->draw, - r300->shader_constants[PIPE_SHADER_VERTEX].constants, - r300->shader_constants[PIPE_SHADER_VERTEX].count * + PIPE_SHADER_VERTEX, + r300->shader_constants[PIPE_SHADER_VERTEX].constants, + r300->shader_constants[PIPE_SHADER_VERTEX].count * (sizeof(float) * 4)); draw_arrays(r300->draw, mode, start, count); @@ -275,12 +348,10 @@ boolean r300_swtcl_draw_arrays(struct pipe_context* pipe, pipe_buffer_unmap(pipe->screen, r300->vertex_buffer[i].buffer); draw_set_mapped_vertex_buffer(r300->draw, i, NULL); } - - return TRUE; } /* SW TCL elements, using Draw. */ -boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, +void r300_swtcl_draw_range_elements(struct pipe_context* pipe, struct pipe_buffer* indexBuffer, unsigned indexSize, unsigned minIndex, @@ -291,9 +362,14 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); int i; + void* indices; if (!u_trim_pipe_prim(mode, &count)) { - return FALSE; + return; + } + + if (r300_nothing_to_draw(r300)) { + return; } for (i = 0; i < r300->vertex_buffer_count; i++) { @@ -303,12 +379,13 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, draw_set_mapped_vertex_buffer(r300->draw, i, buf); } - void* indices = pipe_buffer_map(pipe->screen, indexBuffer, - PIPE_BUFFER_USAGE_CPU_READ); + indices = pipe_buffer_map(pipe->screen, indexBuffer, + PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_element_buffer_range(r300->draw, indexSize, minIndex, maxIndex, indices); draw_set_mapped_constant_buffer(r300->draw, + PIPE_SHADER_VERTEX, r300->shader_constants[PIPE_SHADER_VERTEX].constants, r300->shader_constants[PIPE_SHADER_VERTEX].count * (sizeof(float) * 4)); @@ -323,8 +400,6 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, pipe_buffer_unmap(pipe->screen, indexBuffer); draw_set_mapped_element_buffer_range(r300->draw, 0, start, start + count - 1, NULL); - - return TRUE; } /* Object for rendering using Draw. */ @@ -400,7 +475,7 @@ static void* r300_render_map_vertices(struct vbuf_render* render) r300render->vbo_ptr = pipe_buffer_map(screen, r300render->vbo, PIPE_BUFFER_USAGE_CPU_WRITE); - return (r300render->vbo_ptr + r300render->vbo_offset); + return ((uint8_t*)r300render->vbo_ptr + r300render->vbo_offset); } static void r300_render_unmap_vertices(struct vbuf_render* render, diff --git a/src/gallium/drivers/r300/r300_render.h b/src/gallium/drivers/r300/r300_render.h index da83069083d..27b5e6a9630 100644 --- a/src/gallium/drivers/r300/r300_render.h +++ b/src/gallium/drivers/r300/r300_render.h @@ -25,35 +25,35 @@ uint32_t r300_translate_primitive(unsigned prim); -boolean r300_draw_range_elements(struct pipe_context* pipe, - struct pipe_buffer* indexBuffer, - unsigned indexSize, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count); - -boolean r300_draw_elements(struct pipe_context* pipe, - struct pipe_buffer* indexBuffer, - unsigned indexSize, unsigned mode, - unsigned start, unsigned count); - -boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode, - unsigned start, unsigned count); - -boolean r300_swtcl_draw_arrays(struct pipe_context* pipe, - unsigned mode, - unsigned start, - unsigned count); - -boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, - struct pipe_buffer* indexBuffer, - unsigned indexSize, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count); +void r300_draw_range_elements(struct pipe_context* pipe, + struct pipe_buffer* indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count); + +void r300_draw_elements(struct pipe_context* pipe, + struct pipe_buffer* indexBuffer, + unsigned indexSize, unsigned mode, + unsigned start, unsigned count); + +void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, + unsigned start, unsigned count); + +void r300_swtcl_draw_arrays(struct pipe_context* pipe, + unsigned mode, + unsigned start, + unsigned count); + +void r300_swtcl_draw_range_elements(struct pipe_context* pipe, + struct pipe_buffer* indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count); #endif /* R300_RENDER_H */ diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 390b63007e5..2a8667d4835 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -21,13 +21,15 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "pipe/p_inlines.h" +#include "util/u_format.h" #include "util/u_memory.h" #include "util/u_simple_screen.h" #include "r300_context.h" #include "r300_screen.h" #include "r300_texture.h" -#include "r300_winsys.h" + +#include "radeon_winsys.h" /* Return the identifier behind whom the brave coders responsible for this * amalgamation of code, sweat, and duct tape, routinely obscure their names. @@ -140,6 +142,10 @@ static int r300_get_param(struct pipe_screen* pscreen, int param) return 0; case PIPE_CAP_BLEND_EQUATION_SEPARATE: return 1; + case PIPE_CAP_SM3: + return 1; + case PIPE_CAP_MAX_COMBINED_SAMPLERS: + return 8; default: debug_printf("r300: Implementation error: Bad param %d\n", param); @@ -219,12 +225,18 @@ static boolean check_tex_format(enum pipe_format format, uint32_t usage, /* Z buffer or texture */ case PIPE_FORMAT_Z16_UNORM: + retval = usage & + (PIPE_TEXTURE_USAGE_DEPTH_STENCIL | + PIPE_TEXTURE_USAGE_SAMPLER); + break; + + /* 24bit Z buffer can only be used as a texture on R500. */ case PIPE_FORMAT_Z24X8_UNORM: /* Z buffer with stencil or texture */ case PIPE_FORMAT_Z24S8_UNORM: retval = usage & (PIPE_TEXTURE_USAGE_DEPTH_STENCIL | - PIPE_TEXTURE_USAGE_SAMPLER); + (is_r500 ? PIPE_TEXTURE_USAGE_SAMPLER : 0)); break; /* Definitely unsupported formats. */ @@ -311,14 +323,10 @@ r300_get_tex_transfer(struct pipe_screen *screen, trans = CALLOC_STRUCT(r300_transfer); if (trans) { pipe_texture_reference(&trans->transfer.texture, texture); - trans->transfer.format = texture->format; trans->transfer.x = x; trans->transfer.y = y; trans->transfer.width = w; trans->transfer.height = h; - trans->transfer.block = texture->block; - trans->transfer.nblocksx = texture->nblocksx[level]; - trans->transfer.nblocksy = texture->nblocksy[level]; trans->transfer.stride = r300_texture_get_stride(tex, level); trans->transfer.usage = usage; @@ -344,6 +352,7 @@ static void* r300_transfer_map(struct pipe_screen* screen, { struct r300_texture* tex = (struct r300_texture*)transfer->texture; char* map; + enum pipe_format format = tex->tex.format; map = pipe_buffer_map(screen, tex->buffer, pipe_transfer_buffer_flags(transfer)); @@ -353,8 +362,8 @@ static void* r300_transfer_map(struct pipe_screen* screen, } return map + r300_transfer(transfer)->offset + - transfer->y / transfer->block.height * transfer->stride + - transfer->x / transfer->block.width * transfer->block.size; + transfer->y / util_format_get_blockheight(format) * transfer->stride + + transfer->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); } static void r300_transfer_unmap(struct pipe_screen* screen, @@ -372,7 +381,7 @@ static void r300_destroy_screen(struct pipe_screen* pscreen) FREE(r300screen); } -struct pipe_screen* r300_create_screen(struct r300_winsys* r300_winsys) +struct pipe_screen* r300_create_screen(struct radeon_winsys* radeon_winsys) { struct r300_screen* r300screen = CALLOC_STRUCT(r300_screen); struct r300_capabilities* caps = CALLOC_STRUCT(r300_capabilities); @@ -380,14 +389,14 @@ struct pipe_screen* r300_create_screen(struct r300_winsys* r300_winsys) if (!r300screen || !caps) return NULL; - caps->pci_id = r300_winsys->pci_id; - caps->num_frag_pipes = r300_winsys->gb_pipes; - caps->num_z_pipes = r300_winsys->z_pipes; + caps->pci_id = radeon_winsys->pci_id; + caps->num_frag_pipes = radeon_winsys->gb_pipes; + caps->num_z_pipes = radeon_winsys->z_pipes; r300_parse_chipset(caps); r300screen->caps = caps; - r300screen->screen.winsys = (struct pipe_winsys*)r300_winsys; + r300screen->screen.winsys = (struct pipe_winsys*)radeon_winsys; r300screen->screen.destroy = r300_destroy_screen; r300screen->screen.get_name = r300_get_name; r300screen->screen.get_vendor = r300_get_vendor; diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index 41df31f670f..2217988addd 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -27,6 +27,8 @@ #include "r300_chipset.h" +struct radeon_winsys; + struct r300_screen { /* Parent class */ struct pipe_screen screen; @@ -56,6 +58,6 @@ r300_transfer(struct pipe_transfer* transfer) } /* Creates a new r300 screen. */ -struct pipe_screen* r300_create_screen(struct r300_winsys* r300_winsys); +struct pipe_screen* r300_create_screen(struct radeon_winsys* radeon_winsys); #endif /* R300_SCREEN_H */ diff --git a/src/gallium/drivers/r300/r300_shader_semantics.h b/src/gallium/drivers/r300/r300_shader_semantics.h new file mode 100644 index 00000000000..85184e2cfd7 --- /dev/null +++ b/src/gallium/drivers/r300/r300_shader_semantics.h @@ -0,0 +1,64 @@ +/* + * Copyright 2009 Marek Olšák <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef R300_SHADER_SEMANTICS_H +#define R300_SHADER_SEMANTICS_H + +#define ATTR_UNUSED (-1) +#define ATTR_COLOR_COUNT 2 +#define ATTR_GENERIC_COUNT 16 + +/* This structure contains information about what attributes are written by VS + * or read by FS. (but not both) It's much easier to work with than + * tgsi_shader_info. + * + * The variables contain indices to tgsi_shader_info semantics and those + * indices are nothing else than input/output register numbers. */ +struct r300_shader_semantics { + int pos; + int psize; + int color[ATTR_COLOR_COUNT]; + int bcolor[ATTR_COLOR_COUNT]; + int generic[ATTR_GENERIC_COUNT]; + int fog; +}; + +static INLINE void r300_shader_semantics_reset( + struct r300_shader_semantics* info) +{ + int i; + + info->pos = ATTR_UNUSED; + info->psize = ATTR_UNUSED; + info->fog = ATTR_UNUSED; + + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + info->color[i] = ATTR_UNUSED; + info->bcolor[i] = ATTR_UNUSED; + } + + for (i = 0; i < ATTR_GENERIC_COUNT; i++) { + info->generic[i] = ATTR_UNUSED; + } +} + +#endif diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index d1eced61db1..534c1b5935f 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -151,9 +151,10 @@ static void r300_set_blend_color(struct pipe_context* pipe, const struct pipe_blend_color* color) { struct r300_context* r300 = r300_context(pipe); + union util_color uc; - util_pack_color(color->color, PIPE_FORMAT_A8R8G8B8_UNORM, - &r300->blend_color_state->blend_color); + util_pack_color(color->color, PIPE_FORMAT_A8R8G8B8_UNORM, &uc); + r300->blend_color_state->blend_color = uc.ui; /* XXX if FP16 blending is enabled, we should use the FP16 format */ r300->blend_color_state->blend_color_red_alpha = @@ -282,11 +283,29 @@ static void r300_delete_dsa_state(struct pipe_context* pipe, FREE(state); } -static void r300_set_edgeflags(struct pipe_context* pipe, - const unsigned* bitfield) +static void r300_set_scissor_regs(const struct pipe_scissor_state* state, + struct r300_scissor_regs *scissor, + boolean is_r500) { - /* XXX you know it's bad when i915 has this blank too */ - /* XXX and even worse, I have no idea WTF the bitfield is */ + if (is_r500) { + scissor->top_left = + (state->minx << R300_SCISSORS_X_SHIFT) | + (state->miny << R300_SCISSORS_Y_SHIFT); + scissor->bottom_right = + ((state->maxx - 1) << R300_SCISSORS_X_SHIFT) | + ((state->maxy - 1) << R300_SCISSORS_Y_SHIFT); + } else { + /* Offset of 1440 in non-R500 chipsets. */ + scissor->top_left = + ((state->minx + 1440) << R300_SCISSORS_X_SHIFT) | + ((state->miny + 1440) << R300_SCISSORS_Y_SHIFT); + scissor->bottom_right = + (((state->maxx - 1) + 1440) << R300_SCISSORS_X_SHIFT) | + (((state->maxy - 1) + 1440) << R300_SCISSORS_Y_SHIFT); + } + + scissor->empty_area = state->minx >= state->maxx || + state->miny >= state->maxy; } static void @@ -294,6 +313,7 @@ static void const struct pipe_framebuffer_state* state) { struct r300_context* r300 = r300_context(pipe); + struct pipe_scissor_state scissor; if (r300->draw) { draw_flush(r300->draw); @@ -301,7 +321,18 @@ static void r300->framebuffer_state = *state; + scissor.minx = scissor.miny = 0; + scissor.maxx = state->width; + scissor.maxy = state->height; + r300_set_scissor_regs(&scissor, &r300->scissor_state->framebuffer, + r300_screen(r300->context.screen)->caps->is_r500); + + /* Don't rely on the order of states being set for the first time. */ + if (!r300->rs_state || !r300->rs_state->rs.scissor) { + r300->dirty_state |= R300_NEW_SCISSOR; + } r300->dirty_state |= R300_NEW_FRAMEBUFFERS; + r300->dirty_state |= R300_NEW_BLEND; } /* Create fragment shader state. */ @@ -317,6 +348,7 @@ static void* r300_create_fs_state(struct pipe_context* pipe, fs->state.tokens = tgsi_dup_tokens(shader->tokens); tgsi_scan_shader(shader->tokens, &fs->info); + r300_shader_read_fs_inputs(&fs->info, &fs->inputs); return (void*)fs; } @@ -330,11 +362,10 @@ static void r300_bind_fs_state(struct pipe_context* pipe, void* shader) if (fs == NULL) { r300->fs = NULL; return; - } else if (!fs->translated) { - r300_translate_fragment_shader(r300, fs); } r300->fs = fs; + r300_pick_fragment_shader(r300); r300->dirty_state |= R300_NEW_FRAGMENT_SHADER | R300_NEW_FRAGMENT_SHADER_CONSTANTS; } @@ -343,7 +374,14 @@ static void r300_bind_fs_state(struct pipe_context* pipe, void* shader) static void r300_delete_fs_state(struct pipe_context* pipe, void* shader) { struct r300_fragment_shader* fs = (struct r300_fragment_shader*)shader; - rc_constants_destroy(&fs->code.constants); + struct r300_fragment_shader_code *tmp, *ptr = fs->first; + + while (ptr) { + tmp = ptr; + ptr = ptr->next; + rc_constants_destroy(&tmp->code.constants); + FREE(tmp); + } FREE((void*)fs->state.tokens); FREE(shader); } @@ -382,8 +420,6 @@ static void* r300_create_rs_state(struct pipe_context* pipe, if (state->bypass_vs_clip_and_viewport || !r300_screen(pipe->screen)->caps->has_tcl) { rs->vap_control_status |= R300_VAP_TCL_BYPASS; - } else { - rs->rs.bypass_vs_clip_and_viewport = TRUE; } rs->point_size = pack_float_16_6x(state->point_size) | @@ -474,10 +510,6 @@ static void* r300_create_rs_state(struct pipe_context* pipe, rs->color_control = R300_SHADE_MODEL_SMOOTH; } - if (!state->flatshade_first) { - rs->color_control |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST; - } - return (void*)rs; } @@ -513,6 +545,9 @@ static void* struct r300_context* r300 = r300_context(pipe); struct r300_sampler_state* sampler = CALLOC_STRUCT(r300_sampler_state); int lod_bias; + union util_color uc; + + sampler->state = *state; sampler->filter0 |= (r300_translate_wrap(state->wrap_s) << R300_TX_WRAP_S_SHIFT) | @@ -521,7 +556,13 @@ static void* sampler->filter0 |= r300_translate_tex_filters(state->min_img_filter, state->mag_img_filter, - state->min_mip_filter); + state->min_mip_filter, + state->max_anisotropy > 1.0); + + /* Unfortunately, r300-r500 don't support floating-point mipmap lods. */ + /* We must pass these to the emit function to clamp them properly. */ + sampler->min_lod = MAX2((unsigned)state->min_lod, 0); + sampler->max_lod = MAX2((unsigned)ceilf(state->max_lod), 0); lod_bias = CLAMP((int)(state->lod_bias * 32), -(1 << 9), (1 << 9) - 1); @@ -529,8 +570,8 @@ static void* sampler->filter1 |= r300_anisotropy(state->max_anisotropy); - util_pack_color(state->border_color, PIPE_FORMAT_A8R8G8B8_UNORM, - &sampler->border_color); + util_pack_color(state->border_color, PIPE_FORMAT_A8R8G8B8_UNORM, &uc); + sampler->border_color = uc.ui; /* R500-specific fixups and optimizations */ if (r300_screen(r300->context.screen)->caps->is_r500) { @@ -559,6 +600,20 @@ static void r300_bind_sampler_states(struct pipe_context* pipe, } r300->sampler_count = count; + + /* Pick a fragment shader based on the texture compare state. */ + if (r300->fs && (r300->dirty_state & R300_ANY_NEW_SAMPLERS)) { + if (r300_pick_fragment_shader(r300)) { + r300->dirty_state |= R300_NEW_FRAGMENT_SHADER | + R300_NEW_FRAGMENT_SHADER_CONSTANTS; + } + } +} + +static void r300_lacks_vertex_textures(struct pipe_context* pipe, + unsigned count, + void** states) +{ } static void r300_delete_sampler_state(struct pipe_context* pipe, void* state) @@ -571,6 +626,7 @@ static void r300_set_sampler_textures(struct pipe_context* pipe, struct pipe_texture** texture) { struct r300_context* r300 = r300_context(pipe); + boolean is_r500 = r300_screen(r300->context.screen)->caps->is_r500; int i; /* XXX magic num */ @@ -578,13 +634,18 @@ static void r300_set_sampler_textures(struct pipe_context* pipe, return; } - r300->context.flush(&r300->context, 0, NULL); - for (i = 0; i < count; i++) { if (r300->textures[i] != (struct r300_texture*)texture[i]) { pipe_texture_reference((struct pipe_texture**)&r300->textures[i], texture[i]); r300->dirty_state |= (R300_NEW_TEXTURE << i); + + /* R300-specific - set the texrect factor in a fragment shader */ + if (!is_r500 && r300->textures[i]->is_npot) { + /* XXX It would be nice to re-emit just 1 constant, + * XXX not all of them */ + r300->dirty_state |= R300_NEW_FRAGMENT_SHADER_CONSTANTS; + } } } @@ -604,24 +665,13 @@ static void r300_set_scissor_state(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); - if (r300_screen(r300->context.screen)->caps->is_r500) { - r300->scissor_state->scissor_top_left = - (state->minx << R300_SCISSORS_X_SHIFT) | - (state->miny << R300_SCISSORS_Y_SHIFT); - r300->scissor_state->scissor_bottom_right = - ((state->maxx - 1) << R300_SCISSORS_X_SHIFT) | - ((state->maxy - 1) << R300_SCISSORS_Y_SHIFT); - } else { - /* Offset of 1440 in non-R500 chipsets. */ - r300->scissor_state->scissor_top_left = - ((state->minx + 1440) << R300_SCISSORS_X_SHIFT) | - ((state->miny + 1440) << R300_SCISSORS_Y_SHIFT); - r300->scissor_state->scissor_bottom_right = - (((state->maxx - 1) + 1440) << R300_SCISSORS_X_SHIFT) | - (((state->maxy - 1) + 1440) << R300_SCISSORS_Y_SHIFT); - } + r300_set_scissor_regs(state, &r300->scissor_state->scissor, + r300_screen(r300->context.screen)->caps->is_r500); - r300->dirty_state |= R300_NEW_SCISSOR; + /* Don't rely on the order of states being set for the first time. */ + if (!r300->rs_state || r300->rs_state->rs.scissor) { + r300->dirty_state |= R300_NEW_SCISSOR; + } } static void r300_set_viewport_state(struct pipe_context* pipe, @@ -674,6 +724,8 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe, draw_flush(r300->draw); draw_set_vertex_buffers(r300->draw, count, buffers); } + + r300->dirty_state |= R300_NEW_VERTEX_FORMAT; } static void r300_set_vertex_elements(struct pipe_context* pipe, @@ -706,9 +758,6 @@ static void* r300_create_vs_state(struct pipe_context* pipe, tgsi_scan_shader(shader->tokens, &vs->info); - /* Appease Draw. */ - vs->draw = draw_create_vertex_shader(r300->draw, shader); - return (void*)vs; } else { return draw_create_vertex_shader(r300->draw, shader); @@ -719,8 +768,6 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader) { struct r300_context* r300 = r300_context(pipe); - draw_flush(r300->draw); - if (r300_screen(pipe->screen)->caps->has_tcl) { struct r300_vertex_shader* vs = (struct r300_vertex_shader*)shader; @@ -731,10 +778,10 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader) r300_translate_vertex_shader(r300, vs); } - draw_bind_vertex_shader(r300->draw, vs->draw); r300->vs = vs; r300->dirty_state |= R300_NEW_VERTEX_SHADER | R300_NEW_VERTEX_SHADER_CONSTANTS; } else { + draw_flush(r300->draw); draw_bind_vertex_shader(r300->draw, (struct draw_vertex_shader*)shader); } @@ -748,7 +795,6 @@ static void r300_delete_vs_state(struct pipe_context* pipe, void* shader) struct r300_vertex_shader* vs = (struct r300_vertex_shader*)shader; rc_constants_destroy(&vs->code.constants); - draw_delete_vertex_shader(r300->draw, vs->draw); FREE((void*)vs->state.tokens); FREE(shader); } else { @@ -798,8 +844,6 @@ void r300_init_state_functions(struct r300_context* r300) r300->context.bind_depth_stencil_alpha_state = r300_bind_dsa_state; r300->context.delete_depth_stencil_alpha_state = r300_delete_dsa_state; - r300->context.set_edgeflags = r300_set_edgeflags; - r300->context.set_framebuffer_state = r300_set_framebuffer_state; r300->context.create_fs_state = r300_create_fs_state; @@ -813,10 +857,11 @@ void r300_init_state_functions(struct r300_context* r300) r300->context.delete_rasterizer_state = r300_delete_rs_state; r300->context.create_sampler_state = r300_create_sampler_state; - r300->context.bind_sampler_states = r300_bind_sampler_states; + r300->context.bind_fragment_sampler_states = r300_bind_sampler_states; + r300->context.bind_vertex_sampler_states = r300_lacks_vertex_textures; r300->context.delete_sampler_state = r300_delete_sampler_state; - r300->context.set_sampler_textures = r300_set_sampler_textures; + r300->context.set_fragment_sampler_textures = r300_set_sampler_textures; r300->context.set_scissor_state = r300_set_scissor_state; diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 7166694edf4..727ae7ade6d 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -1,5 +1,6 @@ /* * Copyright 2008 Corbin Simpson <[email protected]> + * Copyright 2009 Marek Olšák <[email protected]> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -28,6 +29,7 @@ #include "r300_context.h" #include "r300_fs.h" #include "r300_screen.h" +#include "r300_shader_semantics.h" #include "r300_state_derived.h" #include "r300_state_inlines.h" #include "r300_vs.h" @@ -47,8 +49,8 @@ struct r300_shader_derived_value { unsigned r300_shader_key_hash(void* key) { struct r300_shader_key* shader_key = (struct r300_shader_key*)key; - unsigned vs = (unsigned)shader_key->vs; - unsigned fs = (unsigned)shader_key->fs; + unsigned vs = (intptr_t)shader_key->vs; + unsigned fs = (intptr_t)shader_key->fs; return (vs << 16) | (fs & 0xffff); } @@ -61,209 +63,152 @@ int r300_shader_key_compare(void* key1, void* key2) { (shader_key1->fs == shader_key2->fs); } -/* Set up the vs_tab and routes. */ -static void r300_vs_tab_routes(struct r300_context* r300, - struct r300_vertex_info* vformat) +static void r300_draw_emit_attrib(struct r300_context* r300, + enum attrib_emit emit, + enum interp_mode interp, + int index) { - struct r300_screen* r300screen = r300_screen(r300->context.screen); - struct vertex_info* vinfo = &vformat->vinfo; - int* tab = vformat->vs_tab; - boolean pos = FALSE, psize = FALSE, fog = FALSE; - int i, texs = 0, cols = 0; - struct tgsi_shader_info* info; + struct tgsi_shader_info* info = &r300->vs->info; + int output; - if (r300screen->caps->has_tcl) { - /* Use vertex shader to determine required routes. */ - info = &r300->vs->info; + output = draw_find_shader_output(r300->draw, + info->output_semantic_name[index], + info->output_semantic_index[index]); + draw_emit_vertex_attr(&r300->vertex_info->vinfo, emit, interp, output); +} + +static void r300_draw_emit_all_attribs(struct r300_context* r300) +{ + struct r300_shader_semantics* vs_outputs = &r300->vs->outputs; + int i, gen_count; + + /* Position. */ + if (vs_outputs->pos != ATTR_UNUSED) { + r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE, + vs_outputs->pos); } else { - /* Use fragment shader to determine required routes. */ - info = &r300->fs->info; + assert(0); } - assert(info->num_inputs <= 16); - - if (!r300screen->caps->has_tcl || !r300->rs_state->enable_vte) - { - for (i = 0; i < info->num_inputs; i++) { - switch (r300->vs->code.inputs[i]) { - case TGSI_SEMANTIC_POSITION: - pos = TRUE; - tab[i] = 0; - break; - case TGSI_SEMANTIC_COLOR: - tab[i] = 2 + cols; - cols++; - break; - case TGSI_SEMANTIC_PSIZE: - assert(psize == FALSE); - psize = TRUE; - tab[i] = 15; - break; - case TGSI_SEMANTIC_FOG: - assert(fog == FALSE); - fog = TRUE; - /* Fall through */ - case TGSI_SEMANTIC_GENERIC: - tab[i] = 6 + texs; - texs++; - break; - default: - debug_printf("r300: Unknown vertex input %d\n", - info->input_semantic_name[i]); - break; - } - } + /* Point size. */ + if (vs_outputs->psize != ATTR_UNUSED) { + r300_draw_emit_attrib(r300, EMIT_1F_PSIZE, INTERP_POS, + vs_outputs->psize); } - else - { - /* Just copy vert attribs over as-is. */ - for (i = 0; i < info->num_inputs; i++) { - tab[i] = i; - } - for (i = 0; i < info->num_outputs; i++) { - switch (info->output_semantic_name[i]) { - case TGSI_SEMANTIC_POSITION: - pos = TRUE; - break; - case TGSI_SEMANTIC_COLOR: - cols++; - break; - case TGSI_SEMANTIC_PSIZE: - psize = TRUE; - break; - case TGSI_SEMANTIC_FOG: - fog = TRUE; - /* Fall through */ - case TGSI_SEMANTIC_GENERIC: - texs++; - break; - default: - debug_printf("r300: Unknown vertex output %d\n", - info->output_semantic_name[i]); - break; - } + /* Colors. */ + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + if (vs_outputs->color[i] != ATTR_UNUSED) { + r300_draw_emit_attrib(r300, EMIT_4F, INTERP_LINEAR, + vs_outputs->color[i]); } } - /* XXX magic */ - assert(texs <= 8); + /* XXX Back-face colors. */ - /* Do the actual vertex_info setup. - * - * vertex_info has four uints of hardware-specific data in it. - * vinfo.hwfmt[0] is R300_VAP_VTX_STATE_CNTL - * vinfo.hwfmt[1] is R300_VAP_VSM_VTX_ASSM - * vinfo.hwfmt[2] is R300_VAP_OUTPUT_VTX_FMT_0 - * vinfo.hwfmt[3] is R300_VAP_OUTPUT_VTX_FMT_1 */ - - vinfo->hwfmt[0] = 0x5555; /* XXX this is classic Mesa bonghits */ - - /* We need to add vertex position attribute only for SW TCL case, - * for HW TCL case it could be generated by vertex shader */ - if (!pos && !r300screen->caps->has_tcl) { - debug_printf("r300: Forcing vertex position attribute emit...\n"); - /* Make room for the position attribute - * at the beginning of the tab. */ - for (i = 15; i > 0; i--) { - tab[i] = tab[i-1]; + /* Texture coordinates. */ + gen_count = 0; + for (i = 0; i < ATTR_GENERIC_COUNT; i++) { + if (vs_outputs->generic[i] != ATTR_UNUSED) { + r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE, + vs_outputs->generic[i]); + gen_count++; } - tab[0] = 0; } - /* Position. */ - if (r300->draw) { - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, - draw_find_vs_output(r300->draw, TGSI_SEMANTIC_POSITION, 0)); + /* Fog coordinates. */ + if (vs_outputs->fog != ATTR_UNUSED) { + r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE, + vs_outputs->fog); + gen_count++; } - vinfo->hwfmt[1] |= R300_INPUT_CNTL_POS; - vinfo->hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT; - /* Point size. */ - if (psize) { - if (r300->draw) { - draw_emit_vertex_attr(vinfo, EMIT_1F_PSIZE, INTERP_POS, - draw_find_vs_output(r300->draw, TGSI_SEMANTIC_PSIZE, 0)); - } - vinfo->hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT; - } + /* XXX magic */ + assert(gen_count <= 8); +} - /* Colors. */ - for (i = 0; i < cols; i++) { - if (r300->draw) { - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR, - draw_find_vs_output(r300->draw, TGSI_SEMANTIC_COLOR, i)); - } - vinfo->hwfmt[1] |= R300_INPUT_CNTL_COLOR; - vinfo->hwfmt[2] |= (R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i); +/* Update the PSC tables. */ +static void r300_vertex_psc(struct r300_context* r300) +{ + struct r300_vertex_info *vformat = r300->vertex_info; + uint16_t type, swizzle; + enum pipe_format format; + unsigned i; + int identity[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + int* stream_tab; + + /* If TCL is bypassed, map vertex streams to equivalent VS output + * locations. */ + if (r300->rs_state->enable_vte) { + stream_tab = identity; + } else { + stream_tab = r300->vs->stream_loc_notcl; } - /* Init i right here, increment it if fog is enabled. - * This gets around a double-increment problem. */ - i = 0; + /* Vertex shaders have no semantics on their inputs, + * so PSC should just route stuff based on the vertex elements, + * and not on attrib information. */ + DBG(r300, DBG_DRAW, "r300: vs expects %d attribs, routing %d elements" + " in psc\n", + r300->vs->info.num_inputs, + r300->vertex_element_count); - /* Fog. This is a special-cased texcoord. */ - if (fog) { - i++; - if (r300->draw) { - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, - draw_find_vs_output(r300->draw, TGSI_SEMANTIC_FOG, 0)); - } - vinfo->hwfmt[1] |= (R300_INPUT_CNTL_TC0 << i); - vinfo->hwfmt[3] |= (4 << (3 * i)); - } + for (i = 0; i < r300->vertex_element_count; i++) { + format = r300->vertex_element[i].src_format; + + type = r300_translate_vertex_data_type(format) | + (stream_tab[i] << R300_DST_VEC_LOC_SHIFT); + swizzle = r300_translate_vertex_data_swizzle(format); - /* Texcoords. */ - for (; i < texs; i++) { - if (r300->draw) { - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, - draw_find_vs_output(r300->draw, TGSI_SEMANTIC_GENERIC, i)); + if (i & 1) { + vformat->vap_prog_stream_cntl[i >> 1] |= type << 16; + vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16; + } else { + vformat->vap_prog_stream_cntl[i >> 1] |= type; + vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle; } - vinfo->hwfmt[1] |= (R300_INPUT_CNTL_TC0 << i); - vinfo->hwfmt[3] |= (4 << (3 * i)); } - draw_compute_vertex_size(vinfo); + assert(i <= 15); + + /* Set the last vector in the PSC. */ + if (i) { + i -= 1; + } + vformat->vap_prog_stream_cntl[i >> 1] |= + (R300_LAST_VEC << (i & 1 ? 16 : 0)); } -/* Update the PSC tables. */ -static void r300_vertex_psc(struct r300_context* r300, - struct r300_vertex_info* vformat) +/* Update the PSC tables for SW TCL, using Draw. */ +static void r300_swtcl_vertex_psc(struct r300_context* r300) { - struct r300_screen* r300screen = r300_screen(r300->context.screen); + struct r300_vertex_info *vformat = r300->vertex_info; struct vertex_info* vinfo = &vformat->vinfo; - int* tab = vformat->vs_tab; uint16_t type, swizzle; enum pipe_format format; unsigned i, attrib_count; + int* vs_output_tab = r300->vs->stream_loc_notcl; - /* Vertex shaders have no semantics on their inputs, - * so PSC should just route stuff based on their info, - * and not on attrib information. */ - if (r300screen->caps->has_tcl) { - attrib_count = r300->vs->info.num_inputs; - DBG(r300, DBG_DRAW, "r300: routing %d attribs in psc for vs\n", - attrib_count); - } else { - attrib_count = vinfo->num_attribs; - DBG(r300, DBG_DRAW, "r300: attrib count: %d\n", attrib_count); - for (i = 0; i < attrib_count; i++) { - DBG(r300, DBG_DRAW, "r300: attrib: offset %d, interp %d, size %d," - " tab %d\n", vinfo->attrib[i].src_index, - vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit, - tab[i]); - } + /* For each Draw attribute, route it to the fragment shader according + * to the vs_output_tab. */ + attrib_count = vinfo->num_attribs; + DBG(r300, DBG_DRAW, "r300: attrib count: %d\n", attrib_count); + for (i = 0; i < attrib_count; i++) { + DBG(r300, DBG_DRAW, "r300: attrib: offset %d, interp %d, size %d," + " vs_output_tab %d\n", vinfo->attrib[i].src_index, + vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit, + vs_output_tab[i]); } for (i = 0; i < attrib_count; i++) { /* Make sure we have a proper destination for our attribute. */ - assert(tab[i] != -1); + assert(vs_output_tab[i] != -1); format = draw_translate_vinfo_format(vinfo->attrib[i].emit); /* Obtain the type of data in this attribute. */ type = r300_translate_vertex_data_type(format) | - tab[i] << R300_DST_VEC_LOC_SHIFT; + vs_output_tab[i] << R300_DST_VEC_LOC_SHIFT; /* Obtain the swizzle for this attribute. Note that the default * swizzle in the hardware is not XYZW! */ @@ -272,12 +217,10 @@ static void r300_vertex_psc(struct r300_context* r300, /* Add the attribute to the PSC table. */ if (i & 1) { vformat->vap_prog_stream_cntl[i >> 1] |= type << 16; - vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16; } else { - vformat->vap_prog_stream_cntl[i >> 1] |= type << 0; - - vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 0; + vformat->vap_prog_stream_cntl[i >> 1] |= type; + vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle; } } @@ -289,185 +232,200 @@ static void r300_vertex_psc(struct r300_context* r300, (R300_LAST_VEC << (i & 1 ? 16 : 0)); } -/* Set up the mappings from GB to US, for RS block. */ -static void r300_update_fs_tab(struct r300_context* r300, - struct r300_vertex_info* vformat) +static void r300_rs_col(struct r300_rs_block* rs, int id, int ptr, + boolean swizzle_0001) +{ + rs->ip[id] |= R300_RS_COL_PTR(ptr); + if (swizzle_0001) { + rs->ip[id] |= R300_RS_COL_FMT(R300_RS_COL_FMT_0001); + } else { + rs->ip[id] |= R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA); + } + rs->inst[id] |= R300_RS_INST_COL_ID(id); +} + +static void r300_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset) { - struct tgsi_shader_info* info = &r300->fs->info; - int i, cols = 0, texs = 0, cols_emitted = 0; - int* tab = vformat->fs_tab; + rs->inst[id] |= R300_RS_INST_COL_CN_WRITE | + R300_RS_INST_COL_ADDR(fp_offset); +} - for (i = 0; i < 16; i++) { - tab[i] = -1; +static void r300_rs_tex(struct r300_rs_block* rs, int id, int ptr, + boolean swizzle_X001) +{ + if (swizzle_X001) { + rs->ip[id] |= R300_RS_TEX_PTR(ptr*4) | + R300_RS_SEL_S(R300_RS_SEL_C0) | + R300_RS_SEL_T(R300_RS_SEL_K0) | + R300_RS_SEL_R(R300_RS_SEL_K0) | + R300_RS_SEL_Q(R300_RS_SEL_K1); + } else { + rs->ip[id] |= R300_RS_TEX_PTR(ptr*4) | + R300_RS_SEL_S(R300_RS_SEL_C0) | + R300_RS_SEL_T(R300_RS_SEL_C1) | + R300_RS_SEL_R(R300_RS_SEL_C2) | + R300_RS_SEL_Q(R300_RS_SEL_C3); } + rs->inst[id] |= R300_RS_INST_TEX_ID(id); +} - assert(info->num_inputs <= 16); - for (i = 0; i < info->num_inputs; i++) { - switch (info->input_semantic_name[i]) { - case TGSI_SEMANTIC_COLOR: - tab[i] = INTERP_LINEAR; - cols++; - break; - case TGSI_SEMANTIC_POSITION: - case TGSI_SEMANTIC_PSIZE: - debug_printf("r300: Implementation error: Can't use " - "pos attribs in fragshader yet!\n"); - /* Pass through for now */ - case TGSI_SEMANTIC_FOG: - case TGSI_SEMANTIC_GENERIC: - tab[i] = INTERP_PERSPECTIVE; - break; - default: - debug_printf("r300: Unknown vertex input %d\n", - info->input_semantic_name[i]); - break; - } +static void r300_rs_tex_write(struct r300_rs_block* rs, int id, int fp_offset) +{ + rs->inst[id] |= R300_RS_INST_TEX_CN_WRITE | + R300_RS_INST_TEX_ADDR(fp_offset); +} + +static void r500_rs_col(struct r300_rs_block* rs, int id, int ptr, + boolean swizzle_0001) +{ + rs->ip[id] |= R500_RS_COL_PTR(ptr); + if (swizzle_0001) { + rs->ip[id] |= R500_RS_COL_FMT(R300_RS_COL_FMT_0001); + } else { + rs->ip[id] |= R500_RS_COL_FMT(R300_RS_COL_FMT_RGBA); } + rs->inst[id] |= R500_RS_INST_COL_ID(id); +} - /* Now that we know where everything is... */ - DBG(r300, DBG_DRAW, "r300: fp input count: %d\n", info->num_inputs); - for (i = 0; i < info->num_inputs; i++) { - switch (tab[i]) { - case INTERP_LINEAR: - DBG(r300, DBG_DRAW, "r300: attrib: " - "stack offset %d, color, tab %d\n", - i, cols_emitted); - tab[i] = cols_emitted; - cols_emitted++; - break; - case INTERP_PERSPECTIVE: - DBG(r300, DBG_DRAW, "r300: attrib: " - "stack offset %d, texcoord, tab %d\n", - i, cols + texs); - tab[i] = cols + texs; - texs++; - break; - case -1: - debug_printf("r300: Implementation error: Bad fp interp!\n"); - default: - break; - } +static void r500_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset) +{ + rs->inst[id] |= R500_RS_INST_COL_CN_WRITE | + R500_RS_INST_COL_ADDR(fp_offset); +} + +static void r500_rs_tex(struct r300_rs_block* rs, int id, int ptr, + boolean swizzle_X001) +{ + int rs_tex_comp = ptr*4; + + if (swizzle_X001) { + rs->ip[id] |= R500_RS_SEL_S(rs_tex_comp) | + R500_RS_SEL_T(R500_RS_IP_PTR_K0) | + R500_RS_SEL_R(R500_RS_IP_PTR_K0) | + R500_RS_SEL_Q(R500_RS_IP_PTR_K1); + } else { + rs->ip[id] |= R500_RS_SEL_S(rs_tex_comp) | + R500_RS_SEL_T(rs_tex_comp + 1) | + R500_RS_SEL_R(rs_tex_comp + 2) | + R500_RS_SEL_Q(rs_tex_comp + 3); } + rs->inst[id] |= R500_RS_INST_TEX_ID(id); +} +static void r500_rs_tex_write(struct r300_rs_block* rs, int id, int fp_offset) +{ + rs->inst[id] |= R500_RS_INST_TEX_CN_WRITE | + R500_RS_INST_TEX_ADDR(fp_offset); } -/* Set up the RS block. This is the part of the chipset that actually does - * the rasterization of vertices into fragments. This is also the part of the - * chipset that locks up if any part of it is even slightly wrong. */ +/* Set up the RS block. + * + * This is the part of the chipset that actually does the rasterization + * of vertices into fragments. This is also the part of the chipset that + * locks up if any part of it is even slightly wrong. */ static void r300_update_rs_block(struct r300_context* r300, - struct r300_rs_block* rs) + struct r300_shader_semantics* vs_outputs, + struct r300_shader_semantics* fs_inputs) { - struct tgsi_shader_info* info = &r300->fs->info; - int col_count = 0, fp_offset = 0, i, tex_count = 0; - int rs_tex_comp = 0; + struct r300_rs_block* rs = r300->rs_block; + int i, col_count = 0, tex_count = 0, fp_offset = 0; + void (*rX00_rs_col)(struct r300_rs_block*, int, int, boolean); + void (*rX00_rs_col_write)(struct r300_rs_block*, int, int); + void (*rX00_rs_tex)(struct r300_rs_block*, int, int, boolean); + void (*rX00_rs_tex_write)(struct r300_rs_block*, int, int); if (r300_screen(r300->context.screen)->caps->is_r500) { - for (i = 0; i < info->num_inputs; i++) { - switch (info->input_semantic_name[i]) { - case TGSI_SEMANTIC_COLOR: - rs->ip[col_count] |= - R500_RS_COL_PTR(col_count) | - R500_RS_COL_FMT(R300_RS_COL_FMT_RGBA); - col_count++; - break; - case TGSI_SEMANTIC_GENERIC: - rs->ip[tex_count] |= - R500_RS_SEL_S(rs_tex_comp) | - R500_RS_SEL_T(rs_tex_comp + 1) | - R500_RS_SEL_R(rs_tex_comp + 2) | - R500_RS_SEL_Q(rs_tex_comp + 3); - tex_count++; - rs_tex_comp += 4; - break; - default: - break; - } - } + rX00_rs_col = r500_rs_col; + rX00_rs_col_write = r500_rs_col_write; + rX00_rs_tex = r500_rs_tex; + rX00_rs_tex_write = r500_rs_tex_write; + } else { + rX00_rs_col = r300_rs_col; + rX00_rs_col_write = r300_rs_col_write; + rX00_rs_tex = r300_rs_tex; + rX00_rs_tex_write = r300_rs_tex_write; + } - /* Rasterize at least one color, or bad things happen. */ - if ((col_count == 0) && (tex_count == 0)) { - rs->ip[0] |= R500_RS_COL_FMT(R300_RS_COL_FMT_0001); + /* Rasterize colors. */ + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + if (vs_outputs->color[i] != ATTR_UNUSED) { + /* Always rasterize if it's written by the VS, + * otherwise it locks up. */ + rX00_rs_col(rs, col_count, i, FALSE); + + /* Write it to the FS input register if it's used by the FS. */ + if (fs_inputs->color[i] != ATTR_UNUSED) { + rX00_rs_col_write(rs, col_count, fp_offset); + fp_offset++; + } col_count++; - } - - for (i = 0; i < tex_count; i++) { - rs->inst[i] |= R500_RS_INST_TEX_ID(i) | - R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_offset); - fp_offset++; - } - - for (i = 0; i < col_count; i++) { - rs->inst[i] |= R500_RS_INST_COL_ID(i) | - R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(fp_offset); - fp_offset++; - } - } else { - for (i = 0; i < info->num_inputs; i++) { - switch (info->input_semantic_name[i]) { - case TGSI_SEMANTIC_COLOR: - rs->ip[col_count] |= - R300_RS_COL_PTR(col_count) | - R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA); - col_count++; - break; - case TGSI_SEMANTIC_GENERIC: - rs->ip[tex_count] |= - R300_RS_TEX_PTR(rs_tex_comp) | - R300_RS_SEL_S(R300_RS_SEL_C0) | - R300_RS_SEL_T(R300_RS_SEL_C1) | - R300_RS_SEL_R(R300_RS_SEL_C2) | - R300_RS_SEL_Q(R300_RS_SEL_C3); - tex_count++; - rs_tex_comp+=4; - break; - default: - break; + } else { + /* Skip the FS input register, leave it uninitialized. */ + /* If we try to set it to (0,0,0,1), it will lock up. */ + if (fs_inputs->color[i] != ATTR_UNUSED) { + fp_offset++; } } + } - if (col_count == 0) { - rs->ip[0] |= R300_RS_COL_FMT(R300_RS_COL_FMT_0001); - } - - if (tex_count == 0) { - rs->ip[0] |= - R300_RS_SEL_S(R300_RS_SEL_K0) | - R300_RS_SEL_T(R300_RS_SEL_K0) | - R300_RS_SEL_R(R300_RS_SEL_K0) | - R300_RS_SEL_Q(R300_RS_SEL_K1); + /* Rasterize texture coordinates. */ + for (i = 0; i < ATTR_GENERIC_COUNT; i++) { + if (vs_outputs->generic[i] != ATTR_UNUSED) { + /* Always rasterize if it's written by the VS, + * otherwise it locks up. */ + rX00_rs_tex(rs, tex_count, tex_count, FALSE); + + /* Write it to the FS input register if it's used by the FS. */ + if (fs_inputs->generic[i] != ATTR_UNUSED) { + rX00_rs_tex_write(rs, tex_count, fp_offset); + fp_offset++; + } + tex_count++; + } else { + /* Skip the FS input register, leave it uninitialized. */ + /* If we try to set it to (0,0,0,1), it will lock up. */ + if (fs_inputs->generic[i] != ATTR_UNUSED) { + fp_offset++; + } } + } - /* Rasterize at least one color, or bad things happen. */ - if ((col_count == 0) && (tex_count == 0)) { - col_count++; - } + /* Rasterize fog coordinates. */ + if (vs_outputs->fog != ATTR_UNUSED) { + /* Always rasterize if it's written by the VS, + * otherwise it locks up. */ + rX00_rs_tex(rs, tex_count, tex_count, TRUE); - for (i = 0; i < tex_count; i++) { - rs->inst[i] |= R300_RS_INST_TEX_ID(i) | - R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_offset); + /* Write it to the FS input register if it's used by the FS. */ + if (fs_inputs->fog != ATTR_UNUSED) { + rX00_rs_tex_write(rs, tex_count, fp_offset); fp_offset++; } - - for (i = 0; i < col_count; i++) { - rs->inst[i] |= R300_RS_INST_COL_ID(i) | - R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(fp_offset); + tex_count++; + } else { + /* Skip the FS input register, leave it uninitialized. */ + /* If we try to set it to (0,0,0,1), it will lock up. */ + if (fs_inputs->fog != ATTR_UNUSED) { fp_offset++; } } - rs->count = (rs_tex_comp) | (col_count << R300_IC_COUNT_SHIFT) | + /* Rasterize at least one color, or bad things happen. */ + if (col_count == 0 && tex_count == 0) { + rX00_rs_col(rs, 0, 0, TRUE); + col_count++; + } + + rs->count = (tex_count*4) | (col_count << R300_IC_COUNT_SHIFT) | R300_HIRES_EN; - rs->inst_count = MAX2(MAX2(col_count - 1, tex_count - 1), 0); + rs->inst_count = MAX3(col_count - 1, tex_count - 1, 0); } /* Update the vertex format. */ static void r300_update_derived_shader_state(struct r300_context* r300) { struct r300_screen* r300screen = r300_screen(r300->context.screen); - struct r300_vertex_info* vformat; - struct r300_rs_block* rs_block; - int i; /* struct r300_shader_key* key; @@ -495,27 +453,45 @@ static void r300_update_derived_shader_state(struct r300_context* r300) (void*)key, (void*)value); } */ - /* XXX This will be refactored ASAP. */ - vformat = CALLOC_STRUCT(r300_vertex_info); - rs_block = CALLOC_STRUCT(r300_rs_block); + /* Reset structures */ + memset(r300->rs_block, 0, sizeof(struct r300_rs_block)); + memset(r300->vertex_info, 0, sizeof(struct r300_vertex_info)); + memcpy(r300->vertex_info->vinfo.hwfmt, r300->vs->hwfmt, sizeof(uint)*4); - for (i = 0; i < 16; i++) { - vformat->vs_tab[i] = -1; - vformat->fs_tab[i] = -1; + r300_update_rs_block(r300, &r300->vs->outputs, &r300->fs->inputs); + + if (r300screen->caps->has_tcl) { + r300_vertex_psc(r300); + } else { + r300_draw_emit_all_attribs(r300); + draw_compute_vertex_size(&r300->vertex_info->vinfo); + r300_swtcl_vertex_psc(r300); } - r300_vs_tab_routes(r300, vformat); - r300_vertex_psc(r300, vformat); - r300_update_fs_tab(r300, vformat); + r300->dirty_state |= R300_NEW_RS_BLOCK; +} - r300_update_rs_block(r300, rs_block); +static boolean r300_dsa_writes_depth_stencil(struct r300_dsa_state* dsa) +{ + /* We are interested only in the cases when a new depth or stencil value + * can be written and changed. */ + + /* We might optionally check for [Z func: never] and inspect the stencil + * state in a similar fashion, but it's not terribly important. */ + return (dsa->z_buffer_control & R300_Z_WRITE_ENABLE) || + (dsa->stencil_ref_mask & R300_STENCILWRITEMASK_MASK) || + ((dsa->z_buffer_control & R500_STENCIL_REFMASK_FRONT_BACK) && + (dsa->stencil_ref_bf & R300_STENCILWRITEMASK_MASK)); +} - FREE(r300->vertex_info); - FREE(r300->rs_block); +static boolean r300_dsa_alpha_test_enabled(struct r300_dsa_state* dsa) +{ + /* We are interested only in the cases when alpha testing can kill + * a fragment. */ + uint32_t af = dsa->alpha_function; - r300->vertex_info = vformat; - r300->rs_block = rs_block; - r300->dirty_state |= (R300_NEW_VERTEX_FORMAT | R300_NEW_RS_BLOCK); + return (af & R300_FG_ALPHA_FUNC_ENABLE) && + (af & R300_FG_ALPHA_FUNC_ALWAYS) != R300_FG_ALPHA_FUNC_ALWAYS; } static void r300_update_ztop(struct r300_context* r300) @@ -534,28 +510,34 @@ static void r300_update_ztop(struct r300_context* r300) * The docs claim that for the first three cases, if no ZS writes happen, * then ZTOP can be used. * + * (3) will never apply since we do not support chroma-keyed operations. + * (4) will need to be re-examined (and this comment updated) if/when + * Hyper-Z becomes supported. + * * Additionally, the following conditions require disabled ZTOP: - * ~) Depth writes in fragment shader - * ~) Outstanding occlusion queries + * 5) Depth writes in fragment shader + * 6) Outstanding occlusion queries * * ~C. */ - if (r300->dsa_state->alpha_function) { - r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE; - } else if (r300->fs->info.uses_kill) { + + /* ZS writes */ + if (r300_dsa_writes_depth_stencil(r300->dsa_state) && + (r300_dsa_alpha_test_enabled(r300->dsa_state) || /* (1) */ + r300->fs->info.uses_kill)) { /* (2) */ r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE; - } else if (r300_fragment_shader_writes_depth(r300->fs)) { + } else if (r300_fragment_shader_writes_depth(r300->fs)) { /* (5) */ r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE; - } else if (r300->query_current) { + } else if (r300->query_current) { /* (6) */ r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE; } } void r300_update_derived_state(struct r300_context* r300) { - /* XXX */ - if (TRUE || r300->dirty_state & - (R300_NEW_FRAGMENT_SHADER | R300_NEW_VERTEX_SHADER)) { + if (r300->dirty_state & + (R300_NEW_FRAGMENT_SHADER | R300_NEW_VERTEX_SHADER | + R300_NEW_VERTEX_FORMAT)) { r300_update_derived_shader_state(r300); } diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index e6c1cb54dac..02adee27015 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -28,6 +28,8 @@ #include "pipe/p_format.h" +#include "util/u_format.h" + #include "r300_reg.h" /* Some maths. These should probably find their way to u_math, if needed. */ @@ -258,35 +260,33 @@ static INLINE uint32_t r300_translate_wrap(int wrap) static INLINE uint32_t r300_translate_tex_filters(int min, int mag, int mip) { uint32_t retval = 0; - switch (min) { + if (is_anisotropic) + retval |= R300_TX_MIN_FILTER_ANISO | R300_TX_MAG_FILTER_ANISO; + else { + switch (min) { case PIPE_TEX_FILTER_NEAREST: retval |= R300_TX_MIN_FILTER_NEAREST; break; case PIPE_TEX_FILTER_LINEAR: retval |= R300_TX_MIN_FILTER_LINEAR; break; - case PIPE_TEX_FILTER_ANISO: - retval |= R300_TX_MIN_FILTER_ANISO; - break; default: debug_printf("r300: Unknown texture filter %d\n", min); assert(0); break; - } - switch (mag) { + } + switch (mag) { case PIPE_TEX_FILTER_NEAREST: retval |= R300_TX_MAG_FILTER_NEAREST; break; case PIPE_TEX_FILTER_LINEAR: retval |= R300_TX_MAG_FILTER_LINEAR; break; - case PIPE_TEX_FILTER_ANISO: - retval |= R300_TX_MAG_FILTER_ANISO; - break; default: debug_printf("r300: Unknown texture filter %d\n", mag); assert(0); break; + } } switch (mip) { case PIPE_TEX_MIPFILTER_NONE: @@ -443,20 +443,22 @@ static INLINE uint32_t r300_translate_gb_pipes(int pipe_count) static INLINE unsigned pf_component_count(enum pipe_format format) { unsigned count = 0; - if (pf_layout(format) != PIPE_FORMAT_LAYOUT_RGBAZS) { - return count; + if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0)) { + count++; } - - if (pf_size_x(format)) { + if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 1)) { count++; } - if (pf_size_y(format)) { + if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 2)) { count++; } - if (pf_size_z(format)) { + if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 3)) { count++; } - if (pf_size_w(format)) { + if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_ZS, 0)) { + count++; + } + if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_ZS, 1)) { count++; } @@ -467,19 +469,23 @@ static INLINE unsigned pf_component_count(enum pipe_format format) { static INLINE uint16_t r300_translate_vertex_data_type(enum pipe_format format) { uint32_t result = 0; + const struct util_format_description *desc; unsigned components = pf_component_count(format); - if (pf_layout(format) != PIPE_FORMAT_LAYOUT_RGBAZS) { + desc = util_format_description(format); + + if (desc->layout != UTIL_FORMAT_LAYOUT_ARITH && + desc->layout != UTIL_FORMAT_LAYOUT_ARRAY) { debug_printf("r300: Bad format %s in %s:%d\n", pf_name(format), __FUNCTION__, __LINE__); assert(0); } - switch (pf_type(format)) { + switch (desc->channel[0].type) { /* Half-floats, floats, doubles */ - case PIPE_FORMAT_TYPE_FLOAT: - switch (pf_size_x(format)) { - case 4: + case UTIL_FORMAT_TYPE_FLOAT: + switch (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0)) { + case 32: result = R300_DATA_TYPE_FLOAT_1 + (components - 1); break; default: @@ -488,19 +494,15 @@ r300_translate_vertex_data_type(enum pipe_format format) { assert(0); } break; - /* Normalized unsigned ints */ - case PIPE_FORMAT_TYPE_UNORM: - /* Normalized signed ints */ - case PIPE_FORMAT_TYPE_SNORM: - /* Non-normalized unsigned ints */ - case PIPE_FORMAT_TYPE_USCALED: - /* Non-normalized signed ints */ - case PIPE_FORMAT_TYPE_SSCALED: - switch (pf_size_x(format)) { - case 1: + /* Unsigned ints */ + case UTIL_FORMAT_TYPE_UNSIGNED: + /* Signed ints */ + case UTIL_FORMAT_TYPE_SIGNED: + switch (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0)) { + case 8: result = R300_DATA_TYPE_BYTE; break; - case 2: + case 16: if (components > 2) { result = R300_DATA_TYPE_SHORT_4; } else { @@ -510,8 +512,8 @@ r300_translate_vertex_data_type(enum pipe_format format) { default: debug_printf("r300: Bad format %s in %s:%d\n", pf_name(format), __FUNCTION__, __LINE__); - debug_printf("r300: pf_size_x(format) == %d\n", - pf_size_x(format)); + debug_printf("r300: util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0) == %d\n", + util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0)); assert(0); } break; @@ -521,12 +523,11 @@ r300_translate_vertex_data_type(enum pipe_format format) { assert(0); } - if (pf_type(format) == PIPE_FORMAT_TYPE_SSCALED) { + if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) { result |= R300_SIGNED; - } else if (pf_type(format) == PIPE_FORMAT_TYPE_UNORM) { + } + if (desc->channel[0].normalized) { result |= R300_NORMALIZE; - } else if (pf_type(format) == PIPE_FORMAT_TYPE_SNORM) { - result |= (R300_SIGNED | R300_NORMALIZE); } return result; @@ -534,17 +535,21 @@ r300_translate_vertex_data_type(enum pipe_format format) { static INLINE uint16_t r300_translate_vertex_data_swizzle(enum pipe_format format) { + const struct util_format_description *desc = util_format_description(format); + + assert(format); - if (pf_layout(format) != PIPE_FORMAT_LAYOUT_RGBAZS) { + if (desc->layout != UTIL_FORMAT_LAYOUT_ARITH && + desc->layout != UTIL_FORMAT_LAYOUT_ARRAY) { debug_printf("r300: Bad format %s in %s:%d\n", pf_name(format), __FUNCTION__, __LINE__); return 0; } - return ((pf_swizzle_x(format) << R300_SWIZZLE_SELECT_X_SHIFT) | - (pf_swizzle_y(format) << R300_SWIZZLE_SELECT_Y_SHIFT) | - (pf_swizzle_z(format) << R300_SWIZZLE_SELECT_Z_SHIFT) | - (pf_swizzle_w(format) << R300_SWIZZLE_SELECT_W_SHIFT) | + return ((desc->swizzle[0] << R300_SWIZZLE_SELECT_X_SHIFT) | + (desc->swizzle[1] << R300_SWIZZLE_SELECT_Y_SHIFT) | + (desc->swizzle[2] << R300_SWIZZLE_SELECT_Z_SHIFT) | + (desc->swizzle[3] << R300_SWIZZLE_SELECT_W_SHIFT) | (0xf << R300_WRITE_ENA_SHIFT)); } diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c index c07e6ae676d..bcd4c030f9c 100644 --- a/src/gallium/drivers/r300/r300_state_invariant.c +++ b/src/gallium/drivers/r300/r300_state_invariant.c @@ -43,7 +43,7 @@ void r300_emit_invariant_state(struct r300_context* r300) struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps; CS_LOCALS(r300); - BEGIN_CS(24 + (caps->has_tcl ? 2: 0)); + BEGIN_CS(20 + (caps->has_tcl ? 2: 0)); /*** Graphics Backend (GB) ***/ /* Various GB enables */ @@ -70,9 +70,6 @@ void r300_emit_invariant_state(struct r300_context* r300) OUT_CS_REG(R300_US_W_FMT, 0x0); /*** VAP ***/ - /* Max and min vertex index clamp. */ - OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0x0); - OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, 0xffffff); /* Sign/normalize control */ OUT_CS_REG(R300_VAP_PSC_SGN_NORM_CNTL, R300_SGN_NORM_NO_ZERO); /* TCL-only stuff */ @@ -84,15 +81,11 @@ void r300_emit_invariant_state(struct r300_context* r300) END_CS; /* XXX unsorted stuff from surface_fill */ - BEGIN_CS(60 + (caps->has_tcl ? 5 : 0) + (caps->is_r500 ? 4 : 0)); - /* Flush PVS. */ - OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0); + BEGIN_CS(44 + (caps->has_tcl ? 7 : 0) + (caps->is_r500 ? 4 : 0)); - OUT_CS_REG(R300_SE_VTE_CNTL, R300_VPORT_X_SCALE_ENA | - R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA | - R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA | - R300_VPORT_Z_OFFSET_ENA | R300_VTX_W0_FMT); if (caps->has_tcl) { + /*Flushing PVS is required before the VAP_GB registers can be changed*/ + OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0); OUT_CS_REG_SEQ(R300_VAP_GB_VERT_CLIP_ADJ, 4); OUT_CS_32F(1.0); OUT_CS_32F(1.0); @@ -123,21 +116,15 @@ void r300_emit_invariant_state(struct r300_context* r300) OUT_CS_REG(R300_SU_DEPTH_OFFSET, 0x00000000); OUT_CS_REG(R300_SC_HYPERZ, 0x0000001C); OUT_CS_REG(R300_SC_EDGERULE, 0x2DA49525); - OUT_CS_REG(R300_RB3D_CCTL, 0x00000000); OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, 0x00000000); if (caps->is_r500) { OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x00000000); OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFFFFFFFF); } - OUT_CS_REG(R300_ZB_FORMAT, 0x00000002); - OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT, 0x00000003); OUT_CS_REG(R300_ZB_BW_CNTL, 0x00000000); OUT_CS_REG(R300_ZB_DEPTHCLEARVALUE, 0x00000000); OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0x00000000); OUT_CS_REG(R300_ZB_HIZ_PITCH, 0x00000000); - OUT_CS_REG(R300_VAP_VTX_STATE_CNTL, 0x1); - OUT_CS_REG(R300_VAP_VSM_VTX_ASSM, 0x405); - OUT_CS_REG(R300_SE_VTE_CNTL, 0x0000043F); /* XXX */ OUT_CS_REG(R300_SC_CLIP_RULE, 0xaaaa); diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index aea25cf71dd..9a96206a4dc 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -22,6 +22,7 @@ #include "pipe/p_screen.h" +#include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -34,8 +35,8 @@ static void r300_setup_texture_state(struct r300_texture* tex, boolean is_r500) struct r300_texture_state* state = &tex->state; struct pipe_texture *pt = &tex->tex; - state->format0 = R300_TX_WIDTH((pt->width[0] - 1) & 0x7ff) | - R300_TX_HEIGHT((pt->height[0] - 1) & 0x7ff); + state->format0 = R300_TX_WIDTH((pt->width0 - 1) & 0x7ff) | + R300_TX_HEIGHT((pt->height0 - 1) & 0x7ff); if (tex->is_npot) { /* rectangles love this */ @@ -43,8 +44,7 @@ static void r300_setup_texture_state(struct r300_texture* tex, boolean is_r500) state->format2 = (tex->pitch[0] - 1) & 0x1fff; } else { /* power of two textures (3D, mipmaps, and no pitch) */ - state->format0 |= R300_TX_DEPTH(util_logbase2(pt->depth[0]) & 0xf) | - R300_TX_NUM_LEVELS(pt->last_level & 0xf); + state->format0 |= R300_TX_DEPTH(util_logbase2(pt->depth0) & 0xf); } state->format1 = r300_translate_texformat(pt->format); @@ -58,17 +58,17 @@ static void r300_setup_texture_state(struct r300_texture* tex, boolean is_r500) /* large textures on r500 */ if (is_r500) { - if (pt->width[0] > 2048) { + if (pt->width0 > 2048) { state->format2 |= R500_TXWIDTH_BIT11; } - if (pt->height[0] > 2048) { + if (pt->height0 > 2048) { state->format2 |= R500_TXHEIGHT_BIT11; } } - assert(is_r500 || (pt->width[0] <= 2048 && pt->height[0] <= 2048)); + assert(is_r500 || (pt->width0 <= 2048 && pt->height0 <= 2048)); debug_printf("r300: Set texture state (%dx%d, %d levels)\n", - pt->width[0], pt->height[0], pt->last_level); + pt->width0, pt->height0, pt->last_level); } unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level, @@ -106,7 +106,7 @@ unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level) return 0; } - return align(pf_get_stride(&tex->tex.block, tex->tex.width[level]), 32); + return align(util_format_get_stride(tex->tex.format, u_minify(tex->tex.width0, level)), 32); } static void r300_setup_miptree(struct r300_texture* tex) @@ -116,39 +116,32 @@ static void r300_setup_miptree(struct r300_texture* tex) int i; for (i = 0; i <= base->last_level; i++) { - if (i > 0) { - base->width[i] = minify(base->width[i-1]); - base->height[i] = minify(base->height[i-1]); - base->depth[i] = minify(base->depth[i-1]); - } - - base->nblocksx[i] = pf_get_nblocksx(&base->block, base->width[i]); - base->nblocksy[i] = pf_get_nblocksy(&base->block, base->height[i]); + unsigned nblocksy = util_format_get_nblocksy(base->format, u_minify(base->height0, i)); stride = r300_texture_get_stride(tex, i); - layer_size = stride * base->nblocksy[i]; + layer_size = stride * nblocksy; if (base->target == PIPE_TEXTURE_CUBE) size = layer_size * 6; else - size = layer_size * base->depth[i]; + size = layer_size * u_minify(base->depth0, i); tex->offset[i] = align(tex->size, 32); tex->size = tex->offset[i] + size; tex->layer_size[i] = layer_size; - tex->pitch[i] = stride / base->block.size; + tex->pitch[i] = stride / util_format_get_blocksize(base->format); debug_printf("r300: Texture miptree: Level %d " "(%dx%dx%d px, pitch %d bytes)\n", - i, base->width[i], base->height[i], base->depth[i], - stride); + i, u_minify(base->width0, i), u_minify(base->height0, i), + u_minify(base->depth0, i), stride); } } static void r300_setup_flags(struct r300_texture* tex) { - tex->is_npot = !util_is_power_of_two(tex->tex.width[0]) || - !util_is_power_of_two(tex->tex.height[0]); + tex->is_npot = !util_is_power_of_two(tex->tex.width0) || + !util_is_power_of_two(tex->tex.height0); } /* Create a new texture. */ @@ -208,8 +201,8 @@ static struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen, pipe_reference_init(&surface->reference, 1); pipe_texture_reference(&surface->texture, texture); surface->format = texture->format; - surface->width = texture->width[level]; - surface->height = texture->height[level]; + surface->width = u_minify(texture->width0, level); + surface->height = u_minify(texture->height0, level); surface->offset = offset; surface->usage = flags; surface->zslice = zslice; @@ -237,7 +230,7 @@ static struct pipe_texture* /* Support only 2D textures without mipmaps */ if (base->target != PIPE_TEXTURE_2D || - base->depth[0] != 1 || + base->depth0 != 1 || base->last_level != 0) { return NULL; } @@ -252,7 +245,7 @@ static struct pipe_texture* tex->tex.screen = screen; tex->stride_override = *stride; - tex->pitch[0] = *stride / base->block.size; + tex->pitch[0] = *stride / util_format_get_blocksize(base->format); r300_setup_flags(tex); r300_setup_texture_state(tex, r300_screen(screen)->caps->is_r500); @@ -287,10 +280,9 @@ r300_video_surface_create(struct pipe_screen *screen, template.target = PIPE_TEXTURE_2D; template.format = PIPE_FORMAT_X8R8G8B8_UNORM; template.last_level = 0; - template.width[0] = util_next_power_of_two(width); - template.height[0] = util_next_power_of_two(height); - template.depth[0] = 1; - pf_get_block(template.format, &template.block); + template.width0 = util_next_power_of_two(width); + template.height0 = util_next_power_of_two(height); + template.depth0 = 1; template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_RENDER_TARGET; diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 589f1984ee3..a792c2cf989 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -120,7 +120,7 @@ static unsigned translate_opcode(unsigned opcode) /* case TGSI_OPCODE_NOT: return RC_OPCODE_NOT; */ /* case TGSI_OPCODE_TRUNC: return RC_OPCODE_TRUNC; */ /* case TGSI_OPCODE_SHL: return RC_OPCODE_SHL; */ - /* case TGSI_OPCODE_SHR: return RC_OPCODE_SHR; */ + /* case TGSI_OPCODE_ISHR: return RC_OPCODE_SHR; */ /* case TGSI_OPCODE_AND: return RC_OPCODE_AND; */ /* case TGSI_OPCODE_OR: return RC_OPCODE_OR; */ /* case TGSI_OPCODE_MOD: return RC_OPCODE_MOD; */ @@ -190,10 +190,10 @@ static void transform_dstreg( struct rc_dst_register * dst, struct tgsi_full_dst_register * src) { - dst->File = translate_register_file(src->DstRegister.File); - dst->Index = translate_register_index(ttr, src->DstRegister.File, src->DstRegister.Index); - dst->WriteMask = src->DstRegister.WriteMask; - dst->RelAddr = src->DstRegister.Indirect; + dst->File = translate_register_file(src->Register.File); + dst->Index = translate_register_index(ttr, src->Register.File, src->Register.Index); + dst->WriteMask = src->Register.WriteMask; + dst->RelAddr = src->Register.Indirect; } static void transform_srcreg( @@ -201,18 +201,19 @@ static void transform_srcreg( struct rc_src_register * dst, struct tgsi_full_src_register * src) { - dst->File = translate_register_file(src->SrcRegister.File); - dst->Index = translate_register_index(ttr, src->SrcRegister.File, src->SrcRegister.Index); - dst->RelAddr = src->SrcRegister.Indirect; + dst->File = translate_register_file(src->Register.File); + dst->Index = translate_register_index(ttr, src->Register.File, src->Register.Index); + dst->RelAddr = src->Register.Indirect; dst->Swizzle = tgsi_util_get_full_src_register_swizzle(src, 0); dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 1) << 3; dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 2) << 6; dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 3) << 9; - dst->Abs = src->SrcRegisterExtMod.Absolute; - dst->Negate = src->SrcRegister.Negate ? RC_MASK_XYZW : 0; + dst->Abs = src->Register.Absolute; + dst->Negate = src->Register.Negate ? RC_MASK_XYZW : 0; } -static void transform_texture(struct rc_instruction * dst, struct tgsi_instruction_ext_texture src) +static void transform_texture(struct rc_instruction * dst, struct tgsi_instruction_texture src, + uint32_t *shadowSamplers) { switch(src.Texture) { case TGSI_TEXTURE_1D: @@ -233,14 +234,17 @@ static void transform_texture(struct rc_instruction * dst, struct tgsi_instructi case TGSI_TEXTURE_SHADOW1D: dst->U.I.TexSrcTarget = RC_TEXTURE_1D; dst->U.I.TexShadow = 1; + *shadowSamplers |= 1 << dst->U.I.TexSrcUnit; break; case TGSI_TEXTURE_SHADOW2D: dst->U.I.TexSrcTarget = RC_TEXTURE_2D; dst->U.I.TexShadow = 1; + *shadowSamplers |= 1 << dst->U.I.TexSrcUnit; break; case TGSI_TEXTURE_SHADOWRECT: dst->U.I.TexSrcTarget = RC_TEXTURE_RECT; dst->U.I.TexShadow = 1; + *shadowSamplers |= 1 << dst->U.I.TexSrcUnit; break; } } @@ -258,17 +262,19 @@ static void transform_instruction(struct tgsi_to_rc * ttr, struct tgsi_full_inst dst->U.I.SaturateMode = translate_saturate(src->Instruction.Saturate); if (src->Instruction.NumDstRegs) - transform_dstreg(ttr, &dst->U.I.DstReg, &src->FullDstRegisters[0]); + transform_dstreg(ttr, &dst->U.I.DstReg, &src->Dst[0]); for(i = 0; i < src->Instruction.NumSrcRegs; ++i) { - if (src->FullSrcRegisters[i].SrcRegister.File == TGSI_FILE_SAMPLER) - dst->U.I.TexSrcUnit = src->FullSrcRegisters[i].SrcRegister.Index; + if (src->Src[i].Register.File == TGSI_FILE_SAMPLER) + dst->U.I.TexSrcUnit = src->Src[i].Register.Index; else - transform_srcreg(ttr, &dst->U.I.SrcReg[i], &src->FullSrcRegisters[i]); + transform_srcreg(ttr, &dst->U.I.SrcReg[i], &src->Src[i]); } /* Texturing. */ - transform_texture(dst, src->InstructionExtTexture); + if (src->Instruction.Texture) + transform_texture(dst, src->Texture, + &ttr->compiler->Program.ShadowSamplers); } static void handle_immediate(struct tgsi_to_rc * ttr, struct tgsi_full_immediate * imm) diff --git a/src/gallium/drivers/r300/r300_vbo.c b/src/gallium/drivers/r300/r300_vbo.c deleted file mode 100644 index a6a159667a3..00000000000 --- a/src/gallium/drivers/r300/r300_vbo.c +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright 2009 Maciej Cencora <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -/* r300_vbo: Various helpers for emitting vertex buffers. Needs cleanup, - * refactoring, etc. */ - -#include "r300_vbo.h" - -#include "pipe/p_format.h" - -#include "r300_cs.h" -#include "r300_context.h" -#include "r300_state_inlines.h" -#include "r300_reg.h" -#include "r300_winsys.h" - -static INLINE void setup_vertex_attribute(struct r300_vertex_info *vinfo, - struct pipe_vertex_element *vert_elem, - unsigned attr_num) -{ - uint16_t hw_fmt1, hw_fmt2; - - hw_fmt1 = r300_translate_vertex_data_type(vert_elem->src_format) | - (attr_num << R300_DST_VEC_LOC_SHIFT); - hw_fmt2 = r300_translate_vertex_data_swizzle(vert_elem->src_format); - - if (attr_num % 2 == 0) - { - vinfo->vap_prog_stream_cntl[attr_num >> 1] = hw_fmt1; - vinfo->vap_prog_stream_cntl_ext[attr_num >> 1] = hw_fmt2; - } - else - { - vinfo->vap_prog_stream_cntl[attr_num >> 1] |= hw_fmt1 << 16; - vinfo->vap_prog_stream_cntl_ext[attr_num >> 1] |= hw_fmt2 << 16; - } -} - -static void finish_vertex_attribs_setup(struct r300_vertex_info *vinfo, - unsigned attribs_num) -{ - uint32_t last_vec_bit = (attribs_num % 2 == 0) ? - (R300_LAST_VEC << 16) : R300_LAST_VEC; - - assert(attribs_num > 0 && attribs_num <= 16); - vinfo->vap_prog_stream_cntl[(attribs_num - 1) >> 1] |= last_vec_bit; -} - -void setup_vertex_attributes(struct r300_context *r300) -{ - struct pipe_vertex_element *vert_elem; - int i; - - for (i = 0; i < r300->vertex_element_count; i++) { - vert_elem = &r300->vertex_element[i]; - setup_vertex_attribute(r300->vertex_info, vert_elem, i); - } - - finish_vertex_attribs_setup(r300->vertex_info, - r300->vertex_element_count); -} - -static INLINE int get_buffer_offset(struct r300_context *r300, - unsigned int buf_nr, - unsigned int elem_offset) -{ - return r300->vertex_buffer[buf_nr].buffer_offset + elem_offset; -} -#if 0 -/* XXX not called at all */ -static void setup_vertex_buffers(struct r300_context *r300) -{ - struct pipe_vertex_element *vert_elem; - int i; - - for (i = 0; i < r300->aos_count; i++) - { - vert_elem = &r300->vertex_element[i]; - /* XXX use translate module to convert the data */ - if (!format_is_supported(vert_elem->src_format, - vert_elem->nr_components)) { - assert(0); - /* - struct pipe_buffer *buf; - const unsigned int max_index = r300->vertex_buffers[vert_elem->vertex_buffer_index].max_index; - buf = pipe_buffer_create(r300->context.screen, 4, usage, vert_elem->nr_components * max_index * sizeof(float)); - */ - } - - if (get_buffer_offset(r300, - vert_elem->vertex_buffer_index, - vert_elem->src_offset) % 4) { - /* XXX need to align buffer */ - assert(0); - } - } -} -#endif -/* XXX these shouldn't be asserts since we can work around bad indexbufs */ -void setup_index_buffer(struct r300_context *r300, - struct pipe_buffer* indexBuffer, - unsigned indexSize) -{ - if (!r300->winsys->add_buffer(r300->winsys, indexBuffer, - RADEON_GEM_DOMAIN_GTT, 0)) { - assert(0); - } - - if (!r300->winsys->validate(r300->winsys)) { - assert(0); - } -} diff --git a/src/gallium/drivers/r300/r300_vbo.h b/src/gallium/drivers/r300/r300_vbo.h deleted file mode 100644 index 7afa75899cf..00000000000 --- a/src/gallium/drivers/r300/r300_vbo.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright 2009 Maciej Cencora <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef R300_VBO_H -#define R300_VBO_H - -struct r300_context; -struct pipe_buffer; - -void setup_vertex_attributes(struct r300_context *r300); - -void setup_index_buffer(struct r300_context *r300, - struct pipe_buffer* indexBuffer, - unsigned indexSize); - -#endif diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index 74ef416dc14..c4ed0d712f9 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -1,5 +1,6 @@ /* * Copyright 2009 Corbin Simpson <[email protected]> + * Copyright 2009 Marek Olšák <[email protected]> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -23,89 +24,229 @@ #include "r300_vs.h" #include "r300_context.h" +#include "r300_screen.h" #include "r300_tgsi_to_rc.h" +#include "r300_reg.h" #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_parse.h" #include "radeon_compiler.h" - -static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) +/* Convert info about VS output semantics into r300_shader_semantics. */ +static void r300_shader_read_vs_outputs( + struct tgsi_shader_info* info, + struct r300_shader_semantics* vs_outputs) { - struct r300_vertex_shader * vs = c->UserData; - struct tgsi_shader_info* info = &vs->info; - struct tgsi_parse_context parser; - struct tgsi_full_declaration * decl; - boolean pointsize = FALSE; - int out_colors = 0; - int colors = 0; - int out_generic = 0; - int generic = 0; int i; + unsigned index; - /* Fill in the input mapping */ - for (i = 0; i < info->num_inputs; i++) - c->code->inputs[i] = i; + r300_shader_semantics_reset(vs_outputs); - /* Fill in the output mapping */ for (i = 0; i < info->num_outputs; i++) { + index = info->output_semantic_index[i]; + switch (info->output_semantic_name[i]) { + case TGSI_SEMANTIC_POSITION: + assert(index == 0); + vs_outputs->pos = i; + break; + case TGSI_SEMANTIC_PSIZE: - pointsize = TRUE; + assert(index == 0); + vs_outputs->psize = i; break; + case TGSI_SEMANTIC_COLOR: - out_colors++; + assert(index <= ATTR_COLOR_COUNT); + vs_outputs->color[index] = i; break; - case TGSI_SEMANTIC_FOG: + + case TGSI_SEMANTIC_BCOLOR: + assert(index <= ATTR_COLOR_COUNT); + vs_outputs->bcolor[index] = i; + break; + case TGSI_SEMANTIC_GENERIC: - out_generic++; + assert(index <= ATTR_GENERIC_COUNT); + vs_outputs->generic[index] = i; break; + + case TGSI_SEMANTIC_FOG: + assert(index == 0); + vs_outputs->fog = i; + break; + + case TGSI_SEMANTIC_EDGEFLAG: + assert(index == 0); + fprintf(stderr, "r300 VP: cannot handle edgeflag output\n"); + assert(0); + break; + default: + assert(0); } } +} - tgsi_parse_init(&parser, vs->state.tokens); +static void r300_shader_vap_output_fmt( + struct r300_shader_semantics* vs_outputs, + uint* hwfmt) +{ + int i, gen_count; - while (!tgsi_parse_end_of_tokens(&parser)) { - tgsi_parse_token(&parser); + /* Do the actual vertex_info setup. + * + * vertex_info has four uints of hardware-specific data in it. + * vinfo.hwfmt[0] is R300_VAP_VTX_STATE_CNTL + * vinfo.hwfmt[1] is R300_VAP_VSM_VTX_ASSM + * vinfo.hwfmt[2] is R300_VAP_OUTPUT_VTX_FMT_0 + * vinfo.hwfmt[3] is R300_VAP_OUTPUT_VTX_FMT_1 */ - if (parser.FullToken.Token.Type != TGSI_TOKEN_TYPE_DECLARATION) - continue; + hwfmt[0] = 0x5555; /* XXX this is classic Mesa bonghits */ - decl = &parser.FullToken.FullDeclaration; + /* Position. */ + if (vs_outputs->pos != ATTR_UNUSED) { + hwfmt[1] |= R300_INPUT_CNTL_POS; + hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT; + } else { + assert(0); + } - if (decl->Declaration.File != TGSI_FILE_OUTPUT) - continue; + /* Point size. */ + if (vs_outputs->psize != ATTR_UNUSED) { + hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT; + } - switch (decl->Semantic.SemanticName) { - case TGSI_SEMANTIC_POSITION: - c->code->outputs[decl->DeclarationRange.First] = 0; - break; - case TGSI_SEMANTIC_PSIZE: - c->code->outputs[decl->DeclarationRange.First] = 1; - break; - case TGSI_SEMANTIC_COLOR: - c->code->outputs[decl->DeclarationRange.First] = 1 + - (pointsize ? 1 : 0) + - colors++; - break; - case TGSI_SEMANTIC_FOG: - case TGSI_SEMANTIC_GENERIC: - c->code->outputs[decl->DeclarationRange.First] = 1 + - (pointsize ? 1 : 0) + - out_colors + - generic++; - break; - default: - debug_printf("r300: vs: Bad semantic declaration %d\n", - decl->Semantic.SemanticName); - break; + /* Colors. */ + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + if (vs_outputs->color[i] != ATTR_UNUSED) { + hwfmt[1] |= R300_INPUT_CNTL_COLOR; + hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i; + } + } + + /* XXX Back-face colors. */ + + /* Texture coordinates. */ + gen_count = 0; + for (i = 0; i < ATTR_GENERIC_COUNT; i++) { + if (vs_outputs->generic[i] != ATTR_UNUSED) { + hwfmt[1] |= (R300_INPUT_CNTL_TC0 << gen_count); + hwfmt[3] |= (4 << (3 * gen_count)); + gen_count++; + } + } + + /* Fog coordinates. */ + if (vs_outputs->fog != ATTR_UNUSED) { + hwfmt[1] |= (R300_INPUT_CNTL_TC0 << gen_count); + hwfmt[3] |= (4 << (3 * gen_count)); + gen_count++; + } + + /* XXX magic */ + assert(gen_count <= 8); +} + +/* Sets up stream mapping to equivalent VS outputs if TCL is bypassed + * or isn't present. */ +static void r300_stream_locations_notcl( + struct r300_shader_semantics* vs_outputs, + int* stream_loc) +{ + int i, tabi = 0, gen_count; + + /* Position. */ + stream_loc[tabi++] = 0; + + /* Point size. */ + if (vs_outputs->psize != ATTR_UNUSED) { + stream_loc[tabi++] = 1; + } + + /* Colors. */ + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + if (vs_outputs->color[i] != ATTR_UNUSED) { + stream_loc[tabi++] = 2 + i; + } + } + + /* Back-face colors. */ + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + if (vs_outputs->bcolor[i] != ATTR_UNUSED) { + stream_loc[tabi++] = 4 + i; + } + } + + /* Texture coordinates. */ + gen_count = 0; + for (i = 0; i < ATTR_GENERIC_COUNT; i++) { + if (vs_outputs->bcolor[i] != ATTR_UNUSED) { + assert(tabi < 16); + stream_loc[tabi++] = 6 + gen_count; + gen_count++; } } - tgsi_parse_free(&parser); + /* Fog coordinates. */ + if (vs_outputs->fog != ATTR_UNUSED) { + assert(tabi < 16); + stream_loc[tabi++] = 6 + gen_count; + gen_count++; + } + + /* XXX magic */ + assert(gen_count <= 8); + + for (; tabi < 16;) { + stream_loc[tabi++] = -1; + } } +static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) +{ + struct r300_vertex_shader * vs = c->UserData; + struct r300_shader_semantics* outputs = &vs->outputs; + struct tgsi_shader_info* info = &vs->info; + int i, reg = 0; + + /* Fill in the input mapping */ + for (i = 0; i < info->num_inputs; i++) + c->code->inputs[i] = i; + + /* Position. */ + if (outputs->pos != ATTR_UNUSED) { + c->code->outputs[outputs->pos] = reg++; + } else { + assert(0); + } + + /* Point size. */ + if (outputs->psize != ATTR_UNUSED) { + c->code->outputs[outputs->psize] = reg++; + } + + /* Colors. */ + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + if (outputs->color[i] != ATTR_UNUSED) { + c->code->outputs[outputs->color[i]] = reg++; + } + } + + /* XXX Back-face colors. */ + + /* Texture coordinates. */ + for (i = 0; i < ATTR_GENERIC_COUNT; i++) { + if (outputs->generic[i] != ATTR_UNUSED) { + c->code->outputs[outputs->generic[i]] = reg++; + } + } + + /* Fog coordinates. */ + if (outputs->fog != ATTR_UNUSED) { + c->code->outputs[outputs->fog] = reg++; + } +} void r300_translate_vertex_shader(struct r300_context* r300, struct r300_vertex_shader* vs) @@ -113,6 +254,11 @@ void r300_translate_vertex_shader(struct r300_context* r300, struct r300_vertex_program_compiler compiler; struct tgsi_to_rc ttr; + /* Initialize. */ + r300_shader_read_vs_outputs(&vs->info, &vs->outputs); + r300_shader_vap_output_fmt(&vs->outputs, vs->hwfmt); + r300_stream_locations_notcl(&vs->outputs, vs->stream_loc_notcl); + /* Setup the compiler */ rc_init(&compiler.Base); @@ -137,7 +283,7 @@ void r300_translate_vertex_shader(struct r300_context* r300, /* Invoke the compiler */ r3xx_compile_vertex_program(&compiler); if (compiler.Base.Error) { - /* Todo: Fail gracefully */ + /* XXX We should fallback using Draw. */ fprintf(stderr, "r300 VP: Compiler error\n"); abort(); } diff --git a/src/gallium/drivers/r300/r300_vs.h b/src/gallium/drivers/r300/r300_vs.h index 2a4ce315e32..67e9db5366f 100644 --- a/src/gallium/drivers/r300/r300_vs.h +++ b/src/gallium/drivers/r300/r300_vs.h @@ -1,5 +1,6 @@ /* * Copyright 2009 Corbin Simpson <[email protected]> + * Copyright 2009 Marek Olšák <[email protected]> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -25,18 +26,22 @@ #include "pipe/p_state.h" #include "tgsi/tgsi_scan.h" - #include "radeon_code.h" +#include "r300_shader_semantics.h" + struct r300_context; struct r300_vertex_shader { /* Parent class */ struct pipe_shader_state state; + struct tgsi_shader_info info; + struct r300_shader_semantics outputs; + uint hwfmt[4]; - /* Fallback shader, because Draw has issues */ - struct draw_vertex_shader* draw; + /* Stream locations for SWTCL or if TCL is bypassed. */ + int stream_loc_notcl[16]; /* Has this shader been translated yet? */ boolean translated; @@ -45,9 +50,6 @@ struct r300_vertex_shader { struct r300_vertex_program_code code; }; - -extern struct r300_vertex_program_code r300_passthrough_vertex_shader; - void r300_translate_vertex_shader(struct r300_context* r300, struct r300_vertex_shader* vs); diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index 864a6146b22..1ae6de70fee 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -35,76 +35,10 @@ extern "C" { #include "pipe/p_state.h" #include "pipe/internal/p_winsys_screen.h" -struct r300_winsys { - /* Parent class */ - struct pipe_winsys base; - - /* Opaque Radeon-specific winsys object. */ - void* radeon_winsys; - - /* PCI ID */ - uint32_t pci_id; - - /* GB pipe count */ - uint32_t gb_pipes; - - /* Z pipe count (rv530 only) */ - uint32_t z_pipes; - - /* GART size. */ - uint32_t gart_size; - - /* VRAM size. */ - uint32_t vram_size; - - /* Add a pipe_buffer to the list of buffer objects to validate. */ - boolean (*add_buffer)(struct r300_winsys* winsys, - struct pipe_buffer* pbuffer, - uint32_t rd, - uint32_t wd); - - /* Revalidate all currently setup pipe_buffers. - * Returns TRUE if a flush is required. */ - boolean (*validate)(struct r300_winsys* winsys); - - /* Check to see if there's room for commands. */ - boolean (*check_cs)(struct r300_winsys* winsys, int size); - - /* Start a command emit. */ - void (*begin_cs)(struct r300_winsys* winsys, - int size, - const char* file, - const char* function, - int line); - - /* Write a dword to the command buffer. */ - void (*write_cs_dword)(struct r300_winsys* winsys, uint32_t dword); - - /* Write a relocated dword to the command buffer. */ - void (*write_cs_reloc)(struct r300_winsys* winsys, - struct pipe_buffer* bo, - uint32_t rd, - uint32_t wd, - uint32_t flags); - - /* Finish a command emit. */ - void (*end_cs)(struct r300_winsys* winsys, - const char* file, - const char* function, - int line); - - /* Flush the CS. */ - void (*flush_cs)(struct r300_winsys* winsys); - - /* winsys flush - callback from winsys when flush required */ - void (*set_flush_cb)(struct r300_winsys *winsys, - void (*flush_cb)(void *), void *data); - - void (*reset_bos)(struct r300_winsys *winsys); -}; +#include "radeon_winsys.h" struct pipe_context* r300_create_context(struct pipe_screen* screen, - struct r300_winsys* r300_winsys); + struct radeon_winsys* radeon_winsys); boolean r300_get_texture_buffer(struct pipe_texture* texture, struct pipe_buffer** buffer, diff --git a/src/gallium/drivers/softpipe/sp_clear.c b/src/gallium/drivers/softpipe/sp_clear.c index 8fac8e6e05f..5f130453c39 100644 --- a/src/gallium/drivers/softpipe/sp_clear.c +++ b/src/gallium/drivers/softpipe/sp_clear.c @@ -36,6 +36,7 @@ #include "util/u_pack_color.h" #include "sp_clear.h" #include "sp_context.h" +#include "sp_query.h" #include "sp_tile_cache.h" @@ -48,12 +49,16 @@ softpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, double depth, unsigned stencil) { struct softpipe_context *softpipe = softpipe_context(pipe); + union util_color uc; unsigned cv; uint i; if (softpipe->no_rast) return; + if (!softpipe_check_render_cond(softpipe)) + return; + #if 0 softpipe_update_derived(softpipe); /* not needed?? */ #endif @@ -62,12 +67,12 @@ softpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) { struct pipe_surface *ps = softpipe->framebuffer.cbufs[i]; - util_pack_color(rgba, ps->format, &cv); - sp_tile_cache_clear(softpipe->cbuf_cache[i], rgba, cv); + util_pack_color(rgba, ps->format, &uc); + sp_tile_cache_clear(softpipe->cbuf_cache[i], rgba, uc.ui); #if !TILE_CLEAR_OPTIMIZATION /* non-cached surface */ - pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, cv); + pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, uc.ui); #endif } } diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 5f60139968a..f3ac6760db5 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -90,14 +90,15 @@ softpipe_destroy( struct pipe_context *pipe ) if (softpipe->draw) draw_destroy( softpipe->draw ); - softpipe->quad.shade->destroy( softpipe->quad.shade ); - softpipe->quad.depth_test->destroy( softpipe->quad.depth_test ); - softpipe->quad.blend->destroy( softpipe->quad.blend ); + softpipe->quad.shade->destroy( softpipe->quad.shade ); + softpipe->quad.depth_test->destroy( softpipe->quad.depth_test ); + softpipe->quad.blend->destroy( softpipe->quad.blend ); for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { sp_destroy_tile_cache(softpipe->cbuf_cache[i]); pipe_surface_reference(&softpipe->framebuffer.cbufs[i], NULL); } + sp_destroy_tile_cache(softpipe->zsbuf_cache); pipe_surface_reference(&softpipe->framebuffer.zsbuf, NULL); @@ -106,6 +107,11 @@ softpipe_destroy( struct pipe_context *pipe ) pipe_texture_reference(&softpipe->texture[i], NULL); } + for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { + sp_destroy_tex_tile_cache(softpipe->vertex_tex_cache[i]); + pipe_texture_reference(&softpipe->vertex_textures[i], NULL); + } + for (i = 0; i < Elements(softpipe->constants); i++) { if (softpipe->constants[i].buffer) { pipe_buffer_reference(&softpipe->constants[i].buffer, NULL); @@ -152,6 +158,11 @@ softpipe_is_texture_referenced( struct pipe_context *pipe, softpipe->tex_cache[i]->texture == texture) return PIPE_REFERENCED_FOR_READ; } + for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { + if (softpipe->vertex_tex_cache[i] && + softpipe->vertex_tex_cache[i]->texture == texture) + return PIPE_REFERENCED_FOR_READ; + } return PIPE_UNREFERENCED; } @@ -165,6 +176,19 @@ softpipe_is_buffer_referenced( struct pipe_context *pipe, } +static void +softpipe_render_condition( struct pipe_context *pipe, + struct pipe_query *query, + uint mode ) +{ + struct softpipe_context *softpipe = softpipe_context( pipe ); + + softpipe->render_cond_query = query; + softpipe->render_cond_mode = mode; +} + + + struct pipe_context * softpipe_create( struct pipe_screen *screen ) { @@ -180,6 +204,7 @@ softpipe_create( struct pipe_screen *screen ) #endif softpipe->dump_fs = debug_get_bool_option( "GALLIUM_DUMP_FS", FALSE ); + softpipe->dump_gs = debug_get_bool_option( "SOFTPIPE_DUMP_GS", FALSE ); softpipe->pipe.winsys = screen->winsys; softpipe->pipe.screen = screen; @@ -191,7 +216,8 @@ softpipe_create( struct pipe_screen *screen ) softpipe->pipe.delete_blend_state = softpipe_delete_blend_state; softpipe->pipe.create_sampler_state = softpipe_create_sampler_state; - softpipe->pipe.bind_sampler_states = softpipe_bind_sampler_states; + softpipe->pipe.bind_fragment_sampler_states = softpipe_bind_sampler_states; + softpipe->pipe.bind_vertex_sampler_states = softpipe_bind_vertex_sampler_states; softpipe->pipe.delete_sampler_state = softpipe_delete_sampler_state; softpipe->pipe.create_depth_stencil_alpha_state = softpipe_create_depth_stencil_state; @@ -210,13 +236,18 @@ softpipe_create( struct pipe_screen *screen ) softpipe->pipe.bind_vs_state = softpipe_bind_vs_state; softpipe->pipe.delete_vs_state = softpipe_delete_vs_state; + softpipe->pipe.create_gs_state = softpipe_create_gs_state; + softpipe->pipe.bind_gs_state = softpipe_bind_gs_state; + softpipe->pipe.delete_gs_state = softpipe_delete_gs_state; + softpipe->pipe.set_blend_color = softpipe_set_blend_color; softpipe->pipe.set_clip_state = softpipe_set_clip_state; softpipe->pipe.set_constant_buffer = softpipe_set_constant_buffer; softpipe->pipe.set_framebuffer_state = softpipe_set_framebuffer_state; softpipe->pipe.set_polygon_stipple = softpipe_set_polygon_stipple; softpipe->pipe.set_scissor_state = softpipe_set_scissor_state; - softpipe->pipe.set_sampler_textures = softpipe_set_sampler_textures; + softpipe->pipe.set_fragment_sampler_textures = softpipe_set_sampler_textures; + softpipe->pipe.set_vertex_sampler_textures = softpipe_set_vertex_sampler_textures; softpipe->pipe.set_viewport_state = softpipe_set_viewport_state; softpipe->pipe.set_vertex_buffers = softpipe_set_vertex_buffers; @@ -225,8 +256,6 @@ softpipe_create( struct pipe_screen *screen ) softpipe->pipe.draw_arrays = softpipe_draw_arrays; softpipe->pipe.draw_elements = softpipe_draw_elements; softpipe->pipe.draw_range_elements = softpipe_draw_range_elements; - softpipe->pipe.set_edgeflags = softpipe_set_edgeflags; - softpipe->pipe.clear = softpipe_clear; softpipe->pipe.flush = softpipe_flush; @@ -236,6 +265,8 @@ softpipe_create( struct pipe_screen *screen ) softpipe_init_query_funcs( softpipe ); + softpipe->pipe.render_condition = softpipe_render_condition; + /* * Alloc caches for accessing drawing surfaces and textures. * Must be before quad stage setup! @@ -246,7 +277,9 @@ softpipe_create( struct pipe_screen *screen ) for (i = 0; i < PIPE_MAX_SAMPLERS; i++) softpipe->tex_cache[i] = sp_create_tex_tile_cache( screen ); - + for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { + softpipe->vertex_tex_cache[i] = sp_create_tex_tile_cache(screen); + } /* setup quad rendering stages */ softpipe->quad.shade = sp_quad_shade_stage(softpipe); @@ -262,7 +295,7 @@ softpipe_create( struct pipe_screen *screen ) goto fail; draw_texture_samplers(softpipe->draw, - PIPE_MAX_SAMPLERS, + PIPE_MAX_VERTEX_SAMPLERS, (struct tgsi_sampler **) softpipe->tgsi.vert_samplers_list); diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index a735573d6fb..73fa744f9d4 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -53,10 +53,12 @@ struct softpipe_context { /** Constant state objects */ struct pipe_blend_state *blend; struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS]; + struct pipe_sampler_state *vertex_samplers[PIPE_MAX_VERTEX_SAMPLERS]; struct pipe_depth_stencil_alpha_state *depth_stencil; struct pipe_rasterizer_state *rasterizer; struct sp_fragment_shader *fs; struct sp_vertex_shader *vs; + struct sp_geometry_shader *gs; /** Other rendering state */ struct pipe_blend_color blend_color; @@ -66,12 +68,15 @@ struct softpipe_context { struct pipe_poly_stipple poly_stipple; struct pipe_scissor_state scissor; struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; + struct pipe_texture *vertex_textures[PIPE_MAX_VERTEX_SAMPLERS]; struct pipe_viewport_state viewport; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; unsigned num_samplers; unsigned num_textures; + unsigned num_vertex_samplers; + unsigned num_vertex_textures; unsigned num_vertex_elements; unsigned num_vertex_buffers; @@ -111,6 +116,10 @@ struct softpipe_context { unsigned line_stipple_counter; + /** Conditional query object and mode */ + struct pipe_query *render_cond_query; + uint render_cond_mode; + /** Software quad rendering pipeline */ struct { struct quad_stage *shade; @@ -121,7 +130,7 @@ struct softpipe_context { /** TGSI exec things */ struct { - struct sp_sampler_varient *vert_samplers_list[PIPE_MAX_SAMPLERS]; + struct sp_sampler_varient *vert_samplers_list[PIPE_MAX_VERTEX_SAMPLERS]; struct sp_sampler_varient *frag_samplers_list[PIPE_MAX_SAMPLERS]; } tgsi; @@ -139,9 +148,11 @@ struct softpipe_context { unsigned tex_timestamp; struct softpipe_tex_tile_cache *tex_cache[PIPE_MAX_SAMPLERS]; + struct softpipe_tex_tile_cache *vertex_tex_cache[PIPE_MAX_VERTEX_SAMPLERS]; unsigned use_sse : 1; unsigned dump_fs : 1; + unsigned dump_gs : 1; unsigned no_rast : 1; }; diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index d4045816d03..3826a9e41a6 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -38,6 +38,7 @@ #include "util/u_prim.h" #include "sp_context.h" +#include "sp_query.h" #include "sp_state.h" #include "draw/draw_context.h" @@ -48,7 +49,7 @@ static void softpipe_map_constant_buffers(struct softpipe_context *sp) { struct pipe_winsys *ws = sp->pipe.winsys; - uint i, size; + uint i, vssize, gssize; for (i = 0; i < PIPE_SHADER_TYPES; i++) { if (sp->constants[i].buffer && sp->constants[i].buffer->size) @@ -57,13 +58,21 @@ softpipe_map_constant_buffers(struct softpipe_context *sp) } if (sp->constants[PIPE_SHADER_VERTEX].buffer) - size = sp->constants[PIPE_SHADER_VERTEX].buffer->size; + vssize = sp->constants[PIPE_SHADER_VERTEX].buffer->size; else - size = 0; + vssize = 0; - draw_set_mapped_constant_buffer(sp->draw, + if (sp->constants[PIPE_SHADER_GEOMETRY].buffer) + gssize = sp->constants[PIPE_SHADER_GEOMETRY].buffer->size; + else + gssize = 0; + + draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_VERTEX, sp->mapped_constants[PIPE_SHADER_VERTEX], - size); + vssize); + draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_GEOMETRY, + sp->mapped_constants[PIPE_SHADER_GEOMETRY], + gssize); } @@ -78,9 +87,10 @@ softpipe_unmap_constant_buffers(struct softpipe_context *sp) */ draw_flush(sp->draw); - draw_set_mapped_constant_buffer(sp->draw, NULL, 0); + draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_VERTEX, NULL, 0); + draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_GEOMETRY, NULL, 0); - for (i = 0; i < 2; i++) { + for (i = 0; i < PIPE_SHADER_TYPES; i++) { if (sp->constants[i].buffer && sp->constants[i].buffer->size) ws->buffer_unmap(ws, sp->constants[i].buffer); sp->mapped_constants[i] = NULL; @@ -88,11 +98,11 @@ softpipe_unmap_constant_buffers(struct softpipe_context *sp) } -boolean +void softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { - return softpipe_draw_elements(pipe, NULL, 0, mode, start, count); + softpipe_draw_elements(pipe, NULL, 0, mode, start, count); } @@ -101,7 +111,7 @@ softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, * Basically, map the vertex buffers (and drawing surfaces), then hand off * the drawing to the 'draw' module. */ -boolean +void softpipe_draw_range_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, @@ -113,6 +123,9 @@ softpipe_draw_range_elements(struct pipe_context *pipe, struct draw_context *draw = sp->draw; unsigned i; + if (!softpipe_check_render_cond(sp)) + return TRUE; + sp->reduced_api_prim = u_reduced_prim(mode); if (sp->dirty) @@ -168,27 +181,17 @@ softpipe_draw_range_elements(struct pipe_context *pipe, softpipe_unmap_constant_buffers(sp); sp->dirty_render_cache = TRUE; - - return TRUE; } -boolean +void softpipe_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count) { - return softpipe_draw_range_elements( pipe, indexBuffer, - indexSize, - 0, 0xffffffff, - mode, start, count ); -} - - -void -softpipe_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags) -{ - struct softpipe_context *sp = softpipe_context(pipe); - draw_set_edgeflags(sp->draw, edgeflags); + softpipe_draw_range_elements( pipe, indexBuffer, + indexSize, + 0, 0xffffffff, + mode, start, count ); } diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c index e38b767cf2c..75dac810a12 100644 --- a/src/gallium/drivers/softpipe/sp_flush.c +++ b/src/gallium/drivers/softpipe/sp_flush.c @@ -55,6 +55,9 @@ softpipe_flush( struct pipe_context *pipe, for (i = 0; i < softpipe->num_textures; i++) { sp_flush_tex_tile_cache(softpipe->tex_cache[i]); } + for (i = 0; i < softpipe->num_vertex_textures; i++) { + sp_flush_tex_tile_cache(softpipe->vertex_tex_cache[i]); + } } if (flags & PIPE_FLUSH_SWAPBUFFERS) { diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c index 4076114d392..27fa126b7c3 100644 --- a/src/gallium/drivers/softpipe/sp_fs_exec.c +++ b/src/gallium/drivers/softpipe/sp_fs_exec.c @@ -126,7 +126,10 @@ exec_run( const struct sp_fragment_shader *base, setup_pos_vector(quad->posCoef, (float)quad->input.x0, (float)quad->input.y0, &machine->QuadPos); - + + /* convert 0 to 1.0 and 1 to -1.0 */ + machine->Face = (float) (quad->input.facing * -2 + 1); + quad->inout.mask &= tgsi_exec_machine_run( machine ); if (quad->inout.mask == 0) return FALSE; diff --git a/src/gallium/drivers/softpipe/sp_query.c b/src/gallium/drivers/softpipe/sp_query.c index 379cf4ad064..4ef5d9f7b1d 100644 --- a/src/gallium/drivers/softpipe/sp_query.c +++ b/src/gallium/drivers/softpipe/sp_query.c @@ -99,6 +99,32 @@ softpipe_get_query_result(struct pipe_context *pipe, } +/** + * Called by rendering function to check rendering is conditional. + * \return TRUE if we should render, FALSE if we should skip rendering + */ +boolean +softpipe_check_render_cond(struct softpipe_context *sp) +{ + struct pipe_context *pipe = &sp->pipe; + boolean b, wait; + uint64_t result; + + if (!sp->render_cond_query) { + return TRUE; /* no query predicate, draw normally */ + } + + wait = (sp->render_cond_mode == PIPE_RENDER_COND_WAIT || + sp->render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT); + + b = pipe->get_query_result(pipe, sp->render_cond_query, wait, &result); + if (b) + return result > 0; + else + return TRUE; +} + + void softpipe_init_query_funcs(struct softpipe_context *softpipe ) { softpipe->pipe.create_query = softpipe_create_query; diff --git a/src/gallium/drivers/softpipe/sp_query.h b/src/gallium/drivers/softpipe/sp_query.h index 05060a45759..736c033897e 100644 --- a/src/gallium/drivers/softpipe/sp_query.h +++ b/src/gallium/drivers/softpipe/sp_query.h @@ -32,6 +32,10 @@ #ifndef SP_QUERY_H #define SP_QUERY_H +extern boolean +softpipe_check_render_cond(struct softpipe_context *sp); + + struct softpipe_context; extern void softpipe_init_query_funcs(struct softpipe_context * ); diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 81fb7aa20c6..bd3532de4f4 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -58,7 +58,9 @@ softpipe_get_param(struct pipe_screen *screen, int param) case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: return PIPE_MAX_SAMPLERS; case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: - return PIPE_MAX_SAMPLERS; + return PIPE_MAX_VERTEX_SAMPLERS; + case PIPE_CAP_MAX_COMBINED_SAMPLERS: + return PIPE_MAX_SAMPLERS + PIPE_MAX_VERTEX_SAMPLERS; case PIPE_CAP_NPOT_TEXTURES: return 1; case PIPE_CAP_TWO_SIDED_STENCIL: @@ -143,6 +145,11 @@ softpipe_is_format_supported( struct pipe_screen *screen, case PIPE_FORMAT_DXT3_RGBA: case PIPE_FORMAT_DXT5_RGBA: case PIPE_FORMAT_Z32_FLOAT: + case PIPE_FORMAT_R8G8_SNORM: + case PIPE_FORMAT_B6UG5SR5S_NORM: + case PIPE_FORMAT_X8UB8UG8SR8S_NORM: + case PIPE_FORMAT_A8B8G8R8_SNORM: + case PIPE_FORMAT_NONE: return FALSE; default: return TRUE; diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index 615581b95f9..3da75364c5d 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -1268,7 +1268,7 @@ void sp_setup_prepare( struct setup_context *setup ) } /* Note: nr_attrs is only used for debugging (vertex printing) */ - setup->nr_vertex_attrs = draw_num_vs_outputs(sp->draw); + setup->nr_vertex_attrs = draw_num_shader_outputs(sp->draw); sp->quad.first->begin( sp->quad.first ); diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 77ee3c1136b..9b18dac67bd 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -50,6 +50,7 @@ #define SP_NEW_VERTEX 0x1000 #define SP_NEW_VS 0x2000 #define SP_NEW_QUERY 0x4000 +#define SP_NEW_GS 0x8000 struct tgsi_sampler; @@ -90,6 +91,11 @@ struct sp_vertex_shader { int max_sampler; /* -1 if no samplers */ }; +/** Subclass of pipe_shader_state */ +struct sp_geometry_shader { + struct pipe_shader_state shader; + struct draw_geometry_shader *draw_data; +}; void * @@ -104,6 +110,10 @@ void * softpipe_create_sampler_state(struct pipe_context *, const struct pipe_sampler_state *); void softpipe_bind_sampler_states(struct pipe_context *, unsigned, void **); +void +softpipe_bind_vertex_sampler_states(struct pipe_context *, + unsigned num_samplers, + void **samplers); void softpipe_delete_sampler_state(struct pipe_context *, void *); void * @@ -139,6 +149,10 @@ void *softpipe_create_vs_state(struct pipe_context *, const struct pipe_shader_state *); void softpipe_bind_vs_state(struct pipe_context *, void *); void softpipe_delete_vs_state(struct pipe_context *, void *); +void *softpipe_create_gs_state(struct pipe_context *, + const struct pipe_shader_state *); +void softpipe_bind_gs_state(struct pipe_context *, void *); +void softpipe_delete_gs_state(struct pipe_context *, void *); void softpipe_set_polygon_stipple( struct pipe_context *, const struct pipe_poly_stipple * ); @@ -150,6 +164,11 @@ void softpipe_set_sampler_textures( struct pipe_context *, unsigned num, struct pipe_texture ** ); +void +softpipe_set_vertex_sampler_textures(struct pipe_context *, + unsigned num_textures, + struct pipe_texture **); + void softpipe_set_viewport_state( struct pipe_context *, const struct pipe_viewport_state * ); @@ -165,14 +184,14 @@ void softpipe_set_vertex_buffers(struct pipe_context *, void softpipe_update_derived( struct softpipe_context *softpipe ); -boolean softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, - unsigned start, unsigned count); +void softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, + unsigned start, unsigned count); -boolean softpipe_draw_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned mode, unsigned start, unsigned count); -boolean +void softpipe_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, unsigned count); +void softpipe_draw_range_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, @@ -181,10 +200,6 @@ softpipe_draw_range_elements(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count); void -softpipe_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags); - - -void softpipe_map_transfers(struct softpipe_context *sp); void diff --git a/src/gallium/drivers/softpipe/sp_state_blend.c b/src/gallium/drivers/softpipe/sp_state_blend.c index efed082f823..95ab3234337 100644 --- a/src/gallium/drivers/softpipe/sp_state_blend.c +++ b/src/gallium/drivers/softpipe/sp_state_blend.c @@ -29,6 +29,7 @@ */ #include "util/u_memory.h" +#include "draw/draw_context.h" #include "sp_context.h" #include "sp_state.h" @@ -45,6 +46,8 @@ void softpipe_bind_blend_state( struct pipe_context *pipe, { struct softpipe_context *softpipe = softpipe_context(pipe); + draw_flush(softpipe->draw); + softpipe->blend = (struct pipe_blend_state *)blend; softpipe->dirty |= SP_NEW_BLEND; @@ -62,6 +65,8 @@ void softpipe_set_blend_color( struct pipe_context *pipe, { struct softpipe_context *softpipe = softpipe_context(pipe); + draw_flush(softpipe->draw); + softpipe->blend_color = *blend_color; softpipe->dirty |= SP_NEW_BLEND; diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index 3bc96b95385..f6856a5f691 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -67,7 +67,7 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) /* compute vertex layout now */ const struct sp_fragment_shader *spfs = softpipe->fs; struct vertex_info *vinfo_vbuf = &softpipe->vertex_info_vbuf; - const uint num = draw_num_vs_outputs(softpipe->draw); + const uint num = draw_current_shader_outputs(softpipe->draw); uint i; /* Tell draw_vbuf to simply emit the whole post-xform vertex @@ -117,13 +117,13 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) } /* this includes texcoords and varying vars */ - src = draw_find_vs_output(softpipe->draw, - spfs->info.input_semantic_name[i], - spfs->info.input_semantic_index[i]); + src = draw_find_shader_output(softpipe->draw, + spfs->info.input_semantic_name[i], + spfs->info.input_semantic_index[i]); draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); } - softpipe->psize_slot = draw_find_vs_output(softpipe->draw, + softpipe->psize_slot = draw_find_shader_output(softpipe->draw, TGSI_SEMANTIC_PSIZE, 0); if (softpipe->psize_slot > 0) { draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, @@ -213,6 +213,19 @@ update_tgsi_samplers( struct softpipe_context *softpipe ) } } } + + for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { + struct softpipe_tex_tile_cache *tc = softpipe->vertex_tex_cache[i]; + + if (tc->texture) { + struct softpipe_texture *spt = softpipe_texture(tc->texture); + + if (spt->timestamp != tc->timestamp) { + sp_tex_tile_cache_validate_texture(tc); + tc->timestamp = spt->timestamp; + } + } + } } diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index b41f7e8ab72..aa12bb215a8 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -69,7 +69,14 @@ softpipe_bind_fs_state(struct pipe_context *pipe, void *fs) { struct softpipe_context *softpipe = softpipe_context(pipe); - softpipe->fs = (struct sp_fragment_shader *) fs; + draw_flush(softpipe->draw); + + if (softpipe->fs == fs) + return; + + draw_flush(softpipe->draw); + + softpipe->fs = fs; softpipe->dirty |= SP_NEW_FS; } @@ -159,9 +166,75 @@ softpipe_set_constant_buffer(struct pipe_context *pipe, assert(shader < PIPE_SHADER_TYPES); assert(index == 0); + draw_flush(softpipe->draw); + /* note: reference counting */ pipe_buffer_reference(&softpipe->constants[shader].buffer, buf ? buf->buffer : NULL); softpipe->dirty |= SP_NEW_CONSTANTS; } + +void * +softpipe_create_gs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + struct sp_geometry_shader *state; + + state = CALLOC_STRUCT(sp_geometry_shader); + if (state == NULL ) + goto fail; + + /* debug */ + if (softpipe->dump_gs) + tgsi_dump(templ->tokens, 0); + + /* copy shader tokens, the ones passed in will go away. + */ + state->shader.tokens = tgsi_dup_tokens(templ->tokens); + if (state->shader.tokens == NULL) + goto fail; + + state->draw_data = draw_create_geometry_shader(softpipe->draw, templ); + if (state->draw_data == NULL) + goto fail; + + return state; + +fail: + if (state) { + FREE( (void *)state->shader.tokens ); + FREE( state->draw_data ); + FREE( state ); + } + return NULL; +} + + +void +softpipe_bind_gs_state(struct pipe_context *pipe, void *gs) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + softpipe->gs = (struct sp_geometry_shader *)gs; + + draw_bind_geometry_shader(softpipe->draw, + (softpipe->gs ? softpipe->gs->draw_data : NULL)); + + softpipe->dirty |= SP_NEW_GS; +} + + +void +softpipe_delete_gs_state(struct pipe_context *pipe, void *gs) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + struct sp_geometry_shader *state = + (struct sp_geometry_shader *)gs; + + draw_delete_geometry_shader(softpipe->draw, + (state) ? state->draw_data : 0); + FREE(state); +} diff --git a/src/gallium/drivers/softpipe/sp_state_rasterizer.c b/src/gallium/drivers/softpipe/sp_state_rasterizer.c index 87b72196838..a5b00336d44 100644 --- a/src/gallium/drivers/softpipe/sp_state_rasterizer.c +++ b/src/gallium/drivers/softpipe/sp_state_rasterizer.c @@ -41,14 +41,17 @@ softpipe_create_rasterizer_state(struct pipe_context *pipe, } void softpipe_bind_rasterizer_state(struct pipe_context *pipe, - void *setup) + void *rasterizer) { struct softpipe_context *softpipe = softpipe_context(pipe); + if (softpipe->rasterizer == rasterizer) + return; + /* pass-through to draw module */ - draw_set_rasterizer_state(softpipe->draw, setup); + draw_set_rasterizer_state(softpipe->draw, rasterizer); - softpipe->rasterizer = (struct pipe_rasterizer_state *)setup; + softpipe->rasterizer = rasterizer; softpipe->dirty |= SP_NEW_RASTERIZER; } diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c index db0b8ab76b1..ceb4e338f1a 100644 --- a/src/gallium/drivers/softpipe/sp_state_sampler.c +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -94,6 +94,34 @@ softpipe_bind_sampler_states(struct pipe_context *pipe, void +softpipe_bind_vertex_sampler_states(struct pipe_context *pipe, + unsigned num_samplers, + void **samplers) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + unsigned i; + + assert(num_samplers <= PIPE_MAX_VERTEX_SAMPLERS); + + /* Check for no-op */ + if (num_samplers == softpipe->num_vertex_samplers && + !memcmp(softpipe->vertex_samplers, samplers, num_samplers * sizeof(void *))) + return; + + draw_flush(softpipe->draw); + + for (i = 0; i < num_samplers; ++i) + softpipe->vertex_samplers[i] = samplers[i]; + for (i = num_samplers; i < PIPE_MAX_VERTEX_SAMPLERS; ++i) + softpipe->vertex_samplers[i] = NULL; + + softpipe->num_vertex_samplers = num_samplers; + + softpipe->dirty |= SP_NEW_SAMPLER; +} + + +void softpipe_set_sampler_textures(struct pipe_context *pipe, unsigned num, struct pipe_texture **texture) { @@ -122,6 +150,37 @@ softpipe_set_sampler_textures(struct pipe_context *pipe, } +void +softpipe_set_vertex_sampler_textures(struct pipe_context *pipe, + unsigned num_textures, + struct pipe_texture **textures) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + uint i; + + assert(num_textures <= PIPE_MAX_VERTEX_SAMPLERS); + + /* Check for no-op */ + if (num_textures == softpipe->num_vertex_textures && + !memcmp(softpipe->vertex_textures, textures, num_textures * sizeof(struct pipe_texture *))) { + return; + } + + draw_flush(softpipe->draw); + + for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { + struct pipe_texture *tex = i < num_textures ? textures[i] : NULL; + + pipe_texture_reference(&softpipe->vertex_textures[i], tex); + sp_tex_tile_cache_set_texture(softpipe->vertex_tex_cache[i], tex); + } + + softpipe->num_vertex_textures = num_textures; + + softpipe->dirty |= SP_NEW_TEXTURE; +} + + /** * Find/create an sp_sampler_varient object for sampling the given texture, * sampler and tex unit. @@ -185,16 +244,16 @@ softpipe_reset_sampler_varients(struct softpipe_context *softpipe) * fragment programs. */ for (i = 0; i <= softpipe->vs->max_sampler; i++) { - if (softpipe->sampler[i]) { + if (softpipe->vertex_samplers[i]) { softpipe->tgsi.vert_samplers_list[i] = get_sampler_varient( i, - sp_sampler(softpipe->sampler[i]), - softpipe->texture[i], + sp_sampler(softpipe->vertex_samplers[i]), + softpipe->vertex_textures[i], TGSI_PROCESSOR_VERTEX ); sp_sampler_varient_bind_texture( softpipe->tgsi.vert_samplers_list[i], - softpipe->tex_cache[i], - softpipe->texture[i] ); + softpipe->vertex_tex_cache[i], + softpipe->vertex_textures[i] ); } } diff --git a/src/gallium/drivers/softpipe/sp_state_surface.c b/src/gallium/drivers/softpipe/sp_state_surface.c index bc0e2011300..f6154109ea8 100644 --- a/src/gallium/drivers/softpipe/sp_state_surface.c +++ b/src/gallium/drivers/softpipe/sp_state_surface.c @@ -35,6 +35,8 @@ #include "draw/draw_context.h" +#include "util/u_format.h" + /** * XXX this might get moved someday @@ -49,6 +51,8 @@ softpipe_set_framebuffer_state(struct pipe_context *pipe, struct softpipe_context *sp = softpipe_context(pipe); uint i; + draw_flush(sp->draw); + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { /* check if changing cbuf */ if (sp->framebuffer.cbufs[i] != fb->cbufs[i]) { @@ -80,8 +84,9 @@ softpipe_set_framebuffer_state(struct pipe_context *pipe, if (sp->framebuffer.zsbuf) { int depth_bits; double mrd; - depth_bits = pf_get_component_bits(sp->framebuffer.zsbuf->format, - PIPE_FORMAT_COMP_Z); + depth_bits = util_format_get_component_bits(sp->framebuffer.zsbuf->format, + UTIL_FORMAT_COLORSPACE_ZS, + 0); if (depth_bits > 16) { mrd = 0.0000001; } diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index c22ee86b66c..e26153b1d90 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -521,7 +521,7 @@ compute_lambda_1d(const struct sp_sampler_varient *samp, const struct pipe_sampler_state *sampler = samp->sampler; float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]); float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]); - float rho = MAX2(dsdx, dsdy) * texture->width[0]; + float rho = MAX2(dsdx, dsdy) * texture->width0; float lambda; lambda = util_fast_log2(rho); @@ -545,8 +545,8 @@ compute_lambda_2d(const struct sp_sampler_varient *samp, float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]); float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]); float dtdy = fabsf(t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]); - float maxx = MAX2(dsdx, dsdy) * texture->width[0]; - float maxy = MAX2(dtdx, dtdy) * texture->height[0]; + float maxx = MAX2(dsdx, dsdy) * texture->width0; + float maxy = MAX2(dtdx, dtdy) * texture->height0; float rho = MAX2(maxx, maxy); float lambda; @@ -573,9 +573,9 @@ compute_lambda_3d(const struct sp_sampler_varient *samp, float dtdy = fabsf(t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]); float dpdx = fabsf(p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]); float dpdy = fabsf(p[QUAD_TOP_LEFT] - p[QUAD_BOTTOM_LEFT]); - float maxx = MAX2(dsdx, dsdy) * texture->width[0]; - float maxy = MAX2(dtdx, dtdy) * texture->height[0]; - float maxz = MAX2(dpdx, dpdy) * texture->depth[0]; + float maxx = MAX2(dsdx, dsdy) * texture->width0; + float maxy = MAX2(dtdx, dtdy) * texture->height0; + float maxz = MAX2(dpdx, dpdy) * texture->depth0; float rho, lambda; rho = MAX2(maxx, maxy); @@ -644,8 +644,8 @@ get_texel_2d(const struct sp_sampler_varient *samp, const struct pipe_texture *texture = samp->texture; unsigned level = addr.bits.level; - if (x < 0 || x >= (int) texture->width[level] || - y < 0 || y >= (int) texture->height[level]) { + if (x < 0 || x >= (int) u_minify(texture->width0, level) || + y < 0 || y >= (int) u_minify(texture->height0, level)) { return samp->sampler->border_color; } else { @@ -737,9 +737,9 @@ get_texel_3d(const struct sp_sampler_varient *samp, const struct pipe_texture *texture = samp->texture; unsigned level = addr.bits.level; - if (x < 0 || x >= (int) texture->width[level] || - y < 0 || y >= (int) texture->height[level] || - z < 0 || z >= (int) texture->depth[level]) { + if (x < 0 || x >= (int) u_minify(texture->width0, level) || + y < 0 || y >= (int) u_minify(texture->height0, level) || + z < 0 || z >= (int) u_minify(texture->depth0, level)) { return samp->sampler->border_color; } else { @@ -925,7 +925,7 @@ img_filter_1d_nearest(struct tgsi_sampler *tgsi_sampler, union tex_tile_address addr; level0 = samp->level; - width = texture->width[level0]; + width = u_minify(texture->width0, level0); assert(width > 0); @@ -961,8 +961,8 @@ img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler, level0 = samp->level; - width = texture->width[level0]; - height = texture->height[level0]; + width = u_minify(texture->width0, level0); + height = u_minify(texture->height0, level0); assert(width > 0); assert(height > 0); @@ -1008,8 +1008,8 @@ img_filter_cube_nearest(struct tgsi_sampler *tgsi_sampler, union tex_tile_address addr; level0 = samp->level; - width = texture->width[level0]; - height = texture->height[level0]; + width = u_minify(texture->width0, level0); + height = u_minify(texture->height0, level0); assert(width > 0); assert(height > 0); @@ -1046,9 +1046,9 @@ img_filter_3d_nearest(struct tgsi_sampler *tgsi_sampler, union tex_tile_address addr; level0 = samp->level; - width = texture->width[level0]; - height = texture->height[level0]; - depth = texture->depth[level0]; + width = u_minify(texture->width0, level0); + height = u_minify(texture->height0, level0); + depth = u_minify(texture->depth0, level0); assert(width > 0); assert(height > 0); @@ -1088,7 +1088,7 @@ img_filter_1d_linear(struct tgsi_sampler *tgsi_sampler, union tex_tile_address addr; level0 = samp->level; - width = texture->width[level0]; + width = u_minify(texture->width0, level0); assert(width > 0); @@ -1127,8 +1127,8 @@ img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler, union tex_tile_address addr; level0 = samp->level; - width = texture->width[level0]; - height = texture->height[level0]; + width = u_minify(texture->width0, level0); + height = u_minify(texture->height0, level0); assert(width > 0); assert(height > 0); @@ -1174,8 +1174,8 @@ img_filter_cube_linear(struct tgsi_sampler *tgsi_sampler, union tex_tile_address addr; level0 = samp->level; - width = texture->width[level0]; - height = texture->height[level0]; + width = u_minify(texture->width0, level0); + height = u_minify(texture->height0, level0); assert(width > 0); assert(height > 0); @@ -1221,9 +1221,9 @@ img_filter_3d_linear(struct tgsi_sampler *tgsi_sampler, union tex_tile_address addr; level0 = samp->level; - width = texture->width[level0]; - height = texture->height[level0]; - depth = texture->depth[level0]; + width = u_minify(texture->width0, level0); + height = u_minify(texture->height0, level0); + depth = u_minify(texture->depth0, level0); addr.value = 0; addr.bits.level = level0; @@ -1778,8 +1778,8 @@ sp_sampler_varient_bind_texture( struct sp_sampler_varient *samp, samp->texture = texture; samp->cache = tex_cache; - samp->xpot = util_unsigned_logbase2( texture->width[0] ); - samp->ypot = util_unsigned_logbase2( texture->height[0] ); + samp->xpot = util_unsigned_logbase2( texture->width0 ); + samp->ypot = util_unsigned_logbase2( texture->height0 ); samp->level = CLAMP((int) sampler->min_lod, 0, (int) texture->last_level); } diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c index 407a22a9f4b..e50a76a73bc 100644 --- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c @@ -35,6 +35,7 @@ #include "pipe/p_inlines.h" #include "util/u_memory.h" #include "util/u_tile.h" +#include "util/u_math.h" #include "sp_context.h" #include "sp_surface.h" #include "sp_texture.h" @@ -246,9 +247,9 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc, addr.bits.level, addr.bits.z, PIPE_TRANSFER_READ, 0, 0, - tc->texture->width[addr.bits.level], - tc->texture->height[addr.bits.level]); - + u_minify(tc->texture->width0, addr.bits.level), + u_minify(tc->texture->height0, addr.bits.level)); + tc->tex_trans_map = screen->transfer_map(screen, tc->tex_trans); tc->tex_face = addr.bits.face; diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 7caf2928b4b..a9436a33942 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -32,6 +32,8 @@ #include "pipe/p_defines.h" #include "pipe/p_inlines.h" + +#include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -52,29 +54,28 @@ softpipe_texture_layout(struct pipe_screen *screen, { struct pipe_texture *pt = &spt->base; unsigned level; - unsigned width = pt->width[0]; - unsigned height = pt->height[0]; - unsigned depth = pt->depth[0]; + unsigned width = pt->width0; + unsigned height = pt->height0; + unsigned depth = pt->depth0; unsigned buffer_size = 0; + pt->width0 = width; + pt->height0 = height; + pt->depth0 = depth; + for (level = 0; level <= pt->last_level; level++) { - pt->width[level] = width; - pt->height[level] = height; - pt->depth[level] = depth; - pt->nblocksx[level] = pf_get_nblocksx(&pt->block, width); - pt->nblocksy[level] = pf_get_nblocksy(&pt->block, height); - spt->stride[level] = pt->nblocksx[level]*pt->block.size; + spt->stride[level] = util_format_get_stride(pt->format, width); spt->level_offset[level] = buffer_size; - buffer_size += (pt->nblocksy[level] * + buffer_size += (util_format_get_nblocksy(pt->format, height) * ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) * spt->stride[level]); - width = minify(width); - height = minify(height); - depth = minify(depth); + width = u_minify(width, 1); + height = u_minify(height, 1); + depth = u_minify(depth, 1); } spt->buffer = screen->buffer_create(screen, 32, @@ -96,12 +97,9 @@ softpipe_displaytarget_layout(struct pipe_screen *screen, PIPE_BUFFER_USAGE_GPU_READ_WRITE); unsigned tex_usage = spt->base.tex_usage; - spt->base.nblocksx[0] = pf_get_nblocksx(&spt->base.block, spt->base.width[0]); - spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height[0]); - spt->buffer = screen->surface_buffer_create( screen, - spt->base.width[0], - spt->base.height[0], + spt->base.width0, + spt->base.height0, spt->base.format, usage, tex_usage, @@ -126,9 +124,9 @@ softpipe_texture_create(struct pipe_screen *screen, pipe_reference_init(&spt->base.reference, 1); spt->base.screen = screen; - spt->pot = (util_is_power_of_two(template->width[0]) && - util_is_power_of_two(template->height[0]) && - util_is_power_of_two(template->depth[0])); + spt->pot = (util_is_power_of_two(template->width0) && + util_is_power_of_two(template->height0) && + util_is_power_of_two(template->depth0)); if (spt->base.tex_usage & (PIPE_TEXTURE_USAGE_DISPLAY_TARGET | PIPE_TEXTURE_USAGE_PRIMARY)) { @@ -163,7 +161,7 @@ softpipe_texture_blanket(struct pipe_screen * screen, /* Only supports one type */ if (base->target != PIPE_TEXTURE_2D || base->last_level != 0 || - base->depth[0] != 1) { + base->depth0 != 1) { return NULL; } @@ -174,8 +172,6 @@ softpipe_texture_blanket(struct pipe_screen * screen, spt->base = *base; pipe_reference_init(&spt->base.reference, 1); spt->base.screen = screen; - spt->base.nblocksx[0] = pf_get_nblocksx(&spt->base.block, spt->base.width[0]); - spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height[0]); spt->stride[0] = stride[0]; pipe_buffer_reference(&spt->buffer, buffer); @@ -213,8 +209,8 @@ softpipe_get_tex_surface(struct pipe_screen *screen, pipe_reference_init(&ps->reference, 1); pipe_texture_reference(&ps->texture, pt); ps->format = pt->format; - ps->width = pt->width[level]; - ps->height = pt->height[level]; + ps->width = u_minify(pt->width0, level); + ps->height = u_minify(pt->height0, level); ps->offset = spt->level_offset[level]; ps->usage = usage; @@ -243,10 +239,12 @@ softpipe_get_tex_surface(struct pipe_screen *screen, ps->zslice = zslice; if (pt->target == PIPE_TEXTURE_CUBE) { - ps->offset += face * pt->nblocksy[level] * spt->stride[level]; + ps->offset += face * util_format_get_nblocksy(pt->format, u_minify(pt->height0, level)) * + spt->stride[level]; } else if (pt->target == PIPE_TEXTURE_3D) { - ps->offset += zslice * pt->nblocksy[level] * spt->stride[level]; + ps->offset += zslice * util_format_get_nblocksy(pt->format, u_minify(pt->height0, level)) * + spt->stride[level]; } else { assert(face == 0); @@ -301,15 +299,12 @@ softpipe_get_tex_transfer(struct pipe_screen *screen, spt = CALLOC_STRUCT(softpipe_transfer); if (spt) { struct pipe_transfer *pt = &spt->base; + int nblocksy = util_format_get_nblocksy(texture->format, u_minify(texture->height0, level)); pipe_texture_reference(&pt->texture, texture); - pt->format = texture->format; - pt->block = texture->block; pt->x = x; pt->y = y; pt->width = w; pt->height = h; - pt->nblocksx = texture->nblocksx[level]; - pt->nblocksy = texture->nblocksy[level]; pt->stride = sptex->stride[level]; pt->usage = usage; pt->face = face; @@ -319,10 +314,10 @@ softpipe_get_tex_transfer(struct pipe_screen *screen, spt->offset = sptex->level_offset[level]; if (texture->target == PIPE_TEXTURE_CUBE) { - spt->offset += face * pt->nblocksy * pt->stride; + spt->offset += face * nblocksy * pt->stride; } else if (texture->target == PIPE_TEXTURE_3D) { - spt->offset += zslice * pt->nblocksy * pt->stride; + spt->offset += zslice * nblocksy * pt->stride; } else { assert(face == 0); @@ -360,9 +355,11 @@ softpipe_transfer_map( struct pipe_screen *screen, { ubyte *map, *xfer_map; struct softpipe_texture *spt; + enum pipe_format format; assert(transfer->texture); spt = softpipe_texture(transfer->texture); + format = transfer->texture->format; map = pipe_buffer_map(screen, spt->buffer, pipe_transfer_buffer_flags(transfer)); if (map == NULL) @@ -379,8 +376,8 @@ softpipe_transfer_map( struct pipe_screen *screen, } xfer_map = map + softpipe_transfer(transfer)->offset + - transfer->y / transfer->block.height * transfer->stride + - transfer->x / transfer->block.width * transfer->block.size; + transfer->y / util_format_get_blockheight(format) * transfer->stride + + transfer->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); /*printf("map = %p xfer map = %p\n", map, xfer_map);*/ return xfer_map; } @@ -434,10 +431,9 @@ softpipe_video_surface_create(struct pipe_screen *screen, template.format = PIPE_FORMAT_X8R8G8B8_UNORM; template.last_level = 0; /* vl_mpeg12_mc_renderer expects this when it's initialized with pot_buffers=true */ - template.width[0] = util_next_power_of_two(width); - template.height[0] = util_next_power_of_two(height); - template.depth[0] = 1; - pf_get_block(template.format, &template.block); + template.width0 = util_next_power_of_two(width); + template.height0 = util_next_power_of_two(height); + template.depth0 = 1; template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_RENDER_TARGET; sp_vsfc->tex = screen->texture_create(screen, &template); diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index 65872cecc4f..112a6fe0cf3 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -33,6 +33,7 @@ */ #include "pipe/p_inlines.h" +#include "util/u_format.h" #include "util/u_memory.h" #include "util/u_tile.h" #include "sp_tile_cache.h" @@ -238,7 +239,7 @@ clear_tile(struct softpipe_cached_tile *tile, { uint i, j; - switch (pf_get_size(format)) { + switch (util_format_get_blocksize(format)) { case 1: memset(tile->data.any, clear_value, TILE_SIZE * TILE_SIZE); break; @@ -284,8 +285,9 @@ sp_tile_cache_flush_clear(struct softpipe_tile_cache *tc) uint x, y; uint numCleared = 0; + assert(pt->texture); /* clear the scratch tile to the clear value */ - clear_tile(&tc->tile, pt->format, tc->clear_val); + clear_tile(&tc->tile, pt->texture->format, tc->clear_val); /* push the tile to all positions marked as clear */ for (y = 0; y < h; y += TILE_SIZE) { @@ -372,6 +374,7 @@ sp_find_cached_tile(struct softpipe_tile_cache *tc, if (addr.value != tile->addr.value) { + assert(pt->texture); if (tile->addr.bits.invalid == 0) { /* put dirty tile back in framebuffer */ if (tc->depth_stencil) { @@ -395,10 +398,10 @@ sp_find_cached_tile(struct softpipe_tile_cache *tc, if (is_clear_flag_set(tc->clear_flags, addr)) { /* don't get tile from framebuffer, just clear it */ if (tc->depth_stencil) { - clear_tile(tile, pt->format, tc->clear_val); + clear_tile(tile, pt->texture->format, tc->clear_val); } else { - clear_tile_rgba(tile, pt->format, tc->clear_color); + clear_tile_rgba(tile, pt->texture->format, tc->clear_color); } clear_clear_flag(tc->clear_flags, addr); } diff --git a/src/gallium/drivers/softpipe/sp_winsys.h b/src/gallium/drivers/softpipe/sp_winsys.h index 9e571862b75..f203ded29ee 100644 --- a/src/gallium/drivers/softpipe/sp_winsys.h +++ b/src/gallium/drivers/softpipe/sp_winsys.h @@ -34,15 +34,17 @@ #ifndef SP_WINSYS_H #define SP_WINSYS_H - #ifdef __cplusplus extern "C" { #endif +#include "pipe/p_defines.h" struct pipe_screen; struct pipe_winsys; struct pipe_context; +struct pipe_texture; +struct pipe_buffer; struct pipe_context *softpipe_create( struct pipe_screen * ); diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h index 32e9304f819..66259fd0103 100644 --- a/src/gallium/drivers/svga/svga_context.h +++ b/src/gallium/drivers/svga/svga_context.h @@ -203,8 +203,6 @@ struct svga_state struct pipe_clip_state clip; struct pipe_viewport_state viewport; - const unsigned *edgeflags; - unsigned num_samplers; unsigned num_textures; unsigned num_vertex_elements; @@ -370,7 +368,7 @@ struct svga_context #define SVGA_NEW_FRAME_BUFFER 0x800 #define SVGA_NEW_STIPPLE 0x1000 #define SVGA_NEW_SCISSOR 0x2000 -#define SVGA_NEW_BLEND_COLOR 0x5000 +#define SVGA_NEW_BLEND_COLOR 0x4000 #define SVGA_NEW_CLIP 0x8000 #define SVGA_NEW_VIEWPORT 0x10000 #define SVGA_NEW_PRESCALE 0x20000 @@ -381,9 +379,8 @@ struct svga_context #define SVGA_NEW_NEED_SWTNL 0x400000 #define SVGA_NEW_FS_RESULT 0x800000 #define SVGA_NEW_VS_RESULT 0x1000000 -#define SVGA_NEW_EDGEFLAGS 0x2000000 -#define SVGA_NEW_ZERO_STRIDE 0x4000000 -#define SVGA_NEW_TEXTURE_FLAGS 0x8000000 +#define SVGA_NEW_ZERO_STRIDE 0x2000000 +#define SVGA_NEW_TEXTURE_FLAGS 0x4000000 diff --git a/src/gallium/drivers/svga/svga_pipe_clear.c b/src/gallium/drivers/svga/svga_pipe_clear.c index 6195c3897ed..409b3b41cbc 100644 --- a/src/gallium/drivers/svga/svga_pipe_clear.c +++ b/src/gallium/drivers/svga/svga_pipe_clear.c @@ -46,7 +46,7 @@ try_clear(struct svga_context *svga, boolean restore_viewport = FALSE; SVGA3dClearFlag flags = 0; struct pipe_framebuffer_state *fb = &svga->curr.framebuffer; - unsigned color = 0; + union util_color uc; ret = svga_update_state(svga, SVGA_STATE_HW_CLEAR); if (ret) @@ -54,7 +54,7 @@ try_clear(struct svga_context *svga, if ((buffers & PIPE_CLEAR_COLOR) && fb->cbufs[0]) { flags |= SVGA3D_CLEAR_COLOR; - util_pack_color(rgba, PIPE_FORMAT_A8R8G8B8_UNORM, &color); + util_pack_color(rgba, PIPE_FORMAT_A8R8G8B8_UNORM, &uc); rect.w = fb->cbufs[0]->width; rect.h = fb->cbufs[0]->height; @@ -77,7 +77,7 @@ try_clear(struct svga_context *svga, return ret; } - ret = SVGA3D_ClearRect(svga->swc, flags, color, depth, stencil, + ret = SVGA3D_ClearRect(svga->swc, flags, uc.ui, depth, stencil, rect.x, rect.y, rect.w, rect.h); if (ret != PIPE_OK) return ret; diff --git a/src/gallium/drivers/svga/svga_pipe_draw.c b/src/gallium/drivers/svga/svga_pipe_draw.c index 71a552862e9..0f24ef4ee8d 100644 --- a/src/gallium/drivers/svga/svga_pipe_draw.c +++ b/src/gallium/drivers/svga/svga_pipe_draw.c @@ -149,7 +149,7 @@ retry: -static boolean +static void svga_draw_range_elements( struct pipe_context *pipe, struct pipe_buffer *index_buffer, unsigned index_size, @@ -162,7 +162,7 @@ svga_draw_range_elements( struct pipe_context *pipe, enum pipe_error ret = 0; if (!u_trim_pipe_prim( prim, &count )) - return TRUE; + return; /* * Mark currently bound target surfaces as dirty @@ -183,7 +183,7 @@ svga_draw_range_elements( struct pipe_context *pipe, #ifdef DEBUG if (svga->curr.vs->base.id == svga->debug.disable_shader || svga->curr.fs->base.id == svga->debug.disable_shader) - return 0; + return; #endif if (svga->state.sw.need_swtnl) @@ -225,31 +225,29 @@ svga_draw_range_elements( struct pipe_context *pipe, svga_hwtnl_flush_retry( svga ); svga_context_flush(svga, NULL); } - - return ret == PIPE_OK; } -static boolean +static void svga_draw_elements( struct pipe_context *pipe, struct pipe_buffer *index_buffer, unsigned index_size, unsigned prim, unsigned start, unsigned count) { - return svga_draw_range_elements( pipe, index_buffer, - index_size, - 0, 0xffffffff, - prim, start, count ); + svga_draw_range_elements( pipe, index_buffer, + index_size, + 0, 0xffffffff, + prim, start, count ); } -static boolean +static void svga_draw_arrays( struct pipe_context *pipe, unsigned prim, unsigned start, unsigned count) { - return svga_draw_range_elements(pipe, NULL, 0, - start, start + count - 1, - prim, - start, count); + svga_draw_range_elements(pipe, NULL, 0, + start, start + count - 1, + prim, + start, count); } diff --git a/src/gallium/drivers/svga/svga_pipe_sampler.c b/src/gallium/drivers/svga/svga_pipe_sampler.c index 3eeca6b784b..460a101f8c0 100644 --- a/src/gallium/drivers/svga/svga_pipe_sampler.c +++ b/src/gallium/drivers/svga/svga_pipe_sampler.c @@ -76,7 +76,6 @@ static INLINE unsigned translate_img_filter( unsigned filter ) switch (filter) { case PIPE_TEX_FILTER_NEAREST: return SVGA3D_TEX_FILTER_NEAREST; case PIPE_TEX_FILTER_LINEAR: return SVGA3D_TEX_FILTER_LINEAR; - case PIPE_TEX_FILTER_ANISO: return SVGA3D_TEX_FILTER_ANISOTROPIC; default: assert(0); return SVGA3D_TEX_FILTER_NEAREST; @@ -101,11 +100,14 @@ svga_create_sampler_state(struct pipe_context *pipe, { struct svga_context *svga = svga_context(pipe); struct svga_sampler_state *cso = CALLOC_STRUCT( svga_sampler_state ); + union util_color uc; cso->mipfilter = translate_mip_filter(sampler->min_mip_filter); cso->magfilter = translate_img_filter( sampler->mag_img_filter ); cso->minfilter = translate_img_filter( sampler->min_img_filter ); cso->aniso_level = MAX2( (unsigned) sampler->max_anisotropy, 1 ); + if(cso->aniso_level != 1) + cso->magfilter = cso->minfilter = SVGA3D_TEX_FILTER_ANISOTROPIC; cso->lod_bias = sampler->lod_bias; cso->addressu = translate_wrap_mode(sampler->wrap_s); cso->addressv = translate_wrap_mode(sampler->wrap_t); @@ -121,8 +123,8 @@ svga_create_sampler_state(struct pipe_context *pipe, ubyte a = float_to_ubyte(sampler->border_color[3]); util_pack_color_ub( r, g, b, a, - PIPE_FORMAT_B8G8R8A8_UNORM, - &cso->bordercolor ); + PIPE_FORMAT_B8G8R8A8_UNORM, &uc); + cso->bordercolor = uc.ui; } /* No SVGA3D support for: @@ -234,9 +236,9 @@ static void svga_set_sampler_textures(struct pipe_context *pipe, void svga_init_sampler_functions( struct svga_context *svga ) { svga->pipe.create_sampler_state = svga_create_sampler_state; - svga->pipe.bind_sampler_states = svga_bind_sampler_states; + svga->pipe.bind_fragment_sampler_states = svga_bind_sampler_states; svga->pipe.delete_sampler_state = svga_delete_sampler_state; - svga->pipe.set_sampler_textures = svga_set_sampler_textures; + svga->pipe.set_fragment_sampler_textures = svga_set_sampler_textures; } diff --git a/src/gallium/drivers/svga/svga_pipe_vertex.c b/src/gallium/drivers/svga/svga_pipe_vertex.c index 28e2787e0d3..42f290d162a 100644 --- a/src/gallium/drivers/svga/svga_pipe_vertex.c +++ b/src/gallium/drivers/svga/svga_pipe_vertex.c @@ -84,18 +84,6 @@ static void svga_set_vertex_elements(struct pipe_context *pipe, } -static void svga_set_edgeflags(struct pipe_context *pipe, - const unsigned *bitfield) -{ - struct svga_context *svga = svga_context(pipe); - - if (bitfield != NULL || svga->curr.edgeflags != NULL) { - svga->curr.edgeflags = bitfield; - svga->dirty |= SVGA_NEW_EDGEFLAGS; - } -} - - void svga_cleanup_vertex_state( struct svga_context *svga ) { unsigned i; @@ -109,7 +97,6 @@ void svga_init_vertex_functions( struct svga_context *svga ) { svga->pipe.set_vertex_buffers = svga_set_vertex_buffers; svga->pipe.set_vertex_elements = svga_set_vertex_elements; - svga->pipe.set_edgeflags = svga_set_edgeflags; } diff --git a/src/gallium/drivers/svga/svga_pipe_vs.c b/src/gallium/drivers/svga/svga_pipe_vs.c index fd9864c51a6..7e6ab576add 100644 --- a/src/gallium/drivers/svga/svga_pipe_vs.c +++ b/src/gallium/drivers/svga/svga_pipe_vs.c @@ -49,7 +49,7 @@ static const struct tgsi_token *substitute_vs( static struct tgsi_token tokens[300]; const char *text = - "VERT1.1\n" + "VERT\n" "DCL IN[0]\n" "DCL IN[1]\n" "DCL IN[2]\n" diff --git a/src/gallium/drivers/svga/svga_screen_buffer.c b/src/gallium/drivers/svga/svga_screen_buffer.c index 1f8a8896723..58a1aba464b 100644 --- a/src/gallium/drivers/svga/svga_screen_buffer.c +++ b/src/gallium/drivers/svga/svga_screen_buffer.c @@ -356,7 +356,8 @@ svga_buffer_upload_flush(struct svga_context *svga, sbuf->hw.boxes = NULL; /* Decrement reference count */ - pipe_buffer_reference((struct pipe_buffer **)&sbuf, NULL); + pipe_reference(&(sbuf->base.reference), NULL); + sbuf = NULL; } diff --git a/src/gallium/drivers/svga/svga_screen_texture.c b/src/gallium/drivers/svga/svga_screen_texture.c index 1eb03db2806..2224c2d3945 100644 --- a/src/gallium/drivers/svga/svga_screen_texture.c +++ b/src/gallium/drivers/svga/svga_screen_texture.c @@ -29,6 +29,7 @@ #include "pipe/p_defines.h" #include "pipe/p_inlines.h" #include "pipe/p_thread.h" +#include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -158,7 +159,8 @@ svga_transfer_dma_band(struct svga_transfer *st, st->base.x + st->base.width, y + h, st->base.zslice + 1, - texture->base.block.size*8/(texture->base.block.width*texture->base.block.height)); + util_format_get_blocksize(texture->base.format)*8/ + (util_format_get_blockwidth(texture->base.format)*util_format_get_blockheight(texture->base.format))); box.x = st->base.x; box.y = y; @@ -208,7 +210,8 @@ svga_transfer_dma(struct svga_transfer *st, } else { unsigned y, h, srcy; - h = st->hw_nblocksy * st->base.block.height; + unsigned blockheight = util_format_get_blockheight(st->base.texture->format); + h = st->hw_nblocksy * blockheight; srcy = 0; for(y = 0; y < st->base.height; y += h) { unsigned offset, length; @@ -218,11 +221,11 @@ svga_transfer_dma(struct svga_transfer *st, h = st->base.height - y; /* Transfer band must be aligned to pixel block boundaries */ - assert(y % st->base.block.height == 0); - assert(h % st->base.block.height == 0); + assert(y % blockheight == 0); + assert(h % blockheight == 0); - offset = y * st->base.stride / st->base.block.height; - length = h * st->base.stride / st->base.block.height; + offset = y * st->base.stride / blockheight; + length = h * st->base.stride / blockheight; sw = (uint8_t *)st->swbuf + offset; @@ -281,24 +284,19 @@ svga_texture_create(struct pipe_screen *screen, if(templat->last_level >= SVGA_MAX_TEXTURE_LEVELS) goto error2; - width = templat->width[0]; - height = templat->height[0]; - depth = templat->depth[0]; + width = templat->width0; + height = templat->height0; + depth = templat->depth0; for(level = 0; level <= templat->last_level; ++level) { - tex->base.width[level] = width; - tex->base.height[level] = height; - tex->base.depth[level] = depth; - tex->base.nblocksx[level] = pf_get_nblocksx(&tex->base.block, width); - tex->base.nblocksy[level] = pf_get_nblocksy(&tex->base.block, height); - width = minify(width); - height = minify(height); - depth = minify(depth); + width = u_minify(width, 1); + height = u_minify(height, 1); + depth = u_minify(depth, 1); } tex->key.flags = 0; - tex->key.size.width = templat->width[0]; - tex->key.size.height = templat->height[0]; - tex->key.size.depth = templat->depth[0]; + tex->key.size.width = templat->width0; + tex->key.size.height = templat->height0; + tex->key.size.depth = templat->depth0; if(templat->target == PIPE_TEXTURE_CUBE) { tex->key.flags |= SVGA3D_SURFACE_CUBEMAP; @@ -322,7 +320,7 @@ svga_texture_create(struct pipe_screen *screen, */ #if 0 if((templat->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) && - !pf_is_compressed(templat->format)) + !util_format_is_compressed(templat->format)) tex->key.flags |= SVGA3D_SURFACE_HINT_RENDERTARGET; #endif @@ -365,7 +363,7 @@ svga_texture_blanket(struct pipe_screen * screen, /* Only supports one type */ if (base->target != PIPE_TEXTURE_2D || base->last_level != 0 || - base->depth[0] != 1) { + base->depth0 != 1) { return NULL; } @@ -538,9 +536,9 @@ svga_texture_view_surface(struct pipe_context *pipe, key->flags = 0; key->format = format; key->numMipLevels = num_mip; - key->size.width = tex->base.width[start_mip]; - key->size.height = tex->base.height[start_mip]; - key->size.depth = zslice_pick < 0 ? tex->base.depth[start_mip] : 1; + key->size.width = u_minify(tex->base.width0, start_mip); + key->size.height = u_minify(tex->base.height0, start_mip); + key->size.depth = zslice_pick < 0 ? u_minify(tex->base.depth0, start_mip) : 1; key->cachable = 1; assert(key->size.depth == 1); @@ -574,7 +572,10 @@ svga_texture_view_surface(struct pipe_context *pipe, for (i = 0; i < key->numMipLevels; i++) { for (j = 0; j < key->numFaces; j++) { if(tex->defined[j + face_pick][i + start_mip]) { - unsigned depth = zslice_pick < 0 ? tex->base.depth[i + start_mip] : 1; + unsigned depth = (zslice_pick < 0 ? + u_minify(tex->base.depth0, i + start_mip) : + 1); + svga_texture_copy_handle(svga_context(pipe), ss, tex->handle, @@ -582,8 +583,8 @@ svga_texture_view_surface(struct pipe_context *pipe, i + start_mip, j + face_pick, handle, 0, 0, 0, i, j, - tex->base.width[i + start_mip], - tex->base.height[i + start_mip], + u_minify(tex->base.width0, i + start_mip), + u_minify(tex->base.height0, i + start_mip), depth); } } @@ -612,8 +613,8 @@ svga_get_tex_surface(struct pipe_screen *screen, pipe_reference_init(&s->base.reference, 1); pipe_texture_reference(&s->base.texture, pt); s->base.format = pt->format; - s->base.width = pt->width[level]; - s->base.height = pt->height[level]; + s->base.width = u_minify(pt->width0, level); + s->base.height = u_minify(pt->height0, level); s->base.usage = flags; s->base.level = level; s->base.face = face; @@ -742,7 +743,8 @@ svga_propagate_surface(struct pipe_context *pipe, struct pipe_surface *surf) svga_texture_copy_handle(svga_context(pipe), ss, s->handle, 0, 0, 0, s->real_level, s->real_face, tex->handle, 0, 0, surf->zslice, surf->level, surf->face, - tex->base.width[surf->level], tex->base.height[surf->level], 1); + u_minify(tex->base.width0, surf->level), + u_minify(tex->base.height0, surf->level), 1); tex->defined[surf->face][surf->level] = TRUE; } } @@ -770,6 +772,8 @@ svga_get_tex_transfer(struct pipe_screen *screen, struct svga_screen *ss = svga_screen(screen); struct svga_winsys_screen *sws = ss->sws; struct svga_transfer *st; + unsigned nblocksx = util_format_get_nblocksx(texture->format, w); + unsigned nblocksy = util_format_get_nblocksy(texture->format, h); /* We can't map texture storage directly */ if (usage & PIPE_TRANSFER_MAP_DIRECTLY) @@ -779,21 +783,17 @@ svga_get_tex_transfer(struct pipe_screen *screen, if (!st) return NULL; - st->base.format = texture->format; - st->base.block = texture->block; st->base.x = x; st->base.y = y; st->base.width = w; st->base.height = h; - st->base.nblocksx = pf_get_nblocksx(&texture->block, w); - st->base.nblocksy = pf_get_nblocksy(&texture->block, h); - st->base.stride = st->base.nblocksx*st->base.block.size; + st->base.stride = nblocksx*util_format_get_blocksize(texture->format); st->base.usage = usage; st->base.face = face; st->base.level = level; st->base.zslice = zslice; - st->hw_nblocksy = st->base.nblocksy; + st->hw_nblocksy = nblocksy; st->hwbuf = svga_winsys_buffer_create(ss, 1, @@ -809,15 +809,15 @@ svga_get_tex_transfer(struct pipe_screen *screen, if(!st->hwbuf) goto no_hwbuf; - if(st->hw_nblocksy < st->base.nblocksy) { + if(st->hw_nblocksy < nblocksy) { /* We couldn't allocate a hardware buffer big enough for the transfer, * so allocate regular malloc memory instead */ debug_printf("%s: failed to allocate %u KB of DMA, splitting into %u x %u KB DMA transfers\n", __FUNCTION__, - (st->base.nblocksy*st->base.stride + 1023)/1024, - (st->base.nblocksy + st->hw_nblocksy - 1)/st->hw_nblocksy, + (nblocksy*st->base.stride + 1023)/1024, + (nblocksy + st->hw_nblocksy - 1)/st->hw_nblocksy, (st->hw_nblocksy*st->base.stride + 1023)/1024); - st->swbuf = MALLOC(st->base.nblocksy*st->base.stride); + st->swbuf = MALLOC(nblocksy*st->base.stride); if(!st->swbuf) goto no_swbuf; } @@ -932,7 +932,7 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, struct pipe_texture *pt, if (min_lod == 0 && max_lod >= pt->last_level) view = FALSE; - if (pf_is_compressed(pt->format) && view) { + if (util_format_is_compressed(pt->format) && view) { format = svga_translate_format_render(pt->format); } @@ -971,9 +971,9 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, struct pipe_texture *pt, "svga: Sampler view: no %p, mips %u..%u, nr %u, size (%ux%ux%u), last %u\n", pt, min_lod, max_lod, max_lod - min_lod + 1, - pt->width[0], - pt->height[0], - pt->depth[0], + pt->width0, + pt->height0, + pt->depth0, pt->last_level); sv->key.cachable = 0; sv->handle = tex->handle; @@ -984,9 +984,9 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, struct pipe_texture *pt, "svga: Sampler view: yes %p, mips %u..%u, nr %u, size (%ux%ux%u), last %u\n", pt, min_lod, max_lod, max_lod - min_lod + 1, - pt->width[0], - pt->height[0], - pt->depth[0], + pt->width0, + pt->height0, + pt->depth0, pt->last_level); sv->age = tex->age; @@ -1036,9 +1036,9 @@ svga_validate_sampler_view(struct svga_context *svga, struct svga_sampler_view * svga_texture_copy_handle(svga, NULL, tex->handle, 0, 0, 0, i, k, v->handle, 0, 0, 0, i - v->min_lod, k, - tex->base.width[i], - tex->base.height[i], - tex->base.depth[i]); + u_minify(tex->base.width0, i), + u_minify(tex->base.height0, i), + u_minify(tex->base.depth0, i)); } } @@ -1071,8 +1071,7 @@ svga_screen_buffer_from_texture(struct pipe_texture *texture, svga_translate_format(texture->format), stex->handle); - *stride = pf_get_nblocksx(&texture->block, texture->width[0]) * - texture->block.size; + *stride = util_format_get_stride(texture->format, texture->width0); return *buffer != NULL; } diff --git a/src/gallium/drivers/svga/svga_screen_texture.h b/src/gallium/drivers/svga/svga_screen_texture.h index 8cfdfea6937..89ae24219fd 100644 --- a/src/gallium/drivers/svga/svga_screen_texture.h +++ b/src/gallium/drivers/svga/svga_screen_texture.h @@ -171,8 +171,9 @@ svga_sampler_view_reference(struct svga_sampler_view **ptr, struct svga_sampler_ { struct svga_sampler_view *old = *ptr; - if (pipe_reference((struct pipe_reference **)ptr, &v->reference)) + if (pipe_reference(&(*ptr)->reference, &v->reference)) svga_destroy_sampler_view_priv(old); + *ptr = v; } extern void diff --git a/src/gallium/drivers/svga/svga_state_constants.c b/src/gallium/drivers/svga/svga_state_constants.c index a5777d4fbdc..6b0e511cec1 100644 --- a/src/gallium/drivers/svga/svga_state_constants.c +++ b/src/gallium/drivers/svga/svga_state_constants.c @@ -140,8 +140,8 @@ static int emit_fs_consts( struct svga_context *svga, struct pipe_texture *tex = svga->curr.texture[i]; float data[4]; - data[0] = 1.0 / (float)tex->width[0]; - data[1] = 1.0 / (float)tex->height[0]; + data[0] = 1.0 / (float)tex->width0; + data[1] = 1.0 / (float)tex->height0; data[2] = 1.0; data[3] = 1.0; diff --git a/src/gallium/drivers/svga/svga_state_need_swtnl.c b/src/gallium/drivers/svga/svga_state_need_swtnl.c index 00201b8091d..3c35a8579f7 100644 --- a/src/gallium/drivers/svga/svga_state_need_swtnl.c +++ b/src/gallium/drivers/svga/svga_state_need_swtnl.c @@ -108,6 +108,7 @@ static int update_need_pipeline( struct svga_context *svga, { boolean need_pipeline = FALSE; + struct svga_vertex_shader *vs = svga->curr.vs; /* SVGA_NEW_RAST, SVGA_NEW_REDUCED_PRIMITIVE */ @@ -119,11 +120,9 @@ static int update_need_pipeline( struct svga_context *svga, need_pipeline = TRUE; } - /* SVGA_NEW_EDGEFLAGS + /* EDGEFLAGS */ - if (svga->curr.rast->hw_unfilled != PIPE_POLYGON_MODE_FILL && - svga->curr.reduced_prim == PIPE_PRIM_TRIANGLES && - svga->curr.edgeflags != NULL) { + if (vs->base.info.writes_edgeflag) { SVGA_DBG(DEBUG_SWTNL, "%s: edgeflags\n", __FUNCTION__); need_pipeline = TRUE; } @@ -150,6 +149,7 @@ struct svga_tracked_state svga_update_need_pipeline = "need pipeline", (SVGA_NEW_RAST | SVGA_NEW_CLIP | + SVGA_NEW_VS | SVGA_NEW_REDUCED_PRIMITIVE), update_need_pipeline }; diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c index db30f2735fd..ae1e77e7d44 100644 --- a/src/gallium/drivers/svga/svga_state_vs.c +++ b/src/gallium/drivers/svga/svga_state_vs.c @@ -25,6 +25,7 @@ #include "pipe/p_inlines.h" #include "pipe/p_defines.h" +#include "util/u_format.h" #include "util/u_math.h" #include "util/u_bitmask.h" #include "translate/translate.h" @@ -216,7 +217,7 @@ static int update_zero_stride( struct svga_context *svga, mapped_buffer = pipe_buffer_map_range(svga->pipe.screen, vbuffer->buffer, vel->src_offset, - pf_get_size(vel->src_format), + util_format_get_blocksize(vel->src_format), PIPE_BUFFER_USAGE_CPU_READ); translate->set_buffer(translate, vel->vertex_buffer_index, mapped_buffer, diff --git a/src/gallium/drivers/svga/svga_swtnl_draw.c b/src/gallium/drivers/svga/svga_swtnl_draw.c index 8b14c913f72..7655121bec1 100644 --- a/src/gallium/drivers/svga/svga_swtnl_draw.c +++ b/src/gallium/drivers/svga/svga_swtnl_draw.c @@ -90,7 +90,7 @@ svga_swtnl_draw_range_elements(struct svga_context *svga, PIPE_BUFFER_USAGE_CPU_READ); assert(map); draw_set_mapped_constant_buffer( - draw, + draw, PIPE_SHADER_VERTEX, map, svga->curr.cb[PIPE_SHADER_VERTEX]->size); } diff --git a/src/gallium/drivers/svga/svga_swtnl_state.c b/src/gallium/drivers/svga/svga_swtnl_state.c index 16163121131..94b6ccc62dd 100644 --- a/src/gallium/drivers/svga/svga_swtnl_state.c +++ b/src/gallium/drivers/svga/svga_swtnl_state.c @@ -120,10 +120,6 @@ static int update_swtnl_draw( struct svga_context *svga, draw_set_mrd(svga->swtnl.draw, svga->curr.depthscale); - if (dirty & SVGA_NEW_EDGEFLAGS) - draw_set_edgeflags( svga->swtnl.draw, - svga->curr.edgeflags ); - return 0; } @@ -138,8 +134,7 @@ struct svga_tracked_state svga_update_swtnl_draw = SVGA_NEW_VIEWPORT | SVGA_NEW_RAST | SVGA_NEW_FRAME_BUFFER | - SVGA_NEW_REDUCED_PRIMITIVE | - SVGA_NEW_EDGEFLAGS), + SVGA_NEW_REDUCED_PRIMITIVE), update_swtnl_draw }; @@ -161,7 +156,7 @@ int svga_swtnl_update_vdecl( struct svga_context *svga ) memset(vdecl, 0, sizeof(vdecl)); /* always add position */ - src = draw_find_vs_output(draw, TGSI_SEMANTIC_POSITION, 0); + src = draw_find_shader_output(draw, TGSI_SEMANTIC_POSITION, 0); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR, src); vinfo->attrib[0].emit = EMIT_4F; vdecl[0].array.offset = offset; @@ -174,7 +169,7 @@ int svga_swtnl_update_vdecl( struct svga_context *svga ) for (i = 0; i < fs->base.info.num_inputs; i++) { unsigned name = fs->base.info.input_semantic_name[i]; unsigned index = fs->base.info.input_semantic_index[i]; - src = draw_find_vs_output(draw, name, index); + src = draw_find_shader_output(draw, name, index); vdecl[nr_decls].array.offset = offset; vdecl[nr_decls].identity.usageIndex = fs->base.info.input_semantic_index[i]; diff --git a/src/gallium/drivers/svga/svga_tgsi_decl_sm20.c b/src/gallium/drivers/svga/svga_tgsi_decl_sm20.c index 54457082a06..23b3ace7f30 100644 --- a/src/gallium/drivers/svga/svga_tgsi_decl_sm20.c +++ b/src/gallium/drivers/svga/svga_tgsi_decl_sm20.c @@ -46,7 +46,7 @@ static boolean ps20_input( struct svga_shader_emitter *emit, dcl.values[0] = 0; dcl.values[1] = 0; - switch (semantic.SemanticName) { + switch (semantic.Name) { case TGSI_SEMANTIC_POSITION: /* Special case: */ @@ -55,15 +55,15 @@ static boolean ps20_input( struct svga_shader_emitter *emit, break; case TGSI_SEMANTIC_COLOR: reg = src_register( SVGA3DREG_INPUT, - semantic.SemanticIndex ); + semantic.Index ); break; case TGSI_SEMANTIC_FOG: - assert(semantic.SemanticIndex == 0); + assert(semantic.Index == 0); reg = src_register( SVGA3DREG_TEXTURE, 0 ); break; case TGSI_SEMANTIC_GENERIC: reg = src_register( SVGA3DREG_TEXTURE, - semantic.SemanticIndex + 1 ); + semantic.Index + 1 ); break; default: assert(0); @@ -90,16 +90,16 @@ static boolean ps20_output( struct svga_shader_emitter *emit, { SVGA3dShaderDestToken reg; - switch (semantic.SemanticName) { + switch (semantic.Name) { case TGSI_SEMANTIC_COLOR: - if (semantic.SemanticIndex < PIPE_MAX_COLOR_BUFS) { - unsigned cbuf = semantic.SemanticIndex; + if (semantic.Index < PIPE_MAX_COLOR_BUFS) { + unsigned cbuf = semantic.Index; emit->output_map[idx] = dst_register( SVGA3DREG_TEMP, emit->nr_hw_temp++ ); emit->temp_col[cbuf] = emit->output_map[idx]; emit->true_col[cbuf] = dst_register( SVGA3DREG_COLOROUT, - semantic.SemanticIndex ); + semantic.Index ); } else { assert(0); @@ -111,7 +111,7 @@ static boolean ps20_output( struct svga_shader_emitter *emit, emit->nr_hw_temp++ ); emit->temp_pos = emit->output_map[idx]; emit->true_pos = dst_register( SVGA3DREG_DEPTHOUT, - semantic.SemanticIndex ); + semantic.Index ); break; default: assert(0); @@ -169,9 +169,9 @@ static boolean vs20_output( struct svga_shader_emitter *emit, /* Just build the register map table: */ - switch (semantic.SemanticName) { + switch (semantic.Name) { case TGSI_SEMANTIC_POSITION: - assert(semantic.SemanticIndex == 0); + assert(semantic.Index == 0); emit->output_map[idx] = dst_register( SVGA3DREG_TEMP, emit->nr_hw_temp++ ); emit->temp_pos = emit->output_map[idx]; @@ -179,7 +179,7 @@ static boolean vs20_output( struct svga_shader_emitter *emit, SVGA3DRASTOUT_POSITION); break; case TGSI_SEMANTIC_PSIZE: - assert(semantic.SemanticIndex == 0); + assert(semantic.Index == 0); emit->output_map[idx] = dst_register( SVGA3DREG_TEMP, emit->nr_hw_temp++ ); emit->temp_psiz = emit->output_map[idx]; @@ -187,17 +187,17 @@ static boolean vs20_output( struct svga_shader_emitter *emit, SVGA3DRASTOUT_PSIZE ); break; case TGSI_SEMANTIC_FOG: - assert(semantic.SemanticIndex == 0); + assert(semantic.Index == 0); emit->output_map[idx] = dst_register( SVGA3DREG_TEXCRDOUT, 0 ); break; case TGSI_SEMANTIC_COLOR: /* oD0 */ emit->output_map[idx] = dst_register( SVGA3DREG_ATTROUT, - semantic.SemanticIndex ); + semantic.Index ); break; case TGSI_SEMANTIC_GENERIC: emit->output_map[idx] = dst_register( SVGA3DREG_TEXCRDOUT, - semantic.SemanticIndex + 1 ); + semantic.Index + 1 ); break; default: assert(0); @@ -230,15 +230,15 @@ static boolean ps20_sampler( struct svga_shader_emitter *emit, boolean svga_translate_decl_sm20( struct svga_shader_emitter *emit, const struct tgsi_full_declaration *decl ) { - unsigned first = decl->DeclarationRange.First; - unsigned last = decl->DeclarationRange.Last; + unsigned first = decl->Range.First; + unsigned last = decl->Range.Last; unsigned semantic = 0; unsigned semantic_idx = 0; unsigned idx; if (decl->Declaration.Semantic) { - semantic = decl->Semantic.SemanticName; - semantic_idx = decl->Semantic.SemanticIndex; + semantic = decl->Semantic.Name; + semantic_idx = decl->Semantic.Index; } for( idx = first; idx <= last; idx++ ) { diff --git a/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c b/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c index 08e7dfb117c..d1c7336dec4 100644 --- a/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c +++ b/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c @@ -35,35 +35,35 @@ static boolean translate_vs_ps_semantic( struct tgsi_declaration_semantic semant unsigned *usage, unsigned *idx ) { - switch (semantic.SemanticName) { + switch (semantic.Name) { case TGSI_SEMANTIC_POSITION: - *idx = semantic.SemanticIndex; + *idx = semantic.Index; *usage = SVGA3D_DECLUSAGE_POSITION; break; case TGSI_SEMANTIC_COLOR: - *idx = semantic.SemanticIndex; + *idx = semantic.Index; *usage = SVGA3D_DECLUSAGE_COLOR; break; case TGSI_SEMANTIC_BCOLOR: - *idx = semantic.SemanticIndex + 2; /* sharing with COLOR */ + *idx = semantic.Index + 2; /* sharing with COLOR */ *usage = SVGA3D_DECLUSAGE_COLOR; break; case TGSI_SEMANTIC_FOG: *idx = 0; - assert(semantic.SemanticIndex == 0); + assert(semantic.Index == 0); *usage = SVGA3D_DECLUSAGE_TEXCOORD; break; case TGSI_SEMANTIC_PSIZE: - *idx = semantic.SemanticIndex; + *idx = semantic.Index; *usage = SVGA3D_DECLUSAGE_PSIZE; break; case TGSI_SEMANTIC_GENERIC: - *idx = semantic.SemanticIndex + 1; /* texcoord[0] is reserved for fog */ + *idx = semantic.Index + 1; /* texcoord[0] is reserved for fog */ *usage = SVGA3D_DECLUSAGE_TEXCOORD; break; case TGSI_SEMANTIC_NORMAL: - *idx = semantic.SemanticIndex; + *idx = semantic.Index; *usage = SVGA3D_DECLUSAGE_NORMAL; break; default: @@ -120,7 +120,7 @@ static boolean ps30_input( struct svga_shader_emitter *emit, unsigned usage, index; SVGA3dShaderDestToken reg; - if (semantic.SemanticName == TGSI_SEMANTIC_POSITION) { + if (semantic.Name == TGSI_SEMANTIC_POSITION) { emit->input_map[idx] = src_register( SVGA3DREG_MISCTYPE, SVGA3DMISCREG_POSITION ); @@ -135,7 +135,7 @@ static boolean ps30_input( struct svga_shader_emitter *emit, return emit_decl( emit, reg, 0, 0 ); } else if (emit->key.fkey.light_twoside && - (semantic.SemanticName == TGSI_SEMANTIC_COLOR)) { + (semantic.Name == TGSI_SEMANTIC_COLOR)) { if (!translate_vs_ps_semantic( semantic, &usage, &index )) return FALSE; @@ -150,7 +150,7 @@ static boolean ps30_input( struct svga_shader_emitter *emit, if (!emit_decl( emit, reg, usage, index )) return FALSE; - semantic.SemanticName = TGSI_SEMANTIC_BCOLOR; + semantic.Name = TGSI_SEMANTIC_BCOLOR; if (!translate_vs_ps_semantic( semantic, &usage, &index )) return FALSE; @@ -164,7 +164,7 @@ static boolean ps30_input( struct svga_shader_emitter *emit, return TRUE; } - else if (semantic.SemanticName == TGSI_SEMANTIC_FACE) { + else if (semantic.Name == TGSI_SEMANTIC_FACE) { if (!emit_vface_decl( emit )) return FALSE; emit->emit_frontface = TRUE; @@ -193,17 +193,17 @@ static boolean ps30_output( struct svga_shader_emitter *emit, { SVGA3dShaderDestToken reg; - switch (semantic.SemanticName) { + switch (semantic.Name) { case TGSI_SEMANTIC_COLOR: emit->output_map[idx] = dst_register( SVGA3DREG_COLOROUT, - semantic.SemanticIndex ); + semantic.Index ); break; case TGSI_SEMANTIC_POSITION: emit->output_map[idx] = dst_register( SVGA3DREG_TEMP, emit->nr_hw_temp++ ); emit->temp_pos = emit->output_map[idx]; emit->true_pos = dst_register( SVGA3DREG_DEPTHOUT, - semantic.SemanticIndex ); + semantic.Index ); break; default: assert(0); @@ -283,14 +283,14 @@ static boolean vs30_output( struct svga_shader_emitter *emit, dcl.index = index; dcl.values[0] |= 1<<31; - if (semantic.SemanticName == TGSI_SEMANTIC_POSITION) { + if (semantic.Name == TGSI_SEMANTIC_POSITION) { assert(idx == 0); emit->output_map[idx] = dst_register( SVGA3DREG_TEMP, emit->nr_hw_temp++ ); emit->temp_pos = emit->output_map[idx]; emit->true_pos = dcl.dst; } - else if (semantic.SemanticName == TGSI_SEMANTIC_PSIZE) { + else if (semantic.Name == TGSI_SEMANTIC_PSIZE) { emit->output_map[idx] = dst_register( SVGA3DREG_TEMP, emit->nr_hw_temp++ ); emit->temp_psiz = emit->output_map[idx]; @@ -335,15 +335,15 @@ static boolean ps30_sampler( struct svga_shader_emitter *emit, boolean svga_translate_decl_sm30( struct svga_shader_emitter *emit, const struct tgsi_full_declaration *decl ) { - unsigned first = decl->DeclarationRange.First; - unsigned last = decl->DeclarationRange.Last; + unsigned first = decl->Range.First; + unsigned last = decl->Range.Last; unsigned semantic = 0; unsigned semantic_idx = 0; unsigned idx; if (decl->Declaration.Semantic) { - semantic = decl->Semantic.SemanticName; - semantic_idx = decl->Semantic.SemanticIndex; + semantic = decl->Semantic.Name; + semantic_idx = decl->Semantic.Index; } for( idx = first; idx <= last; idx++ ) { diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c index ea409b7e165..dc5eb8fc606 100644 --- a/src/gallium/drivers/svga/svga_tgsi_insn.c +++ b/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -96,24 +96,24 @@ translate_dst_register( struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn, unsigned idx ) { - const struct tgsi_full_dst_register *reg = &insn->FullDstRegisters[idx]; + const struct tgsi_full_dst_register *reg = &insn->Dst[idx]; SVGA3dShaderDestToken dest; - switch (reg->DstRegister.File) { + switch (reg->Register.File) { case TGSI_FILE_OUTPUT: /* Output registers encode semantic information in their name. * Need to lookup a table built at decl time: */ - dest = emit->output_map[reg->DstRegister.Index]; + dest = emit->output_map[reg->Register.Index]; break; default: - dest = dst_register( translate_file( reg->DstRegister.File ), - reg->DstRegister.Index ); + dest = dst_register( translate_file( reg->Register.File ), + reg->Register.Index ); break; } - dest.mask = reg->DstRegister.WriteMask; + dest.mask = reg->Register.WriteMask; if (insn->Instruction.Saturate) dest.dstMod = SVGA3DDSTMOD_SATURATE; @@ -176,33 +176,33 @@ translate_src_register( const struct svga_shader_emitter *emit, { struct src_register src; - switch (reg->SrcRegister.File) { + switch (reg->Register.File) { case TGSI_FILE_INPUT: /* Input registers are referred to by their semantic name rather * than by index. Use the mapping build up from the decls: */ - src = emit->input_map[reg->SrcRegister.Index]; + src = emit->input_map[reg->Register.Index]; break; case TGSI_FILE_IMMEDIATE: /* Immediates are appended after TGSI constants in the D3D * constant buffer. */ - src = src_register( translate_file( reg->SrcRegister.File ), - reg->SrcRegister.Index + + src = src_register( translate_file( reg->Register.File ), + reg->Register.Index + emit->imm_start ); break; default: - src = src_register( translate_file( reg->SrcRegister.File ), - reg->SrcRegister.Index ); + src = src_register( translate_file( reg->Register.File ), + reg->Register.Index ); break; } /* Indirect addressing (for coninstant buffer lookups only) */ - if (reg->SrcRegister.Indirect) + if (reg->Register.Indirect) { /* we shift the offset towards the minimum */ if (svga_arl_needs_adjustment( emit )) { @@ -213,28 +213,28 @@ translate_src_register( const struct svga_shader_emitter *emit, /* Not really sure what should go in the second token: */ src.indirect = src_token( SVGA3DREG_ADDR, - reg->SrcRegisterInd.Index ); + reg->Indirect.Index ); src.indirect.swizzle = SWIZZLE_XXXX; } src = swizzle( src, - reg->SrcRegister.SwizzleX, - reg->SrcRegister.SwizzleY, - reg->SrcRegister.SwizzleZ, - reg->SrcRegister.SwizzleW ); + reg->Register.SwizzleX, + reg->Register.SwizzleY, + reg->Register.SwizzleZ, + reg->Register.SwizzleW ); /* src.mod isn't a bitfield, unfortunately: * See tgsi_util_get_full_src_register_sign_mode for implementation details. */ - if (reg->SrcRegisterExtMod.Absolute) { - if (reg->SrcRegisterExtMod.Negate) + if (reg->Register.Absolute) { + if (reg->Register.Negate) src.base.srcMod = SVGA3DSRCMOD_ABSNEG; else src.base.srcMod = SVGA3DSRCMOD_ABS; } else { - if (reg->SrcRegister.Negate != reg->SrcRegisterExtMod.Negate) + if (reg->Register.Negate) src.base.srcMod = SVGA3DSRCMOD_NEG; else src.base.srcMod = SVGA3DSRCMOD_NONE; @@ -629,7 +629,7 @@ static boolean emit_fake_arl(struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn) { const struct src_register src0 = translate_src_register( - emit, &insn->FullSrcRegisters[0] ); + emit, &insn->Src[0] ); struct src_register src1 = get_fake_arl_const( emit ); SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); SVGA3dShaderDestToken tmp = get_temp( emit ); @@ -653,7 +653,7 @@ static boolean emit_if(struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn) { const struct src_register src = translate_src_register( - emit, &insn->FullSrcRegisters[0] ); + emit, &insn->Src[0] ); struct src_register zero = get_zero_immediate( emit ); SVGA3dShaderInstToken if_token = inst_token( SVGA3DOP_IFC ); @@ -690,7 +690,7 @@ static boolean emit_floor(struct svga_shader_emitter *emit, { SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); const struct src_register src0 = translate_src_register( - emit, &insn->FullSrcRegisters[0] ); + emit, &insn->Src[0] ); SVGA3dShaderDestToken temp = get_temp( emit ); /* FRC TMP, SRC */ @@ -716,11 +716,11 @@ static boolean emit_cmp(struct svga_shader_emitter *emit, { SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); const struct src_register src0 = translate_src_register( - emit, &insn->FullSrcRegisters[0] ); + emit, &insn->Src[0] ); const struct src_register src1 = translate_src_register( - emit, &insn->FullSrcRegisters[1] ); + emit, &insn->Src[1] ); const struct src_register src2 = translate_src_register( - emit, &insn->FullSrcRegisters[2] ); + emit, &insn->Src[2] ); /* CMP DST, SRC0, SRC2, SRC1 */ return submit_op3( emit, inst_token( SVGA3DOP_CMP ), dst, src0, src2, src1); @@ -740,9 +740,9 @@ static boolean emit_div(struct svga_shader_emitter *emit, { SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); const struct src_register src0 = translate_src_register( - emit, &insn->FullSrcRegisters[0] ); + emit, &insn->Src[0] ); const struct src_register src1 = translate_src_register( - emit, &insn->FullSrcRegisters[1] ); + emit, &insn->Src[1] ); SVGA3dShaderDestToken temp = get_temp( emit ); int i; @@ -782,9 +782,9 @@ static boolean emit_dp2(struct svga_shader_emitter *emit, { SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); const struct src_register src0 = translate_src_register( - emit, &insn->FullSrcRegisters[0] ); + emit, &insn->Src[0] ); const struct src_register src1 = translate_src_register( - emit, &insn->FullSrcRegisters[1] ); + emit, &insn->Src[1] ); SVGA3dShaderDestToken temp = get_temp( emit ); struct src_register temp_src0, temp_src1; @@ -815,9 +815,9 @@ static boolean emit_dph(struct svga_shader_emitter *emit, { SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); const struct src_register src0 = translate_src_register( - emit, &insn->FullSrcRegisters[0] ); + emit, &insn->Src[0] ); struct src_register src1 = translate_src_register( - emit, &insn->FullSrcRegisters[1] ); + emit, &insn->Src[1] ); SVGA3dShaderDestToken temp = get_temp( emit ); /* DP3 TMP, SRC1, SRC2 */ @@ -846,7 +846,7 @@ static boolean emit_nrm(struct svga_shader_emitter *emit, { SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); const struct src_register src0 = translate_src_register( - emit, &insn->FullSrcRegisters[0] ); + emit, &insn->Src[0] ); SVGA3dShaderDestToken temp = get_temp( emit ); /* DP3 TMP, SRC, SRC */ @@ -889,7 +889,7 @@ static boolean emit_sincos(struct svga_shader_emitter *emit, { SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); struct src_register src0 = translate_src_register( - emit, &insn->FullSrcRegisters[0] ); + emit, &insn->Src[0] ); SVGA3dShaderDestToken temp = get_temp( emit ); /* SCS TMP SRC */ @@ -912,7 +912,7 @@ static boolean emit_sin(struct svga_shader_emitter *emit, { SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); struct src_register src0 = translate_src_register( - emit, &insn->FullSrcRegisters[0] ); + emit, &insn->Src[0] ); SVGA3dShaderDestToken temp = get_temp( emit ); /* SCS TMP SRC */ @@ -937,7 +937,7 @@ static boolean emit_cos(struct svga_shader_emitter *emit, { SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); struct src_register src0 = translate_src_register( - emit, &insn->FullSrcRegisters[0] ); + emit, &insn->Src[0] ); SVGA3dShaderDestToken temp = get_temp( emit ); /* SCS TMP SRC */ @@ -962,9 +962,9 @@ static boolean emit_sub(struct svga_shader_emitter *emit, { SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); struct src_register src0 = translate_src_register( - emit, &insn->FullSrcRegisters[0] ); + emit, &insn->Src[0] ); struct src_register src1 = translate_src_register( - emit, &insn->FullSrcRegisters[1] ); + emit, &insn->Src[1] ); src1 = negate(src1); @@ -980,19 +980,19 @@ static boolean emit_kil(struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn ) { SVGA3dShaderInstToken inst; - const struct tgsi_full_src_register *reg = &insn->FullSrcRegisters[0]; + const struct tgsi_full_src_register *reg = &insn->Src[0]; struct src_register src0; inst = inst_token( SVGA3DOP_TEXKILL ); src0 = translate_src_register( emit, reg ); - if (reg->SrcRegisterExtMod.Absolute || - reg->SrcRegister.Negate != reg->SrcRegisterExtMod.Negate || - reg->SrcRegister.Indirect || - reg->SrcRegister.SwizzleX != 0 || - reg->SrcRegister.SwizzleY != 1 || - reg->SrcRegister.SwizzleZ != 2 || - reg->SrcRegister.File != TGSI_FILE_TEMPORARY) + if (reg->Register.Absolute || + reg->Register.Negate || + reg->Register.Indirect || + reg->Register.SwizzleX != 0 || + reg->Register.SwizzleY != 1 || + reg->Register.SwizzleZ != 2 || + reg->Register.File != TGSI_FILE_TEMPORARY) { SVGA3dShaderDestToken temp = get_temp( emit ); @@ -1154,9 +1154,9 @@ static boolean emit_select_op(struct svga_shader_emitter *emit, { SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); struct src_register src0 = translate_src_register( - emit, &insn->FullSrcRegisters[0] ); + emit, &insn->Src[0] ); struct src_register src1 = translate_src_register( - emit, &insn->FullSrcRegisters[1] ); + emit, &insn->Src[1] ); return emit_select( emit, compare, dst, src0, src1 ); } @@ -1189,8 +1189,8 @@ static boolean emit_tex2(struct svga_shader_emitter *emit, return FALSE; } - src0 = translate_src_register( emit, &insn->FullSrcRegisters[0] ); - src1 = translate_src_register( emit, &insn->FullSrcRegisters[1] ); + src0 = translate_src_register( emit, &insn->Src[0] ); + src1 = translate_src_register( emit, &insn->Src[1] ); if (emit->key.fkey.tex[src1.base.num].unnormalized) { struct src_register wh = get_tex_dimensions( emit, src1.base.num ); @@ -1231,9 +1231,9 @@ static boolean emit_tex3(struct svga_shader_emitter *emit, break; } - src0 = translate_src_register( emit, &insn->FullSrcRegisters[0] ); - src1 = translate_src_register( emit, &insn->FullSrcRegisters[1] ); - src2 = translate_src_register( emit, &insn->FullSrcRegisters[2] ); + src0 = translate_src_register( emit, &insn->Src[0] ); + src1 = translate_src_register( emit, &insn->Src[1] ); + src2 = translate_src_register( emit, &insn->Src[2] ); return submit_op3( emit, inst, dst, src0, src1, src2 ); } @@ -1245,9 +1245,9 @@ static boolean emit_tex(struct svga_shader_emitter *emit, SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); struct src_register src0 = - translate_src_register( emit, &insn->FullSrcRegisters[0] ); + translate_src_register( emit, &insn->Src[0] ); struct src_register src1 = - translate_src_register( emit, &insn->FullSrcRegisters[1] ); + translate_src_register( emit, &insn->Src[1] ); SVGA3dShaderDestToken tex_result; @@ -1359,7 +1359,7 @@ static boolean emit_scalar_op1( struct svga_shader_emitter *emit, inst = inst_token( opcode ); dst = translate_dst_register( emit, insn, 0 ); - src = translate_src_register( emit, &insn->FullSrcRegisters[0] ); + src = translate_src_register( emit, &insn->Src[0] ); src = scalar( src, TGSI_SWIZZLE_X ); return submit_op1( emit, inst, dst, src ); @@ -1370,7 +1370,7 @@ static boolean emit_simple_instruction(struct svga_shader_emitter *emit, unsigned opcode, const struct tgsi_full_instruction *insn ) { - const struct tgsi_full_src_register *src = insn->FullSrcRegisters; + const struct tgsi_full_src_register *src = insn->Src; SVGA3dShaderInstToken inst; SVGA3dShaderDestToken dst; @@ -1428,13 +1428,13 @@ static boolean emit_pow(struct svga_shader_emitter *emit, { SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); struct src_register src0 = translate_src_register( - emit, &insn->FullSrcRegisters[0] ); + emit, &insn->Src[0] ); struct src_register src1 = translate_src_register( - emit, &insn->FullSrcRegisters[1] ); + emit, &insn->Src[1] ); boolean need_tmp = FALSE; /* POW can only output to a temporary */ - if (insn->FullDstRegisters[0].DstRegister.File != TGSI_FILE_TEMPORARY) + if (insn->Dst[0].Register.File != TGSI_FILE_TEMPORARY) need_tmp = TRUE; /* POW src1 must not be the same register as dst */ @@ -1463,9 +1463,9 @@ static boolean emit_xpd(struct svga_shader_emitter *emit, { SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); const struct src_register src0 = translate_src_register( - emit, &insn->FullSrcRegisters[0] ); + emit, &insn->Src[0] ); const struct src_register src1 = translate_src_register( - emit, &insn->FullSrcRegisters[1] ); + emit, &insn->Src[1] ); boolean need_dst_tmp = FALSE; /* XPD can only output to a temporary */ @@ -1517,11 +1517,11 @@ static boolean emit_lrp(struct svga_shader_emitter *emit, SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); SVGA3dShaderDestToken tmp; const struct src_register src0 = translate_src_register( - emit, &insn->FullSrcRegisters[0] ); + emit, &insn->Src[0] ); const struct src_register src1 = translate_src_register( - emit, &insn->FullSrcRegisters[1] ); + emit, &insn->Src[1] ); const struct src_register src2 = translate_src_register( - emit, &insn->FullSrcRegisters[2] ); + emit, &insn->Src[2] ); boolean need_dst_tmp = FALSE; /* The dst reg must not be the same as src0 or src2 */ @@ -1568,9 +1568,9 @@ static boolean emit_dst_insn(struct svga_shader_emitter *emit, SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); SVGA3dShaderDestToken tmp; const struct src_register src0 = translate_src_register( - emit, &insn->FullSrcRegisters[0] ); + emit, &insn->Src[0] ); const struct src_register src1 = translate_src_register( - emit, &insn->FullSrcRegisters[1] ); + emit, &insn->Src[1] ); struct src_register zero = get_zero_immediate( emit ); boolean need_tmp = FALSE; @@ -1633,7 +1633,7 @@ static boolean emit_exp(struct svga_shader_emitter *emit, { SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); struct src_register src0 = - translate_src_register( emit, &insn->FullSrcRegisters[0] ); + translate_src_register( emit, &insn->Src[0] ); struct src_register zero = get_zero_immediate( emit ); SVGA3dShaderDestToken fraction; @@ -1723,7 +1723,7 @@ static boolean emit_lit(struct svga_shader_emitter *emit, SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); SVGA3dShaderDestToken tmp = get_temp( emit ); const struct src_register src0 = translate_src_register( - emit, &insn->FullSrcRegisters[0] ); + emit, &insn->Src[0] ); struct src_register zero = get_zero_immediate( emit ); /* tmp = pow(src.y, src.w) @@ -1806,7 +1806,7 @@ static boolean emit_ex2( struct svga_shader_emitter *emit, inst = inst_token( SVGA3DOP_EXP ); dst = translate_dst_register( emit, insn, 0 ); - src0 = translate_src_register( emit, &insn->FullSrcRegisters[0] ); + src0 = translate_src_register( emit, &insn->Src[0] ); src0 = scalar( src0, TGSI_SWIZZLE_X ); if (dst.mask != TGSI_WRITEMASK_XYZW) { @@ -1829,7 +1829,7 @@ static boolean emit_log(struct svga_shader_emitter *emit, { SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); struct src_register src0 = - translate_src_register( emit, &insn->FullSrcRegisters[0] ); + translate_src_register( emit, &insn->Src[0] ); struct src_register zero = get_zero_immediate( emit ); SVGA3dShaderDestToken abs_tmp; struct src_register abs_src0; @@ -1953,7 +1953,7 @@ static boolean emit_bgnsub( struct svga_shader_emitter *emit, static boolean emit_call( struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn ) { - unsigned position = insn->InstructionExtLabel.Label; + unsigned position = insn->Label.Label; unsigned i; for (i = 0; i < emit->nr_labels; i++) { @@ -2109,7 +2109,7 @@ static boolean svga_emit_instruction( struct svga_shader_emitter *emit, case TGSI_OPCODE_I2F: case TGSI_OPCODE_NOT: case TGSI_OPCODE_SHL: - case TGSI_OPCODE_SHR: + case TGSI_OPCODE_ISHR: case TGSI_OPCODE_XOR: return FALSE; @@ -2543,27 +2543,27 @@ pre_parse_instruction( struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn, int current_arl) { - if (insn->FullSrcRegisters[0].SrcRegister.Indirect && - insn->FullSrcRegisters[0].SrcRegisterInd.File == TGSI_FILE_ADDRESS) { - const struct tgsi_full_src_register *reg = &insn->FullSrcRegisters[0]; - if (reg->SrcRegister.Index < 0) { - pre_parse_add_indirect(emit, reg->SrcRegister.Index, current_arl); + if (insn->Src[0].Register.Indirect && + insn->Src[0].Indirect.File == TGSI_FILE_ADDRESS) { + const struct tgsi_full_src_register *reg = &insn->Src[0]; + if (reg->Register.Index < 0) { + pre_parse_add_indirect(emit, reg->Register.Index, current_arl); } } - if (insn->FullSrcRegisters[1].SrcRegister.Indirect && - insn->FullSrcRegisters[1].SrcRegisterInd.File == TGSI_FILE_ADDRESS) { - const struct tgsi_full_src_register *reg = &insn->FullSrcRegisters[1]; - if (reg->SrcRegister.Index < 0) { - pre_parse_add_indirect(emit, reg->SrcRegister.Index, current_arl); + if (insn->Src[1].Register.Indirect && + insn->Src[1].Indirect.File == TGSI_FILE_ADDRESS) { + const struct tgsi_full_src_register *reg = &insn->Src[1]; + if (reg->Register.Index < 0) { + pre_parse_add_indirect(emit, reg->Register.Index, current_arl); } } - if (insn->FullSrcRegisters[2].SrcRegister.Indirect && - insn->FullSrcRegisters[2].SrcRegisterInd.File == TGSI_FILE_ADDRESS) { - const struct tgsi_full_src_register *reg = &insn->FullSrcRegisters[2]; - if (reg->SrcRegister.Index < 0) { - pre_parse_add_indirect(emit, reg->SrcRegister.Index, current_arl); + if (insn->Src[2].Register.Indirect && + insn->Src[2].Indirect.File == TGSI_FILE_ADDRESS) { + const struct tgsi_full_src_register *reg = &insn->Src[2]; + if (reg->Register.Index < 0) { + pre_parse_add_indirect(emit, reg->Register.Index, current_arl); } } diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.c b/src/gallium/drivers/svga/svgadump/svga_dump.c index 910afa25287..d59fb89a58c 100644 --- a/src/gallium/drivers/svga/svgadump/svga_dump.c +++ b/src/gallium/drivers/svga/svgadump/svga_dump.c @@ -42,554 +42,554 @@ dump_SVGA3dVertexDecl(const SVGA3dVertexDecl *cmd) { switch((*cmd).identity.type) { case SVGA3D_DECLTYPE_FLOAT1: - debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT1\n"); + _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT1\n"); break; case SVGA3D_DECLTYPE_FLOAT2: - debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT2\n"); + _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT2\n"); break; case SVGA3D_DECLTYPE_FLOAT3: - debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT3\n"); + _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT3\n"); break; case SVGA3D_DECLTYPE_FLOAT4: - debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT4\n"); + _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT4\n"); break; case SVGA3D_DECLTYPE_D3DCOLOR: - debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_D3DCOLOR\n"); + _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_D3DCOLOR\n"); break; case SVGA3D_DECLTYPE_UBYTE4: - debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_UBYTE4\n"); + _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_UBYTE4\n"); break; case SVGA3D_DECLTYPE_SHORT2: - debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_SHORT2\n"); + _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_SHORT2\n"); break; case SVGA3D_DECLTYPE_SHORT4: - debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_SHORT4\n"); + _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_SHORT4\n"); break; case SVGA3D_DECLTYPE_UBYTE4N: - debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_UBYTE4N\n"); + _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_UBYTE4N\n"); break; case SVGA3D_DECLTYPE_SHORT2N: - debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_SHORT2N\n"); + _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_SHORT2N\n"); break; case SVGA3D_DECLTYPE_SHORT4N: - debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_SHORT4N\n"); + _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_SHORT4N\n"); break; case SVGA3D_DECLTYPE_USHORT2N: - debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_USHORT2N\n"); + _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_USHORT2N\n"); break; case SVGA3D_DECLTYPE_USHORT4N: - debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_USHORT4N\n"); + _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_USHORT4N\n"); break; case SVGA3D_DECLTYPE_UDEC3: - debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_UDEC3\n"); + _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_UDEC3\n"); break; case SVGA3D_DECLTYPE_DEC3N: - debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_DEC3N\n"); + _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_DEC3N\n"); break; case SVGA3D_DECLTYPE_FLOAT16_2: - debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT16_2\n"); + _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT16_2\n"); break; case SVGA3D_DECLTYPE_FLOAT16_4: - debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT16_4\n"); + _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT16_4\n"); break; case SVGA3D_DECLTYPE_MAX: - debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_MAX\n"); + _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_MAX\n"); break; default: - debug_printf("\t\t.identity.type = %i\n", (*cmd).identity.type); + _debug_printf("\t\t.identity.type = %i\n", (*cmd).identity.type); break; } switch((*cmd).identity.method) { case SVGA3D_DECLMETHOD_DEFAULT: - debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_DEFAULT\n"); + _debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_DEFAULT\n"); break; case SVGA3D_DECLMETHOD_PARTIALU: - debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_PARTIALU\n"); + _debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_PARTIALU\n"); break; case SVGA3D_DECLMETHOD_PARTIALV: - debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_PARTIALV\n"); + _debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_PARTIALV\n"); break; case SVGA3D_DECLMETHOD_CROSSUV: - debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_CROSSUV\n"); + _debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_CROSSUV\n"); break; case SVGA3D_DECLMETHOD_UV: - debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_UV\n"); + _debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_UV\n"); break; case SVGA3D_DECLMETHOD_LOOKUP: - debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_LOOKUP\n"); + _debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_LOOKUP\n"); break; case SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED: - debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED\n"); + _debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED\n"); break; default: - debug_printf("\t\t.identity.method = %i\n", (*cmd).identity.method); + _debug_printf("\t\t.identity.method = %i\n", (*cmd).identity.method); break; } switch((*cmd).identity.usage) { case SVGA3D_DECLUSAGE_POSITION: - debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_POSITION\n"); + _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_POSITION\n"); break; case SVGA3D_DECLUSAGE_BLENDWEIGHT: - debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_BLENDWEIGHT\n"); + _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_BLENDWEIGHT\n"); break; case SVGA3D_DECLUSAGE_BLENDINDICES: - debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_BLENDINDICES\n"); + _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_BLENDINDICES\n"); break; case SVGA3D_DECLUSAGE_NORMAL: - debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_NORMAL\n"); + _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_NORMAL\n"); break; case SVGA3D_DECLUSAGE_PSIZE: - debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_PSIZE\n"); + _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_PSIZE\n"); break; case SVGA3D_DECLUSAGE_TEXCOORD: - debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_TEXCOORD\n"); + _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_TEXCOORD\n"); break; case SVGA3D_DECLUSAGE_TANGENT: - debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_TANGENT\n"); + _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_TANGENT\n"); break; case SVGA3D_DECLUSAGE_BINORMAL: - debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_BINORMAL\n"); + _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_BINORMAL\n"); break; case SVGA3D_DECLUSAGE_TESSFACTOR: - debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_TESSFACTOR\n"); + _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_TESSFACTOR\n"); break; case SVGA3D_DECLUSAGE_POSITIONT: - debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_POSITIONT\n"); + _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_POSITIONT\n"); break; case SVGA3D_DECLUSAGE_COLOR: - debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_COLOR\n"); + _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_COLOR\n"); break; case SVGA3D_DECLUSAGE_FOG: - debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_FOG\n"); + _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_FOG\n"); break; case SVGA3D_DECLUSAGE_DEPTH: - debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_DEPTH\n"); + _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_DEPTH\n"); break; case SVGA3D_DECLUSAGE_SAMPLE: - debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_SAMPLE\n"); + _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_SAMPLE\n"); break; case SVGA3D_DECLUSAGE_MAX: - debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_MAX\n"); + _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_MAX\n"); break; default: - debug_printf("\t\t.identity.usage = %i\n", (*cmd).identity.usage); + _debug_printf("\t\t.identity.usage = %i\n", (*cmd).identity.usage); break; } - debug_printf("\t\t.identity.usageIndex = %u\n", (*cmd).identity.usageIndex); - debug_printf("\t\t.array.surfaceId = %u\n", (*cmd).array.surfaceId); - debug_printf("\t\t.array.offset = %u\n", (*cmd).array.offset); - debug_printf("\t\t.array.stride = %u\n", (*cmd).array.stride); - debug_printf("\t\t.rangeHint.first = %u\n", (*cmd).rangeHint.first); - debug_printf("\t\t.rangeHint.last = %u\n", (*cmd).rangeHint.last); + _debug_printf("\t\t.identity.usageIndex = %u\n", (*cmd).identity.usageIndex); + _debug_printf("\t\t.array.surfaceId = %u\n", (*cmd).array.surfaceId); + _debug_printf("\t\t.array.offset = %u\n", (*cmd).array.offset); + _debug_printf("\t\t.array.stride = %u\n", (*cmd).array.stride); + _debug_printf("\t\t.rangeHint.first = %u\n", (*cmd).rangeHint.first); + _debug_printf("\t\t.rangeHint.last = %u\n", (*cmd).rangeHint.last); } static void dump_SVGA3dTextureState(const SVGA3dTextureState *cmd) { - debug_printf("\t\t.stage = %u\n", (*cmd).stage); + _debug_printf("\t\t.stage = %u\n", (*cmd).stage); switch((*cmd).name) { case SVGA3D_TS_INVALID: - debug_printf("\t\t.name = SVGA3D_TS_INVALID\n"); + _debug_printf("\t\t.name = SVGA3D_TS_INVALID\n"); break; case SVGA3D_TS_BIND_TEXTURE: - debug_printf("\t\t.name = SVGA3D_TS_BIND_TEXTURE\n"); + _debug_printf("\t\t.name = SVGA3D_TS_BIND_TEXTURE\n"); break; case SVGA3D_TS_COLOROP: - debug_printf("\t\t.name = SVGA3D_TS_COLOROP\n"); + _debug_printf("\t\t.name = SVGA3D_TS_COLOROP\n"); break; case SVGA3D_TS_COLORARG1: - debug_printf("\t\t.name = SVGA3D_TS_COLORARG1\n"); + _debug_printf("\t\t.name = SVGA3D_TS_COLORARG1\n"); break; case SVGA3D_TS_COLORARG2: - debug_printf("\t\t.name = SVGA3D_TS_COLORARG2\n"); + _debug_printf("\t\t.name = SVGA3D_TS_COLORARG2\n"); break; case SVGA3D_TS_ALPHAOP: - debug_printf("\t\t.name = SVGA3D_TS_ALPHAOP\n"); + _debug_printf("\t\t.name = SVGA3D_TS_ALPHAOP\n"); break; case SVGA3D_TS_ALPHAARG1: - debug_printf("\t\t.name = SVGA3D_TS_ALPHAARG1\n"); + _debug_printf("\t\t.name = SVGA3D_TS_ALPHAARG1\n"); break; case SVGA3D_TS_ALPHAARG2: - debug_printf("\t\t.name = SVGA3D_TS_ALPHAARG2\n"); + _debug_printf("\t\t.name = SVGA3D_TS_ALPHAARG2\n"); break; case SVGA3D_TS_ADDRESSU: - debug_printf("\t\t.name = SVGA3D_TS_ADDRESSU\n"); + _debug_printf("\t\t.name = SVGA3D_TS_ADDRESSU\n"); break; case SVGA3D_TS_ADDRESSV: - debug_printf("\t\t.name = SVGA3D_TS_ADDRESSV\n"); + _debug_printf("\t\t.name = SVGA3D_TS_ADDRESSV\n"); break; case SVGA3D_TS_MIPFILTER: - debug_printf("\t\t.name = SVGA3D_TS_MIPFILTER\n"); + _debug_printf("\t\t.name = SVGA3D_TS_MIPFILTER\n"); break; case SVGA3D_TS_MAGFILTER: - debug_printf("\t\t.name = SVGA3D_TS_MAGFILTER\n"); + _debug_printf("\t\t.name = SVGA3D_TS_MAGFILTER\n"); break; case SVGA3D_TS_MINFILTER: - debug_printf("\t\t.name = SVGA3D_TS_MINFILTER\n"); + _debug_printf("\t\t.name = SVGA3D_TS_MINFILTER\n"); break; case SVGA3D_TS_BORDERCOLOR: - debug_printf("\t\t.name = SVGA3D_TS_BORDERCOLOR\n"); + _debug_printf("\t\t.name = SVGA3D_TS_BORDERCOLOR\n"); break; case SVGA3D_TS_TEXCOORDINDEX: - debug_printf("\t\t.name = SVGA3D_TS_TEXCOORDINDEX\n"); + _debug_printf("\t\t.name = SVGA3D_TS_TEXCOORDINDEX\n"); break; case SVGA3D_TS_TEXTURETRANSFORMFLAGS: - debug_printf("\t\t.name = SVGA3D_TS_TEXTURETRANSFORMFLAGS\n"); + _debug_printf("\t\t.name = SVGA3D_TS_TEXTURETRANSFORMFLAGS\n"); break; case SVGA3D_TS_TEXCOORDGEN: - debug_printf("\t\t.name = SVGA3D_TS_TEXCOORDGEN\n"); + _debug_printf("\t\t.name = SVGA3D_TS_TEXCOORDGEN\n"); break; case SVGA3D_TS_BUMPENVMAT00: - debug_printf("\t\t.name = SVGA3D_TS_BUMPENVMAT00\n"); + _debug_printf("\t\t.name = SVGA3D_TS_BUMPENVMAT00\n"); break; case SVGA3D_TS_BUMPENVMAT01: - debug_printf("\t\t.name = SVGA3D_TS_BUMPENVMAT01\n"); + _debug_printf("\t\t.name = SVGA3D_TS_BUMPENVMAT01\n"); break; case SVGA3D_TS_BUMPENVMAT10: - debug_printf("\t\t.name = SVGA3D_TS_BUMPENVMAT10\n"); + _debug_printf("\t\t.name = SVGA3D_TS_BUMPENVMAT10\n"); break; case SVGA3D_TS_BUMPENVMAT11: - debug_printf("\t\t.name = SVGA3D_TS_BUMPENVMAT11\n"); + _debug_printf("\t\t.name = SVGA3D_TS_BUMPENVMAT11\n"); break; case SVGA3D_TS_TEXTURE_MIPMAP_LEVEL: - debug_printf("\t\t.name = SVGA3D_TS_TEXTURE_MIPMAP_LEVEL\n"); + _debug_printf("\t\t.name = SVGA3D_TS_TEXTURE_MIPMAP_LEVEL\n"); break; case SVGA3D_TS_TEXTURE_LOD_BIAS: - debug_printf("\t\t.name = SVGA3D_TS_TEXTURE_LOD_BIAS\n"); + _debug_printf("\t\t.name = SVGA3D_TS_TEXTURE_LOD_BIAS\n"); break; case SVGA3D_TS_TEXTURE_ANISOTROPIC_LEVEL: - debug_printf("\t\t.name = SVGA3D_TS_TEXTURE_ANISOTROPIC_LEVEL\n"); + _debug_printf("\t\t.name = SVGA3D_TS_TEXTURE_ANISOTROPIC_LEVEL\n"); break; case SVGA3D_TS_ADDRESSW: - debug_printf("\t\t.name = SVGA3D_TS_ADDRESSW\n"); + _debug_printf("\t\t.name = SVGA3D_TS_ADDRESSW\n"); break; case SVGA3D_TS_GAMMA: - debug_printf("\t\t.name = SVGA3D_TS_GAMMA\n"); + _debug_printf("\t\t.name = SVGA3D_TS_GAMMA\n"); break; case SVGA3D_TS_BUMPENVLSCALE: - debug_printf("\t\t.name = SVGA3D_TS_BUMPENVLSCALE\n"); + _debug_printf("\t\t.name = SVGA3D_TS_BUMPENVLSCALE\n"); break; case SVGA3D_TS_BUMPENVLOFFSET: - debug_printf("\t\t.name = SVGA3D_TS_BUMPENVLOFFSET\n"); + _debug_printf("\t\t.name = SVGA3D_TS_BUMPENVLOFFSET\n"); break; case SVGA3D_TS_COLORARG0: - debug_printf("\t\t.name = SVGA3D_TS_COLORARG0\n"); + _debug_printf("\t\t.name = SVGA3D_TS_COLORARG0\n"); break; case SVGA3D_TS_ALPHAARG0: - debug_printf("\t\t.name = SVGA3D_TS_ALPHAARG0\n"); + _debug_printf("\t\t.name = SVGA3D_TS_ALPHAARG0\n"); break; case SVGA3D_TS_MAX: - debug_printf("\t\t.name = SVGA3D_TS_MAX\n"); + _debug_printf("\t\t.name = SVGA3D_TS_MAX\n"); break; default: - debug_printf("\t\t.name = %i\n", (*cmd).name); + _debug_printf("\t\t.name = %i\n", (*cmd).name); break; } - debug_printf("\t\t.value = %u\n", (*cmd).value); - debug_printf("\t\t.floatValue = %f\n", (*cmd).floatValue); + _debug_printf("\t\t.value = %u\n", (*cmd).value); + _debug_printf("\t\t.floatValue = %f\n", (*cmd).floatValue); } static void dump_SVGA3dCopyBox(const SVGA3dCopyBox *cmd) { - debug_printf("\t\t.x = %u\n", (*cmd).x); - debug_printf("\t\t.y = %u\n", (*cmd).y); - debug_printf("\t\t.z = %u\n", (*cmd).z); - debug_printf("\t\t.w = %u\n", (*cmd).w); - debug_printf("\t\t.h = %u\n", (*cmd).h); - debug_printf("\t\t.d = %u\n", (*cmd).d); - debug_printf("\t\t.srcx = %u\n", (*cmd).srcx); - debug_printf("\t\t.srcy = %u\n", (*cmd).srcy); - debug_printf("\t\t.srcz = %u\n", (*cmd).srcz); + _debug_printf("\t\t.x = %u\n", (*cmd).x); + _debug_printf("\t\t.y = %u\n", (*cmd).y); + _debug_printf("\t\t.z = %u\n", (*cmd).z); + _debug_printf("\t\t.w = %u\n", (*cmd).w); + _debug_printf("\t\t.h = %u\n", (*cmd).h); + _debug_printf("\t\t.d = %u\n", (*cmd).d); + _debug_printf("\t\t.srcx = %u\n", (*cmd).srcx); + _debug_printf("\t\t.srcy = %u\n", (*cmd).srcy); + _debug_printf("\t\t.srcz = %u\n", (*cmd).srcz); } static void dump_SVGA3dCmdSetClipPlane(const SVGA3dCmdSetClipPlane *cmd) { - debug_printf("\t\t.cid = %u\n", (*cmd).cid); - debug_printf("\t\t.index = %u\n", (*cmd).index); - debug_printf("\t\t.plane[0] = %f\n", (*cmd).plane[0]); - debug_printf("\t\t.plane[1] = %f\n", (*cmd).plane[1]); - debug_printf("\t\t.plane[2] = %f\n", (*cmd).plane[2]); - debug_printf("\t\t.plane[3] = %f\n", (*cmd).plane[3]); + _debug_printf("\t\t.cid = %u\n", (*cmd).cid); + _debug_printf("\t\t.index = %u\n", (*cmd).index); + _debug_printf("\t\t.plane[0] = %f\n", (*cmd).plane[0]); + _debug_printf("\t\t.plane[1] = %f\n", (*cmd).plane[1]); + _debug_printf("\t\t.plane[2] = %f\n", (*cmd).plane[2]); + _debug_printf("\t\t.plane[3] = %f\n", (*cmd).plane[3]); } static void dump_SVGA3dCmdWaitForQuery(const SVGA3dCmdWaitForQuery *cmd) { - debug_printf("\t\t.cid = %u\n", (*cmd).cid); + _debug_printf("\t\t.cid = %u\n", (*cmd).cid); switch((*cmd).type) { case SVGA3D_QUERYTYPE_OCCLUSION: - debug_printf("\t\t.type = SVGA3D_QUERYTYPE_OCCLUSION\n"); + _debug_printf("\t\t.type = SVGA3D_QUERYTYPE_OCCLUSION\n"); break; case SVGA3D_QUERYTYPE_MAX: - debug_printf("\t\t.type = SVGA3D_QUERYTYPE_MAX\n"); + _debug_printf("\t\t.type = SVGA3D_QUERYTYPE_MAX\n"); break; default: - debug_printf("\t\t.type = %i\n", (*cmd).type); + _debug_printf("\t\t.type = %i\n", (*cmd).type); break; } - debug_printf("\t\t.guestResult.gmrId = %u\n", (*cmd).guestResult.gmrId); - debug_printf("\t\t.guestResult.offset = %u\n", (*cmd).guestResult.offset); + _debug_printf("\t\t.guestResult.gmrId = %u\n", (*cmd).guestResult.gmrId); + _debug_printf("\t\t.guestResult.offset = %u\n", (*cmd).guestResult.offset); } static void dump_SVGA3dCmdSetRenderTarget(const SVGA3dCmdSetRenderTarget *cmd) { - debug_printf("\t\t.cid = %u\n", (*cmd).cid); + _debug_printf("\t\t.cid = %u\n", (*cmd).cid); switch((*cmd).type) { case SVGA3D_RT_DEPTH: - debug_printf("\t\t.type = SVGA3D_RT_DEPTH\n"); + _debug_printf("\t\t.type = SVGA3D_RT_DEPTH\n"); break; case SVGA3D_RT_STENCIL: - debug_printf("\t\t.type = SVGA3D_RT_STENCIL\n"); + _debug_printf("\t\t.type = SVGA3D_RT_STENCIL\n"); break; default: - debug_printf("\t\t.type = SVGA3D_RT_COLOR%u\n", (*cmd).type - SVGA3D_RT_COLOR0); + _debug_printf("\t\t.type = SVGA3D_RT_COLOR%u\n", (*cmd).type - SVGA3D_RT_COLOR0); break; } - debug_printf("\t\t.target.sid = %u\n", (*cmd).target.sid); - debug_printf("\t\t.target.face = %u\n", (*cmd).target.face); - debug_printf("\t\t.target.mipmap = %u\n", (*cmd).target.mipmap); + _debug_printf("\t\t.target.sid = %u\n", (*cmd).target.sid); + _debug_printf("\t\t.target.face = %u\n", (*cmd).target.face); + _debug_printf("\t\t.target.mipmap = %u\n", (*cmd).target.mipmap); } static void dump_SVGA3dCmdSetTextureState(const SVGA3dCmdSetTextureState *cmd) { - debug_printf("\t\t.cid = %u\n", (*cmd).cid); + _debug_printf("\t\t.cid = %u\n", (*cmd).cid); } static void dump_SVGA3dCmdSurfaceCopy(const SVGA3dCmdSurfaceCopy *cmd) { - debug_printf("\t\t.src.sid = %u\n", (*cmd).src.sid); - debug_printf("\t\t.src.face = %u\n", (*cmd).src.face); - debug_printf("\t\t.src.mipmap = %u\n", (*cmd).src.mipmap); - debug_printf("\t\t.dest.sid = %u\n", (*cmd).dest.sid); - debug_printf("\t\t.dest.face = %u\n", (*cmd).dest.face); - debug_printf("\t\t.dest.mipmap = %u\n", (*cmd).dest.mipmap); + _debug_printf("\t\t.src.sid = %u\n", (*cmd).src.sid); + _debug_printf("\t\t.src.face = %u\n", (*cmd).src.face); + _debug_printf("\t\t.src.mipmap = %u\n", (*cmd).src.mipmap); + _debug_printf("\t\t.dest.sid = %u\n", (*cmd).dest.sid); + _debug_printf("\t\t.dest.face = %u\n", (*cmd).dest.face); + _debug_printf("\t\t.dest.mipmap = %u\n", (*cmd).dest.mipmap); } static void dump_SVGA3dCmdSetMaterial(const SVGA3dCmdSetMaterial *cmd) { - debug_printf("\t\t.cid = %u\n", (*cmd).cid); + _debug_printf("\t\t.cid = %u\n", (*cmd).cid); switch((*cmd).face) { case SVGA3D_FACE_INVALID: - debug_printf("\t\t.face = SVGA3D_FACE_INVALID\n"); + _debug_printf("\t\t.face = SVGA3D_FACE_INVALID\n"); break; case SVGA3D_FACE_NONE: - debug_printf("\t\t.face = SVGA3D_FACE_NONE\n"); + _debug_printf("\t\t.face = SVGA3D_FACE_NONE\n"); break; case SVGA3D_FACE_FRONT: - debug_printf("\t\t.face = SVGA3D_FACE_FRONT\n"); + _debug_printf("\t\t.face = SVGA3D_FACE_FRONT\n"); break; case SVGA3D_FACE_BACK: - debug_printf("\t\t.face = SVGA3D_FACE_BACK\n"); + _debug_printf("\t\t.face = SVGA3D_FACE_BACK\n"); break; case SVGA3D_FACE_FRONT_BACK: - debug_printf("\t\t.face = SVGA3D_FACE_FRONT_BACK\n"); + _debug_printf("\t\t.face = SVGA3D_FACE_FRONT_BACK\n"); break; case SVGA3D_FACE_MAX: - debug_printf("\t\t.face = SVGA3D_FACE_MAX\n"); + _debug_printf("\t\t.face = SVGA3D_FACE_MAX\n"); break; default: - debug_printf("\t\t.face = %i\n", (*cmd).face); + _debug_printf("\t\t.face = %i\n", (*cmd).face); break; } - debug_printf("\t\t.material.diffuse[0] = %f\n", (*cmd).material.diffuse[0]); - debug_printf("\t\t.material.diffuse[1] = %f\n", (*cmd).material.diffuse[1]); - debug_printf("\t\t.material.diffuse[2] = %f\n", (*cmd).material.diffuse[2]); - debug_printf("\t\t.material.diffuse[3] = %f\n", (*cmd).material.diffuse[3]); - debug_printf("\t\t.material.ambient[0] = %f\n", (*cmd).material.ambient[0]); - debug_printf("\t\t.material.ambient[1] = %f\n", (*cmd).material.ambient[1]); - debug_printf("\t\t.material.ambient[2] = %f\n", (*cmd).material.ambient[2]); - debug_printf("\t\t.material.ambient[3] = %f\n", (*cmd).material.ambient[3]); - debug_printf("\t\t.material.specular[0] = %f\n", (*cmd).material.specular[0]); - debug_printf("\t\t.material.specular[1] = %f\n", (*cmd).material.specular[1]); - debug_printf("\t\t.material.specular[2] = %f\n", (*cmd).material.specular[2]); - debug_printf("\t\t.material.specular[3] = %f\n", (*cmd).material.specular[3]); - debug_printf("\t\t.material.emissive[0] = %f\n", (*cmd).material.emissive[0]); - debug_printf("\t\t.material.emissive[1] = %f\n", (*cmd).material.emissive[1]); - debug_printf("\t\t.material.emissive[2] = %f\n", (*cmd).material.emissive[2]); - debug_printf("\t\t.material.emissive[3] = %f\n", (*cmd).material.emissive[3]); - debug_printf("\t\t.material.shininess = %f\n", (*cmd).material.shininess); + _debug_printf("\t\t.material.diffuse[0] = %f\n", (*cmd).material.diffuse[0]); + _debug_printf("\t\t.material.diffuse[1] = %f\n", (*cmd).material.diffuse[1]); + _debug_printf("\t\t.material.diffuse[2] = %f\n", (*cmd).material.diffuse[2]); + _debug_printf("\t\t.material.diffuse[3] = %f\n", (*cmd).material.diffuse[3]); + _debug_printf("\t\t.material.ambient[0] = %f\n", (*cmd).material.ambient[0]); + _debug_printf("\t\t.material.ambient[1] = %f\n", (*cmd).material.ambient[1]); + _debug_printf("\t\t.material.ambient[2] = %f\n", (*cmd).material.ambient[2]); + _debug_printf("\t\t.material.ambient[3] = %f\n", (*cmd).material.ambient[3]); + _debug_printf("\t\t.material.specular[0] = %f\n", (*cmd).material.specular[0]); + _debug_printf("\t\t.material.specular[1] = %f\n", (*cmd).material.specular[1]); + _debug_printf("\t\t.material.specular[2] = %f\n", (*cmd).material.specular[2]); + _debug_printf("\t\t.material.specular[3] = %f\n", (*cmd).material.specular[3]); + _debug_printf("\t\t.material.emissive[0] = %f\n", (*cmd).material.emissive[0]); + _debug_printf("\t\t.material.emissive[1] = %f\n", (*cmd).material.emissive[1]); + _debug_printf("\t\t.material.emissive[2] = %f\n", (*cmd).material.emissive[2]); + _debug_printf("\t\t.material.emissive[3] = %f\n", (*cmd).material.emissive[3]); + _debug_printf("\t\t.material.shininess = %f\n", (*cmd).material.shininess); } static void dump_SVGA3dCmdSetLightData(const SVGA3dCmdSetLightData *cmd) { - debug_printf("\t\t.cid = %u\n", (*cmd).cid); - debug_printf("\t\t.index = %u\n", (*cmd).index); + _debug_printf("\t\t.cid = %u\n", (*cmd).cid); + _debug_printf("\t\t.index = %u\n", (*cmd).index); switch((*cmd).data.type) { case SVGA3D_LIGHTTYPE_INVALID: - debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_INVALID\n"); + _debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_INVALID\n"); break; case SVGA3D_LIGHTTYPE_POINT: - debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_POINT\n"); + _debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_POINT\n"); break; case SVGA3D_LIGHTTYPE_SPOT1: - debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_SPOT1\n"); + _debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_SPOT1\n"); break; case SVGA3D_LIGHTTYPE_SPOT2: - debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_SPOT2\n"); + _debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_SPOT2\n"); break; case SVGA3D_LIGHTTYPE_DIRECTIONAL: - debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_DIRECTIONAL\n"); + _debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_DIRECTIONAL\n"); break; case SVGA3D_LIGHTTYPE_MAX: - debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_MAX\n"); + _debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_MAX\n"); break; default: - debug_printf("\t\t.data.type = %i\n", (*cmd).data.type); + _debug_printf("\t\t.data.type = %i\n", (*cmd).data.type); break; } - debug_printf("\t\t.data.inWorldSpace = %u\n", (*cmd).data.inWorldSpace); - debug_printf("\t\t.data.diffuse[0] = %f\n", (*cmd).data.diffuse[0]); - debug_printf("\t\t.data.diffuse[1] = %f\n", (*cmd).data.diffuse[1]); - debug_printf("\t\t.data.diffuse[2] = %f\n", (*cmd).data.diffuse[2]); - debug_printf("\t\t.data.diffuse[3] = %f\n", (*cmd).data.diffuse[3]); - debug_printf("\t\t.data.specular[0] = %f\n", (*cmd).data.specular[0]); - debug_printf("\t\t.data.specular[1] = %f\n", (*cmd).data.specular[1]); - debug_printf("\t\t.data.specular[2] = %f\n", (*cmd).data.specular[2]); - debug_printf("\t\t.data.specular[3] = %f\n", (*cmd).data.specular[3]); - debug_printf("\t\t.data.ambient[0] = %f\n", (*cmd).data.ambient[0]); - debug_printf("\t\t.data.ambient[1] = %f\n", (*cmd).data.ambient[1]); - debug_printf("\t\t.data.ambient[2] = %f\n", (*cmd).data.ambient[2]); - debug_printf("\t\t.data.ambient[3] = %f\n", (*cmd).data.ambient[3]); - debug_printf("\t\t.data.position[0] = %f\n", (*cmd).data.position[0]); - debug_printf("\t\t.data.position[1] = %f\n", (*cmd).data.position[1]); - debug_printf("\t\t.data.position[2] = %f\n", (*cmd).data.position[2]); - debug_printf("\t\t.data.position[3] = %f\n", (*cmd).data.position[3]); - debug_printf("\t\t.data.direction[0] = %f\n", (*cmd).data.direction[0]); - debug_printf("\t\t.data.direction[1] = %f\n", (*cmd).data.direction[1]); - debug_printf("\t\t.data.direction[2] = %f\n", (*cmd).data.direction[2]); - debug_printf("\t\t.data.direction[3] = %f\n", (*cmd).data.direction[3]); - debug_printf("\t\t.data.range = %f\n", (*cmd).data.range); - debug_printf("\t\t.data.falloff = %f\n", (*cmd).data.falloff); - debug_printf("\t\t.data.attenuation0 = %f\n", (*cmd).data.attenuation0); - debug_printf("\t\t.data.attenuation1 = %f\n", (*cmd).data.attenuation1); - debug_printf("\t\t.data.attenuation2 = %f\n", (*cmd).data.attenuation2); - debug_printf("\t\t.data.theta = %f\n", (*cmd).data.theta); - debug_printf("\t\t.data.phi = %f\n", (*cmd).data.phi); + _debug_printf("\t\t.data.inWorldSpace = %u\n", (*cmd).data.inWorldSpace); + _debug_printf("\t\t.data.diffuse[0] = %f\n", (*cmd).data.diffuse[0]); + _debug_printf("\t\t.data.diffuse[1] = %f\n", (*cmd).data.diffuse[1]); + _debug_printf("\t\t.data.diffuse[2] = %f\n", (*cmd).data.diffuse[2]); + _debug_printf("\t\t.data.diffuse[3] = %f\n", (*cmd).data.diffuse[3]); + _debug_printf("\t\t.data.specular[0] = %f\n", (*cmd).data.specular[0]); + _debug_printf("\t\t.data.specular[1] = %f\n", (*cmd).data.specular[1]); + _debug_printf("\t\t.data.specular[2] = %f\n", (*cmd).data.specular[2]); + _debug_printf("\t\t.data.specular[3] = %f\n", (*cmd).data.specular[3]); + _debug_printf("\t\t.data.ambient[0] = %f\n", (*cmd).data.ambient[0]); + _debug_printf("\t\t.data.ambient[1] = %f\n", (*cmd).data.ambient[1]); + _debug_printf("\t\t.data.ambient[2] = %f\n", (*cmd).data.ambient[2]); + _debug_printf("\t\t.data.ambient[3] = %f\n", (*cmd).data.ambient[3]); + _debug_printf("\t\t.data.position[0] = %f\n", (*cmd).data.position[0]); + _debug_printf("\t\t.data.position[1] = %f\n", (*cmd).data.position[1]); + _debug_printf("\t\t.data.position[2] = %f\n", (*cmd).data.position[2]); + _debug_printf("\t\t.data.position[3] = %f\n", (*cmd).data.position[3]); + _debug_printf("\t\t.data.direction[0] = %f\n", (*cmd).data.direction[0]); + _debug_printf("\t\t.data.direction[1] = %f\n", (*cmd).data.direction[1]); + _debug_printf("\t\t.data.direction[2] = %f\n", (*cmd).data.direction[2]); + _debug_printf("\t\t.data.direction[3] = %f\n", (*cmd).data.direction[3]); + _debug_printf("\t\t.data.range = %f\n", (*cmd).data.range); + _debug_printf("\t\t.data.falloff = %f\n", (*cmd).data.falloff); + _debug_printf("\t\t.data.attenuation0 = %f\n", (*cmd).data.attenuation0); + _debug_printf("\t\t.data.attenuation1 = %f\n", (*cmd).data.attenuation1); + _debug_printf("\t\t.data.attenuation2 = %f\n", (*cmd).data.attenuation2); + _debug_printf("\t\t.data.theta = %f\n", (*cmd).data.theta); + _debug_printf("\t\t.data.phi = %f\n", (*cmd).data.phi); } static void dump_SVGA3dCmdSetViewport(const SVGA3dCmdSetViewport *cmd) { - debug_printf("\t\t.cid = %u\n", (*cmd).cid); - debug_printf("\t\t.rect.x = %u\n", (*cmd).rect.x); - debug_printf("\t\t.rect.y = %u\n", (*cmd).rect.y); - debug_printf("\t\t.rect.w = %u\n", (*cmd).rect.w); - debug_printf("\t\t.rect.h = %u\n", (*cmd).rect.h); + _debug_printf("\t\t.cid = %u\n", (*cmd).cid); + _debug_printf("\t\t.rect.x = %u\n", (*cmd).rect.x); + _debug_printf("\t\t.rect.y = %u\n", (*cmd).rect.y); + _debug_printf("\t\t.rect.w = %u\n", (*cmd).rect.w); + _debug_printf("\t\t.rect.h = %u\n", (*cmd).rect.h); } static void dump_SVGA3dCmdSetScissorRect(const SVGA3dCmdSetScissorRect *cmd) { - debug_printf("\t\t.cid = %u\n", (*cmd).cid); - debug_printf("\t\t.rect.x = %u\n", (*cmd).rect.x); - debug_printf("\t\t.rect.y = %u\n", (*cmd).rect.y); - debug_printf("\t\t.rect.w = %u\n", (*cmd).rect.w); - debug_printf("\t\t.rect.h = %u\n", (*cmd).rect.h); + _debug_printf("\t\t.cid = %u\n", (*cmd).cid); + _debug_printf("\t\t.rect.x = %u\n", (*cmd).rect.x); + _debug_printf("\t\t.rect.y = %u\n", (*cmd).rect.y); + _debug_printf("\t\t.rect.w = %u\n", (*cmd).rect.w); + _debug_printf("\t\t.rect.h = %u\n", (*cmd).rect.h); } static void dump_SVGA3dCopyRect(const SVGA3dCopyRect *cmd) { - debug_printf("\t\t.x = %u\n", (*cmd).x); - debug_printf("\t\t.y = %u\n", (*cmd).y); - debug_printf("\t\t.w = %u\n", (*cmd).w); - debug_printf("\t\t.h = %u\n", (*cmd).h); - debug_printf("\t\t.srcx = %u\n", (*cmd).srcx); - debug_printf("\t\t.srcy = %u\n", (*cmd).srcy); + _debug_printf("\t\t.x = %u\n", (*cmd).x); + _debug_printf("\t\t.y = %u\n", (*cmd).y); + _debug_printf("\t\t.w = %u\n", (*cmd).w); + _debug_printf("\t\t.h = %u\n", (*cmd).h); + _debug_printf("\t\t.srcx = %u\n", (*cmd).srcx); + _debug_printf("\t\t.srcy = %u\n", (*cmd).srcy); } static void dump_SVGA3dCmdSetShader(const SVGA3dCmdSetShader *cmd) { - debug_printf("\t\t.cid = %u\n", (*cmd).cid); + _debug_printf("\t\t.cid = %u\n", (*cmd).cid); switch((*cmd).type) { case SVGA3D_SHADERTYPE_COMPILED_DX8: - debug_printf("\t\t.type = SVGA3D_SHADERTYPE_COMPILED_DX8\n"); + _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_COMPILED_DX8\n"); break; case SVGA3D_SHADERTYPE_VS: - debug_printf("\t\t.type = SVGA3D_SHADERTYPE_VS\n"); + _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_VS\n"); break; case SVGA3D_SHADERTYPE_PS: - debug_printf("\t\t.type = SVGA3D_SHADERTYPE_PS\n"); + _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_PS\n"); break; case SVGA3D_SHADERTYPE_MAX: - debug_printf("\t\t.type = SVGA3D_SHADERTYPE_MAX\n"); + _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_MAX\n"); break; default: - debug_printf("\t\t.type = %i\n", (*cmd).type); + _debug_printf("\t\t.type = %i\n", (*cmd).type); break; } - debug_printf("\t\t.shid = %u\n", (*cmd).shid); + _debug_printf("\t\t.shid = %u\n", (*cmd).shid); } static void dump_SVGA3dCmdEndQuery(const SVGA3dCmdEndQuery *cmd) { - debug_printf("\t\t.cid = %u\n", (*cmd).cid); + _debug_printf("\t\t.cid = %u\n", (*cmd).cid); switch((*cmd).type) { case SVGA3D_QUERYTYPE_OCCLUSION: - debug_printf("\t\t.type = SVGA3D_QUERYTYPE_OCCLUSION\n"); + _debug_printf("\t\t.type = SVGA3D_QUERYTYPE_OCCLUSION\n"); break; case SVGA3D_QUERYTYPE_MAX: - debug_printf("\t\t.type = SVGA3D_QUERYTYPE_MAX\n"); + _debug_printf("\t\t.type = SVGA3D_QUERYTYPE_MAX\n"); break; default: - debug_printf("\t\t.type = %i\n", (*cmd).type); + _debug_printf("\t\t.type = %i\n", (*cmd).type); break; } - debug_printf("\t\t.guestResult.gmrId = %u\n", (*cmd).guestResult.gmrId); - debug_printf("\t\t.guestResult.offset = %u\n", (*cmd).guestResult.offset); + _debug_printf("\t\t.guestResult.gmrId = %u\n", (*cmd).guestResult.gmrId); + _debug_printf("\t\t.guestResult.offset = %u\n", (*cmd).guestResult.offset); } static void dump_SVGA3dSize(const SVGA3dSize *cmd) { - debug_printf("\t\t.width = %u\n", (*cmd).width); - debug_printf("\t\t.height = %u\n", (*cmd).height); - debug_printf("\t\t.depth = %u\n", (*cmd).depth); + _debug_printf("\t\t.width = %u\n", (*cmd).width); + _debug_printf("\t\t.height = %u\n", (*cmd).height); + _debug_printf("\t\t.depth = %u\n", (*cmd).depth); } static void dump_SVGA3dCmdDestroySurface(const SVGA3dCmdDestroySurface *cmd) { - debug_printf("\t\t.sid = %u\n", (*cmd).sid); + _debug_printf("\t\t.sid = %u\n", (*cmd).sid); } static void dump_SVGA3dCmdDefineContext(const SVGA3dCmdDefineContext *cmd) { - debug_printf("\t\t.cid = %u\n", (*cmd).cid); + _debug_printf("\t\t.cid = %u\n", (*cmd).cid); } static void dump_SVGA3dRect(const SVGA3dRect *cmd) { - debug_printf("\t\t.x = %u\n", (*cmd).x); - debug_printf("\t\t.y = %u\n", (*cmd).y); - debug_printf("\t\t.w = %u\n", (*cmd).w); - debug_printf("\t\t.h = %u\n", (*cmd).h); + _debug_printf("\t\t.x = %u\n", (*cmd).x); + _debug_printf("\t\t.y = %u\n", (*cmd).y); + _debug_printf("\t\t.w = %u\n", (*cmd).w); + _debug_printf("\t\t.h = %u\n", (*cmd).h); } static void dump_SVGA3dCmdBeginQuery(const SVGA3dCmdBeginQuery *cmd) { - debug_printf("\t\t.cid = %u\n", (*cmd).cid); + _debug_printf("\t\t.cid = %u\n", (*cmd).cid); switch((*cmd).type) { case SVGA3D_QUERYTYPE_OCCLUSION: - debug_printf("\t\t.type = SVGA3D_QUERYTYPE_OCCLUSION\n"); + _debug_printf("\t\t.type = SVGA3D_QUERYTYPE_OCCLUSION\n"); break; case SVGA3D_QUERYTYPE_MAX: - debug_printf("\t\t.type = SVGA3D_QUERYTYPE_MAX\n"); + _debug_printf("\t\t.type = SVGA3D_QUERYTYPE_MAX\n"); break; default: - debug_printf("\t\t.type = %i\n", (*cmd).type); + _debug_printf("\t\t.type = %i\n", (*cmd).type); break; } } @@ -599,336 +599,336 @@ dump_SVGA3dRenderState(const SVGA3dRenderState *cmd) { switch((*cmd).state) { case SVGA3D_RS_INVALID: - debug_printf("\t\t.state = SVGA3D_RS_INVALID\n"); + _debug_printf("\t\t.state = SVGA3D_RS_INVALID\n"); break; case SVGA3D_RS_ZENABLE: - debug_printf("\t\t.state = SVGA3D_RS_ZENABLE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_ZENABLE\n"); break; case SVGA3D_RS_ZWRITEENABLE: - debug_printf("\t\t.state = SVGA3D_RS_ZWRITEENABLE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_ZWRITEENABLE\n"); break; case SVGA3D_RS_ALPHATESTENABLE: - debug_printf("\t\t.state = SVGA3D_RS_ALPHATESTENABLE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_ALPHATESTENABLE\n"); break; case SVGA3D_RS_DITHERENABLE: - debug_printf("\t\t.state = SVGA3D_RS_DITHERENABLE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_DITHERENABLE\n"); break; case SVGA3D_RS_BLENDENABLE: - debug_printf("\t\t.state = SVGA3D_RS_BLENDENABLE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_BLENDENABLE\n"); break; case SVGA3D_RS_FOGENABLE: - debug_printf("\t\t.state = SVGA3D_RS_FOGENABLE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_FOGENABLE\n"); break; case SVGA3D_RS_SPECULARENABLE: - debug_printf("\t\t.state = SVGA3D_RS_SPECULARENABLE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_SPECULARENABLE\n"); break; case SVGA3D_RS_STENCILENABLE: - debug_printf("\t\t.state = SVGA3D_RS_STENCILENABLE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_STENCILENABLE\n"); break; case SVGA3D_RS_LIGHTINGENABLE: - debug_printf("\t\t.state = SVGA3D_RS_LIGHTINGENABLE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_LIGHTINGENABLE\n"); break; case SVGA3D_RS_NORMALIZENORMALS: - debug_printf("\t\t.state = SVGA3D_RS_NORMALIZENORMALS\n"); + _debug_printf("\t\t.state = SVGA3D_RS_NORMALIZENORMALS\n"); break; case SVGA3D_RS_POINTSPRITEENABLE: - debug_printf("\t\t.state = SVGA3D_RS_POINTSPRITEENABLE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_POINTSPRITEENABLE\n"); break; case SVGA3D_RS_POINTSCALEENABLE: - debug_printf("\t\t.state = SVGA3D_RS_POINTSCALEENABLE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_POINTSCALEENABLE\n"); break; case SVGA3D_RS_STENCILREF: - debug_printf("\t\t.state = SVGA3D_RS_STENCILREF\n"); + _debug_printf("\t\t.state = SVGA3D_RS_STENCILREF\n"); break; case SVGA3D_RS_STENCILMASK: - debug_printf("\t\t.state = SVGA3D_RS_STENCILMASK\n"); + _debug_printf("\t\t.state = SVGA3D_RS_STENCILMASK\n"); break; case SVGA3D_RS_STENCILWRITEMASK: - debug_printf("\t\t.state = SVGA3D_RS_STENCILWRITEMASK\n"); + _debug_printf("\t\t.state = SVGA3D_RS_STENCILWRITEMASK\n"); break; case SVGA3D_RS_FOGSTART: - debug_printf("\t\t.state = SVGA3D_RS_FOGSTART\n"); + _debug_printf("\t\t.state = SVGA3D_RS_FOGSTART\n"); break; case SVGA3D_RS_FOGEND: - debug_printf("\t\t.state = SVGA3D_RS_FOGEND\n"); + _debug_printf("\t\t.state = SVGA3D_RS_FOGEND\n"); break; case SVGA3D_RS_FOGDENSITY: - debug_printf("\t\t.state = SVGA3D_RS_FOGDENSITY\n"); + _debug_printf("\t\t.state = SVGA3D_RS_FOGDENSITY\n"); break; case SVGA3D_RS_POINTSIZE: - debug_printf("\t\t.state = SVGA3D_RS_POINTSIZE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_POINTSIZE\n"); break; case SVGA3D_RS_POINTSIZEMIN: - debug_printf("\t\t.state = SVGA3D_RS_POINTSIZEMIN\n"); + _debug_printf("\t\t.state = SVGA3D_RS_POINTSIZEMIN\n"); break; case SVGA3D_RS_POINTSIZEMAX: - debug_printf("\t\t.state = SVGA3D_RS_POINTSIZEMAX\n"); + _debug_printf("\t\t.state = SVGA3D_RS_POINTSIZEMAX\n"); break; case SVGA3D_RS_POINTSCALE_A: - debug_printf("\t\t.state = SVGA3D_RS_POINTSCALE_A\n"); + _debug_printf("\t\t.state = SVGA3D_RS_POINTSCALE_A\n"); break; case SVGA3D_RS_POINTSCALE_B: - debug_printf("\t\t.state = SVGA3D_RS_POINTSCALE_B\n"); + _debug_printf("\t\t.state = SVGA3D_RS_POINTSCALE_B\n"); break; case SVGA3D_RS_POINTSCALE_C: - debug_printf("\t\t.state = SVGA3D_RS_POINTSCALE_C\n"); + _debug_printf("\t\t.state = SVGA3D_RS_POINTSCALE_C\n"); break; case SVGA3D_RS_FOGCOLOR: - debug_printf("\t\t.state = SVGA3D_RS_FOGCOLOR\n"); + _debug_printf("\t\t.state = SVGA3D_RS_FOGCOLOR\n"); break; case SVGA3D_RS_AMBIENT: - debug_printf("\t\t.state = SVGA3D_RS_AMBIENT\n"); + _debug_printf("\t\t.state = SVGA3D_RS_AMBIENT\n"); break; case SVGA3D_RS_CLIPPLANEENABLE: - debug_printf("\t\t.state = SVGA3D_RS_CLIPPLANEENABLE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_CLIPPLANEENABLE\n"); break; case SVGA3D_RS_FOGMODE: - debug_printf("\t\t.state = SVGA3D_RS_FOGMODE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_FOGMODE\n"); break; case SVGA3D_RS_FILLMODE: - debug_printf("\t\t.state = SVGA3D_RS_FILLMODE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_FILLMODE\n"); break; case SVGA3D_RS_SHADEMODE: - debug_printf("\t\t.state = SVGA3D_RS_SHADEMODE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_SHADEMODE\n"); break; case SVGA3D_RS_LINEPATTERN: - debug_printf("\t\t.state = SVGA3D_RS_LINEPATTERN\n"); + _debug_printf("\t\t.state = SVGA3D_RS_LINEPATTERN\n"); break; case SVGA3D_RS_SRCBLEND: - debug_printf("\t\t.state = SVGA3D_RS_SRCBLEND\n"); + _debug_printf("\t\t.state = SVGA3D_RS_SRCBLEND\n"); break; case SVGA3D_RS_DSTBLEND: - debug_printf("\t\t.state = SVGA3D_RS_DSTBLEND\n"); + _debug_printf("\t\t.state = SVGA3D_RS_DSTBLEND\n"); break; case SVGA3D_RS_BLENDEQUATION: - debug_printf("\t\t.state = SVGA3D_RS_BLENDEQUATION\n"); + _debug_printf("\t\t.state = SVGA3D_RS_BLENDEQUATION\n"); break; case SVGA3D_RS_CULLMODE: - debug_printf("\t\t.state = SVGA3D_RS_CULLMODE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_CULLMODE\n"); break; case SVGA3D_RS_ZFUNC: - debug_printf("\t\t.state = SVGA3D_RS_ZFUNC\n"); + _debug_printf("\t\t.state = SVGA3D_RS_ZFUNC\n"); break; case SVGA3D_RS_ALPHAFUNC: - debug_printf("\t\t.state = SVGA3D_RS_ALPHAFUNC\n"); + _debug_printf("\t\t.state = SVGA3D_RS_ALPHAFUNC\n"); break; case SVGA3D_RS_STENCILFUNC: - debug_printf("\t\t.state = SVGA3D_RS_STENCILFUNC\n"); + _debug_printf("\t\t.state = SVGA3D_RS_STENCILFUNC\n"); break; case SVGA3D_RS_STENCILFAIL: - debug_printf("\t\t.state = SVGA3D_RS_STENCILFAIL\n"); + _debug_printf("\t\t.state = SVGA3D_RS_STENCILFAIL\n"); break; case SVGA3D_RS_STENCILZFAIL: - debug_printf("\t\t.state = SVGA3D_RS_STENCILZFAIL\n"); + _debug_printf("\t\t.state = SVGA3D_RS_STENCILZFAIL\n"); break; case SVGA3D_RS_STENCILPASS: - debug_printf("\t\t.state = SVGA3D_RS_STENCILPASS\n"); + _debug_printf("\t\t.state = SVGA3D_RS_STENCILPASS\n"); break; case SVGA3D_RS_ALPHAREF: - debug_printf("\t\t.state = SVGA3D_RS_ALPHAREF\n"); + _debug_printf("\t\t.state = SVGA3D_RS_ALPHAREF\n"); break; case SVGA3D_RS_FRONTWINDING: - debug_printf("\t\t.state = SVGA3D_RS_FRONTWINDING\n"); + _debug_printf("\t\t.state = SVGA3D_RS_FRONTWINDING\n"); break; case SVGA3D_RS_COORDINATETYPE: - debug_printf("\t\t.state = SVGA3D_RS_COORDINATETYPE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_COORDINATETYPE\n"); break; case SVGA3D_RS_ZBIAS: - debug_printf("\t\t.state = SVGA3D_RS_ZBIAS\n"); + _debug_printf("\t\t.state = SVGA3D_RS_ZBIAS\n"); break; case SVGA3D_RS_RANGEFOGENABLE: - debug_printf("\t\t.state = SVGA3D_RS_RANGEFOGENABLE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_RANGEFOGENABLE\n"); break; case SVGA3D_RS_COLORWRITEENABLE: - debug_printf("\t\t.state = SVGA3D_RS_COLORWRITEENABLE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_COLORWRITEENABLE\n"); break; case SVGA3D_RS_VERTEXMATERIALENABLE: - debug_printf("\t\t.state = SVGA3D_RS_VERTEXMATERIALENABLE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_VERTEXMATERIALENABLE\n"); break; case SVGA3D_RS_DIFFUSEMATERIALSOURCE: - debug_printf("\t\t.state = SVGA3D_RS_DIFFUSEMATERIALSOURCE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_DIFFUSEMATERIALSOURCE\n"); break; case SVGA3D_RS_SPECULARMATERIALSOURCE: - debug_printf("\t\t.state = SVGA3D_RS_SPECULARMATERIALSOURCE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_SPECULARMATERIALSOURCE\n"); break; case SVGA3D_RS_AMBIENTMATERIALSOURCE: - debug_printf("\t\t.state = SVGA3D_RS_AMBIENTMATERIALSOURCE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_AMBIENTMATERIALSOURCE\n"); break; case SVGA3D_RS_EMISSIVEMATERIALSOURCE: - debug_printf("\t\t.state = SVGA3D_RS_EMISSIVEMATERIALSOURCE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_EMISSIVEMATERIALSOURCE\n"); break; case SVGA3D_RS_TEXTUREFACTOR: - debug_printf("\t\t.state = SVGA3D_RS_TEXTUREFACTOR\n"); + _debug_printf("\t\t.state = SVGA3D_RS_TEXTUREFACTOR\n"); break; case SVGA3D_RS_LOCALVIEWER: - debug_printf("\t\t.state = SVGA3D_RS_LOCALVIEWER\n"); + _debug_printf("\t\t.state = SVGA3D_RS_LOCALVIEWER\n"); break; case SVGA3D_RS_SCISSORTESTENABLE: - debug_printf("\t\t.state = SVGA3D_RS_SCISSORTESTENABLE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_SCISSORTESTENABLE\n"); break; case SVGA3D_RS_BLENDCOLOR: - debug_printf("\t\t.state = SVGA3D_RS_BLENDCOLOR\n"); + _debug_printf("\t\t.state = SVGA3D_RS_BLENDCOLOR\n"); break; case SVGA3D_RS_STENCILENABLE2SIDED: - debug_printf("\t\t.state = SVGA3D_RS_STENCILENABLE2SIDED\n"); + _debug_printf("\t\t.state = SVGA3D_RS_STENCILENABLE2SIDED\n"); break; case SVGA3D_RS_CCWSTENCILFUNC: - debug_printf("\t\t.state = SVGA3D_RS_CCWSTENCILFUNC\n"); + _debug_printf("\t\t.state = SVGA3D_RS_CCWSTENCILFUNC\n"); break; case SVGA3D_RS_CCWSTENCILFAIL: - debug_printf("\t\t.state = SVGA3D_RS_CCWSTENCILFAIL\n"); + _debug_printf("\t\t.state = SVGA3D_RS_CCWSTENCILFAIL\n"); break; case SVGA3D_RS_CCWSTENCILZFAIL: - debug_printf("\t\t.state = SVGA3D_RS_CCWSTENCILZFAIL\n"); + _debug_printf("\t\t.state = SVGA3D_RS_CCWSTENCILZFAIL\n"); break; case SVGA3D_RS_CCWSTENCILPASS: - debug_printf("\t\t.state = SVGA3D_RS_CCWSTENCILPASS\n"); + _debug_printf("\t\t.state = SVGA3D_RS_CCWSTENCILPASS\n"); break; case SVGA3D_RS_VERTEXBLEND: - debug_printf("\t\t.state = SVGA3D_RS_VERTEXBLEND\n"); + _debug_printf("\t\t.state = SVGA3D_RS_VERTEXBLEND\n"); break; case SVGA3D_RS_SLOPESCALEDEPTHBIAS: - debug_printf("\t\t.state = SVGA3D_RS_SLOPESCALEDEPTHBIAS\n"); + _debug_printf("\t\t.state = SVGA3D_RS_SLOPESCALEDEPTHBIAS\n"); break; case SVGA3D_RS_DEPTHBIAS: - debug_printf("\t\t.state = SVGA3D_RS_DEPTHBIAS\n"); + _debug_printf("\t\t.state = SVGA3D_RS_DEPTHBIAS\n"); break; case SVGA3D_RS_OUTPUTGAMMA: - debug_printf("\t\t.state = SVGA3D_RS_OUTPUTGAMMA\n"); + _debug_printf("\t\t.state = SVGA3D_RS_OUTPUTGAMMA\n"); break; case SVGA3D_RS_ZVISIBLE: - debug_printf("\t\t.state = SVGA3D_RS_ZVISIBLE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_ZVISIBLE\n"); break; case SVGA3D_RS_LASTPIXEL: - debug_printf("\t\t.state = SVGA3D_RS_LASTPIXEL\n"); + _debug_printf("\t\t.state = SVGA3D_RS_LASTPIXEL\n"); break; case SVGA3D_RS_CLIPPING: - debug_printf("\t\t.state = SVGA3D_RS_CLIPPING\n"); + _debug_printf("\t\t.state = SVGA3D_RS_CLIPPING\n"); break; case SVGA3D_RS_WRAP0: - debug_printf("\t\t.state = SVGA3D_RS_WRAP0\n"); + _debug_printf("\t\t.state = SVGA3D_RS_WRAP0\n"); break; case SVGA3D_RS_WRAP1: - debug_printf("\t\t.state = SVGA3D_RS_WRAP1\n"); + _debug_printf("\t\t.state = SVGA3D_RS_WRAP1\n"); break; case SVGA3D_RS_WRAP2: - debug_printf("\t\t.state = SVGA3D_RS_WRAP2\n"); + _debug_printf("\t\t.state = SVGA3D_RS_WRAP2\n"); break; case SVGA3D_RS_WRAP3: - debug_printf("\t\t.state = SVGA3D_RS_WRAP3\n"); + _debug_printf("\t\t.state = SVGA3D_RS_WRAP3\n"); break; case SVGA3D_RS_WRAP4: - debug_printf("\t\t.state = SVGA3D_RS_WRAP4\n"); + _debug_printf("\t\t.state = SVGA3D_RS_WRAP4\n"); break; case SVGA3D_RS_WRAP5: - debug_printf("\t\t.state = SVGA3D_RS_WRAP5\n"); + _debug_printf("\t\t.state = SVGA3D_RS_WRAP5\n"); break; case SVGA3D_RS_WRAP6: - debug_printf("\t\t.state = SVGA3D_RS_WRAP6\n"); + _debug_printf("\t\t.state = SVGA3D_RS_WRAP6\n"); break; case SVGA3D_RS_WRAP7: - debug_printf("\t\t.state = SVGA3D_RS_WRAP7\n"); + _debug_printf("\t\t.state = SVGA3D_RS_WRAP7\n"); break; case SVGA3D_RS_WRAP8: - debug_printf("\t\t.state = SVGA3D_RS_WRAP8\n"); + _debug_printf("\t\t.state = SVGA3D_RS_WRAP8\n"); break; case SVGA3D_RS_WRAP9: - debug_printf("\t\t.state = SVGA3D_RS_WRAP9\n"); + _debug_printf("\t\t.state = SVGA3D_RS_WRAP9\n"); break; case SVGA3D_RS_WRAP10: - debug_printf("\t\t.state = SVGA3D_RS_WRAP10\n"); + _debug_printf("\t\t.state = SVGA3D_RS_WRAP10\n"); break; case SVGA3D_RS_WRAP11: - debug_printf("\t\t.state = SVGA3D_RS_WRAP11\n"); + _debug_printf("\t\t.state = SVGA3D_RS_WRAP11\n"); break; case SVGA3D_RS_WRAP12: - debug_printf("\t\t.state = SVGA3D_RS_WRAP12\n"); + _debug_printf("\t\t.state = SVGA3D_RS_WRAP12\n"); break; case SVGA3D_RS_WRAP13: - debug_printf("\t\t.state = SVGA3D_RS_WRAP13\n"); + _debug_printf("\t\t.state = SVGA3D_RS_WRAP13\n"); break; case SVGA3D_RS_WRAP14: - debug_printf("\t\t.state = SVGA3D_RS_WRAP14\n"); + _debug_printf("\t\t.state = SVGA3D_RS_WRAP14\n"); break; case SVGA3D_RS_WRAP15: - debug_printf("\t\t.state = SVGA3D_RS_WRAP15\n"); + _debug_printf("\t\t.state = SVGA3D_RS_WRAP15\n"); break; case SVGA3D_RS_MULTISAMPLEANTIALIAS: - debug_printf("\t\t.state = SVGA3D_RS_MULTISAMPLEANTIALIAS\n"); + _debug_printf("\t\t.state = SVGA3D_RS_MULTISAMPLEANTIALIAS\n"); break; case SVGA3D_RS_MULTISAMPLEMASK: - debug_printf("\t\t.state = SVGA3D_RS_MULTISAMPLEMASK\n"); + _debug_printf("\t\t.state = SVGA3D_RS_MULTISAMPLEMASK\n"); break; case SVGA3D_RS_INDEXEDVERTEXBLENDENABLE: - debug_printf("\t\t.state = SVGA3D_RS_INDEXEDVERTEXBLENDENABLE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_INDEXEDVERTEXBLENDENABLE\n"); break; case SVGA3D_RS_TWEENFACTOR: - debug_printf("\t\t.state = SVGA3D_RS_TWEENFACTOR\n"); + _debug_printf("\t\t.state = SVGA3D_RS_TWEENFACTOR\n"); break; case SVGA3D_RS_ANTIALIASEDLINEENABLE: - debug_printf("\t\t.state = SVGA3D_RS_ANTIALIASEDLINEENABLE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_ANTIALIASEDLINEENABLE\n"); break; case SVGA3D_RS_COLORWRITEENABLE1: - debug_printf("\t\t.state = SVGA3D_RS_COLORWRITEENABLE1\n"); + _debug_printf("\t\t.state = SVGA3D_RS_COLORWRITEENABLE1\n"); break; case SVGA3D_RS_COLORWRITEENABLE2: - debug_printf("\t\t.state = SVGA3D_RS_COLORWRITEENABLE2\n"); + _debug_printf("\t\t.state = SVGA3D_RS_COLORWRITEENABLE2\n"); break; case SVGA3D_RS_COLORWRITEENABLE3: - debug_printf("\t\t.state = SVGA3D_RS_COLORWRITEENABLE3\n"); + _debug_printf("\t\t.state = SVGA3D_RS_COLORWRITEENABLE3\n"); break; case SVGA3D_RS_SEPARATEALPHABLENDENABLE: - debug_printf("\t\t.state = SVGA3D_RS_SEPARATEALPHABLENDENABLE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_SEPARATEALPHABLENDENABLE\n"); break; case SVGA3D_RS_SRCBLENDALPHA: - debug_printf("\t\t.state = SVGA3D_RS_SRCBLENDALPHA\n"); + _debug_printf("\t\t.state = SVGA3D_RS_SRCBLENDALPHA\n"); break; case SVGA3D_RS_DSTBLENDALPHA: - debug_printf("\t\t.state = SVGA3D_RS_DSTBLENDALPHA\n"); + _debug_printf("\t\t.state = SVGA3D_RS_DSTBLENDALPHA\n"); break; case SVGA3D_RS_BLENDEQUATIONALPHA: - debug_printf("\t\t.state = SVGA3D_RS_BLENDEQUATIONALPHA\n"); + _debug_printf("\t\t.state = SVGA3D_RS_BLENDEQUATIONALPHA\n"); break; case SVGA3D_RS_MAX: - debug_printf("\t\t.state = SVGA3D_RS_MAX\n"); + _debug_printf("\t\t.state = SVGA3D_RS_MAX\n"); break; default: - debug_printf("\t\t.state = %i\n", (*cmd).state); + _debug_printf("\t\t.state = %i\n", (*cmd).state); break; } - debug_printf("\t\t.uintValue = %u\n", (*cmd).uintValue); - debug_printf("\t\t.floatValue = %f\n", (*cmd).floatValue); + _debug_printf("\t\t.uintValue = %u\n", (*cmd).uintValue); + _debug_printf("\t\t.floatValue = %f\n", (*cmd).floatValue); } static void dump_SVGA3dVertexDivisor(const SVGA3dVertexDivisor *cmd) { - debug_printf("\t\t.value = %u\n", (*cmd).value); - debug_printf("\t\t.count = %u\n", (*cmd).count); - debug_printf("\t\t.indexedData = %u\n", (*cmd).indexedData); - debug_printf("\t\t.instanceData = %u\n", (*cmd).instanceData); + _debug_printf("\t\t.value = %u\n", (*cmd).value); + _debug_printf("\t\t.count = %u\n", (*cmd).count); + _debug_printf("\t\t.indexedData = %u\n", (*cmd).indexedData); + _debug_printf("\t\t.instanceData = %u\n", (*cmd).instanceData); } static void dump_SVGA3dCmdDefineShader(const SVGA3dCmdDefineShader *cmd) { - debug_printf("\t\t.cid = %u\n", (*cmd).cid); - debug_printf("\t\t.shid = %u\n", (*cmd).shid); + _debug_printf("\t\t.cid = %u\n", (*cmd).cid); + _debug_printf("\t\t.shid = %u\n", (*cmd).shid); switch((*cmd).type) { case SVGA3D_SHADERTYPE_COMPILED_DX8: - debug_printf("\t\t.type = SVGA3D_SHADERTYPE_COMPILED_DX8\n"); + _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_COMPILED_DX8\n"); break; case SVGA3D_SHADERTYPE_VS: - debug_printf("\t\t.type = SVGA3D_SHADERTYPE_VS\n"); + _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_VS\n"); break; case SVGA3D_SHADERTYPE_PS: - debug_printf("\t\t.type = SVGA3D_SHADERTYPE_PS\n"); + _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_PS\n"); break; case SVGA3D_SHADERTYPE_MAX: - debug_printf("\t\t.type = SVGA3D_SHADERTYPE_MAX\n"); + _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_MAX\n"); break; default: - debug_printf("\t\t.type = %i\n", (*cmd).type); + _debug_printf("\t\t.type = %i\n", (*cmd).type); break; } } @@ -936,53 +936,53 @@ dump_SVGA3dCmdDefineShader(const SVGA3dCmdDefineShader *cmd) static void dump_SVGA3dCmdSetShaderConst(const SVGA3dCmdSetShaderConst *cmd) { - debug_printf("\t\t.cid = %u\n", (*cmd).cid); - debug_printf("\t\t.reg = %u\n", (*cmd).reg); + _debug_printf("\t\t.cid = %u\n", (*cmd).cid); + _debug_printf("\t\t.reg = %u\n", (*cmd).reg); switch((*cmd).type) { case SVGA3D_SHADERTYPE_COMPILED_DX8: - debug_printf("\t\t.type = SVGA3D_SHADERTYPE_COMPILED_DX8\n"); + _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_COMPILED_DX8\n"); break; case SVGA3D_SHADERTYPE_VS: - debug_printf("\t\t.type = SVGA3D_SHADERTYPE_VS\n"); + _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_VS\n"); break; case SVGA3D_SHADERTYPE_PS: - debug_printf("\t\t.type = SVGA3D_SHADERTYPE_PS\n"); + _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_PS\n"); break; case SVGA3D_SHADERTYPE_MAX: - debug_printf("\t\t.type = SVGA3D_SHADERTYPE_MAX\n"); + _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_MAX\n"); break; default: - debug_printf("\t\t.type = %i\n", (*cmd).type); + _debug_printf("\t\t.type = %i\n", (*cmd).type); break; } switch((*cmd).ctype) { case SVGA3D_CONST_TYPE_FLOAT: - debug_printf("\t\t.ctype = SVGA3D_CONST_TYPE_FLOAT\n"); - debug_printf("\t\t.values[0] = %f\n", *(const float *)&(*cmd).values[0]); - debug_printf("\t\t.values[1] = %f\n", *(const float *)&(*cmd).values[1]); - debug_printf("\t\t.values[2] = %f\n", *(const float *)&(*cmd).values[2]); - debug_printf("\t\t.values[3] = %f\n", *(const float *)&(*cmd).values[3]); + _debug_printf("\t\t.ctype = SVGA3D_CONST_TYPE_FLOAT\n"); + _debug_printf("\t\t.values[0] = %f\n", *(const float *)&(*cmd).values[0]); + _debug_printf("\t\t.values[1] = %f\n", *(const float *)&(*cmd).values[1]); + _debug_printf("\t\t.values[2] = %f\n", *(const float *)&(*cmd).values[2]); + _debug_printf("\t\t.values[3] = %f\n", *(const float *)&(*cmd).values[3]); break; case SVGA3D_CONST_TYPE_INT: - debug_printf("\t\t.ctype = SVGA3D_CONST_TYPE_INT\n"); - debug_printf("\t\t.values[0] = %u\n", (*cmd).values[0]); - debug_printf("\t\t.values[1] = %u\n", (*cmd).values[1]); - debug_printf("\t\t.values[2] = %u\n", (*cmd).values[2]); - debug_printf("\t\t.values[3] = %u\n", (*cmd).values[3]); + _debug_printf("\t\t.ctype = SVGA3D_CONST_TYPE_INT\n"); + _debug_printf("\t\t.values[0] = %u\n", (*cmd).values[0]); + _debug_printf("\t\t.values[1] = %u\n", (*cmd).values[1]); + _debug_printf("\t\t.values[2] = %u\n", (*cmd).values[2]); + _debug_printf("\t\t.values[3] = %u\n", (*cmd).values[3]); break; case SVGA3D_CONST_TYPE_BOOL: - debug_printf("\t\t.ctype = SVGA3D_CONST_TYPE_BOOL\n"); - debug_printf("\t\t.values[0] = %u\n", (*cmd).values[0]); - debug_printf("\t\t.values[1] = %u\n", (*cmd).values[1]); - debug_printf("\t\t.values[2] = %u\n", (*cmd).values[2]); - debug_printf("\t\t.values[3] = %u\n", (*cmd).values[3]); + _debug_printf("\t\t.ctype = SVGA3D_CONST_TYPE_BOOL\n"); + _debug_printf("\t\t.values[0] = %u\n", (*cmd).values[0]); + _debug_printf("\t\t.values[1] = %u\n", (*cmd).values[1]); + _debug_printf("\t\t.values[2] = %u\n", (*cmd).values[2]); + _debug_printf("\t\t.values[3] = %u\n", (*cmd).values[3]); break; default: - debug_printf("\t\t.ctype = %i\n", (*cmd).ctype); - debug_printf("\t\t.values[0] = %u\n", (*cmd).values[0]); - debug_printf("\t\t.values[1] = %u\n", (*cmd).values[1]); - debug_printf("\t\t.values[2] = %u\n", (*cmd).values[2]); - debug_printf("\t\t.values[3] = %u\n", (*cmd).values[3]); + _debug_printf("\t\t.ctype = %i\n", (*cmd).ctype); + _debug_printf("\t\t.values[0] = %u\n", (*cmd).values[0]); + _debug_printf("\t\t.values[1] = %u\n", (*cmd).values[1]); + _debug_printf("\t\t.values[2] = %u\n", (*cmd).values[2]); + _debug_printf("\t\t.values[3] = %u\n", (*cmd).values[3]); break; } } @@ -990,25 +990,25 @@ dump_SVGA3dCmdSetShaderConst(const SVGA3dCmdSetShaderConst *cmd) static void dump_SVGA3dCmdSetZRange(const SVGA3dCmdSetZRange *cmd) { - debug_printf("\t\t.cid = %u\n", (*cmd).cid); - debug_printf("\t\t.zRange.min = %f\n", (*cmd).zRange.min); - debug_printf("\t\t.zRange.max = %f\n", (*cmd).zRange.max); + _debug_printf("\t\t.cid = %u\n", (*cmd).cid); + _debug_printf("\t\t.zRange.min = %f\n", (*cmd).zRange.min); + _debug_printf("\t\t.zRange.max = %f\n", (*cmd).zRange.max); } static void dump_SVGA3dCmdDrawPrimitives(const SVGA3dCmdDrawPrimitives *cmd) { - debug_printf("\t\t.cid = %u\n", (*cmd).cid); - debug_printf("\t\t.numVertexDecls = %u\n", (*cmd).numVertexDecls); - debug_printf("\t\t.numRanges = %u\n", (*cmd).numRanges); + _debug_printf("\t\t.cid = %u\n", (*cmd).cid); + _debug_printf("\t\t.numVertexDecls = %u\n", (*cmd).numVertexDecls); + _debug_printf("\t\t.numRanges = %u\n", (*cmd).numRanges); } static void dump_SVGA3dCmdSetLightEnabled(const SVGA3dCmdSetLightEnabled *cmd) { - debug_printf("\t\t.cid = %u\n", (*cmd).cid); - debug_printf("\t\t.index = %u\n", (*cmd).index); - debug_printf("\t\t.enabled = %u\n", (*cmd).enabled); + _debug_printf("\t\t.cid = %u\n", (*cmd).cid); + _debug_printf("\t\t.index = %u\n", (*cmd).index); + _debug_printf("\t\t.enabled = %u\n", (*cmd).enabled); } static void @@ -1016,86 +1016,86 @@ dump_SVGA3dPrimitiveRange(const SVGA3dPrimitiveRange *cmd) { switch((*cmd).primType) { case SVGA3D_PRIMITIVE_INVALID: - debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_INVALID\n"); + _debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_INVALID\n"); break; case SVGA3D_PRIMITIVE_TRIANGLELIST: - debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_TRIANGLELIST\n"); + _debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_TRIANGLELIST\n"); break; case SVGA3D_PRIMITIVE_POINTLIST: - debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_POINTLIST\n"); + _debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_POINTLIST\n"); break; case SVGA3D_PRIMITIVE_LINELIST: - debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_LINELIST\n"); + _debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_LINELIST\n"); break; case SVGA3D_PRIMITIVE_LINESTRIP: - debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_LINESTRIP\n"); + _debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_LINESTRIP\n"); break; case SVGA3D_PRIMITIVE_TRIANGLESTRIP: - debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_TRIANGLESTRIP\n"); + _debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_TRIANGLESTRIP\n"); break; case SVGA3D_PRIMITIVE_TRIANGLEFAN: - debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_TRIANGLEFAN\n"); + _debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_TRIANGLEFAN\n"); break; case SVGA3D_PRIMITIVE_MAX: - debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_MAX\n"); + _debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_MAX\n"); break; default: - debug_printf("\t\t.primType = %i\n", (*cmd).primType); + _debug_printf("\t\t.primType = %i\n", (*cmd).primType); break; } - debug_printf("\t\t.primitiveCount = %u\n", (*cmd).primitiveCount); - debug_printf("\t\t.indexArray.surfaceId = %u\n", (*cmd).indexArray.surfaceId); - debug_printf("\t\t.indexArray.offset = %u\n", (*cmd).indexArray.offset); - debug_printf("\t\t.indexArray.stride = %u\n", (*cmd).indexArray.stride); - debug_printf("\t\t.indexWidth = %u\n", (*cmd).indexWidth); - debug_printf("\t\t.indexBias = %i\n", (*cmd).indexBias); + _debug_printf("\t\t.primitiveCount = %u\n", (*cmd).primitiveCount); + _debug_printf("\t\t.indexArray.surfaceId = %u\n", (*cmd).indexArray.surfaceId); + _debug_printf("\t\t.indexArray.offset = %u\n", (*cmd).indexArray.offset); + _debug_printf("\t\t.indexArray.stride = %u\n", (*cmd).indexArray.stride); + _debug_printf("\t\t.indexWidth = %u\n", (*cmd).indexWidth); + _debug_printf("\t\t.indexBias = %i\n", (*cmd).indexBias); } static void dump_SVGA3dCmdPresent(const SVGA3dCmdPresent *cmd) { - debug_printf("\t\t.sid = %u\n", (*cmd).sid); + _debug_printf("\t\t.sid = %u\n", (*cmd).sid); } static void dump_SVGA3dCmdSetRenderState(const SVGA3dCmdSetRenderState *cmd) { - debug_printf("\t\t.cid = %u\n", (*cmd).cid); + _debug_printf("\t\t.cid = %u\n", (*cmd).cid); } static void dump_SVGA3dCmdSurfaceStretchBlt(const SVGA3dCmdSurfaceStretchBlt *cmd) { - debug_printf("\t\t.src.sid = %u\n", (*cmd).src.sid); - debug_printf("\t\t.src.face = %u\n", (*cmd).src.face); - debug_printf("\t\t.src.mipmap = %u\n", (*cmd).src.mipmap); - debug_printf("\t\t.dest.sid = %u\n", (*cmd).dest.sid); - debug_printf("\t\t.dest.face = %u\n", (*cmd).dest.face); - debug_printf("\t\t.dest.mipmap = %u\n", (*cmd).dest.mipmap); - debug_printf("\t\t.boxSrc.x = %u\n", (*cmd).boxSrc.x); - debug_printf("\t\t.boxSrc.y = %u\n", (*cmd).boxSrc.y); - debug_printf("\t\t.boxSrc.z = %u\n", (*cmd).boxSrc.z); - debug_printf("\t\t.boxSrc.w = %u\n", (*cmd).boxSrc.w); - debug_printf("\t\t.boxSrc.h = %u\n", (*cmd).boxSrc.h); - debug_printf("\t\t.boxSrc.d = %u\n", (*cmd).boxSrc.d); - debug_printf("\t\t.boxDest.x = %u\n", (*cmd).boxDest.x); - debug_printf("\t\t.boxDest.y = %u\n", (*cmd).boxDest.y); - debug_printf("\t\t.boxDest.z = %u\n", (*cmd).boxDest.z); - debug_printf("\t\t.boxDest.w = %u\n", (*cmd).boxDest.w); - debug_printf("\t\t.boxDest.h = %u\n", (*cmd).boxDest.h); - debug_printf("\t\t.boxDest.d = %u\n", (*cmd).boxDest.d); + _debug_printf("\t\t.src.sid = %u\n", (*cmd).src.sid); + _debug_printf("\t\t.src.face = %u\n", (*cmd).src.face); + _debug_printf("\t\t.src.mipmap = %u\n", (*cmd).src.mipmap); + _debug_printf("\t\t.dest.sid = %u\n", (*cmd).dest.sid); + _debug_printf("\t\t.dest.face = %u\n", (*cmd).dest.face); + _debug_printf("\t\t.dest.mipmap = %u\n", (*cmd).dest.mipmap); + _debug_printf("\t\t.boxSrc.x = %u\n", (*cmd).boxSrc.x); + _debug_printf("\t\t.boxSrc.y = %u\n", (*cmd).boxSrc.y); + _debug_printf("\t\t.boxSrc.z = %u\n", (*cmd).boxSrc.z); + _debug_printf("\t\t.boxSrc.w = %u\n", (*cmd).boxSrc.w); + _debug_printf("\t\t.boxSrc.h = %u\n", (*cmd).boxSrc.h); + _debug_printf("\t\t.boxSrc.d = %u\n", (*cmd).boxSrc.d); + _debug_printf("\t\t.boxDest.x = %u\n", (*cmd).boxDest.x); + _debug_printf("\t\t.boxDest.y = %u\n", (*cmd).boxDest.y); + _debug_printf("\t\t.boxDest.z = %u\n", (*cmd).boxDest.z); + _debug_printf("\t\t.boxDest.w = %u\n", (*cmd).boxDest.w); + _debug_printf("\t\t.boxDest.h = %u\n", (*cmd).boxDest.h); + _debug_printf("\t\t.boxDest.d = %u\n", (*cmd).boxDest.d); switch((*cmd).mode) { case SVGA3D_STRETCH_BLT_POINT: - debug_printf("\t\t.mode = SVGA3D_STRETCH_BLT_POINT\n"); + _debug_printf("\t\t.mode = SVGA3D_STRETCH_BLT_POINT\n"); break; case SVGA3D_STRETCH_BLT_LINEAR: - debug_printf("\t\t.mode = SVGA3D_STRETCH_BLT_LINEAR\n"); + _debug_printf("\t\t.mode = SVGA3D_STRETCH_BLT_LINEAR\n"); break; case SVGA3D_STRETCH_BLT_MAX: - debug_printf("\t\t.mode = SVGA3D_STRETCH_BLT_MAX\n"); + _debug_printf("\t\t.mode = SVGA3D_STRETCH_BLT_MAX\n"); break; default: - debug_printf("\t\t.mode = %i\n", (*cmd).mode); + _debug_printf("\t\t.mode = %i\n", (*cmd).mode); break; } } @@ -1103,21 +1103,21 @@ dump_SVGA3dCmdSurfaceStretchBlt(const SVGA3dCmdSurfaceStretchBlt *cmd) static void dump_SVGA3dCmdSurfaceDMA(const SVGA3dCmdSurfaceDMA *cmd) { - debug_printf("\t\t.guest.ptr.gmrId = %u\n", (*cmd).guest.ptr.gmrId); - debug_printf("\t\t.guest.ptr.offset = %u\n", (*cmd).guest.ptr.offset); - debug_printf("\t\t.guest.pitch = %u\n", (*cmd).guest.pitch); - debug_printf("\t\t.host.sid = %u\n", (*cmd).host.sid); - debug_printf("\t\t.host.face = %u\n", (*cmd).host.face); - debug_printf("\t\t.host.mipmap = %u\n", (*cmd).host.mipmap); + _debug_printf("\t\t.guest.ptr.gmrId = %u\n", (*cmd).guest.ptr.gmrId); + _debug_printf("\t\t.guest.ptr.offset = %u\n", (*cmd).guest.ptr.offset); + _debug_printf("\t\t.guest.pitch = %u\n", (*cmd).guest.pitch); + _debug_printf("\t\t.host.sid = %u\n", (*cmd).host.sid); + _debug_printf("\t\t.host.face = %u\n", (*cmd).host.face); + _debug_printf("\t\t.host.mipmap = %u\n", (*cmd).host.mipmap); switch((*cmd).transfer) { case SVGA3D_WRITE_HOST_VRAM: - debug_printf("\t\t.transfer = SVGA3D_WRITE_HOST_VRAM\n"); + _debug_printf("\t\t.transfer = SVGA3D_WRITE_HOST_VRAM\n"); break; case SVGA3D_READ_HOST_VRAM: - debug_printf("\t\t.transfer = SVGA3D_READ_HOST_VRAM\n"); + _debug_printf("\t\t.transfer = SVGA3D_READ_HOST_VRAM\n"); break; default: - debug_printf("\t\t.transfer = %i\n", (*cmd).transfer); + _debug_printf("\t\t.transfer = %i\n", (*cmd).transfer); break; } } @@ -1125,107 +1125,107 @@ dump_SVGA3dCmdSurfaceDMA(const SVGA3dCmdSurfaceDMA *cmd) static void dump_SVGA3dCmdSurfaceDMASuffix(const SVGA3dCmdSurfaceDMASuffix *cmd) { - debug_printf("\t\t.suffixSize = %u\n", (*cmd).suffixSize); - debug_printf("\t\t.maximumOffset = %u\n", (*cmd).maximumOffset); - debug_printf("\t\t.flags.discard = %u\n", (*cmd).flags.discard); - debug_printf("\t\t.flags.unsynchronized = %u\n", (*cmd).flags.unsynchronized); + _debug_printf("\t\t.suffixSize = %u\n", (*cmd).suffixSize); + _debug_printf("\t\t.maximumOffset = %u\n", (*cmd).maximumOffset); + _debug_printf("\t\t.flags.discard = %u\n", (*cmd).flags.discard); + _debug_printf("\t\t.flags.unsynchronized = %u\n", (*cmd).flags.unsynchronized); } static void dump_SVGA3dCmdSetTransform(const SVGA3dCmdSetTransform *cmd) { - debug_printf("\t\t.cid = %u\n", (*cmd).cid); + _debug_printf("\t\t.cid = %u\n", (*cmd).cid); switch((*cmd).type) { case SVGA3D_TRANSFORM_INVALID: - debug_printf("\t\t.type = SVGA3D_TRANSFORM_INVALID\n"); + _debug_printf("\t\t.type = SVGA3D_TRANSFORM_INVALID\n"); break; case SVGA3D_TRANSFORM_WORLD: - debug_printf("\t\t.type = SVGA3D_TRANSFORM_WORLD\n"); + _debug_printf("\t\t.type = SVGA3D_TRANSFORM_WORLD\n"); break; case SVGA3D_TRANSFORM_VIEW: - debug_printf("\t\t.type = SVGA3D_TRANSFORM_VIEW\n"); + _debug_printf("\t\t.type = SVGA3D_TRANSFORM_VIEW\n"); break; case SVGA3D_TRANSFORM_PROJECTION: - debug_printf("\t\t.type = SVGA3D_TRANSFORM_PROJECTION\n"); + _debug_printf("\t\t.type = SVGA3D_TRANSFORM_PROJECTION\n"); break; case SVGA3D_TRANSFORM_TEXTURE0: - debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE0\n"); + _debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE0\n"); break; case SVGA3D_TRANSFORM_TEXTURE1: - debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE1\n"); + _debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE1\n"); break; case SVGA3D_TRANSFORM_TEXTURE2: - debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE2\n"); + _debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE2\n"); break; case SVGA3D_TRANSFORM_TEXTURE3: - debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE3\n"); + _debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE3\n"); break; case SVGA3D_TRANSFORM_TEXTURE4: - debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE4\n"); + _debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE4\n"); break; case SVGA3D_TRANSFORM_TEXTURE5: - debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE5\n"); + _debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE5\n"); break; case SVGA3D_TRANSFORM_TEXTURE6: - debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE6\n"); + _debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE6\n"); break; case SVGA3D_TRANSFORM_TEXTURE7: - debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE7\n"); + _debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE7\n"); break; case SVGA3D_TRANSFORM_WORLD1: - debug_printf("\t\t.type = SVGA3D_TRANSFORM_WORLD1\n"); + _debug_printf("\t\t.type = SVGA3D_TRANSFORM_WORLD1\n"); break; case SVGA3D_TRANSFORM_WORLD2: - debug_printf("\t\t.type = SVGA3D_TRANSFORM_WORLD2\n"); + _debug_printf("\t\t.type = SVGA3D_TRANSFORM_WORLD2\n"); break; case SVGA3D_TRANSFORM_WORLD3: - debug_printf("\t\t.type = SVGA3D_TRANSFORM_WORLD3\n"); + _debug_printf("\t\t.type = SVGA3D_TRANSFORM_WORLD3\n"); break; case SVGA3D_TRANSFORM_MAX: - debug_printf("\t\t.type = SVGA3D_TRANSFORM_MAX\n"); + _debug_printf("\t\t.type = SVGA3D_TRANSFORM_MAX\n"); break; default: - debug_printf("\t\t.type = %i\n", (*cmd).type); + _debug_printf("\t\t.type = %i\n", (*cmd).type); break; } - debug_printf("\t\t.matrix[0] = %f\n", (*cmd).matrix[0]); - debug_printf("\t\t.matrix[1] = %f\n", (*cmd).matrix[1]); - debug_printf("\t\t.matrix[2] = %f\n", (*cmd).matrix[2]); - debug_printf("\t\t.matrix[3] = %f\n", (*cmd).matrix[3]); - debug_printf("\t\t.matrix[4] = %f\n", (*cmd).matrix[4]); - debug_printf("\t\t.matrix[5] = %f\n", (*cmd).matrix[5]); - debug_printf("\t\t.matrix[6] = %f\n", (*cmd).matrix[6]); - debug_printf("\t\t.matrix[7] = %f\n", (*cmd).matrix[7]); - debug_printf("\t\t.matrix[8] = %f\n", (*cmd).matrix[8]); - debug_printf("\t\t.matrix[9] = %f\n", (*cmd).matrix[9]); - debug_printf("\t\t.matrix[10] = %f\n", (*cmd).matrix[10]); - debug_printf("\t\t.matrix[11] = %f\n", (*cmd).matrix[11]); - debug_printf("\t\t.matrix[12] = %f\n", (*cmd).matrix[12]); - debug_printf("\t\t.matrix[13] = %f\n", (*cmd).matrix[13]); - debug_printf("\t\t.matrix[14] = %f\n", (*cmd).matrix[14]); - debug_printf("\t\t.matrix[15] = %f\n", (*cmd).matrix[15]); + _debug_printf("\t\t.matrix[0] = %f\n", (*cmd).matrix[0]); + _debug_printf("\t\t.matrix[1] = %f\n", (*cmd).matrix[1]); + _debug_printf("\t\t.matrix[2] = %f\n", (*cmd).matrix[2]); + _debug_printf("\t\t.matrix[3] = %f\n", (*cmd).matrix[3]); + _debug_printf("\t\t.matrix[4] = %f\n", (*cmd).matrix[4]); + _debug_printf("\t\t.matrix[5] = %f\n", (*cmd).matrix[5]); + _debug_printf("\t\t.matrix[6] = %f\n", (*cmd).matrix[6]); + _debug_printf("\t\t.matrix[7] = %f\n", (*cmd).matrix[7]); + _debug_printf("\t\t.matrix[8] = %f\n", (*cmd).matrix[8]); + _debug_printf("\t\t.matrix[9] = %f\n", (*cmd).matrix[9]); + _debug_printf("\t\t.matrix[10] = %f\n", (*cmd).matrix[10]); + _debug_printf("\t\t.matrix[11] = %f\n", (*cmd).matrix[11]); + _debug_printf("\t\t.matrix[12] = %f\n", (*cmd).matrix[12]); + _debug_printf("\t\t.matrix[13] = %f\n", (*cmd).matrix[13]); + _debug_printf("\t\t.matrix[14] = %f\n", (*cmd).matrix[14]); + _debug_printf("\t\t.matrix[15] = %f\n", (*cmd).matrix[15]); } static void dump_SVGA3dCmdDestroyShader(const SVGA3dCmdDestroyShader *cmd) { - debug_printf("\t\t.cid = %u\n", (*cmd).cid); - debug_printf("\t\t.shid = %u\n", (*cmd).shid); + _debug_printf("\t\t.cid = %u\n", (*cmd).cid); + _debug_printf("\t\t.shid = %u\n", (*cmd).shid); switch((*cmd).type) { case SVGA3D_SHADERTYPE_COMPILED_DX8: - debug_printf("\t\t.type = SVGA3D_SHADERTYPE_COMPILED_DX8\n"); + _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_COMPILED_DX8\n"); break; case SVGA3D_SHADERTYPE_VS: - debug_printf("\t\t.type = SVGA3D_SHADERTYPE_VS\n"); + _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_VS\n"); break; case SVGA3D_SHADERTYPE_PS: - debug_printf("\t\t.type = SVGA3D_SHADERTYPE_PS\n"); + _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_PS\n"); break; case SVGA3D_SHADERTYPE_MAX: - debug_printf("\t\t.type = SVGA3D_SHADERTYPE_MAX\n"); + _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_MAX\n"); break; default: - debug_printf("\t\t.type = %i\n", (*cmd).type); + _debug_printf("\t\t.type = %i\n", (*cmd).type); break; } } @@ -1233,187 +1233,519 @@ dump_SVGA3dCmdDestroyShader(const SVGA3dCmdDestroyShader *cmd) static void dump_SVGA3dCmdDestroyContext(const SVGA3dCmdDestroyContext *cmd) { - debug_printf("\t\t.cid = %u\n", (*cmd).cid); + _debug_printf("\t\t.cid = %u\n", (*cmd).cid); } static void dump_SVGA3dCmdClear(const SVGA3dCmdClear *cmd) { - debug_printf("\t\t.cid = %u\n", (*cmd).cid); + _debug_printf("\t\t.cid = %u\n", (*cmd).cid); switch((*cmd).clearFlag) { case SVGA3D_CLEAR_COLOR: - debug_printf("\t\t.clearFlag = SVGA3D_CLEAR_COLOR\n"); + _debug_printf("\t\t.clearFlag = SVGA3D_CLEAR_COLOR\n"); break; case SVGA3D_CLEAR_DEPTH: - debug_printf("\t\t.clearFlag = SVGA3D_CLEAR_DEPTH\n"); + _debug_printf("\t\t.clearFlag = SVGA3D_CLEAR_DEPTH\n"); break; case SVGA3D_CLEAR_STENCIL: - debug_printf("\t\t.clearFlag = SVGA3D_CLEAR_STENCIL\n"); + _debug_printf("\t\t.clearFlag = SVGA3D_CLEAR_STENCIL\n"); break; default: - debug_printf("\t\t.clearFlag = %i\n", (*cmd).clearFlag); + _debug_printf("\t\t.clearFlag = %i\n", (*cmd).clearFlag); break; } - debug_printf("\t\t.color = %u\n", (*cmd).color); - debug_printf("\t\t.depth = %f\n", (*cmd).depth); - debug_printf("\t\t.stencil = %u\n", (*cmd).stencil); + _debug_printf("\t\t.color = %u\n", (*cmd).color); + _debug_printf("\t\t.depth = %f\n", (*cmd).depth); + _debug_printf("\t\t.stencil = %u\n", (*cmd).stencil); } static void dump_SVGA3dCmdDefineSurface(const SVGA3dCmdDefineSurface *cmd) { - debug_printf("\t\t.sid = %u\n", (*cmd).sid); + _debug_printf("\t\t.sid = %u\n", (*cmd).sid); switch((*cmd).surfaceFlags) { case SVGA3D_SURFACE_CUBEMAP: - debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_CUBEMAP\n"); + _debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_CUBEMAP\n"); break; case SVGA3D_SURFACE_HINT_STATIC: - debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_HINT_STATIC\n"); + _debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_HINT_STATIC\n"); break; case SVGA3D_SURFACE_HINT_DYNAMIC: - debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_HINT_DYNAMIC\n"); + _debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_HINT_DYNAMIC\n"); break; case SVGA3D_SURFACE_HINT_INDEXBUFFER: - debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_HINT_INDEXBUFFER\n"); + _debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_HINT_INDEXBUFFER\n"); break; case SVGA3D_SURFACE_HINT_VERTEXBUFFER: - debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_HINT_VERTEXBUFFER\n"); + _debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_HINT_VERTEXBUFFER\n"); break; default: - debug_printf("\t\t.surfaceFlags = %i\n", (*cmd).surfaceFlags); + _debug_printf("\t\t.surfaceFlags = %i\n", (*cmd).surfaceFlags); break; } switch((*cmd).format) { case SVGA3D_FORMAT_INVALID: - debug_printf("\t\t.format = SVGA3D_FORMAT_INVALID\n"); + _debug_printf("\t\t.format = SVGA3D_FORMAT_INVALID\n"); break; case SVGA3D_X8R8G8B8: - debug_printf("\t\t.format = SVGA3D_X8R8G8B8\n"); + _debug_printf("\t\t.format = SVGA3D_X8R8G8B8\n"); break; case SVGA3D_A8R8G8B8: - debug_printf("\t\t.format = SVGA3D_A8R8G8B8\n"); + _debug_printf("\t\t.format = SVGA3D_A8R8G8B8\n"); break; case SVGA3D_R5G6B5: - debug_printf("\t\t.format = SVGA3D_R5G6B5\n"); + _debug_printf("\t\t.format = SVGA3D_R5G6B5\n"); break; case SVGA3D_X1R5G5B5: - debug_printf("\t\t.format = SVGA3D_X1R5G5B5\n"); + _debug_printf("\t\t.format = SVGA3D_X1R5G5B5\n"); break; case SVGA3D_A1R5G5B5: - debug_printf("\t\t.format = SVGA3D_A1R5G5B5\n"); + _debug_printf("\t\t.format = SVGA3D_A1R5G5B5\n"); break; case SVGA3D_A4R4G4B4: - debug_printf("\t\t.format = SVGA3D_A4R4G4B4\n"); + _debug_printf("\t\t.format = SVGA3D_A4R4G4B4\n"); break; case SVGA3D_Z_D32: - debug_printf("\t\t.format = SVGA3D_Z_D32\n"); + _debug_printf("\t\t.format = SVGA3D_Z_D32\n"); break; case SVGA3D_Z_D16: - debug_printf("\t\t.format = SVGA3D_Z_D16\n"); + _debug_printf("\t\t.format = SVGA3D_Z_D16\n"); break; case SVGA3D_Z_D24S8: - debug_printf("\t\t.format = SVGA3D_Z_D24S8\n"); + _debug_printf("\t\t.format = SVGA3D_Z_D24S8\n"); break; case SVGA3D_Z_D15S1: - debug_printf("\t\t.format = SVGA3D_Z_D15S1\n"); + _debug_printf("\t\t.format = SVGA3D_Z_D15S1\n"); break; case SVGA3D_LUMINANCE8: - debug_printf("\t\t.format = SVGA3D_LUMINANCE8\n"); + _debug_printf("\t\t.format = SVGA3D_LUMINANCE8\n"); break; case SVGA3D_LUMINANCE4_ALPHA4: - debug_printf("\t\t.format = SVGA3D_LUMINANCE4_ALPHA4\n"); + _debug_printf("\t\t.format = SVGA3D_LUMINANCE4_ALPHA4\n"); break; case SVGA3D_LUMINANCE16: - debug_printf("\t\t.format = SVGA3D_LUMINANCE16\n"); + _debug_printf("\t\t.format = SVGA3D_LUMINANCE16\n"); break; case SVGA3D_LUMINANCE8_ALPHA8: - debug_printf("\t\t.format = SVGA3D_LUMINANCE8_ALPHA8\n"); + _debug_printf("\t\t.format = SVGA3D_LUMINANCE8_ALPHA8\n"); break; case SVGA3D_DXT1: - debug_printf("\t\t.format = SVGA3D_DXT1\n"); + _debug_printf("\t\t.format = SVGA3D_DXT1\n"); break; case SVGA3D_DXT2: - debug_printf("\t\t.format = SVGA3D_DXT2\n"); + _debug_printf("\t\t.format = SVGA3D_DXT2\n"); break; case SVGA3D_DXT3: - debug_printf("\t\t.format = SVGA3D_DXT3\n"); + _debug_printf("\t\t.format = SVGA3D_DXT3\n"); break; case SVGA3D_DXT4: - debug_printf("\t\t.format = SVGA3D_DXT4\n"); + _debug_printf("\t\t.format = SVGA3D_DXT4\n"); break; case SVGA3D_DXT5: - debug_printf("\t\t.format = SVGA3D_DXT5\n"); + _debug_printf("\t\t.format = SVGA3D_DXT5\n"); break; case SVGA3D_BUMPU8V8: - debug_printf("\t\t.format = SVGA3D_BUMPU8V8\n"); + _debug_printf("\t\t.format = SVGA3D_BUMPU8V8\n"); break; case SVGA3D_BUMPL6V5U5: - debug_printf("\t\t.format = SVGA3D_BUMPL6V5U5\n"); + _debug_printf("\t\t.format = SVGA3D_BUMPL6V5U5\n"); break; case SVGA3D_BUMPX8L8V8U8: - debug_printf("\t\t.format = SVGA3D_BUMPX8L8V8U8\n"); + _debug_printf("\t\t.format = SVGA3D_BUMPX8L8V8U8\n"); break; case SVGA3D_BUMPL8V8U8: - debug_printf("\t\t.format = SVGA3D_BUMPL8V8U8\n"); + _debug_printf("\t\t.format = SVGA3D_BUMPL8V8U8\n"); break; case SVGA3D_ARGB_S10E5: - debug_printf("\t\t.format = SVGA3D_ARGB_S10E5\n"); + _debug_printf("\t\t.format = SVGA3D_ARGB_S10E5\n"); break; case SVGA3D_ARGB_S23E8: - debug_printf("\t\t.format = SVGA3D_ARGB_S23E8\n"); + _debug_printf("\t\t.format = SVGA3D_ARGB_S23E8\n"); break; case SVGA3D_A2R10G10B10: - debug_printf("\t\t.format = SVGA3D_A2R10G10B10\n"); + _debug_printf("\t\t.format = SVGA3D_A2R10G10B10\n"); break; case SVGA3D_V8U8: - debug_printf("\t\t.format = SVGA3D_V8U8\n"); + _debug_printf("\t\t.format = SVGA3D_V8U8\n"); break; case SVGA3D_Q8W8V8U8: - debug_printf("\t\t.format = SVGA3D_Q8W8V8U8\n"); + _debug_printf("\t\t.format = SVGA3D_Q8W8V8U8\n"); break; case SVGA3D_CxV8U8: - debug_printf("\t\t.format = SVGA3D_CxV8U8\n"); + _debug_printf("\t\t.format = SVGA3D_CxV8U8\n"); break; case SVGA3D_X8L8V8U8: - debug_printf("\t\t.format = SVGA3D_X8L8V8U8\n"); + _debug_printf("\t\t.format = SVGA3D_X8L8V8U8\n"); break; case SVGA3D_A2W10V10U10: - debug_printf("\t\t.format = SVGA3D_A2W10V10U10\n"); + _debug_printf("\t\t.format = SVGA3D_A2W10V10U10\n"); break; case SVGA3D_ALPHA8: - debug_printf("\t\t.format = SVGA3D_ALPHA8\n"); + _debug_printf("\t\t.format = SVGA3D_ALPHA8\n"); break; case SVGA3D_R_S10E5: - debug_printf("\t\t.format = SVGA3D_R_S10E5\n"); + _debug_printf("\t\t.format = SVGA3D_R_S10E5\n"); break; case SVGA3D_R_S23E8: - debug_printf("\t\t.format = SVGA3D_R_S23E8\n"); + _debug_printf("\t\t.format = SVGA3D_R_S23E8\n"); break; case SVGA3D_RG_S10E5: - debug_printf("\t\t.format = SVGA3D_RG_S10E5\n"); + _debug_printf("\t\t.format = SVGA3D_RG_S10E5\n"); break; case SVGA3D_RG_S23E8: - debug_printf("\t\t.format = SVGA3D_RG_S23E8\n"); + _debug_printf("\t\t.format = SVGA3D_RG_S23E8\n"); break; case SVGA3D_BUFFER: - debug_printf("\t\t.format = SVGA3D_BUFFER\n"); + _debug_printf("\t\t.format = SVGA3D_BUFFER\n"); break; case SVGA3D_Z_D24X8: - debug_printf("\t\t.format = SVGA3D_Z_D24X8\n"); + _debug_printf("\t\t.format = SVGA3D_Z_D24X8\n"); break; case SVGA3D_FORMAT_MAX: - debug_printf("\t\t.format = SVGA3D_FORMAT_MAX\n"); + _debug_printf("\t\t.format = SVGA3D_FORMAT_MAX\n"); break; default: - debug_printf("\t\t.format = %i\n", (*cmd).format); + _debug_printf("\t\t.format = %i\n", (*cmd).format); break; } - debug_printf("\t\t.face[0].numMipLevels = %u\n", (*cmd).face[0].numMipLevels); - debug_printf("\t\t.face[1].numMipLevels = %u\n", (*cmd).face[1].numMipLevels); - debug_printf("\t\t.face[2].numMipLevels = %u\n", (*cmd).face[2].numMipLevels); - debug_printf("\t\t.face[3].numMipLevels = %u\n", (*cmd).face[3].numMipLevels); - debug_printf("\t\t.face[4].numMipLevels = %u\n", (*cmd).face[4].numMipLevels); - debug_printf("\t\t.face[5].numMipLevels = %u\n", (*cmd).face[5].numMipLevels); + _debug_printf("\t\t.face[0].numMipLevels = %u\n", (*cmd).face[0].numMipLevels); + _debug_printf("\t\t.face[1].numMipLevels = %u\n", (*cmd).face[1].numMipLevels); + _debug_printf("\t\t.face[2].numMipLevels = %u\n", (*cmd).face[2].numMipLevels); + _debug_printf("\t\t.face[3].numMipLevels = %u\n", (*cmd).face[3].numMipLevels); + _debug_printf("\t\t.face[4].numMipLevels = %u\n", (*cmd).face[4].numMipLevels); + _debug_printf("\t\t.face[5].numMipLevels = %u\n", (*cmd).face[5].numMipLevels); +} + +static void +dump_SVGASignedRect(const SVGASignedRect *cmd) +{ + _debug_printf("\t\t.left = %i\n", (*cmd).left); + _debug_printf("\t\t.top = %i\n", (*cmd).top); + _debug_printf("\t\t.right = %i\n", (*cmd).right); + _debug_printf("\t\t.bottom = %i\n", (*cmd).bottom); +} + +static void +dump_SVGA3dCmdBlitSurfaceToScreen(const SVGA3dCmdBlitSurfaceToScreen *cmd) +{ + _debug_printf("\t\t.srcImage.sid = %u\n", (*cmd).srcImage.sid); + _debug_printf("\t\t.srcImage.face = %u\n", (*cmd).srcImage.face); + _debug_printf("\t\t.srcImage.mipmap = %u\n", (*cmd).srcImage.mipmap); + _debug_printf("\t\t.srcRect.left = %i\n", (*cmd).srcRect.left); + _debug_printf("\t\t.srcRect.top = %i\n", (*cmd).srcRect.top); + _debug_printf("\t\t.srcRect.right = %i\n", (*cmd).srcRect.right); + _debug_printf("\t\t.srcRect.bottom = %i\n", (*cmd).srcRect.bottom); + _debug_printf("\t\t.destScreenId = %u\n", (*cmd).destScreenId); + _debug_printf("\t\t.destRect.left = %i\n", (*cmd).destRect.left); + _debug_printf("\t\t.destRect.top = %i\n", (*cmd).destRect.top); + _debug_printf("\t\t.destRect.right = %i\n", (*cmd).destRect.right); + _debug_printf("\t\t.destRect.bottom = %i\n", (*cmd).destRect.bottom); +} + + +void +svga_dump_command(uint32_t cmd_id, const void *data, uint32_t size) +{ + const uint8_t *body = (const uint8_t *)data; + const uint8_t *next = body + size; + + switch(cmd_id) { + case SVGA_3D_CMD_SURFACE_DEFINE: + _debug_printf("\tSVGA_3D_CMD_SURFACE_DEFINE\n"); + { + const SVGA3dCmdDefineSurface *cmd = (const SVGA3dCmdDefineSurface *)body; + dump_SVGA3dCmdDefineSurface(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dSize) <= next) { + dump_SVGA3dSize((const SVGA3dSize *)body); + body += sizeof(SVGA3dSize); + } + } + break; + case SVGA_3D_CMD_SURFACE_DESTROY: + _debug_printf("\tSVGA_3D_CMD_SURFACE_DESTROY\n"); + { + const SVGA3dCmdDestroySurface *cmd = (const SVGA3dCmdDestroySurface *)body; + dump_SVGA3dCmdDestroySurface(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SURFACE_COPY: + _debug_printf("\tSVGA_3D_CMD_SURFACE_COPY\n"); + { + const SVGA3dCmdSurfaceCopy *cmd = (const SVGA3dCmdSurfaceCopy *)body; + dump_SVGA3dCmdSurfaceCopy(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dCopyBox) <= next) { + dump_SVGA3dCopyBox((const SVGA3dCopyBox *)body); + body += sizeof(SVGA3dCopyBox); + } + } + break; + case SVGA_3D_CMD_SURFACE_STRETCHBLT: + _debug_printf("\tSVGA_3D_CMD_SURFACE_STRETCHBLT\n"); + { + const SVGA3dCmdSurfaceStretchBlt *cmd = (const SVGA3dCmdSurfaceStretchBlt *)body; + dump_SVGA3dCmdSurfaceStretchBlt(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SURFACE_DMA: + _debug_printf("\tSVGA_3D_CMD_SURFACE_DMA\n"); + { + const SVGA3dCmdSurfaceDMA *cmd = (const SVGA3dCmdSurfaceDMA *)body; + dump_SVGA3dCmdSurfaceDMA(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dCopyBox) <= next) { + dump_SVGA3dCopyBox((const SVGA3dCopyBox *)body); + body += sizeof(SVGA3dCopyBox); + } + while(body + sizeof(SVGA3dCmdSurfaceDMASuffix) <= next) { + dump_SVGA3dCmdSurfaceDMASuffix((const SVGA3dCmdSurfaceDMASuffix *)body); + body += sizeof(SVGA3dCmdSurfaceDMASuffix); + } + } + break; + case SVGA_3D_CMD_CONTEXT_DEFINE: + _debug_printf("\tSVGA_3D_CMD_CONTEXT_DEFINE\n"); + { + const SVGA3dCmdDefineContext *cmd = (const SVGA3dCmdDefineContext *)body; + dump_SVGA3dCmdDefineContext(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_CONTEXT_DESTROY: + _debug_printf("\tSVGA_3D_CMD_CONTEXT_DESTROY\n"); + { + const SVGA3dCmdDestroyContext *cmd = (const SVGA3dCmdDestroyContext *)body; + dump_SVGA3dCmdDestroyContext(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETTRANSFORM: + _debug_printf("\tSVGA_3D_CMD_SETTRANSFORM\n"); + { + const SVGA3dCmdSetTransform *cmd = (const SVGA3dCmdSetTransform *)body; + dump_SVGA3dCmdSetTransform(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETZRANGE: + _debug_printf("\tSVGA_3D_CMD_SETZRANGE\n"); + { + const SVGA3dCmdSetZRange *cmd = (const SVGA3dCmdSetZRange *)body; + dump_SVGA3dCmdSetZRange(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETRENDERSTATE: + _debug_printf("\tSVGA_3D_CMD_SETRENDERSTATE\n"); + { + const SVGA3dCmdSetRenderState *cmd = (const SVGA3dCmdSetRenderState *)body; + dump_SVGA3dCmdSetRenderState(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dRenderState) <= next) { + dump_SVGA3dRenderState((const SVGA3dRenderState *)body); + body += sizeof(SVGA3dRenderState); + } + } + break; + case SVGA_3D_CMD_SETRENDERTARGET: + _debug_printf("\tSVGA_3D_CMD_SETRENDERTARGET\n"); + { + const SVGA3dCmdSetRenderTarget *cmd = (const SVGA3dCmdSetRenderTarget *)body; + dump_SVGA3dCmdSetRenderTarget(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETTEXTURESTATE: + _debug_printf("\tSVGA_3D_CMD_SETTEXTURESTATE\n"); + { + const SVGA3dCmdSetTextureState *cmd = (const SVGA3dCmdSetTextureState *)body; + dump_SVGA3dCmdSetTextureState(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dTextureState) <= next) { + dump_SVGA3dTextureState((const SVGA3dTextureState *)body); + body += sizeof(SVGA3dTextureState); + } + } + break; + case SVGA_3D_CMD_SETMATERIAL: + _debug_printf("\tSVGA_3D_CMD_SETMATERIAL\n"); + { + const SVGA3dCmdSetMaterial *cmd = (const SVGA3dCmdSetMaterial *)body; + dump_SVGA3dCmdSetMaterial(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETLIGHTDATA: + _debug_printf("\tSVGA_3D_CMD_SETLIGHTDATA\n"); + { + const SVGA3dCmdSetLightData *cmd = (const SVGA3dCmdSetLightData *)body; + dump_SVGA3dCmdSetLightData(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETLIGHTENABLED: + _debug_printf("\tSVGA_3D_CMD_SETLIGHTENABLED\n"); + { + const SVGA3dCmdSetLightEnabled *cmd = (const SVGA3dCmdSetLightEnabled *)body; + dump_SVGA3dCmdSetLightEnabled(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETVIEWPORT: + _debug_printf("\tSVGA_3D_CMD_SETVIEWPORT\n"); + { + const SVGA3dCmdSetViewport *cmd = (const SVGA3dCmdSetViewport *)body; + dump_SVGA3dCmdSetViewport(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETCLIPPLANE: + _debug_printf("\tSVGA_3D_CMD_SETCLIPPLANE\n"); + { + const SVGA3dCmdSetClipPlane *cmd = (const SVGA3dCmdSetClipPlane *)body; + dump_SVGA3dCmdSetClipPlane(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_CLEAR: + _debug_printf("\tSVGA_3D_CMD_CLEAR\n"); + { + const SVGA3dCmdClear *cmd = (const SVGA3dCmdClear *)body; + dump_SVGA3dCmdClear(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dRect) <= next) { + dump_SVGA3dRect((const SVGA3dRect *)body); + body += sizeof(SVGA3dRect); + } + } + break; + case SVGA_3D_CMD_PRESENT: + _debug_printf("\tSVGA_3D_CMD_PRESENT\n"); + { + const SVGA3dCmdPresent *cmd = (const SVGA3dCmdPresent *)body; + dump_SVGA3dCmdPresent(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dCopyRect) <= next) { + dump_SVGA3dCopyRect((const SVGA3dCopyRect *)body); + body += sizeof(SVGA3dCopyRect); + } + } + break; + case SVGA_3D_CMD_SHADER_DEFINE: + _debug_printf("\tSVGA_3D_CMD_SHADER_DEFINE\n"); + { + const SVGA3dCmdDefineShader *cmd = (const SVGA3dCmdDefineShader *)body; + dump_SVGA3dCmdDefineShader(cmd); + body = (const uint8_t *)&cmd[1]; + svga_shader_dump((const uint32_t *)body, + (unsigned)(next - body)/sizeof(uint32_t), + FALSE ); + body = next; + } + break; + case SVGA_3D_CMD_SHADER_DESTROY: + _debug_printf("\tSVGA_3D_CMD_SHADER_DESTROY\n"); + { + const SVGA3dCmdDestroyShader *cmd = (const SVGA3dCmdDestroyShader *)body; + dump_SVGA3dCmdDestroyShader(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SET_SHADER: + _debug_printf("\tSVGA_3D_CMD_SET_SHADER\n"); + { + const SVGA3dCmdSetShader *cmd = (const SVGA3dCmdSetShader *)body; + dump_SVGA3dCmdSetShader(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SET_SHADER_CONST: + _debug_printf("\tSVGA_3D_CMD_SET_SHADER_CONST\n"); + { + const SVGA3dCmdSetShaderConst *cmd = (const SVGA3dCmdSetShaderConst *)body; + dump_SVGA3dCmdSetShaderConst(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_DRAW_PRIMITIVES: + _debug_printf("\tSVGA_3D_CMD_DRAW_PRIMITIVES\n"); + { + const SVGA3dCmdDrawPrimitives *cmd = (const SVGA3dCmdDrawPrimitives *)body; + unsigned i, j; + dump_SVGA3dCmdDrawPrimitives(cmd); + body = (const uint8_t *)&cmd[1]; + for(i = 0; i < cmd->numVertexDecls; ++i) { + dump_SVGA3dVertexDecl((const SVGA3dVertexDecl *)body); + body += sizeof(SVGA3dVertexDecl); + } + for(j = 0; j < cmd->numRanges; ++j) { + dump_SVGA3dPrimitiveRange((const SVGA3dPrimitiveRange *)body); + body += sizeof(SVGA3dPrimitiveRange); + } + while(body + sizeof(SVGA3dVertexDivisor) <= next) { + dump_SVGA3dVertexDivisor((const SVGA3dVertexDivisor *)body); + body += sizeof(SVGA3dVertexDivisor); + } + } + break; + case SVGA_3D_CMD_SETSCISSORRECT: + _debug_printf("\tSVGA_3D_CMD_SETSCISSORRECT\n"); + { + const SVGA3dCmdSetScissorRect *cmd = (const SVGA3dCmdSetScissorRect *)body; + dump_SVGA3dCmdSetScissorRect(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_BEGIN_QUERY: + _debug_printf("\tSVGA_3D_CMD_BEGIN_QUERY\n"); + { + const SVGA3dCmdBeginQuery *cmd = (const SVGA3dCmdBeginQuery *)body; + dump_SVGA3dCmdBeginQuery(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_END_QUERY: + _debug_printf("\tSVGA_3D_CMD_END_QUERY\n"); + { + const SVGA3dCmdEndQuery *cmd = (const SVGA3dCmdEndQuery *)body; + dump_SVGA3dCmdEndQuery(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_WAIT_FOR_QUERY: + _debug_printf("\tSVGA_3D_CMD_WAIT_FOR_QUERY\n"); + { + const SVGA3dCmdWaitForQuery *cmd = (const SVGA3dCmdWaitForQuery *)body; + dump_SVGA3dCmdWaitForQuery(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN: + _debug_printf("\tSVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN\n"); + { + const SVGA3dCmdBlitSurfaceToScreen *cmd = (const SVGA3dCmdBlitSurfaceToScreen *)body; + dump_SVGA3dCmdBlitSurfaceToScreen(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGASignedRect) <= next) { + dump_SVGASignedRect((const SVGASignedRect *)body); + body += sizeof(SVGASignedRect); + } + } + break; + default: + _debug_printf("\t0x%08x\n", cmd_id); + break; + } + + while(body + sizeof(uint32_t) <= next) { + _debug_printf("\t\t0x%08x\n", *(const uint32_t *)body); + body += sizeof(uint32_t); + } + while(body + sizeof(uint32_t) <= next) + _debug_printf("\t\t0x%02x\n", *body++); } @@ -1432,303 +1764,19 @@ svga_dump_commands(const void *commands, uint32_t size) const SVGA3dCmdHeader *header = (const SVGA3dCmdHeader *)next; const uint8_t *body = (const uint8_t *)&header[1]; - next = (const uint8_t *)body + header->size; + next = body + header->size; if(next > last) break; - switch(cmd_id) { - case SVGA_3D_CMD_SURFACE_DEFINE: - debug_printf("\tSVGA_3D_CMD_SURFACE_DEFINE\n"); - { - const SVGA3dCmdDefineSurface *cmd = (const SVGA3dCmdDefineSurface *)body; - dump_SVGA3dCmdDefineSurface(cmd); - body = (const uint8_t *)&cmd[1]; - while(body + sizeof(SVGA3dSize) <= next) { - dump_SVGA3dSize((const SVGA3dSize *)body); - body += sizeof(SVGA3dSize); - } - } - break; - case SVGA_3D_CMD_SURFACE_DESTROY: - debug_printf("\tSVGA_3D_CMD_SURFACE_DESTROY\n"); - { - const SVGA3dCmdDestroySurface *cmd = (const SVGA3dCmdDestroySurface *)body; - dump_SVGA3dCmdDestroySurface(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SURFACE_COPY: - debug_printf("\tSVGA_3D_CMD_SURFACE_COPY\n"); - { - const SVGA3dCmdSurfaceCopy *cmd = (const SVGA3dCmdSurfaceCopy *)body; - dump_SVGA3dCmdSurfaceCopy(cmd); - body = (const uint8_t *)&cmd[1]; - while(body + sizeof(SVGA3dCopyBox) <= next) { - dump_SVGA3dCopyBox((const SVGA3dCopyBox *)body); - body += sizeof(SVGA3dCopyBox); - } - } - break; - case SVGA_3D_CMD_SURFACE_STRETCHBLT: - debug_printf("\tSVGA_3D_CMD_SURFACE_STRETCHBLT\n"); - { - const SVGA3dCmdSurfaceStretchBlt *cmd = (const SVGA3dCmdSurfaceStretchBlt *)body; - dump_SVGA3dCmdSurfaceStretchBlt(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SURFACE_DMA: - debug_printf("\tSVGA_3D_CMD_SURFACE_DMA\n"); - { - const SVGA3dCmdSurfaceDMA *cmd = (const SVGA3dCmdSurfaceDMA *)body; - dump_SVGA3dCmdSurfaceDMA(cmd); - body = (const uint8_t *)&cmd[1]; - while(body + sizeof(SVGA3dCopyBox) <= next) { - dump_SVGA3dCopyBox((const SVGA3dCopyBox *)body); - body += sizeof(SVGA3dCopyBox); - } - while(body + sizeof(SVGA3dCmdSurfaceDMASuffix) <= next) { - dump_SVGA3dCmdSurfaceDMASuffix((const SVGA3dCmdSurfaceDMASuffix *)body); - body += sizeof(SVGA3dCmdSurfaceDMASuffix); - } - } - break; - case SVGA_3D_CMD_CONTEXT_DEFINE: - debug_printf("\tSVGA_3D_CMD_CONTEXT_DEFINE\n"); - { - const SVGA3dCmdDefineContext *cmd = (const SVGA3dCmdDefineContext *)body; - dump_SVGA3dCmdDefineContext(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_CONTEXT_DESTROY: - debug_printf("\tSVGA_3D_CMD_CONTEXT_DESTROY\n"); - { - const SVGA3dCmdDestroyContext *cmd = (const SVGA3dCmdDestroyContext *)body; - dump_SVGA3dCmdDestroyContext(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SETTRANSFORM: - debug_printf("\tSVGA_3D_CMD_SETTRANSFORM\n"); - { - const SVGA3dCmdSetTransform *cmd = (const SVGA3dCmdSetTransform *)body; - dump_SVGA3dCmdSetTransform(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SETZRANGE: - debug_printf("\tSVGA_3D_CMD_SETZRANGE\n"); - { - const SVGA3dCmdSetZRange *cmd = (const SVGA3dCmdSetZRange *)body; - dump_SVGA3dCmdSetZRange(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SETRENDERSTATE: - debug_printf("\tSVGA_3D_CMD_SETRENDERSTATE\n"); - { - const SVGA3dCmdSetRenderState *cmd = (const SVGA3dCmdSetRenderState *)body; - dump_SVGA3dCmdSetRenderState(cmd); - body = (const uint8_t *)&cmd[1]; - while(body + sizeof(SVGA3dRenderState) <= next) { - dump_SVGA3dRenderState((const SVGA3dRenderState *)body); - body += sizeof(SVGA3dRenderState); - } - } - break; - case SVGA_3D_CMD_SETRENDERTARGET: - debug_printf("\tSVGA_3D_CMD_SETRENDERTARGET\n"); - { - const SVGA3dCmdSetRenderTarget *cmd = (const SVGA3dCmdSetRenderTarget *)body; - dump_SVGA3dCmdSetRenderTarget(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SETTEXTURESTATE: - debug_printf("\tSVGA_3D_CMD_SETTEXTURESTATE\n"); - { - const SVGA3dCmdSetTextureState *cmd = (const SVGA3dCmdSetTextureState *)body; - dump_SVGA3dCmdSetTextureState(cmd); - body = (const uint8_t *)&cmd[1]; - while(body + sizeof(SVGA3dTextureState) <= next) { - dump_SVGA3dTextureState((const SVGA3dTextureState *)body); - body += sizeof(SVGA3dTextureState); - } - } - break; - case SVGA_3D_CMD_SETMATERIAL: - debug_printf("\tSVGA_3D_CMD_SETMATERIAL\n"); - { - const SVGA3dCmdSetMaterial *cmd = (const SVGA3dCmdSetMaterial *)body; - dump_SVGA3dCmdSetMaterial(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SETLIGHTDATA: - debug_printf("\tSVGA_3D_CMD_SETLIGHTDATA\n"); - { - const SVGA3dCmdSetLightData *cmd = (const SVGA3dCmdSetLightData *)body; - dump_SVGA3dCmdSetLightData(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SETLIGHTENABLED: - debug_printf("\tSVGA_3D_CMD_SETLIGHTENABLED\n"); - { - const SVGA3dCmdSetLightEnabled *cmd = (const SVGA3dCmdSetLightEnabled *)body; - dump_SVGA3dCmdSetLightEnabled(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SETVIEWPORT: - debug_printf("\tSVGA_3D_CMD_SETVIEWPORT\n"); - { - const SVGA3dCmdSetViewport *cmd = (const SVGA3dCmdSetViewport *)body; - dump_SVGA3dCmdSetViewport(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SETCLIPPLANE: - debug_printf("\tSVGA_3D_CMD_SETCLIPPLANE\n"); - { - const SVGA3dCmdSetClipPlane *cmd = (const SVGA3dCmdSetClipPlane *)body; - dump_SVGA3dCmdSetClipPlane(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_CLEAR: - debug_printf("\tSVGA_3D_CMD_CLEAR\n"); - { - const SVGA3dCmdClear *cmd = (const SVGA3dCmdClear *)body; - dump_SVGA3dCmdClear(cmd); - body = (const uint8_t *)&cmd[1]; - while(body + sizeof(SVGA3dRect) <= next) { - dump_SVGA3dRect((const SVGA3dRect *)body); - body += sizeof(SVGA3dRect); - } - } - break; - case SVGA_3D_CMD_PRESENT: - debug_printf("\tSVGA_3D_CMD_PRESENT\n"); - { - const SVGA3dCmdPresent *cmd = (const SVGA3dCmdPresent *)body; - dump_SVGA3dCmdPresent(cmd); - body = (const uint8_t *)&cmd[1]; - while(body + sizeof(SVGA3dCopyRect) <= next) { - dump_SVGA3dCopyRect((const SVGA3dCopyRect *)body); - body += sizeof(SVGA3dCopyRect); - } - } - break; - case SVGA_3D_CMD_SHADER_DEFINE: - debug_printf("\tSVGA_3D_CMD_SHADER_DEFINE\n"); - { - const SVGA3dCmdDefineShader *cmd = (const SVGA3dCmdDefineShader *)body; - dump_SVGA3dCmdDefineShader(cmd); - body = (const uint8_t *)&cmd[1]; - svga_shader_dump((const uint32_t *)body, - (unsigned)(next - body)/sizeof(uint32_t), - FALSE ); - body = next; - } - break; - case SVGA_3D_CMD_SHADER_DESTROY: - debug_printf("\tSVGA_3D_CMD_SHADER_DESTROY\n"); - { - const SVGA3dCmdDestroyShader *cmd = (const SVGA3dCmdDestroyShader *)body; - dump_SVGA3dCmdDestroyShader(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SET_SHADER: - debug_printf("\tSVGA_3D_CMD_SET_SHADER\n"); - { - const SVGA3dCmdSetShader *cmd = (const SVGA3dCmdSetShader *)body; - dump_SVGA3dCmdSetShader(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SET_SHADER_CONST: - debug_printf("\tSVGA_3D_CMD_SET_SHADER_CONST\n"); - { - const SVGA3dCmdSetShaderConst *cmd = (const SVGA3dCmdSetShaderConst *)body; - dump_SVGA3dCmdSetShaderConst(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_DRAW_PRIMITIVES: - debug_printf("\tSVGA_3D_CMD_DRAW_PRIMITIVES\n"); - { - const SVGA3dCmdDrawPrimitives *cmd = (const SVGA3dCmdDrawPrimitives *)body; - unsigned i, j; - dump_SVGA3dCmdDrawPrimitives(cmd); - body = (const uint8_t *)&cmd[1]; - for(i = 0; i < cmd->numVertexDecls; ++i) { - dump_SVGA3dVertexDecl((const SVGA3dVertexDecl *)body); - body += sizeof(SVGA3dVertexDecl); - } - for(j = 0; j < cmd->numRanges; ++j) { - dump_SVGA3dPrimitiveRange((const SVGA3dPrimitiveRange *)body); - body += sizeof(SVGA3dPrimitiveRange); - } - while(body + sizeof(SVGA3dVertexDivisor) <= next) { - dump_SVGA3dVertexDivisor((const SVGA3dVertexDivisor *)body); - body += sizeof(SVGA3dVertexDivisor); - } - } - break; - case SVGA_3D_CMD_SETSCISSORRECT: - debug_printf("\tSVGA_3D_CMD_SETSCISSORRECT\n"); - { - const SVGA3dCmdSetScissorRect *cmd = (const SVGA3dCmdSetScissorRect *)body; - dump_SVGA3dCmdSetScissorRect(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_BEGIN_QUERY: - debug_printf("\tSVGA_3D_CMD_BEGIN_QUERY\n"); - { - const SVGA3dCmdBeginQuery *cmd = (const SVGA3dCmdBeginQuery *)body; - dump_SVGA3dCmdBeginQuery(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_END_QUERY: - debug_printf("\tSVGA_3D_CMD_END_QUERY\n"); - { - const SVGA3dCmdEndQuery *cmd = (const SVGA3dCmdEndQuery *)body; - dump_SVGA3dCmdEndQuery(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_WAIT_FOR_QUERY: - debug_printf("\tSVGA_3D_CMD_WAIT_FOR_QUERY\n"); - { - const SVGA3dCmdWaitForQuery *cmd = (const SVGA3dCmdWaitForQuery *)body; - dump_SVGA3dCmdWaitForQuery(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - default: - debug_printf("\t0x%08x\n", cmd_id); - break; - } - - while(body + sizeof(uint32_t) <= next) { - debug_printf("\t\t0x%08x\n", *(const uint32_t *)body); - body += sizeof(uint32_t); - } - while(body + sizeof(uint32_t) <= next) - debug_printf("\t\t0x%02x\n", *body++); + svga_dump_command(cmd_id, body, header->size); } else if(cmd_id == SVGA_CMD_FENCE) { - debug_printf("\tSVGA_CMD_FENCE\n"); - debug_printf("\t\t0x%08x\n", ((const uint32_t *)next)[1]); + _debug_printf("\tSVGA_CMD_FENCE\n"); + _debug_printf("\t\t0x%08x\n", ((const uint32_t *)next)[1]); next += 2*sizeof(uint32_t); } else { - debug_printf("\t0x%08x\n", cmd_id); + _debug_printf("\t0x%08x\n", cmd_id); next += sizeof(uint32_t); } } diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.h b/src/gallium/drivers/svga/svgadump/svga_dump.h index 69a87020875..ca0154361cc 100644 --- a/src/gallium/drivers/svga/svgadump/svga_dump.h +++ b/src/gallium/drivers/svga/svgadump/svga_dump.h @@ -28,6 +28,9 @@ #include "pipe/p_compiler.h" +void +svga_dump_command(uint32_t cmd_id, const void *data, uint32_t size); + void svga_dump_commands(const void *commands, uint32_t size); diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.py b/src/gallium/drivers/svga/svgadump/svga_dump.py index 288e753296e..0bc0b3ae317 100755 --- a/src/gallium/drivers/svga/svgadump/svga_dump.py +++ b/src/gallium/drivers/svga/svgadump/svga_dump.py @@ -71,14 +71,14 @@ class decl_dumper_t(decl_visitor.decl_visitor_t): print ' switch(%s) {' % ("(*cmd)" + self._instance,) for name, value in self.decl.values: print ' case %s:' % (name,) - print ' debug_printf("\\t\\t%s = %s\\n");' % (self._instance, name) + print ' _debug_printf("\\t\\t%s = %s\\n");' % (self._instance, name) print ' break;' print ' default:' - print ' debug_printf("\\t\\t%s = %%i\\n", %s);' % (self._instance, "(*cmd)" + self._instance) + print ' _debug_printf("\\t\\t%s = %%i\\n", %s);' % (self._instance, "(*cmd)" + self._instance) print ' break;' print ' }' else: - print ' debug_printf("\\t\\t%s = %%i\\n", %s);' % (self._instance, "(*cmd)" + self._instance) + print ' _debug_printf("\\t\\t%s = %%i\\n", %s);' % (self._instance, "(*cmd)" + self._instance) def dump_decl(instance, decl): @@ -154,7 +154,7 @@ class type_dumper_t(type_visitor.type_visitor_t): dump_decl(self.instance, decl) def print_instance(self, format): - print ' debug_printf("\\t\\t%s = %s\\n", %s);' % (self.instance, format, "(*cmd)" + self.instance) + print ' _debug_printf("\\t\\t%s = %s\\n", %s);' % (self.instance, format, "(*cmd)" + self.instance) def dump_type(instance, type_): @@ -202,11 +202,62 @@ cmds = [ ('SVGA_3D_CMD_END_QUERY', 'SVGA3dCmdEndQuery', (), None), ('SVGA_3D_CMD_WAIT_FOR_QUERY', 'SVGA3dCmdWaitForQuery', (), None), #('SVGA_3D_CMD_PRESENT_READBACK', None, (), None), + ('SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN', 'SVGA3dCmdBlitSurfaceToScreen', (), 'SVGASignedRect'), ] def dump_cmds(): print r''' void +svga_dump_command(uint32_t cmd_id, const void *data, uint32_t size) +{ + const uint8_t *body = (const uint8_t *)data; + const uint8_t *next = body + size; +''' + print ' switch(cmd_id) {' + indexes = 'ijklmn' + for id, header, body, footer in cmds: + print ' case %s:' % id + print ' _debug_printf("\\t%s\\n");' % id + print ' {' + print ' const %s *cmd = (const %s *)body;' % (header, header) + if len(body): + print ' unsigned ' + ', '.join(indexes[:len(body)]) + ';' + print ' dump_%s(cmd);' % header + print ' body = (const uint8_t *)&cmd[1];' + for i in range(len(body)): + struct, count = body[i] + idx = indexes[i] + print ' for(%s = 0; %s < cmd->%s; ++%s) {' % (idx, idx, count, idx) + print ' dump_%s((const %s *)body);' % (struct, struct) + print ' body += sizeof(%s);' % struct + print ' }' + if footer is not None: + print ' while(body + sizeof(%s) <= next) {' % footer + print ' dump_%s((const %s *)body);' % (footer, footer) + print ' body += sizeof(%s);' % footer + print ' }' + if id == 'SVGA_3D_CMD_SHADER_DEFINE': + print ' svga_shader_dump((const uint32_t *)body,' + print ' (unsigned)(next - body)/sizeof(uint32_t),' + print ' FALSE);' + print ' body = next;' + print ' }' + print ' break;' + print ' default:' + print ' _debug_printf("\\t0x%08x\\n", cmd_id);' + print ' break;' + print ' }' + print r''' + while(body + sizeof(uint32_t) <= next) { + _debug_printf("\t\t0x%08x\n", *(const uint32_t *)body); + body += sizeof(uint32_t); + } + while(body + sizeof(uint32_t) <= next) + _debug_printf("\t\t0x%02x\n", *body++); +} +''' + print r''' +void svga_dump_commands(const void *commands, uint32_t size) { const uint8_t *next = commands; @@ -221,59 +272,19 @@ svga_dump_commands(const void *commands, uint32_t size) const SVGA3dCmdHeader *header = (const SVGA3dCmdHeader *)next; const uint8_t *body = (const uint8_t *)&header[1]; - next = (const uint8_t *)body + header->size; + next = body + header->size; if(next > last) break; -''' - print ' switch(cmd_id) {' - indexes = 'ijklmn' - for id, header, body, footer in cmds: - print ' case %s:' % id - print ' debug_printf("\\t%s\\n");' % id - print ' {' - print ' const %s *cmd = (const %s *)body;' % (header, header) - if len(body): - print ' unsigned ' + ', '.join(indexes[:len(body)]) + ';' - print ' dump_%s(cmd);' % header - print ' body = (const uint8_t *)&cmd[1];' - for i in range(len(body)): - struct, count = body[i] - idx = indexes[i] - print ' for(%s = 0; %s < cmd->%s; ++%s) {' % (idx, idx, count, idx) - print ' dump_%s((const %s *)body);' % (struct, struct) - print ' body += sizeof(%s);' % struct - print ' }' - if footer is not None: - print ' while(body + sizeof(%s) <= next) {' % footer - print ' dump_%s((const %s *)body);' % (footer, footer) - print ' body += sizeof(%s);' % footer - print ' }' - if id == 'SVGA_3D_CMD_SHADER_DEFINE': - print ' sh_svga_dump((const uint32_t *)body, (unsigned)(next - body)/sizeof(uint32_t));' - print ' body = next;' - print ' }' - print ' break;' - print ' default:' - print ' debug_printf("\\t0x%08x\\n", cmd_id);' - print ' break;' - print ' }' - - print r''' - while(body + sizeof(uint32_t) <= next) { - debug_printf("\t\t0x%08x\n", *(const uint32_t *)body); - body += sizeof(uint32_t); - } - while(body + sizeof(uint32_t) <= next) - debug_printf("\t\t0x%02x\n", *body++); + svga_dump_command(cmd_id, body, header->size); } else if(cmd_id == SVGA_CMD_FENCE) { - debug_printf("\tSVGA_CMD_FENCE\n"); - debug_printf("\t\t0x%08x\n", ((const uint32_t *)next)[1]); + _debug_printf("\tSVGA_CMD_FENCE\n"); + _debug_printf("\t\t0x%08x\n", ((const uint32_t *)next)[1]); next += 2*sizeof(uint32_t); } else { - debug_printf("\t0x%08x\n", cmd_id); + _debug_printf("\t0x%08x\n", cmd_id); next += sizeof(uint32_t); } } @@ -294,18 +305,18 @@ def main(): print '#include "svga_shader_dump.h"' print '#include "svga3d_reg.h"' print - print '#include "pipe/p_debug.h"' + print '#include "util/u_debug.h"' print '#include "svga_dump.h"' print config = parser.config_t( - include_paths = ['include'], + include_paths = ['../../../include', '../include'], compiler = 'gcc', ) headers = [ - 'include/svga_types.h', - 'include/svga3d_reg.h', + 'svga_types.h', + 'svga3d_reg.h', ] decls = parser.parse(headers, config, parser.COMPILATION_MODE.ALL_AT_ONCE) diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_dump.c b/src/gallium/drivers/svga/svgadump/svga_shader_dump.c index b0e7fdf378a..70e27d86d30 100644 --- a/src/gallium/drivers/svga/svgadump/svga_shader_dump.c +++ b/src/gallium/drivers/svga/svgadump/svga_shader_dump.c @@ -50,16 +50,16 @@ static void dump_op( struct sh_op op, const char *mnemonic ) assert( op.is_reg == 0 ); if (op.coissue) - debug_printf( "+" ); - debug_printf( "%s", mnemonic ); + _debug_printf( "+" ); + _debug_printf( "%s", mnemonic ); switch (op.control) { case 0: break; case SVGA3DOPCONT_PROJECT: - debug_printf( "p" ); + _debug_printf( "p" ); break; case SVGA3DOPCONT_BIAS: - debug_printf( "b" ); + _debug_printf( "b" ); break; default: assert( 0 ); @@ -72,28 +72,28 @@ static void dump_comp_op( struct sh_op op, const char *mnemonic ) assert( op.is_reg == 0 ); if (op.coissue) - debug_printf( "+" ); - debug_printf( "%s", mnemonic ); + _debug_printf( "+" ); + _debug_printf( "%s", mnemonic ); switch (op.control) { case SVGA3DOPCOMP_RESERVED0: break; case SVGA3DOPCOMP_GT: - debug_printf("_gt"); + _debug_printf("_gt"); break; case SVGA3DOPCOMP_EQ: - debug_printf("_eq"); + _debug_printf("_eq"); break; case SVGA3DOPCOMP_GE: - debug_printf("_ge"); + _debug_printf("_ge"); break; case SVGA3DOPCOMP_LT: - debug_printf("_lt"); + _debug_printf("_lt"); break; case SVGA3DOPCOMPC_NE: - debug_printf("_ne"); + _debug_printf("_ne"); break; case SVGA3DOPCOMP_LE: - debug_printf("_le"); + _debug_printf("_le"); break; case SVGA3DOPCOMP_RESERVED1: default: @@ -109,93 +109,93 @@ static void dump_reg( struct sh_reg reg, struct sh_srcreg *indreg, const struct switch (sh_reg_type( reg )) { case SVGA3DREG_TEMP: - debug_printf( "r%u", reg.number ); + _debug_printf( "r%u", reg.number ); break; case SVGA3DREG_INPUT: - debug_printf( "v%u", reg.number ); + _debug_printf( "v%u", reg.number ); break; case SVGA3DREG_CONST: if (reg.relative) { if (sh_srcreg_type( *indreg ) == SVGA3DREG_LOOP) - debug_printf( "c[aL+%u]", reg.number ); + _debug_printf( "c[aL+%u]", reg.number ); else - debug_printf( "c[a%u.x+%u]", indreg->number, reg.number ); + _debug_printf( "c[a%u.x+%u]", indreg->number, reg.number ); } else - debug_printf( "c%u", reg.number ); + _debug_printf( "c%u", reg.number ); break; case SVGA3DREG_ADDR: /* VS */ /* SVGA3DREG_TEXTURE */ /* PS */ if (di->is_ps) - debug_printf( "t%u", reg.number ); + _debug_printf( "t%u", reg.number ); else - debug_printf( "a%u", reg.number ); + _debug_printf( "a%u", reg.number ); break; case SVGA3DREG_RASTOUT: switch (reg.number) { case 0 /*POSITION*/: - debug_printf( "oPos" ); + _debug_printf( "oPos" ); break; case 1 /*FOG*/: - debug_printf( "oFog" ); + _debug_printf( "oFog" ); break; case 2 /*POINT_SIZE*/: - debug_printf( "oPts" ); + _debug_printf( "oPts" ); break; default: assert( 0 ); - debug_printf( "???" ); + _debug_printf( "???" ); } break; case SVGA3DREG_ATTROUT: assert( reg.number < 2 ); - debug_printf( "oD%u", reg.number ); + _debug_printf( "oD%u", reg.number ); break; case SVGA3DREG_TEXCRDOUT: /* SVGA3DREG_OUTPUT */ - debug_printf( "oT%u", reg.number ); + _debug_printf( "oT%u", reg.number ); break; case SVGA3DREG_COLOROUT: - debug_printf( "oC%u", reg.number ); + _debug_printf( "oC%u", reg.number ); break; case SVGA3DREG_DEPTHOUT: - debug_printf( "oD%u", reg.number ); + _debug_printf( "oD%u", reg.number ); break; case SVGA3DREG_SAMPLER: - debug_printf( "s%u", reg.number ); + _debug_printf( "s%u", reg.number ); break; case SVGA3DREG_CONSTBOOL: assert( !reg.relative ); - debug_printf( "b%u", reg.number ); + _debug_printf( "b%u", reg.number ); break; case SVGA3DREG_CONSTINT: assert( !reg.relative ); - debug_printf( "i%u", reg.number ); + _debug_printf( "i%u", reg.number ); break; case SVGA3DREG_LOOP: assert( reg.number == 0 ); - debug_printf( "aL" ); + _debug_printf( "aL" ); break; case SVGA3DREG_MISCTYPE: switch (reg.number) { case SVGA3DMISCREG_POSITION: - debug_printf( "vPos" ); + _debug_printf( "vPos" ); break; case SVGA3DMISCREG_FACE: - debug_printf( "vFace" ); + _debug_printf( "vFace" ); break; default: assert(0); @@ -204,46 +204,46 @@ static void dump_reg( struct sh_reg reg, struct sh_srcreg *indreg, const struct break; case SVGA3DREG_LABEL: - debug_printf( "l%u", reg.number ); + _debug_printf( "l%u", reg.number ); break; case SVGA3DREG_PREDICATE: - debug_printf( "p%u", reg.number ); + _debug_printf( "p%u", reg.number ); break; default: assert( 0 ); - debug_printf( "???" ); + _debug_printf( "???" ); } } static void dump_cdata( struct sh_cdata cdata ) { - debug_printf( "%f, %f, %f, %f", cdata.xyzw[0], cdata.xyzw[1], cdata.xyzw[2], cdata.xyzw[3] ); + _debug_printf( "%f, %f, %f, %f", cdata.xyzw[0], cdata.xyzw[1], cdata.xyzw[2], cdata.xyzw[3] ); } static void dump_idata( struct sh_idata idata ) { - debug_printf( "%d, %d, %d, %d", idata.xyzw[0], idata.xyzw[1], idata.xyzw[2], idata.xyzw[3] ); + _debug_printf( "%d, %d, %d, %d", idata.xyzw[0], idata.xyzw[1], idata.xyzw[2], idata.xyzw[3] ); } static void dump_bdata( boolean bdata ) { - debug_printf( bdata ? "TRUE" : "FALSE" ); + _debug_printf( bdata ? "TRUE" : "FALSE" ); } static void dump_sampleinfo( struct ps_sampleinfo sampleinfo ) { switch (sampleinfo.texture_type) { case SVGA3DSAMP_2D: - debug_printf( "_2d" ); + _debug_printf( "_2d" ); break; case SVGA3DSAMP_CUBE: - debug_printf( "_cube" ); + _debug_printf( "_cube" ); break; case SVGA3DSAMP_VOLUME: - debug_printf( "_volume" ); + _debug_printf( "_volume" ); break; default: assert( 0 ); @@ -255,46 +255,46 @@ static void dump_usageinfo( struct vs_semantic semantic ) { switch (semantic.usage) { case SVGA3D_DECLUSAGE_POSITION: - debug_printf("_position" ); + _debug_printf("_position" ); break; case SVGA3D_DECLUSAGE_BLENDWEIGHT: - debug_printf("_blendweight" ); + _debug_printf("_blendweight" ); break; case SVGA3D_DECLUSAGE_BLENDINDICES: - debug_printf("_blendindices" ); + _debug_printf("_blendindices" ); break; case SVGA3D_DECLUSAGE_NORMAL: - debug_printf("_normal" ); + _debug_printf("_normal" ); break; case SVGA3D_DECLUSAGE_PSIZE: - debug_printf("_psize" ); + _debug_printf("_psize" ); break; case SVGA3D_DECLUSAGE_TEXCOORD: - debug_printf("_texcoord"); + _debug_printf("_texcoord"); break; case SVGA3D_DECLUSAGE_TANGENT: - debug_printf("_tangent" ); + _debug_printf("_tangent" ); break; case SVGA3D_DECLUSAGE_BINORMAL: - debug_printf("_binormal" ); + _debug_printf("_binormal" ); break; case SVGA3D_DECLUSAGE_TESSFACTOR: - debug_printf("_tessfactor" ); + _debug_printf("_tessfactor" ); break; case SVGA3D_DECLUSAGE_POSITIONT: - debug_printf("_positiont" ); + _debug_printf("_positiont" ); break; case SVGA3D_DECLUSAGE_COLOR: - debug_printf("_color" ); + _debug_printf("_color" ); break; case SVGA3D_DECLUSAGE_FOG: - debug_printf("_fog" ); + _debug_printf("_fog" ); break; case SVGA3D_DECLUSAGE_DEPTH: - debug_printf("_depth" ); + _debug_printf("_depth" ); break; case SVGA3D_DECLUSAGE_SAMPLE: - debug_printf("_sample"); + _debug_printf("_sample"); break; default: assert( 0 ); @@ -302,7 +302,7 @@ static void dump_usageinfo( struct vs_semantic semantic ) } if (semantic.usage_index != 0) { - debug_printf("%d", semantic.usage_index ); + _debug_printf("%d", semantic.usage_index ); } } @@ -316,47 +316,47 @@ static void dump_dstreg( struct sh_dstreg dstreg, const struct dump_info *di ) assert( (dstreg.modifier & (SVGA3DDSTMOD_SATURATE | SVGA3DDSTMOD_PARTIALPRECISION)) == dstreg.modifier ); if (dstreg.modifier & SVGA3DDSTMOD_SATURATE) - debug_printf( "_sat" ); + _debug_printf( "_sat" ); if (dstreg.modifier & SVGA3DDSTMOD_PARTIALPRECISION) - debug_printf( "_pp" ); + _debug_printf( "_pp" ); switch (dstreg.shift_scale) { case 0: break; case 1: - debug_printf( "_x2" ); + _debug_printf( "_x2" ); break; case 2: - debug_printf( "_x4" ); + _debug_printf( "_x4" ); break; case 3: - debug_printf( "_x8" ); + _debug_printf( "_x8" ); break; case 13: - debug_printf( "_d8" ); + _debug_printf( "_d8" ); break; case 14: - debug_printf( "_d4" ); + _debug_printf( "_d4" ); break; case 15: - debug_printf( "_d2" ); + _debug_printf( "_d2" ); break; default: assert( 0 ); } - debug_printf( " " ); + _debug_printf( " " ); u.dstreg = dstreg; dump_reg( u.reg, NULL, di ); if (dstreg.write_mask != SVGA3DWRITEMASK_ALL) { - debug_printf( "." ); + _debug_printf( "." ); if (dstreg.write_mask & SVGA3DWRITEMASK_0) - debug_printf( "x" ); + _debug_printf( "x" ); if (dstreg.write_mask & SVGA3DWRITEMASK_1) - debug_printf( "y" ); + _debug_printf( "y" ); if (dstreg.write_mask & SVGA3DWRITEMASK_2) - debug_printf( "z" ); + _debug_printf( "z" ); if (dstreg.write_mask & SVGA3DWRITEMASK_3) - debug_printf( "w" ); + _debug_printf( "w" ); } } @@ -372,19 +372,19 @@ static void dump_srcreg( struct sh_srcreg srcreg, struct sh_srcreg *indreg, cons case SVGA3DSRCMOD_BIASNEG: case SVGA3DSRCMOD_SIGNNEG: case SVGA3DSRCMOD_X2NEG: - debug_printf( "-" ); + _debug_printf( "-" ); break; case SVGA3DSRCMOD_ABS: - debug_printf( "|" ); + _debug_printf( "|" ); break; case SVGA3DSRCMOD_ABSNEG: - debug_printf( "-|" ); + _debug_printf( "-|" ); break; case SVGA3DSRCMOD_COMP: - debug_printf( "1-" ); + _debug_printf( "1-" ); break; case SVGA3DSRCMOD_NOT: - debug_printf( "!" ); + _debug_printf( "!" ); } u.srcreg = srcreg; @@ -397,39 +397,39 @@ static void dump_srcreg( struct sh_srcreg srcreg, struct sh_srcreg *indreg, cons break; case SVGA3DSRCMOD_ABS: case SVGA3DSRCMOD_ABSNEG: - debug_printf( "|" ); + _debug_printf( "|" ); break; case SVGA3DSRCMOD_BIAS: case SVGA3DSRCMOD_BIASNEG: - debug_printf( "_bias" ); + _debug_printf( "_bias" ); break; case SVGA3DSRCMOD_SIGN: case SVGA3DSRCMOD_SIGNNEG: - debug_printf( "_bx2" ); + _debug_printf( "_bx2" ); break; case SVGA3DSRCMOD_X2: case SVGA3DSRCMOD_X2NEG: - debug_printf( "_x2" ); + _debug_printf( "_x2" ); break; case SVGA3DSRCMOD_DZ: - debug_printf( "_dz" ); + _debug_printf( "_dz" ); break; case SVGA3DSRCMOD_DW: - debug_printf( "_dw" ); + _debug_printf( "_dw" ); break; default: assert( 0 ); } if (srcreg.swizzle_x != 0 || srcreg.swizzle_y != 1 || srcreg.swizzle_z != 2 || srcreg.swizzle_w != 3) { - debug_printf( "." ); + _debug_printf( "." ); if (srcreg.swizzle_x == srcreg.swizzle_y && srcreg.swizzle_y == srcreg.swizzle_z && srcreg.swizzle_z == srcreg.swizzle_w) { - debug_printf( "%c", "xyzw"[srcreg.swizzle_x] ); + _debug_printf( "%c", "xyzw"[srcreg.swizzle_x] ); } else { - debug_printf( "%c", "xyzw"[srcreg.swizzle_x] ); - debug_printf( "%c", "xyzw"[srcreg.swizzle_y] ); - debug_printf( "%c", "xyzw"[srcreg.swizzle_z] ); - debug_printf( "%c", "xyzw"[srcreg.swizzle_w] ); + _debug_printf( "%c", "xyzw"[srcreg.swizzle_x] ); + _debug_printf( "%c", "xyzw"[srcreg.swizzle_y] ); + _debug_printf( "%c", "xyzw"[srcreg.swizzle_z] ); + _debug_printf( "%c", "xyzw"[srcreg.swizzle_w] ); } } } @@ -447,15 +447,15 @@ svga_shader_dump( if (do_binary) { for (i = 0; i < dwords; i++) - debug_printf(" 0x%08x,\n", assem[i]); + _debug_printf(" 0x%08x,\n", assem[i]); - debug_printf("\n\n"); + _debug_printf("\n\n"); } di.version.value = *assem++; di.is_ps = (di.version.type == SVGA3D_PS_TYPE); - debug_printf( + _debug_printf( "%s_%u_%u\n", di.is_ps ? "ps" : "vs", di.version.major, @@ -465,7 +465,7 @@ svga_shader_dump( struct sh_op op = *(struct sh_op *) assem; if (assem - start >= dwords) { - debug_printf("... ran off end of buffer\n"); + _debug_printf("... ran off end of buffer\n"); assert(0); return; } @@ -475,7 +475,7 @@ svga_shader_dump( { struct sh_dcl dcl = *(struct sh_dcl *) assem; - debug_printf( "dcl" ); + _debug_printf( "dcl" ); if (sh_dstreg_type( dcl.reg ) == SVGA3DREG_SAMPLER) dump_sampleinfo( dcl.u.ps.sampleinfo ); else if (di.is_ps) { @@ -486,7 +486,7 @@ svga_shader_dump( else dump_usageinfo( dcl.u.vs.semantic ); dump_dstreg( dcl.reg, &di ); - debug_printf( "\n" ); + _debug_printf( "\n" ); assem += sizeof( struct sh_dcl ) / sizeof( unsigned ); } break; @@ -495,11 +495,11 @@ svga_shader_dump( { struct sh_defb defb = *(struct sh_defb *) assem; - debug_printf( "defb " ); + _debug_printf( "defb " ); dump_reg( defb.reg, NULL, &di ); - debug_printf( ", " ); + _debug_printf( ", " ); dump_bdata( defb.data ); - debug_printf( "\n" ); + _debug_printf( "\n" ); assem += sizeof( struct sh_defb ) / sizeof( unsigned ); } break; @@ -508,11 +508,11 @@ svga_shader_dump( { struct sh_defi defi = *(struct sh_defi *) assem; - debug_printf( "defi " ); + _debug_printf( "defi " ); dump_reg( defi.reg, NULL, &di ); - debug_printf( ", " ); + _debug_printf( ", " ); dump_idata( defi.idata ); - debug_printf( "\n" ); + _debug_printf( "\n" ); assem += sizeof( struct sh_defi ) / sizeof( unsigned ); } break; @@ -528,11 +528,11 @@ svga_shader_dump( else { struct sh_unaryop unaryop = *(struct sh_unaryop *) assem; dump_dstreg( unaryop.dst, &di ); - debug_printf( ", " ); + _debug_printf( ", " ); dump_srcreg( unaryop.src, NULL, &di ); assem += sizeof( struct sh_unaryop ) / sizeof( unsigned ); } - debug_printf( "\n" ); + _debug_printf( "\n" ); break; case SVGA3DOP_TEX: @@ -549,7 +549,7 @@ svga_shader_dump( struct sh_unaryop unaryop = *(struct sh_unaryop *) assem; dump_dstreg( unaryop.dst, &di ); - debug_printf( ", " ); + _debug_printf( ", " ); dump_srcreg( unaryop.src, NULL, &di ); assem += sizeof( struct sh_unaryop ) / sizeof( unsigned ); } @@ -559,30 +559,30 @@ svga_shader_dump( dump_op( op, "texld" ); dump_dstreg( binaryop.dst, &di ); - debug_printf( ", " ); + _debug_printf( ", " ); dump_srcreg( binaryop.src0, NULL, &di ); - debug_printf( ", " ); + _debug_printf( ", " ); dump_srcreg( binaryop.src1, NULL, &di ); assem += sizeof( struct sh_binaryop ) / sizeof( unsigned ); } - debug_printf( "\n" ); + _debug_printf( "\n" ); break; case SVGA3DOP_DEF: { struct sh_def def = *(struct sh_def *) assem; - debug_printf( "def " ); + _debug_printf( "def " ); dump_reg( def.reg, NULL, &di ); - debug_printf( ", " ); + _debug_printf( ", " ); dump_cdata( def.cdata ); - debug_printf( "\n" ); + _debug_printf( "\n" ); assem += sizeof( struct sh_def ) / sizeof( unsigned ); } break; case SVGA3DOP_PHASE: - debug_printf( "phase\n" ); + _debug_printf( "phase\n" ); assem += sizeof( struct sh_op ) / sizeof( unsigned ); break; @@ -596,12 +596,12 @@ svga_shader_dump( break; case SVGA3DOP_RET: - debug_printf( "ret\n" ); + _debug_printf( "ret\n" ); assem += sizeof( struct sh_op ) / sizeof( unsigned ); break; case SVGA3DOP_END: - debug_printf( "end\n" ); + _debug_printf( "end\n" ); finished = TRUE; break; @@ -640,14 +640,14 @@ svga_shader_dump( } if (not_first_arg) - debug_printf( ", " ); + _debug_printf( ", " ); else - debug_printf( " " ); + _debug_printf( " " ); dump_srcreg( srcreg, &indreg, &di ); not_first_arg = TRUE; } - debug_printf( "\n" ); + _debug_printf( "\n" ); } } } diff --git a/src/gallium/drivers/trace/README b/src/gallium/drivers/trace/README index 1000c31e49a..203c3851bc3 100644 --- a/src/gallium/drivers/trace/README +++ b/src/gallium/drivers/trace/README @@ -24,11 +24,10 @@ ensure the right libGL.so is being picked by doing ldd progs/trivial/tri -== Traceing == +== Tracing == -For traceing then do +For tracing then do - export XMESA_TRACE=y GALLIUM_TRACE=tri.trace progs/trivial/tri which should create a tri.trace file, which is an XML file. You can view copying diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 540855c067d..075e4f9a0b2 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -97,25 +97,6 @@ trace_surface_unwrap(struct trace_context *tr_ctx, static INLINE void -trace_context_set_edgeflags(struct pipe_context *_pipe, - const unsigned *bitfield) -{ - struct trace_context *tr_ctx = trace_context(_pipe); - struct pipe_context *pipe = tr_ctx->pipe; - - trace_dump_call_begin("pipe_context", "set_edgeflags"); - - trace_dump_arg(ptr, pipe); - /* FIXME: we don't know how big this array is */ - trace_dump_arg(ptr, bitfield); - - pipe->set_edgeflags(pipe, bitfield);; - - trace_dump_call_end(); -} - - -static INLINE void trace_context_draw_block(struct trace_context *tr_ctx, int flag) { int k; @@ -145,10 +126,16 @@ trace_context_draw_block(struct trace_context *tr_ctx, int flag) for (k = 0; k < tr_ctx->curr.nr_cbufs; k++) if (tr_ctx->draw_rule.surf == tr_ctx->curr.cbufs[k]) block = TRUE; - if (tr_ctx->draw_rule.tex) + if (tr_ctx->draw_rule.tex) { for (k = 0; k < tr_ctx->curr.num_texs; k++) if (tr_ctx->draw_rule.tex == tr_ctx->curr.tex[k]) block = TRUE; + for (k = 0; k < tr_ctx->curr.num_vert_texs; k++) { + if (tr_ctx->draw_rule.tex == tr_ctx->curr.vert_tex[k]) { + block = TRUE; + } + } + } if (block) tr_ctx->draw_blocked |= (flag | 4); @@ -174,16 +161,15 @@ trace_context_draw_block(struct trace_context *tr_ctx, int flag) pipe_mutex_unlock(tr_ctx->draw_mutex); } -static INLINE boolean +static INLINE void trace_context_draw_arrays(struct pipe_context *_pipe, unsigned mode, unsigned start, unsigned count) { struct trace_context *tr_ctx = trace_context(_pipe); struct pipe_context *pipe = tr_ctx->pipe; - boolean result; if (tr_ctx->curr.fs->disabled || tr_ctx->curr.vs->disabled) - return 0; + return; trace_context_draw_block(tr_ctx, 1); @@ -194,19 +180,15 @@ trace_context_draw_arrays(struct pipe_context *_pipe, trace_dump_arg(uint, start); trace_dump_arg(uint, count); - result = pipe->draw_arrays(pipe, mode, start, count);; - - trace_dump_ret(bool, result); + pipe->draw_arrays(pipe, mode, start, count); trace_dump_call_end(); trace_context_draw_block(tr_ctx, 2); - - return result; } -static INLINE boolean +static INLINE void trace_context_draw_elements(struct pipe_context *_pipe, struct pipe_buffer *_indexBuffer, unsigned indexSize, @@ -216,10 +198,9 @@ trace_context_draw_elements(struct pipe_context *_pipe, struct trace_buffer *tr_buf = trace_buffer(_indexBuffer); struct pipe_context *pipe = tr_ctx->pipe; struct pipe_buffer *indexBuffer = tr_buf->buffer; - boolean result; if (tr_ctx->curr.fs->disabled || tr_ctx->curr.vs->disabled) - return 0; + return; trace_context_draw_block(tr_ctx, 1); @@ -234,19 +215,15 @@ trace_context_draw_elements(struct pipe_context *_pipe, trace_dump_arg(uint, start); trace_dump_arg(uint, count); - result = pipe->draw_elements(pipe, indexBuffer, indexSize, mode, start, count);; - - trace_dump_ret(bool, result); + pipe->draw_elements(pipe, indexBuffer, indexSize, mode, start, count); trace_dump_call_end(); trace_context_draw_block(tr_ctx, 2); - - return result; } -static INLINE boolean +static INLINE void trace_context_draw_range_elements(struct pipe_context *_pipe, struct pipe_buffer *_indexBuffer, unsigned indexSize, @@ -260,10 +237,9 @@ trace_context_draw_range_elements(struct pipe_context *_pipe, struct trace_buffer *tr_buf = trace_buffer(_indexBuffer); struct pipe_context *pipe = tr_ctx->pipe; struct pipe_buffer *indexBuffer = tr_buf->buffer; - boolean result; if (tr_ctx->curr.fs->disabled || tr_ctx->curr.vs->disabled) - return 0; + return; trace_context_draw_block(tr_ctx, 1); @@ -280,18 +256,14 @@ trace_context_draw_range_elements(struct pipe_context *_pipe, trace_dump_arg(uint, start); trace_dump_arg(uint, count); - result = pipe->draw_range_elements(pipe, - indexBuffer, - indexSize, minIndex, maxIndex, - mode, start, count); - - trace_dump_ret(bool, result); + pipe->draw_range_elements(pipe, + indexBuffer, + indexSize, minIndex, maxIndex, + mode, start, count); trace_dump_call_end(); trace_context_draw_block(tr_ctx, 2); - - return result; } @@ -308,7 +280,7 @@ trace_context_create_query(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(uint, query_type); - result = pipe->create_query(pipe, query_type);; + result = pipe->create_query(pipe, query_type); trace_dump_ret(ptr, result); @@ -330,7 +302,7 @@ trace_context_destroy_query(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(ptr, query); - pipe->destroy_query(pipe, query);; + pipe->destroy_query(pipe, query); trace_dump_call_end(); } @@ -348,7 +320,7 @@ trace_context_begin_query(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(ptr, query); - pipe->begin_query(pipe, query);; + pipe->begin_query(pipe, query); trace_dump_call_end(); } @@ -387,7 +359,7 @@ trace_context_get_query_result(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); - _result = pipe->get_query_result(pipe, query, wait, presult);; + _result = pipe->get_query_result(pipe, query, wait, presult); result = *presult; trace_dump_arg(uint, result); @@ -412,7 +384,7 @@ trace_context_create_blend_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(blend_state, state); - result = pipe->create_blend_state(pipe, state);; + result = pipe->create_blend_state(pipe, state); trace_dump_ret(ptr, result); @@ -434,7 +406,7 @@ trace_context_bind_blend_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(ptr, state); - pipe->bind_blend_state(pipe, state);; + pipe->bind_blend_state(pipe, state); trace_dump_call_end(); } @@ -452,7 +424,7 @@ trace_context_delete_blend_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(ptr, state); - pipe->delete_blend_state(pipe, state);; + pipe->delete_blend_state(pipe, state); trace_dump_call_end(); } @@ -471,7 +443,7 @@ trace_context_create_sampler_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(sampler_state, state); - result = pipe->create_sampler_state(pipe, state);; + result = pipe->create_sampler_state(pipe, state); trace_dump_ret(ptr, result); @@ -482,19 +454,40 @@ trace_context_create_sampler_state(struct pipe_context *_pipe, static INLINE void -trace_context_bind_sampler_states(struct pipe_context *_pipe, - unsigned num_states, void **states) +trace_context_bind_fragment_sampler_states(struct pipe_context *_pipe, + unsigned num_states, + void **states) { struct trace_context *tr_ctx = trace_context(_pipe); struct pipe_context *pipe = tr_ctx->pipe; - trace_dump_call_begin("pipe_context", "bind_sampler_states"); + trace_dump_call_begin("pipe_context", "bind_fragment_sampler_states"); trace_dump_arg(ptr, pipe); trace_dump_arg(uint, num_states); trace_dump_arg_array(ptr, states, num_states); - pipe->bind_sampler_states(pipe, num_states, states);; + pipe->bind_fragment_sampler_states(pipe, num_states, states); + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_bind_vertex_sampler_states(struct pipe_context *_pipe, + unsigned num_states, + void **states) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "bind_vertex_sampler_states"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(uint, num_states); + trace_dump_arg_array(ptr, states, num_states); + + pipe->bind_vertex_sampler_states(pipe, num_states, states); trace_dump_call_end(); } @@ -512,7 +505,7 @@ trace_context_delete_sampler_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(ptr, state); - pipe->delete_sampler_state(pipe, state);; + pipe->delete_sampler_state(pipe, state); trace_dump_call_end(); } @@ -531,7 +524,7 @@ trace_context_create_rasterizer_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(rasterizer_state, state); - result = pipe->create_rasterizer_state(pipe, state);; + result = pipe->create_rasterizer_state(pipe, state); trace_dump_ret(ptr, result); @@ -553,7 +546,7 @@ trace_context_bind_rasterizer_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(ptr, state); - pipe->bind_rasterizer_state(pipe, state);; + pipe->bind_rasterizer_state(pipe, state); trace_dump_call_end(); } @@ -571,7 +564,7 @@ trace_context_delete_rasterizer_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(ptr, state); - pipe->delete_rasterizer_state(pipe, state);; + pipe->delete_rasterizer_state(pipe, state); trace_dump_call_end(); } @@ -587,7 +580,7 @@ trace_context_create_depth_stencil_alpha_state(struct pipe_context *_pipe, trace_dump_call_begin("pipe_context", "create_depth_stencil_alpha_state"); - result = pipe->create_depth_stencil_alpha_state(pipe, state);; + result = pipe->create_depth_stencil_alpha_state(pipe, state); trace_dump_arg(ptr, pipe); trace_dump_arg(depth_stencil_alpha_state, state); @@ -612,7 +605,7 @@ trace_context_bind_depth_stencil_alpha_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(ptr, state); - pipe->bind_depth_stencil_alpha_state(pipe, state);; + pipe->bind_depth_stencil_alpha_state(pipe, state); trace_dump_call_end(); } @@ -630,7 +623,7 @@ trace_context_delete_depth_stencil_alpha_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(ptr, state); - pipe->delete_depth_stencil_alpha_state(pipe, state);; + pipe->delete_depth_stencil_alpha_state(pipe, state); trace_dump_call_end(); } @@ -649,7 +642,7 @@ trace_context_create_fs_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(shader_state, state); - result = pipe->create_fs_state(pipe, state);; + result = pipe->create_fs_state(pipe, state); trace_dump_ret(ptr, result); @@ -752,7 +745,7 @@ trace_context_bind_vs_state(struct pipe_context *_pipe, if (tr_shdr && tr_shdr->replaced) state = tr_shdr->replaced; - pipe->bind_vs_state(pipe, state);; + pipe->bind_vs_state(pipe, state); trace_dump_call_end(); } @@ -772,7 +765,7 @@ trace_context_delete_vs_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(ptr, state); - pipe->delete_vs_state(pipe, state);; + pipe->delete_vs_state(pipe, state); trace_dump_call_end(); @@ -792,7 +785,7 @@ trace_context_set_blend_color(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(blend_color, state); - pipe->set_blend_color(pipe, state);; + pipe->set_blend_color(pipe, state); trace_dump_call_end(); } @@ -810,7 +803,7 @@ trace_context_set_clip_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(clip_state, state); - pipe->set_clip_state(pipe, state);; + pipe->set_clip_state(pipe, state); trace_dump_call_end(); } @@ -882,7 +875,7 @@ trace_context_set_framebuffer_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(framebuffer_state, state); - pipe->set_framebuffer_state(pipe, state);; + pipe->set_framebuffer_state(pipe, state); trace_dump_call_end(); } @@ -900,7 +893,7 @@ trace_context_set_polygon_stipple(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(poly_stipple, state); - pipe->set_polygon_stipple(pipe, state);; + pipe->set_polygon_stipple(pipe, state); trace_dump_call_end(); } @@ -918,7 +911,7 @@ trace_context_set_scissor_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(scissor_state, state); - pipe->set_scissor_state(pipe, state);; + pipe->set_scissor_state(pipe, state); trace_dump_call_end(); } @@ -936,16 +929,16 @@ trace_context_set_viewport_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(viewport_state, state); - pipe->set_viewport_state(pipe, state);; + pipe->set_viewport_state(pipe, state); trace_dump_call_end(); } static INLINE void -trace_context_set_sampler_textures(struct pipe_context *_pipe, - unsigned num_textures, - struct pipe_texture **textures) +trace_context_set_fragment_sampler_textures(struct pipe_context *_pipe, + unsigned num_textures, + struct pipe_texture **textures) { struct trace_context *tr_ctx = trace_context(_pipe); struct trace_texture *tr_tex; @@ -961,13 +954,44 @@ trace_context_set_sampler_textures(struct pipe_context *_pipe, } textures = unwrapped_textures; - trace_dump_call_begin("pipe_context", "set_sampler_textures"); + trace_dump_call_begin("pipe_context", "set_fragment_sampler_textures"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(uint, num_textures); + trace_dump_arg_array(ptr, textures, num_textures); + + pipe->set_fragment_sampler_textures(pipe, num_textures, textures); + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_set_vertex_sampler_textures(struct pipe_context *_pipe, + unsigned num_textures, + struct pipe_texture **textures) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_texture *tr_tex; + struct pipe_context *pipe = tr_ctx->pipe; + struct pipe_texture *unwrapped_textures[PIPE_MAX_VERTEX_SAMPLERS]; + unsigned i; + + tr_ctx->curr.num_vert_texs = num_textures; + for(i = 0; i < num_textures; ++i) { + tr_tex = trace_texture(textures[i]); + tr_ctx->curr.vert_tex[i] = tr_tex; + unwrapped_textures[i] = tr_tex ? tr_tex->texture : NULL; + } + textures = unwrapped_textures; + + trace_dump_call_begin("pipe_context", "set_vertex_sampler_textures"); trace_dump_arg(ptr, pipe); trace_dump_arg(uint, num_textures); trace_dump_arg_array(ptr, textures, num_textures); - pipe->set_sampler_textures(pipe, num_textures, textures);; + pipe->set_vertex_sampler_textures(pipe, num_textures, textures); trace_dump_call_end(); } @@ -1026,7 +1050,7 @@ trace_context_set_vertex_elements(struct pipe_context *_pipe, trace_dump_struct_array(vertex_element, elements, num_elements); trace_dump_arg_end(); - pipe->set_vertex_elements(pipe, num_elements, elements);; + pipe->set_vertex_elements(pipe, num_elements, elements); trace_dump_call_end(); } @@ -1087,7 +1111,7 @@ trace_context_surface_fill(struct pipe_context *_pipe, trace_dump_arg(uint, width); trace_dump_arg(uint, height); - pipe->surface_fill(pipe, dst, dstx, dsty, width, height, value);; + pipe->surface_fill(pipe, dst, dstx, dsty, width, height, value); trace_dump_call_end(); } @@ -1130,7 +1154,7 @@ trace_context_flush(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(uint, flags); - pipe->flush(pipe, flags, fence);; + pipe->flush(pipe, flags, fence); if(fence) trace_dump_ret(ptr, *fence); @@ -1242,7 +1266,6 @@ trace_context_create(struct pipe_screen *_screen, tr_ctx->base.winsys = _screen->winsys; tr_ctx->base.screen = _screen; tr_ctx->base.destroy = trace_context_destroy; - tr_ctx->base.set_edgeflags = trace_context_set_edgeflags; tr_ctx->base.draw_arrays = trace_context_draw_arrays; tr_ctx->base.draw_elements = trace_context_draw_elements; tr_ctx->base.draw_range_elements = trace_context_draw_range_elements; @@ -1255,7 +1278,8 @@ trace_context_create(struct pipe_screen *_screen, tr_ctx->base.bind_blend_state = trace_context_bind_blend_state; tr_ctx->base.delete_blend_state = trace_context_delete_blend_state; tr_ctx->base.create_sampler_state = trace_context_create_sampler_state; - tr_ctx->base.bind_sampler_states = trace_context_bind_sampler_states; + tr_ctx->base.bind_fragment_sampler_states = trace_context_bind_fragment_sampler_states; + tr_ctx->base.bind_vertex_sampler_states = trace_context_bind_vertex_sampler_states; tr_ctx->base.delete_sampler_state = trace_context_delete_sampler_state; tr_ctx->base.create_rasterizer_state = trace_context_create_rasterizer_state; tr_ctx->base.bind_rasterizer_state = trace_context_bind_rasterizer_state; @@ -1276,7 +1300,8 @@ trace_context_create(struct pipe_screen *_screen, tr_ctx->base.set_polygon_stipple = trace_context_set_polygon_stipple; tr_ctx->base.set_scissor_state = trace_context_set_scissor_state; tr_ctx->base.set_viewport_state = trace_context_set_viewport_state; - tr_ctx->base.set_sampler_textures = trace_context_set_sampler_textures; + tr_ctx->base.set_fragment_sampler_textures = trace_context_set_fragment_sampler_textures; + tr_ctx->base.set_vertex_sampler_textures = trace_context_set_vertex_sampler_textures; tr_ctx->base.set_vertex_buffers = trace_context_set_vertex_buffers; tr_ctx->base.set_vertex_elements = trace_context_set_vertex_elements; if (pipe->surface_copy) diff --git a/src/gallium/drivers/trace/tr_context.h b/src/gallium/drivers/trace/tr_context.h index 6febe4b4114..852b480765a 100644 --- a/src/gallium/drivers/trace/tr_context.h +++ b/src/gallium/drivers/trace/tr_context.h @@ -54,6 +54,9 @@ struct trace_context struct trace_texture *tex[PIPE_MAX_SAMPLERS]; unsigned num_texs; + struct trace_texture *vert_tex[PIPE_MAX_VERTEX_SAMPLERS]; + unsigned num_vert_texs; + unsigned nr_cbufs; struct trace_texture *cbufs[PIPE_MAX_COLOR_BUFS]; struct trace_texture *zsbuf; diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c index bcf6751af4f..0102cc18763 100644 --- a/src/gallium/drivers/trace/tr_dump_state.c +++ b/src/gallium/drivers/trace/tr_dump_state.c @@ -43,19 +43,6 @@ void trace_dump_format(enum pipe_format format) } -void trace_dump_block(const struct pipe_format_block *block) -{ - if (!trace_dumping_enabled_locked()) - return; - - trace_dump_struct_begin("pipe_format_block"); - trace_dump_member(uint, block, size); - trace_dump_member(uint, block, width); - trace_dump_member(uint, block, height); - trace_dump_struct_end(); -} - - static void trace_dump_reference(const struct pipe_reference *reference) { if (!trace_dumping_enabled_locked()) @@ -83,19 +70,15 @@ void trace_dump_template(const struct pipe_texture *templat) trace_dump_member(format, templat, format); trace_dump_member_begin("width"); - trace_dump_array(uint, templat->width, 1); + trace_dump_uint(templat->width0); trace_dump_member_end(); trace_dump_member_begin("height"); - trace_dump_array(uint, templat->height, 1); + trace_dump_uint(templat->height0); trace_dump_member_end(); trace_dump_member_begin("depth"); - trace_dump_array(uint, templat->depth, 1); - trace_dump_member_end(); - - trace_dump_member_begin("block"); - trace_dump_block(&templat->block); + trace_dump_uint(templat->depth0); trace_dump_member_end(); trace_dump_member(uint, templat, last_level); @@ -483,16 +466,9 @@ void trace_dump_transfer(const struct pipe_transfer *state) trace_dump_struct_begin("pipe_transfer"); - trace_dump_member(format, state, format); trace_dump_member(uint, state, width); trace_dump_member(uint, state, height); - trace_dump_member_begin("block"); - trace_dump_block(&state->block); - trace_dump_member_end(); - - trace_dump_member(uint, state, nblocksx); - trace_dump_member(uint, state, nblocksy); trace_dump_member(uint, state, stride); trace_dump_member(uint, state, usage); diff --git a/src/gallium/drivers/trace/tr_dump_state.h b/src/gallium/drivers/trace/tr_dump_state.h index 05b821adb64..07ad6fbb205 100644 --- a/src/gallium/drivers/trace/tr_dump_state.h +++ b/src/gallium/drivers/trace/tr_dump_state.h @@ -35,11 +35,8 @@ void trace_dump_format(enum pipe_format format); -void trace_dump_block(const struct pipe_format_block *block); - void trace_dump_template(const struct pipe_texture *templat); - void trace_dump_rasterizer_state(const struct pipe_rasterizer_state *state); void trace_dump_poly_stipple(const struct pipe_poly_stipple *state); diff --git a/src/gallium/drivers/trace/tr_rbug.c b/src/gallium/drivers/trace/tr_rbug.c index 0372d92782b..0546aad9b50 100644 --- a/src/gallium/drivers/trace/tr_rbug.c +++ b/src/gallium/drivers/trace/tr_rbug.c @@ -26,6 +26,7 @@ **************************************************************************/ +#include "util/u_format.h" #include "util/u_string.h" #include "util/u_memory.h" #include "util/u_simple_list.h" @@ -200,10 +201,12 @@ trace_rbug_texture_info(struct trace_rbug *tr_rbug, struct rbug_header *header, t = tr_tex->texture; rbug_send_texture_info_reply(tr_rbug->con, serial, t->target, t->format, - t->width, t->last_level + 1, - t->height, t->last_level + 1, - t->depth, t->last_level + 1, - t->block.width, t->block.height, t->block.size, + &t->width0, 1, + &t->height0, 1, + &t->depth0, 1, + util_format_get_blockwidth(t->format), + util_format_get_blockheight(t->format), + util_format_get_blocksize(t->format), t->last_level, t->nr_samples, t->tex_usage, @@ -251,9 +254,12 @@ trace_rbug_texture_read(struct trace_rbug *tr_rbug, struct rbug_header *header, map = screen->transfer_map(screen, t); rbug_send_texture_read_reply(tr_rbug->con, serial, - t->format, - t->block.width, t->block.height, t->block.size, - (uint8_t*)map, t->stride * t->nblocksy, + t->texture->format, + util_format_get_blockwidth(t->texture->format), + util_format_get_blockheight(t->texture->format), + util_format_get_blocksize(t->texture->format), + (uint8_t*)map, + t->stride * util_format_get_nblocksy(t->texture->format, t->height), t->stride, NULL); diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c index 7da9bd3866b..117503aaff6 100644 --- a/src/gallium/drivers/trace/tr_screen.c +++ b/src/gallium/drivers/trace/tr_screen.c @@ -25,6 +25,7 @@ * **************************************************************************/ +#include "util/u_format.h" #include "util/u_memory.h" #include "util/u_simple_list.h" @@ -35,6 +36,7 @@ #include "tr_screen.h" #include "pipe/p_inlines.h" +#include "pipe/p_format.h" static boolean trace = FALSE; @@ -424,7 +426,7 @@ trace_screen_transfer_unmap(struct pipe_screen *_screen, struct pipe_transfer *transfer = tr_trans->transfer; if(tr_trans->map) { - size_t size = transfer->nblocksy * transfer->stride; + size_t size = util_format_get_nblocksy(transfer->texture->format, transfer->height) * transfer->stride; trace_dump_call_begin("pipe_screen", "transfer_write"); diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h index c36286f9bee..f7368bb95b3 100644 --- a/src/gallium/include/pipe/p_compiler.h +++ b/src/gallium/include/pipe/p_compiler.h @@ -167,7 +167,7 @@ typedef unsigned char boolean; #define ALIGN16_ASSIGN(NAME) NAME##___aligned #define ALIGN16_ATTRIB __attribute__(( aligned( 16 ) )) #define ALIGN8_ATTRIB __attribute__(( aligned( 8 ) )) -#if __GNUC__ > 4 || (__GNUC__ == 4 &&__GNUC_MINOR__>1) +#if (__GNUC__ > 4 || (__GNUC__ == 4 &&__GNUC_MINOR__>1)) && !defined(PIPE_ARCH_X86_64) #define ALIGN_STACK __attribute__((force_align_arg_pointer)) #else #define ALIGN_STACK diff --git a/src/gallium/include/pipe/p_config.h b/src/gallium/include/pipe/p_config.h index f6feea5f74d..064605a4a05 100644 --- a/src/gallium/include/pipe/p_config.h +++ b/src/gallium/include/pipe/p_config.h @@ -53,6 +53,7 @@ #if defined(__GNUC__) #define PIPE_CC_GCC +#define PIPE_CC_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) #endif /* diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index 5569001e601..d2f8085b421 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -57,41 +57,41 @@ struct pipe_context { void (*destroy)( struct pipe_context * ); - - /* Possible interface for setting edgeflags. These aren't really - * vertex elements, so don't fit there. - */ - void (*set_edgeflags)( struct pipe_context *, - const unsigned *bitfield ); - - /** * VBO drawing (return false on fallbacks (temporary??)) */ /*@{*/ - boolean (*draw_arrays)( struct pipe_context *pipe, - unsigned mode, unsigned start, unsigned count); + void (*draw_arrays)( struct pipe_context *pipe, + unsigned mode, unsigned start, unsigned count); - boolean (*draw_elements)( struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned mode, unsigned start, unsigned count); + void (*draw_elements)( struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, unsigned count); /* XXX: this is (probably) a temporary entrypoint, as the range * information should be available from the vertex_buffer state. * Using this to quickly evaluate a specialized path in the draw * module. */ - boolean (*draw_range_elements)( struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count); + void (*draw_range_elements)( struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count); /*@}*/ + /** + * Predicate subsequent rendering on occlusion query result + * \param query the query predicate, or NULL if no predicate + * \param mode one of PIPE_COND_RENDER_x + */ + void (*render_condition)( struct pipe_context *pipe, + struct pipe_query *query, + uint mode ); /** * Query objects @@ -123,7 +123,12 @@ struct pipe_context { void * (*create_sampler_state)(struct pipe_context *, const struct pipe_sampler_state *); - void (*bind_sampler_states)(struct pipe_context *, unsigned num, void **); + void (*bind_fragment_sampler_states)(struct pipe_context *, + unsigned num_samplers, + void **samplers); + void (*bind_vertex_sampler_states)(struct pipe_context *, + unsigned num_samplers, + void **samplers); void (*delete_sampler_state)(struct pipe_context *, void *); void * (*create_rasterizer_state)(struct pipe_context *, @@ -145,6 +150,12 @@ struct pipe_context { const struct pipe_shader_state *); void (*bind_vs_state)(struct pipe_context *, void *); void (*delete_vs_state)(struct pipe_context *, void *); + + void * (*create_gs_state)(struct pipe_context *, + const struct pipe_shader_state *); + void (*bind_gs_state)(struct pipe_context *, void *); + void (*delete_gs_state)(struct pipe_context *, void *); + /*@}*/ /** @@ -173,9 +184,13 @@ struct pipe_context { void (*set_viewport_state)( struct pipe_context *, const struct pipe_viewport_state * ); - void (*set_sampler_textures)( struct pipe_context *, - unsigned num_textures, - struct pipe_texture ** ); + void (*set_fragment_sampler_textures)(struct pipe_context *, + unsigned num_textures, + struct pipe_texture **); + + void (*set_vertex_sampler_textures)(struct pipe_context *, + unsigned num_textures, + struct pipe_texture **); void (*set_vertex_buffers)( struct pipe_context *, unsigned num_buffers, diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index fd14dc8e92d..35f3830ebcf 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -140,7 +140,8 @@ enum pipe_texture_target { PIPE_TEXTURE_1D = 0, PIPE_TEXTURE_2D = 1, PIPE_TEXTURE_3D = 2, - PIPE_TEXTURE_CUBE = 3 + PIPE_TEXTURE_CUBE = 3, + PIPE_MAX_TEXTURE_TYPES }; #define PIPE_TEX_FACE_POS_X 0 @@ -170,8 +171,6 @@ enum pipe_texture_target { */ #define PIPE_TEX_FILTER_NEAREST 0 #define PIPE_TEX_FILTER_LINEAR 1 -#define PIPE_TEX_FILTER_ANISO 2 - #define PIPE_TEX_COMPARE_NONE 0 #define PIPE_TEX_COMPARE_R_TO_TEXTURE 1 @@ -320,23 +319,28 @@ enum pipe_transfer_usage { */ #define PIPE_SHADER_VERTEX 0 #define PIPE_SHADER_FRAGMENT 1 -#define PIPE_SHADER_TYPES 2 +#define PIPE_SHADER_GEOMETRY 2 +#define PIPE_SHADER_TYPES 3 /** * Primitive types: */ -#define PIPE_PRIM_POINTS 0 -#define PIPE_PRIM_LINES 1 -#define PIPE_PRIM_LINE_LOOP 2 -#define PIPE_PRIM_LINE_STRIP 3 -#define PIPE_PRIM_TRIANGLES 4 -#define PIPE_PRIM_TRIANGLE_STRIP 5 -#define PIPE_PRIM_TRIANGLE_FAN 6 -#define PIPE_PRIM_QUADS 7 -#define PIPE_PRIM_QUAD_STRIP 8 -#define PIPE_PRIM_POLYGON 9 -#define PIPE_PRIM_MAX 10 +#define PIPE_PRIM_POINTS 0 +#define PIPE_PRIM_LINES 1 +#define PIPE_PRIM_LINE_LOOP 2 +#define PIPE_PRIM_LINE_STRIP 3 +#define PIPE_PRIM_TRIANGLES 4 +#define PIPE_PRIM_TRIANGLE_STRIP 5 +#define PIPE_PRIM_TRIANGLE_FAN 6 +#define PIPE_PRIM_QUADS 7 +#define PIPE_PRIM_QUAD_STRIP 8 +#define PIPE_PRIM_POLYGON 9 +#define PIPE_PRIM_LINES_ADJACENCY 10 +#define PIPE_PRIM_LINE_STRIP_ADJACENCY 11 +#define PIPE_PRIM_TRIANGLES_ADJACENCY 12 +#define PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY 13 +#define PIPE_PRIM_MAX 14 /** @@ -349,6 +353,15 @@ enum pipe_transfer_usage { /** + * Conditional rendering modes + */ +#define PIPE_RENDER_COND_WAIT 0 +#define PIPE_RENDER_COND_NO_WAIT 1 +#define PIPE_RENDER_COND_BY_REGION_WAIT 2 +#define PIPE_RENDER_COND_BY_REGION_NO_WAIT 3 + + +/** * Point sprite coord modes */ #define PIPE_SPRITE_COORD_NONE 0 @@ -390,6 +403,8 @@ enum pipe_transfer_usage { #define PIPE_CAP_BLEND_EQUATION_SEPARATE 28 #define PIPE_CAP_SM3 29 /*< Shader Model 3 supported */ #define PIPE_CAP_MAX_PREDICATE_REGISTERS 30 +#define PIPE_CAP_MAX_COMBINED_SAMPLERS 31 /*< Maximum texture image units accessible from vertex + and fragment shaders combined */ /** diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h index af230809201..6bfff1cc59c 100644 --- a/src/gallium/include/pipe/p_format.h +++ b/src/gallium/include/pipe/p_format.h @@ -40,353 +40,133 @@ extern "C" { #endif /** - * The pipe_format enum is a 32-bit wide bitfield that encodes all the - * information needed to uniquely describe a pixel format. - */ - -/** - * Possible format layouts are encoded in the first 2 bits. - * The interpretation of the remaining 30 bits depends on a particular - * format layout. - */ -#define PIPE_FORMAT_LAYOUT_RGBAZS 0 -#define PIPE_FORMAT_LAYOUT_YCBCR 1 -#define PIPE_FORMAT_LAYOUT_DXT 2 /**< XXX temporary? */ -#define PIPE_FORMAT_LAYOUT_MIXED 3 - -static INLINE uint pf_layout(uint f) /**< PIPE_FORMAT_LAYOUT_ */ -{ - return f & 0x3; -} - -/** - * RGBAZS Format Layout. - */ - -/** - * Format component selectors for RGBAZS & MIXED layout. - */ -#define PIPE_FORMAT_COMP_R 0 -#define PIPE_FORMAT_COMP_G 1 -#define PIPE_FORMAT_COMP_B 2 -#define PIPE_FORMAT_COMP_A 3 -#define PIPE_FORMAT_COMP_0 4 -#define PIPE_FORMAT_COMP_1 5 -#define PIPE_FORMAT_COMP_Z 6 -#define PIPE_FORMAT_COMP_S 7 - -/** - * Format types for RGBAZS layout. - */ -#define PIPE_FORMAT_TYPE_UNKNOWN 0 -#define PIPE_FORMAT_TYPE_FLOAT 1 /**< 16/32/64-bit/channel formats */ -#define PIPE_FORMAT_TYPE_UNORM 2 /**< uints, normalized to [0,1] */ -#define PIPE_FORMAT_TYPE_SNORM 3 /**< ints, normalized to [-1,1] */ -#define PIPE_FORMAT_TYPE_USCALED 4 /**< uints, not normalized */ -#define PIPE_FORMAT_TYPE_SSCALED 5 /**< ints, not normalized */ -#define PIPE_FORMAT_TYPE_SRGB 6 /**< sRGB colorspace */ -#define PIPE_FORMAT_TYPE_FIXED 7 /**< 16.16 fixed point */ - - -/** - * Because the destination vector is assumed to be RGBA FLOAT, we - * need to know how to swizzle and expand components from the source - * vector. - * Let's take U_A1_R5_G5_B5 as an example. X swizzle is A, X size - * is 1 bit and type is UNORM. So we take the most significant bit - * from source vector, convert 0 to 0.0 and 1 to 1.0 and save it - * in the last component of the destination RGBA component. - * Next, Y swizzle is R, Y size is 5 and type is UNORM. We normalize - * those 5 bits into [0.0; 1.0] range and put it into second - * component of the destination vector. Rinse and repeat for - * components Z and W. - * If any of size fields is zero, it means the source format contains - * less than four components. - * If any swizzle is 0 or 1, the corresponding destination component - * should be filled with 0.0 and 1.0, respectively. - */ -typedef uint pipe_format_rgbazs_t; - -static INLINE uint pf_get(pipe_format_rgbazs_t f, uint shift, uint mask) -{ - return (f >> shift) & mask; -} - -#define pf_swizzle_x(f) pf_get(f, 2, 0x7) /**< PIPE_FORMAT_COMP_ */ -#define pf_swizzle_y(f) pf_get(f, 5, 0x7) /**< PIPE_FORMAT_COMP_ */ -#define pf_swizzle_z(f) pf_get(f, 8, 0x7) /**< PIPE_FORMAT_COMP_ */ -#define pf_swizzle_w(f) pf_get(f, 11, 0x7) /**< PIPE_FORMAT_COMP_ */ -#define pf_swizzle_xyzw(f,i) pf_get(f, 2+((i)*3), 0x7) -#define pf_size_x(f) pf_get(f, 14, 0x7) /**< Size of X */ -#define pf_size_y(f) pf_get(f, 17, 0x7) /**< Size of Y */ -#define pf_size_z(f) pf_get(f, 20, 0x7) /**< Size of Z */ -#define pf_size_w(f) pf_get(f, 23, 0x7) /**< Size of W */ -#define pf_size_xyzw(f,i) pf_get(f, 14+((i)*3), 0x7) -#define pf_exp2(f) pf_get(f, 26, 0x7) /**< Scale size by 2 ^ exp2 */ -#define pf_type(f) pf_get(f, 29, 0x7) /**< PIPE_FORMAT_TYPE_ */ - -/** - * Helper macro to encode the above structure into a 32-bit value. - */ -#define _PIPE_FORMAT_RGBAZS( SWZ, SIZEX, SIZEY, SIZEZ, SIZEW, EXP2, TYPE ) (\ - (PIPE_FORMAT_LAYOUT_RGBAZS << 0) |\ - ((SWZ) << 2) |\ - ((SIZEX) << 14) |\ - ((SIZEY) << 17) |\ - ((SIZEZ) << 20) |\ - ((SIZEW) << 23) |\ - ((EXP2) << 26) |\ - ((TYPE) << 29) ) - -/** - * Helper macro to encode the swizzle part of the structure above. - */ -#define _PIPE_FORMAT_SWZ( SWZX, SWZY, SWZZ, SWZW ) (((SWZX) << 0) | ((SWZY) << 3) | ((SWZZ) << 6) | ((SWZW) << 9)) - -/** - * Shorthand macro for RGBAZS layout with component sizes in 1-bit units. - */ -#define _PIPE_FORMAT_RGBAZS_1( SWZ, SIZEX, SIZEY, SIZEZ, SIZEW, TYPE )\ - _PIPE_FORMAT_RGBAZS( SWZ, SIZEX, SIZEY, SIZEZ, SIZEW, 0, TYPE ) - -/** - * Shorthand macro for RGBAZS layout with component sizes in 2-bit units. - */ -#define _PIPE_FORMAT_RGBAZS_2( SWZ, SIZEX, SIZEY, SIZEZ, SIZEW, TYPE )\ - _PIPE_FORMAT_RGBAZS( SWZ, SIZEX, SIZEY, SIZEZ, SIZEW, 1, TYPE ) - -/** - * Shorthand macro for RGBAZS layout with component sizes in 8-bit units. - */ -#define _PIPE_FORMAT_RGBAZS_8( SWZ, SIZEX, SIZEY, SIZEZ, SIZEW, TYPE )\ - _PIPE_FORMAT_RGBAZS( SWZ, SIZEX, SIZEY, SIZEZ, SIZEW, 3, TYPE ) - -/** - * Shorthand macro for RGBAZS layout with component sizes in 64-bit units. - */ -#define _PIPE_FORMAT_RGBAZS_64( SWZ, SIZEX, SIZEY, SIZEZ, SIZEW, TYPE )\ - _PIPE_FORMAT_RGBAZS( SWZ, SIZEX, SIZEY, SIZEZ, SIZEW, 6, TYPE ) - -typedef uint pipe_format_mixed_t; - -/* NOTE: Use pf_swizzle_* and pf_size_* macros for swizzles and sizes. - */ - -#define pf_mixed_sign_x(f) pf_get( f, 26, 0x1 ) /*< Sign of X */ -#define pf_mixed_sign_y(f) pf_get( f, 27, 0x1 ) /*< Sign of Y */ -#define pf_mixed_sign_z(f) pf_get( f, 28, 0x1 ) /*< Sign of Z */ -#define pf_mixed_sign_w(f) pf_get( f, 29, 0x1 ) /*< Sign of W */ -#define pf_mixed_sign_xyzw(f, i) pf_get( f, 26 + (i), 0x1 ) -#define pf_mixed_normalized(f) pf_get( f, 30, 0x1 ) /*< Type is NORM (1) or SCALED (0) */ -#define pf_mixed_scale8(f) pf_get( f, 31, 0x1 ) /*< Scale size by either one (0) or eight (1) */ - -/** - * Helper macro to encode the above structure into a 32-bit value. - */ -#define _PIPE_FORMAT_MIXED( SWZ, SIZEX, SIZEY, SIZEZ, SIZEW, SIGNX, SIGNY, SIGNZ, SIGNW, NORMALIZED, SCALE8 ) (\ - (PIPE_FORMAT_LAYOUT_MIXED << 0) |\ - ((SWZ) << 2) |\ - ((SIZEX) << 14) |\ - ((SIZEY) << 17) |\ - ((SIZEZ) << 20) |\ - ((SIZEW) << 23) |\ - ((SIGNX) << 26) |\ - ((SIGNY) << 27) |\ - ((SIGNZ) << 28) |\ - ((SIGNW) << 29) |\ - ((NORMALIZED) << 30) |\ - ((SCALE8) << 31) ) - -/** - * Shorthand macro for common format swizzles. - */ -#define _PIPE_FORMAT_R001 _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_1 ) -#define _PIPE_FORMAT_RG01 _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_G, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_1 ) -#define _PIPE_FORMAT_RGB1 _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_G, PIPE_FORMAT_COMP_B, PIPE_FORMAT_COMP_1 ) -#define _PIPE_FORMAT_RGBA _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_G, PIPE_FORMAT_COMP_B, PIPE_FORMAT_COMP_A ) -#define _PIPE_FORMAT_ARGB _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_A, PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_G, PIPE_FORMAT_COMP_B ) -#define _PIPE_FORMAT_ABGR _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_A, PIPE_FORMAT_COMP_B, PIPE_FORMAT_COMP_G, PIPE_FORMAT_COMP_R ) -#define _PIPE_FORMAT_BGRA _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_B, PIPE_FORMAT_COMP_G, PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_A ) -#define _PIPE_FORMAT_1RGB _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_1, PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_G, PIPE_FORMAT_COMP_B ) -#define _PIPE_FORMAT_1BGR _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_1, PIPE_FORMAT_COMP_B, PIPE_FORMAT_COMP_G, PIPE_FORMAT_COMP_R ) -#define _PIPE_FORMAT_BGR1 _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_B, PIPE_FORMAT_COMP_G, PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_1 ) -#define _PIPE_FORMAT_0000 _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0 ) -#define _PIPE_FORMAT_000R _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_R ) -#define _PIPE_FORMAT_RRR1 _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_1 ) -#define _PIPE_FORMAT_RRRR _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_R ) -#define _PIPE_FORMAT_RRRG _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_G ) -#define _PIPE_FORMAT_Z000 _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_Z, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0 ) -#define _PIPE_FORMAT_0Z00 _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_Z, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0 ) -#define _PIPE_FORMAT_SZ00 _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_S, PIPE_FORMAT_COMP_Z, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0 ) -#define _PIPE_FORMAT_ZS00 _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_Z, PIPE_FORMAT_COMP_S, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0 ) -#define _PIPE_FORMAT_S000 _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_S, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0 ) - -/** - * YCBCR Format Layout. - */ - -/** - * This only contains a flag that indicates whether the format is reversed or - * not. - */ -typedef uint pipe_format_ycbcr_t; - -/** - * Helper macro to encode the above structure into a 32-bit value. - */ -#define _PIPE_FORMAT_YCBCR( REV ) (\ - (PIPE_FORMAT_LAYOUT_YCBCR << 0) |\ - ((REV) << 2) ) - -static INLINE uint pf_rev(pipe_format_ycbcr_t f) -{ - return (f >> 2) & 0x1; -} - - -/** - * Compresssed format layouts (this will probably change) - */ -#define _PIPE_FORMAT_DXT( LEVEL, RSIZE, GSIZE, BSIZE, ASIZE, TYPE ) \ - ((PIPE_FORMAT_LAYOUT_DXT << 0) | \ - ((LEVEL) << 2) | \ - ((RSIZE) << 5) | \ - ((GSIZE) << 8) | \ - ((BSIZE) << 11) | \ - ((ASIZE) << 14) | \ - ((TYPE) << 29)) - - - -/** * Texture/surface image formats (preliminary) */ /* KW: Added lots of surface formats to support vertex element layout - * definitions, and eventually render-to-vertex-buffer. Could - * consider making float/int/uint/scaled/normalized a separate - * parameter, but on the other hand there are special cases like - * z24s8, compressed textures, ycbcr, etc that won't fit that model. + * definitions, and eventually render-to-vertex-buffer. */ enum pipe_format { - PIPE_FORMAT_NONE = _PIPE_FORMAT_RGBAZS_1 ( _PIPE_FORMAT_0000, 0, 0, 0, 0, PIPE_FORMAT_TYPE_UNKNOWN ), - PIPE_FORMAT_A8R8G8B8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_ARGB, 1, 1, 1, 1, PIPE_FORMAT_TYPE_UNORM ), - PIPE_FORMAT_X8R8G8B8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_1RGB, 1, 1, 1, 1, PIPE_FORMAT_TYPE_UNORM ), - PIPE_FORMAT_B8G8R8A8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_BGRA, 1, 1, 1, 1, PIPE_FORMAT_TYPE_UNORM ), - PIPE_FORMAT_B8G8R8X8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_BGR1, 1, 1, 1, 1, PIPE_FORMAT_TYPE_UNORM ), - PIPE_FORMAT_A1R5G5B5_UNORM = _PIPE_FORMAT_RGBAZS_1 ( _PIPE_FORMAT_ARGB, 1, 5, 5, 5, PIPE_FORMAT_TYPE_UNORM ), - PIPE_FORMAT_A4R4G4B4_UNORM = _PIPE_FORMAT_RGBAZS_1 ( _PIPE_FORMAT_ARGB, 4, 4, 4, 4, PIPE_FORMAT_TYPE_UNORM ), - PIPE_FORMAT_R5G6B5_UNORM = _PIPE_FORMAT_RGBAZS_1 ( _PIPE_FORMAT_RGB1, 5, 6, 5, 0, PIPE_FORMAT_TYPE_UNORM ), - PIPE_FORMAT_A2B10G10R10_UNORM = _PIPE_FORMAT_RGBAZS_2 ( _PIPE_FORMAT_ABGR, 1, 5, 5, 5, PIPE_FORMAT_TYPE_UNORM ), - PIPE_FORMAT_L8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RRR1, 1, 1, 1, 0, PIPE_FORMAT_TYPE_UNORM ), /**< ubyte luminance */ - PIPE_FORMAT_A8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_000R, 0, 0, 0, 1, PIPE_FORMAT_TYPE_UNORM ), /**< ubyte alpha */ - PIPE_FORMAT_I8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RRRR, 1, 1, 1, 1, PIPE_FORMAT_TYPE_UNORM ), /**< ubyte intensity */ - PIPE_FORMAT_A8L8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RRRG, 1, 1, 1, 1, PIPE_FORMAT_TYPE_UNORM ), /**< ubyte alpha, luminance */ - PIPE_FORMAT_L16_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RRR1, 2, 2, 2, 0, PIPE_FORMAT_TYPE_UNORM ), /**< ushort luminance */ - PIPE_FORMAT_YCBCR = _PIPE_FORMAT_YCBCR( 0 ), - PIPE_FORMAT_YCBCR_REV = _PIPE_FORMAT_YCBCR( 1 ), - PIPE_FORMAT_Z16_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_Z000, 2, 0, 0, 0, PIPE_FORMAT_TYPE_UNORM ), - PIPE_FORMAT_Z32_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_Z000, 4, 0, 0, 0, PIPE_FORMAT_TYPE_UNORM ), - PIPE_FORMAT_Z32_FLOAT = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_Z000, 4, 0, 0, 0, PIPE_FORMAT_TYPE_FLOAT ), - PIPE_FORMAT_S8Z24_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_SZ00, 1, 3, 0, 0, PIPE_FORMAT_TYPE_UNORM ), - PIPE_FORMAT_Z24S8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_ZS00, 3, 1, 0, 0, PIPE_FORMAT_TYPE_UNORM ), - PIPE_FORMAT_X8Z24_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_0Z00, 1, 3, 0, 0, PIPE_FORMAT_TYPE_UNORM ), - PIPE_FORMAT_Z24X8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_Z000, 3, 1, 0, 0, PIPE_FORMAT_TYPE_UNORM ), - PIPE_FORMAT_S8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_S000, 1, 0, 0, 0, PIPE_FORMAT_TYPE_UNORM ), /**< ubyte stencil */ - PIPE_FORMAT_R64_FLOAT = _PIPE_FORMAT_RGBAZS_64( _PIPE_FORMAT_R001, 1, 0, 0, 0, PIPE_FORMAT_TYPE_FLOAT ), - PIPE_FORMAT_R64G64_FLOAT = _PIPE_FORMAT_RGBAZS_64( _PIPE_FORMAT_RG01, 1, 1, 0, 0, PIPE_FORMAT_TYPE_FLOAT ), - PIPE_FORMAT_R64G64B64_FLOAT = _PIPE_FORMAT_RGBAZS_64( _PIPE_FORMAT_RGB1, 1, 1, 1, 0, PIPE_FORMAT_TYPE_FLOAT ), - PIPE_FORMAT_R64G64B64A64_FLOAT = _PIPE_FORMAT_RGBAZS_64( _PIPE_FORMAT_RGBA, 1, 1, 1, 1, PIPE_FORMAT_TYPE_FLOAT ), - PIPE_FORMAT_R32_FLOAT = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 4, 0, 0, 0, PIPE_FORMAT_TYPE_FLOAT ), - PIPE_FORMAT_R32G32_FLOAT = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 4, 4, 0, 0, PIPE_FORMAT_TYPE_FLOAT ), - PIPE_FORMAT_R32G32B32_FLOAT = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 4, 4, 4, 0, PIPE_FORMAT_TYPE_FLOAT ), - PIPE_FORMAT_R32G32B32A32_FLOAT = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 4, 4, 4, 4, PIPE_FORMAT_TYPE_FLOAT ), - PIPE_FORMAT_R32_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 4, 0, 0, 0, PIPE_FORMAT_TYPE_UNORM ), - PIPE_FORMAT_R32G32_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 4, 4, 0, 0, PIPE_FORMAT_TYPE_UNORM ), - PIPE_FORMAT_R32G32B32_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 4, 4, 4, 0, PIPE_FORMAT_TYPE_UNORM ), - PIPE_FORMAT_R32G32B32A32_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 4, 4, 4, 4, PIPE_FORMAT_TYPE_UNORM ), - PIPE_FORMAT_R32_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 4, 0, 0, 0, PIPE_FORMAT_TYPE_USCALED ), - PIPE_FORMAT_R32G32_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 4, 4, 0, 0, PIPE_FORMAT_TYPE_USCALED ), - PIPE_FORMAT_R32G32B32_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 4, 4, 4, 0, PIPE_FORMAT_TYPE_USCALED ), - PIPE_FORMAT_R32G32B32A32_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 4, 4, 4, 4, PIPE_FORMAT_TYPE_USCALED ), - PIPE_FORMAT_R32_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 4, 0, 0, 0, PIPE_FORMAT_TYPE_SNORM ), - PIPE_FORMAT_R32G32_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 4, 4, 0, 0, PIPE_FORMAT_TYPE_SNORM ), - PIPE_FORMAT_R32G32B32_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 4, 4, 4, 0, PIPE_FORMAT_TYPE_SNORM ), - PIPE_FORMAT_R32G32B32A32_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 4, 4, 4, 4, PIPE_FORMAT_TYPE_SNORM ), - PIPE_FORMAT_R32_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 4, 0, 0, 0, PIPE_FORMAT_TYPE_SSCALED ), - PIPE_FORMAT_R32G32_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 4, 4, 0, 0, PIPE_FORMAT_TYPE_SSCALED ), - PIPE_FORMAT_R32G32B32_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 4, 4, 4, 0, PIPE_FORMAT_TYPE_SSCALED ), - PIPE_FORMAT_R32G32B32A32_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 4, 4, 4, 4, PIPE_FORMAT_TYPE_SSCALED ), - PIPE_FORMAT_R16_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 2, 0, 0, 0, PIPE_FORMAT_TYPE_UNORM ), - PIPE_FORMAT_R16G16_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 2, 2, 0, 0, PIPE_FORMAT_TYPE_UNORM ), - PIPE_FORMAT_R16G16B16_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 2, 2, 2, 0, PIPE_FORMAT_TYPE_UNORM ), - PIPE_FORMAT_R16G16B16A16_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 2, 2, 2, 2, PIPE_FORMAT_TYPE_UNORM ), - PIPE_FORMAT_R16_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 2, 0, 0, 0, PIPE_FORMAT_TYPE_USCALED ), - PIPE_FORMAT_R16G16_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 2, 2, 0, 0, PIPE_FORMAT_TYPE_USCALED ), - PIPE_FORMAT_R16G16B16_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 2, 2, 2, 0, PIPE_FORMAT_TYPE_USCALED ), - PIPE_FORMAT_R16G16B16A16_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 2, 2, 2, 2, PIPE_FORMAT_TYPE_USCALED ), - PIPE_FORMAT_R16_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 2, 0, 0, 0, PIPE_FORMAT_TYPE_SNORM ), - PIPE_FORMAT_R16G16_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 2, 2, 0, 0, PIPE_FORMAT_TYPE_SNORM ), - PIPE_FORMAT_R16G16B16_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 2, 2, 2, 0, PIPE_FORMAT_TYPE_SNORM ), - PIPE_FORMAT_R16G16B16A16_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 2, 2, 2, 2, PIPE_FORMAT_TYPE_SNORM ), - PIPE_FORMAT_R16_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 2, 0, 0, 0, PIPE_FORMAT_TYPE_SSCALED ), - PIPE_FORMAT_R16G16_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 2, 2, 0, 0, PIPE_FORMAT_TYPE_SSCALED ), - PIPE_FORMAT_R16G16B16_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 2, 2, 2, 0, PIPE_FORMAT_TYPE_SSCALED ), - PIPE_FORMAT_R16G16B16A16_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 2, 2, 2, 2, PIPE_FORMAT_TYPE_SSCALED ), - PIPE_FORMAT_R8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 1, 0, 0, 0, PIPE_FORMAT_TYPE_UNORM ), - PIPE_FORMAT_R8G8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 1, 1, 0, 0, PIPE_FORMAT_TYPE_UNORM ), - PIPE_FORMAT_R8G8B8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 0, PIPE_FORMAT_TYPE_UNORM ), - PIPE_FORMAT_R8G8B8A8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 1, 1, 1, 1, PIPE_FORMAT_TYPE_UNORM ), - PIPE_FORMAT_R8G8B8X8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 1, PIPE_FORMAT_TYPE_UNORM ), - PIPE_FORMAT_R8_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 1, 0, 0, 0, PIPE_FORMAT_TYPE_USCALED ), - PIPE_FORMAT_R8G8_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 1, 1, 0, 0, PIPE_FORMAT_TYPE_USCALED ), - PIPE_FORMAT_R8G8B8_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 0, PIPE_FORMAT_TYPE_USCALED ), - PIPE_FORMAT_R8G8B8A8_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 1, 1, 1, 1, PIPE_FORMAT_TYPE_USCALED ), - PIPE_FORMAT_R8G8B8X8_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 1, PIPE_FORMAT_TYPE_USCALED ), - PIPE_FORMAT_R8_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 1, 0, 0, 0, PIPE_FORMAT_TYPE_SNORM ), - PIPE_FORMAT_R8G8_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 1, 1, 0, 0, PIPE_FORMAT_TYPE_SNORM ), - PIPE_FORMAT_R8G8B8_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 0, PIPE_FORMAT_TYPE_SNORM ), - PIPE_FORMAT_R8G8B8A8_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SNORM ), - PIPE_FORMAT_R8G8B8X8_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SNORM ), - PIPE_FORMAT_B6G5R5_SNORM = _PIPE_FORMAT_RGBAZS_1 ( _PIPE_FORMAT_BGR1, 6, 5, 5, 0, PIPE_FORMAT_TYPE_SNORM ), - PIPE_FORMAT_A8B8G8R8_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_BGRA, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SNORM ), - PIPE_FORMAT_X8B8G8R8_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_BGR1, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SNORM ), - PIPE_FORMAT_R8_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 1, 0, 0, 0, PIPE_FORMAT_TYPE_SSCALED ), - PIPE_FORMAT_R8G8_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 1, 1, 0, 0, PIPE_FORMAT_TYPE_SSCALED ), - PIPE_FORMAT_R8G8B8_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 0, PIPE_FORMAT_TYPE_SSCALED ), - PIPE_FORMAT_R8G8B8A8_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SSCALED ), - PIPE_FORMAT_R8G8B8X8_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SSCALED ), - PIPE_FORMAT_R32_FIXED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 4, 0, 0, 0, PIPE_FORMAT_TYPE_FIXED ), - PIPE_FORMAT_R32G32_FIXED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 4, 4, 0, 0, PIPE_FORMAT_TYPE_FIXED ), - PIPE_FORMAT_R32G32B32_FIXED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 4, 4, 4, 0, PIPE_FORMAT_TYPE_FIXED ), - PIPE_FORMAT_R32G32B32A32_FIXED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 4, 4, 4, 4, PIPE_FORMAT_TYPE_FIXED ), + PIPE_FORMAT_NONE = 0, + PIPE_FORMAT_A8R8G8B8_UNORM = 1, + PIPE_FORMAT_X8R8G8B8_UNORM = 2, + PIPE_FORMAT_B8G8R8A8_UNORM = 3, + PIPE_FORMAT_B8G8R8X8_UNORM = 4, + PIPE_FORMAT_A1R5G5B5_UNORM = 5, + PIPE_FORMAT_A4R4G4B4_UNORM = 6, + PIPE_FORMAT_R5G6B5_UNORM = 7, + PIPE_FORMAT_A2B10G10R10_UNORM = 8, + PIPE_FORMAT_L8_UNORM = 9, /**< ubyte luminance */ + PIPE_FORMAT_A8_UNORM = 10, /**< ubyte alpha */ + PIPE_FORMAT_I8_UNORM = 11, /**< ubyte intensity */ + PIPE_FORMAT_A8L8_UNORM = 12, /**< ubyte alpha, luminance */ + PIPE_FORMAT_L16_UNORM = 13, /**< ushort luminance */ + PIPE_FORMAT_YCBCR = 14, + PIPE_FORMAT_YCBCR_REV = 15, + PIPE_FORMAT_Z16_UNORM = 16, + PIPE_FORMAT_Z32_UNORM = 17, + PIPE_FORMAT_Z32_FLOAT = 18, + PIPE_FORMAT_S8Z24_UNORM = 19, + PIPE_FORMAT_Z24S8_UNORM = 20, + PIPE_FORMAT_X8Z24_UNORM = 21, + PIPE_FORMAT_Z24X8_UNORM = 22, + PIPE_FORMAT_S8_UNORM = 23, /**< ubyte stencil */ + PIPE_FORMAT_R64_FLOAT = 24, + PIPE_FORMAT_R64G64_FLOAT = 25, + PIPE_FORMAT_R64G64B64_FLOAT = 26, + PIPE_FORMAT_R64G64B64A64_FLOAT = 27, + PIPE_FORMAT_R32_FLOAT = 28, + PIPE_FORMAT_R32G32_FLOAT = 29, + PIPE_FORMAT_R32G32B32_FLOAT = 30, + PIPE_FORMAT_R32G32B32A32_FLOAT = 31, + PIPE_FORMAT_R32_UNORM = 32, + PIPE_FORMAT_R32G32_UNORM = 33, + PIPE_FORMAT_R32G32B32_UNORM = 34, + PIPE_FORMAT_R32G32B32A32_UNORM = 35, + PIPE_FORMAT_R32_USCALED = 36, + PIPE_FORMAT_R32G32_USCALED = 37, + PIPE_FORMAT_R32G32B32_USCALED = 38, + PIPE_FORMAT_R32G32B32A32_USCALED = 39, + PIPE_FORMAT_R32_SNORM = 40, + PIPE_FORMAT_R32G32_SNORM = 41, + PIPE_FORMAT_R32G32B32_SNORM = 42, + PIPE_FORMAT_R32G32B32A32_SNORM = 43, + PIPE_FORMAT_R32_SSCALED = 44, + PIPE_FORMAT_R32G32_SSCALED = 45, + PIPE_FORMAT_R32G32B32_SSCALED = 46, + PIPE_FORMAT_R32G32B32A32_SSCALED = 47, + PIPE_FORMAT_R16_UNORM = 48, + PIPE_FORMAT_R16G16_UNORM = 49, + PIPE_FORMAT_R16G16B16_UNORM = 50, + PIPE_FORMAT_R16G16B16A16_UNORM = 51, + PIPE_FORMAT_R16_USCALED = 52, + PIPE_FORMAT_R16G16_USCALED = 53, + PIPE_FORMAT_R16G16B16_USCALED = 54, + PIPE_FORMAT_R16G16B16A16_USCALED = 55, + PIPE_FORMAT_R16_SNORM = 56, + PIPE_FORMAT_R16G16_SNORM = 57, + PIPE_FORMAT_R16G16B16_SNORM = 58, + PIPE_FORMAT_R16G16B16A16_SNORM = 59, + PIPE_FORMAT_R16_SSCALED = 60, + PIPE_FORMAT_R16G16_SSCALED = 61, + PIPE_FORMAT_R16G16B16_SSCALED = 62, + PIPE_FORMAT_R16G16B16A16_SSCALED = 63, + PIPE_FORMAT_R8_UNORM = 64, + PIPE_FORMAT_R8G8_UNORM = 65, + PIPE_FORMAT_R8G8B8_UNORM = 66, + PIPE_FORMAT_R8G8B8A8_UNORM = 67, + PIPE_FORMAT_R8G8B8X8_UNORM = 68, + PIPE_FORMAT_R8_USCALED = 69, + PIPE_FORMAT_R8G8_USCALED = 70, + PIPE_FORMAT_R8G8B8_USCALED = 71, + PIPE_FORMAT_R8G8B8A8_USCALED = 72, + PIPE_FORMAT_R8G8B8X8_USCALED = 73, + PIPE_FORMAT_R8_SNORM = 74, + PIPE_FORMAT_R8G8_SNORM = 75, + PIPE_FORMAT_R8G8B8_SNORM = 76, + PIPE_FORMAT_R8G8B8A8_SNORM = 77, + PIPE_FORMAT_R8G8B8X8_SNORM = 78, + PIPE_FORMAT_B6G5R5_SNORM = 79, + PIPE_FORMAT_A8B8G8R8_SNORM = 80, + PIPE_FORMAT_X8B8G8R8_SNORM = 81, + PIPE_FORMAT_R8_SSCALED = 82, + PIPE_FORMAT_R8G8_SSCALED = 83, + PIPE_FORMAT_R8G8B8_SSCALED = 84, + PIPE_FORMAT_R8G8B8A8_SSCALED = 85, + PIPE_FORMAT_R8G8B8X8_SSCALED = 86, + PIPE_FORMAT_R32_FIXED = 87, + PIPE_FORMAT_R32G32_FIXED = 88, + PIPE_FORMAT_R32G32B32_FIXED = 89, + PIPE_FORMAT_R32G32B32A32_FIXED = 90, /* sRGB formats */ - PIPE_FORMAT_L8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RRR1, 1, 1, 1, 0, PIPE_FORMAT_TYPE_SRGB ), - PIPE_FORMAT_A8L8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RRRG, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SRGB ), - PIPE_FORMAT_R8G8B8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 0, PIPE_FORMAT_TYPE_SRGB ), - PIPE_FORMAT_R8G8B8A8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SRGB ), - PIPE_FORMAT_R8G8B8X8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SRGB ), - PIPE_FORMAT_A8R8G8B8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_ARGB, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SRGB ), - PIPE_FORMAT_X8R8G8B8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_1RGB, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SRGB ), - PIPE_FORMAT_B8G8R8A8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_BGRA, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SRGB ), - PIPE_FORMAT_B8G8R8X8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_BGR1, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SRGB ), + PIPE_FORMAT_L8_SRGB = 91, + PIPE_FORMAT_A8L8_SRGB = 92, + PIPE_FORMAT_R8G8B8_SRGB = 93, + PIPE_FORMAT_R8G8B8A8_SRGB = 94, + PIPE_FORMAT_R8G8B8X8_SRGB = 95, + PIPE_FORMAT_A8R8G8B8_SRGB = 96, + PIPE_FORMAT_X8R8G8B8_SRGB = 97, + PIPE_FORMAT_B8G8R8A8_SRGB = 98, + PIPE_FORMAT_B8G8R8X8_SRGB = 99, /* mixed formats */ - PIPE_FORMAT_X8UB8UG8SR8S_NORM = _PIPE_FORMAT_MIXED( _PIPE_FORMAT_1BGR, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1 ), - PIPE_FORMAT_B6UG5SR5S_NORM = _PIPE_FORMAT_MIXED( _PIPE_FORMAT_BGR1, 6, 5, 5, 0, 0, 1, 1, 0, 1, 0 ), + PIPE_FORMAT_X8UB8UG8SR8S_NORM = 100, + PIPE_FORMAT_B6UG5SR5S_NORM = 101, /* compressed formats */ - PIPE_FORMAT_DXT1_RGB = _PIPE_FORMAT_DXT( 1, 8, 8, 8, 0, PIPE_FORMAT_TYPE_UNORM ), - PIPE_FORMAT_DXT1_RGBA = _PIPE_FORMAT_DXT( 1, 8, 8, 8, 8, PIPE_FORMAT_TYPE_UNORM ), - PIPE_FORMAT_DXT3_RGBA = _PIPE_FORMAT_DXT( 3, 8, 8, 8, 8, PIPE_FORMAT_TYPE_UNORM ), - PIPE_FORMAT_DXT5_RGBA = _PIPE_FORMAT_DXT( 5, 8, 8, 8, 8, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_DXT1_RGB = 102, + PIPE_FORMAT_DXT1_RGBA = 103, + PIPE_FORMAT_DXT3_RGBA = 104, + PIPE_FORMAT_DXT5_RGBA = 105, /* sRGB, compressed */ - PIPE_FORMAT_DXT1_SRGB = _PIPE_FORMAT_DXT( 1, 8, 8, 8, 0, PIPE_FORMAT_TYPE_SRGB ), - PIPE_FORMAT_DXT1_SRGBA = _PIPE_FORMAT_DXT( 1, 8, 8, 8, 8, PIPE_FORMAT_TYPE_SRGB ), - PIPE_FORMAT_DXT3_SRGBA = _PIPE_FORMAT_DXT( 3, 8, 8, 8, 8, PIPE_FORMAT_TYPE_SRGB ), - PIPE_FORMAT_DXT5_SRGBA = _PIPE_FORMAT_DXT( 5, 8, 8, 8, 8, PIPE_FORMAT_TYPE_SRGB ) + PIPE_FORMAT_DXT1_SRGB = 106, + PIPE_FORMAT_DXT1_SRGBA = 107, + PIPE_FORMAT_DXT3_SRGBA = 108, + PIPE_FORMAT_DXT5_SRGBA = 109, + + PIPE_FORMAT_COUNT }; /** @@ -394,224 +174,6 @@ enum pipe_format { */ extern const char *pf_name( enum pipe_format format ); -/** - * Return bits for a particular component. - * \param comp component index, starting at 0 - */ -static INLINE uint pf_get_component_bits( enum pipe_format format, uint comp ) -{ - uint size; - - if (pf_swizzle_x(format) == comp) { - size = pf_size_x(format); - } - else if (pf_swizzle_y(format) == comp) { - size = pf_size_y(format); - } - else if (pf_swizzle_z(format) == comp) { - size = pf_size_z(format); - } - else if (pf_swizzle_w(format) == comp) { - size = pf_size_w(format); - } - else { - size = 0; - } - if (pf_layout( format ) == PIPE_FORMAT_LAYOUT_RGBAZS) - return size << pf_exp2( format ); - return size << (pf_mixed_scale8( format ) * 3); -} - -/** - * Return total bits needed for the pixel format. - */ -static INLINE uint pf_get_bits( enum pipe_format format ) -{ - switch (pf_layout(format)) { - case PIPE_FORMAT_LAYOUT_RGBAZS: - case PIPE_FORMAT_LAYOUT_MIXED: - return - pf_get_component_bits( format, PIPE_FORMAT_COMP_0 ) + - pf_get_component_bits( format, PIPE_FORMAT_COMP_1 ) + - pf_get_component_bits( format, PIPE_FORMAT_COMP_R ) + - pf_get_component_bits( format, PIPE_FORMAT_COMP_G ) + - pf_get_component_bits( format, PIPE_FORMAT_COMP_B ) + - pf_get_component_bits( format, PIPE_FORMAT_COMP_A ) + - pf_get_component_bits( format, PIPE_FORMAT_COMP_Z ) + - pf_get_component_bits( format, PIPE_FORMAT_COMP_S ); - case PIPE_FORMAT_LAYOUT_YCBCR: - assert( format == PIPE_FORMAT_YCBCR || format == PIPE_FORMAT_YCBCR_REV ); - /* return effective bits per pixel */ - return 16; - default: - assert( 0 ); - return 0; - } -} - -/** - * Return bytes per pixel for the given format. - */ -static INLINE uint pf_get_size( enum pipe_format format ) -{ - assert(pf_get_bits(format) % 8 == 0); - return pf_get_bits(format) / 8; -} - -/** - * Describe accurately the pixel format. - * - * The chars-per-pixel concept falls apart with compressed and yuv images, where - * more than one pixel are coded in a single data block. This structure - * describes that block. - * - * Simple pixel formats are effectively a 1x1xcpp block. - */ -struct pipe_format_block -{ - /** Block size in bytes */ - unsigned size; - - /** Block width in pixels */ - unsigned width; - - /** Block height in pixels */ - unsigned height; -}; - -/** - * Describe pixel format's block. - * - * @sa http://msdn2.microsoft.com/en-us/library/ms796147.aspx - */ -static INLINE void -pf_get_block(enum pipe_format format, struct pipe_format_block *block) -{ - switch(format) { - case PIPE_FORMAT_DXT1_RGBA: - case PIPE_FORMAT_DXT1_RGB: - case PIPE_FORMAT_DXT1_SRGBA: - case PIPE_FORMAT_DXT1_SRGB: - block->size = 8; - block->width = 4; - block->height = 4; - break; - case PIPE_FORMAT_DXT3_RGBA: - case PIPE_FORMAT_DXT5_RGBA: - case PIPE_FORMAT_DXT3_SRGBA: - case PIPE_FORMAT_DXT5_SRGBA: - block->size = 16; - block->width = 4; - block->height = 4; - break; - case PIPE_FORMAT_YCBCR: - case PIPE_FORMAT_YCBCR_REV: - block->size = 4; /* 2*cpp */ - block->width = 2; - block->height = 1; - break; - default: - block->size = pf_get_size(format); - block->width = 1; - block->height = 1; - break; - } -} - -static INLINE unsigned -pf_get_nblocksx(const struct pipe_format_block *block, unsigned x) -{ - return (x + block->width - 1)/block->width; -} - -static INLINE unsigned -pf_get_nblocksy(const struct pipe_format_block *block, unsigned y) -{ - return (y + block->height - 1)/block->height; -} - -static INLINE unsigned -pf_get_nblocks(const struct pipe_format_block *block, unsigned width, unsigned height) -{ - return pf_get_nblocksx(block, width)*pf_get_nblocksy(block, height); -} - -static INLINE size_t -pf_get_stride(const struct pipe_format_block *block, unsigned width) -{ - return pf_get_nblocksx(block, width)*block->size; -} - -static INLINE size_t -pf_get_2d_size(const struct pipe_format_block *block, size_t stride, unsigned height) -{ - return pf_get_nblocksy(block, height)*stride; -} - -static INLINE boolean -pf_is_depth_or_stencil( enum pipe_format format ) -{ - return (pf_get_component_bits( format, PIPE_FORMAT_COMP_Z ) + - pf_get_component_bits( format, PIPE_FORMAT_COMP_S )) != 0; -} - -static INLINE boolean -pf_is_depth_and_stencil( enum pipe_format format ) -{ - return (pf_get_component_bits( format, PIPE_FORMAT_COMP_Z ) != 0 && - pf_get_component_bits( format, PIPE_FORMAT_COMP_S ) != 0); -} - -/** DEPRECATED: For backwards compatibility */ -static INLINE boolean -pf_is_depth_stencil( enum pipe_format format ) -{ - return pf_is_depth_or_stencil( format ); -} - -static INLINE boolean -pf_is_compressed( enum pipe_format format ) -{ - return pf_layout(format) == PIPE_FORMAT_LAYOUT_DXT ? TRUE : FALSE; -} - -static INLINE boolean -pf_is_ycbcr( enum pipe_format format ) -{ - return pf_layout(format) == PIPE_FORMAT_LAYOUT_YCBCR ? TRUE : FALSE; -} - -static INLINE boolean -pf_has_alpha( enum pipe_format format ) -{ - switch (pf_layout(format)) { - case PIPE_FORMAT_LAYOUT_RGBAZS: - case PIPE_FORMAT_LAYOUT_MIXED: - /* FIXME: pf_get_component_bits( PIPE_FORMAT_A8L8_UNORM, PIPE_FORMAT_COMP_A ) should not return 0 right? */ - if(format == PIPE_FORMAT_A8_UNORM || - format == PIPE_FORMAT_A8L8_UNORM || - format == PIPE_FORMAT_A8L8_SRGB) - return TRUE; - return pf_get_component_bits( format, PIPE_FORMAT_COMP_A ) ? TRUE : FALSE; - case PIPE_FORMAT_LAYOUT_YCBCR: - return FALSE; - case PIPE_FORMAT_LAYOUT_DXT: - switch (format) { - case PIPE_FORMAT_DXT1_RGBA: - case PIPE_FORMAT_DXT3_RGBA: - case PIPE_FORMAT_DXT5_RGBA: - case PIPE_FORMAT_DXT1_SRGBA: - case PIPE_FORMAT_DXT3_SRGBA: - case PIPE_FORMAT_DXT5_SRGBA: - return TRUE; - default: - return FALSE; - } - default: - assert( 0 ); - return FALSE; - } -} enum pipe_video_chroma_format { diff --git a/src/gallium/include/pipe/p_refcnt.h b/src/gallium/include/pipe/p_refcnt.h index 1f9088b3e9c..c1c7415e023 100644 --- a/src/gallium/include/pipe/p_refcnt.h +++ b/src/gallium/include/pipe/p_refcnt.h @@ -51,7 +51,7 @@ pipe_reference_init(struct pipe_reference *reference, unsigned count) } -static INLINE bool +static INLINE boolean pipe_is_referenced(struct pipe_reference *reference) { return p_atomic_read(&reference->count) != 0; @@ -59,30 +59,29 @@ pipe_is_referenced(struct pipe_reference *reference) /** - * Set 'ptr' to point to 'reference' and update reference counting. - * The old thing pointed to, if any, will be unreferenced first. - * 'reference' may be NULL. + * Update reference counting. + * The old thing pointed to, if any, will be unreferenced. + * Both 'ptr' and 'reference' may be NULL. + * \return TRUE if the object's refcount hits zero and should be destroyed. */ -static INLINE bool -pipe_reference(struct pipe_reference **ptr, struct pipe_reference *reference) +static INLINE boolean +pipe_reference(struct pipe_reference *ptr, struct pipe_reference *reference) { - bool destroy = FALSE; + boolean destroy = FALSE; - if(*ptr != reference) { + if(ptr != reference) { /* bump the reference.count first */ if (reference) { assert(pipe_is_referenced(reference)); p_atomic_inc(&reference->count); } - if (*ptr) { - assert(pipe_is_referenced(*ptr)); - if (p_atomic_dec_zero(&(*ptr)->count)) { + if (ptr) { + assert(pipe_is_referenced(ptr)); + if (p_atomic_dec_zero(&ptr->count)) { destroy = TRUE; } } - - *ptr = reference; } return destroy; diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index d4c8aadaf92..550e2abc32a 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -35,12 +35,6 @@ extern "C" { #include "p_compiler.h" -struct tgsi_version -{ - unsigned MajorVersion : 8; - unsigned MinorVersion : 8; - unsigned Padding : 16; -}; struct tgsi_header { @@ -61,26 +55,27 @@ struct tgsi_processor #define TGSI_TOKEN_TYPE_DECLARATION 0 #define TGSI_TOKEN_TYPE_IMMEDIATE 1 #define TGSI_TOKEN_TYPE_INSTRUCTION 2 +#define TGSI_TOKEN_TYPE_PROPERTY 3 struct tgsi_token { unsigned Type : 4; /**< TGSI_TOKEN_TYPE_x */ unsigned NrTokens : 8; /**< UINT */ - unsigned Padding : 19; - unsigned Extended : 1; /**< BOOL */ + unsigned Padding : 20; }; enum tgsi_file_type { - TGSI_FILE_NULL =0, - TGSI_FILE_CONSTANT =1, - TGSI_FILE_INPUT =2, - TGSI_FILE_OUTPUT =3, - TGSI_FILE_TEMPORARY =4, - TGSI_FILE_SAMPLER =5, - TGSI_FILE_ADDRESS =6, - TGSI_FILE_IMMEDIATE =7, - TGSI_FILE_LOOP =8, - TGSI_FILE_PREDICATE =9, + TGSI_FILE_NULL =0, + TGSI_FILE_CONSTANT =1, + TGSI_FILE_INPUT =2, + TGSI_FILE_OUTPUT =3, + TGSI_FILE_TEMPORARY =4, + TGSI_FILE_SAMPLER =5, + TGSI_FILE_ADDRESS =6, + TGSI_FILE_IMMEDIATE =7, + TGSI_FILE_LOOP =8, + TGSI_FILE_PREDICATE =9, + TGSI_FILE_SYSTEM_VALUE =10, TGSI_FILE_COUNT /**< how many TGSI_FILE_ types */ }; @@ -117,8 +112,7 @@ struct tgsi_declaration unsigned Semantic : 1; /**< BOOL, any semantic info? */ unsigned Centroid : 1; /**< centroid sampling? */ unsigned Invariant : 1; /**< invariant optimization? */ - unsigned Padding : 4; - unsigned Extended : 1; /**< BOOL */ + unsigned Padding : 5; }; struct tgsi_declaration_range @@ -127,37 +121,58 @@ struct tgsi_declaration_range unsigned Last : 16; /**< UINT */ }; -#define TGSI_SEMANTIC_POSITION 0 -#define TGSI_SEMANTIC_COLOR 1 -#define TGSI_SEMANTIC_BCOLOR 2 /**< back-face color */ -#define TGSI_SEMANTIC_FOG 3 -#define TGSI_SEMANTIC_PSIZE 4 -#define TGSI_SEMANTIC_GENERIC 5 -#define TGSI_SEMANTIC_NORMAL 6 -#define TGSI_SEMANTIC_FACE 7 -#define TGSI_SEMANTIC_COUNT 8 /**< number of semantic values */ +#define TGSI_SEMANTIC_POSITION 0 +#define TGSI_SEMANTIC_COLOR 1 +#define TGSI_SEMANTIC_BCOLOR 2 /**< back-face color */ +#define TGSI_SEMANTIC_FOG 3 +#define TGSI_SEMANTIC_PSIZE 4 +#define TGSI_SEMANTIC_GENERIC 5 +#define TGSI_SEMANTIC_NORMAL 6 +#define TGSI_SEMANTIC_FACE 7 +#define TGSI_SEMANTIC_EDGEFLAG 8 +#define TGSI_SEMANTIC_PRIMID 9 +#define TGSI_SEMANTIC_COUNT 10 /**< number of semantic values */ struct tgsi_declaration_semantic { - unsigned SemanticName : 8; /**< one of TGSI_SEMANTIC_x */ - unsigned SemanticIndex : 16; /**< UINT */ + unsigned Name : 8; /**< one of TGSI_SEMANTIC_x */ + unsigned Index : 16; /**< UINT */ unsigned Padding : 8; }; #define TGSI_IMM_FLOAT32 0 +#define TGSI_IMM_UINT32 1 +#define TGSI_IMM_INT32 2 struct tgsi_immediate { unsigned Type : 4; /**< TGSI_TOKEN_TYPE_IMMEDIATE */ unsigned NrTokens : 8; /**< UINT */ unsigned DataType : 4; /**< one of TGSI_IMM_x */ - unsigned Padding : 15; - unsigned Extended : 1; /**< BOOL */ + unsigned Padding : 16; }; union tgsi_immediate_data { float Float; + unsigned Uint; + int Int; +}; + +#define TGSI_PROPERTY_GS_INPUT_PRIM 0 +#define TGSI_PROPERTY_GS_OUTPUT_PRIM 1 +#define TGSI_PROPERTY_GS_MAX_VERTICES 2 +#define TGSI_PROPERTY_COUNT 3 + +struct tgsi_property { + unsigned Type : 4; /**< TGSI_TOKEN_TYPE_PROPERTY */ + unsigned NrTokens : 8; /**< UINT */ + unsigned PropertyName : 8; /**< one of TGSI_PROPERTY */ + unsigned Padding : 12; +}; + +struct tgsi_property_data { + unsigned Data; }; /* TGSI opcodes. @@ -253,7 +268,7 @@ union tgsi_immediate_data #define TGSI_OPCODE_NOT 85 #define TGSI_OPCODE_TRUNC 86 #define TGSI_OPCODE_SHL 87 -#define TGSI_OPCODE_SHR 88 + /* gap */ #define TGSI_OPCODE_AND 89 #define TGSI_OPCODE_OR 90 #define TGSI_OPCODE_MOD 91 @@ -278,7 +293,33 @@ union tgsi_immediate_data #define TGSI_OPCODE_KIL 116 /* conditional kill */ #define TGSI_OPCODE_END 117 /* aka HALT */ /* gap */ -#define TGSI_OPCODE_LAST 119 +#define TGSI_OPCODE_F2I 119 +#define TGSI_OPCODE_IDIV 120 +#define TGSI_OPCODE_IMAX 121 +#define TGSI_OPCODE_IMIN 122 +#define TGSI_OPCODE_INEG 123 +#define TGSI_OPCODE_ISGE 124 +#define TGSI_OPCODE_ISHR 125 +#define TGSI_OPCODE_ISLT 126 +#define TGSI_OPCODE_F2U 127 +#define TGSI_OPCODE_U2F 128 +#define TGSI_OPCODE_UADD 129 +#define TGSI_OPCODE_UDIV 130 +#define TGSI_OPCODE_UMAD 131 +#define TGSI_OPCODE_UMAX 132 +#define TGSI_OPCODE_UMIN 133 +#define TGSI_OPCODE_UMOD 134 +#define TGSI_OPCODE_UMUL 135 +#define TGSI_OPCODE_USEQ 136 +#define TGSI_OPCODE_USGE 137 +#define TGSI_OPCODE_USHR 138 +#define TGSI_OPCODE_USLT 139 +#define TGSI_OPCODE_USNE 140 +#define TGSI_OPCODE_SWITCH 141 +#define TGSI_OPCODE_CASE 142 +#define TGSI_OPCODE_DEFAULT 143 +#define TGSI_OPCODE_ENDSWITCH 144 +#define TGSI_OPCODE_LAST 145 #define TGSI_SAT_NONE 0 /* do not saturate */ #define TGSI_SAT_ZERO_ONE 1 /* clamp to [0,1] */ @@ -293,7 +334,7 @@ union tgsi_immediate_data * respectively. For a given operation code, those numbers are fixed and are * present here only for convenience. * - * If Extended is TRUE, it is now executed. + * If Predicate is TRUE, tgsi_instruction_predicate token immediately follows. * * Saturate controls how are final results in destination registers modified. */ @@ -306,12 +347,16 @@ struct tgsi_instruction unsigned Saturate : 2; /* TGSI_SAT_ */ unsigned NumDstRegs : 2; /* UINT */ unsigned NumSrcRegs : 4; /* UINT */ - unsigned Padding : 3; - unsigned Extended : 1; /* BOOL */ + unsigned Predicate : 1; /* BOOL */ + unsigned Label : 1; + unsigned Texture : 1; + unsigned Padding : 1; }; /* - * If tgsi_instruction::Extended is TRUE, tgsi_instruction_ext follows. + * If tgsi_instruction::Label is TRUE, tgsi_instruction_label follows. + * + * If tgsi_instruction::Texture is TRUE, tgsi_instruction_texture follows. * * Then, tgsi_instruction::NumDstRegs of tgsi_dst_register follow. * @@ -321,42 +366,15 @@ struct tgsi_instruction * instruction, including the instruction word. */ -#define TGSI_INSTRUCTION_EXT_TYPE_LABEL 1 -#define TGSI_INSTRUCTION_EXT_TYPE_TEXTURE 2 -#define TGSI_INSTRUCTION_EXT_TYPE_PREDICATE 3 - -struct tgsi_instruction_ext -{ - unsigned Type : 4; /* TGSI_INSTRUCTION_EXT_TYPE_ */ - unsigned Padding : 27; - unsigned Extended : 1; /* BOOL */ -}; - -/* - * If tgsi_instruction_ext::Type is TGSI_INSTRUCTION_EXT_TYPE_LABEL, it - * should be cast to tgsi_instruction_ext_label. - * - * If tgsi_instruction_ext::Type is TGSI_INSTRUCTION_EXT_TYPE_TEXTURE, it - * should be cast to tgsi_instruction_ext_texture. - * - * If tgsi_instruction_ext::Type is TGSI_INSTRUCTION_EXT_TYPE_PREDICATE, it - * should be cast to tgsi_instruction_ext_predicate. - * - * If tgsi_instruction_ext::Extended is TRUE, another tgsi_instruction_ext - * follows. - */ - #define TGSI_SWIZZLE_X 0 #define TGSI_SWIZZLE_Y 1 #define TGSI_SWIZZLE_Z 2 #define TGSI_SWIZZLE_W 3 -struct tgsi_instruction_ext_label +struct tgsi_instruction_label { - unsigned Type : 4; /* TGSI_INSTRUCTION_EXT_TYPE_LABEL */ unsigned Label : 24; /* UINT */ - unsigned Padding : 3; - unsigned Extended : 1; /* BOOL */ + unsigned Padding : 8; }; #define TGSI_TEXTURE_UNKNOWN 0 @@ -370,29 +388,25 @@ struct tgsi_instruction_ext_label #define TGSI_TEXTURE_SHADOWRECT 8 #define TGSI_TEXTURE_COUNT 9 -struct tgsi_instruction_ext_texture +struct tgsi_instruction_texture { - unsigned Type : 4; /* TGSI_INSTRUCTION_EXT_TYPE_TEXTURE */ unsigned Texture : 8; /* TGSI_TEXTURE_ */ - unsigned Padding : 19; - unsigned Extended : 1; /* BOOL */ + unsigned Padding : 24; }; /* * For SM3, the following constraint applies. * - Swizzle is either set to identity or replicate. */ -struct tgsi_instruction_ext_predicate +struct tgsi_instruction_predicate { - unsigned Type : 4; /* TGSI_INSTRUCTION_EXT_TYPE_PREDICATE */ + int Index : 16; /* SINT */ unsigned SwizzleX : 2; /* TGSI_SWIZZLE_x */ unsigned SwizzleY : 2; /* TGSI_SWIZZLE_x */ unsigned SwizzleZ : 2; /* TGSI_SWIZZLE_x */ unsigned SwizzleW : 2; /* TGSI_SWIZZLE_x */ unsigned Negate : 1; /* BOOL */ - unsigned SrcIndex : 8; /* UINT */ - unsigned Padding : 10; - unsigned Extended : 1; /* BOOL */ + unsigned Padding : 7; }; /** @@ -409,26 +423,24 @@ struct tgsi_instruction_ext_predicate * The fetched register components are swizzled according to SwizzleX, SwizzleY, * SwizzleZ and SwizzleW. * - * If Extended is TRUE, any further modifications to the source register are - * made to this temporary storage. */ struct tgsi_src_register { unsigned File : 4; /* TGSI_FILE_ */ + unsigned Indirect : 1; /* BOOL */ + unsigned Dimension : 1; /* BOOL */ + int Index : 16; /* SINT */ unsigned SwizzleX : 2; /* TGSI_SWIZZLE_ */ unsigned SwizzleY : 2; /* TGSI_SWIZZLE_ */ unsigned SwizzleZ : 2; /* TGSI_SWIZZLE_ */ unsigned SwizzleW : 2; /* TGSI_SWIZZLE_ */ - unsigned Negate : 1; /* BOOL */ - unsigned Indirect : 1; /* BOOL */ - unsigned Dimension : 1; /* BOOL */ - int Index : 16; /* SINT */ - unsigned Extended : 1; /* BOOL */ + unsigned Absolute : 1; /* BOOL */ + unsigned Negate : 1; /* BOOL */ }; /** - * If tgsi_src_register::Extended is TRUE, tgsi_src_register_ext follows. + * If tgsi_src_register::Modifier is TRUE, tgsi_src_register_modifier follows. * * Then, if tgsi_src_register::Indirect is TRUE, another tgsi_src_register * follows. @@ -436,58 +448,13 @@ struct tgsi_src_register * Then, if tgsi_src_register::Dimension is TRUE, tgsi_dimension follows. */ -#define TGSI_SRC_REGISTER_EXT_TYPE_MOD 1 - -struct tgsi_src_register_ext -{ - unsigned Type : 4; /* TGSI_SRC_REGISTER_EXT_TYPE_ */ - unsigned Padding : 27; - unsigned Extended : 1; /* BOOL */ -}; - -/** - * If tgsi_src_register_ext::Type is TGSI_SRC_REGISTER_EXT_TYPE_MOD, - * it should be cast to tgsi_src_register_ext_mod. - * - * If tgsi_dst_register_ext::Extended is TRUE, another tgsi_dst_register_ext - * follows. - */ - - -/** - * Extra src register modifiers - * - * If Complement is TRUE, the source register is modified by subtracting it - * from 1.0. - * - * If Bias is TRUE, the source register is modified by subtracting 0.5 from it. - * - * If Scale2X is TRUE, the source register is modified by multiplying it by 2.0. - * - * If Absolute is TRUE, the source register is modified by removing the sign. - * - * If Negate is TRUE, the source register is modified by negating it. - */ - -struct tgsi_src_register_ext_mod -{ - unsigned Type : 4; /* TGSI_SRC_REGISTER_EXT_TYPE_MOD */ - unsigned Complement : 1; /* BOOL */ - unsigned Bias : 1; /* BOOL */ - unsigned Scale2X : 1; /* BOOL */ - unsigned Absolute : 1; /* BOOL */ - unsigned Negate : 1; /* BOOL */ - unsigned Padding : 22; - unsigned Extended : 1; /* BOOL */ -}; struct tgsi_dimension { unsigned Indirect : 1; /* BOOL */ unsigned Dimension : 1; /* BOOL */ - unsigned Padding : 13; + unsigned Padding : 14; int Index : 16; /* SINT */ - unsigned Extended : 1; /* BOOL */ }; struct tgsi_dst_register @@ -497,51 +464,9 @@ struct tgsi_dst_register unsigned Indirect : 1; /* BOOL */ unsigned Dimension : 1; /* BOOL */ int Index : 16; /* SINT */ - unsigned Padding : 5; - unsigned Extended : 1; /* BOOL */ -}; - -/* - * If tgsi_dst_register::Extended is TRUE, tgsi_dst_register_ext follows. - * - * Then, if tgsi_dst_register::Indirect is TRUE, tgsi_src_register follows. - */ - -#define TGSI_DST_REGISTER_EXT_TYPE_MODULATE 1 - -struct tgsi_dst_register_ext -{ - unsigned Type : 4; /* TGSI_DST_REGISTER_EXT_TYPE_ */ - unsigned Padding : 27; - unsigned Extended : 1; /* BOOL */ + unsigned Padding : 6; }; -/** - * Extra destination register modifiers - * - * If tgsi_dst_register_ext::Type is TGSI_DST_REGISTER_EXT_TYPE_MODULATE, - * it should be cast to tgsi_dst_register_ext_modulate. - * - * If tgsi_dst_register_ext::Extended is TRUE, another tgsi_dst_register_ext - * follows. - */ - -#define TGSI_MODULATE_1X 0 -#define TGSI_MODULATE_2X 1 -#define TGSI_MODULATE_4X 2 -#define TGSI_MODULATE_8X 3 -#define TGSI_MODULATE_HALF 4 -#define TGSI_MODULATE_QUARTER 5 -#define TGSI_MODULATE_EIGHTH 6 -#define TGSI_MODULATE_COUNT 7 - -struct tgsi_dst_register_ext_modulate -{ - unsigned Type : 4; /* TGSI_DST_REGISTER_EXT_TYPE_MODULATE */ - unsigned Modulate : 4; /* TGSI_MODULATE_ */ - unsigned Padding : 23; - unsigned Extended : 1; /* BOOL */ -}; #ifdef __cplusplus } diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 287b424e4ac..60e96b98deb 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -60,6 +60,7 @@ extern "C" { #define PIPE_MAX_COLOR_BUFS 8 #define PIPE_MAX_CONSTANT 32 #define PIPE_MAX_SAMPLERS 16 +#define PIPE_MAX_VERTEX_SAMPLERS 16 #define PIPE_MAX_SHADER_INPUTS 16 #define PIPE_MAX_SHADER_OUTPUTS 16 #define PIPE_MAX_TEXTURE_LEVELS 16 @@ -315,14 +316,10 @@ struct pipe_surface */ struct pipe_transfer { - enum pipe_format format; /**< PIPE_FORMAT_x */ unsigned x; /**< x offset from start of texture image */ unsigned y; /**< y offset from start of texture image */ unsigned width; /**< logical width in pixels */ unsigned height; /**< logical height in pixels */ - struct pipe_format_block block; - unsigned nblocksx; /**< allocated width in blocks */ - unsigned nblocksy; /**< allocated height in blocks */ unsigned stride; /**< stride in bytes between rows of blocks */ enum pipe_transfer_usage usage; /**< PIPE_TRANSFER_* */ @@ -343,13 +340,9 @@ struct pipe_texture enum pipe_texture_target target; /**< PIPE_TEXTURE_x */ enum pipe_format format; /**< PIPE_FORMAT_x */ - unsigned width[PIPE_MAX_TEXTURE_LEVELS]; - unsigned height[PIPE_MAX_TEXTURE_LEVELS]; - unsigned depth[PIPE_MAX_TEXTURE_LEVELS]; - - struct pipe_format_block block; - unsigned nblocksx[PIPE_MAX_TEXTURE_LEVELS]; /**< allocated width in blocks */ - unsigned nblocksy[PIPE_MAX_TEXTURE_LEVELS]; /**< allocated height in blocks */ + unsigned width0; + unsigned height0; + unsigned depth0; unsigned last_level:8; /**< Index of last mipmap level present/defined */ @@ -399,8 +392,9 @@ pipe_buffer_reference(struct pipe_buffer **ptr, struct pipe_buffer *buf) { struct pipe_buffer *old_buf = *ptr; - if (pipe_reference((struct pipe_reference **)ptr, &buf->reference)) + if (pipe_reference(&(*ptr)->reference, &buf->reference)) old_buf->screen->buffer_destroy(old_buf); + *ptr = buf; } static INLINE void @@ -408,8 +402,9 @@ pipe_surface_reference(struct pipe_surface **ptr, struct pipe_surface *surf) { struct pipe_surface *old_surf = *ptr; - if (pipe_reference((struct pipe_reference **)ptr, &surf->reference)) + if (pipe_reference(&(*ptr)->reference, &surf->reference)) old_surf->texture->screen->tex_surface_destroy(old_surf); + *ptr = surf; } static INLINE void @@ -417,8 +412,9 @@ pipe_texture_reference(struct pipe_texture **ptr, struct pipe_texture *tex) { struct pipe_texture *old_tex = *ptr; - if (pipe_reference((struct pipe_reference **)ptr, &tex->reference)) + if (pipe_reference(&(*ptr)->reference, &tex->reference)) old_tex->screen->texture_destroy(old_tex); + *ptr = tex; } diff --git a/src/gallium/include/pipe/p_video_state.h b/src/gallium/include/pipe/p_video_state.h index 4da26d608cf..b85f01c2b02 100644 --- a/src/gallium/include/pipe/p_video_state.h +++ b/src/gallium/include/pipe/p_video_state.h @@ -56,8 +56,9 @@ pipe_video_surface_reference(struct pipe_video_surface **ptr, struct pipe_video_ { struct pipe_video_surface *old_surf = *ptr; - if (pipe_reference((struct pipe_reference **)ptr, &surf->reference)) + if (pipe_reference(&(*ptr)->reference, &surf->reference)) old_surf->screen->video_surface_destroy(old_surf); + *ptr = surf; } struct pipe_video_rect diff --git a/src/gallium/state_trackers/dri/dri_context.c b/src/gallium/state_trackers/dri/dri_context.c index 8819936fcaf..f2e5f3fb23c 100644 --- a/src/gallium/state_trackers/dri/dri_context.c +++ b/src/gallium/state_trackers/dri/dri_context.c @@ -44,9 +44,9 @@ GLboolean dri_create_context(const __GLcontextModes * visual, - __DRIcontextPrivate * cPriv, void *sharedContextPrivate) + __DRIcontext * cPriv, void *sharedContextPrivate) { - __DRIscreenPrivate *sPriv = cPriv->driScreenPriv; + __DRIscreen *sPriv = cPriv->driScreenPriv; struct dri_screen *screen = dri_screen(sPriv); struct dri_context *ctx = NULL; struct st_context *st_share = NULL; @@ -97,7 +97,7 @@ dri_create_context(const __GLcontextModes * visual, } void -dri_destroy_context(__DRIcontextPrivate * cPriv) +dri_destroy_context(__DRIcontext * cPriv) { struct dri_context *ctx = dri_context(cPriv); @@ -116,7 +116,7 @@ dri_destroy_context(__DRIcontextPrivate * cPriv) } GLboolean -dri_unbind_context(__DRIcontextPrivate * cPriv) +dri_unbind_context(__DRIcontext * cPriv) { if (cPriv) { struct dri_context *ctx = dri_context(cPriv); @@ -133,9 +133,9 @@ dri_unbind_context(__DRIcontextPrivate * cPriv) } GLboolean -dri_make_current(__DRIcontextPrivate * cPriv, - __DRIdrawablePrivate * driDrawPriv, - __DRIdrawablePrivate * driReadPriv) +dri_make_current(__DRIcontext * cPriv, + __DRIdrawable * driDrawPriv, + __DRIdrawable * driReadPriv) { if (cPriv) { struct dri_context *ctx = dri_context(cPriv); diff --git a/src/gallium/state_trackers/dri/dri_context.h b/src/gallium/state_trackers/dri/dri_context.h index 46501787340..13f497462f7 100644 --- a/src/gallium/state_trackers/dri/dri_context.h +++ b/src/gallium/state_trackers/dri/dri_context.h @@ -44,10 +44,10 @@ struct dri_drawable; struct dri_context { /* dri */ - __DRIscreenPrivate *sPriv; - __DRIcontextPrivate *cPriv; - __DRIdrawablePrivate *dPriv; - __DRIdrawablePrivate *rPriv; + __DRIscreen *sPriv; + __DRIcontext *cPriv; + __DRIdrawable *dPriv; + __DRIdrawable *rPriv; driOptionCache optionCache; @@ -67,7 +67,7 @@ struct dri_context }; static INLINE struct dri_context * -dri_context(__DRIcontextPrivate * driContextPriv) +dri_context(__DRIcontext * driContextPriv) { return (struct dri_context *)driContextPriv->driverPrivate; } @@ -99,18 +99,18 @@ dri_unlock(struct dri_context *ctx) */ extern struct dri1_api_lock_funcs dri1_lf; -void dri_destroy_context(__DRIcontextPrivate * driContextPriv); +void dri_destroy_context(__DRIcontext * driContextPriv); -boolean dri_unbind_context(__DRIcontextPrivate * driContextPriv); +boolean dri_unbind_context(__DRIcontext * driContextPriv); boolean -dri_make_current(__DRIcontextPrivate * driContextPriv, - __DRIdrawablePrivate * driDrawPriv, - __DRIdrawablePrivate * driReadPriv); +dri_make_current(__DRIcontext * driContextPriv, + __DRIdrawable * driDrawPriv, + __DRIdrawable * driReadPriv); boolean dri_create_context(const __GLcontextModes * visual, - __DRIcontextPrivate * driContextPriv, + __DRIcontext * driContextPriv, void *sharedContextPrivate); /*********************************************************************** diff --git a/src/gallium/state_trackers/dri/dri_drawable.c b/src/gallium/state_trackers/dri/dri_drawable.c index 5625ff53cfd..1058dd38c25 100644 --- a/src/gallium/state_trackers/dri/dri_drawable.c +++ b/src/gallium/state_trackers/dri/dri_drawable.c @@ -44,9 +44,10 @@ #include "state_tracker/st_context.h" #include "state_tracker/st_cb_fbo.h" +#include "util/u_format.h" #include "util/u_memory.h" #include "util/u_rect.h" - + static struct pipe_surface * dri_surface_from_handle(struct drm_api *api, struct pipe_screen *screen, @@ -62,11 +63,10 @@ dri_surface_from_handle(struct drm_api *api, templat.tex_usage |= PIPE_TEXTURE_USAGE_RENDER_TARGET; templat.target = PIPE_TEXTURE_2D; templat.last_level = 0; - templat.depth[0] = 1; + templat.depth0 = 1; templat.format = format; - templat.width[0] = width; - templat.height[0] = height; - pf_get_block(templat.format, &templat.block); + templat.width0 = width; + templat.height0 = height; texture = api->texture_from_shared_handle(api, screen, &templat, "dri2 buffer", pitch, handle); @@ -118,7 +118,7 @@ dri2_check_if_pixmap(__DRIbuffer *buffers, int count) * This will be called a drawable is known to have been resized. */ void -dri_get_buffers(__DRIdrawablePrivate * dPriv) +dri_get_buffers(__DRIdrawable * dPriv) { struct dri_drawable *drawable = dri_drawable(dPriv); @@ -299,8 +299,8 @@ dri_flush_frontbuffer(struct pipe_screen *screen, * This is called when we need to set up GL rendering to a new X window. */ boolean -dri_create_buffer(__DRIscreenPrivate * sPriv, - __DRIdrawablePrivate * dPriv, +dri_create_buffer(__DRIscreen * sPriv, + __DRIdrawable * dPriv, const __GLcontextModes * visual, boolean isPixmap) { struct dri_screen *screen = sPriv->private; @@ -416,7 +416,7 @@ dri_swap_fences_push_back(struct dri_drawable *draw, } void -dri_destroy_buffer(__DRIdrawablePrivate * dPriv) +dri_destroy_buffer(__DRIdrawable * dPriv) { struct dri_drawable *drawable = dri_drawable(dPriv); struct pipe_fence_handle *fence; @@ -434,8 +434,8 @@ dri_destroy_buffer(__DRIdrawablePrivate * dPriv) static void dri1_update_drawables_locked(struct dri_context *ctx, - __DRIdrawablePrivate * driDrawPriv, - __DRIdrawablePrivate * driReadPriv) + __DRIdrawable * driDrawPriv, + __DRIdrawable * driReadPriv) { if (ctx->stLostLock) { ctx->stLostLock = FALSE; @@ -458,8 +458,8 @@ dri1_update_drawables_locked(struct dri_context *ctx, static void dri1_propagate_drawable_change(struct dri_context *ctx) { - __DRIdrawablePrivate *dPriv = ctx->dPriv; - __DRIdrawablePrivate *rPriv = ctx->rPriv; + __DRIdrawable *dPriv = ctx->dPriv; + __DRIdrawable *rPriv = ctx->rPriv; boolean flushed = FALSE; if (dPriv && ctx->d_stamp != dPriv->lastStamp) { @@ -532,7 +532,7 @@ static void dri1_swap_copy(struct dri_context *ctx, struct pipe_surface *dst, struct pipe_surface *src, - __DRIdrawablePrivate * dPriv, const struct drm_clip_rect *bbox) + __DRIdrawable * dPriv, const struct drm_clip_rect *bbox) { struct pipe_context *pipe = ctx->pipe; struct drm_clip_rect clip; @@ -563,7 +563,7 @@ dri1_swap_copy(struct dri_context *ctx, static void dri1_copy_to_front(struct dri_context *ctx, struct pipe_surface *surf, - __DRIdrawablePrivate * dPriv, + __DRIdrawable * dPriv, const struct drm_clip_rect *sub_box, struct pipe_fence_handle **fence) { @@ -636,7 +636,7 @@ dri1_flush_frontbuffer(struct pipe_screen *screen, } void -dri_swap_buffers(__DRIdrawablePrivate * dPriv) +dri_swap_buffers(__DRIdrawable * dPriv) { struct dri_context *ctx; struct pipe_surface *back_surf; @@ -668,7 +668,7 @@ dri_swap_buffers(__DRIdrawablePrivate * dPriv) } void -dri_copy_sub_buffer(__DRIdrawablePrivate * dPriv, int x, int y, int w, int h) +dri_copy_sub_buffer(__DRIdrawable * dPriv, int x, int y, int w, int h) { struct pipe_screen *screen = dri_screen(dPriv->driScreenPriv)->pipe_screen; struct drm_clip_rect sub_bbox; diff --git a/src/gallium/state_trackers/dri/dri_drawable.h b/src/gallium/state_trackers/dri/dri_drawable.h index b910930db42..80bb5d7f617 100644 --- a/src/gallium/state_trackers/dri/dri_drawable.h +++ b/src/gallium/state_trackers/dri/dri_drawable.h @@ -41,8 +41,8 @@ struct dri_context; struct dri_drawable { /* dri */ - __DRIdrawablePrivate *dPriv; - __DRIscreenPrivate *sPriv; + __DRIdrawable *dPriv; + __DRIscreen *sPriv; unsigned attachments[8]; unsigned num_attachments; @@ -67,7 +67,7 @@ struct dri_drawable }; static INLINE struct dri_drawable * -dri_drawable(__DRIdrawablePrivate * driDrawPriv) +dri_drawable(__DRIdrawable * driDrawPriv) { return (struct dri_drawable *)driDrawPriv->driverPrivate; } @@ -76,22 +76,22 @@ dri_drawable(__DRIdrawablePrivate * driDrawPriv) * dri_drawable.c */ boolean -dri_create_buffer(__DRIscreenPrivate * sPriv, - __DRIdrawablePrivate * dPriv, +dri_create_buffer(__DRIscreen * sPriv, + __DRIdrawable * dPriv, const __GLcontextModes * visual, boolean isPixmap); void dri_flush_frontbuffer(struct pipe_screen *screen, struct pipe_surface *surf, void *context_private); -void dri_swap_buffers(__DRIdrawablePrivate * dPriv); +void dri_swap_buffers(__DRIdrawable * dPriv); void -dri_copy_sub_buffer(__DRIdrawablePrivate * dPriv, int x, int y, int w, int h); +dri_copy_sub_buffer(__DRIdrawable * dPriv, int x, int y, int w, int h); -void dri_get_buffers(__DRIdrawablePrivate * dPriv); +void dri_get_buffers(__DRIdrawable * dPriv); -void dri_destroy_buffer(__DRIdrawablePrivate * dPriv); +void dri_destroy_buffer(__DRIdrawable * dPriv); void dri2_set_tex_buffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_format, __DRIdrawable *dPriv); diff --git a/src/gallium/state_trackers/dri/dri_screen.c b/src/gallium/state_trackers/dri/dri_screen.c index cb864d45d51..bb12baf2815 100644 --- a/src/gallium/state_trackers/dri/dri_screen.c +++ b/src/gallium/state_trackers/dri/dri_screen.c @@ -202,7 +202,7 @@ dri_fill_in_modes(struct dri_screen *screen, * Get information about previous buffer swaps. */ static int -dri_get_swap_info(__DRIdrawablePrivate * dPriv, __DRIswapInfo * sInfo) +dri_get_swap_info(__DRIdrawable * dPriv, __DRIswapInfo * sInfo) { if (dPriv == NULL || dPriv->driverPrivate == NULL || sInfo == NULL) return -1; @@ -220,7 +220,7 @@ dri_copy_version(struct dri1_api_version *dst, } static const __DRIconfig ** -dri_init_screen(__DRIscreenPrivate * sPriv) +dri_init_screen(__DRIscreen * sPriv) { struct dri_screen *screen; const __DRIconfig **configs; @@ -285,7 +285,7 @@ dri_init_screen(__DRIscreenPrivate * sPriv) * Returns the __GLcontextModes supported by this driver. */ static const __DRIconfig ** -dri_init_screen2(__DRIscreenPrivate * sPriv) +dri_init_screen2(__DRIscreen * sPriv) { struct dri_screen *screen; struct drm_create_screen_arg arg; @@ -319,7 +319,7 @@ dri_init_screen2(__DRIscreenPrivate * sPriv) } static void -dri_destroy_screen(__DRIscreenPrivate * sPriv) +dri_destroy_screen(__DRIscreen * sPriv) { struct dri_screen *screen = dri_screen(sPriv); @@ -346,4 +346,12 @@ PUBLIC const struct __DriverAPIRec driDriverAPI = { .InitScreen2 = dri_init_screen2, }; +/* This is the table of extensions that the loader will dlsym() for. */ +PUBLIC const __DRIextension *__driDriverExtensions[] = { + &driCoreExtension.base, + &driLegacyExtension.base, + &driDRI2Extension.base, + NULL +}; + /* vim: set sw=3 ts=8 sts=3 expandtab: */ diff --git a/src/gallium/state_trackers/dri/dri_screen.h b/src/gallium/state_trackers/dri/dri_screen.h index f6c56d0f0c5..03387a0e813 100644 --- a/src/gallium/state_trackers/dri/dri_screen.h +++ b/src/gallium/state_trackers/dri/dri_screen.h @@ -42,7 +42,7 @@ struct dri_screen { /* dri */ - __DRIscreenPrivate *sPriv; + __DRIscreen *sPriv; /** * Configuration cache with default values for all contexts @@ -63,7 +63,7 @@ struct dri_screen /** cast wrapper */ static INLINE struct dri_screen * -dri_screen(__DRIscreenPrivate * sPriv) +dri_screen(__DRIscreen * sPriv) { return (struct dri_screen *)sPriv->private; } diff --git a/src/gallium/state_trackers/egl/egl_surface.c b/src/gallium/state_trackers/egl/egl_surface.c index 61b7f986831..d55aa51b82d 100644 --- a/src/gallium/state_trackers/egl/egl_surface.c +++ b/src/gallium/state_trackers/egl/egl_surface.c @@ -12,6 +12,7 @@ #include "state_tracker/drm_api.h" +#include "util/u_format.h" #include "util/u_rect.h" /* @@ -114,11 +115,10 @@ drm_create_texture(_EGLDisplay *dpy, templat.tex_usage |= PIPE_TEXTURE_USAGE_PRIMARY; templat.target = PIPE_TEXTURE_2D; templat.last_level = 0; - templat.depth[0] = 1; + templat.depth0 = 1; templat.format = PIPE_FORMAT_A8R8G8B8_UNORM; - templat.width[0] = w; - templat.height[0] = h; - pf_get_block(templat.format, &templat.block); + templat.width0 = w; + templat.height0 = h; texture = screen->texture_create(dev->screen, &templat); diff --git a/src/gallium/state_trackers/glx/xlib/xm_api.c b/src/gallium/state_trackers/glx/xlib/xm_api.c index c76dfb31d2b..1783bc504d9 100644 --- a/src/gallium/state_trackers/glx/xlib/xm_api.c +++ b/src/gallium/state_trackers/glx/xlib/xm_api.c @@ -67,6 +67,10 @@ #include "pipe/p_screen.h" #include "pipe/p_context.h" +#include "trace/tr_screen.h" +#include "trace/tr_context.h" +#include "trace/tr_texture.h" + #include "xm_winsys.h" #include <GL/glx.h> @@ -87,6 +91,8 @@ void xmesa_set_driver( const struct xm_driver *templ ) */ pipe_mutex _xmesa_lock; +static struct pipe_screen *_screen = NULL; +static struct pipe_screen *screen = NULL; /**********************************************************************/ @@ -754,7 +760,7 @@ PUBLIC XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list ) { static GLboolean firstTime = GL_TRUE; - static struct pipe_screen *screen = NULL; + struct pipe_context *_pipe = NULL; struct pipe_context *pipe = NULL; XMesaContext c; GLcontext *mesaCtx; @@ -762,7 +768,8 @@ XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list ) if (firstTime) { pipe_mutex_init(_xmesa_lock); - screen = driver.create_pipe_screen(); + _screen = driver.create_pipe_screen(); + screen = trace_screen_create( _screen ); firstTime = GL_FALSE; } @@ -781,9 +788,11 @@ XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list ) if (screen == NULL) goto fail; - pipe = driver.create_pipe_context(screen, (void *) c); - if (pipe == NULL) + _pipe = driver.create_pipe_context(_screen, (void *) c); + if (_pipe == NULL) goto fail; + pipe = trace_context_create(screen, _pipe); + pipe->priv = c; c->st = st_create_context(pipe, &v->mesa_visual, @@ -1110,6 +1119,12 @@ void XMesaSwapBuffers( XMesaBuffer b ) st_swapbuffers(b->stfb, &frontLeftSurf, NULL); if (frontLeftSurf) { + if (_screen != screen) { + struct trace_surface *tr_surf = trace_surface( frontLeftSurf ); + struct pipe_surface *surf = tr_surf->surface; + frontLeftSurf = surf; + } + driver.display_surface(b, frontLeftSurf); } diff --git a/src/gallium/state_trackers/python/SConscript b/src/gallium/state_trackers/python/SConscript index ec385e7c447..d4fdd43688f 100644 --- a/src/gallium/state_trackers/python/SConscript +++ b/src/gallium/state_trackers/python/SConscript @@ -38,10 +38,12 @@ if 'python' in env['statetrackers']: ], ) + env['no_import_lib'] = 1 + env.SharedLibrary( target = '_gallium', source = [ 'st_hardpipe_winsys.c', ], - LIBS = [pyst, softpipe, trace] + auxiliaries + env['LIBS'], + LIBS = [pyst, softpipe, trace] + gallium + env['LIBS'], ) diff --git a/src/gallium/state_trackers/python/gallium.i b/src/gallium/state_trackers/python/gallium.i index 3f79cc1a3d7..96b13c2258e 100644 --- a/src/gallium/state_trackers/python/gallium.i +++ b/src/gallium/state_trackers/python/gallium.i @@ -46,6 +46,7 @@ #include "util/u_draw_quad.h" #include "util/u_tile.h" #include "util/u_math.h" +#include "util/u_format.h" #include "util/u_memory.h" #include "tgsi/tgsi_text.h" #include "tgsi/tgsi_dump.h" @@ -80,7 +81,6 @@ %rename(Stencil) pipe_stencil_state; %rename(Alpha) pipe_alpha_state; %rename(DepthStencilAlpha) pipe_depth_stencil_alpha_state; -%rename(FormatBlock) pipe_format_block; %rename(Framebuffer) pipe_framebuffer_state; %rename(PolyStipple) pipe_poly_stipple; %rename(Rasterizer) pipe_rasterizer_state; diff --git a/src/gallium/state_trackers/python/p_context.i b/src/gallium/state_trackers/python/p_context.i index a40aa1e5187..84ce1a41e6d 100644 --- a/src/gallium/state_trackers/python/p_context.i +++ b/src/gallium/state_trackers/python/p_context.i @@ -52,11 +52,16 @@ struct st_context { cso_set_blend($self->cso, state); } - void set_sampler( unsigned index, const struct pipe_sampler_state *state ) { + void set_fragment_sampler( unsigned index, const struct pipe_sampler_state *state ) { cso_single_sampler($self->cso, index, state); cso_single_sampler_done($self->cso); } + void set_vertex_sampler( unsigned index, const struct pipe_sampler_state *state ) { + cso_single_vertex_sampler($self->cso, index, state); + cso_single_vertex_sampler_done($self->cso); + } + void set_rasterizer( const struct pipe_rasterizer_state *state ) { cso_set_rasterizer($self->cso, state); } @@ -103,6 +108,25 @@ struct st_context { $self->vs = vs; } + void set_geometry_shader( const struct pipe_shader_state *state ) { + void *gs; + + if(!state) { + cso_set_geometry_shader_handle($self->cso, NULL); + return; + } + + gs = $self->pipe->create_gs_state($self->pipe, state); + if(!gs) + return; + + if(cso_set_geometry_shader_handle($self->cso, gs) != PIPE_OK) + return; + + cso_delete_geometry_shader($self->cso, $self->gs); + $self->gs = gs; + } + /* * Parameter-like state (or properties) */ @@ -142,14 +166,24 @@ struct st_context { cso_set_viewport($self->cso, state); } - void set_sampler_texture(unsigned index, - struct pipe_texture *texture) { + void set_fragment_sampler_texture(unsigned index, + struct pipe_texture *texture) { + if(!texture) + texture = $self->default_texture; + pipe_texture_reference(&$self->fragment_sampler_textures[index], texture); + $self->pipe->set_fragment_sampler_textures($self->pipe, + PIPE_MAX_SAMPLERS, + $self->fragment_sampler_textures); + } + + void set_vertex_sampler_texture(unsigned index, + struct pipe_texture *texture) { if(!texture) texture = $self->default_texture; - pipe_texture_reference(&$self->sampler_textures[index], texture); - $self->pipe->set_sampler_textures($self->pipe, - PIPE_MAX_SAMPLERS, - $self->sampler_textures); + pipe_texture_reference(&$self->vertex_sampler_textures[index], texture); + $self->pipe->set_vertex_sampler_textures($self->pipe, + PIPE_MAX_VERTEX_SAMPLERS, + $self->vertex_sampler_textures); } void set_vertex_buffer(unsigned index, diff --git a/src/gallium/state_trackers/python/p_device.i b/src/gallium/state_trackers/python/p_device.i index f16fe5b0ff7..2dc995adb07 100644 --- a/src/gallium/state_trackers/python/p_device.i +++ b/src/gallium/state_trackers/python/p_device.i @@ -112,10 +112,9 @@ struct st_device { struct pipe_texture templat; memset(&templat, 0, sizeof(templat)); templat.format = format; - pf_get_block(templat.format, &templat.block); - templat.width[0] = width; - templat.height[0] = height; - templat.depth[0] = depth; + templat.width0 = width; + templat.height0 = height; + templat.depth0 = depth; templat.last_level = last_level; templat.target = target; templat.tex_usage = tex_usage; diff --git a/src/gallium/state_trackers/python/p_format.i b/src/gallium/state_trackers/python/p_format.i index 26fb12b387f..68df0093315 100644 --- a/src/gallium/state_trackers/python/p_format.i +++ b/src/gallium/state_trackers/python/p_format.i @@ -152,11 +152,3 @@ enum pipe_format { PIPE_FORMAT_DXT5_SRGBA, }; - -struct pipe_format_block -{ - unsigned size; - unsigned width; - unsigned height; -}; - diff --git a/src/gallium/state_trackers/python/p_texture.i b/src/gallium/state_trackers/python/p_texture.i index 1d513abf3c7..761587dc533 100644 --- a/src/gallium/state_trackers/python/p_texture.i +++ b/src/gallium/state_trackers/python/p_texture.i @@ -59,25 +59,17 @@ } unsigned get_width(unsigned level=0) { - return $self->width[level]; + return u_minify($self->width0, level); } unsigned get_height(unsigned level=0) { - return $self->height[level]; + return u_minify($self->height0, level); } unsigned get_depth(unsigned level=0) { - return $self->depth[level]; + return u_minify($self->depth0, level); } - - unsigned get_nblocksx(unsigned level=0) { - return $self->nblocksx[level]; - } - - unsigned get_nblocksy(unsigned level=0) { - return $self->nblocksy[level]; - } - + /** Get a surface which is a "view" into a texture */ struct st_surface * get_surface(unsigned face=0, unsigned level=0, unsigned zslice=0) @@ -88,7 +80,7 @@ SWIG_exception(SWIG_ValueError, "face out of bounds"); if(level > $self->last_level) SWIG_exception(SWIG_ValueError, "level out of bounds"); - if(zslice >= $self->depth[level]) + if(zslice >= u_minify($self->depth0, level)) SWIG_exception(SWIG_ValueError, "zslice out of bounds"); surface = CALLOC_STRUCT(st_surface); @@ -126,8 +118,6 @@ struct st_surface unsigned format; unsigned width; unsigned height; - unsigned nblocksx; - unsigned nblocksy; ~st_surface() { pipe_texture_reference(&$self->texture, NULL); @@ -142,8 +132,8 @@ struct st_surface struct pipe_transfer *transfer; unsigned stride; - stride = pf_get_nblocksx(&texture->block, w) * texture->block.size; - *LENGTH = pf_get_nblocksy(&texture->block, h) * stride; + stride = util_format_get_stride(texture->format, w); + *LENGTH = util_format_get_nblocksy(texture->format, h) * stride; *STRING = (char *) malloc(*LENGTH); if(!*STRING) return; @@ -169,9 +159,9 @@ struct st_surface struct pipe_transfer *transfer; if(stride == 0) - stride = pf_get_nblocksx(&texture->block, w) * texture->block.size; + stride = util_format_get_stride(texture->format, w); - if(LENGTH < pf_get_nblocksy(&texture->block, h) * stride) + if(LENGTH < util_format_get_nblocksy(texture->format, h) * stride) SWIG_exception(SWIG_ValueError, "offset must be smaller than buffer size"); transfer = screen->get_tex_transfer(screen, @@ -375,25 +365,13 @@ struct st_surface static unsigned st_surface_width_get(struct st_surface *surface) { - return surface->texture->width[surface->level]; + return u_minify(surface->texture->width0, surface->level); } static unsigned st_surface_height_get(struct st_surface *surface) { - return surface->texture->height[surface->level]; - } - - static unsigned - st_surface_nblocksx_get(struct st_surface *surface) - { - return surface->texture->nblocksx[surface->level]; - } - - static unsigned - st_surface_nblocksy_get(struct st_surface *surface) - { - return surface->texture->nblocksy[surface->level]; + return u_minify(surface->texture->height0, surface->level); } %} diff --git a/src/gallium/state_trackers/python/retrace/interpreter.py b/src/gallium/state_trackers/python/retrace/interpreter.py index 348f2e43683..a68709f5cf3 100755 --- a/src/gallium/state_trackers/python/retrace/interpreter.py +++ b/src/gallium/state_trackers/python/retrace/interpreter.py @@ -52,10 +52,10 @@ def make_image(surface, x=None, y=None, w=None, h=None): w = surface.width - x if h is None: h = surface.height - y - data = surface.get_tile_rgba8(0, 0, surface.width, surface.height) + data = surface.get_tile_rgba8(x, y, surface.width, surface.height) import Image - outimage = Image.fromstring('RGBA', (surface.width, surface.height), data, "raw", 'RGBA', 0, 1) + outimage = Image.fromstring('RGBA', (w, h), data, "raw", 'RGBA', 0, 1) return outimage def save_image(filename, surface, x=None, y=None, w=None, h=None): @@ -99,7 +99,6 @@ struct_factories = { "pipe_stencil_state": gallium.Stencil, "pipe_alpha_state": gallium.Alpha, "pipe_depth_stencil_alpha_state": gallium.DepthStencilAlpha, - "pipe_format_block": gallium.FormatBlock, #"pipe_framebuffer_state": gallium.Framebuffer, "pipe_poly_stipple": gallium.PolyStipple, "pipe_rasterizer_state": gallium.Rasterizer, @@ -279,9 +278,9 @@ class Screen(Object): def texture_create(self, templat): return self.real.texture_create( format = templat.format, - width = templat.width[0], - height = templat.height[0], - depth = templat.depth[0], + width = templat.width, + height = templat.height, + depth = templat.depth, last_level = templat.last_level, target = templat.target, tex_usage = templat.tex_usage, @@ -307,7 +306,7 @@ class Screen(Object): def surface_write(self, surface, data, stride, size): if surface is None: return - assert surface.nblocksy * stride == size +# assert surface.nblocksy * stride == size surface.put_tile_raw(0, 0, surface.width, surface.height, data, stride) def get_tex_transfer(self, texture, face, level, zslice, usage, x, y, w, h): @@ -388,9 +387,13 @@ class Context(Object): def delete_sampler_state(self, state): pass - def bind_sampler_states(self, num_states, states): + def bind_vertex_sampler_states(self, num_states, states): for i in range(num_states): - self.real.set_sampler(i, states[i]) + self.real.set_vertex_sampler(i, states[i]) + + def bind_fragment_sampler_states(self, num_states, states): + for i in range(num_states): + self.real.set_fragment_sampler(i, states[i]) def create_rasterizer_state(self, state): return state @@ -486,9 +489,13 @@ class Context(Object): def set_viewport_state(self, state): self.real.set_viewport(state) - def set_sampler_textures(self, num_textures, textures): + def set_fragment_sampler_textures(self, num_textures, textures): + for i in range(num_textures): + self.real.set_fragment_sampler_texture(i, textures[i]) + + def set_vertex_sampler_textures(self, num_textures, textures): for i in range(num_textures): - self.real.set_sampler_texture(i, textures[i]) + self.real.set_vertex_sampler_texture(i, textures[i]) def set_vertex_buffers(self, num_buffers, buffers): self.vbufs = buffers[0:num_buffers] @@ -508,10 +515,6 @@ class Context(Object): self.real.set_vertex_element(i, elements[i]) self.real.set_vertex_elements(num_elements) - def set_edgeflags(self, bitfield): - # FIXME - pass - def dump_vertices(self, start, count): if not self.interpreter.verbosity(2): return diff --git a/src/gallium/state_trackers/python/samples/gs.py b/src/gallium/state_trackers/python/samples/gs.py new file mode 100644 index 00000000000..1ceead5f17c --- /dev/null +++ b/src/gallium/state_trackers/python/samples/gs.py @@ -0,0 +1,254 @@ +#!/usr/bin/env python +########################################################################## +# +# Copyright 2009 VMware +# All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sub license, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice (including the +# next paragraph) shall be included in all copies or substantial portions +# of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. +# IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR +# ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +########################################################################## + + +from gallium import * + + +def make_image(surface): + data = surface.get_tile_rgba8(0, 0, surface.width, surface.height) + + import Image + outimage = Image.fromstring('RGBA', (surface.width, surface.height), data, "raw", 'RGBA', 0, 1) + return outimage + +def save_image(filename, surface): + outimage = make_image(surface) + outimage.save(filename, "PNG") + +def show_image(surface): + outimage = make_image(surface) + + import Tkinter as tk + from PIL import Image, ImageTk + root = tk.Tk() + + root.title('background image') + + image1 = ImageTk.PhotoImage(outimage) + w = image1.width() + h = image1.height() + x = 100 + y = 100 + root.geometry("%dx%d+%d+%d" % (w, h, x, y)) + panel1 = tk.Label(root, image=image1) + panel1.pack(side='top', fill='both', expand='yes') + panel1.image = image1 + root.mainloop() + + +def test(dev): + ctx = dev.context_create() + + width = 255 + height = 255 + minz = 0.0 + maxz = 1.0 + + # disabled blending/masking + blend = Blend() + blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE + blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE + blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO + blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO + blend.colormask = PIPE_MASK_RGBA + ctx.set_blend(blend) + + # depth/stencil/alpha + depth_stencil_alpha = DepthStencilAlpha() + depth_stencil_alpha.depth.enabled = 1 + depth_stencil_alpha.depth.writemask = 1 + depth_stencil_alpha.depth.func = PIPE_FUNC_LESS + ctx.set_depth_stencil_alpha(depth_stencil_alpha) + + # rasterizer + rasterizer = Rasterizer() + rasterizer.front_winding = PIPE_WINDING_CW + rasterizer.cull_mode = PIPE_WINDING_NONE + rasterizer.scissor = 1 + ctx.set_rasterizer(rasterizer) + + # viewport + viewport = Viewport() + scale = FloatArray(4) + scale[0] = width / 2.0 + scale[1] = -height / 2.0 + scale[2] = (maxz - minz) / 2.0 + scale[3] = 1.0 + viewport.scale = scale + translate = FloatArray(4) + translate[0] = width / 2.0 + translate[1] = height / 2.0 + translate[2] = (maxz - minz) / 2.0 + translate[3] = 0.0 + viewport.translate = translate + ctx.set_viewport(viewport) + + # samplers + sampler = Sampler() + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE + sampler.min_img_filter = PIPE_TEX_MIPFILTER_NEAREST + sampler.mag_img_filter = PIPE_TEX_MIPFILTER_NEAREST + sampler.normalized_coords = 1 + ctx.set_sampler(0, sampler) + + # scissor + scissor = Scissor() + scissor.minx = 0 + scissor.miny = 0 + scissor.maxx = width + scissor.maxy = height + ctx.set_scissor(scissor) + + clip = Clip() + clip.nr = 0 + ctx.set_clip(clip) + + # framebuffer + cbuf = dev.texture_create( + PIPE_FORMAT_X8R8G8B8_UNORM, + width, height, + tex_usage=PIPE_TEXTURE_USAGE_DISPLAY_TARGET, + ).get_surface() + zbuf = dev.texture_create( + PIPE_FORMAT_Z16_UNORM, + width, height, + tex_usage=PIPE_TEXTURE_USAGE_DEPTH_STENCIL, + ).get_surface() + fb = Framebuffer() + fb.width = width + fb.height = height + fb.nr_cbufs = 1 + fb.set_cbuf(0, cbuf) + fb.set_zsbuf(zbuf) + ctx.set_framebuffer(fb) + rgba = FloatArray(4); + rgba[0] = 0.0 + rgba[1] = 0.0 + rgba[2] = 0.0 + rgba[3] = 0.0 + ctx.clear(PIPE_CLEAR_COLOR | PIPE_CLEAR_DEPTHSTENCIL, rgba, 1.0, 0xff) + + # vertex shader + vs = Shader(''' + VERT + DCL IN[0], POSITION, CONSTANT + DCL IN[1], COLOR, CONSTANT + DCL OUT[0], POSITION, CONSTANT + DCL OUT[1], COLOR, CONSTANT + 0:MOV OUT[0], IN[0] + 1:MOV OUT[1], IN[1] + 2:END + ''') + ctx.set_vertex_shader(vs) + + gs = Shader(''' + GEOM + PROPERTY GS_INPUT_PRIMITIVE TRIANGLES + PROPERTY GS_OUTPUT_PRIMITIVE TRIANGLE_STRIP + DCL IN[][0], POSITION, CONSTANT + DCL IN[][1], COLOR, CONSTANT + DCL OUT[0], POSITION, CONSTANT + DCL OUT[1], COLOR, CONSTANT + 0:MOV OUT[0], IN[0][0] + 1:MOV OUT[1], IN[0][1] + 2:EMIT + 3:MOV OUT[0], IN[1][0] + 4:MOV OUT[1], IN[1][1] + 5:EMIT + 6:MOV OUT[0], IN[2][0] + 7:MOV OUT[1], IN[2][1] + 8:EMIT + 9:ENDPRIM + 10:END + ''') + ctx.set_geometry_shader(gs) + + # fragment shader + fs = Shader(''' + FRAG + DCL IN[0], COLOR, LINEAR + DCL OUT[0], COLOR, CONSTANT + 0:MOV OUT[0], IN[0] + 1:END + ''') + ctx.set_fragment_shader(fs) + + nverts = 3 + nattrs = 2 + verts = FloatArray(nverts * nattrs * 4) + + verts[ 0] = 0.0 # x1 + verts[ 1] = 0.8 # y1 + verts[ 2] = 0.2 # z1 + verts[ 3] = 1.0 # w1 + verts[ 4] = 1.0 # r1 + verts[ 5] = 0.0 # g1 + verts[ 6] = 0.0 # b1 + verts[ 7] = 1.0 # a1 + verts[ 8] = -0.8 # x2 + verts[ 9] = -0.8 # y2 + verts[10] = 0.5 # z2 + verts[11] = 1.0 # w2 + verts[12] = 0.0 # r2 + verts[13] = 1.0 # g2 + verts[14] = 0.0 # b2 + verts[15] = 1.0 # a2 + verts[16] = 0.8 # x3 + verts[17] = -0.8 # y3 + verts[18] = 0.8 # z3 + verts[19] = 1.0 # w3 + verts[20] = 0.0 # r3 + verts[21] = 0.0 # g3 + verts[22] = 1.0 # b3 + verts[23] = 1.0 # a3 + + ctx.draw_vertices(PIPE_PRIM_TRIANGLES, + nverts, + nattrs, + verts) + + ctx.flush() + + show_image(cbuf) + #show_image(zbuf) + #save_image('cbuf.png', cbuf) + #save_image('zbuf.png', zbuf) + + + +def main(): + dev = Device() + test(dev) + + +if __name__ == '__main__': + main() diff --git a/src/gallium/state_trackers/python/samples/tri.py b/src/gallium/state_trackers/python/samples/tri.py index b721e0b5750..af80426dc6c 100644 --- a/src/gallium/state_trackers/python/samples/tri.py +++ b/src/gallium/state_trackers/python/samples/tri.py @@ -118,7 +118,7 @@ def test(dev): sampler.min_img_filter = PIPE_TEX_MIPFILTER_NEAREST sampler.mag_img_filter = PIPE_TEX_MIPFILTER_NEAREST sampler.normalized_coords = 1 - ctx.set_sampler(0, sampler) + ctx.set_fragment_sampler(0, sampler) # scissor scissor = Scissor() @@ -159,7 +159,7 @@ def test(dev): # vertex shader vs = Shader(''' - VERT1.1 + VERT DCL IN[0], POSITION, CONSTANT DCL IN[1], COLOR, CONSTANT DCL OUT[0], POSITION, CONSTANT @@ -172,7 +172,7 @@ def test(dev): # fragment shader fs = Shader(''' - FRAG1.1 + FRAG DCL IN[0], COLOR, LINEAR DCL OUT[0], COLOR, CONSTANT 0:MOV OUT[0], IN[0] diff --git a/src/gallium/state_trackers/python/st_device.c b/src/gallium/state_trackers/python/st_device.c index ea7d18738f6..d144af2447d 100644 --- a/src/gallium/state_trackers/python/st_device.c +++ b/src/gallium/state_trackers/python/st_device.c @@ -62,8 +62,9 @@ st_device_reference(struct st_device **ptr, struct st_device *st_dev) { struct st_device *old_dev = *ptr; - if (pipe_reference((struct pipe_reference **)ptr, &st_dev->reference)) + if (pipe_reference(&(*ptr)->reference, &st_dev->reference)) st_device_really_destroy(old_dev); + *ptr = st_dev; } @@ -134,7 +135,9 @@ st_context_destroy(struct st_context *st_ctx) st_ctx->pipe->destroy(st_ctx->pipe); for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) - pipe_texture_reference(&st_ctx->sampler_textures[i], NULL); + pipe_texture_reference(&st_ctx->fragment_sampler_textures[i], NULL); + for(i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; ++i) + pipe_texture_reference(&st_ctx->vertex_sampler_textures[i], NULL); pipe_texture_reference(&st_ctx->default_texture, NULL); FREE(st_ctx); @@ -249,12 +252,9 @@ st_context_create(struct st_device *st_dev) memset( &templat, 0, sizeof( templat ) ); templat.target = PIPE_TEXTURE_2D; templat.format = PIPE_FORMAT_A8R8G8B8_UNORM; - templat.block.size = 4; - templat.block.width = 1; - templat.block.height = 1; - templat.width[0] = 1; - templat.height[0] = 1; - templat.depth[0] = 1; + templat.width0 = 1; + templat.height0 = 1; + templat.depth0 = 1; templat.last_level = 0; st_ctx->default_texture = screen->texture_create( screen, &templat ); @@ -264,8 +264,8 @@ st_context_create(struct st_device *st_dev) 0, 0, 0, PIPE_TRANSFER_WRITE, 0, 0, - st_ctx->default_texture->width[0], - st_ctx->default_texture->height[0]); + st_ctx->default_texture->width0, + st_ctx->default_texture->height0); if (transfer) { uint32_t *map; map = (uint32_t *) screen->transfer_map(screen, transfer); @@ -278,9 +278,12 @@ st_context_create(struct st_device *st_dev) } for (i = 0; i < PIPE_MAX_SAMPLERS; i++) - pipe_texture_reference(&st_ctx->sampler_textures[i], st_ctx->default_texture); + pipe_texture_reference(&st_ctx->fragment_sampler_textures[i], st_ctx->default_texture); + for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) + pipe_texture_reference(&st_ctx->vertex_sampler_textures[i], st_ctx->default_texture); - cso_set_sampler_textures(st_ctx->cso, PIPE_MAX_SAMPLERS, st_ctx->sampler_textures); + cso_set_sampler_textures(st_ctx->cso, PIPE_MAX_SAMPLERS, st_ctx->fragment_sampler_textures); + cso_set_vertex_sampler_textures(st_ctx->cso, PIPE_MAX_VERTEX_SAMPLERS, st_ctx->vertex_sampler_textures); } /* vertex shader */ diff --git a/src/gallium/state_trackers/python/st_device.h b/src/gallium/state_trackers/python/st_device.h index a246b6a1f25..f786e134118 100644 --- a/src/gallium/state_trackers/python/st_device.h +++ b/src/gallium/state_trackers/python/st_device.h @@ -57,9 +57,11 @@ struct st_context { void *vs; void *fs; + void *gs; struct pipe_texture *default_texture; - struct pipe_texture *sampler_textures[PIPE_MAX_SAMPLERS]; + struct pipe_texture *fragment_sampler_textures[PIPE_MAX_SAMPLERS]; + struct pipe_texture *vertex_sampler_textures[PIPE_MAX_VERTEX_SAMPLERS]; unsigned num_vertex_buffers; struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; diff --git a/src/gallium/state_trackers/python/st_sample.c b/src/gallium/state_trackers/python/st_sample.c index 53a01891e12..96377414211 100644 --- a/src/gallium/state_trackers/python/st_sample.c +++ b/src/gallium/state_trackers/python/st_sample.c @@ -30,6 +30,7 @@ #include "pipe/p_format.h" #include "pipe/p_state.h" #include "pipe/p_inlines.h" +#include "util/u_format.h" #include "util/u_tile.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -423,7 +424,6 @@ dxt5_rgba_data[] = { static INLINE void st_sample_dxt_pixel_block(enum pipe_format format, - const struct pipe_format_block *block, uint8_t *raw, float *rgba, unsigned rgba_stride, unsigned w, unsigned h) @@ -462,21 +462,21 @@ st_sample_dxt_pixel_block(enum pipe_format format, for(ch = 0; ch < 4; ++ch) rgba[y*rgba_stride + x*4 + ch] = (float)(data[i].rgba[y*4*4 + x*4 + ch])/255.0f; - memcpy(raw, data[i].raw, block->size); + memcpy(raw, data[i].raw, util_format_get_blocksize(format)); } static INLINE void st_sample_generic_pixel_block(enum pipe_format format, - const struct pipe_format_block *block, uint8_t *raw, float *rgba, unsigned rgba_stride, unsigned w, unsigned h) { unsigned i; unsigned x, y, ch; + int blocksize = util_format_get_blocksize(format); - for(i = 0; i < block->size; ++i) + for(i = 0; i < blocksize; ++i) raw[i] = (uint8_t)st_random(); @@ -503,7 +503,6 @@ st_sample_generic_pixel_block(enum pipe_format format, */ void st_sample_pixel_block(enum pipe_format format, - const struct pipe_format_block *block, void *raw, float *rgba, unsigned rgba_stride, unsigned w, unsigned h) @@ -513,11 +512,11 @@ st_sample_pixel_block(enum pipe_format format, case PIPE_FORMAT_DXT1_RGBA: case PIPE_FORMAT_DXT3_RGBA: case PIPE_FORMAT_DXT5_RGBA: - st_sample_dxt_pixel_block(format, block, raw, rgba, rgba_stride, w, h); + st_sample_dxt_pixel_block(format, raw, rgba, rgba_stride, w, h); break; default: - st_sample_generic_pixel_block(format, block, raw, rgba, rgba_stride, w, h); + st_sample_generic_pixel_block(format, raw, rgba, rgba_stride, w, h); break; } } @@ -528,8 +527,8 @@ st_sample_surface(struct st_surface *surface, float *rgba) { struct pipe_texture *texture = surface->texture; struct pipe_screen *screen = texture->screen; - unsigned width = texture->width[surface->level]; - unsigned height = texture->height[surface->level]; + unsigned width = u_minify(texture->width0, surface->level); + unsigned height = u_minify(texture->height0, surface->level); uint rgba_stride = width * 4; struct pipe_transfer *transfer; void *raw; @@ -548,18 +547,23 @@ st_sample_surface(struct st_surface *surface, float *rgba) raw = screen->transfer_map(screen, transfer); if (raw) { - const struct pipe_format_block *block = &texture->block; + enum pipe_format format = texture->format; uint x, y; + int nblocksx = util_format_get_nblocksx(format, width); + int nblocksy = util_format_get_nblocksy(format, height); + int blockwidth = util_format_get_blockwidth(format); + int blockheight = util_format_get_blockheight(format); + int blocksize = util_format_get_blocksize(format); - for (y = 0; y < transfer->nblocksy; ++y) { - for (x = 0; x < transfer->nblocksx; ++x) { - st_sample_pixel_block(texture->format, - block, - (uint8_t *) raw + y * transfer->stride + x * block->size, - rgba + y * block->height * rgba_stride + x * block->width * 4, + + for (y = 0; y < nblocksy; ++y) { + for (x = 0; x < nblocksx; ++x) { + st_sample_pixel_block(format, + (uint8_t *) raw + y * transfer->stride + x * blocksize, + rgba + y * blockheight * rgba_stride + x * blockwidth * 4, rgba_stride, - MIN2(block->width, width - x*block->width), - MIN2(block->height, height - y*block->height)); + MIN2(blockwidth, width - x*blockwidth), + MIN2(blockheight, height - y*blockheight)); } } diff --git a/src/gallium/state_trackers/python/st_sample.h b/src/gallium/state_trackers/python/st_sample.h index 0a27083549f..888114d3021 100644 --- a/src/gallium/state_trackers/python/st_sample.h +++ b/src/gallium/state_trackers/python/st_sample.h @@ -35,7 +35,6 @@ void st_sample_pixel_block(enum pipe_format format, - const struct pipe_format_block *block, void *raw, float *rgba, unsigned rgba_stride, unsigned w, unsigned h); diff --git a/src/gallium/state_trackers/python/st_softpipe_winsys.c b/src/gallium/state_trackers/python/st_softpipe_winsys.c index f0abd12e3dc..a3294e877a6 100644 --- a/src/gallium/state_trackers/python/st_softpipe_winsys.c +++ b/src/gallium/state_trackers/python/st_softpipe_winsys.c @@ -40,6 +40,7 @@ #include "pipe/p_format.h" #include "pipe/p_context.h" #include "pipe/p_inlines.h" +#include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" #include "softpipe/sp_winsys.h" @@ -157,16 +158,6 @@ st_softpipe_user_buffer_create(struct pipe_winsys *winsys, } -/** - * Round n up to next multiple. - */ -static INLINE unsigned -round_up(unsigned n, unsigned multiple) -{ - return (n + multiple - 1) & ~(multiple - 1); -} - - static struct pipe_buffer * st_softpipe_surface_buffer_create(struct pipe_winsys *winsys, unsigned width, unsigned height, @@ -176,13 +167,10 @@ st_softpipe_surface_buffer_create(struct pipe_winsys *winsys, unsigned *stride) { const unsigned alignment = 64; - struct pipe_format_block block; - unsigned nblocksx, nblocksy; + unsigned nblocksy; - pf_get_block(format, &block); - nblocksx = pf_get_nblocksx(&block, width); - nblocksy = pf_get_nblocksy(&block, height); - *stride = round_up(nblocksx * block.size, alignment); + nblocksy = util_format_get_nblocksy(format, height); + *stride = align(util_format_get_stride(format, width), alignment); return winsys->buffer_create(winsys, alignment, usage, diff --git a/src/gallium/state_trackers/python/tests/base.py b/src/gallium/state_trackers/python/tests/base.py index 202ccfc3509..b022d073fda 100755 --- a/src/gallium/state_trackers/python/tests/base.py +++ b/src/gallium/state_trackers/python/tests/base.py @@ -47,7 +47,7 @@ for name, value in globals().items(): formats[value] = name def is_depth_stencil_format(format): - # FIXME: make and use binding to pf_is_depth_stencil + # FIXME: make and use binding to util_format_is_depth_or_stencil return format in ( PIPE_FORMAT_Z32_UNORM, PIPE_FORMAT_Z24S8_UNORM, diff --git a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-abs.sh b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-abs.sh index 7a0006bf660..103d7497f48 100644 --- a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-abs.sh +++ b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-abs.sh @@ -1,4 +1,4 @@ -FRAG1.1 +FRAG DCL IN[0], COLOR, LINEAR DCL OUT[0], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-add.sh b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-add.sh index f7836c85ddb..bcb94205963 100644 --- a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-add.sh +++ b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-add.sh @@ -1,4 +1,4 @@ -FRAG1.1 +FRAG DCL IN[0], COLOR, LINEAR DCL OUT[0], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-dp3.sh b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-dp3.sh index c89cd748a80..b5281975d4a 100644 --- a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-dp3.sh +++ b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-dp3.sh @@ -1,4 +1,4 @@ -FRAG1.1 +FRAG DCL IN[0], COLOR, LINEAR DCL OUT[0], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-dp4.sh b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-dp4.sh index 6517e3c4945..d59df76e70b 100644 --- a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-dp4.sh +++ b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-dp4.sh @@ -1,4 +1,4 @@ -FRAG1.1 +FRAG DCL IN[0], COLOR, LINEAR DCL OUT[0], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-dst.sh b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-dst.sh index 464880ba68f..fbb20fa9f62 100644 --- a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-dst.sh +++ b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-dst.sh @@ -1,4 +1,4 @@ -FRAG1.1 +FRAG DCL IN[0], COLOR, LINEAR DCL OUT[0], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-ex2.sh b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-ex2.sh index 2684076f1d6..b511288f4b6 100644 --- a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-ex2.sh +++ b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-ex2.sh @@ -1,4 +1,4 @@ -FRAG1.1 +FRAG DCL IN[0], COLOR, LINEAR DCL OUT[0], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-flr.sh b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-flr.sh index ad11e28918a..99a2f96103a 100644 --- a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-flr.sh +++ b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-flr.sh @@ -1,4 +1,4 @@ -FRAG1.1 +FRAG DCL IN[0], COLOR, LINEAR DCL OUT[0], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-frc.sh b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-frc.sh index 4f3aa30d666..a54c2623b0a 100644 --- a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-frc.sh +++ b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-frc.sh @@ -1,4 +1,4 @@ -FRAG1.1 +FRAG DCL IN[0], COLOR, LINEAR DCL OUT[0], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-lg2.sh b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-lg2.sh index 54c7c644598..5f5b4be1092 100644 --- a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-lg2.sh +++ b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-lg2.sh @@ -1,4 +1,4 @@ -FRAG1.1 +FRAG DCL IN[0], COLOR, LINEAR DCL OUT[0], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-lit.sh b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-lit.sh index 0e78ef86b54..6323c4712dc 100644 --- a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-lit.sh +++ b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-lit.sh @@ -1,4 +1,4 @@ -FRAG1.1 +FRAG DCL IN[0], COLOR, LINEAR DCL OUT[0], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-lrp.sh b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-lrp.sh index e9ee0f81478..740809d22e0 100644 --- a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-lrp.sh +++ b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-lrp.sh @@ -1,4 +1,4 @@ -FRAG1.1 +FRAG DCL IN[0], COLOR, LINEAR DCL OUT[0], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-mad.sh b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-mad.sh index 439acd5bbde..413b9dc3916 100644 --- a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-mad.sh +++ b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-mad.sh @@ -1,4 +1,4 @@ -FRAG1.1 +FRAG DCL IN[0], COLOR, LINEAR DCL OUT[0], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-max.sh b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-max.sh index ab21b245dd8..b69f2132612 100644 --- a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-max.sh +++ b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-max.sh @@ -1,4 +1,4 @@ -FRAG1.1 +FRAG DCL IN[0], COLOR, LINEAR DCL OUT[0], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-min.sh b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-min.sh index 969ae73d98e..df284f49e71 100644 --- a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-min.sh +++ b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-min.sh @@ -1,4 +1,4 @@ -FRAG1.1 +FRAG DCL IN[0], COLOR, LINEAR DCL OUT[0], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-mov.sh b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-mov.sh index 612975e0570..64af72f381b 100644 --- a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-mov.sh +++ b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-mov.sh @@ -1,4 +1,4 @@ -FRAG1.1 +FRAG DCL IN[0], COLOR, LINEAR DCL OUT[0], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-mul.sh b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-mul.sh index ed158b0fc69..bdd0b0026b9 100644 --- a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-mul.sh +++ b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-mul.sh @@ -1,4 +1,4 @@ -FRAG1.1 +FRAG DCL IN[0], COLOR, LINEAR DCL OUT[0], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-rcp.sh b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-rcp.sh index cc9feef07e1..f4b611b26ab 100644 --- a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-rcp.sh +++ b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-rcp.sh @@ -1,4 +1,4 @@ -FRAG1.1 +FRAG DCL IN[0], COLOR, LINEAR DCL OUT[0], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-rsq.sh b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-rsq.sh index 695621fdc97..d1e9b0b53be 100644 --- a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-rsq.sh +++ b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-rsq.sh @@ -1,4 +1,4 @@ -FRAG1.1 +FRAG DCL IN[0], COLOR, LINEAR DCL OUT[0], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-sge.sh b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-sge.sh index 9505bc3c3e3..1f33fac4727 100644 --- a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-sge.sh +++ b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-sge.sh @@ -1,4 +1,4 @@ -FRAG1.1 +FRAG DCL IN[0], COLOR, LINEAR DCL OUT[0], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-srcmod-abs.sh b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-srcmod-abs.sh index 9cd4b682955..ecd19248c64 100644 --- a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-srcmod-abs.sh +++ b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-srcmod-abs.sh @@ -1,4 +1,4 @@ -FRAG1.1 +FRAG DCL IN[0], COLOR, LINEAR DCL OUT[0], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-srcmod-absneg.sh b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-srcmod-absneg.sh index acd6aa750dd..c2d99ddd15b 100644 --- a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-srcmod-absneg.sh +++ b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-srcmod-absneg.sh @@ -1,4 +1,4 @@ -FRAG1.1 +FRAG DCL IN[0], COLOR, LINEAR DCL OUT[0], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-srcmod-neg.sh b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-srcmod-neg.sh index ba1b61503b8..a08ab6d2dcb 100644 --- a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-srcmod-neg.sh +++ b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-srcmod-neg.sh @@ -1,4 +1,4 @@ -FRAG1.1 +FRAG DCL IN[0], COLOR, LINEAR DCL OUT[0], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-srcmod-swz.sh b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-srcmod-swz.sh index 192aa7bb26b..6110647d979 100644 --- a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-srcmod-swz.sh +++ b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-srcmod-swz.sh @@ -1,4 +1,4 @@ -FRAG1.1 +FRAG DCL IN[0], COLOR, LINEAR DCL OUT[0], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-sub.sh b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-sub.sh index 83441fa8207..673fca139aa 100644 --- a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-sub.sh +++ b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-sub.sh @@ -1,4 +1,4 @@ -FRAG1.1 +FRAG DCL IN[0], COLOR, LINEAR DCL OUT[0], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-xpd.sh b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-xpd.sh index d6f66c4927a..6ec8b1184cc 100644 --- a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-xpd.sh +++ b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-xpd.sh @@ -1,4 +1,4 @@ -FRAG1.1 +FRAG DCL IN[0], COLOR, LINEAR DCL OUT[0], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/fragment-shader/fragment-shader.py b/src/gallium/state_trackers/python/tests/regress/fragment-shader/fragment-shader.py index d60fb38d1ae..eed6cdd1e64 100644 --- a/src/gallium/state_trackers/python/tests/regress/fragment-shader/fragment-shader.py +++ b/src/gallium/state_trackers/python/tests/regress/fragment-shader/fragment-shader.py @@ -96,7 +96,7 @@ def test(dev, name): sampler.min_img_filter = PIPE_TEX_MIPFILTER_NEAREST sampler.mag_img_filter = PIPE_TEX_MIPFILTER_NEAREST sampler.normalized_coords = 1 - ctx.set_sampler(0, sampler) + ctx.set_fragment_sampler(0, sampler) # scissor scissor = Scissor() @@ -131,7 +131,7 @@ def test(dev, name): # vertex shader vs = Shader(''' - VERT1.1 + VERT DCL IN[0], POSITION DCL IN[1], COLOR DCL OUT[0], POSITION diff --git a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-abs.sh b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-abs.sh index f0d0d5de171..79c9ca69fb1 100644 --- a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-abs.sh +++ b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-abs.sh @@ -1,4 +1,4 @@ -VERT1.1 +VERT DCL IN[0], POSITION DCL IN[1], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-add.sh b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-add.sh index 936c851c9dc..ca97ad05df0 100644 --- a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-add.sh +++ b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-add.sh @@ -1,4 +1,4 @@ -VERT1.1 +VERT DCL IN[0], POSITION DCL IN[1], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-arl.sh b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-arl.sh index 7638e96346e..321140e89e8 100644 --- a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-arl.sh +++ b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-arl.sh @@ -1,4 +1,4 @@ -VERT1.1 +VERT DCL IN[0], POSITION DCL OUT[0], POSITION diff --git a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-arr.sh b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-arr.sh index 28ce6f9a0cf..d60ea46b367 100644 --- a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-arr.sh +++ b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-arr.sh @@ -1,4 +1,4 @@ -VERT1.1 +VERT DCL IN[0], POSITION DCL OUT[0], POSITION diff --git a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-dp3.sh b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-dp3.sh index b57d68520fc..caff622fe61 100644 --- a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-dp3.sh +++ b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-dp3.sh @@ -1,4 +1,4 @@ -VERT1.1 +VERT DCL IN[0], POSITION DCL IN[1], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-dp4.sh b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-dp4.sh index 0eb31719c58..3dd2fd1c2f7 100644 --- a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-dp4.sh +++ b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-dp4.sh @@ -1,4 +1,4 @@ -VERT1.1 +VERT DCL IN[0], POSITION DCL IN[1], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-dst.sh b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-dst.sh index dc5e0eb92e5..da9cc18dfc0 100644 --- a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-dst.sh +++ b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-dst.sh @@ -1,4 +1,4 @@ -VERT1.1 +VERT DCL IN[0], POSITION DCL IN[1], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-ex2.sh b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-ex2.sh index 34057af4e66..4637227e5c4 100644 --- a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-ex2.sh +++ b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-ex2.sh @@ -1,4 +1,4 @@ -VERT1.1 +VERT DCL IN[0], POSITION DCL IN[1], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-flr.sh b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-flr.sh index 44ad708119d..aa80d6e394c 100644 --- a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-flr.sh +++ b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-flr.sh @@ -1,4 +1,4 @@ -VERT1.1 +VERT DCL IN[0], POSITION DCL OUT[0], POSITION diff --git a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-frc.sh b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-frc.sh index d179749de84..64d1a494e13 100644 --- a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-frc.sh +++ b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-frc.sh @@ -1,4 +1,4 @@ -VERT1.1 +VERT DCL IN[0], POSITION DCL OUT[0], POSITION diff --git a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-lg2.sh b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-lg2.sh index f6e08d087c1..5cf16fd1aa8 100644 --- a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-lg2.sh +++ b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-lg2.sh @@ -1,4 +1,4 @@ -VERT1.1 +VERT DCL IN[0], POSITION DCL IN[1], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-lit.sh b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-lit.sh index da98f30928e..a4a752d4d23 100644 --- a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-lit.sh +++ b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-lit.sh @@ -1,4 +1,4 @@ -VERT1.1 +VERT DCL IN[0], POSITION DCL IN[1], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-lrp.sh b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-lrp.sh index 8c262580e23..4bb5f3ec3f4 100644 --- a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-lrp.sh +++ b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-lrp.sh @@ -1,4 +1,4 @@ -VERT1.1 +VERT DCL IN[0], POSITION DCL IN[1], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-mad.sh b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-mad.sh index eb07a3bd565..daaa941f15f 100644 --- a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-mad.sh +++ b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-mad.sh @@ -1,4 +1,4 @@ -VERT1.1 +VERT DCL IN[0], POSITION DCL IN[1], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-max.sh b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-max.sh index 2d8b1fe3bfb..af279ec7f4a 100644 --- a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-max.sh +++ b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-max.sh @@ -1,4 +1,4 @@ -VERT1.1 +VERT DCL IN[0], POSITION DCL IN[1], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-min.sh b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-min.sh index 84af0e29051..46d886c55b8 100644 --- a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-min.sh +++ b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-min.sh @@ -1,4 +1,4 @@ -VERT1.1 +VERT DCL IN[0], POSITION DCL IN[1], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-mov.sh b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-mov.sh index bcdec07c204..0ef91637e03 100644 --- a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-mov.sh +++ b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-mov.sh @@ -1,4 +1,4 @@ -VERT1.1 +VERT DCL IN[0], POSITION DCL IN[1], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-mul.sh b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-mul.sh index f3b57c30382..d34f6cd6e3c 100644 --- a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-mul.sh +++ b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-mul.sh @@ -1,4 +1,4 @@ -VERT1.1 +VERT DCL IN[0], POSITION DCL IN[1], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-rcp.sh b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-rcp.sh index 78af589b5c0..cfb3ec37dc2 100644 --- a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-rcp.sh +++ b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-rcp.sh @@ -1,4 +1,4 @@ -VERT1.1 +VERT DCL IN[0], POSITION DCL IN[1], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-rsq.sh b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-rsq.sh index 1675c7d5ff1..faf1e6e7d41 100644 --- a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-rsq.sh +++ b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-rsq.sh @@ -1,4 +1,4 @@ -VERT1.1 +VERT DCL IN[0], POSITION DCL IN[1], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-sge.sh b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-sge.sh index 3d92cd5aaeb..6de1d071ef4 100644 --- a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-sge.sh +++ b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-sge.sh @@ -1,4 +1,4 @@ -VERT1.1 +VERT DCL IN[0], POSITION DCL IN[1], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-slt.sh b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-slt.sh index 85c60ff4ec8..9a524229845 100644 --- a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-slt.sh +++ b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-slt.sh @@ -1,4 +1,4 @@ -VERT1.1 +VERT DCL IN[0], POSITION DCL IN[1], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-srcmod-abs.sh b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-srcmod-abs.sh index 6db417a62e5..dc87ce4ae70 100644 --- a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-srcmod-abs.sh +++ b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-srcmod-abs.sh @@ -1,4 +1,4 @@ -VERT1.1 +VERT DCL IN[0], POSITION DCL IN[1], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-srcmod-absneg.sh b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-srcmod-absneg.sh index fc832380520..d82eb08fd30 100644 --- a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-srcmod-absneg.sh +++ b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-srcmod-absneg.sh @@ -1,4 +1,4 @@ -VERT1.1 +VERT DCL IN[0], POSITION DCL IN[1], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-srcmod-neg.sh b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-srcmod-neg.sh index ce4e90b5e10..e39bebcd9f1 100644 --- a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-srcmod-neg.sh +++ b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-srcmod-neg.sh @@ -1,4 +1,4 @@ -VERT1.1 +VERT DCL IN[0], POSITION DCL IN[1], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-srcmod-swz.sh b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-srcmod-swz.sh index c03de4c674e..6f20552f212 100644 --- a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-srcmod-swz.sh +++ b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-srcmod-swz.sh @@ -1,4 +1,4 @@ -VERT1.1 +VERT DCL IN[0], POSITION DCL IN[1], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-sub.sh b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-sub.sh index a583b958284..0f9678b8a3e 100644 --- a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-sub.sh +++ b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-sub.sh @@ -1,4 +1,4 @@ -VERT1.1 +VERT DCL IN[0], POSITION DCL IN[1], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-xpd.sh b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-xpd.sh index 8def8943b03..39d42ae2a0d 100644 --- a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-xpd.sh +++ b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-xpd.sh @@ -1,4 +1,4 @@ -VERT1.1 +VERT DCL IN[0], POSITION DCL IN[1], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vertex-shader.py b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vertex-shader.py index 472769f2592..41bebd0604a 100644 --- a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vertex-shader.py +++ b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vertex-shader.py @@ -96,7 +96,7 @@ def test(dev, name): sampler.min_img_filter = PIPE_TEX_MIPFILTER_NEAREST sampler.mag_img_filter = PIPE_TEX_MIPFILTER_NEAREST sampler.normalized_coords = 1 - ctx.set_sampler(0, sampler) + ctx.set_fragment_sampler(0, sampler) # scissor scissor = Scissor() @@ -135,7 +135,7 @@ def test(dev, name): # fragment shader fs = Shader(''' - FRAG1.1 + FRAG DCL IN[0], COLOR, LINEAR DCL OUT[0], COLOR, CONSTANT 0:MOV OUT[0], IN[0] diff --git a/src/gallium/state_trackers/python/tests/surface_copy.py b/src/gallium/state_trackers/python/tests/surface_copy.py index 3ceecbbd3aa..df5babb78af 100755 --- a/src/gallium/state_trackers/python/tests/surface_copy.py +++ b/src/gallium/state_trackers/python/tests/surface_copy.py @@ -98,9 +98,10 @@ class TextureTest(TestCase): y = 0 w = dst_surface.width h = dst_surface.height - - stride = dst_surface.nblocksx * dst_texture.block.size - size = dst_surface.nblocksy * stride + + # ??? + stride = pf_get_stride(texture->format, w) + size = pf_get_nblocksy(texture->format) * stride src_raw = os.urandom(size) src_surface.put_tile_raw(0, 0, w, h, src_raw, stride) diff --git a/src/gallium/state_trackers/python/tests/texture_render.py b/src/gallium/state_trackers/python/tests/texture_render.py index 0b76932b6ed..79287f2cace 100755 --- a/src/gallium/state_trackers/python/tests/texture_render.py +++ b/src/gallium/state_trackers/python/tests/texture_render.py @@ -144,8 +144,8 @@ class TextureTest(TestCase): sampler.normalized_coords = 1 sampler.min_lod = 0 sampler.max_lod = PIPE_MAX_TEXTURE_LEVELS - 1 - ctx.set_sampler(0, sampler) - ctx.set_sampler_texture(0, src_texture) + ctx.set_fragment_sampler(0, sampler) + ctx.set_fragment_sampler_texture(0, src_texture) # framebuffer cbuf_tex = dev.texture_create( @@ -171,7 +171,7 @@ class TextureTest(TestCase): # vertex shader vs = Shader(''' - VERT1.1 + VERT DCL IN[0], POSITION, CONSTANT DCL IN[1], GENERIC, CONSTANT DCL OUT[0], POSITION, CONSTANT @@ -185,7 +185,7 @@ class TextureTest(TestCase): # fragment shader fs = Shader(''' - FRAG1.1 + FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR, CONSTANT DCL SAMP[0], CONSTANT diff --git a/src/gallium/state_trackers/python/tests/texture_sample.py b/src/gallium/state_trackers/python/tests/texture_sample.py index c7b78abbbec..520961c8051 100755 --- a/src/gallium/state_trackers/python/tests/texture_sample.py +++ b/src/gallium/state_trackers/python/tests/texture_sample.py @@ -169,7 +169,7 @@ class TextureColorSampleTest(TestCase): sampler.normalized_coords = 1 sampler.min_lod = 0 sampler.max_lod = PIPE_MAX_TEXTURE_LEVELS - 1 - ctx.set_sampler(0, sampler) + ctx.set_fragment_sampler(0, sampler) # texture texture = dev.texture_create( @@ -189,7 +189,7 @@ class TextureColorSampleTest(TestCase): zslice = zslice, ).sample_rgba(expected_rgba) - ctx.set_sampler_texture(0, texture) + ctx.set_fragment_sampler_texture(0, texture) # framebuffer cbuf_tex = dev.texture_create( @@ -216,7 +216,7 @@ class TextureColorSampleTest(TestCase): # vertex shader vs = Shader(''' - VERT1.1 + VERT DCL IN[0], POSITION, CONSTANT DCL IN[1], GENERIC, CONSTANT DCL OUT[0], POSITION, CONSTANT @@ -236,7 +236,7 @@ class TextureColorSampleTest(TestCase): PIPE_TEXTURE_CUBE: "CUBE", }[target] fs = Shader(''' - FRAG1.1 + FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR, CONSTANT DCL SAMP[0], CONSTANT @@ -359,7 +359,7 @@ class TextureDepthSampleTest(TestCase): sampler.normalized_coords = 1 sampler.min_lod = 0 sampler.max_lod = PIPE_MAX_TEXTURE_LEVELS - 1 - ctx.set_sampler(0, sampler) + ctx.set_fragment_sampler(0, sampler) # texture texture = dev.texture_create( @@ -379,7 +379,7 @@ class TextureDepthSampleTest(TestCase): zslice = zslice, ).sample_rgba(expected_rgba) - ctx.set_sampler_texture(0, texture) + ctx.set_fragment_sampler_texture(0, texture) # framebuffer cbuf_tex = dev.texture_create( @@ -415,7 +415,7 @@ class TextureDepthSampleTest(TestCase): # vertex shader vs = Shader(''' - VERT1.1 + VERT DCL IN[0], POSITION, CONSTANT DCL IN[1], GENERIC, CONSTANT DCL OUT[0], POSITION, CONSTANT @@ -435,7 +435,7 @@ class TextureDepthSampleTest(TestCase): PIPE_TEXTURE_CUBE: "CUBE", }[target] fs = Shader(''' - FRAG1.1 + FRAG DCL IN[0], GENERIC[0], LINEAR DCL SAMP[0], CONSTANT DCL OUT[0].z, POSITION diff --git a/src/gallium/state_trackers/python/tests/texture_transfer.py b/src/gallium/state_trackers/python/tests/texture_transfer.py index e65b425adf4..35daca9e498 100755 --- a/src/gallium/state_trackers/python/tests/texture_transfer.py +++ b/src/gallium/state_trackers/python/tests/texture_transfer.py @@ -86,8 +86,9 @@ class TextureTest(TestCase): surface = texture.get_surface(face, level, zslice) - stride = surface.nblocksx * texture.block.size - size = surface.nblocksy * stride + # ??? + stride = pf_get_stride(texture->format, w) + size = pf_get_nblocksy(texture->format) * stride in_raw = os.urandom(size) diff --git a/src/gallium/state_trackers/vega/Makefile b/src/gallium/state_trackers/vega/Makefile index b8c805b06c4..fc97bf51f8f 100644 --- a/src/gallium/state_trackers/vega/Makefile +++ b/src/gallium/state_trackers/vega/Makefile @@ -61,14 +61,7 @@ VG_MINOR = 0 VG_TINY = 0 GALLIUM_LIBS = \ - $(GALLIUM)/src/gallium/auxiliary/pipebuffer/libpipebuffer.a \ - $(GALLIUM)/src/gallium/auxiliary/sct/libsct.a \ - $(GALLIUM)/src/gallium/auxiliary/draw/libdraw.a \ - $(GALLIUM)/src/gallium/auxiliary/rtasm/librtasm.a \ - $(GALLIUM)/src/gallium/auxiliary/translate/libtranslate.a \ - $(GALLIUM)/src/gallium/auxiliary/cso_cache/libcso_cache.a \ - $(GALLIUM)/src/gallium/auxiliary/util/libutil.a \ - $(GALLIUM)/src/gallium/auxiliary/tgsi/libtgsi.a + $(GALLIUM)/src/gallium/auxiliary/libgallium.a .SUFFIXES : .cpp diff --git a/src/gallium/state_trackers/vega/api_filters.c b/src/gallium/state_trackers/vega/api_filters.c index 862cbb03c43..2f984fb7b9a 100644 --- a/src/gallium/state_trackers/vega/api_filters.c +++ b/src/gallium/state_trackers/vega/api_filters.c @@ -38,6 +38,7 @@ #include "pipe/p_screen.h" #include "pipe/p_shader_tokens.h" +#include "util/u_format.h" #include "util/u_memory.h" @@ -68,10 +69,9 @@ static INLINE struct pipe_texture *create_texture_1d(struct vg_context *ctx, templ.target = PIPE_TEXTURE_1D; templ.format = PIPE_FORMAT_A8R8G8B8_UNORM; templ.last_level = 0; - templ.width[0] = color_data_len; - templ.height[0] = 1; - templ.depth[0] = 1; - pf_get_block(PIPE_FORMAT_A8R8G8B8_UNORM, &templ.block); + templ.width0 = color_data_len; + templ.height0 = 1; + templ.depth0 = 1; templ.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER; tex = screen->texture_create(screen, &templ); @@ -81,7 +81,7 @@ static INLINE struct pipe_texture *create_texture_1d(struct vg_context *ctx, screen->get_tex_transfer(screen, tex, 0, 0, 0, PIPE_TRANSFER_READ_WRITE , - 0, 0, tex->width[0], tex->height[0]); + 0, 0, tex->width0, tex->height0); void *map = screen->transfer_map(screen, transfer); memcpy(map, color_data, sizeof(VGint)*color_data_len); screen->transfer_unmap(screen, transfer); diff --git a/src/gallium/state_trackers/vega/asm_filters.h b/src/gallium/state_trackers/vega/asm_filters.h index 9a49f2e12d0..60bed197e28 100644 --- a/src/gallium/state_trackers/vega/asm_filters.h +++ b/src/gallium/state_trackers/vega/asm_filters.h @@ -28,7 +28,7 @@ #define ASM_FILTERS_H static const char color_matrix_asm[] = - "FRAG1.1\n" + "FRAG\n" "DCL IN[0], GENERIC[0], PERSPECTIVE\n" "DCL OUT[0], COLOR, CONSTANT\n" "DCL CONST[0..4], CONSTANT\n" @@ -51,7 +51,7 @@ static const char color_matrix_asm[] = "END\n"; static const char convolution_asm[] = - "FRAG1.1\n" + "FRAG\n" "DCL IN[0], GENERIC[0], PERSPECTIVE\n" "DCL OUT[0], COLOR, CONSTANT\n" "DCL TEMP[0..4], CONSTANT\n" @@ -78,7 +78,7 @@ static const char convolution_asm[] = static const char lookup_asm[] = - "FRAG1.1\n" + "FRAG\n" "DCL IN[0], GENERIC[0], PERSPECTIVE\n" "DCL OUT[0], COLOR, CONSTANT\n" "DCL TEMP[0..2], CONSTANT\n" @@ -103,7 +103,7 @@ static const char lookup_asm[] = static const char lookup_single_asm[] = - "FRAG1.1\n" + "FRAG\n" "DCL IN[0], GENERIC[0], PERSPECTIVE\n" "DCL OUT[0], COLOR, CONSTANT\n" "DCL TEMP[0..2], CONSTANT\n" diff --git a/src/gallium/state_trackers/vega/asm_util.h b/src/gallium/state_trackers/vega/asm_util.h index 218e1d166db..903bfc88a4d 100644 --- a/src/gallium/state_trackers/vega/asm_util.h +++ b/src/gallium/state_trackers/vega/asm_util.h @@ -29,7 +29,7 @@ static const char pass_through_depth_asm[] = - "FRAG1.1\n" + "FRAG\n" "DCL IN[0], POSITION, LINEAR\n" "DCL OUT[0].z, POSITION, CONSTANT\n" "0: MOV OUT[0].z, IN[0].zzzz\n" @@ -39,7 +39,7 @@ static const char pass_through_depth_asm[] = /* μnew = μmask */ static const char set_mask_asm[] = - "FRAG1.1\n" + "FRAG\n" "DCL IN[0], GENERIC[0], PERSPECTIVE\n" "DCL SAMP[0], CONSTANT\n" "DCL OUT[0], COLOR, CONSTANT\n" @@ -48,7 +48,7 @@ static const char set_mask_asm[] = /* μnew = 1 – (1 – μmask)*(1 – μprev) */ static const char union_mask_asm[] = - "FRAG1.1\n" + "FRAG\n" "DCL IN[0], GENERIC[0], PERSPECTIVE\n" "DCL IN[1], POSITION, LINEAR\n" "DCL CONST[0], CONSTANT\n" @@ -65,7 +65,7 @@ static const char union_mask_asm[] = /* μnew = μmask *μprev */ static const char intersect_mask_asm[] = - "FRAG1.1\n" + "FRAG\n" "DCL IN[0], GENERIC[0], PERSPECTIVE\n" "DCL IN[1], POSITION, LINEAR\n" "DCL CONST[0], CONSTANT\n" @@ -79,7 +79,7 @@ static const char intersect_mask_asm[] = /* μnew = μprev*(1 – μmask) */ static const char subtract_mask_asm[] = - "FRAG1.1\n" + "FRAG\n" "DCL IN[0], GENERIC[0], PERSPECTIVE\n" "DCL IN[1], POSITION, LINEAR\n" "DCL CONST[0], CONSTANT\n" @@ -94,7 +94,7 @@ static const char subtract_mask_asm[] = static const char vs_plain_asm[] = - "VERT1.1\n" + "VERT\n" "DCL IN[0]\n" "DCL OUT[0], POSITION\n" "DCL TEMP[0]\n" @@ -105,7 +105,7 @@ static const char vs_plain_asm[] = "3: END\n"; static const char vs_clear_asm[] = - "VERT1.1\n" + "VERT\n" "DCL IN[0]\n" "DCL IN[1]\n" "DCL OUT[0], POSITION\n" @@ -120,7 +120,7 @@ static const char vs_clear_asm[] = static const char vs_texture_asm[] = - "VERT1.1\n" + "VERT\n" "DCL IN[0]\n" "DCL IN[1]\n" "DCL OUT[0], POSITION\n" diff --git a/src/gallium/state_trackers/vega/image.c b/src/gallium/state_trackers/vega/image.c index 9a722980d52..1112ad9839d 100644 --- a/src/gallium/state_trackers/vega/image.c +++ b/src/gallium/state_trackers/vega/image.c @@ -39,6 +39,7 @@ #include "pipe/p_screen.h" #include "pipe/p_inlines.h" #include "util/u_blit.h" +#include "util/u_format.h" #include "util/u_tile.h" #include "util/u_memory.h" #include "util/u_math.h" @@ -93,8 +94,8 @@ static void vg_copy_texture(struct vg_context *ctx, dst_loc[3] = height; dst_bounds[0] = 0.f; dst_bounds[1] = 0.f; - dst_bounds[2] = dst->width[0]; - dst_bounds[3] = dst->height[0]; + dst_bounds[2] = dst->width0; + dst_bounds[3] = dst->height0; src_loc[0] = sx; src_loc[1] = sy; @@ -102,8 +103,8 @@ static void vg_copy_texture(struct vg_context *ctx, src_loc[3] = height; src_bounds[0] = 0.f; src_bounds[1] = 0.f; - src_bounds[2] = src->width[0]; - src_bounds[3] = src->height[0]; + src_bounds[2] = src->width0; + src_bounds[3] = src->height0; vg_bound_rect(src_loc, src_bounds, src_shift); vg_bound_rect(dst_loc, dst_bounds, dst_shift); @@ -270,11 +271,10 @@ struct vg_image * image_create(VGImageFormat format, memset(&pt, 0, sizeof(pt)); pt.target = PIPE_TEXTURE_2D; pt.format = pformat; - pf_get_block(pformat, &pt.block); pt.last_level = 0; - pt.width[0] = width; - pt.height[0] = height; - pt.depth[0] = 1; + pt.width0 = width; + pt.height0 = height; + pt.depth0 = 1; pt.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER; newtex = screen->texture_create(screen, &pt); @@ -414,7 +414,7 @@ void image_sub_data(struct vg_image *image, { /* upload color_data */ struct pipe_transfer *transfer = screen->get_tex_transfer( screen, texture, 0, 0, 0, - PIPE_TRANSFER_WRITE, 0, 0, texture->width[0], texture->height[0]); + PIPE_TRANSFER_WRITE, 0, 0, texture->width0, texture->height0); src += (dataStride * yoffset); for (i = 0; i < height; i++) { _vega_unpack_float_span_rgba(ctx, width, xoffset, src, dataFormat, temp); @@ -644,7 +644,7 @@ VGint image_sampler_filter(struct vg_context *ctx) return PIPE_TEX_FILTER_NEAREST; break; case VG_IMAGE_QUALITY_BETTER: - /*return PIPE_TEX_FILTER_ANISO;*/ + /* possibly use anisotropic filtering */ return PIPE_TEX_FILTER_LINEAR; break; default: diff --git a/src/gallium/state_trackers/vega/mask.c b/src/gallium/state_trackers/vega/mask.c index 24650a37d50..42300bb6d57 100644 --- a/src/gallium/state_trackers/vega/mask.c +++ b/src/gallium/state_trackers/vega/mask.c @@ -36,6 +36,7 @@ #include "pipe/p_context.h" #include "pipe/p_screen.h" #include "pipe/p_inlines.h" +#include "util/u_format.h" #include "util/u_memory.h" struct vg_mask_layer { @@ -426,7 +427,7 @@ static void mask_using_texture(struct pipe_texture *texture, if (!surface) return; if (!intersect_rectangles(surface->width, surface->height, - texture->width[0], texture->height[0], + texture->width0, texture->height0, x, y, width, height, offsets, loc)) return; @@ -491,11 +492,10 @@ struct vg_mask_layer * mask_layer_create(VGint width, VGint height) memset(&pt, 0, sizeof(pt)); pt.target = PIPE_TEXTURE_2D; pt.format = PIPE_FORMAT_A8R8G8B8_UNORM; - pf_get_block(PIPE_FORMAT_A8R8G8B8_UNORM, &pt.block); pt.last_level = 0; - pt.width[0] = width; - pt.height[0] = height; - pt.depth[0] = 1; + pt.width0 = width; + pt.height0 = height; + pt.depth0 = 1; pt.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER; pt.compressed = 0; @@ -607,8 +607,8 @@ void mask_render_to(struct path *path, struct vg_mask_layer *temp_layer; VGint width, height; - width = fb_buffers->alpha_mask->width[0]; - height = fb_buffers->alpha_mask->width[0]; + width = fb_buffers->alpha_mask->width0; + height = fb_buffers->alpha_mask->width0; temp_layer = mask_layer_create(width, height); diff --git a/src/gallium/state_trackers/vega/paint.c b/src/gallium/state_trackers/vega/paint.c index 04a6ba9cdcd..cc73771d358 100644 --- a/src/gallium/state_trackers/vega/paint.c +++ b/src/gallium/state_trackers/vega/paint.c @@ -34,6 +34,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_inlines.h" +#include "util/u_format.h" #include "util/u_memory.h" #include "util/u_math.h" @@ -151,10 +152,9 @@ static INLINE struct pipe_texture *create_gradient_texture(struct vg_paint *p) templ.target = PIPE_TEXTURE_1D; templ.format = PIPE_FORMAT_A8R8G8B8_UNORM; templ.last_level = 0; - templ.width[0] = 1024; - templ.height[0] = 1; - templ.depth[0] = 1; - pf_get_block(PIPE_FORMAT_A8R8G8B8_UNORM, &templ.block); + templ.width0 = 1024; + templ.height0 = 1; + templ.depth0 = 1; templ.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER; tex = screen->texture_create(screen, &templ); @@ -328,8 +328,8 @@ static INLINE void paint_pattern_buffer(struct vg_paint *paint, void *buffer) map[4] = 0.f; map[5] = 1.f; - map[6] = paint->pattern.texture->width[0]; - map[7] = paint->pattern.texture->height[0]; + map[6] = paint->pattern.texture->width0; + map[7] = paint->pattern.texture->height0; { struct matrix mat; memcpy(&mat, &ctx->state.vg.fill_paint_to_user_matrix, diff --git a/src/gallium/state_trackers/vega/renderer.c b/src/gallium/state_trackers/vega/renderer.c index 396c88aa3d2..64e3a7c5453 100644 --- a/src/gallium/state_trackers/vega/renderer.c +++ b/src/gallium/state_trackers/vega/renderer.c @@ -35,6 +35,7 @@ #include "pipe/p_shader_tokens.h" #include "util/u_draw_quad.h" +#include "util/u_format.h" #include "util/u_simple_shaders.h" #include "util/u_memory.h" #include "util/u_rect.h" @@ -56,7 +57,7 @@ static void setup_shaders(struct renderer *ctx) { struct pipe_context *pipe = ctx->pipe; /* fragment shader */ - ctx->fs = util_make_fragment_tex_shader(pipe); + ctx->fs = util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_2D); } static struct pipe_buffer * @@ -230,13 +231,13 @@ void renderer_draw_texture(struct renderer *r, struct pipe_buffer *buf; VGfloat s0, t0, s1, t1; - assert(tex->width[0] != 0); - assert(tex->height[0] != 0); + assert(tex->width0 != 0); + assert(tex->height0 != 0); - s0 = x1offset / tex->width[0]; - s1 = x2offset / tex->width[0]; - t0 = y1offset / tex->height[0]; - t1 = y2offset / tex->height[0]; + s0 = x1offset / tex->width0; + s1 = x2offset / tex->width0; + t0 = y1offset / tex->height0; + t1 = y2offset / tex->height0; cso_save_vertex_shader(r->cso); /* shaders */ @@ -276,10 +277,10 @@ void renderer_copy_texture(struct renderer *ctx, struct pipe_framebuffer_state fb; float s0, t0, s1, t1; - assert(src->width[0] != 0); - assert(src->height[0] != 0); - assert(dst->width[0] != 0); - assert(dst->height[0] != 0); + assert(src->width0 != 0); + assert(src->height0 != 0); + assert(dst->width0 != 0); + assert(dst->height0 != 0); #if 0 debug_printf("copy texture [%f, %f, %f, %f], [%f, %f, %f, %f]\n", @@ -287,10 +288,10 @@ void renderer_copy_texture(struct renderer *ctx, #endif #if 1 - s0 = sx1 / src->width[0]; - s1 = sx2 / src->width[0]; - t0 = sy1 / src->height[0]; - t1 = sy2 / src->height[0]; + s0 = sx1 / src->width0; + s1 = sx2 / src->width0; + t0 = sy1 / src->height0; + t1 = sy2 / src->height0; #else s0 = 0; s1 = 1; @@ -445,10 +446,9 @@ void renderer_copy_surface(struct renderer *ctx, texTemp.target = PIPE_TEXTURE_2D; texTemp.format = src->format; texTemp.last_level = 0; - texTemp.width[0] = srcW; - texTemp.height[0] = srcH; - texTemp.depth[0] = 1; - pf_get_block(src->format, &texTemp.block); + texTemp.width0 = srcW; + texTemp.height0 = srcH; + texTemp.depth0 = 1; tex = screen->texture_create(screen, &texTemp); if (!tex) @@ -570,13 +570,13 @@ void renderer_texture_quad(struct renderer *r, struct pipe_buffer *buf; VGfloat s0, t0, s1, t1; - assert(tex->width[0] != 0); - assert(tex->height[0] != 0); + assert(tex->width0 != 0); + assert(tex->height0 != 0); - s0 = x1offset / tex->width[0]; - s1 = x2offset / tex->width[0]; - t0 = y1offset / tex->height[0]; - t1 = y2offset / tex->height[0]; + s0 = x1offset / tex->width0; + s1 = x2offset / tex->width0; + t0 = y1offset / tex->height0; + t1 = y2offset / tex->height0; cso_save_vertex_shader(r->cso); /* shaders */ diff --git a/src/gallium/state_trackers/vega/shaders_cache.c b/src/gallium/state_trackers/vega/shaders_cache.c index fd0831fab1f..f620075d0bc 100644 --- a/src/gallium/state_trackers/vega/shaders_cache.c +++ b/src/gallium/state_trackers/vega/shaders_cache.c @@ -97,7 +97,7 @@ static INLINE struct tgsi_token *tokens_from_assembly(const char *txt, int num_t /* static const char max_shader_preamble[] = - "FRAG1.1\n" + "FRAG\n" "DCL IN[0], POSITION, LINEAR\n" "DCL IN[1], GENERIC[0], PERSPECTIVE\n" "DCL OUT[0], COLOR, CONSTANT\n" @@ -168,7 +168,7 @@ create_preamble(char *txt, --end_temp; --end_sampler; - sprintf(txt, "FRAG1.1\n"); + sprintf(txt, "FRAG\n"); if (declare_input) { sprintf(txt + strlen(txt), "DCL IN[0], POSITION, LINEAR\n"); diff --git a/src/gallium/state_trackers/vega/vg_tracker.c b/src/gallium/state_trackers/vega/vg_tracker.c index a8ab9397f90..e5039132758 100644 --- a/src/gallium/state_trackers/vega/vg_tracker.c +++ b/src/gallium/state_trackers/vega/vg_tracker.c @@ -31,6 +31,7 @@ #include "pipe/p_context.h" #include "pipe/p_inlines.h" #include "pipe/p_screen.h" +#include "util/u_format.h" #include "util/u_memory.h" #include "util/u_math.h" #include "util/u_rect.h" @@ -51,13 +52,12 @@ create_texture(struct pipe_context *pipe, enum pipe_format format, } templ.target = PIPE_TEXTURE_2D; - pf_get_block(templ.format, &templ.block); - templ.width[0] = width; - templ.height[0] = height; - templ.depth[0] = 1; + templ.width0 = width; + templ.height0 = height; + templ.depth0 = 1; templ.last_level = 0; - if (pf_get_component_bits(format, PIPE_FORMAT_COMP_S)) { + if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_ZS, 1)) { templ.tex_usage = PIPE_TEXTURE_USAGE_DEPTH_STENCIL; } else { templ.tex_usage = (PIPE_TEXTURE_USAGE_DISPLAY_TARGET | diff --git a/src/gallium/state_trackers/vega/vg_translate.c b/src/gallium/state_trackers/vega/vg_translate.c index 00e07647062..03575ca3ddd 100644 --- a/src/gallium/state_trackers/vega/vg_translate.c +++ b/src/gallium/state_trackers/vega/vg_translate.c @@ -474,6 +474,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, VGfloat rgba[][4]) { VGint i; + union util_color uc; switch (dataFormat) { case VG_sRGBX_8888: { @@ -486,8 +487,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, b = (*src >> 8) & 0xff; a = 0xff; - util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } } @@ -502,8 +506,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, b = (*src >> 8) & 0xff; a = (*src >> 0) & 0xff; - util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } return; @@ -519,8 +526,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, b = (*src >> 8) & 0xff; a = (*src >> 0) & 0xff; - util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } return; @@ -536,8 +546,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, clr[2] = ((*src >> 0) & 31)/31.; clr[3] = 1.f; - util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } } @@ -552,8 +565,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, clr[2] = ((*src >> 1) & 31)/31.; clr[3] = ((*src >> 0) & 1)/1.; - util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } } @@ -568,8 +584,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, clr[2] = ((*src >> 4) & 15)/15.; clr[3] = ((*src >> 0) & 15)/15.; - util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } } @@ -578,8 +597,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, VGubyte *src = (VGubyte *)data; src += offset; for (i = 0; i < n; ++i) { - util_pack_color_ub(0xff, 0xff, 0xff, *src, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + util_pack_color_ub(0xff, 0xff, 0xff, *src, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } } @@ -594,8 +616,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, b = (*src >> 8) & 0xff; a = 0xff; - util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } } @@ -610,8 +635,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, b = (*src >> 8) & 0xff; a = (*src >> 0) & 0xff; - util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } return; @@ -627,8 +655,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, b = (*src >> 8) & 0xff; a = (*src >> 0) & 0xff; - util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } return; @@ -638,8 +669,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, VGubyte *src = (VGubyte *)data; src += offset; for (i = 0; i < n; ++i) { - util_pack_color_ub(0xff, 0xff, 0xff, *src, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + util_pack_color_ub(0xff, 0xff, 0xff, *src, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } } @@ -648,8 +682,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, VGubyte *src = (VGubyte *)data; src += offset; for (i = 0; i < n; ++i) { - util_pack_color_ub(0xff, 0xff, 0xff, *src, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + util_pack_color_ub(0xff, 0xff, 0xff, *src, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } } @@ -667,8 +704,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, clr[2] = clr[0]; clr[3] = 1.f; - util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i+j]); + util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i+j][0] = uc.f[0]; + rgba[i+j][1] = uc.f[1]; + rgba[i+j][2] = uc.f[2]; + rgba[i+j][3] = uc.f[3]; } ++src; } @@ -688,8 +728,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, clr[2] = 0.f; clr[3] = (((*src) & (1<<shift)) >> shift); - util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i+j]); + util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i+j][0] = uc.f[0]; + rgba[i+j][1] = uc.f[1]; + rgba[i+j][2] = uc.f[2]; + rgba[i+j][3] = uc.f[3]; } ++src; } @@ -715,8 +758,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, clr[2] = 0.f; clr[3] = ((*src) & (bitter)) >> shift; - util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i +j]); + util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i+j][0] = uc.f[0]; + rgba[i+j][1] = uc.f[1]; + rgba[i+j][2] = uc.f[2]; + rgba[i+j][3] = uc.f[3]; } ++src; } @@ -735,8 +781,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, g = (*src >> 8) & 0xff; b = (*src >> 0) & 0xff; - util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } return; @@ -752,8 +801,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, g = (*src >> 8) & 0xff; b = (*src >> 0) & 0xff; - util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } return; @@ -775,8 +827,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, g = (*src >> 8) & 0xff; b = (*src >> 0) & 0xff; - util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } return; @@ -792,8 +847,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, g = (*src >> 8) & 0xff; b = (*src >> 0) & 0xff; - util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } return; @@ -811,8 +869,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, r = (*src >> 8) & 0xff; a = (*src >> 0) & 0xff; - util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } return; @@ -828,8 +889,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, r = (*src >> 8) & 0xff; a = (*src >> 0) & 0xff; - util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } return; @@ -853,8 +917,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, r = (*src >> 8) & 0xff; a = (*src >> 0) & 0xff; - util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } return; @@ -870,8 +937,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, r = (*src >> 8) & 0xff; a = (*src >> 0) & 0xff; - util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } return; @@ -889,8 +959,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, g = (*src >> 8) & 0xff; r = (*src >> 0) & 0xff; - util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } return; @@ -906,8 +979,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, g = (*src >> 8) & 0xff; r = (*src >> 0) & 0xff; - util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } return; @@ -929,8 +1005,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, g = (*src >> 8) & 0xff; r = (*src >> 0) & 0xff; - util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } return; @@ -946,8 +1025,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, g = (*src >> 8) & 0xff; r = (*src >> 0) & 0xff; - util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } return; diff --git a/src/gallium/state_trackers/wgl/SConscript b/src/gallium/state_trackers/wgl/SConscript index b05944a33b3..352c087475e 100644 --- a/src/gallium/state_trackers/wgl/SConscript +++ b/src/gallium/state_trackers/wgl/SConscript @@ -11,10 +11,11 @@ if env['platform'] in ['windows']: '.', ]) - env.Append(CPPDEFINES = [ + env.AppendUnique(CPPDEFINES = [ '_GDI32_', # prevent wgl* being declared __declspec(dllimport) 'BUILD_GL32', # declare gl* as __declspec(dllexport) in Mesa headers 'WIN32_THREADS', # use Win32 thread API + 'WIN32_LEAN_AND_MEAN', # http://msdn2.microsoft.com/en-us/library/6dwk3a1z.aspx ]) sources = [ diff --git a/src/gallium/state_trackers/wgl/stw_framebuffer.c b/src/gallium/state_trackers/wgl/stw_framebuffer.c index 6d095019815..129a6298a77 100644 --- a/src/gallium/state_trackers/wgl/stw_framebuffer.c +++ b/src/gallium/state_trackers/wgl/stw_framebuffer.c @@ -30,6 +30,7 @@ #include "main/context.h" #include "pipe/p_format.h" #include "pipe/p_screen.h" +#include "util/u_format.h" #include "state_tracker/st_context.h" #include "state_tracker/st_public.h" @@ -267,15 +268,13 @@ stw_framebuffer_allocate( enum pipe_format colorFormat, depthFormat, stencilFormat; colorFormat = pfi->color_format; - - assert(pf_layout( pfi->depth_stencil_format ) == PIPE_FORMAT_LAYOUT_RGBAZS ); - - if(pf_get_component_bits( pfi->depth_stencil_format, PIPE_FORMAT_COMP_Z )) + + if(util_format_get_component_bits(pfi->depth_stencil_format, UTIL_FORMAT_COLORSPACE_ZS, 0)) depthFormat = pfi->depth_stencil_format; else depthFormat = PIPE_FORMAT_NONE; - if(pf_get_component_bits( pfi->depth_stencil_format, PIPE_FORMAT_COMP_S )) + if(util_format_get_component_bits(pfi->depth_stencil_format, UTIL_FORMAT_COLORSPACE_ZS, 1)) stencilFormat = pfi->depth_stencil_format; else stencilFormat = PIPE_FORMAT_NONE; diff --git a/src/gallium/state_trackers/wgl/stw_pixelformat.c b/src/gallium/state_trackers/wgl/stw_pixelformat.c index 7abe5d9f7fa..54cc3614129 100644 --- a/src/gallium/state_trackers/wgl/stw_pixelformat.c +++ b/src/gallium/state_trackers/wgl/stw_pixelformat.c @@ -32,6 +32,7 @@ #include "pipe/p_defines.h" #include "pipe/p_screen.h" +#include "util/u_format.h" #include "util/u_debug.h" #include "stw_icd.h" @@ -132,14 +133,12 @@ stw_pixelformat_add( if(stw_dev->pixelformat_extended_count >= STW_MAX_PIXELFORMATS) return; - assert(pf_layout( color->format ) == PIPE_FORMAT_LAYOUT_RGBAZS ); - assert(pf_get_component_bits( color->format, PIPE_FORMAT_COMP_R ) == color->bits.red ); - assert(pf_get_component_bits( color->format, PIPE_FORMAT_COMP_G ) == color->bits.green ); - assert(pf_get_component_bits( color->format, PIPE_FORMAT_COMP_B ) == color->bits.blue ); - assert(pf_get_component_bits( color->format, PIPE_FORMAT_COMP_A ) == color->bits.alpha ); - assert(pf_layout( depth->format ) == PIPE_FORMAT_LAYOUT_RGBAZS ); - assert(pf_get_component_bits( depth->format, PIPE_FORMAT_COMP_Z ) == depth->bits.depth ); - assert(pf_get_component_bits( depth->format, PIPE_FORMAT_COMP_S ) == depth->bits.stencil ); + assert(util_format_get_component_bits(color->format, UTIL_FORMAT_COLORSPACE_RGB, 0) == color->bits.red); + assert(util_format_get_component_bits(color->format, UTIL_FORMAT_COLORSPACE_RGB, 1) == color->bits.green); + assert(util_format_get_component_bits(color->format, UTIL_FORMAT_COLORSPACE_RGB, 2) == color->bits.blue); + assert(util_format_get_component_bits(color->format, UTIL_FORMAT_COLORSPACE_RGB, 3) == color->bits.alpha); + assert(util_format_get_component_bits(depth->format, UTIL_FORMAT_COLORSPACE_ZS, 0) == depth->bits.depth); + assert(util_format_get_component_bits(depth->format, UTIL_FORMAT_COLORSPACE_ZS, 1) == depth->bits.stencil); pfi = &stw_dev->pixelformats[stw_dev->pixelformat_extended_count]; diff --git a/src/gallium/state_trackers/xorg/xorg_composite.c b/src/gallium/state_trackers/xorg/xorg_composite.c index a5975aad515..1c248a629e6 100644 --- a/src/gallium/state_trackers/xorg/xorg_composite.c +++ b/src/gallium/state_trackers/xorg/xorg_composite.c @@ -423,8 +423,6 @@ bind_samplers(struct exa_context *exa, int op, - - static INLINE boolean matrix_from_pict_transform(PictTransform *trans, float *matrix) { if (!trans) diff --git a/src/gallium/state_trackers/xorg/xorg_crtc.c b/src/gallium/state_trackers/xorg/xorg_crtc.c index 0160b1aa59f..650d2c0d1db 100644 --- a/src/gallium/state_trackers/xorg/xorg_crtc.c +++ b/src/gallium/state_trackers/xorg/xorg_crtc.c @@ -50,6 +50,7 @@ #endif #include "pipe/p_inlines.h" +#include "util/u_format.h" #include "util/u_rect.h" #ifdef HAVE_LIBKMS @@ -204,11 +205,10 @@ crtc_load_cursor_argb_ga3d(xf86CrtcPtr crtc, CARD32 * image) templat.tex_usage |= PIPE_TEXTURE_USAGE_PRIMARY; templat.target = PIPE_TEXTURE_2D; templat.last_level = 0; - templat.depth[0] = 1; + templat.depth0 = 1; templat.format = PIPE_FORMAT_A8R8G8B8_UNORM; - templat.width[0] = 64; - templat.height[0] = 64; - pf_get_block(templat.format, &templat.block); + templat.width0 = 64; + templat.height0 = 64; crtcp->cursor_tex = ms->screen->texture_create(ms->screen, &templat); @@ -224,7 +224,7 @@ crtc_load_cursor_argb_ga3d(xf86CrtcPtr crtc, CARD32 * image) PIPE_TRANSFER_WRITE, 0, 0, 64, 64); ptr = ms->screen->transfer_map(ms->screen, transfer); - util_copy_rect(ptr, &crtcp->cursor_tex->block, + util_copy_rect(ptr, crtcp->cursor_tex->format, transfer->stride, 0, 0, 64, 64, (void*)image, 64 * 4, 0, 0); ms->screen->transfer_unmap(ms->screen, transfer); diff --git a/src/gallium/state_trackers/xorg/xorg_dri2.c b/src/gallium/state_trackers/xorg/xorg_dri2.c index 4fa47548a43..fd82f4fa1d1 100644 --- a/src/gallium/state_trackers/xorg/xorg_dri2.c +++ b/src/gallium/state_trackers/xorg/xorg_dri2.c @@ -40,6 +40,7 @@ #include "pipe/p_state.h" #include "pipe/p_inlines.h" +#include "util/u_format.h" #include "util/u_rect.h" /* Make all the #if cases in the code esier to read */ @@ -92,7 +93,7 @@ dri2_do_create_buffer(DrawablePtr pDraw, DRI2BufferPtr buffer, unsigned int form case 9: #endif if (exa_priv->depth_stencil_tex && - !pf_is_depth_stencil(exa_priv->depth_stencil_tex->format)) + !util_format_is_depth_or_stencil(exa_priv->depth_stencil_tex->format)) exa_priv->depth_stencil_tex = NULL; /* Fall through */ case DRI2BufferDepth: @@ -108,10 +109,9 @@ dri2_do_create_buffer(DrawablePtr pDraw, DRI2BufferPtr buffer, unsigned int form else template.format = ms->ds_depth_bits_last ? PIPE_FORMAT_S8Z24_UNORM : PIPE_FORMAT_Z24S8_UNORM; - pf_get_block(template.format, &template.block); - template.width[0] = pDraw->width; - template.height[0] = pDraw->height; - template.depth[0] = 1; + template.width0 = pDraw->width; + template.height0 = pDraw->height; + template.depth0 = 1; template.last_level = 0; template.tex_usage = PIPE_TEXTURE_USAGE_DEPTH_STENCIL | PIPE_TEXTURE_USAGE_DISPLAY_TARGET; diff --git a/src/gallium/state_trackers/xorg/xorg_exa.c b/src/gallium/state_trackers/xorg/xorg_exa.c index 4e7882551dd..aa68570b9c0 100644 --- a/src/gallium/state_trackers/xorg/xorg_exa.c +++ b/src/gallium/state_trackers/xorg/xorg_exa.c @@ -43,6 +43,7 @@ #include "pipe/p_state.h" #include "pipe/p_inlines.h" +#include "util/u_format.h" #include "util/u_rect.h" #include "util/u_math.h" #include "util/u_debug.h" @@ -202,7 +203,7 @@ ExaDownloadFromScreen(PixmapPtr pPix, int x, int y, int w, int h, char *dst, x, y, w, h, dst_pitch); #endif - util_copy_rect((unsigned char*)dst, &priv->tex->block, dst_pitch, 0, 0, + util_copy_rect((unsigned char*)dst, priv->tex->format, dst_pitch, 0, 0, w, h, exa->scrn->transfer_map(exa->scrn, transfer), transfer->stride, 0, 0); @@ -242,7 +243,7 @@ ExaUploadToScreen(PixmapPtr pPix, int x, int y, int w, int h, char *src, #endif util_copy_rect(exa->scrn->transfer_map(exa->scrn, transfer), - &priv->tex->block, transfer->stride, 0, 0, w, h, + priv->tex->format, transfer->stride, 0, 0, w, h, (unsigned char*)src, src_pitch, 0, 0); exa->scrn->transfer_unmap(exa->scrn, transfer); @@ -274,8 +275,8 @@ ExaPrepareAccess(PixmapPtr pPix, int index) PIPE_REFERENCED_FOR_WRITE) exa->pipe->flush(exa->pipe, 0, NULL); - assert(pPix->drawable.width <= priv->tex->width[0]); - assert(pPix->drawable.height <= priv->tex->height[0]); + assert(pPix->drawable.width <= priv->tex->width0); + assert(pPix->drawable.height <= priv->tex->height0); priv->map_transfer = exa->scrn->get_tex_transfer(exa->scrn, priv->tex, 0, 0, 0, @@ -532,8 +533,8 @@ ExaCopy(PixmapPtr pDstPixmap, int srcX, int srcY, int dstX, int dstY, dstX, dstY, srcX, srcY, width, height, - exa->copy.src_texture->width[0], - exa->copy.src_texture->height[0]); + exa->copy.src_texture->width0, + exa->copy.src_texture->height0); } } @@ -875,8 +876,8 @@ ExaModifyPixmapHeader(PixmapPtr pPixmap, int width, int height, if (priv->tex) debug_printf(" ==> old texture %dx%d\n", - priv->tex->width[0], - priv->tex->height[0]); + priv->tex->width0, + priv->tex->height0); } @@ -904,8 +905,8 @@ ExaModifyPixmapHeader(PixmapPtr pPixmap, int width, int height, /* Deal with screen resize */ if ((exa->accel || priv->flags) && (!priv->tex || - !size_match(width, priv->tex->width[0]) || - !size_match(height, priv->tex->height[0]) || + !size_match(width, priv->tex->width0) || + !size_match(height, priv->tex->height0) || priv->tex_flags != priv->flags)) { struct pipe_texture *texture = NULL; struct pipe_texture template; @@ -913,17 +914,16 @@ ExaModifyPixmapHeader(PixmapPtr pPixmap, int width, int height, memset(&template, 0, sizeof(template)); template.target = PIPE_TEXTURE_2D; exa_get_pipe_format(depth, &template.format, &bitsPerPixel, &priv->picture_format); - pf_get_block(template.format, &template.block); if (ROUND_UP_TEXTURES && priv->flags == 0) { - template.width[0] = util_next_power_of_two(width); - template.height[0] = util_next_power_of_two(height); + template.width0 = util_next_power_of_two(width); + template.height0 = util_next_power_of_two(height); } else { - template.width[0] = width; - template.height[0] = height; + template.width0 = width; + template.height0 = height; } - template.depth[0] = 1; + template.depth0 = 1; template.last_level = 0; template.tex_usage = PIPE_TEXTURE_USAGE_RENDER_TARGET | priv->flags; priv->tex_flags = priv->flags; @@ -938,12 +938,12 @@ ExaModifyPixmapHeader(PixmapPtr pPixmap, int width, int height, src_surf = xorg_gpu_surface(exa->pipe->screen, priv); if (exa->pipe->surface_copy) { exa->pipe->surface_copy(exa->pipe, dst_surf, 0, 0, src_surf, - 0, 0, min(width, texture->width[0]), - min(height, texture->height[0])); + 0, 0, min(width, texture->width0), + min(height, texture->height0)); } else { util_surface_copy(exa->pipe, FALSE, dst_surf, 0, 0, src_surf, - 0, 0, min(width, texture->width[0]), - min(height, texture->height[0])); + 0, 0, min(width, texture->width0), + min(height, texture->height0)); } exa->scrn->tex_surface_destroy(dst_surf); exa->scrn->tex_surface_destroy(src_surf); @@ -976,8 +976,8 @@ xorg_exa_set_texture(PixmapPtr pPixmap, struct pipe_texture *tex) if (!priv) return FALSE; - if (pPixmap->drawable.width != tex->width[0] || - pPixmap->drawable.height != tex->height[0]) + if (pPixmap->drawable.width != tex->width0 || + pPixmap->drawable.height != tex->height0) return FALSE; pipe_texture_reference(&priv->tex, tex); @@ -999,10 +999,9 @@ xorg_exa_create_root_texture(ScrnInfoPtr pScrn, memset(&template, 0, sizeof(template)); template.target = PIPE_TEXTURE_2D; exa_get_pipe_format(depth, &template.format, &bitsPerPixel, &dummy); - pf_get_block(template.format, &template.block); - template.width[0] = width; - template.height[0] = height; - template.depth[0] = 1; + template.width0 = width; + template.height0 = height; + template.depth0 = 1; template.last_level = 0; template.tex_usage |= PIPE_TEXTURE_USAGE_RENDER_TARGET; template.tex_usage |= PIPE_TEXTURE_USAGE_PRIMARY; diff --git a/src/gallium/state_trackers/xorg/xorg_renderer.c b/src/gallium/state_trackers/xorg/xorg_renderer.c index 37c8942cff9..d80f341e6c2 100644 --- a/src/gallium/state_trackers/xorg/xorg_renderer.c +++ b/src/gallium/state_trackers/xorg/xorg_renderer.c @@ -5,6 +5,7 @@ #include "cso_cache/cso_context.h" #include "util/u_draw_quad.h" +#include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" #include "util/u_rect.h" @@ -170,14 +171,14 @@ add_vertex_data1(struct xorg_renderer *r, map_point(src_matrix, pt3[0], pt3[1], &pt3[0], &pt3[1]); } - s0 = pt0[0] / src->width[0]; - s1 = pt1[0] / src->width[0]; - s2 = pt2[0] / src->width[0]; - s3 = pt3[0] / src->width[0]; - t0 = pt0[1] / src->height[0]; - t1 = pt1[1] / src->height[0]; - t2 = pt2[1] / src->height[0]; - t3 = pt3[1] / src->height[0]; + s0 = pt0[0] / src->width0; + s1 = pt1[0] / src->width0; + s2 = pt2[0] / src->width0; + s3 = pt3[0] / src->width0; + t0 = pt0[1] / src->height0; + t1 = pt1[1] / src->height0; + t2 = pt2[1] / src->height0; + t3 = pt3[1] / src->height0; /* 1st vertex */ add_vertex_1tex(r, dstX, dstY, s0, t0); @@ -248,15 +249,15 @@ add_vertex_data2(struct xorg_renderer *r, map_point(mask_matrix, mpt1[0], mpt1[1], &mpt1[0], &mpt1[1]); } - src_s0 = spt0[0] / src->width[0]; - src_t0 = spt0[1] / src->height[0]; - src_s1 = spt1[0] / src->width[0]; - src_t1 = spt1[1] / src->height[0]; + src_s0 = spt0[0] / src->width0; + src_t0 = spt0[1] / src->height0; + src_s1 = spt1[0] / src->width0; + src_t1 = spt1[1] / src->height0; - mask_s0 = mpt0[0] / mask->width[0]; - mask_t0 = mpt0[1] / mask->height[0]; - mask_s1 = mpt1[0] / mask->width[0]; - mask_t1 = mpt1[1] / mask->height[0]; + mask_s0 = mpt0[0] / mask->width0; + mask_t0 = mpt0[1] / mask->height0; + mask_s1 = mpt1[0] / mask->width0; + mask_t1 = mpt1[1] / mask->height0; /* 1st vertex */ add_vertex_2tex(r, dstX, dstY, @@ -286,10 +287,10 @@ setup_vertex_data_yuv(struct xorg_renderer *r, spt1[0] = srcX + srcW; spt1[1] = srcY + srcH; - s0 = spt0[0] / tex[0]->width[0]; - t0 = spt0[1] / tex[0]->height[0]; - s1 = spt1[0] / tex[0]->width[0]; - t1 = spt1[1] / tex[0]->height[0]; + s0 = spt0[0] / tex[0]->width0; + t0 = spt0[1] / tex[0]->height0; + s1 = spt1[0] / tex[0]->width0; + t1 = spt1[1] / tex[0]->height0; /* 1st vertex */ add_vertex_1tex(r, dstX, dstY, s0, t0); @@ -510,10 +511,9 @@ renderer_clone_texture(struct xorg_renderer *r, templ.target = PIPE_TEXTURE_2D; templ.format = format; templ.last_level = 0; - templ.width[0] = src->width[0]; - templ.height[0] = src->height[0]; - templ.depth[0] = 1; - pf_get_block(format, &templ.block); + templ.width0 = src->width0; + templ.height0 = src->height0; + templ.depth0 = 1; templ.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER; pt = screen->texture_create(screen, &templ); @@ -534,13 +534,13 @@ renderer_clone_texture(struct xorg_renderer *r, ps_tex, /* dest */ 0, 0, /* destx/y */ ps_read, - 0, 0, src->width[0], src->height[0]); + 0, 0, src->width0, src->height0); } else { util_surface_copy(pipe, FALSE, ps_tex, /* dest */ 0, 0, /* destx/y */ ps_read, - 0, 0, src->width[0], src->height[0]); + 0, 0, src->width0, src->height0); } pipe_surface_reference(&ps_read, NULL); pipe_surface_reference(&ps_tex, NULL); diff --git a/src/gallium/state_trackers/xorg/xorg_xv.c b/src/gallium/state_trackers/xorg/xorg_xv.c index 19c50051a85..6b5a41a3727 100644 --- a/src/gallium/state_trackers/xorg/xorg_xv.c +++ b/src/gallium/state_trackers/xorg/xorg_xv.c @@ -13,6 +13,8 @@ #include "pipe/p_screen.h" #include "pipe/p_inlines.h" +#include "util/u_format.h" + /*XXX get these from pipe's texture limits */ #define IMAGE_MAX_WIDTH 2048 #define IMAGE_MAX_HEIGHT 2048 @@ -167,10 +169,9 @@ create_component_texture(struct pipe_context *pipe, templ.target = PIPE_TEXTURE_2D; templ.format = PIPE_FORMAT_L8_UNORM; templ.last_level = 0; - templ.width[0] = width; - templ.height[0] = height; - templ.depth[0] = 1; - pf_get_block(PIPE_FORMAT_L8_UNORM, &templ.block); + templ.width0 = width; + templ.height0 = height; + templ.depth0 = 1; templ.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER; tex = screen->texture_create(screen, &templ); @@ -183,18 +184,18 @@ check_yuv_textures(struct xorg_xv_port_priv *priv, int width, int height) { struct pipe_texture **dst = priv->yuv[priv->current_set]; if (!dst[0] || - dst[0]->width[0] != width || - dst[0]->height[0] != height) { + dst[0]->width0 != width || + dst[0]->height0 != height) { pipe_texture_reference(&dst[0], NULL); } if (!dst[1] || - dst[1]->width[0] != width || - dst[1]->height[0] != height) { + dst[1]->width0 != width || + dst[1]->height0 != height) { pipe_texture_reference(&dst[1], NULL); } if (!dst[2] || - dst[2]->width[0] != width || - dst[2]->height[0] != height) { + dst[2]->width0 != width || + dst[2]->height0 != height) { pipe_texture_reference(&dst[2], NULL); } diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c index bf9038f356e..0e39a390c69 100644 --- a/src/gallium/state_trackers/xorg/xvmc/surface.c +++ b/src/gallium/state_trackers/xorg/xvmc/surface.c @@ -1,8 +1,8 @@ /************************************************************************** - * + * * Copyright 2009 Younes Manton. * All Rights Reserved. - * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including @@ -10,11 +10,11 @@ * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. @@ -22,7 +22,7 @@ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * + * **************************************************************************/ #include <assert.h> @@ -103,10 +103,9 @@ CreateOrResizeBackBuffer(struct pipe_video_context *vpipe, unsigned int width, u /* XXX: Needs to match the drawable's format? */ template.format = PIPE_FORMAT_X8R8G8B8_UNORM; template.last_level = 0; - template.width[0] = width; - template.height[0] = height; - template.depth[0] = 1; - pf_get_block(template.format, &template.block); + template.width0 = width; + template.height0 = height; + template.depth0 = 1; template.tex_usage = PIPE_TEXTURE_USAGE_DISPLAY_TARGET; tex = vpipe->screen->texture_create(vpipe->screen, &template); diff --git a/src/gallium/winsys/drm/SConscript b/src/gallium/winsys/drm/SConscript index 9f7b383d2d3..66b73a8bf93 100644 --- a/src/gallium/winsys/drm/SConscript +++ b/src/gallium/winsys/drm/SConscript @@ -58,6 +58,11 @@ if env['dri']: 'intel/SConscript', ]) + if 'i965' in env['winsys']: + SConscript([ + 'i965/SConscript', + ]) + if 'radeon' in env['winsys']: SConscript([ 'radeon/SConscript', diff --git a/src/gallium/winsys/drm/i965/Makefile b/src/gallium/winsys/drm/i965/Makefile new file mode 100644 index 00000000000..d8feef6824a --- /dev/null +++ b/src/gallium/winsys/drm/i965/Makefile @@ -0,0 +1,12 @@ +# src/gallium/winsys/drm/intel/Makefile +TOP = ../../../../.. +include $(TOP)/configs/current + +SUBDIRS = gem $(GALLIUM_STATE_TRACKERS_DIRS) + +default install clean: + @for dir in $(SUBDIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir && $(MAKE) $@) || exit 1; \ + fi \ + done diff --git a/src/gallium/winsys/drm/i965/SConscript b/src/gallium/winsys/drm/i965/SConscript new file mode 100644 index 00000000000..50d7b75ed68 --- /dev/null +++ b/src/gallium/winsys/drm/i965/SConscript @@ -0,0 +1,7 @@ +Import('*') + +SConscript(['gem/SConscript',]) + +if 'mesa' in env['statetrackers']: + + SConscript(['dri/SConscript']) diff --git a/src/gallium/winsys/drm/i965/dri/Makefile b/src/gallium/winsys/drm/i965/dri/Makefile new file mode 100644 index 00000000000..f7e81eed87b --- /dev/null +++ b/src/gallium/winsys/drm/i965/dri/Makefile @@ -0,0 +1,26 @@ +TOP = ../../../../../.. +include $(TOP)/configs/current + +LIBNAME = i965_dri.so + +PIPE_DRIVERS = \ + $(TOP)/src/gallium/state_trackers/dri/libdridrm.a \ + $(TOP)/src/gallium/winsys/drm/i965/gem/libi965drm.a \ + $(TOP)/src/gallium/drivers/trace/libtrace.a \ + $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ + $(TOP)/src/gallium/drivers/identity/libidentity.a \ + $(TOP)/src/gallium/drivers/i965/libi965.a + + +DRIVER_SOURCES = + +C_SOURCES = \ + $(COMMON_GALLIUM_SOURCES) \ + $(DRIVER_SOURCES) + +include ../../Makefile.template + +DRI_LIB_DEPS += -ldrm_intel + +symlinks: $(TOP)/$(LIB_DIR)/gallium + @rm -f $(TOP)/$(LIB_DIR)/gallium/i965_dri.so diff --git a/src/gallium/winsys/drm/i965/dri/SConscript b/src/gallium/winsys/drm/i965/dri/SConscript new file mode 100644 index 00000000000..a99533fd245 --- /dev/null +++ b/src/gallium/winsys/drm/i965/dri/SConscript @@ -0,0 +1,19 @@ +Import('*') + +env = drienv.Clone() + +env.ParseConfig('pkg-config --cflags --libs libdrm_intel') + +drivers = [ + st_dri, + i965drm, + i965, + trace, +] + +env.LoadableModule( + target ='i965_dri.so', + source = COMMON_GALLIUM_SOURCES, + LIBS = drivers + mesa + gallium + env['LIBS'], + SHLIBPREFIX = '', +) diff --git a/src/gallium/winsys/drm/i965/egl/Makefile b/src/gallium/winsys/drm/i965/egl/Makefile new file mode 100644 index 00000000000..a1b32eb2a79 --- /dev/null +++ b/src/gallium/winsys/drm/i965/egl/Makefile @@ -0,0 +1,29 @@ +TOP = ../../../../../.. +GALLIUMDIR = ../../../.. +include $(TOP)/configs/current + +LIBNAME = EGL_i965.so + +PIPE_DRIVERS = \ + $(TOP)/src/gallium/state_trackers/egl/libegldrm.a \ + $(GALLIUMDIR)/winsys/drm/i965/gem/libi965drm.a \ + $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ + $(TOP)/src/gallium/drivers/trace/libtrace.a \ + $(TOP)/src/gallium/drivers/i965/libi965.a + +DRIVER_SOURCES = + +C_SOURCES = \ + $(COMMON_GALLIUM_SOURCES) \ + $(DRIVER_SOURCES) + +DRIVER_EXTRAS = -ldrm_intel + +ASM_SOURCES = + +DRIVER_DEFINES = -I../gem $(shell pkg-config libdrm --atleast-version=2.3.1 \ + && echo "-DDRM_VBLANK_FLIP=DRM_VBLANK_FLIP") + +include ../../Makefile.template + +symlinks: diff --git a/src/gallium/winsys/drm/i965/gem/Makefile b/src/gallium/winsys/drm/i965/gem/Makefile new file mode 100644 index 00000000000..6a7497b6be3 --- /dev/null +++ b/src/gallium/winsys/drm/i965/gem/Makefile @@ -0,0 +1,14 @@ +TOP = ../../../../../.. +include $(TOP)/configs/current + +LIBNAME = i965drm + +C_SOURCES = \ + i965_drm_buffer.c \ + i965_drm_api.c + +LIBRARY_INCLUDES = $(shell pkg-config libdrm --cflags-only-I) + +LIBRARY_DEFINES = $(shell pkg-config libdrm --cflags-only-other) + +include ../../../../Makefile.template diff --git a/src/gallium/winsys/drm/i965/gem/SConscript b/src/gallium/winsys/drm/i965/gem/SConscript new file mode 100644 index 00000000000..6256ec6eaf0 --- /dev/null +++ b/src/gallium/winsys/drm/i965/gem/SConscript @@ -0,0 +1,15 @@ +Import('*') + +env = drienv.Clone() + +i965drm_sources = [ + 'i965_drm_api.c', + 'i965_drm_buffer.c', +] + +i965drm = env.ConvenienceLibrary( + target ='i965drm', + source = i965drm_sources, +) + +Export('i965drm') diff --git a/src/gallium/winsys/drm/i965/gem/i965_drm_api.c b/src/gallium/winsys/drm/i965/gem/i965_drm_api.c new file mode 100644 index 00000000000..fc9678d2b62 --- /dev/null +++ b/src/gallium/winsys/drm/i965/gem/i965_drm_api.c @@ -0,0 +1,243 @@ + +#include <stdio.h> +#include "state_tracker/drm_api.h" + +#include "i965_drm_winsys.h" +#include "util/u_memory.h" + +#include "i965/brw_context.h" /* XXX: shouldn't be doing this */ +#include "i965/brw_screen.h" /* XXX: shouldn't be doing this */ + +#include "trace/tr_drm.h" + +/* + * Helper functions + */ + + +static void +i965_libdrm_get_device_id(unsigned int *device_id) +{ + char path[512]; + FILE *file; + void *shutup_gcc; + + /* + * FIXME: Fix this up to use a drm ioctl or whatever. + */ + + snprintf(path, sizeof(path), "/sys/class/drm/card0/device/device"); + file = fopen(path, "r"); + if (!file) { + return; + } + + shutup_gcc = fgets(path, sizeof(path), file); + sscanf(path, "%x", device_id); + fclose(file); +} + +static struct i965_libdrm_buffer * +i965_libdrm_buffer_from_handle(struct i965_libdrm_winsys *idws, + const char* name, unsigned handle) +{ + struct i965_libdrm_buffer *buf = CALLOC_STRUCT(i965_libdrm_buffer); + uint32_t swizzle = 0; + + if (BRW_DUMP) + debug_printf("%s\n", __FUNCTION__); + + if (!buf) + return NULL; + pipe_reference_init(&buf->base.reference, 1); + buf->bo = drm_intel_bo_gem_create_from_name(idws->gem, name, handle); + buf->base.size = buf->bo->size; + buf->base.sws = &idws->base; + buf->flinked = TRUE; + buf->flink = handle; + + + if (!buf->bo) + goto err; + + drm_intel_bo_get_tiling(buf->bo, &buf->tiling, &swizzle); + if (buf->tiling != 0) + buf->map_gtt = TRUE; + + return buf; + +err: + FREE(buf); + return NULL; +} + + +/* + * Exported functions + */ + + +static struct pipe_texture * +i965_libdrm_texture_from_shared_handle(struct drm_api *api, + struct pipe_screen *screen, + struct pipe_texture *template, + const char* name, + unsigned pitch, + unsigned handle) +{ + /* XXX: this is silly -- there should be a way to get directly from + * the "drm_api" struct to ourselves, without peering into + * unrelated code: + */ + struct i965_libdrm_winsys *idws = i965_libdrm_winsys(brw_screen(screen)->sws); + struct i965_libdrm_buffer *buffer; + + if (BRW_DUMP) + debug_printf("%s %s pitch %d handle 0x%x\n", __FUNCTION__, + name, pitch, handle); + + buffer = i965_libdrm_buffer_from_handle(idws, name, handle); + if (!buffer) + return NULL; + + return brw_texture_blanket_winsys_buffer(screen, template, pitch, + buffer->tiling, + &buffer->base); +} + + +static boolean +i965_libdrm_shared_handle_from_texture(struct drm_api *api, + struct pipe_screen *screen, + struct pipe_texture *texture, + unsigned *pitch, + unsigned *handle) +{ + struct i965_libdrm_buffer *buf = NULL; + struct brw_winsys_buffer *buffer = NULL; + + if (BRW_DUMP) + debug_printf("%s\n", __FUNCTION__); + + if (!brw_texture_get_winsys_buffer(texture, &buffer, pitch)) + return FALSE; + + buf = i965_libdrm_buffer(buffer); + if (!buf->flinked) { + if (drm_intel_bo_flink(buf->bo, &buf->flink)) + return FALSE; + buf->flinked = TRUE; + } + + *handle = buf->flink; + + if (BRW_DUMP) + debug_printf(" -> pitch %d handle 0x%x\n", *pitch, *handle); + + return TRUE; +} + +static boolean +i965_libdrm_local_handle_from_texture(struct drm_api *api, + struct pipe_screen *screen, + struct pipe_texture *texture, + unsigned *pitch, + unsigned *handle) +{ + struct brw_winsys_buffer *buffer = NULL; + + if (BRW_DUMP) + debug_printf("%s\n", __FUNCTION__); + + if (!brw_texture_get_winsys_buffer(texture, &buffer, pitch)) + return FALSE; + + *handle = i965_libdrm_buffer(buffer)->bo->handle; + + if (BRW_DUMP) + debug_printf(" -> pitch %d handle 0x%x\n", *pitch, *handle); + + return TRUE; +} + +static void +i965_libdrm_winsys_destroy(struct brw_winsys_screen *iws) +{ + struct i965_libdrm_winsys *idws = i965_libdrm_winsys(iws); + + if (BRW_DUMP) + debug_printf("%s\n", __FUNCTION__); + + drm_intel_bufmgr_destroy(idws->gem); + + FREE(idws); +} + +static struct pipe_screen * +i965_libdrm_create_screen(struct drm_api *api, int drmFD, + struct drm_create_screen_arg *arg) +{ + struct i965_libdrm_winsys *idws; + unsigned int deviceID; + + debug_printf("%s\n", __FUNCTION__); + + if (arg != NULL) { + switch(arg->mode) { + case DRM_CREATE_NORMAL: + break; + default: + return NULL; + } + } + + idws = CALLOC_STRUCT(i965_libdrm_winsys); + if (!idws) + return NULL; + + i965_libdrm_get_device_id(&deviceID); + + i965_libdrm_winsys_init_buffer_functions(idws); + + idws->fd = drmFD; + idws->id = deviceID; + + idws->base.destroy = i965_libdrm_winsys_destroy; + + idws->gem = drm_intel_bufmgr_gem_init(idws->fd, BRW_BATCH_SIZE); + drm_intel_bufmgr_gem_enable_reuse(idws->gem); + + idws->send_cmd = !debug_get_bool_option("BRW_NO_HW", FALSE); + + return brw_create_screen(&idws->base, deviceID); +} + +static struct pipe_context * +i965_libdrm_create_context(struct drm_api *api, struct pipe_screen *screen) +{ + return brw_create_context(screen); +} + +static void +destroy(struct drm_api *api) +{ + if (BRW_DUMP) + debug_printf("%s\n", __FUNCTION__); + +} + +struct drm_api i965_libdrm_api = +{ + .create_context = i965_libdrm_create_context, + .create_screen = i965_libdrm_create_screen, + .texture_from_shared_handle = i965_libdrm_texture_from_shared_handle, + .shared_handle_from_texture = i965_libdrm_shared_handle_from_texture, + .local_handle_from_texture = i965_libdrm_local_handle_from_texture, + .destroy = destroy, +}; + +struct drm_api * +drm_api_create() +{ + return trace_drm_create(&i965_libdrm_api); +} diff --git a/src/gallium/winsys/drm/i965/gem/i965_drm_buffer.c b/src/gallium/winsys/drm/i965/gem/i965_drm_buffer.c new file mode 100644 index 00000000000..a4a72b372dd --- /dev/null +++ b/src/gallium/winsys/drm/i965/gem/i965_drm_buffer.c @@ -0,0 +1,427 @@ + +#include "i965_drm_winsys.h" +#include "util/u_memory.h" + +#include "i915_drm.h" +#include "intel_bufmgr.h" + + + +const char *names[BRW_BUFFER_TYPE_MAX] = { + "TEXTURE", + "SCANOUT", + "VERTEX", + "CURBE", + "QUERY", + "SHADER_CONSTANTS", + "WM_SCRATCH", + "BATCH", + "GENERAL_STATE", + "SURFACE_STATE", + "PIXEL", + "GENERIC", +}; + +const char *usages[BRW_USAGE_MAX] = { + "STATE", + "QUERY_RESULT", + "RENDER_TARGET", + "DEPTH_BUFFER", + "BLIT_SOURCE", + "BLIT_DEST", + "SAMPLER", + "VERTEX", + "SCRATCH" +}; + + +const char *data_types[BRW_DATA_MAX] = +{ + "GS: CC_VP", + "GS: CC_UNIT", + "GS: WM_PROG", + "GS: SAMPLER_DEFAULT_COLOR", + "GS: SAMPLER", + "GS: WM_UNIT", + "GS: SF_PROG", + "GS: SF_VP", + "GS: SF_UNIT", + "GS: VS_UNIT", + "GS: VS_PROG", + "GS: GS_UNIT", + "GS: GS_PROG", + "GS: CLIP_VP", + "GS: CLIP_UNIT", + "GS: CLIP_PROG", + "SS: SURFACE", + "SS: SURF_BIND", + "CONSTANT DATA", + "BATCH DATA", + "(untyped)" +}; + +static enum pipe_error +i965_libdrm_bo_alloc(struct brw_winsys_screen *sws, + enum brw_buffer_type type, + unsigned size, + unsigned alignment, + struct brw_winsys_buffer **bo_out) +{ + struct i965_libdrm_winsys *idws = i965_libdrm_winsys(sws); + struct i965_libdrm_buffer *buf; + + if (BRW_DUMP) + debug_printf("%s type %s sz %d align %d\n", + __FUNCTION__, names[type], size, alignment ); + + buf = CALLOC_STRUCT(i965_libdrm_buffer); + if (!buf) + return PIPE_ERROR_OUT_OF_MEMORY; + + switch (type) { + case BRW_BUFFER_TYPE_TEXTURE: +/* case BRW_BUFFER_TYPE_SCANOUT:*/ + case BRW_BUFFER_TYPE_VERTEX: + case BRW_BUFFER_TYPE_CURBE: + case BRW_BUFFER_TYPE_QUERY: + case BRW_BUFFER_TYPE_SHADER_CONSTANTS: + case BRW_BUFFER_TYPE_SHADER_SCRATCH: + case BRW_BUFFER_TYPE_BATCH: + case BRW_BUFFER_TYPE_GENERAL_STATE: + case BRW_BUFFER_TYPE_SURFACE_STATE: + case BRW_BUFFER_TYPE_PIXEL: + case BRW_BUFFER_TYPE_GENERIC: + break; + case BRW_BUFFER_TYPE_SCANOUT: + buf->map_gtt = TRUE; + break; + default: + assert(0); + break; + } + + buf->bo = drm_intel_bo_alloc(idws->gem, + names[type], + size, + alignment); + + if (!buf->bo) + goto err; + + pipe_reference_init(&buf->base.reference, 1); + buf->base.size = size; + buf->base.sws = sws; + + *bo_out = &buf->base; + return PIPE_OK; + +err: + assert(0); + FREE(buf); + return PIPE_ERROR_OUT_OF_MEMORY; +} + +static void +i965_libdrm_bo_destroy(struct brw_winsys_buffer *buffer) +{ + struct i965_libdrm_buffer *buf = i965_libdrm_buffer(buffer); + + if (BRW_DUMP) + debug_printf("%s\n", __FUNCTION__); + + drm_intel_bo_unreference(buf->bo); + FREE(buffer); +} + +static enum pipe_error +i965_libdrm_bo_emit_reloc(struct brw_winsys_buffer *buffer, + enum brw_buffer_usage usage, + unsigned delta, + unsigned offset, + struct brw_winsys_buffer *buffer2) +{ + struct i965_libdrm_buffer *buf = i965_libdrm_buffer(buffer); + struct i965_libdrm_buffer *buf2 = i965_libdrm_buffer(buffer2); + int read, write; + int ret; + + if (BRW_DUMP) + debug_printf("%s buf %p offset %x delta %x buf2 %p/%s/%s\n", + __FUNCTION__, (void *)buffer, + offset, delta, + (void *)buffer2, names[buf2->data_type], usages[usage]); + + switch (usage) { + case BRW_USAGE_STATE: + read = I915_GEM_DOMAIN_INSTRUCTION; + write = 0; + break; + case BRW_USAGE_QUERY_RESULT: + read = I915_GEM_DOMAIN_INSTRUCTION; + write = I915_GEM_DOMAIN_INSTRUCTION; + break; + case BRW_USAGE_RENDER_TARGET: + read = I915_GEM_DOMAIN_RENDER; + write = 0; + break; + case BRW_USAGE_DEPTH_BUFFER: + read = I915_GEM_DOMAIN_RENDER; + write = I915_GEM_DOMAIN_RENDER; + break; + case BRW_USAGE_BLIT_SOURCE: + read = 0; + write = I915_GEM_DOMAIN_RENDER; + break; + case BRW_USAGE_BLIT_DEST: + read = I915_GEM_DOMAIN_RENDER; + write = I915_GEM_DOMAIN_RENDER; + break; + case BRW_USAGE_SAMPLER: + read = I915_GEM_DOMAIN_SAMPLER; + write = 0; + break; + case BRW_USAGE_VERTEX: + read = I915_GEM_DOMAIN_VERTEX; + write = 0; + break; + case BRW_USAGE_SCRATCH: + read = 0; + write = 0; + break; + default: + assert(0); + return -1; + } + + /* Needed?? + ((uint32_t *)buf->bo->virtual)[offset/4] = (delta + + buf2->bo->offset); + */ + + ret = dri_bo_emit_reloc( buf->bo, read, write, delta, offset, buf2->bo ); + if (ret) + return -1; + + return 0; +} + +static enum pipe_error +i965_libdrm_bo_exec(struct brw_winsys_buffer *buffer, + unsigned bytes_used) +{ + struct i965_libdrm_buffer *buf = i965_libdrm_buffer(buffer); + struct i965_libdrm_winsys *idws = i965_libdrm_winsys(buffer->sws); + int ret; + + if (BRW_DUMP) + debug_printf("execute buffer %p, bytes %d\n", (void *)buffer, bytes_used); + + if (idws->send_cmd) { + ret = dri_bo_exec(buf->bo, bytes_used, NULL, 0, 0); + if (ret) + return PIPE_ERROR; + } + + return PIPE_OK; +} + +static enum pipe_error +i965_libdrm_bo_subdata(struct brw_winsys_buffer *buffer, + enum brw_buffer_data_type data_type, + size_t offset, + size_t size, + const void *data, + const struct brw_winsys_reloc *reloc, + unsigned nr_reloc) +{ + struct i965_libdrm_buffer *buf = i965_libdrm_buffer(buffer); + struct i965_libdrm_winsys *idws = i965_libdrm_winsys(buffer->sws); + int ret, i; + + (void)data_type; + + if (BRW_DUMP) + debug_printf("%s buf %p off %d sz %d %s relocs: %d\n", + __FUNCTION__, + (void *)buffer, offset, size, + data_types[data_type], + nr_reloc); + + if (BRW_DUMP) + brw_dump_data( idws->id, + data_type, + buf->bo->offset + offset, + data, size ); + + /* XXX: use bo_map_gtt/memcpy/unmap_gtt under some circumstances??? + */ + ret = drm_intel_bo_subdata(buf->bo, offset, size, (void*)data); + if (ret) + return PIPE_ERROR; + + for (i = 0; i < nr_reloc; i++) { + i965_libdrm_bo_emit_reloc(buffer, reloc[i].usage, reloc[i].delta, + reloc[i].offset, reloc[i].bo); + } + + return PIPE_OK; +} + +static boolean +i965_libdrm_bo_is_busy(struct brw_winsys_buffer *buffer) +{ + struct i965_libdrm_buffer *buf = i965_libdrm_buffer(buffer); + boolean ret; + + if (BRW_DUMP) + debug_printf("%s %p\n", __FUNCTION__, (void *)buffer); + + ret = drm_intel_bo_busy(buf->bo); + + if (BRW_DUMP) + debug_printf(" --> %d\n", ret); + + return ret; +} + +static boolean +i965_libdrm_bo_references(struct brw_winsys_buffer *a, + struct brw_winsys_buffer *b) +{ + struct i965_libdrm_buffer *bufa = i965_libdrm_buffer(a); + struct i965_libdrm_buffer *bufb = i965_libdrm_buffer(b); + boolean ret; + + if (BRW_DUMP) + debug_printf("%s %p %p\n", __FUNCTION__, (void *)a, (void *)b); + + ret = drm_intel_bo_references(bufa->bo, bufb->bo); + + if (BRW_DUMP) + debug_printf(" --> %d\n", ret); + + return ret; +} + +/* XXX: couldn't this be handled by returning true/false on + * bo_emit_reloc? + */ +static enum pipe_error +i965_libdrm_check_aperture_space(struct brw_winsys_screen *iws, + struct brw_winsys_buffer **buffers, + unsigned count) +{ + static drm_intel_bo *bos[128]; + int i; + int ret; + + if (BRW_DUMP) + debug_printf("%s\n", __FUNCTION__); + + if (count > Elements(bos)) { + assert(0); + return FALSE; + } + + for (i = 0; i < count; i++) + bos[i] = i965_libdrm_buffer(buffers[i])->bo; + + /* XXX: converting from ??? to pipe_error: + */ + ret = dri_bufmgr_check_aperture_space(bos, count); + + if (BRW_DUMP) + debug_printf(" --> %d (ok == %d)\n", ret, PIPE_OK); + + return ret; +} + +static void * +i965_libdrm_bo_map(struct brw_winsys_buffer *buffer, + enum brw_buffer_data_type data_type, + unsigned offset, + unsigned length, + boolean write, + boolean discard, + boolean flush_explicit) +{ + struct i965_libdrm_buffer *buf = i965_libdrm_buffer(buffer); + int ret; + + + if (BRW_DUMP) + debug_printf("%s %p %s %s\n", __FUNCTION__, (void *)buffer, + write ? "read/write" : "read", + write ? data_types[data_type] : ""); + + if (!buf->map_count) { + if (buf->map_gtt) { + ret = drm_intel_gem_bo_map_gtt(buf->bo); + if (ret) + return NULL; + } + else { + ret = drm_intel_bo_map(buf->bo, write); + if (ret) + return NULL; + } + } + + buf->data_type = data_type; + buf->map_count++; + return buf->bo->virtual; +} + +static void +i965_libdrm_bo_flush_range(struct brw_winsys_buffer *buffer, + unsigned offset, + unsigned length) +{ + struct i965_libdrm_buffer *buf = i965_libdrm_buffer(buffer); + struct i965_libdrm_winsys *idws = i965_libdrm_winsys(buffer->sws); + + if (BRW_DUMP) + debug_printf("%s %s offset %d len %d\n", __FUNCTION__, + data_types[buf->data_type], + offset, length); + + if (BRW_DUMP) + brw_dump_data( idws->id, + buf->data_type, + buf->bo->offset + offset, + buf->bo->virtual + offset, + length ); +} + +static void +i965_libdrm_bo_unmap(struct brw_winsys_buffer *buffer) +{ + struct i965_libdrm_buffer *buf = i965_libdrm_buffer(buffer); + + if (BRW_DUMP) + debug_printf("%s\n", __FUNCTION__); + + if (--buf->map_count > 0) + return; + + if (buf->map_gtt) + drm_intel_gem_bo_unmap_gtt(buf->bo); + else + drm_intel_bo_unmap(buf->bo); +} + +void +i965_libdrm_winsys_init_buffer_functions(struct i965_libdrm_winsys *idws) +{ + idws->base.bo_alloc = i965_libdrm_bo_alloc; + idws->base.bo_destroy = i965_libdrm_bo_destroy; + idws->base.bo_emit_reloc = i965_libdrm_bo_emit_reloc; + idws->base.bo_exec = i965_libdrm_bo_exec; + idws->base.bo_subdata = i965_libdrm_bo_subdata; + idws->base.bo_is_busy = i965_libdrm_bo_is_busy; + idws->base.bo_references = i965_libdrm_bo_references; + idws->base.check_aperture_space = i965_libdrm_check_aperture_space; + idws->base.bo_map = i965_libdrm_bo_map; + idws->base.bo_flush_range = i965_libdrm_bo_flush_range; + idws->base.bo_unmap = i965_libdrm_bo_unmap; +} diff --git a/src/gallium/winsys/drm/i965/gem/i965_drm_winsys.h b/src/gallium/winsys/drm/i965/gem/i965_drm_winsys.h new file mode 100644 index 00000000000..c6a7d4a8c51 --- /dev/null +++ b/src/gallium/winsys/drm/i965/gem/i965_drm_winsys.h @@ -0,0 +1,64 @@ + +#ifndef INTEL_DRM_WINSYS_H +#define INTEL_DRM_WINSYS_H + +#include "i965/brw_winsys.h" + +#include "drm.h" +#include "intel_bufmgr.h" + + + +/* + * Winsys + */ + + +struct i965_libdrm_winsys +{ + struct brw_winsys_screen base; + drm_intel_bufmgr *gem; + + boolean send_cmd; + + int fd; /**< Drm file discriptor */ + + unsigned id; +}; + +static INLINE struct i965_libdrm_winsys * +i965_libdrm_winsys(struct brw_winsys_screen *iws) +{ + return (struct i965_libdrm_winsys *)iws; +} + +struct i965_libdrm_winsys *i965_libdrm_winsys_create(int fd, unsigned pci_id); + +void i965_libdrm_winsys_init_buffer_functions(struct i965_libdrm_winsys *idws); + + +/* Buffer. + */ +struct i965_libdrm_buffer { + struct brw_winsys_buffer base; + + drm_intel_bo *bo; + + void *ptr; + unsigned map_count; + unsigned data_type; /* valid while mapped */ + unsigned tiling; + + boolean map_gtt; + boolean flinked; + unsigned flink; +}; + +static INLINE struct i965_libdrm_buffer * +i965_libdrm_buffer(struct brw_winsys_buffer *buffer) +{ + return (struct i965_libdrm_buffer *)buffer; +} + + +#endif diff --git a/src/gallium/winsys/drm/i965/xlib/Makefile b/src/gallium/winsys/drm/i965/xlib/Makefile new file mode 100644 index 00000000000..0efa0ca6f9a --- /dev/null +++ b/src/gallium/winsys/drm/i965/xlib/Makefile @@ -0,0 +1,97 @@ +# src/gallium/winsys/xlib/Makefile + +# This makefile produces a "stand-alone" libGL.so which is based on +# Xlib (no DRI HW acceleration) + + +TOP = ../../../../../.. +include $(TOP)/configs/current + + +GL_MAJOR = 1 +GL_MINOR = 5 +GL_TINY = 0$(MESA_MAJOR)0$(MESA_MINOR)0$(MESA_TINY) + + +INCLUDE_DIRS = \ + -I$(TOP)/include \ + -I$(TOP)/src/mesa \ + -I$(TOP)/src/mesa/main \ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/gallium/drivers \ + -I$(TOP)/src/gallium/drivers/i965 \ + -I$(TOP)/src/gallium/drivers/i965/include \ + -I$(TOP)/src/gallium/state_trackers/glx/xlib \ + -I$(TOP)/src/gallium/auxiliary \ + -I/usr/include/drm + +XLIB_WINSYS_SOURCES = \ + xlib_i965.c \ + + + +XLIB_WINSYS_OBJECTS = $(XLIB_WINSYS_SOURCES:.c=.o) + + + +LIBS = \ + $(TOP)/src/gallium/drivers/i965/libi965.a \ + $(TOP)/src/gallium/drivers/trace/libtrace.a \ + $(TOP)/src/gallium/state_trackers/glx/xlib/libxlib.a \ + $(TOP)/src/mesa/libglapi.a \ + $(TOP)/src/mesa/libmesagallium.a \ + $(GALLIUM_AUXILIARIES) + +# $(TOP)/src/gallium/drivers/i965/lib/libi9xx.a \ + +.SUFFIXES : .cpp + +.c.o: + $(CC) -c $(INCLUDE_DIRS) $(DEFINES) $(CFLAGS) $< -o $@ + +.cpp.o: + $(CXX) -c $(INCLUDE_DIRS) $(DEFINES) $(CXXFLAGS) $< -o $@ + + + +default: $(TOP)/$(LIB_DIR)/gallium $(TOP)/$(LIB_DIR)/gallium/$(GL_LIB_NAME) + +$(TOP)/$(LIB_DIR)/gallium: + @ mkdir -p $(TOP)/$(LIB_DIR)/gallium + +# Make the libGL.so library +$(TOP)/$(LIB_DIR)/gallium/$(GL_LIB_NAME): $(XLIB_WINSYS_OBJECTS) $(LIBS) Makefile + $(TOP)/bin/mklib -o $(GL_LIB) \ + -linker "$(CC)" \ + -major $(GL_MAJOR) -minor $(GL_MINOR) -patch $(GL_TINY) \ + -install $(TOP)/$(LIB_DIR)/gallium \ + $(MKLIB_OPTIONS) $(XLIB_WINSYS_OBJECTS) \ + -Wl,--start-group $(LIBS) -Wl,--end-group $(GL_LIB_DEPS) + + +depend: $(XLIB_WINSYS_SOURCES) + @ echo "running $(MKDEP)" + @ rm -f depend # workaround oops on gutsy?!? + @ touch depend + $(MKDEP) $(MKDEP_OPTIONS) $(DEFINES) $(INCLUDE_DIRS) $(XLIB_WINSYS_SOURCES) \ + > /dev/null 2>/dev/null + + +install: default + $(INSTALL) -d $(INSTALL_DIR)/include/GL + $(INSTALL) -d $(INSTALL_DIR)/$(LIB_DIR) + $(INSTALL) -m 644 $(TOP)/include/GL/*.h $(INSTALL_DIR)/include/GL + @if [ -e $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME) ]; then \ + $(MINSTALL) $(TOP)/$(LIB_DIR)/libGL* $(INSTALL_DIR)/$(LIB_DIR); \ + fi + + +# Emacs tags +tags: + etags `find . -name \*.[ch]` $(TOP)/include/GL/*.h + +clean: + -rm -f *.o + + +include depend diff --git a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c new file mode 100644 index 00000000000..d2b9a1ab311 --- /dev/null +++ b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c @@ -0,0 +1,522 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Bismarck, ND., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * + **************************************************************************/ + +/* + * Authors: + * Keith Whitwell + * Brian Paul + */ + + +#include "util/u_memory.h" +#include "util/u_math.h" +#include "pipe/p_error.h" +#include "pipe/p_context.h" + +#include "xm_winsys.h" + +#include "i965/brw_winsys.h" +#include "i965/brw_screen.h" +#include "i965/brw_reg.h" +#include "i965/brw_structs_dump.h" + +#define MAX_VRAM (128*1024*1024) + + + +extern int brw_disasm (FILE *file, + const struct brw_instruction *inst, + unsigned count ); + +extern int intel_decode(const uint32_t *data, + int count, + uint32_t hw_offset, + uint32_t devid); + +struct xlib_brw_buffer +{ + struct brw_winsys_buffer base; + char *virtual; + unsigned offset; + unsigned type; + int map_count; + boolean modified; +}; + + +/** + * Subclass of brw_winsys_screen for Xlib winsys + */ +struct xlib_brw_winsys +{ + struct brw_winsys_screen base; + struct brw_chipset chipset; + + unsigned size; + unsigned used; +}; + +static struct xlib_brw_winsys * +xlib_brw_winsys( struct brw_winsys_screen *screen ) +{ + return (struct xlib_brw_winsys *)screen; +} + + +static struct xlib_brw_buffer * +xlib_brw_buffer( struct brw_winsys_buffer *buffer ) +{ + return (struct xlib_brw_buffer *)buffer; +} + + + +const char *names[BRW_BUFFER_TYPE_MAX] = { + "TEXTURE", + "SCANOUT", + "VERTEX", + "CURBE", + "QUERY", + "SHADER_CONSTANTS", + "WM_SCRATCH", + "BATCH", + "GENERAL_STATE", + "SURFACE_STATE", + "PIXEL", + "GENERIC", +}; + +const char *usages[BRW_USAGE_MAX] = { + "STATE", + "QUERY_RESULT", + "RENDER_TARGET", + "DEPTH_BUFFER", + "BLIT_SOURCE", + "BLIT_DEST", + "SAMPLER", + "VERTEX", + "SCRATCH" +}; + + +const char *data_types[BRW_DATA_MAX] = +{ + "GS: CC_VP", + "GS: CC_UNIT", + "GS: WM_PROG", + "GS: SAMPLER_DEFAULT_COLOR", + "GS: SAMPLER", + "GS: WM_UNIT", + "GS: SF_PROG", + "GS: SF_VP", + "GS: SF_UNIT", + "GS: VS_UNIT", + "GS: VS_PROG", + "GS: GS_UNIT", + "GS: GS_PROG", + "GS: CLIP_VP", + "GS: CLIP_UNIT", + "GS: CLIP_PROG", + "SS: SURFACE", + "SS: SURF_BIND", + "CONSTANT DATA", + "BATCH DATA", + "(untyped)" +}; + + +static enum pipe_error +xlib_brw_bo_alloc( struct brw_winsys_screen *sws, + enum brw_buffer_type type, + unsigned size, + unsigned alignment, + struct brw_winsys_buffer **bo_out ) +{ + struct xlib_brw_winsys *xbw = xlib_brw_winsys(sws); + struct xlib_brw_buffer *buf; + + if (BRW_DEBUG & DEBUG_WINSYS) + debug_printf("%s type %s sz %d align %d\n", + __FUNCTION__, names[type], size, alignment ); + + buf = CALLOC_STRUCT(xlib_brw_buffer); + if (!buf) + return PIPE_ERROR_OUT_OF_MEMORY; + + pipe_reference_init(&buf->base.reference, 1); + + buf->offset = align(xbw->used, alignment); + buf->type = type; + buf->virtual = MALLOC(size); + buf->base.size = size; + buf->base.sws = sws; + + xbw->used = align(xbw->used, alignment) + size; + if (xbw->used > MAX_VRAM) + goto err; + + /* XXX: possibly rentrant call to bo_destroy: + */ + bo_reference(bo_out, &buf->base); + return PIPE_OK; + +err: + assert(0); + FREE(buf->virtual); + FREE(buf); + return PIPE_ERROR_OUT_OF_MEMORY; +} + +static void +xlib_brw_bo_destroy( struct brw_winsys_buffer *buffer ) +{ + struct xlib_brw_buffer *buf = xlib_brw_buffer(buffer); + + FREE(buf); +} + +static int +xlib_brw_bo_emit_reloc( struct brw_winsys_buffer *buffer, + enum brw_buffer_usage usage, + unsigned delta, + unsigned offset, + struct brw_winsys_buffer *buffer2) +{ + struct xlib_brw_buffer *buf = xlib_brw_buffer(buffer); + struct xlib_brw_buffer *buf2 = xlib_brw_buffer(buffer2); + + if (BRW_DEBUG & DEBUG_WINSYS) + debug_printf("%s buf %p offset %x val %x + %x buf2 %p/%s/%s\n", + __FUNCTION__, (void *)buffer, offset, + buf2->offset, delta, + (void *)buffer2, names[buf2->type], usages[usage]); + + *(uint32_t *)(buf->virtual + offset) = buf2->offset + delta; + + return 0; +} + +static int +xlib_brw_bo_exec( struct brw_winsys_buffer *buffer, + unsigned bytes_used ) +{ + if (BRW_DEBUG & DEBUG_WINSYS) + debug_printf("execute buffer %p, bytes %d\n", (void *)buffer, bytes_used); + + return 0; +} + + + + +static int +xlib_brw_bo_subdata(struct brw_winsys_buffer *buffer, + enum brw_buffer_data_type data_type, + size_t offset, + size_t size, + const void *data, + const struct brw_winsys_reloc *reloc, + unsigned nr_relocs) +{ + struct xlib_brw_buffer *buf = xlib_brw_buffer(buffer); + struct xlib_brw_winsys *xbw = xlib_brw_winsys(buffer->sws); + unsigned i; + + if (BRW_DEBUG & DEBUG_WINSYS) + debug_printf("%s buf %p off %d sz %d %s relocs: %d\n", + __FUNCTION__, + (void *)buffer, offset, size, + data_types[data_type], + nr_relocs); + + assert(buf->base.size >= offset + size); + memcpy(buf->virtual + offset, data, size); + + /* Apply the relocations: + */ + for (i = 0; i < nr_relocs; i++) { + if (BRW_DEBUG & DEBUG_WINSYS) + debug_printf("\treloc[%d] usage %s off %d value %x+%x\n", + i, usages[reloc[i].usage], reloc[i].offset, + xlib_brw_buffer(reloc[i].bo)->offset, reloc[i].delta); + + *(unsigned *)(buf->virtual + offset + reloc[i].offset) = + xlib_brw_buffer(reloc[i].bo)->offset + reloc[i].delta; + } + + if (BRW_DUMP) + brw_dump_data( xbw->chipset.pci_id, + data_type, + buf->offset + offset, + buf->virtual + offset, size ); + + + return 0; +} + + +static boolean +xlib_brw_bo_is_busy(struct brw_winsys_buffer *buffer) +{ + if (BRW_DEBUG & DEBUG_WINSYS) + debug_printf("%s %p\n", __FUNCTION__, (void *)buffer); + return TRUE; +} + +static boolean +xlib_brw_bo_references(struct brw_winsys_buffer *a, + struct brw_winsys_buffer *b) +{ + if (BRW_DEBUG & DEBUG_WINSYS) + debug_printf("%s %p %p\n", __FUNCTION__, (void *)a, (void *)b); + return TRUE; +} + +static enum pipe_error +xlib_brw_check_aperture_space( struct brw_winsys_screen *iws, + struct brw_winsys_buffer **buffers, + unsigned count ) +{ + unsigned tot_size = 0; + unsigned i; + + for (i = 0; i < count; i++) + tot_size += buffers[i]->size; + + if (BRW_DEBUG & DEBUG_WINSYS) + debug_printf("%s %d bufs, tot_size: %d kb\n", + __FUNCTION__, count, + (tot_size + 1023) / 1024); + + return PIPE_OK; +} + +static void * +xlib_brw_bo_map(struct brw_winsys_buffer *buffer, + enum brw_buffer_data_type data_type, + unsigned offset, + unsigned length, + boolean write, + boolean discard, + boolean explicit) +{ + struct xlib_brw_buffer *buf = xlib_brw_buffer(buffer); + + if (BRW_DEBUG & DEBUG_WINSYS) + debug_printf("%s %p %s %s\n", __FUNCTION__, (void *)buffer, + write ? "read/write" : "read", + write ? data_types[data_type] : ""); + + if (write) + buf->modified = 1; + + buf->map_count++; + return buf->virtual; +} + + +static void +xlib_brw_bo_flush_range( struct brw_winsys_buffer *buffer, + unsigned offset, + unsigned length ) +{ +} + + +static void +xlib_brw_bo_unmap(struct brw_winsys_buffer *buffer) +{ + struct xlib_brw_buffer *buf = xlib_brw_buffer(buffer); + + if (BRW_DEBUG & DEBUG_WINSYS) + debug_printf("%s %p\n", __FUNCTION__, (void *)buffer); + + --buf->map_count; + assert(buf->map_count >= 0); + + if (buf->map_count == 0 && + buf->modified) { + + buf->modified = 0; + + /* Consider dumping new buffer contents here, using the + * flush-range info to minimize verbosity. + */ + } +} + + +static void +xlib_brw_bo_wait_idle( struct brw_winsys_buffer *buffer ) +{ +} + + +static void +xlib_brw_winsys_destroy( struct brw_winsys_screen *sws ) +{ + struct xlib_brw_winsys *xbw = xlib_brw_winsys(sws); + + FREE(xbw); +} + +static struct brw_winsys_screen * +xlib_create_brw_winsys_screen( void ) +{ + struct xlib_brw_winsys *ws; + + ws = CALLOC_STRUCT(xlib_brw_winsys); + if (!ws) + return NULL; + + ws->used = 0; + + ws->base.destroy = xlib_brw_winsys_destroy; + ws->base.bo_alloc = xlib_brw_bo_alloc; + ws->base.bo_destroy = xlib_brw_bo_destroy; + ws->base.bo_emit_reloc = xlib_brw_bo_emit_reloc; + ws->base.bo_exec = xlib_brw_bo_exec; + ws->base.bo_subdata = xlib_brw_bo_subdata; + ws->base.bo_is_busy = xlib_brw_bo_is_busy; + ws->base.bo_references = xlib_brw_bo_references; + ws->base.check_aperture_space = xlib_brw_check_aperture_space; + ws->base.bo_map = xlib_brw_bo_map; + ws->base.bo_flush_range = xlib_brw_bo_flush_range; + ws->base.bo_unmap = xlib_brw_bo_unmap; + ws->base.bo_wait_idle = xlib_brw_bo_wait_idle; + + return &ws->base; +} + + +/*********************************************************************** + * Implementation of Xlib co-state-tracker's winsys interface + */ + +static void +xlib_i965_display_surface(struct xmesa_buffer *xm_buffer, + struct pipe_surface *surf) +{ + struct brw_surface *surface = brw_surface(surf); + struct xlib_brw_buffer *bo = xlib_brw_buffer(surface->bo); + + if (BRW_DEBUG & DEBUG_WINSYS) + debug_printf("%s offset %x+%x sz %dx%d\n", __FUNCTION__, + bo->offset, + surface->draw_offset, + surf->width, + surf->height); +} + +static void +xlib_i965_flush_frontbuffer(struct pipe_screen *screen, + struct pipe_surface *surf, + void *context_private) +{ + xlib_i965_display_surface(NULL, surf); +} + + +static struct pipe_screen * +xlib_create_i965_screen( void ) +{ + struct brw_winsys_screen *winsys; + struct pipe_screen *screen; + + winsys = xlib_create_brw_winsys_screen(); + if (winsys == NULL) + return NULL; + + screen = brw_create_screen(winsys, PCI_CHIP_GM45_GM); + if (screen == NULL) + goto fail; + + xlib_brw_winsys(winsys)->chipset = brw_screen(screen)->chipset; + + screen->flush_frontbuffer = xlib_i965_flush_frontbuffer; + return screen; + +fail: + if (winsys) + winsys->destroy( winsys ); + + return NULL; +} + + +static struct pipe_context * +xlib_create_i965_context( struct pipe_screen *screen, + void *context_private ) +{ + struct pipe_context *pipe; + + pipe = brw_create_context(screen); + if (pipe == NULL) + goto fail; + + pipe->priv = context_private; + return pipe; + +fail: + /* Free stuff here */ + return NULL; +} + + + + +struct xm_driver xlib_i965_driver = +{ + .create_pipe_screen = xlib_create_i965_screen, + .create_pipe_context = xlib_create_i965_context, + .display_surface = xlib_i965_display_surface +}; + + +/* Register this driver at library load: + */ +static void _init( void ) __attribute__((constructor)); +static void _init( void ) +{ + xmesa_set_driver( &xlib_i965_driver ); +} + + + +/*********************************************************************** + * + * Butt-ugly hack to convince the linker not to throw away public GL + * symbols (they are all referenced from getprocaddress, I guess). + */ +extern void (*linker_foo(const unsigned char *procName))(); +extern void (*glXGetProcAddress(const unsigned char *procName))(); + +extern void (*linker_foo(const unsigned char *procName))() +{ + return glXGetProcAddress(procName); +} diff --git a/src/gallium/winsys/drm/i965/xorg/Makefile b/src/gallium/winsys/drm/i965/xorg/Makefile new file mode 100644 index 00000000000..d91d0006efd --- /dev/null +++ b/src/gallium/winsys/drm/i965/xorg/Makefile @@ -0,0 +1,57 @@ +TARGET = modesetting_drv.so +CFILES = $(wildcard ./*.c) +OBJECTS = $(patsubst ./%.c,./%.o,$(CFILES)) +TOP = ../../../../../.. + +include $(TOP)/configs/current + +INCLUDES = \ + $(shell pkg-config --cflags-only-I pixman-1 xorg-server libdrm xproto) \ + -I../gem \ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/gallium/drivers \ + -I$(TOP)/src/gallium/auxiliary \ + -I$(TOP)/src/mesa \ + -I$(TOP)/include \ + -I$(TOP)/src/egl/main + +LIBS = \ + $(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a \ + $(TOP)/src/gallium/winsys/drm/i965/gem/libi965drm.a \ + $(TOP)/src/gallium/drivers/i965/libi965.a \ + $(TOP)/src/gallium/drivers/trace/libtrace.a \ + $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ + $(GALLIUM_AUXILIARIES) + +DRIVER_DEFINES = \ + -DHAVE_CONFIG_H + + +############################################# + + + +all default: $(TARGET) + +$(TARGET): $(OBJECTS) Makefile $(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a $(LIBS) + $(TOP)/bin/mklib -noprefix -o $@ \ + $(OBJECTS) $(LIBS) $(shell pkg-config --libs libdrm) -ldrm_intel + +clean: + rm -rf $(OBJECTS) $(TARGET) + +install: + $(INSTALL) -d $(DESTDIR)/$(XORG_DRIVER_INSTALL_DIR) + $(MINSTALL) -m 755 $(TARGET) $(DESTDIR)/$(XORG_DRIVER_INSTALL_DIR) + + +############################################## + + +.c.o: + $(CC) -c $(CFLAGS) $(INCLUDES) $(DRIVER_DEFINES) $< -o $@ + + +############################################## + +.PHONY = all clean install diff --git a/src/gallium/winsys/drm/i965/xorg/intel_xorg.c b/src/gallium/winsys/drm/i965/xorg/intel_xorg.c new file mode 100644 index 00000000000..ac691cb76b3 --- /dev/null +++ b/src/gallium/winsys/drm/i965/xorg/intel_xorg.c @@ -0,0 +1,147 @@ +/* + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * + * Author: Alan Hourihane <[email protected]> + * Author: Jakob Bornecrantz <[email protected]> + * + */ + +#include "../../../../state_trackers/xorg/xorg_winsys.h" + +static void intel_xorg_identify(int flags); +static Bool intel_xorg_pci_probe(DriverPtr driver, + int entity_num, + struct pci_device *device, + intptr_t match_data); + +static const struct pci_id_match intel_xorg_device_match[] = { + {0x8086, PCI_MATCH_ANY, PCI_MATCH_ANY, PCI_MATCH_ANY, 0, 0, 0}, + {0, 0, 0}, +}; + +static SymTabRec intel_xorg_chipsets[] = { + {PCI_MATCH_ANY, "Intel Graphics Device"}, + {-1, NULL} +}; + +static PciChipsets intel_xorg_pci_devices[] = { + {PCI_MATCH_ANY, PCI_MATCH_ANY, NULL}, + {-1, -1, NULL} +}; + +static XF86ModuleVersionInfo intel_xorg_version = { + "modesetting", + MODULEVENDORSTRING, + MODINFOSTRING1, + MODINFOSTRING2, + XORG_VERSION_CURRENT, + 0, 1, 0, /* major, minor, patch */ + ABI_CLASS_VIDEODRV, + ABI_VIDEODRV_VERSION, + MOD_CLASS_VIDEODRV, + {0, 0, 0, 0} +}; + +/* + * Xorg driver exported structures + */ + +_X_EXPORT DriverRec modesetting = { + 1, + "modesetting", + intel_xorg_identify, + NULL, + xorg_tracker_available_options, + NULL, + 0, + NULL, + intel_xorg_device_match, + intel_xorg_pci_probe +}; + +static MODULESETUPPROTO(intel_xorg_setup); + +_X_EXPORT XF86ModuleData modesettingModuleData = { + &intel_xorg_version, + intel_xorg_setup, + NULL +}; + +/* + * Xorg driver functions + */ + +static pointer +intel_xorg_setup(pointer module, pointer opts, int *errmaj, int *errmin) +{ + static Bool setupDone = 0; + + /* This module should be loaded only once, but check to be sure. + */ + if (!setupDone) { + setupDone = 1; + xf86AddDriver(&modesetting, module, HaveDriverFuncs); + + /* + * The return value must be non-NULL on success even though there + * is no TearDownProc. + */ + return (pointer) 1; + } else { + if (errmaj) + *errmaj = LDR_ONCEONLY; + return NULL; + } +} + +static void +intel_xorg_identify(int flags) +{ + xf86PrintChipsets("modesetting", "Driver for Modesetting Kernel Drivers", + intel_xorg_chipsets); +} + +static Bool +intel_xorg_pci_probe(DriverPtr driver, + int entity_num, struct pci_device *device, intptr_t match_data) +{ + ScrnInfoPtr scrn = NULL; + EntityInfoPtr entity; + + scrn = xf86ConfigPciEntity(scrn, 0, entity_num, intel_xorg_pci_devices, + NULL, NULL, NULL, NULL, NULL); + if (scrn != NULL) { + scrn->driverVersion = 1; + scrn->driverName = "i965"; + scrn->name = "modesetting"; + scrn->Probe = NULL; + + entity = xf86GetEntityInfo(entity_num); + + /* Use all the functions from the xorg tracker */ + xorg_tracker_set_functions(scrn); + } + return scrn != NULL; +} diff --git a/src/gallium/winsys/drm/intel/dri/Makefile b/src/gallium/winsys/drm/intel/dri/Makefile index c0ecd9680e2..26aae4122eb 100644 --- a/src/gallium/winsys/drm/intel/dri/Makefile +++ b/src/gallium/winsys/drm/intel/dri/Makefile @@ -24,4 +24,3 @@ DRI_LIB_DEPS += -ldrm_intel symlinks: $(TOP)/$(LIB_DIR)/gallium @rm -f $(TOP)/$(LIB_DIR)/gallium/i965_dri.so - ln -s i915_dri.so $(TOP)/$(LIB_DIR)/gallium/i965_dri.so diff --git a/src/gallium/winsys/drm/intel/dri/SConscript b/src/gallium/winsys/drm/intel/dri/SConscript index b1b654d9f8b..104e987083f 100644 --- a/src/gallium/winsys/drm/intel/dri/SConscript +++ b/src/gallium/winsys/drm/intel/dri/SConscript @@ -15,6 +15,6 @@ drivers = [ env.LoadableModule( target ='i915_dri.so', source = COMMON_GALLIUM_SOURCES, - LIBS = drivers + mesa + auxiliaries + env['LIBS'], + LIBS = drivers + mesa + gallium + env['LIBS'], SHLIBPREFIX = '', ) diff --git a/src/gallium/winsys/drm/intel/gem/intel_drm_fence.c b/src/gallium/winsys/drm/intel/gem/intel_drm_fence.c index e70bfe7b44e..e8b58742ab7 100644 --- a/src/gallium/winsys/drm/intel/gem/intel_drm_fence.c +++ b/src/gallium/winsys/drm/intel/gem/intel_drm_fence.c @@ -39,11 +39,12 @@ intel_drm_fence_reference(struct intel_winsys *iws, struct intel_drm_fence *old = (struct intel_drm_fence *)*ptr; struct intel_drm_fence *f = (struct intel_drm_fence *)fence; - if (pipe_reference((struct pipe_reference**)ptr, &f->reference)) { + if (pipe_reference(&((struct intel_drm_fence *)(*ptr))->reference, &f->reference)) { if (old->bo) drm_intel_bo_unreference(old->bo); FREE(old); } + *ptr = fence; } static int diff --git a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c index 317dc44d22f..7106a06492d 100644 --- a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c +++ b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c @@ -1,5 +1,6 @@ #include "pipe/p_context.h" #include "pipe/p_state.h" +#include "util/u_format.h" #include "util/u_memory.h" #include "nouveau_drm_api.h" @@ -24,11 +25,10 @@ dri_surface_from_handle(struct drm_api *api, struct pipe_screen *pscreen, tmpl.tex_usage = PIPE_TEXTURE_USAGE_PRIMARY; tmpl.target = PIPE_TEXTURE_2D; tmpl.last_level = 0; - tmpl.depth[0] = 1; + tmpl.depth0 = 1; tmpl.format = format; - tmpl.width[0] = width; - tmpl.height[0] = height; - pf_get_block(tmpl.format, &tmpl.block); + tmpl.width0 = width; + tmpl.height0 = height; pt = api->texture_from_shared_handle(api, pscreen, &tmpl, "front buffer", pitch, handle); @@ -247,7 +247,7 @@ nouveau_drm_handle_from_pt(struct drm_api *api, struct pipe_screen *pscreen, return false; *handle = mt->bo->handle; - *stride = mt->base.nblocksx[0] * mt->base.block.size; + *stride = util_format_get_stride(mt->base.format, mt->base.width0); return true; } diff --git a/src/gallium/winsys/drm/radeon/core/Makefile b/src/gallium/winsys/drm/radeon/core/Makefile index 42a6f4abc21..860cbb6dbf8 100644 --- a/src/gallium/winsys/drm/radeon/core/Makefile +++ b/src/gallium/winsys/drm/radeon/core/Makefile @@ -7,8 +7,7 @@ LIBNAME = radeonwinsys C_SOURCES = \ radeon_buffer.c \ radeon_drm.c \ - radeon_r300.c \ - radeon_winsys_softpipe.c + radeon_r300.c LIBRARY_INCLUDES = -I$(TOP)/src/gallium/drivers/r300 \ $(shell pkg-config libdrm --cflags-only-I) diff --git a/src/gallium/winsys/drm/radeon/core/SConscript b/src/gallium/winsys/drm/radeon/core/SConscript index 2ad68e403fe..f4e9c397bdf 100644 --- a/src/gallium/winsys/drm/radeon/core/SConscript +++ b/src/gallium/winsys/drm/radeon/core/SConscript @@ -6,7 +6,6 @@ radeon_sources = [ 'radeon_buffer.c', 'radeon_drm.c', 'radeon_r300.c', - 'radeon_winsys_softpipe.c', ] env.Append(CPPPATH = '#/src/gallium/drivers/r300') diff --git a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c index 81cd9dc4fb1..d2367b245a2 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c +++ b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c @@ -35,7 +35,10 @@ #include "radeon_bo_gem.h" #include "softpipe/sp_texture.h" #include "r300_context.h" +#include "util/u_format.h" +#include "util/u_math.h" #include <X11/Xutil.h> + struct radeon_vl_context { Display *display; @@ -113,17 +116,13 @@ static struct pipe_buffer *radeon_surface_buffer_create(struct pipe_winsys *ws, unsigned tex_usage, unsigned *stride) { - struct pipe_format_block block; - unsigned nblocksx, nblocksy, size; - - pf_get_block(format, &block); - - nblocksx = pf_get_nblocksx(&block, width); - nblocksy = pf_get_nblocksy(&block, height); - /* Radeons enjoy things in multiples of 32. */ /* XXX this can be 32 when POT */ - *stride = (nblocksx * block.size + 63) & ~63; + const unsigned alignment = 64; + unsigned nblocksy, size; + + nblocksy = util_format_get_nblocksy(format, height); + *stride = align(util_format_get_stride(format, width), alignment); size = *stride * nblocksy; return radeon_buffer_create(ws, 64, usage, size); @@ -142,10 +141,15 @@ static void *radeon_buffer_map(struct pipe_winsys *ws, struct pipe_buffer *buffer, unsigned flags) { + struct radeon_winsys_priv *priv = ((struct radeon_winsys *)ws)->priv; struct radeon_pipe_buffer *radeon_buffer = (struct radeon_pipe_buffer*)buffer; int write = 0; + if (radeon_bo_is_referenced_by_cs(radeon_buffer->bo, priv->cs)) { + priv->flush_cb(priv->flush_data); + } + if (flags & PIPE_BUFFER_USAGE_DONTBLOCK) { uint32_t domain; @@ -317,13 +321,10 @@ struct pipe_surface *radeon_surface_from_handle(struct radeon_context *radeon_co memset(&tmpl, 0, sizeof(tmpl)); tmpl.tex_usage = PIPE_TEXTURE_USAGE_DISPLAY_TARGET; tmpl.target = PIPE_TEXTURE_2D; - tmpl.width[0] = w; - tmpl.height[0] = h; - tmpl.depth[0] = 1; + tmpl.width0 = w; + tmpl.height0 = h; + tmpl.depth0 = 1; tmpl.format = format; - pf_get_block(tmpl.format, &tmpl.block); - tmpl.nblocksx[0] = pf_get_nblocksx(&tmpl.block, w); - tmpl.nblocksy[0] = pf_get_nblocksy(&tmpl.block, h); pt = pipe_screen->texture_blanket(pipe_screen, &tmpl, &pitch, pb); if (pt == NULL) { diff --git a/src/gallium/winsys/drm/radeon/core/radeon_buffer.h b/src/gallium/winsys/drm/radeon/core/radeon_buffer.h index f5153b06af5..d7f17564a9f 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_buffer.h +++ b/src/gallium/winsys/drm/radeon/core/radeon_buffer.h @@ -45,6 +45,8 @@ #include "radeon_drm.h" +#include "radeon_winsys.h" + struct radeon_pipe_buffer { struct pipe_buffer base; struct radeon_bo *bo; @@ -66,14 +68,10 @@ struct radeon_winsys_priv { /* Current CS. */ struct radeon_cs* cs; -}; - -struct radeon_winsys { - /* Parent class. */ - struct pipe_winsys base; - /* This corresponds to void* radeon_winsys in r300_winsys. */ - struct radeon_winsys_priv* priv; + /* Flush CB */ + void (*flush_cb)(void *); + void *flush_data; }; struct radeon_winsys* radeon_pipe_winsys(int fb); diff --git a/src/gallium/winsys/drm/radeon/core/radeon_drm.c b/src/gallium/winsys/drm/radeon/core/radeon_drm.c index 69f14e54f26..05194fc52a2 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_drm.c +++ b/src/gallium/winsys/drm/radeon/core/radeon_drm.c @@ -29,21 +29,95 @@ * Joakim Sindholt <[email protected]> */ +#include "softpipe/sp_winsys.h" + #include "radeon_drm.h" +/* Helper function to do the ioctls needed for setup and init. */ +static void do_ioctls(int fd, struct radeon_winsys* winsys) +{ + struct drm_radeon_gem_info gem_info = {0}; + struct drm_radeon_info info = {0}; + int target = 0; + int retval; + + info.value = (unsigned long)⌖ + + /* We do things in a specific order here. + * + * First, the PCI ID. This is essential and should return usable numbers + * for all Radeons. If this fails, we probably got handed an FD for some + * non-Radeon card. + * + * The GB and Z pipe requests should always succeed, but they might not + * return sensical values for all chipsets, but that's alright because + * the pipe drivers already know that. + * + * The GEM info is actually bogus on the kernel side, as well as our side + * (see radeon_gem_info_ioctl in radeon_gem.c) but that's alright because + * we don't actually use the info for anything yet. + * XXX update the above when we can safely use vram_size instead of vram_visible */ + info.request = RADEON_INFO_DEVICE_ID; + retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)); + if (retval) { + fprintf(stderr, "%s: Failed to get PCI ID, " + "error number %d\n", __FUNCTION__, retval); + exit(1); + } + winsys->pci_id = target; + + info.request = RADEON_INFO_NUM_GB_PIPES; + retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)); + if (retval) { + fprintf(stderr, "%s: Failed to get GB pipe count, " + "error number %d\n", __FUNCTION__, retval); + exit(1); + } + winsys->gb_pipes = target; + + info.request = RADEON_INFO_NUM_Z_PIPES; + retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)); + if (retval) { + fprintf(stderr, "%s: Failed to get Z pipe count, " + "error number %d\n", __FUNCTION__, retval); + exit(1); + } + winsys->z_pipes = target; + + retval = drmCommandWriteRead(fd, DRM_RADEON_GEM_INFO, + &gem_info, sizeof(gem_info)); + if (retval) { + fprintf(stderr, "%s: Failed to get MM info, error number %d\n", + __FUNCTION__, retval); + exit(1); + } + winsys->gart_size = gem_info.gart_size; + /* XXX */ + winsys->vram_size = gem_info.vram_visible; +} + +/* Guess at whether this chipset should use r300g. + * + * I believe that this check is valid, but I haven't been exhaustive. */ +static boolean is_r3xx(int pciid) +{ + return (pciid > 0x3150) && (pciid < 0x796f); +} + /* Create a pipe_screen. */ struct pipe_screen* radeon_create_screen(struct drm_api* api, int drmFB, struct drm_create_screen_arg *arg) { - struct radeon_winsys* winsys = radeon_pipe_winsys(drmFB); + struct radeon_winsys* rwinsys = radeon_pipe_winsys(drmFB); + do_ioctls(drmFB, rwinsys); - if (debug_get_bool_option("RADEON_SOFTPIPE", FALSE)) { - return softpipe_create_screen((struct pipe_winsys*)winsys); + if (!is_r3xx(rwinsys->pci_id) || + debug_get_bool_option("RADEON_SOFTPIPE", FALSE)) { + return softpipe_create_screen((struct pipe_winsys*)rwinsys); } else { - struct r300_winsys* r300 = radeon_create_r300_winsys(drmFB, winsys); - FREE(winsys); - return r300_create_screen(r300); + radeon_setup_winsys(drmFB, rwinsys); + return r300_create_screen(rwinsys); } } @@ -51,11 +125,13 @@ struct pipe_screen* radeon_create_screen(struct drm_api* api, struct pipe_context* radeon_create_context(struct drm_api* api, struct pipe_screen* screen) { - if (debug_get_bool_option("RADEON_SOFTPIPE", FALSE)) { - return radeon_create_softpipe(screen->winsys); + struct radeon_winsys* rwinsys = (struct radeon_winsys*)screen->winsys; + + if (!is_r3xx(rwinsys->pci_id) || + debug_get_bool_option("RADEON_SOFTPIPE", FALSE)) { + return softpipe_create(screen); } else { - return r300_create_context(screen, - (struct r300_winsys*)screen->winsys); + return r300_create_context(screen, rwinsys); } } @@ -130,7 +206,7 @@ static boolean radeon_shared_handle_from_texture(struct drm_api *api, int retval, fd; struct drm_gem_flink flink; struct radeon_pipe_buffer* radeon_buffer; - struct pipe_buffer *buffer; + struct pipe_buffer *buffer = NULL; if (!radeon_buffer_from_texture(api, texture, &buffer, stride)) { return FALSE; @@ -163,7 +239,7 @@ static boolean radeon_local_handle_from_texture(struct drm_api *api, unsigned *stride, unsigned *handle) { - struct pipe_buffer *buffer; + struct pipe_buffer *buffer = NULL; if (!radeon_buffer_from_texture(api, texture, &buffer, stride)) { return FALSE; } diff --git a/src/gallium/winsys/drm/radeon/core/radeon_drm.h b/src/gallium/winsys/drm/radeon/core/radeon_drm.h index 9a789ec1a45..bf0e78138d7 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_drm.h +++ b/src/gallium/winsys/drm/radeon/core/radeon_drm.h @@ -44,7 +44,6 @@ #include "radeon_buffer.h" #include "radeon_r300.h" -#include "radeon_winsys_softpipe.h" /* XXX */ #include "r300_screen.h" diff --git a/src/gallium/winsys/drm/radeon/core/radeon_r300.c b/src/gallium/winsys/drm/radeon/core/radeon_r300.c index 7ea5d1fb4e7..0875ee41cbf 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_r300.c +++ b/src/gallium/winsys/drm/radeon/core/radeon_r300.c @@ -22,36 +22,29 @@ #include "radeon_r300.h" -static void radeon_r300_set_flush_cb(struct r300_winsys *winsys, - void (*flush_cb)(void *), - void *data) +static void radeon_set_flush_cb(struct radeon_winsys *winsys, + void (*flush_cb)(void *), + void *data) { - struct radeon_winsys_priv* priv = - (struct radeon_winsys_priv*)winsys->radeon_winsys; - - radeon_cs_space_set_flush(priv->cs, flush_cb, - data); + winsys->priv->flush_cb = flush_cb; + winsys->priv->flush_data = data; + radeon_cs_space_set_flush(winsys->priv->cs, flush_cb, data); } -static boolean radeon_r300_add_buffer(struct r300_winsys* winsys, - struct pipe_buffer* pbuffer, - uint32_t rd, - uint32_t wd) +static boolean radeon_add_buffer(struct radeon_winsys* winsys, + struct pipe_buffer* pbuffer, + uint32_t rd, + uint32_t wd) { - struct radeon_winsys_priv* priv = - (struct radeon_winsys_priv*)winsys->radeon_winsys; struct radeon_bo* bo = ((struct radeon_pipe_buffer*)pbuffer)->bo; - radeon_cs_space_add_persistent_bo(priv->cs, bo, rd, wd); + radeon_cs_space_add_persistent_bo(winsys->priv->cs, bo, rd, wd); return TRUE; } -static boolean radeon_r300_validate(struct r300_winsys* winsys) +static boolean radeon_validate(struct radeon_winsys* winsys) { - struct radeon_winsys_priv* priv = - (struct radeon_winsys_priv*)winsys->radeon_winsys; - - if (radeon_cs_space_check(priv->cs) < 0) { + if (radeon_cs_space_check(winsys->priv->cs) < 0) { return FALSE; } @@ -59,45 +52,37 @@ static boolean radeon_r300_validate(struct r300_winsys* winsys) return TRUE; } -static boolean radeon_r300_check_cs(struct r300_winsys* winsys, int size) +static boolean radeon_check_cs(struct radeon_winsys* winsys, int size) { - /* XXX check size here, lazy ass! */ - /* XXX also validate buffers */ - return TRUE; + struct radeon_cs* cs = winsys->priv->cs; + + return radeon_validate(winsys) && cs->cdw + size <= cs->ndw; } -static void radeon_r300_begin_cs(struct r300_winsys* winsys, - int size, - const char* file, - const char* function, - int line) +static void radeon_begin_cs(struct radeon_winsys* winsys, + int size, + const char* file, + const char* function, + int line) { - struct radeon_winsys_priv* priv = - (struct radeon_winsys_priv*)winsys->radeon_winsys; - - radeon_cs_begin(priv->cs, size, file, function, line); + radeon_cs_begin(winsys->priv->cs, size, file, function, line); } -static void radeon_r300_write_cs_dword(struct r300_winsys* winsys, - uint32_t dword) +static void radeon_write_cs_dword(struct radeon_winsys* winsys, + uint32_t dword) { - struct radeon_winsys_priv* priv = - (struct radeon_winsys_priv*)winsys->radeon_winsys; - - radeon_cs_write_dword(priv->cs, dword); + radeon_cs_write_dword(winsys->priv->cs, dword); } -static void radeon_r300_write_cs_reloc(struct r300_winsys* winsys, - struct pipe_buffer* pbuffer, - uint32_t rd, - uint32_t wd, - uint32_t flags) +static void radeon_write_cs_reloc(struct radeon_winsys* winsys, + struct pipe_buffer* pbuffer, + uint32_t rd, + uint32_t wd, + uint32_t flags) { - struct radeon_winsys_priv* priv = - (struct radeon_winsys_priv*)winsys->radeon_winsys; int retval = 0; - retval = radeon_cs_write_reloc(priv->cs, + retval = radeon_cs_write_reloc(winsys->priv->cs, ((struct radeon_pipe_buffer*)pbuffer)->bo, rd, wd, flags); if (retval) { @@ -106,132 +91,60 @@ static void radeon_r300_write_cs_reloc(struct r300_winsys* winsys, } } -static void radeon_r300_reset_bos(struct r300_winsys *winsys) +static void radeon_reset_bos(struct radeon_winsys *winsys) { - struct radeon_winsys_priv* priv = - (struct radeon_winsys_priv*)winsys->radeon_winsys; - radeon_cs_space_reset_bos(priv->cs); + radeon_cs_space_reset_bos(winsys->priv->cs); } -static void radeon_r300_end_cs(struct r300_winsys* winsys, - const char* file, - const char* function, - int line) +static void radeon_end_cs(struct radeon_winsys* winsys, + const char* file, + const char* function, + int line) { - struct radeon_winsys_priv* priv = - (struct radeon_winsys_priv*)winsys->radeon_winsys; - - radeon_cs_end(priv->cs, file, function, line); + radeon_cs_end(winsys->priv->cs, file, function, line); } -static void radeon_r300_flush_cs(struct r300_winsys* winsys) +static void radeon_flush_cs(struct radeon_winsys* winsys) { - struct radeon_winsys_priv* priv = - (struct radeon_winsys_priv*)winsys->radeon_winsys; int retval; /* Emit the CS. */ - retval = radeon_cs_emit(priv->cs); + retval = radeon_cs_emit(winsys->priv->cs); if (retval) { debug_printf("radeon: Bad CS, dumping...\n"); - radeon_cs_print(priv->cs, stderr); + radeon_cs_print(winsys->priv->cs, stderr); } /* Reset CS. * Someday, when we care about performance, we should really find a way * to rotate between two or three CS objects so that the GPU can be * spinning through one CS while another one is being filled. */ - radeon_cs_erase(priv->cs); + radeon_cs_erase(winsys->priv->cs); } -/* Helper function to do the ioctls needed for setup and init. */ -static void do_ioctls(struct r300_winsys* winsys, int fd) +void +radeon_setup_winsys(int fd, struct radeon_winsys* winsys) { - struct drm_radeon_gem_info gem_info = {0}; - struct drm_radeon_info info = {0}; - int target = 0; - int retval; - - info.value = (unsigned long)⌖ - - /* First, get the number of pixel pipes */ - info.request = RADEON_INFO_NUM_GB_PIPES; - retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)); - if (retval) { - fprintf(stderr, "%s: Failed to get GB pipe count, " - "error number %d\n", __FUNCTION__, retval); - exit(1); - } - winsys->gb_pipes = target; - - /* get Z pipes */ - info.request = RADEON_INFO_NUM_Z_PIPES; - retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)); - if (retval) { - fprintf(stderr, "%s: Failed to get GB pipe count, " - "error number %d\n", __FUNCTION__, retval); - exit(1); - } - winsys->z_pipes = target; - - /* Then, get PCI ID */ - info.request = RADEON_INFO_DEVICE_ID; - retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)); - if (retval) { - fprintf(stderr, "%s: Failed to get PCI ID, " - "error number %d\n", __FUNCTION__, retval); - exit(1); - } - winsys->pci_id = target; - - /* Finally, retrieve MM info */ - retval = drmCommandWriteRead(fd, DRM_RADEON_GEM_INFO, - &gem_info, sizeof(gem_info)); - if (retval) { - fprintf(stderr, "%s: Failed to get MM info, error number %d\n", - __FUNCTION__, retval); - exit(1); - } - winsys->gart_size = gem_info.gart_size; - /* XXX */ - winsys->vram_size = gem_info.vram_visible; -} - -struct r300_winsys* -radeon_create_r300_winsys(int fd, struct radeon_winsys* old_winsys) -{ - struct r300_winsys* winsys = CALLOC_STRUCT(r300_winsys); - struct radeon_winsys_priv* priv; - - if (winsys == NULL) { - return NULL; - } - - priv = old_winsys->priv; - - do_ioctls(winsys, fd); + struct radeon_winsys_priv* priv = winsys->priv; priv->csm = radeon_cs_manager_gem_ctor(fd); + /* Size limit on IBs is 64 kibibytes. */ priv->cs = radeon_cs_create(priv->csm, 1024 * 64 / 4); radeon_cs_set_limit(priv->cs, RADEON_GEM_DOMAIN_GTT, winsys->gart_size); radeon_cs_set_limit(priv->cs, RADEON_GEM_DOMAIN_VRAM, winsys->vram_size); - winsys->add_buffer = radeon_r300_add_buffer; - winsys->validate = radeon_r300_validate; - - winsys->check_cs = radeon_r300_check_cs; - winsys->begin_cs = radeon_r300_begin_cs; - winsys->write_cs_dword = radeon_r300_write_cs_dword; - winsys->write_cs_reloc = radeon_r300_write_cs_reloc; - winsys->end_cs = radeon_r300_end_cs; - winsys->flush_cs = radeon_r300_flush_cs; - winsys->reset_bos = radeon_r300_reset_bos; - winsys->set_flush_cb = radeon_r300_set_flush_cb; - - memcpy(winsys, old_winsys, sizeof(struct radeon_winsys)); - - return winsys; + winsys->add_buffer = radeon_add_buffer; + winsys->validate = radeon_validate; + + winsys->check_cs = radeon_check_cs; + winsys->begin_cs = radeon_begin_cs; + winsys->write_cs_dword = radeon_write_cs_dword; + winsys->write_cs_reloc = radeon_write_cs_reloc; + winsys->end_cs = radeon_end_cs; + winsys->flush_cs = radeon_flush_cs; + winsys->reset_bos = radeon_reset_bos; + winsys->set_flush_cb = radeon_set_flush_cb; } diff --git a/src/gallium/winsys/drm/radeon/core/radeon_r300.h b/src/gallium/winsys/drm/radeon/core/radeon_r300.h index 775d7937fd8..cfbdb302661 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_r300.h +++ b/src/gallium/winsys/drm/radeon/core/radeon_r300.h @@ -34,9 +34,6 @@ #include "radeon_buffer.h" -struct radeon_winsys; - -struct r300_winsys* -radeon_create_r300_winsys(int fd, struct radeon_winsys* old_winsys); +void radeon_setup_winsys(int fd, struct radeon_winsys* winsys); #endif /* RADEON_R300_H */ diff --git a/src/gallium/winsys/drm/radeon/core/radeon_winsys.h b/src/gallium/winsys/drm/radeon/core/radeon_winsys.h new file mode 100644 index 00000000000..9edc9e038c3 --- /dev/null +++ b/src/gallium/winsys/drm/radeon/core/radeon_winsys.h @@ -0,0 +1,105 @@ +/* + * Copyright © 2009 Corbin Simpson + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ +/* + * Authors: + * Corbin Simpson <[email protected]> + */ +#ifndef RADEON_WINSYS_H +#define RADEON_WINSYS_H + +#include "pipe/internal/p_winsys_screen.h" + +struct radeon_winsys_priv; + +struct radeon_winsys { + /* Parent class. */ + struct pipe_winsys base; + + /* Winsys private */ + struct radeon_winsys_priv* priv; + + /* PCI ID */ + uint32_t pci_id; + + /* GB pipe count */ + uint32_t gb_pipes; + + /* Z pipe count (rv530 only) */ + uint32_t z_pipes; + + /* GART size. */ + uint32_t gart_size; + + /* VRAM size. */ + uint32_t vram_size; + + /* Add a pipe_buffer to the list of buffer objects to validate. */ + boolean (*add_buffer)(struct radeon_winsys* winsys, + struct pipe_buffer* pbuffer, + uint32_t rd, + uint32_t wd); + + /* Revalidate all currently setup pipe_buffers. + * Returns TRUE if a flush is required. */ + boolean (*validate)(struct radeon_winsys* winsys); + + /* Check to see if there's room for commands. */ + boolean (*check_cs)(struct radeon_winsys* winsys, int size); + + /* Start a command emit. */ + void (*begin_cs)(struct radeon_winsys* winsys, + int size, + const char* file, + const char* function, + int line); + + /* Write a dword to the command buffer. */ + void (*write_cs_dword)(struct radeon_winsys* winsys, uint32_t dword); + + /* Write a relocated dword to the command buffer. */ + void (*write_cs_reloc)(struct radeon_winsys* winsys, + struct pipe_buffer* bo, + uint32_t rd, + uint32_t wd, + uint32_t flags); + + /* Finish a command emit. */ + void (*end_cs)(struct radeon_winsys* winsys, + const char* file, + const char* function, + int line); + + /* Flush the CS. */ + void (*flush_cs)(struct radeon_winsys* winsys); + + /* winsys flush - callback from winsys when flush required */ + void (*set_flush_cb)(struct radeon_winsys *winsys, + void (*flush_cb)(void *), void *data); + + void (*reset_bos)(struct radeon_winsys *winsys); +}; + +#endif diff --git a/src/gallium/winsys/drm/radeon/core/radeon_winsys_softpipe.h b/src/gallium/winsys/drm/radeon/core/radeon_winsys_softpipe.h deleted file mode 100644 index 04740e41a51..00000000000 --- a/src/gallium/winsys/drm/radeon/core/radeon_winsys_softpipe.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright © 2008 Jérôme Glisse - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ -/* - * Authors: - * Jérôme Glisse <[email protected]> - */ -#ifndef RADEON_WINSYS_SOFTPIPE_H -#define RADEON_WINSYS_SOFTPIPE_H - -#include <stdio.h> - -#include "pipe/p_defines.h" -#include "pipe/p_format.h" - -#include "softpipe/sp_winsys.h" - -#include "util/u_memory.h" - -struct pipe_context *radeon_create_softpipe(struct pipe_winsys* winsys); - -#endif diff --git a/src/gallium/winsys/drm/radeon/dri/SConscript b/src/gallium/winsys/drm/radeon/dri/SConscript index aea987a3aca..c4989d1b595 100644 --- a/src/gallium/winsys/drm/radeon/dri/SConscript +++ b/src/gallium/winsys/drm/radeon/dri/SConscript @@ -13,5 +13,5 @@ drivers = [ env.SharedLibrary( target ='radeon_dri.so', source = COMMON_GALLIUM_SOURCES, - LIBS = st_dri + radeonwinsys + mesa + drivers + auxiliaries + env['LIBS'], + LIBS = st_dri + radeonwinsys + mesa + drivers + gallium + env['LIBS'], ) diff --git a/src/gallium/winsys/drm/radeon/python/SConscript b/src/gallium/winsys/drm/radeon/python/SConscript index 3200fd8d1b0..91cae986975 100644 --- a/src/gallium/winsys/drm/radeon/python/SConscript +++ b/src/gallium/winsys/drm/radeon/python/SConscript @@ -29,5 +29,5 @@ if env['platform'] == 'linux': env.SharedLibrary( target ='_gallium', source = sources, - LIBS = [pyst] + drivers + auxiliaries + env['LIBS'], + LIBS = [pyst] + drivers + gallium + env['LIBS'], ) diff --git a/src/gallium/winsys/drm/radeon/xorg/Makefile b/src/gallium/winsys/drm/radeon/xorg/Makefile index 9fa16dab24c..0eb1b3988f3 100644 --- a/src/gallium/winsys/drm/radeon/xorg/Makefile +++ b/src/gallium/winsys/drm/radeon/xorg/Makefile @@ -1,11 +1,16 @@ -TARGET = modesetting_drv.so -CFILES = $(wildcard ./*.c) -OBJECTS = $(patsubst ./%.c,./%.o,$(CFILES)) -GALLIUMDIR = ../../../.. TOP = ../../../../../.. + +GALLIUMDIR = $(TOP)/src/gallium + +TARGET = radeong_drv.so + +CFILES = $(wildcard ./*.c) + include ${TOP}/configs/current +OBJECTS = $(patsubst ./%.c,./%.o,$(CFILES)) + CFLAGS = -DHAVE_CONFIG_H \ -g -Wall -Wimplicit-function-declaration -fPIC \ $(shell pkg-config --cflags pixman-1 xorg-server libdrm xproto) \ @@ -24,16 +29,21 @@ LIBS = \ $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ $(GALLIUM_AUXILIARIES) +TARGET_STAGING = $(TOP)/$(LIB_DIR)/gallium/$(TARGET) ############################################# +all default: $(TARGET) $(TARGET_STAGING) - -all default: $(TARGET) - -$(TARGET): $(OBJECTS) Makefile $(GALLIUMDIR)/state_trackers/xorg/libxorgtracker.a +$(TARGET): $(OBJECTS) Makefile $(GALLIUMDIR)/state_trackers/xorg/libxorgtracker.a $(LIBS) $(TOP)/bin/mklib -noprefix -o $@ \ $(OBJECTS) $(LIBS) $(shell pkg-config --libs libdrm) -ldrm_radeon +$(TOP)/$(LIB_DIR)/gallium: + mkdir -p $@ + +$(TARGET_STAGING): $(TARGET) $(TOP)/$(LIB_DIR)/gallium + $(INSTALL) $(TARGET) $(TOP)/$(LIB_DIR)/gallium + clean: rm -rf $(OBJECTS) $(TARGET) diff --git a/src/gallium/winsys/drm/radeon/xorg/radeon_xorg.c b/src/gallium/winsys/drm/radeon/xorg/radeon_xorg.c index 837f2aa8fec..bb76cc03499 100644 --- a/src/gallium/winsys/drm/radeon/xorg/radeon_xorg.c +++ b/src/gallium/winsys/drm/radeon/xorg/radeon_xorg.c @@ -53,7 +53,7 @@ static PciChipsets radeon_xorg_pci_devices[] = { }; static XF86ModuleVersionInfo radeon_xorg_version = { - "modesetting", + "radeong", MODULEVENDORSTRING, MODINFOSTRING1, MODINFOSTRING2, @@ -69,9 +69,9 @@ static XF86ModuleVersionInfo radeon_xorg_version = { * Xorg driver exported structures */ -_X_EXPORT DriverRec modesetting = { +_X_EXPORT DriverRec radeong = { 1, - "modesetting", + "radeong", radeon_xorg_identify, NULL, xorg_tracker_available_options, @@ -84,7 +84,7 @@ _X_EXPORT DriverRec modesetting = { static MODULESETUPPROTO(radeon_xorg_setup); -_X_EXPORT XF86ModuleData modesettingModuleData = { +_X_EXPORT XF86ModuleData radeongModuleData = { &radeon_xorg_version, radeon_xorg_setup, NULL @@ -103,7 +103,7 @@ radeon_xorg_setup(pointer module, pointer opts, int *errmaj, int *errmin) */ if (!setupDone) { setupDone = 1; - xf86AddDriver(&modesetting, module, HaveDriverFuncs); + xf86AddDriver(&radeong, module, HaveDriverFuncs); /* * The return value must be non-NULL on success even though there @@ -120,7 +120,7 @@ radeon_xorg_setup(pointer module, pointer opts, int *errmaj, int *errmin) static void radeon_xorg_identify(int flags) { - xf86PrintChipsets("modesetting", "Driver for Modesetting Kernel Drivers", + xf86PrintChipsets("radeong", "Driver for Radeon Gallium with KMS", radeon_xorg_chipsets); } @@ -135,8 +135,8 @@ radeon_xorg_pci_probe(DriverPtr driver, NULL, NULL, NULL, NULL, NULL); if (scrn != NULL) { scrn->driverVersion = 1; - scrn->driverName = "radeon"; - scrn->name = "modesetting"; + scrn->driverName = "radeong"; + scrn->name = "radeong"; scrn->Probe = NULL; entity = xf86GetEntityInfo(entity_num); diff --git a/src/gallium/winsys/drm/vmware/core/vmw_surface.c b/src/gallium/winsys/drm/vmware/core/vmw_surface.c index 64eb32f8b94..5f1b9ad5770 100644 --- a/src/gallium/winsys/drm/vmware/core/vmw_surface.c +++ b/src/gallium/winsys/drm/vmware/core/vmw_surface.c @@ -47,7 +47,7 @@ vmw_svga_winsys_surface_reference(struct vmw_svga_winsys_surface **pdst, src_ref = src ? &src->refcnt : NULL; dst_ref = dst ? &dst->refcnt : NULL; - if (pipe_reference(&dst_ref, src_ref)) { + if (pipe_reference(dst_ref, src_ref)) { vmw_ioctl_surface_destroy(dst->screen, dst->sid); #ifdef DEBUG /* to detect dangling pointers */ diff --git a/src/gallium/winsys/drm/vmware/dri/SConscript b/src/gallium/winsys/drm/vmware/dri/SConscript index 1019f577a5f..84319f91ff1 100644 --- a/src/gallium/winsys/drm/vmware/dri/SConscript +++ b/src/gallium/winsys/drm/vmware/dri/SConscript @@ -48,7 +48,7 @@ if env['platform'] == 'linux': svgadrm, svga, mesa, - auxiliaries, + gallium, ]) # TODO: write a wrapper function http://www.scons.org/wiki/WrapperFunctions diff --git a/src/gallium/winsys/drm/vmware/xorg/SConscript b/src/gallium/winsys/drm/vmware/xorg/SConscript index 5e78c1f89bf..1e5d8ff7fed 100644 --- a/src/gallium/winsys/drm/vmware/xorg/SConscript +++ b/src/gallium/winsys/drm/vmware/xorg/SConscript @@ -38,7 +38,7 @@ if env['platform'] == 'linux': st_xorg, svgadrm, svga, - auxiliaries, + gallium, ]) sources = [ diff --git a/src/gallium/winsys/egl_xlib/egl_xlib.c b/src/gallium/winsys/egl_xlib/egl_xlib.c index d02f8250478..599973ce127 100644 --- a/src/gallium/winsys/egl_xlib/egl_xlib.c +++ b/src/gallium/winsys/egl_xlib/egl_xlib.c @@ -41,6 +41,7 @@ #include "pipe/p_state.h" #include "pipe/internal/p_winsys_screen.h" #include "util/u_memory.h" +#include "util/u_math.h" #include "softpipe/sp_winsys.h" #include "softpipe/sp_texture.h" @@ -138,17 +139,6 @@ lookup_context(_EGLContext *ctx) } -static unsigned int -bitcount(unsigned int n) -{ - unsigned int bits; - for (bits = 0; n > 0; n = n >> 1) { - bits += (n & 1); - } - return bits; -} - - /** * Create the EGLConfigs. (one per X visual) */ @@ -174,9 +164,9 @@ create_configs(struct xlib_egl_display *xdpy, _EGLDisplay *disp) for (i = 0; i < num_visuals; i++) { _EGLConfig *config = calloc(1, sizeof(_EGLConfig)); int id = i + 1; - int rbits = bitcount(visInfo[i].red_mask); - int gbits = bitcount(visInfo[i].green_mask); - int bbits = bitcount(visInfo[i].blue_mask); + int rbits = util_bitcount(visInfo[i].red_mask); + int gbits = util_bitcount(visInfo[i].green_mask); + int bbits = util_bitcount(visInfo[i].blue_mask); int abits = bbits == 8 ? 8 : 0; int zbits = 24; int sbits = 8; diff --git a/src/gallium/winsys/egl_xlib/sw_winsys.c b/src/gallium/winsys/egl_xlib/sw_winsys.c index 79ff2cc985d..6ee3ede38cb 100644 --- a/src/gallium/winsys/egl_xlib/sw_winsys.c +++ b/src/gallium/winsys/egl_xlib/sw_winsys.c @@ -38,6 +38,7 @@ #include "pipe/internal/p_winsys_screen.h" #include "pipe/p_state.h" #include "pipe/p_inlines.h" +#include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -71,16 +72,6 @@ sw_pipe_buffer(struct pipe_buffer *b) } -/** - * Round n up to next multiple. - */ -static INLINE unsigned -round_up(unsigned n, unsigned multiple) -{ - return (n + multiple - 1) & ~(multiple - 1); -} - - static const char * get_name(struct pipe_winsys *pws) { @@ -170,13 +161,10 @@ surface_buffer_create(struct pipe_winsys *winsys, unsigned *stride) { const unsigned alignment = 64; - struct pipe_format_block block; - unsigned nblocksx, nblocksy; + unsigned nblocksy; - pf_get_block(format, &block); - nblocksx = pf_get_nblocksx(&block, width); - nblocksy = pf_get_nblocksy(&block, height); - *stride = round_up(nblocksx * block.size, alignment); + nblocksy = util_format_get_nblocksy(format, height); + *stride = align(util_format_get_stride(format, width), alignment); return winsys->buffer_create(winsys, alignment, usage, diff --git a/src/gallium/winsys/g3dvl/nouveau/Makefile b/src/gallium/winsys/g3dvl/nouveau/Makefile index 2997f6b79ce..3965bd949f4 100644 --- a/src/gallium/winsys/g3dvl/nouveau/Makefile +++ b/src/gallium/winsys/g3dvl/nouveau/Makefile @@ -19,11 +19,7 @@ CFLAGS += -g -Wall -Werror=implicit-function-declaration -fPIC \ LDFLAGS += -L${DRMDIR}/lib \ -L${DRIDIR}/lib \ -L${GALLIUMDIR}/winsys/drm/nouveau/common \ - -L${GALLIUMDIR}/auxiliary/draw \ - -L${GALLIUMDIR}/auxiliary/tgsi \ - -L${GALLIUMDIR}/auxiliary/translate \ - -L${GALLIUMDIR}/auxiliary/rtasm \ - -L${GALLIUMDIR}/auxiliary/cso_cache \ + -L${GALLIUMDIR}/auxiliary \ -L${GALLIUMDIR}/drivers/nv04 \ -L${GALLIUMDIR}/drivers/nv10 \ -L${GALLIUMDIR}/drivers/nv20 \ @@ -31,7 +27,7 @@ LDFLAGS += -L${DRMDIR}/lib \ -L${GALLIUMDIR}/drivers/nv40 \ -L${GALLIUMDIR}/drivers/nv50 -LIBS += -lnouveaudrm -ldriclient -ldrm_nouveau -ldrm -lnv04 -lnv10 -lnv20 -lnv30 -lnv40 -lnv50 -ldraw -ltgsi -ltranslate -lrtasm -lcso_cache -lm +LIBS += -lnouveaudrm -ldriclient -ldrm_nouveau -ldrm -lnv04 -lnv10 -lnv20 -lnv30 -lnv40 -lnv50 -lgallium -lm ############################################# diff --git a/src/gallium/winsys/g3dvl/xlib/Makefile b/src/gallium/winsys/g3dvl/xlib/Makefile index cf765ef51a5..9877660a276 100644 --- a/src/gallium/winsys/g3dvl/xlib/Makefile +++ b/src/gallium/winsys/g3dvl/xlib/Makefile @@ -25,13 +25,7 @@ SOURCES = xsp_winsys.c OBJECTS = $(SOURCES:.c=.o) $(TOP)/src/gallium/state_trackers/xorg/xvmc/*.o LIBS = $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ - $(TOP)/src/gallium/auxiliary/vl/libvl.a \ - $(TOP)/src/gallium/auxiliary/tgsi/libtgsi.a \ - $(TOP)/src/gallium/auxiliary/draw/libdraw.a \ - $(TOP)/src/gallium/auxiliary/translate/libtranslate.a \ - $(TOP)/src/gallium/auxiliary/cso_cache/libcso_cache.a \ - $(TOP)/src/gallium/auxiliary/rtasm/librtasm.a \ - $(TOP)/src/gallium/auxiliary/util/libutil.a + $(TOP)/src/gallium/auxiliary/libgallium.a .c.o: $(CC) -c $(INCLUDES) $(DEFINES) $(CFLAGS) $< -o $@ diff --git a/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c b/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c index 08067aad64c..f15bcd37b50 100644 --- a/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c +++ b/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c @@ -30,6 +30,7 @@ #include <pipe/internal/p_winsys_screen.h> #include <pipe/p_state.h> #include <pipe/p_inlines.h> +#include <util/u_format.h> #include <util/u_memory.h> #include <util/u_math.h> #include <softpipe/sp_winsys.h> @@ -138,13 +139,10 @@ static struct pipe_buffer* xsp_surface_buffer_create ) { const unsigned int ALIGNMENT = 1; - struct pipe_format_block block; - unsigned nblocksx, nblocksy; + unsigned nblocksy; - pf_get_block(format, &block); - nblocksx = pf_get_nblocksx(&block, width); - nblocksy = pf_get_nblocksy(&block, height); - *stride = align(nblocksx * block.size, ALIGNMENT); + nblocksy = util_format_get_nblocksy(format, height); + *stride = align(util_format_get_stride(format, width), ALIGNMENT); return pws->buffer_create(pws, ALIGNMENT, usage, *stride * nblocksy); diff --git a/src/gallium/winsys/gdi/SConscript b/src/gallium/winsys/gdi/SConscript index 917a81c4386..4cbc86f3311 100644 --- a/src/gallium/winsys/gdi/SConscript +++ b/src/gallium/winsys/gdi/SConscript @@ -47,5 +47,5 @@ if env['platform'] == 'windows': env.SharedLibrary( target ='opengl32', source = sources, - LIBS = wgl + glapi + mesa + drivers + auxiliaries + env['LIBS'], + LIBS = wgl + glapi + mesa + drivers + gallium + glsl + env['LIBS'], ) diff --git a/src/gallium/winsys/gdi/gdi_llvmpipe_winsys.c b/src/gallium/winsys/gdi/gdi_llvmpipe_winsys.c index e8bc0f55ac4..7d076be3a31 100644 --- a/src/gallium/winsys/gdi/gdi_llvmpipe_winsys.c +++ b/src/gallium/winsys/gdi/gdi_llvmpipe_winsys.c @@ -39,6 +39,7 @@ #include "pipe/p_format.h" #include "pipe/p_context.h" #include "pipe/p_inlines.h" +#include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" #include "llvmpipe/lp_winsys.h" @@ -49,7 +50,6 @@ struct gdi_llvmpipe_displaytarget { enum pipe_format format; - struct pipe_format_block block; unsigned width; unsigned height; unsigned stride; @@ -118,16 +118,6 @@ gdi_llvmpipe_displaytarget_destroy(struct llvmpipe_winsys *winsys, } -/** - * Round n up to next multiple. - */ -static INLINE unsigned -round_up(unsigned n, unsigned multiple) -{ - return (n + multiple - 1) & ~(multiple - 1); -} - - static struct llvmpipe_displaytarget * gdi_llvmpipe_displaytarget_create(struct llvmpipe_winsys *winsys, enum pipe_format format, @@ -147,10 +137,10 @@ gdi_llvmpipe_displaytarget_create(struct llvmpipe_winsys *winsys, gdt->width = width; gdt->height = height; - bpp = pf_get_bits(format); - cpp = pf_get_size(format); + bpp = util_format_get_blocksizebits(format); + cpp = util_format_get_blocksize(format); - gdt->stride = round_up(width * cpp, alignment); + gdt->stride = align(width * cpp, alignment); gdt->size = gdt->stride * height; gdt->data = align_malloc(gdt->size, alignment); diff --git a/src/gallium/winsys/gdi/gdi_softpipe_winsys.c b/src/gallium/winsys/gdi/gdi_softpipe_winsys.c index 5e0ccf32f48..2ad794c3f0f 100644 --- a/src/gallium/winsys/gdi/gdi_softpipe_winsys.c +++ b/src/gallium/winsys/gdi/gdi_softpipe_winsys.c @@ -42,6 +42,7 @@ #include "pipe/p_format.h" #include "pipe/p_context.h" #include "pipe/p_inlines.h" +#include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" #include "softpipe/sp_winsys.h" @@ -151,16 +152,6 @@ gdi_softpipe_user_buffer_create(struct pipe_winsys *winsys, } -/** - * Round n up to next multiple. - */ -static INLINE unsigned -round_up(unsigned n, unsigned multiple) -{ - return (n + multiple - 1) & ~(multiple - 1); -} - - static struct pipe_buffer * gdi_softpipe_surface_buffer_create(struct pipe_winsys *winsys, unsigned width, unsigned height, @@ -170,13 +161,10 @@ gdi_softpipe_surface_buffer_create(struct pipe_winsys *winsys, unsigned *stride) { const unsigned alignment = 64; - struct pipe_format_block block; - unsigned nblocksx, nblocksy; + unsigned nblocksy; - pf_get_block(format, &block); - nblocksx = pf_get_nblocksx(&block, width); - nblocksy = pf_get_nblocksy(&block, height); - *stride = round_up(nblocksx * block.size, alignment); + nblocksy = util_format_get_nblocksy(format, height); + *stride = align(util_format_get_stride(format, width), alignment); return winsys->buffer_create(winsys, alignment, usage, @@ -283,10 +271,10 @@ gdi_softpipe_present(struct pipe_screen *screen, memset(&bmi, 0, sizeof(BITMAPINFO)); bmi.bmiHeader.biSize = sizeof(BITMAPINFOHEADER); - bmi.bmiHeader.biWidth = texture->stride[surface->level] / pf_get_size(surface->format); + bmi.bmiHeader.biWidth = texture->stride[surface->level] / util_format_get_blocksize(surface->format); bmi.bmiHeader.biHeight= -(long)surface->height; bmi.bmiHeader.biPlanes = 1; - bmi.bmiHeader.biBitCount = pf_get_bits(surface->format); + bmi.bmiHeader.biBitCount = util_format_get_blocksizebits(surface->format); bmi.bmiHeader.biCompression = BI_RGB; bmi.bmiHeader.biSizeImage = 0; bmi.bmiHeader.biXPelsPerMeter = 0; diff --git a/src/gallium/winsys/xlib/Makefile b/src/gallium/winsys/xlib/Makefile index a0293fe9b4b..9482e8f9b11 100644 --- a/src/gallium/winsys/xlib/Makefile +++ b/src/gallium/winsys/xlib/Makefile @@ -23,17 +23,14 @@ INCLUDE_DIRS = \ -I$(TOP)/src/gallium/auxiliary DEFINES += \ - -DGALLIUM_SOFTPIPE \ - -DGALLIUM_TRACE \ - -DGALLIUM_BRW + -DGALLIUM_SOFTPIPE #-DGALLIUM_CELL will be defined by the config */ XLIB_WINSYS_SOURCES = \ xlib.c \ xlib_cell.c \ xlib_llvmpipe.c \ - xlib_softpipe.c \ - xlib_trace.c + xlib_softpipe.c XLIB_WINSYS_OBJECTS = $(XLIB_WINSYS_SOURCES:.c=.o) diff --git a/src/gallium/winsys/xlib/SConscript b/src/gallium/winsys/xlib/SConscript index dfe550f733b..713841aeb1b 100644 --- a/src/gallium/winsys/xlib/SConscript +++ b/src/gallium/winsys/xlib/SConscript @@ -22,7 +22,7 @@ if env['platform'] == 'linux' \ 'xlib.c', ] - drivers = [] + drivers = [trace] if 'softpipe' in env['drivers']: env.Append(CPPDEFINES = 'GALLIUM_SOFTPIPE') @@ -42,16 +42,11 @@ if env['platform'] == 'linux' \ sources += ['xlib_cell.c'] drivers += [cell] - if 'trace' in env['drivers']: - env.Append(CPPDEFINES = 'GALLIUM_TRACE') - sources += ['xlib_trace.c'] - drivers += [trace] - # TODO: write a wrapper function http://www.scons.org/wiki/WrapperFunctions libgl = env.SharedLibrary( target ='GL', source = sources, - LIBS = st_xlib + glapi + mesa + drivers + auxiliaries + env['LIBS'], + LIBS = st_xlib + glapi + mesa + glsl + drivers + gallium + env['LIBS'], ) env.InstallSharedLibrary(libgl, version=(1, 5)) diff --git a/src/gallium/winsys/xlib/xlib.c b/src/gallium/winsys/xlib/xlib.c index 163cc8863cb..6dbe05f193e 100644 --- a/src/gallium/winsys/xlib/xlib.c +++ b/src/gallium/winsys/xlib/xlib.c @@ -42,7 +42,6 @@ */ enum mode { - MODE_TRACE, MODE_CELL, MODE_LLVMPIPE, MODE_SOFTPIPE @@ -51,9 +50,6 @@ enum mode { static enum mode get_mode() { - if (getenv("XMESA_TRACE")) - return MODE_TRACE; - #ifdef GALLIUM_CELL if (!getenv("GALLIUM_NOCELL")) return MODE_CELL; @@ -73,11 +69,6 @@ static void _init( void ) enum mode xlib_mode = get_mode(); switch (xlib_mode) { - case MODE_TRACE: -#if defined(GALLIUM_TRACE) && defined(GALLIUM_SOFTPIPE) - xmesa_set_driver( &xlib_trace_driver ); -#endif - break; case MODE_CELL: #if defined(GALLIUM_CELL) xmesa_set_driver( &xlib_cell_driver ); diff --git a/src/gallium/winsys/xlib/xlib.h b/src/gallium/winsys/xlib/xlib.h index f0855035f77..8e091d0c084 100644 --- a/src/gallium/winsys/xlib/xlib.h +++ b/src/gallium/winsys/xlib/xlib.h @@ -5,7 +5,6 @@ #include "pipe/p_compiler.h" #include "xm_winsys.h" -extern struct xm_driver xlib_trace_driver; extern struct xm_driver xlib_softpipe_driver; extern struct xm_driver xlib_llvmpipe_driver; extern struct xm_driver xlib_cell_driver; diff --git a/src/gallium/winsys/xlib/xlib_cell.c b/src/gallium/winsys/xlib/xlib_cell.c index 5f8dc2ab241..47ae0519a4b 100644 --- a/src/gallium/winsys/xlib/xlib_cell.c +++ b/src/gallium/winsys/xlib/xlib_cell.c @@ -45,6 +45,7 @@ #include "pipe/p_format.h" #include "pipe/p_context.h" #include "pipe/p_inlines.h" +#include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -277,15 +278,6 @@ xm_user_buffer_create(struct pipe_winsys *pws, void *ptr, unsigned bytes) -/** - * Round n up to next multiple. - */ -static INLINE unsigned -round_up(unsigned n, unsigned multiple) -{ - return (n + multiple - 1) & ~(multiple - 1); -} - static struct pipe_buffer * xm_surface_buffer_create(struct pipe_winsys *winsys, unsigned width, unsigned height, @@ -295,18 +287,15 @@ xm_surface_buffer_create(struct pipe_winsys *winsys, unsigned *stride) { const unsigned alignment = 64; - struct pipe_format_block block; - unsigned nblocksx, nblocksy; + unsigned nblocksy; - pf_get_block(format, &block); - nblocksx = pf_get_nblocksx(&block, width); - nblocksy = pf_get_nblocksy(&block, height); - *stride = round_up(nblocksx * block.size, alignment); + nblocksy = util_format_get_nblocksy(format, height); + *stride = align(util_format_get_stride(format, width), alignment); return winsys->buffer_create(winsys, alignment, usage, /* XXX a bit of a hack */ - *stride * round_up(nblocksy, TILE_SIZE)); + *stride * align(nblocksy, TILE_SIZE)); } diff --git a/src/gallium/winsys/xlib/xlib_llvmpipe.c b/src/gallium/winsys/xlib/xlib_llvmpipe.c index 3dd15e099b0..2a434b5fd21 100644 --- a/src/gallium/winsys/xlib/xlib_llvmpipe.c +++ b/src/gallium/winsys/xlib/xlib_llvmpipe.c @@ -44,6 +44,7 @@ #include "pipe/p_format.h" #include "pipe/p_context.h" #include "pipe/p_inlines.h" +#include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" #include "llvmpipe/lp_winsys.h" @@ -58,7 +59,6 @@ struct xm_displaytarget { enum pipe_format format; - struct pipe_format_block block; unsigned width; unsigned height; unsigned stride; @@ -262,10 +262,10 @@ xm_llvmpipe_display(struct xmesa_buffer *xm_buffer, { if (xm_dt->tempImage == NULL) { - assert(xm_dt->block.width == 1); - assert(xm_dt->block.height == 1); + assert(util_format_get_blockwidth(xm_dt->format) == 1); + assert(util_format_get_blockheight(xm_dt->format) == 1); alloc_shm_ximage(xm_dt, xm_buffer, - xm_dt->stride / xm_dt->block.size, + xm_dt->stride / util_format_get_blocksize(xm_dt->format), xm_dt->height); } @@ -321,7 +321,7 @@ xm_displaytarget_create(struct llvmpipe_winsys *winsys, unsigned *stride) { struct xm_displaytarget *xm_dt = CALLOC_STRUCT(xm_displaytarget); - unsigned nblocksx, nblocksy, size; + unsigned nblocksy, size; xm_dt = CALLOC_STRUCT(xm_displaytarget); if(!xm_dt) @@ -331,10 +331,8 @@ xm_displaytarget_create(struct llvmpipe_winsys *winsys, xm_dt->width = width; xm_dt->height = height; - pf_get_block(format, &xm_dt->block); - nblocksx = pf_get_nblocksx(&xm_dt->block, width); - nblocksy = pf_get_nblocksy(&xm_dt->block, height); - xm_dt->stride = align(nblocksx * xm_dt->block.size, alignment); + nblocksy = util_format_get_nblocksy(format, height); + xm_dt->stride = align(util_format_get_stride(format, width), alignment); size = xm_dt->stride * nblocksy; #ifdef USE_XSHM diff --git a/src/gallium/winsys/xlib/xlib_softpipe.c b/src/gallium/winsys/xlib/xlib_softpipe.c index 260b39e2a0f..f7c0099584e 100644 --- a/src/gallium/winsys/xlib/xlib_softpipe.c +++ b/src/gallium/winsys/xlib/xlib_softpipe.c @@ -42,6 +42,7 @@ #include "pipe/p_format.h" #include "pipe/p_context.h" #include "pipe/p_inlines.h" +#include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" #include "softpipe/sp_winsys.h" @@ -254,10 +255,10 @@ xlib_softpipe_display_surface(struct xmesa_buffer *b, { if (xm_buf->tempImage == NULL) { - assert(surf->texture->block.width == 1); - assert(surf->texture->block.height == 1); + assert(util_format_get_blockwidth(surf->texture->format) == 1); + assert(util_format_get_blockheight(surf->texture->format) == 1); alloc_shm_ximage(xm_buf, b, spt->stride[surf->level] / - surf->texture->block.size, surf->height); + util_format_get_blocksize(surf->texture->format), surf->height); } ximage = xm_buf->tempImage; @@ -360,13 +361,10 @@ xm_surface_buffer_create(struct pipe_winsys *winsys, unsigned *stride) { const unsigned alignment = 64; - struct pipe_format_block block; - unsigned nblocksx, nblocksy, size; + unsigned nblocksy, size; - pf_get_block(format, &block); - nblocksx = pf_get_nblocksx(&block, width); - nblocksy = pf_get_nblocksy(&block, height); - *stride = align(nblocksx * block.size, alignment); + nblocksy = util_format_get_nblocksy(format, height); + *stride = align(util_format_get_stride(format, width), alignment); size = *stride * nblocksy; #ifdef USE_XSHM diff --git a/src/gallium/winsys/xlib/xlib_trace.c b/src/gallium/winsys/xlib/xlib_trace.c deleted file mode 100644 index dbea655ab45..00000000000 --- a/src/gallium/winsys/xlib/xlib_trace.c +++ /dev/null @@ -1,113 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Bismarck, ND., USA - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * - **************************************************************************/ - -/* - * Authors: - * Keith Whitwell - * Brian Paul - */ - - -#include "xlib.h" - -#include "trace/tr_screen.h" -#include "trace/tr_context.h" -#include "trace/tr_texture.h" - -#include "pipe/p_screen.h" - - - -static struct pipe_screen * -xlib_create_trace_screen( void ) -{ - struct pipe_screen *screen, *trace_screen; - - screen = xlib_softpipe_driver.create_pipe_screen(); - if (screen == NULL) - goto fail; - - /* Wrap it: - */ - trace_screen = trace_screen_create(screen); - if (trace_screen == NULL) - goto fail; - - return trace_screen; - -fail: - if (screen) - screen->destroy( screen ); - return NULL; -} - -static struct pipe_context * -xlib_create_trace_context( struct pipe_screen *_screen, - void *priv ) -{ - struct trace_screen *tr_scr = trace_screen( _screen ); - struct pipe_screen *screen = tr_scr->screen; - struct pipe_context *pipe, *trace_pipe; - - pipe = xlib_softpipe_driver.create_pipe_context( screen, priv ); - if (pipe == NULL) - goto fail; - - /* Wrap it: - */ - trace_pipe = trace_context_create(_screen, pipe); - if (trace_pipe == NULL) - goto fail; - - trace_pipe->priv = priv; - - return trace_pipe; - -fail: - if (pipe) - pipe->destroy( pipe ); - return NULL; -} - -static void -xlib_trace_display_surface( struct xmesa_buffer *buffer, - struct pipe_surface *_surf ) -{ - struct trace_surface *tr_surf = trace_surface( _surf ); - struct pipe_surface *surf = tr_surf->surface; - - xlib_softpipe_driver.display_surface( buffer, surf ); -} - - -struct xm_driver xlib_trace_driver = -{ - .create_pipe_screen = xlib_create_trace_screen, - .create_pipe_context = xlib_create_trace_context, - .display_surface = xlib_trace_display_surface, -}; |