diff options
Diffstat (limited to 'src/gallium')
307 files changed, 30442 insertions, 9191 deletions
diff --git a/src/gallium/Android.common.mk b/src/gallium/Android.common.mk new file mode 100644 index 00000000000..782510ff0f4 --- /dev/null +++ b/src/gallium/Android.common.mk @@ -0,0 +1,32 @@ +# Mesa 3-D graphics library +# +# Copyright (C) 2010-2011 Chia-I Wu <[email protected]> +# Copyright (C) 2010-2011 LunarG Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# src/gallium/Android.common.mk + +LOCAL_C_INCLUDES += \ + $(GALLIUM_TOP)/include \ + $(GALLIUM_TOP)/auxiliary \ + $(GALLIUM_TOP)/winsys \ + $(GALLIUM_TOP)/drivers + +include $(MESA_COMMON_MK) diff --git a/src/gallium/Android.mk b/src/gallium/Android.mk new file mode 100644 index 00000000000..b49a61b1ffd --- /dev/null +++ b/src/gallium/Android.mk @@ -0,0 +1,44 @@ +# Mesa 3-D graphics library +# +# Copyright (C) 2010-2011 Chia-I Wu <[email protected]> +# Copyright (C) 2010-2011 LunarG Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# src/gallium/Android.mk + +GALLIUM_TOP := $(call my-dir) +GALLIUM_COMMON_MK := $(GALLIUM_TOP)/Android.common.mk + +SUBDIRS := \ + targets/egl-static \ + state_trackers/egl \ + auxiliary + +# swrast +SUBDIRS += winsys/sw/android drivers/softpipe + +# r600g +ifneq ($(filter r600g, $(MESA_GPU_DRIVERS)),) +SUBDIRS += winsys/radeon/drm +SUBDIRS += winsys/r600/drm drivers/r600 +endif + +mkfiles := $(patsubst %,$(GALLIUM_TOP)/%/Android.mk,$(SUBDIRS)) +include $(mkfiles) diff --git a/src/gallium/auxiliary/Android.mk b/src/gallium/auxiliary/Android.mk new file mode 100644 index 00000000000..0c37dd31ab6 --- /dev/null +++ b/src/gallium/auxiliary/Android.mk @@ -0,0 +1,55 @@ +# Mesa 3-D graphics library +# +# Copyright (C) 2010-2011 Chia-I Wu <[email protected]> +# Copyright (C) 2010-2011 LunarG Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +LOCAL_PATH := $(call my-dir) + +# get C_SOURCES and GENERATED_SOURCES +include $(LOCAL_PATH)/Makefile.sources + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(C_SOURCES) + +LOCAL_C_INCLUDES := $(GALLIUM_TOP)/auxiliary/util + +LOCAL_MODULE := libmesa_gallium + +# generate sources +LOCAL_MODULE_CLASS := STATIC_LIBRARIES +intermediates := $(call local-intermediates-dir) +LOCAL_GENERATED_SOURCES := $(addprefix $(intermediates)/, $(GENERATED_SOURCES)) + +$(LOCAL_GENERATED_SOURCES): PRIVATE_PYTHON := $(MESA_PYTHON2) +$(LOCAL_GENERATED_SOURCES): PRIVATE_CUSTOM_TOOL = $(PRIVATE_PYTHON) $^ > $@ + +$(intermediates)/indices/u_indices_gen.c \ +$(intermediates)/indices/u_unfilled_gen.c \ +$(intermediates)/util/u_format_srgb.c \ +$(intermediates)/util/u_half.c: $(intermediates)/%.c: $(LOCAL_PATH)/%.py + $(transform-generated-source) + +$(intermediates)/util/u_format_table.c: $(intermediates)/%.c: $(LOCAL_PATH)/%.py $(LOCAL_PATH)/util/u_format.csv + $(transform-generated-source) + +include $(GALLIUM_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index 7dae7bc908b..896c058fde9 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -3,205 +3,10 @@ include $(TOP)/configs/current LIBNAME = gallium -C_SOURCES = \ - cso_cache/cso_cache.c \ - cso_cache/cso_context.c \ - cso_cache/cso_hash.c \ - draw/draw_context.c \ - draw/draw_fs.c \ - draw/draw_gs.c \ - draw/draw_pipe.c \ - draw/draw_pipe_aaline.c \ - draw/draw_pipe_aapoint.c \ - draw/draw_pipe_clip.c \ - draw/draw_pipe_cull.c \ - draw/draw_pipe_flatshade.c \ - draw/draw_pipe_offset.c \ - draw/draw_pipe_pstipple.c \ - draw/draw_pipe_stipple.c \ - draw/draw_pipe_twoside.c \ - draw/draw_pipe_unfilled.c \ - draw/draw_pipe_util.c \ - draw/draw_pipe_validate.c \ - draw/draw_pipe_vbuf.c \ - draw/draw_pipe_wide_line.c \ - draw/draw_pipe_wide_point.c \ - draw/draw_pt.c \ - draw/draw_pt_emit.c \ - draw/draw_pt_fetch.c \ - draw/draw_pt_fetch_emit.c \ - draw/draw_pt_fetch_shade_emit.c \ - draw/draw_pt_fetch_shade_pipeline.c \ - draw/draw_pt_post_vs.c \ - draw/draw_pt_so_emit.c \ - draw/draw_pt_util.c \ - draw/draw_pt_vsplit.c \ - draw/draw_vertex.c \ - draw/draw_vs.c \ - draw/draw_vs_aos.c \ - draw/draw_vs_aos_io.c \ - draw/draw_vs_aos_machine.c \ - draw/draw_vs_exec.c \ - draw/draw_vs_ppc.c \ - draw/draw_vs_sse.c \ - draw/draw_vs_variant.c \ - indices/u_indices_gen.c \ - indices/u_unfilled_gen.c \ - os/os_misc.c \ - os/os_stream.c \ - os/os_stream_log.c \ - os/os_stream_null.c \ - os/os_stream_stdc.c \ - os/os_stream_str.c \ - os/os_time.c \ - pipebuffer/pb_buffer_fenced.c \ - pipebuffer/pb_buffer_malloc.c \ - pipebuffer/pb_bufmgr_alt.c \ - pipebuffer/pb_bufmgr_cache.c \ - pipebuffer/pb_bufmgr_debug.c \ - pipebuffer/pb_bufmgr_mm.c \ - pipebuffer/pb_bufmgr_ondemand.c \ - pipebuffer/pb_bufmgr_pool.c \ - pipebuffer/pb_bufmgr_slab.c \ - pipebuffer/pb_validate.c \ - rbug/rbug_connection.c \ - rbug/rbug_context.c \ - rbug/rbug_core.c \ - rbug/rbug_demarshal.c \ - rbug/rbug_texture.c \ - rbug/rbug_shader.c \ - rtasm/rtasm_cpu.c \ - rtasm/rtasm_execmem.c \ - rtasm/rtasm_ppc.c \ - rtasm/rtasm_ppc_spe.c \ - rtasm/rtasm_x86sse.c \ - tgsi/tgsi_build.c \ - tgsi/tgsi_dump.c \ - tgsi/tgsi_exec.c \ - tgsi/tgsi_info.c \ - tgsi/tgsi_iterate.c \ - tgsi/tgsi_parse.c \ - tgsi/tgsi_ppc.c \ - tgsi/tgsi_sanity.c \ - tgsi/tgsi_scan.c \ - tgsi/tgsi_sse2.c \ - tgsi/tgsi_text.c \ - tgsi/tgsi_transform.c \ - tgsi/tgsi_ureg.c \ - tgsi/tgsi_util.c \ - translate/translate.c \ - translate/translate_cache.c \ - translate/translate_generic.c \ - translate/translate_sse.c \ - util/u_debug.c \ - util/u_debug_describe.c \ - util/u_debug_refcnt.c \ - util/u_debug_stack.c \ - util/u_debug_symbol.c \ - util/u_dump_defines.c \ - util/u_dump_state.c \ - util/u_bitmask.c \ - util/u_blit.c \ - util/u_blitter.c \ - util/u_cache.c \ - util/u_caps.c \ - util/u_cpu_detect.c \ - util/u_dl.c \ - util/u_draw.c \ - util/u_draw_quad.c \ - util/u_format.c \ - util/u_format_other.c \ - util/u_format_latc.c \ - util/u_format_s3tc.c \ - util/u_format_rgtc.c \ - util/u_format_srgb.c \ - util/u_format_table.c \ - util/u_format_tests.c \ - util/u_format_yuv.c \ - util/u_format_zs.c \ - util/u_framebuffer.c \ - util/u_gen_mipmap.c \ - util/u_half.c \ - util/u_handle_table.c \ - util/u_hash.c \ - util/u_hash_table.c \ - util/u_index_modify.c \ - util/u_keymap.c \ - util/u_linear.c \ - util/u_linkage.c \ - util/u_network.c \ - util/u_math.c \ - util/u_mm.c \ - util/u_pstipple.c \ - util/u_rect.c \ - util/u_ringbuffer.c \ - util/u_sampler.c \ - util/u_simple_shaders.c \ - util/u_slab.c \ - util/u_snprintf.c \ - util/u_staging.c \ - util/u_surface.c \ - util/u_surfaces.c \ - util/u_texture.c \ - util/u_tile.c \ - util/u_transfer.c \ - util/u_resource.c \ - util/u_upload_mgr.c \ - util/u_vbuf_mgr.c \ - vl/vl_csc.c \ - vl/vl_compositor.c \ - vl/vl_decoder.c \ - vl/vl_mpeg12_decoder.c \ - vl/vl_mpeg12_bitstream.c \ - vl/vl_zscan.c \ - vl/vl_idct.c \ - vl/vl_mc.c \ - vl/vl_vertex_buffers.c \ - vl/vl_video_buffer.c - -GALLIVM_SOURCES = \ - gallivm/lp_bld_arit.c \ - gallivm/lp_bld_assert.c \ - gallivm/lp_bld_bitarit.c \ - gallivm/lp_bld_const.c \ - gallivm/lp_bld_conv.c \ - gallivm/lp_bld_flow.c \ - gallivm/lp_bld_format_aos.c \ - gallivm/lp_bld_format_soa.c \ - gallivm/lp_bld_format_yuv.c \ - gallivm/lp_bld_gather.c \ - gallivm/lp_bld_init.c \ - gallivm/lp_bld_intr.c \ - gallivm/lp_bld_logic.c \ - gallivm/lp_bld_pack.c \ - gallivm/lp_bld_printf.c \ - gallivm/lp_bld_quad.c \ - gallivm/lp_bld_sample.c \ - gallivm/lp_bld_sample_aos.c \ - gallivm/lp_bld_sample_soa.c \ - gallivm/lp_bld_struct.c \ - gallivm/lp_bld_swizzle.c \ - gallivm/lp_bld_tgsi_aos.c \ - gallivm/lp_bld_tgsi_info.c \ - gallivm/lp_bld_tgsi_soa.c \ - gallivm/lp_bld_type.c \ - draw/draw_llvm.c \ - draw/draw_llvm_sample.c \ - draw/draw_llvm_translate.c \ - draw/draw_vs_llvm.c \ - draw/draw_pt_fetch_shade_pipeline_llvm.c - -GALLIVM_CPP_SOURCES = \ - gallivm/lp_bld_debug.cpp \ - gallivm/lp_bld_misc.cpp - -GENERATED_SOURCES = \ - indices/u_indices_gen.c \ - indices/u_unfilled_gen.c \ - util/u_format_srgb.c \ - util/u_format_table.c \ - util/u_half.c +# get source lists +include Makefile.sources +C_SOURCES += $(GENERATED_SOURCES) ifeq ($(MESA_LLVM),1) C_SOURCES += \ diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources new file mode 100644 index 00000000000..766beb0fafc --- /dev/null +++ b/src/gallium/auxiliary/Makefile.sources @@ -0,0 +1,200 @@ +C_SOURCES := \ + cso_cache/cso_cache.c \ + cso_cache/cso_context.c \ + cso_cache/cso_hash.c \ + draw/draw_context.c \ + draw/draw_fs.c \ + draw/draw_gs.c \ + draw/draw_pipe.c \ + draw/draw_pipe_aaline.c \ + draw/draw_pipe_aapoint.c \ + draw/draw_pipe_clip.c \ + draw/draw_pipe_cull.c \ + draw/draw_pipe_flatshade.c \ + draw/draw_pipe_offset.c \ + draw/draw_pipe_pstipple.c \ + draw/draw_pipe_stipple.c \ + draw/draw_pipe_twoside.c \ + draw/draw_pipe_unfilled.c \ + draw/draw_pipe_util.c \ + draw/draw_pipe_validate.c \ + draw/draw_pipe_vbuf.c \ + draw/draw_pipe_wide_line.c \ + draw/draw_pipe_wide_point.c \ + draw/draw_pt.c \ + draw/draw_pt_emit.c \ + draw/draw_pt_fetch.c \ + draw/draw_pt_fetch_emit.c \ + draw/draw_pt_fetch_shade_emit.c \ + draw/draw_pt_fetch_shade_pipeline.c \ + draw/draw_pt_post_vs.c \ + draw/draw_pt_so_emit.c \ + draw/draw_pt_util.c \ + draw/draw_pt_vsplit.c \ + draw/draw_vertex.c \ + draw/draw_vs.c \ + draw/draw_vs_aos.c \ + draw/draw_vs_aos_io.c \ + draw/draw_vs_aos_machine.c \ + draw/draw_vs_exec.c \ + draw/draw_vs_ppc.c \ + draw/draw_vs_sse.c \ + draw/draw_vs_variant.c \ + os/os_misc.c \ + os/os_stream.c \ + os/os_stream_log.c \ + os/os_stream_null.c \ + os/os_stream_stdc.c \ + os/os_stream_str.c \ + os/os_time.c \ + pipebuffer/pb_buffer_fenced.c \ + pipebuffer/pb_buffer_malloc.c \ + pipebuffer/pb_bufmgr_alt.c \ + pipebuffer/pb_bufmgr_cache.c \ + pipebuffer/pb_bufmgr_debug.c \ + pipebuffer/pb_bufmgr_mm.c \ + pipebuffer/pb_bufmgr_ondemand.c \ + pipebuffer/pb_bufmgr_pool.c \ + pipebuffer/pb_bufmgr_slab.c \ + pipebuffer/pb_validate.c \ + postprocess/pp_celshade.c \ + postprocess/pp_colors.c \ + postprocess/pp_init.c \ + postprocess/pp_mlaa.c \ + postprocess/pp_run.c \ + postprocess/pp_program.c \ + rbug/rbug_connection.c \ + rbug/rbug_context.c \ + rbug/rbug_core.c \ + rbug/rbug_demarshal.c \ + rbug/rbug_texture.c \ + rbug/rbug_shader.c \ + rtasm/rtasm_cpu.c \ + rtasm/rtasm_execmem.c \ + rtasm/rtasm_ppc.c \ + rtasm/rtasm_ppc_spe.c \ + rtasm/rtasm_x86sse.c \ + tgsi/tgsi_build.c \ + tgsi/tgsi_dump.c \ + tgsi/tgsi_exec.c \ + tgsi/tgsi_info.c \ + tgsi/tgsi_iterate.c \ + tgsi/tgsi_parse.c \ + tgsi/tgsi_ppc.c \ + tgsi/tgsi_sanity.c \ + tgsi/tgsi_scan.c \ + tgsi/tgsi_sse2.c \ + tgsi/tgsi_text.c \ + tgsi/tgsi_transform.c \ + tgsi/tgsi_ureg.c \ + tgsi/tgsi_util.c \ + translate/translate.c \ + translate/translate_cache.c \ + translate/translate_generic.c \ + translate/translate_sse.c \ + util/u_debug.c \ + util/u_debug_describe.c \ + util/u_debug_memory.c \ + util/u_debug_refcnt.c \ + util/u_debug_stack.c \ + util/u_debug_symbol.c \ + util/u_dump_defines.c \ + util/u_dump_state.c \ + util/u_bitmask.c \ + util/u_blit.c \ + util/u_blitter.c \ + util/u_cache.c \ + util/u_caps.c \ + util/u_cpu_detect.c \ + util/u_dl.c \ + util/u_draw.c \ + util/u_draw_quad.c \ + util/u_format.c \ + util/u_format_other.c \ + util/u_format_latc.c \ + util/u_format_s3tc.c \ + util/u_format_rgtc.c \ + util/u_format_tests.c \ + util/u_format_yuv.c \ + util/u_format_zs.c \ + util/u_framebuffer.c \ + util/u_gen_mipmap.c \ + util/u_handle_table.c \ + util/u_hash.c \ + util/u_hash_table.c \ + util/u_index_modify.c \ + util/u_keymap.c \ + util/u_linear.c \ + util/u_linkage.c \ + util/u_network.c \ + util/u_math.c \ + util/u_mm.c \ + util/u_pstipple.c \ + util/u_rect.c \ + util/u_ringbuffer.c \ + util/u_sampler.c \ + util/u_simple_shaders.c \ + util/u_slab.c \ + util/u_snprintf.c \ + util/u_staging.c \ + util/u_surface.c \ + util/u_surfaces.c \ + util/u_texture.c \ + util/u_tile.c \ + util/u_transfer.c \ + util/u_resource.c \ + util/u_upload_mgr.c \ + util/u_vbuf_mgr.c \ + vl/vl_csc.c \ + vl/vl_compositor.c \ + vl/vl_decoder.c \ + vl/vl_mpeg12_decoder.c \ + vl/vl_mpeg12_bitstream.c \ + vl/vl_zscan.c \ + vl/vl_idct.c \ + vl/vl_mc.c \ + vl/vl_vertex_buffers.c \ + vl/vl_video_buffer.c + +GENERATED_SOURCES := \ + indices/u_indices_gen.c \ + indices/u_unfilled_gen.c \ + util/u_format_srgb.c \ + util/u_format_table.c \ + util/u_half.c + +GALLIVM_SOURCES := \ + gallivm/lp_bld_arit.c \ + gallivm/lp_bld_assert.c \ + gallivm/lp_bld_bitarit.c \ + gallivm/lp_bld_const.c \ + gallivm/lp_bld_conv.c \ + gallivm/lp_bld_flow.c \ + gallivm/lp_bld_format_aos.c \ + gallivm/lp_bld_format_soa.c \ + gallivm/lp_bld_format_yuv.c \ + gallivm/lp_bld_gather.c \ + gallivm/lp_bld_init.c \ + gallivm/lp_bld_intr.c \ + gallivm/lp_bld_logic.c \ + gallivm/lp_bld_pack.c \ + gallivm/lp_bld_printf.c \ + gallivm/lp_bld_quad.c \ + gallivm/lp_bld_sample.c \ + gallivm/lp_bld_sample_aos.c \ + gallivm/lp_bld_sample_soa.c \ + gallivm/lp_bld_struct.c \ + gallivm/lp_bld_swizzle.c \ + gallivm/lp_bld_tgsi_aos.c \ + gallivm/lp_bld_tgsi_info.c \ + gallivm/lp_bld_tgsi_soa.c \ + gallivm/lp_bld_type.c \ + draw/draw_llvm.c \ + draw/draw_llvm_sample.c \ + draw/draw_llvm_translate.c \ + draw/draw_vs_llvm.c \ + draw/draw_pt_fetch_shade_pipeline_llvm.c + +GALLIVM_CPP_SOURCES := \ + gallivm/lp_bld_debug.cpp \ + gallivm/lp_bld_misc.cpp diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index d18f55f1644..07c420e138d 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -47,201 +47,20 @@ env.Depends('util/u_format_table.c', [ 'util/u_format_pack.py', ]) -source = [ - 'cso_cache/cso_cache.c', - 'cso_cache/cso_context.c', - 'cso_cache/cso_hash.c', - 'draw/draw_context.c', - 'draw/draw_fs.c', - 'draw/draw_gs.c', - 'draw/draw_pipe.c', - 'draw/draw_pipe_aaline.c', - 'draw/draw_pipe_aapoint.c', - 'draw/draw_pipe_clip.c', - 'draw/draw_pipe_cull.c', - 'draw/draw_pipe_flatshade.c', - 'draw/draw_pipe_offset.c', - 'draw/draw_pipe_pstipple.c', - 'draw/draw_pipe_stipple.c', - 'draw/draw_pipe_twoside.c', - 'draw/draw_pipe_unfilled.c', - 'draw/draw_pipe_util.c', - 'draw/draw_pipe_validate.c', - 'draw/draw_pipe_vbuf.c', - 'draw/draw_pipe_wide_line.c', - 'draw/draw_pipe_wide_point.c', - 'draw/draw_pt.c', - 'draw/draw_pt_emit.c', - 'draw/draw_pt_fetch.c', - 'draw/draw_pt_fetch_emit.c', - 'draw/draw_pt_fetch_shade_emit.c', - 'draw/draw_pt_fetch_shade_pipeline.c', - 'draw/draw_pt_post_vs.c', - 'draw/draw_pt_so_emit.c', - 'draw/draw_pt_util.c', - 'draw/draw_pt_vsplit.c', - 'draw/draw_vertex.c', - 'draw/draw_vs.c', - 'draw/draw_vs_aos.c', - 'draw/draw_vs_aos_io.c', - 'draw/draw_vs_aos_machine.c', - 'draw/draw_vs_exec.c', - 'draw/draw_vs_ppc.c', - 'draw/draw_vs_sse.c', - 'draw/draw_vs_variant.c', - #'indices/u_indices.c', - #'indices/u_unfilled_indices.c', - 'indices/u_indices_gen.c', - 'indices/u_unfilled_gen.c', - 'os/os_misc.c', - 'os/os_stream.c', - 'os/os_stream_log.c', - 'os/os_stream_null.c', - 'os/os_stream_stdc.c', - 'os/os_stream_str.c', - 'os/os_time.c', - 'pipebuffer/pb_buffer_fenced.c', - 'pipebuffer/pb_buffer_malloc.c', - 'pipebuffer/pb_bufmgr_alt.c', - 'pipebuffer/pb_bufmgr_cache.c', - 'pipebuffer/pb_bufmgr_debug.c', - 'pipebuffer/pb_bufmgr_mm.c', - 'pipebuffer/pb_bufmgr_ondemand.c', - 'pipebuffer/pb_bufmgr_pool.c', - 'pipebuffer/pb_bufmgr_slab.c', - 'pipebuffer/pb_validate.c', - 'rbug/rbug_connection.c', - 'rbug/rbug_context.c', - 'rbug/rbug_core.c', - 'rbug/rbug_demarshal.c', - 'rbug/rbug_shader.c', - 'rbug/rbug_texture.c', - 'rtasm/rtasm_cpu.c', - 'rtasm/rtasm_execmem.c', - 'rtasm/rtasm_ppc.c', - 'rtasm/rtasm_ppc_spe.c', - 'rtasm/rtasm_x86sse.c', - 'tgsi/tgsi_build.c', - 'tgsi/tgsi_dump.c', - 'tgsi/tgsi_exec.c', - 'tgsi/tgsi_info.c', - 'tgsi/tgsi_iterate.c', - 'tgsi/tgsi_parse.c', - 'tgsi/tgsi_ppc.c', - 'tgsi/tgsi_sanity.c', - 'tgsi/tgsi_scan.c', - 'tgsi/tgsi_sse2.c', - 'tgsi/tgsi_text.c', - 'tgsi/tgsi_transform.c', - 'tgsi/tgsi_ureg.c', - 'tgsi/tgsi_util.c', - 'translate/translate.c', - 'translate/translate_cache.c', - 'translate/translate_generic.c', - 'translate/translate_sse.c', - 'util/u_bitmask.c', - 'util/u_blit.c', - 'util/u_blitter.c', - 'util/u_cache.c', - 'util/u_caps.c', - 'util/u_cpu_detect.c', - 'util/u_debug.c', - 'util/u_debug_describe.c', - 'util/u_debug_memory.c', - 'util/u_debug_refcnt.c', - 'util/u_debug_stack.c', - 'util/u_debug_symbol.c', - 'util/u_dump_defines.c', - 'util/u_dump_state.c', - 'util/u_dl.c', - 'util/u_draw.c', - 'util/u_draw_quad.c', - 'util/u_format.c', - 'util/u_format_other.c', - 'util/u_format_latc.c', - 'util/u_format_s3tc.c', - 'util/u_format_rgtc.c', - 'util/u_format_srgb.c', - 'util/u_format_table.c', - 'util/u_format_tests.c', - 'util/u_format_yuv.c', - 'util/u_format_zs.c', - 'util/u_framebuffer.c', - 'util/u_gen_mipmap.c', - 'util/u_half.c', - 'util/u_handle_table.c', - 'util/u_hash.c', - 'util/u_hash_table.c', - 'util/u_index_modify.c', - 'util/u_keymap.c', - 'util/u_linear.c', - 'util/u_linkage.c', - 'util/u_network.c', - 'util/u_math.c', - 'util/u_mm.c', - 'util/u_pstipple.c', - 'util/u_rect.c', - 'util/u_resource.c', - 'util/u_ringbuffer.c', - 'util/u_sampler.c', - 'util/u_simple_shaders.c', - 'util/u_slab.c', - 'util/u_snprintf.c', - 'util/u_staging.c', - 'util/u_surface.c', - 'util/u_surfaces.c', - 'util/u_texture.c', - 'util/u_tile.c', - 'util/u_transfer.c', - 'util/u_upload_mgr.c', - 'util/u_vbuf_mgr.c', - 'vl/vl_csc.c', - 'vl/vl_compositor.c', - 'vl/vl_decoder.c', - 'vl/vl_mpeg12_decoder.c', - 'vl/vl_mpeg12_bitstream.c', - 'vl/vl_zscan.c', - 'vl/vl_idct.c', - 'vl/vl_mc.c', - 'vl/vl_vertex_buffers.c', - 'vl/vl_video_buffer.c', -] +source = env.ParseSourceList('Makefile.sources', [ + 'C_SOURCES', + 'GENERATED_SOURCES' +]) if env['llvm']: - source += [ - 'gallivm/lp_bld_arit.c', - 'gallivm/lp_bld_assert.c', - 'gallivm/lp_bld_bitarit.c', - 'gallivm/lp_bld_const.c', - 'gallivm/lp_bld_conv.c', - 'gallivm/lp_bld_debug.cpp', - 'gallivm/lp_bld_flow.c', - 'gallivm/lp_bld_format_aos.c', - 'gallivm/lp_bld_format_soa.c', - 'gallivm/lp_bld_format_yuv.c', - 'gallivm/lp_bld_gather.c', - 'gallivm/lp_bld_init.c', - 'gallivm/lp_bld_intr.c', - 'gallivm/lp_bld_logic.c', - 'gallivm/lp_bld_misc.cpp', - 'gallivm/lp_bld_pack.c', - 'gallivm/lp_bld_printf.c', - 'gallivm/lp_bld_quad.c', - 'gallivm/lp_bld_sample.c', - 'gallivm/lp_bld_sample_aos.c', - 'gallivm/lp_bld_sample_soa.c', - 'gallivm/lp_bld_struct.c', - 'gallivm/lp_bld_swizzle.c', - 'gallivm/lp_bld_tgsi_aos.c', - 'gallivm/lp_bld_tgsi_info.c', - 'gallivm/lp_bld_tgsi_soa.c', - 'gallivm/lp_bld_type.c', - 'draw/draw_llvm.c', - 'draw/draw_llvm_sample.c', - 'draw/draw_llvm_translate.c', - 'draw/draw_pt_fetch_shade_pipeline_llvm.c', - 'draw/draw_vs_llvm.c' - ] + source += env.ParseSourceList('Makefile.sources', [ + 'GALLIVM_SOURCES', + 'GALLIVM_CPP_SOURCES' + ]) + + if env['toolchain'] == 'crossmingw': + # compile lp_bld_misc.cpp without -gstabs option + source = env.compile_without_gstabs(source, "gallivm/lp_bld_misc.cpp") gallium = env.ConvenienceLibrary( target = 'gallium', diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 8bb87440497..996e295e4b5 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -96,7 +96,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *var); * Create LLVM type for struct draw_jit_texture */ static LLVMTypeRef -create_jit_texture_type(struct gallivm_state *gallivm) +create_jit_texture_type(struct gallivm_state *gallivm, const char *struct_name) { LLVMTargetDataRef target = gallivm->target; LLVMTypeRef texture_type; @@ -120,13 +120,21 @@ create_jit_texture_type(struct gallivm_state *gallivm) elem_types[DRAW_JIT_TEXTURE_BORDER_COLOR] = LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4); +#if HAVE_LLVM >= 0x0300 + texture_type = LLVMStructCreateNamed(gallivm->context, struct_name); + LLVMStructSetBody(texture_type, elem_types, + Elements(elem_types), 0); +#else texture_type = LLVMStructTypeInContext(gallivm->context, elem_types, Elements(elem_types), 0); + LLVMAddTypeName(gallivm->module, struct_name, texture_type); + /* Make sure the target's struct layout cache doesn't return * stale/invalid data. */ LLVMInvalidateStructLayout(gallivm->target, texture_type); +#endif LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, width, target, texture_type, @@ -176,7 +184,7 @@ create_jit_texture_type(struct gallivm_state *gallivm) */ static LLVMTypeRef create_jit_context_type(struct gallivm_state *gallivm, - LLVMTypeRef texture_type) + LLVMTypeRef texture_type, const char *struct_name) { LLVMTargetDataRef target = gallivm->target; LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context); @@ -189,11 +197,17 @@ create_jit_context_type(struct gallivm_state *gallivm, elem_types[3] = LLVMPointerType(float_type, 0); /* viewport */ elem_types[4] = LLVMArrayType(texture_type, PIPE_MAX_VERTEX_SAMPLERS); /* textures */ - +#if HAVE_LLVM >= 0x0300 + context_type = LLVMStructCreateNamed(gallivm->context, struct_name); + LLVMStructSetBody(context_type, elem_types, + Elements(elem_types), 0); +#else context_type = LLVMStructTypeInContext(gallivm->context, elem_types, Elements(elem_types), 0); + LLVMAddTypeName(gallivm->module, struct_name, context_type); LLVMInvalidateStructLayout(gallivm->target, context_type); +#endif LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_constants, target, context_type, 0); @@ -215,7 +229,7 @@ create_jit_context_type(struct gallivm_state *gallivm, * Create LLVM type for struct pipe_vertex_buffer */ static LLVMTypeRef -create_jit_vertex_buffer_type(struct gallivm_state *gallivm) +create_jit_vertex_buffer_type(struct gallivm_state *gallivm, const char *struct_name) { LLVMTargetDataRef target = gallivm->target; LLVMTypeRef elem_types[3]; @@ -225,10 +239,17 @@ create_jit_vertex_buffer_type(struct gallivm_state *gallivm) elem_types[1] = LLVMInt32TypeInContext(gallivm->context); elem_types[2] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); /* vs_constants */ +#if HAVE_LLVM >= 0x0300 + vb_type = LLVMStructCreateNamed(gallivm->context, struct_name); + LLVMStructSetBody(vb_type, elem_types, + Elements(elem_types), 0); +#else vb_type = LLVMStructTypeInContext(gallivm->context, elem_types, Elements(elem_types), 0); + LLVMAddTypeName(gallivm->module, struct_name, vb_type); LLVMInvalidateStructLayout(gallivm->target, vb_type); +#endif LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, stride, target, vb_type, 0); @@ -258,10 +279,17 @@ create_jit_vertex_header(struct gallivm_state *gallivm, int data_elems) elem_types[1] = LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4); elem_types[2] = LLVMArrayType(elem_types[1], data_elems); +#if HAVE_LLVM >= 0x0300 + vertex_header = LLVMStructCreateNamed(gallivm->context, struct_name); + LLVMStructSetBody(vertex_header, elem_types, + Elements(elem_types), 0); +#else vertex_header = LLVMStructTypeInContext(gallivm->context, elem_types, Elements(elem_types), 0); + LLVMAddTypeName(gallivm->module, struct_name, vertex_header); LLVMInvalidateStructLayout(gallivm->target, vertex_header); +#endif /* these are bit-fields and we can't take address of them LP_CHECK_MEMBER_OFFSET(struct vertex_header, clipmask, @@ -284,8 +312,6 @@ create_jit_vertex_header(struct gallivm_state *gallivm, int data_elems) target, vertex_header, DRAW_JIT_VERTEX_DATA); - LLVMAddTypeName(gallivm->module, struct_name, vertex_header); - return vertex_header; } @@ -299,19 +325,15 @@ create_jit_types(struct draw_llvm *llvm) struct gallivm_state *gallivm = llvm->gallivm; LLVMTypeRef texture_type, context_type, buffer_type, vb_type; - texture_type = create_jit_texture_type(gallivm); - LLVMAddTypeName(gallivm->module, "texture", texture_type); + texture_type = create_jit_texture_type(gallivm, "texture"); - context_type = create_jit_context_type(gallivm, texture_type); - LLVMAddTypeName(gallivm->module, "draw_jit_context", context_type); + context_type = create_jit_context_type(gallivm, texture_type, "draw_jit_context"); llvm->context_ptr_type = LLVMPointerType(context_type, 0); buffer_type = LLVMPointerType(LLVMIntTypeInContext(gallivm->context, 8), 0); - LLVMAddTypeName(gallivm->module, "buffer", buffer_type); llvm->buffer_ptr_type = LLVMPointerType(buffer_type, 0); - vb_type = create_jit_vertex_buffer_type(gallivm); - LLVMAddTypeName(gallivm->module, "pipe_vertex_buffer", vb_type); + vb_type = create_jit_vertex_buffer_type(gallivm, "pipe_vertex_buffer"); llvm->vb_ptr_type = LLVMPointerType(vb_type, 0); } diff --git a/src/gallium/auxiliary/gallivm/f.cpp b/src/gallium/auxiliary/gallivm/f.cpp index 5eb09c01ab3..6b9c35b3ce5 100644 --- a/src/gallium/auxiliary/gallivm/f.cpp +++ b/src/gallium/auxiliary/gallivm/f.cpp @@ -15,8 +15,9 @@ * * How to use this source: * - * - Download and abuild the NTL library from - * http://shoup.net/ntl/download.html + * - Download and build the NTL library from + * http://shoup.net/ntl/download.html , or install libntl-dev package if on + * Debian. * * - Download boost source code matching to your distro. * @@ -24,22 +25,32 @@ * * - Build as * - * g++ -o minimax -I /path/to/ntl/include main.cpp f.cpp /path/to/ntl/src/ntl.a -lboost_math_tr1 + * g++ -o minimax -I /path/to/ntl/include main.cpp f.cpp /path/to/ntl/src/ntl.a * * - Run as * * ./minimax * - * - For example, to compute exp2 5th order polynomial between [0, 1] do: + * - For example, to compute log2 5th order polynomial between [1, 2] do: + * + * variant 0 + * range 1 2 + * order 5 0 + * step 200 + * info + * + * and take the coefficients from the P = { ... } array. + * + * - To compute exp2 5th order polynomial between [0, 1] do: * * variant 1 * range 0 1 * order 5 0 - * steps 200 + * step 200 * info * * - For more info see - * http://www.boost.org/doc/libs/1_36_0/libs/math/doc/sf_and_dist/html/math_toolkit/toolkit/internals2/minimax.html + * http://www.boost.org/doc/libs/1_47_0/libs/math/doc/sf_and_dist/html/math_toolkit/toolkit/internals2/minimax.html */ #define L22 diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 02b3bde7893..2be8598704e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -61,7 +61,7 @@ #include "lp_bld_arit.h" -#define EXP_POLY_DEGREE 3 +#define EXP_POLY_DEGREE 5 #define LOG_POLY_DEGREE 5 @@ -1645,7 +1645,7 @@ lp_build_rsqrt(struct lp_build_context *bld, assert(type.floating); if (util_cpu_caps.has_sse && type.width == 32 && type.length == 4) { - const unsigned num_iterations = 0; + const unsigned num_iterations = 1; LLVMValueRef res; unsigned i; @@ -2151,7 +2151,7 @@ lp_build_exp(struct lp_build_context *bld, assert(lp_check_value(bld->type, x)); - return lp_build_mul(bld, log2e, lp_build_exp2(bld, x)); + return lp_build_exp2(bld, lp_build_mul(bld, log2e, x)); } @@ -2168,7 +2168,7 @@ lp_build_log(struct lp_build_context *bld, assert(lp_check_value(bld->type, x)); - return lp_build_mul(bld, log2, lp_build_exp2(bld, x)); + return lp_build_mul(bld, log2, lp_build_log2(bld, x)); } @@ -2218,18 +2218,18 @@ lp_build_polynomial(struct lp_build_context *bld, */ const double lp_build_exp2_polynomial[] = { #if EXP_POLY_DEGREE == 5 - 0.999999999690134838155, - 0.583974334321735217258, - 0.164553105719676828492, - 0.0292811063701710962255, - 0.00354944426657875141846, - 0.000296253726543423377365 + 0.999999925063526176901, + 0.693153073200168932794, + 0.240153617044375388211, + 0.0558263180532956664775, + 0.00898934009049466391101, + 0.00187757667519147912699 #elif EXP_POLY_DEGREE == 4 - 1.00000001502262084505, - 0.563586057338685991394, - 0.150436017652442413623, - 0.0243220604213317927308, - 0.0025359088446580436489 + 1.00000259337069434683, + 0.693003834469974940458, + 0.24144275689150793076, + 0.0520114606103070150235, + 0.0135341679161270268764 #elif EXP_POLY_DEGREE == 3 0.999925218562710312959, 0.695833540494823811697, @@ -2465,6 +2465,12 @@ lp_build_log2_approx(struct lp_build_context *bld, assert(type.floating && type.width == 32); + /* + * We don't explicitly handle denormalized numbers. They will yield a + * result in the neighbourhood of -127, which appears to be adequate + * enough. + */ + i = LLVMBuildBitCast(builder, x, int_vec_type, ""); /* exp = (float) exponent(x) */ diff --git a/src/gallium/auxiliary/os/os_mman.h b/src/gallium/auxiliary/os/os_mman.h new file mode 100644 index 00000000000..b48eb053023 --- /dev/null +++ b/src/gallium/auxiliary/os/os_mman.h @@ -0,0 +1,87 @@ +/************************************************************************** + * + * Copyright 2011 LunarG, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * OS independent memory mapping (with large file support). + * + * @author Chia-I Wu <[email protected]> + */ + +#ifndef _OS_MMAN_H_ +#define _OS_MMAN_H_ + + +#include "pipe/p_config.h" +#include "pipe/p_compiler.h" + +#if defined(PIPE_OS_UNIX) +# ifndef _FILE_OFFSET_BITS +# error _FILE_OFFSET_BITS must be defined to 64 +# endif +# include <sys/mman.h> +#else +# error Unsupported OS +#endif + +#if defined(PIPE_OS_ANDROID) +# include <errno.h> /* for EINVAL */ +#endif + +#ifdef __cplusplus +extern "C" { +#endif + + +#if defined(PIPE_OS_ANDROID) + +extern void *__mmap2(void *, size_t, int, int, int, size_t); + +static INLINE void *os_mmap(void *addr, size_t length, int prot, int flags, int fd, loff_t offset) +{ + /* offset must be aligned to 4096 (not necessarily the page size) */ + if (unlikely(offset & 4095)) { + errno = EINVAL; + return MAP_FAILED; + } + + return __mmap2(addr, length, prot, flags, fd, (size_t) (offset >> 12)); +} + +#else +/* assume large file support exists */ +# define os_mmap(addr, length, prot, flags, fd, offset) mmap(addr, length, prot, flags, fd, offset) +#endif + +#define os_munmap(addr, length) munmap(addr, length) + + +#ifdef __cplusplus +} +#endif + +#endif /* _OS_MMAN_H_ */ diff --git a/src/gallium/auxiliary/os/os_thread.h b/src/gallium/auxiliary/os/os_thread.h index 8f1245bff55..d8301298b7f 100644 --- a/src/gallium/auxiliary/os/os_thread.h +++ b/src/gallium/auxiliary/os/os_thread.h @@ -314,7 +314,7 @@ typedef int64_t pipe_condvar; * pipe_barrier */ -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_HAIKU) +#if (defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_HAIKU)) && !defined(PIPE_OS_ANDROID) typedef pthread_barrier_t pipe_barrier; diff --git a/src/gallium/auxiliary/postprocess/ADDING b/src/gallium/auxiliary/postprocess/ADDING new file mode 100644 index 00000000000..3735835142a --- /dev/null +++ b/src/gallium/auxiliary/postprocess/ADDING @@ -0,0 +1,87 @@ +How to add a new post-processing filter +======================================= + +The Gallium post-processing queue works by passing the current screen to a fragment shader. +These shaders may be written in any supported language, but are added here in TGSI text +assembly. + +You can translate GLSL/ARB fairly easily via llvmpipe (LP_DEBUG=tgsi). I don't know the +status of the D3D state tracker, but if/when that works, I'd assume HLSL would be possible +too. + + + +Steps +===== + +1. Add it to PP +2. Make it known to PP +3. Make it known to driconf +4. ???? +5. Profit + + + + +1. Add it to PP +--------------- + +Once you have the shader(s) in TGSI asm, put them to static const char arrays in a header +file (see pp_colors.h). + +Add the filter's prototypes (main and init functions) to postprocess.h. This is mostly a +copy-paste job with only changing the name. + +Then create a file containing empty main and init functions, named as you specified above. +See pp_colors.c for an example. + + + +2. Make it known to PP +---------------------- + +Add your filter to filters.h, in a correct place. Placement is important, AA should usually +be the last effect in the queue for example. + +Name is the config option your filter will be enabled by, both in driconf and as an env var. + +Inner temp means an intermediate framebuffer you may use in your filter to store +results between passes. If you have a single-pass filter, request 0 of those. + +Shaders is the number of shaders your filter needs. The minimum is 2. + + +You could also write the init and main functions now. If your filter is single-pass without +a vertex shader and any other input than the main screen, you can use pp_nocolor as your +main function as is. + + + +3. Make it known to driconf +--------------------------- + +First time outside of auxiliary/postprocess. First, add a suitable description to +drivers/dri/common/xmlpool/t_options.h, and regenerate options.h by running make in that +directory. Use the name you put into filters.h as the config option name. + +With driconf aware of the option, make Gallium aware of it too. Add it to +state_trackers/dri/common/dri_screen.c in a proper section, specifying its default value and +the accepted range (if applicable). + +Do check that __driNConfigOptions is still correct after the addition. + + + +4. ???? +------- + +Testing, praying, hookers, blow, sacrificial lambs... + + + +5. Profit +--------- + +Assuming you got here, sharing is caring. Send your filter to mesa-dev. + + diff --git a/src/gallium/auxiliary/postprocess/filters.h b/src/gallium/auxiliary/postprocess/filters.h new file mode 100644 index 00000000000..2454088707d --- /dev/null +++ b/src/gallium/auxiliary/postprocess/filters.h @@ -0,0 +1,58 @@ +/************************************************************************** + * + * Copyright 2011 Lauri Kasanen + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef PP_EXTERNAL_FILTERS_H +#define PP_EXTERNAL_FILTERS_H + +#include "postprocess/postprocess.h" + +typedef void (*pp_init_func) (struct pp_queue_t *, unsigned int, + unsigned int); + +struct pp_filter_t +{ + const char *name; /* Config name */ + unsigned int inner_tmps; /* Request how many inner temps */ + unsigned int shaders; /* Request how many shaders */ + unsigned int verts; /* How many are vertex shaders */ + pp_init_func init; /* Init function */ + pp_func main; /* Run function */ +}; + +/* Order matters. Put new filters in a suitable place. */ + +static const struct pp_filter_t pp_filters[PP_FILTERS] = { +/* name inner shaders verts init run */ + { "pp_noblue", 0, 2, 1, pp_noblue_init, pp_nocolor }, + { "pp_nogreen", 0, 2, 1, pp_nogreen_init, pp_nocolor }, + { "pp_nored", 0, 2, 1, pp_nored_init, pp_nocolor }, + { "pp_celshade", 0, 2, 1, pp_celshade_init, pp_nocolor }, + { "pp_jimenezmlaa", 2, 5, 2, pp_jimenezmlaa_init, pp_jimenezmlaa }, + { "pp_jimenezmlaa_color", 2, 5, 2, pp_jimenezmlaa_init_color, pp_jimenezmlaa_color }, +}; + +#endif diff --git a/src/gallium/auxiliary/postprocess/postprocess.h b/src/gallium/auxiliary/postprocess/postprocess.h new file mode 100644 index 00000000000..ef94f79997a --- /dev/null +++ b/src/gallium/auxiliary/postprocess/postprocess.h @@ -0,0 +1,100 @@ +/************************************************************************** + * + * Copyright 2011 Lauri Kasanen + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef POSTPROCESS_H +#define POSTPROCESS_H + +#include "postprocess/pp_program.h" + +#define PP_FILTERS 6 /* Increment this if you add filters */ +#define PP_MAX_PASSES 6 + +struct pp_queue_t; /* Forward definition */ + +/* Less typing later on */ +typedef void (*pp_func) (struct pp_queue_t *, struct pipe_resource *, + struct pipe_resource *, unsigned int); +/** +* The main post-processing queue. +*/ +struct pp_queue_t +{ + pp_func *pp_queue; /* An array of pp_funcs */ + unsigned int n_filters; /* Number of enabled filters */ + + struct pipe_resource *tmp[2]; /* Two temp FBOs for the queue */ + struct pipe_resource *inner_tmp[3]; /* Three for filter use */ + + unsigned int n_tmp, n_inner_tmp; + + struct pipe_resource *depth; /* depth of original input */ + struct pipe_resource *stencil; /* stencil shared by inner_tmps */ + + struct pipe_surface *tmps[2], *inner_tmps[3], *stencils; + + void ***shaders; /* Shaders in TGSI form */ + unsigned int *verts; + struct program *p; + + bool fbos_init; +}; + +/* Main functions */ + +struct pp_queue_t *pp_init(struct pipe_screen *, const unsigned int *); +void pp_run(struct pp_queue_t *, struct pipe_resource *, + struct pipe_resource *, struct pipe_resource *); +void pp_free(struct pp_queue_t *); +void pp_free_fbos(struct pp_queue_t *); +void pp_debug(const char *, ...); +struct program *pp_init_prog(struct pp_queue_t *, struct pipe_screen *); +void pp_init_fbos(struct pp_queue_t *, unsigned int, unsigned int, + struct pipe_resource *); + +/* The filters */ + +void pp_nocolor(struct pp_queue_t *, struct pipe_resource *, + struct pipe_resource *, unsigned int); + +void pp_jimenezmlaa(struct pp_queue_t *, struct pipe_resource *, + struct pipe_resource *, unsigned int); +void pp_jimenezmlaa_color(struct pp_queue_t *, struct pipe_resource *, + struct pipe_resource *, unsigned int); + +/* The filter init functions */ + +void pp_celshade_init(struct pp_queue_t *, unsigned int, unsigned int); + +void pp_nored_init(struct pp_queue_t *, unsigned int, unsigned int); +void pp_nogreen_init(struct pp_queue_t *, unsigned int, unsigned int); +void pp_noblue_init(struct pp_queue_t *, unsigned int, unsigned int); + +void pp_jimenezmlaa_init(struct pp_queue_t *, unsigned int, unsigned int); +void pp_jimenezmlaa_init_color(struct pp_queue_t *, unsigned int, + unsigned int); + +#endif diff --git a/src/gallium/auxiliary/postprocess/pp_celshade.c b/src/gallium/auxiliary/postprocess/pp_celshade.c new file mode 100644 index 00000000000..4454764ea84 --- /dev/null +++ b/src/gallium/auxiliary/postprocess/pp_celshade.c @@ -0,0 +1,38 @@ +/************************************************************************** + * + * Copyright 2011 Lauri Kasanen + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "postprocess/postprocess.h" +#include "postprocess/pp_celshade.h" +#include "postprocess/pp_filters.h" + +/** Init function */ +void +pp_celshade_init(struct pp_queue_t *ppq, unsigned int n, unsigned int val) +{ + ppq->shaders[n][1] = + pp_tgsi_to_state(ppq->p->pipe, celshade, false, "celshade"); +} diff --git a/src/gallium/auxiliary/postprocess/pp_celshade.h b/src/gallium/auxiliary/postprocess/pp_celshade.h new file mode 100644 index 00000000000..536ac7f1f1c --- /dev/null +++ b/src/gallium/auxiliary/postprocess/pp_celshade.h @@ -0,0 +1,79 @@ +/************************************************************************** + * + * Copyright 2011 Lauri Kasanen + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef CELSHADE_H +#define CELSHADE_H + +static const char celshade[] = "FRAG\n" + "PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1\n" + "DCL IN[0], GENERIC[0], PERSPECTIVE\n" + "DCL OUT[0], COLOR\n" + "DCL SAMP[0]\n" + "DCL TEMP[0..4]\n" + "IMM FLT32 { 0.2126, 0.7152, 0.0722, 4.0000}\n" + "IMM FLT32 { 0.5000, 2.0000, 1.0000, -0.1250}\n" + "IMM FLT32 { 0.2500, 0.1000, 0.1250, 3.0000}\n" + " 0: TEX TEMP[0], IN[0].xyyy, SAMP[0], 2D\n" + " 1: DP3 TEMP[1].x, TEMP[0].xyzz, IMM[0]\n" + " 2: MUL TEMP[3].x, TEMP[1].xxxx, IMM[0].wwww\n" + " 3: ROUND TEMP[2].x, TEMP[3].xxxx\n" + " 4: MUL TEMP[3].x, TEMP[2].xxxx, IMM[2].xxxx\n" + " 5: MOV TEMP[2].x, TEMP[3].xxxx\n" + " 6: ADD TEMP[4].x, TEMP[1].xxxx, -TEMP[3].xxxx\n" + " 7: SGT TEMP[1].w, TEMP[4].xxxx, IMM[2].yyyy\n" + " 8: IF TEMP[1].wwww :19\n" + " 9: ADD TEMP[4].y, TEMP[3].xxxx, IMM[2].yyyy\n" + " 10: ADD TEMP[1].z, TEMP[1].xxxx, -TEMP[4].yyyy\n" + " 11: ADD TEMP[1].y, TEMP[3].xxxx, IMM[2].zzzz\n" + " 12: ADD TEMP[2].x, TEMP[1].yyyy, -TEMP[4].yyyy\n" + " 13: RCP TEMP[4].y, TEMP[2].xxxx\n" + " 14: MUL TEMP[2].x, TEMP[1].zzzz, TEMP[4].yyyy\n" + " 15: MAD TEMP[1].y, -IMM[1].yyyy, TEMP[2].xxxx, IMM[2].wwww\n" + " 16: MUL TEMP[1].z, TEMP[2].xxxx, TEMP[1].yyyy\n" + " 17: MUL TEMP[1].y, TEMP[2].xxxx, TEMP[1].zzzz\n" + " 18: MAD TEMP[2].x, TEMP[1].yyyy, IMM[2].zzzz, TEMP[3].xxxx\n" + " 19: ENDIF\n" + " 20: SLT TEMP[3].x, TEMP[4].xxxx, -IMM[2].yyyy\n" + " 21: IF TEMP[3].xxxx :34\n" + " 22: ADD TEMP[3].x, TEMP[2].xxxx, -IMM[2].zzzz\n" + " 23: ADD TEMP[4].x, TEMP[1].xxxx, -TEMP[3].xxxx\n" + " 24: ADD TEMP[1].x, TEMP[2].xxxx, -IMM[2].yyyy\n" + " 25: ADD TEMP[4].y, TEMP[1].xxxx, -TEMP[3].xxxx\n" + " 26: RCP TEMP[3].x, TEMP[4].yyyy\n" + " 27: MUL TEMP[1].x, TEMP[4].xxxx, TEMP[3].xxxx\n" + " 28: MAD TEMP[4].x, -IMM[1].yyyy, TEMP[1].xxxx, IMM[2].wwww\n" + " 29: MUL TEMP[3].x, TEMP[1].xxxx, TEMP[4].xxxx\n" + " 30: MUL TEMP[4].x, TEMP[1].xxxx, TEMP[3].xxxx\n" + " 31: ADD TEMP[3].x, IMM[1].zzzz, -TEMP[4].xxxx\n" + " 32: MAD TEMP[1].x, TEMP[3].xxxx, -IMM[2].zzzz, TEMP[2].xxxx\n" + " 33: MOV TEMP[2].x, TEMP[1].xxxx\n" + " 34: ENDIF\n" + " 35: MAD TEMP[1].x, TEMP[2].xxxx, IMM[1].yyyy, IMM[2].yyyy\n" + " 36: MUL OUT[0], TEMP[0], TEMP[1].xxxx\n" + " 37: END\n"; + +#endif diff --git a/src/gallium/auxiliary/postprocess/pp_colors.c b/src/gallium/auxiliary/postprocess/pp_colors.c new file mode 100644 index 00000000000..36bb1f552f5 --- /dev/null +++ b/src/gallium/auxiliary/postprocess/pp_colors.c @@ -0,0 +1,80 @@ +/************************************************************************** + * + * Copyright 2011 Lauri Kasanen + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "postprocess/postprocess.h" +#include "postprocess/pp_colors.h" +#include "postprocess/pp_filters.h" + +/** The run function of the color filters */ +void +pp_nocolor(struct pp_queue_t *ppq, struct pipe_resource *in, + struct pipe_resource *out, unsigned int n) +{ + + struct program *p = ppq->p; + + pp_filter_setup_in(p, in); + pp_filter_setup_out(p, out); + + pp_filter_set_fb(p); + pp_filter_misc_state(p); + + cso_single_sampler(p->cso, 0, &p->sampler_point); + cso_single_sampler_done(p->cso); + cso_set_fragment_sampler_views(p->cso, 1, &p->view); + + cso_set_vertex_shader_handle(p->cso, ppq->shaders[n][0]); + cso_set_fragment_shader_handle(p->cso, ppq->shaders[n][1]); + + pp_filter_draw(p); + pp_filter_end_pass(p); +} + + +/* Init functions */ + +void +pp_nored_init(struct pp_queue_t *ppq, unsigned int n, unsigned int val) +{ + ppq->shaders[n][1] = pp_tgsi_to_state(ppq->p->pipe, nored, false, "nored"); +} + + +void +pp_nogreen_init(struct pp_queue_t *ppq, unsigned int n, unsigned int val) +{ + ppq->shaders[n][1] = + pp_tgsi_to_state(ppq->p->pipe, nogreen, false, "nogreen"); +} + + +void +pp_noblue_init(struct pp_queue_t *ppq, unsigned int n, unsigned int val) +{ + ppq->shaders[n][1] = + pp_tgsi_to_state(ppq->p->pipe, noblue, false, "noblue"); +} diff --git a/src/gallium/auxiliary/postprocess/pp_colors.h b/src/gallium/auxiliary/postprocess/pp_colors.h new file mode 100644 index 00000000000..588cd2f0c52 --- /dev/null +++ b/src/gallium/auxiliary/postprocess/pp_colors.h @@ -0,0 +1,69 @@ +/************************************************************************** + * + * Copyright 2011 Lauri Kasanen + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef PP_COLORS_H +#define PP_COLORS_H + +static const char nored[] = "FRAG\n" + "PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1\n" + "DCL IN[0], GENERIC[0], PERSPECTIVE\n" + "DCL OUT[0], COLOR\n" + "DCL SAMP[0]\n" + "DCL TEMP[0]\n" + "IMM FLT32 { 0.0000, 0.0000, 0.0000, 0.0000}\n" + " 0: TEX TEMP[0], IN[0].xyyy, SAMP[0], 2D\n" + " 1: MOV TEMP[0].x, IMM[0].xxxx\n" + " 2: MOV OUT[0], TEMP[0]\n" + " 3: END\n"; + + +static const char nogreen[] = "FRAG\n" + "PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1\n" + "DCL IN[0], GENERIC[0], PERSPECTIVE\n" + "DCL OUT[0], COLOR\n" + "DCL SAMP[0]\n" + "DCL TEMP[0]\n" + "IMM FLT32 { 0.0000, 0.0000, 0.0000, 0.0000}\n" + " 0: TEX TEMP[0], IN[0].xyyy, SAMP[0], 2D\n" + " 1: MOV TEMP[0].y, IMM[0].xxxx\n" + " 2: MOV OUT[0], TEMP[0]\n" + " 3: END\n"; + + +static const char noblue[] = "FRAG\n" + "PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1\n" + "DCL IN[0], GENERIC[0], PERSPECTIVE\n" + "DCL OUT[0], COLOR\n" + "DCL SAMP[0]\n" + "DCL TEMP[0]\n" + "IMM FLT32 { 0.0000, 0.0000, 0.0000, 0.0000}\n" + " 0: TEX TEMP[0], IN[0].xyyy, SAMP[0], 2D\n" + " 1: MOV TEMP[0].z, IMM[0].xxxx\n" + " 2: MOV OUT[0], TEMP[0]\n" + " 3: END\n"; + +#endif diff --git a/src/gallium/auxiliary/postprocess/pp_filters.h b/src/gallium/auxiliary/postprocess/pp_filters.h new file mode 100644 index 00000000000..0e34bb6d20f --- /dev/null +++ b/src/gallium/auxiliary/postprocess/pp_filters.h @@ -0,0 +1,57 @@ +/************************************************************************** + * + * Copyright 2011 Lauri Kasanen + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef PP_FILTERS_H +#define PP_FILTERS_H + +/* Internal include, mainly for the filters */ + +#include "cso_cache/cso_context.h" +#include "pipe/p_context.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/p_state.h" +#include "tgsi/tgsi_text.h" +#include "util/u_memory.h" +#include "util/u_draw_quad.h" + +#define PP_MAX_TOKENS 2048 + + +/* Helper functions for the filters */ + +void pp_filter_setup_in(struct program *, struct pipe_resource *); +void pp_filter_setup_out(struct program *, struct pipe_resource *); +void pp_filter_end_pass(struct program *); +void *pp_tgsi_to_state(struct pipe_context *, const char *, bool, + const char *); +void pp_filter_misc_state(struct program *); +void pp_filter_draw(struct program *); +void pp_filter_set_fb(struct program *); +void pp_filter_set_clear_fb(struct program *); + + +#endif diff --git a/src/gallium/auxiliary/postprocess/pp_init.c b/src/gallium/auxiliary/postprocess/pp_init.c new file mode 100644 index 00000000000..75417999b7e --- /dev/null +++ b/src/gallium/auxiliary/postprocess/pp_init.c @@ -0,0 +1,283 @@ +/************************************************************************** + * + * Copyright 2011 Lauri Kasanen + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> + +#include "postprocess/filters.h" + +#include "pipe/p_screen.h" +#include "util/u_inlines.h" +#include "util/u_blit.h" +#include "util/u_math.h" +#include "cso_cache/cso_context.h" + +/** Initialize the post-processing queue. */ +struct pp_queue_t * +pp_init(struct pipe_screen *pscreen, const unsigned int *enabled) +{ + + unsigned int curpos = 0, i, tmp_req = 0; + struct pp_queue_t *ppq; + pp_func *tmp_q; + + pp_debug("Initializing the post-processing queue.\n"); + + /* How many filters were requested? */ + for (i = 0; i < PP_FILTERS; i++) { + if (enabled[i]) + curpos++; + } + if (!curpos) + return NULL; + + ppq = calloc(1, sizeof(struct pp_queue_t)); + tmp_q = calloc(curpos, sizeof(pp_func)); + ppq->shaders = calloc(curpos, sizeof(void *)); + ppq->verts = calloc(curpos, sizeof(unsigned int)); + + if (!tmp_q || !ppq || !ppq->shaders || !ppq->verts) + goto error; + + ppq->p = pp_init_prog(ppq, pscreen); + if (!ppq->p) + goto error; + + /* Add the enabled filters to the queue, in order */ + curpos = 0; + ppq->pp_queue = tmp_q; + for (i = 0; i < PP_FILTERS; i++) { + if (enabled[i]) { + ppq->pp_queue[curpos] = pp_filters[i].main; + tmp_req = MAX2(tmp_req, pp_filters[i].inner_tmps); + + if (pp_filters[i].shaders) { + ppq->shaders[curpos] = + calloc(pp_filters[i].shaders + 1, sizeof(void *)); + ppq->verts[curpos] = pp_filters[i].verts; + if (!ppq->shaders[curpos]) + goto error; + } + pp_filters[i].init(ppq, curpos, enabled[i]); + + curpos++; + } + } + + ppq->p->blitctx = util_create_blit(ppq->p->pipe, ppq->p->cso); + if (!ppq->p->blitctx) + goto error; + + ppq->n_filters = curpos; + ppq->n_tmp = (curpos > 2 ? 2 : 1); + ppq->n_inner_tmp = tmp_req; + + ppq->fbos_init = false; + + for (i = 0; i < curpos; i++) + ppq->shaders[i][0] = ppq->p->passvs; + + pp_debug("Queue successfully allocated. %u filter(s).\n", curpos); + + return ppq; + + error: + pp_debug("Error setting up pp\n"); + + if (ppq) + free(ppq->p); + free(ppq); + free(tmp_q); + + return NULL; +} + +/** Free any allocated FBOs (temp buffers). Called after resizing for example. */ +void +pp_free_fbos(struct pp_queue_t *ppq) +{ + + unsigned int i; + + if (!ppq->fbos_init) + return; + + for (i = 0; i < ppq->n_tmp; i++) { + pipe_surface_reference(&ppq->tmps[i], NULL); + pipe_resource_reference(&ppq->tmp[i], NULL); + } + for (i = 0; i < ppq->n_inner_tmp; i++) { + pipe_surface_reference(&ppq->inner_tmps[i], NULL); + pipe_resource_reference(&ppq->inner_tmp[i], NULL); + } + pipe_surface_reference(&ppq->stencils, NULL); + pipe_resource_reference(&ppq->stencil, NULL); + + ppq->fbos_init = false; +} + +/** Free the pp queue. Called on context termination. */ +void +pp_free(struct pp_queue_t *ppq) +{ + + unsigned int i, j; + + pp_free_fbos(ppq); + + util_destroy_blit(ppq->p->blitctx); + + cso_set_fragment_sampler_views(ppq->p->cso, 0, NULL); + cso_release_all(ppq->p->cso); + + for (i = 0; i < ppq->n_filters; i++) { + for (j = 0; j < PP_MAX_PASSES && ppq->shaders[i][j]; j++) { + if (j >= ppq->verts[i]) { + ppq->p->pipe->delete_fs_state(ppq->p->pipe, ppq->shaders[i][j]); + ppq->shaders[i][j] = NULL; + } + else if (ppq->shaders[i][j] != ppq->p->passvs) { + ppq->p->pipe->delete_vs_state(ppq->p->pipe, ppq->shaders[i][j]); + ppq->shaders[i][j] = NULL; + } + } + } + + cso_destroy_context(ppq->p->cso); + ppq->p->pipe->destroy(ppq->p->pipe); + + free(ppq->p); + free(ppq->pp_queue); + free(ppq); + + pp_debug("Queue taken down.\n"); +} + +/** Internal debug function. Should be available to final users. */ +void +pp_debug(const char *fmt, ...) +{ + va_list ap; + + if (!getenv("PP_DEBUG")) + return; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + +/** Allocate the temp FBOs. Called on makecurrent and resize. */ +void +pp_init_fbos(struct pp_queue_t *ppq, const unsigned int w, + const unsigned int h, struct pipe_resource *indepth) +{ + + struct program *p = ppq->p; /* The lazy will inherit the earth */ + + unsigned int i; + struct pipe_resource tmp_res; + + if (ppq->fbos_init) + return; + + pp_debug("Initializing FBOs, size %ux%u\n", w, h); + pp_debug("Requesting %u temps and %u inner temps\n", ppq->n_tmp, + ppq->n_inner_tmp); + + memset(&tmp_res, 0, sizeof(tmp_res)); + tmp_res.target = PIPE_TEXTURE_2D; + tmp_res.format = p->surf.format = PIPE_FORMAT_B8G8R8A8_UNORM; + tmp_res.width0 = w; + tmp_res.height0 = h; + tmp_res.depth0 = 1; + tmp_res.array_size = 1; + tmp_res.last_level = 0; + tmp_res.bind = p->surf.usage = PIPE_BIND_RENDER_TARGET; + + if (!p->screen->is_format_supported(p->screen, tmp_res.format, + tmp_res.target, 1, tmp_res.bind)) + pp_debug("Temp buffers' format fail\n"); + + for (i = 0; i < ppq->n_tmp; i++) { + ppq->tmp[i] = p->screen->resource_create(p->screen, &tmp_res); + ppq->tmps[i] = p->pipe->create_surface(p->pipe, ppq->tmp[i], &p->surf); + + if (!ppq->tmp[i] || !ppq->tmps[i]) + goto error; + } + + for (i = 0; i < ppq->n_inner_tmp; i++) { + ppq->inner_tmp[i] = p->screen->resource_create(p->screen, &tmp_res); + ppq->inner_tmps[i] = p->pipe->create_surface(p->pipe, + ppq->inner_tmp[i], + &p->surf); + + if (!ppq->inner_tmp[i] || !ppq->inner_tmps[i]) + goto error; + } + + tmp_res.format = p->surf.format = indepth->format; + tmp_res.bind = p->surf.usage = PIPE_BIND_DEPTH_STENCIL; + ppq->depth = indepth; + if (!ppq->depth) + goto error; + + tmp_res.format = p->surf.format = PIPE_FORMAT_S8_USCALED_Z24_UNORM; + + if (!p->screen->is_format_supported(p->screen, tmp_res.format, + tmp_res.target, 1, tmp_res.bind)) { + + tmp_res.format = p->surf.format = PIPE_FORMAT_Z24_UNORM_S8_USCALED; + + if (!p->screen->is_format_supported(p->screen, tmp_res.format, + tmp_res.target, 1, tmp_res.bind)) + pp_debug("Temp Sbuffer format fail\n"); + } + + ppq->stencil = p->screen->resource_create(p->screen, &tmp_res); + ppq->stencils = p->pipe->create_surface(p->pipe, ppq->stencil, &p->surf); + if (!ppq->stencil || !ppq->stencils) + goto error; + + + p->framebuffer.width = w; + p->framebuffer.height = h; + + p->viewport.scale[0] = p->viewport.translate[0] = (float) w / 2.0; + p->viewport.scale[1] = p->viewport.translate[1] = (float) h / 2.0; + p->viewport.scale[3] = 1.0f; + p->viewport.translate[3] = 0.0f; + + ppq->fbos_init = true; + + return; + + error: + pp_debug("Failed to allocate temp buffers!\n"); +} diff --git a/src/gallium/auxiliary/postprocess/pp_mlaa.c b/src/gallium/auxiliary/postprocess/pp_mlaa.c new file mode 100644 index 00000000000..476502fca93 --- /dev/null +++ b/src/gallium/auxiliary/postprocess/pp_mlaa.c @@ -0,0 +1,304 @@ +/** + * Copyright (C) 2010 Jorge Jimenez ([email protected]) + * Copyright (C) 2010 Belen Masia ([email protected]) + * Copyright (C) 2010 Jose I. Echevarria ([email protected]) + * Copyright (C) 2010 Fernando Navarro ([email protected]) + * Copyright (C) 2010 Diego Gutierrez ([email protected]) + * Copyright (C) 2011 Lauri Kasanen ([email protected]) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the following statement: + * + * "Uses Jimenez's MLAA. Copyright (C) 2010 by Jorge Jimenez, Belen Masia, + * Jose I. Echevarria, Fernando Navarro and Diego Gutierrez." + * + * Only for use in the Mesa project, this point 2 is filled by naming the + * technique Jimenez's MLAA in the Mesa config options. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS + * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * The views and conclusions contained in the software and documentation are + * those of the authors and should not be interpreted as representing official + * policies, either expressed or implied, of the copyright holders. + */ + +#include <stdio.h> +#include <string.h> +#include "postprocess/postprocess.h" +#include "postprocess/pp_mlaa.h" +#include "postprocess/pp_filters.h" +#include "util/u_blit.h" +#include "util/u_box.h" +#include "util/u_sampler.h" +#include "util/u_inlines.h" +#include "pipe/p_screen.h" + +#define IMM_SPACE 80 + +static float constants[] = { 1, 1, 0, 0 }; +static unsigned int dimensions[2] = { 0, 0 }; + +static struct pipe_resource *constbuf, *areamaptex; + +/** Upload the constants. */ +static void +up_consts(struct pipe_context *pipe) +{ + struct pipe_box box; + + u_box_2d(0, 0, sizeof(constants), 1, &box); + pipe->transfer_inline_write(pipe, constbuf, 0, PIPE_TRANSFER_WRITE, + &box, constants, sizeof(constants), + sizeof(constants)); +} + +/** Run function of the MLAA filter. */ +static void +pp_jimenezmlaa_run(struct pp_queue_t *ppq, struct pipe_resource *in, + struct pipe_resource *out, unsigned int n, bool iscolor) +{ + + struct program *p = ppq->p; + + struct pipe_depth_stencil_alpha_state mstencil; + struct pipe_sampler_view v_tmp, *arr[3]; + + unsigned int w = p->framebuffer.width; + unsigned int h = p->framebuffer.height; + + const struct pipe_stencil_ref ref = { {1} }; + memset(&mstencil, 0, sizeof(mstencil)); + cso_set_stencil_ref(p->cso, &ref); + + /* Init the pixel size constant */ + if (dimensions[0] != p->framebuffer.width || + dimensions[1] != p->framebuffer.height) { + constants[0] = 1.0 / p->framebuffer.width; + constants[1] = 1.0 / p->framebuffer.height; + + up_consts(p->pipe); + dimensions[0] = p->framebuffer.width; + dimensions[1] = p->framebuffer.height; + } + + p->pipe->set_constant_buffer(p->pipe, PIPE_SHADER_VERTEX, 0, constbuf); + p->pipe->set_constant_buffer(p->pipe, PIPE_SHADER_FRAGMENT, 0, constbuf); + + mstencil.stencil[0].enabled = 1; + mstencil.stencil[0].valuemask = mstencil.stencil[0].writemask = ~0; + mstencil.stencil[0].func = PIPE_FUNC_ALWAYS; + mstencil.stencil[0].fail_op = PIPE_STENCIL_OP_KEEP; + mstencil.stencil[0].zfail_op = PIPE_STENCIL_OP_KEEP; + mstencil.stencil[0].zpass_op = PIPE_STENCIL_OP_REPLACE; + + p->framebuffer.zsbuf = ppq->stencils; + + /* First pass: depth edge detection */ + if (iscolor) + pp_filter_setup_in(p, in); + else + pp_filter_setup_in(p, ppq->depth); + + pp_filter_setup_out(p, ppq->inner_tmp[0]); + + pp_filter_set_fb(p); + pp_filter_misc_state(p); + cso_set_depth_stencil_alpha(p->cso, &mstencil); + p->pipe->clear(p->pipe, PIPE_CLEAR_STENCIL | PIPE_CLEAR_COLOR, + p->clear_color, 0, 0); + + cso_single_sampler(p->cso, 0, &p->sampler_point); + cso_single_sampler_done(p->cso); + cso_set_fragment_sampler_views(p->cso, 1, &p->view); + + cso_set_vertex_shader_handle(p->cso, ppq->shaders[n][1]); /* offsetvs */ + cso_set_fragment_shader_handle(p->cso, ppq->shaders[n][2]); + + pp_filter_draw(p); + pp_filter_end_pass(p); + + + /* Second pass: blend weights */ + /* Sampler order: areamap, edgesmap, edgesmapL (reversed, thx compiler) */ + mstencil.stencil[0].func = PIPE_FUNC_EQUAL; + mstencil.stencil[0].zpass_op = PIPE_STENCIL_OP_KEEP; + cso_set_depth_stencil_alpha(p->cso, &mstencil); + + pp_filter_setup_in(p, areamaptex); + pp_filter_setup_out(p, ppq->inner_tmp[1]); + + u_sampler_view_default_template(&v_tmp, ppq->inner_tmp[0], + ppq->inner_tmp[0]->format); + arr[1] = arr[2] = p->pipe->create_sampler_view(p->pipe, + ppq->inner_tmp[0], &v_tmp); + + pp_filter_set_clear_fb(p); + + cso_single_sampler(p->cso, 0, &p->sampler_point); + cso_single_sampler(p->cso, 1, &p->sampler_point); + cso_single_sampler(p->cso, 2, &p->sampler); + cso_single_sampler_done(p->cso); + + arr[0] = p->view; + cso_set_fragment_sampler_views(p->cso, 3, arr); + + cso_set_vertex_shader_handle(p->cso, ppq->shaders[n][0]); /* passvs */ + cso_set_fragment_shader_handle(p->cso, ppq->shaders[n][3]); + + pp_filter_draw(p); + pp_filter_end_pass(p); + pipe_sampler_view_reference(&arr[1], NULL); + + + /* Third pass: smoothed edges */ + /* Sampler order: colormap, blendmap (wtf compiler) */ + pp_filter_setup_in(p, ppq->inner_tmp[1]); + pp_filter_setup_out(p, out); + + pp_filter_set_fb(p); + + /* Blit the input to the output */ + util_blit_pixels(p->blitctx, in, 0, 0, 0, + w, h, 0, p->framebuffer.cbufs[0], + 0, 0, w, h, 0, PIPE_TEX_MIPFILTER_NEAREST); + + u_sampler_view_default_template(&v_tmp, in, in->format); + arr[0] = p->pipe->create_sampler_view(p->pipe, in, &v_tmp); + + cso_single_sampler(p->cso, 0, &p->sampler_point); + cso_single_sampler(p->cso, 1, &p->sampler_point); + cso_single_sampler_done(p->cso); + + arr[1] = p->view; + cso_set_fragment_sampler_views(p->cso, 2, arr); + + cso_set_vertex_shader_handle(p->cso, ppq->shaders[n][1]); /* offsetvs */ + cso_set_fragment_shader_handle(p->cso, ppq->shaders[n][4]); + + p->blend.rt[0].blend_enable = 1; + cso_set_blend(p->cso, &p->blend); + + pp_filter_draw(p); + pp_filter_end_pass(p); + pipe_sampler_view_reference(&arr[0], NULL); + + p->blend.rt[0].blend_enable = 0; + p->framebuffer.zsbuf = NULL; +} + +/** The init function of the MLAA filter. */ +static void +pp_jimenezmlaa_init_run(struct pp_queue_t *ppq, unsigned int n, + unsigned int val, bool iscolor) +{ + + struct pipe_box box; + struct pipe_resource res; + + char *tmp_text = calloc(sizeof(blend2fs_1) + sizeof(blend2fs_2) + + IMM_SPACE, sizeof(char)); + + constbuf = pipe_buffer_create(ppq->p->screen, PIPE_BIND_CONSTANT_BUFFER, + PIPE_USAGE_STATIC, sizeof(constants)); + if (!constbuf) { + pp_debug("Failed to allocate constant buffer\n"); + return; + } + + + pp_debug("mlaa: using %u max search steps\n", val); + + if (!tmp_text) { + pp_debug("Failed to allocate shader space\n"); + return; + } + sprintf(tmp_text, "%s" + "IMM FLT32 { %.8f, 0.0000, 0.0000, 0.0000}\n" + "%s\n", blend2fs_1, (float) val, blend2fs_2); + + memset(&res, 0, sizeof(res)); + + res.target = PIPE_TEXTURE_2D; + res.format = PIPE_FORMAT_R8G8_UNORM; + res.width0 = res.height0 = 165; + res.bind = PIPE_BIND_SAMPLER_VIEW; + res.usage = PIPE_USAGE_STATIC; + res.depth0 = res.array_size = res.nr_samples = 1; + + if (!ppq->p->screen->is_format_supported(ppq->p->screen, res.format, + res.target, 1, res.bind)) + pp_debug("Areamap format not supported\n"); + + areamaptex = ppq->p->screen->resource_create(ppq->p->screen, &res); + u_box_2d(0, 0, 165, 165, &box); + + ppq->p->pipe->transfer_inline_write(ppq->p->pipe, areamaptex, 0, + PIPE_TRANSFER_WRITE, &box, + areamap, 165 * 2, sizeof(areamap)); + + + + ppq->shaders[n][1] = pp_tgsi_to_state(ppq->p->pipe, offsetvs, true, + "offsetvs"); + if (iscolor) + ppq->shaders[n][2] = pp_tgsi_to_state(ppq->p->pipe, color1fs, + false, "color1fs"); + else + ppq->shaders[n][2] = pp_tgsi_to_state(ppq->p->pipe, depth1fs, + false, "depth1fs"); + ppq->shaders[n][3] = pp_tgsi_to_state(ppq->p->pipe, tmp_text, false, + "blend2fs"); + ppq->shaders[n][4] = pp_tgsi_to_state(ppq->p->pipe, neigh3fs, false, + "neigh3fs"); + + free(tmp_text); +} + +/** Short wrapper to init the depth version. */ +void +pp_jimenezmlaa_init(struct pp_queue_t *ppq, unsigned int n, unsigned int val) +{ + + pp_jimenezmlaa_init_run(ppq, n, val, false); +} + +/** Short wrapper to init the color version. */ +void +pp_jimenezmlaa_init_color(struct pp_queue_t *ppq, unsigned int n, + unsigned int val) +{ + + pp_jimenezmlaa_init_run(ppq, n, val, true); +} + +/** Short wrapper to run the depth version. */ +void +pp_jimenezmlaa(struct pp_queue_t *ppq, struct pipe_resource *in, + struct pipe_resource *out, unsigned int n) +{ + pp_jimenezmlaa_run(ppq, in, out, n, false); +} + +/** Short wrapper to run the color version. */ +void +pp_jimenezmlaa_color(struct pp_queue_t *ppq, struct pipe_resource *in, + struct pipe_resource *out, unsigned int n) +{ + pp_jimenezmlaa_run(ppq, in, out, n, true); +} diff --git a/src/gallium/auxiliary/postprocess/pp_mlaa.h b/src/gallium/auxiliary/postprocess/pp_mlaa.h new file mode 100644 index 00000000000..9972d59c6a6 --- /dev/null +++ b/src/gallium/auxiliary/postprocess/pp_mlaa.h @@ -0,0 +1,342 @@ +/** + * Copyright (C) 2010 Jorge Jimenez ([email protected]) + * Copyright (C) 2010 Belen Masia ([email protected]) + * Copyright (C) 2010 Jose I. Echevarria ([email protected]) + * Copyright (C) 2010 Fernando Navarro ([email protected]) + * Copyright (C) 2010 Diego Gutierrez ([email protected]) + * Copyright (C) 2011 Lauri Kasanen ([email protected]) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the following statement: + * + * "Uses Jimenez's MLAA. Copyright (C) 2010 by Jorge Jimenez, Belen Masia, + * Jose I. Echevarria, Fernando Navarro and Diego Gutierrez." + * + * Only for use in the Mesa project, this point 2 is filled by naming the + * technique Jimenez's MLAA in the Mesa config options. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS + * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * The views and conclusions contained in the software and documentation are + * those of the authors and should not be interpreted as representing official + * policies, either expressed or implied, of the copyright holders. + */ + +#ifndef PP_MLAA_H +#define PP_MLAA_H + +#include "postprocess/pp_mlaa_areamap.h" + +static const char depth1fs[] = "FRAG\n" + "PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1\n" + "DCL IN[0], GENERIC[0], PERSPECTIVE\n" + "DCL IN[1], GENERIC[10], PERSPECTIVE\n" + "DCL IN[2], GENERIC[11], PERSPECTIVE\n" + "DCL OUT[0], COLOR\n" + "DCL SAMP[0]\n" + "DCL TEMP[0..2]\n" + "IMM FLT32 { 0.0030, 0.0000, 1.0000, 0.0000}\n" + " 0: TEX TEMP[0].x, IN[1].xyyy, SAMP[0], 2D\n" + " 1: MOV TEMP[1].x, TEMP[0].xxxx\n" + " 2: TEX TEMP[0].x, IN[1].zwww, SAMP[0], 2D\n" + " 3: MOV TEMP[1].y, TEMP[0].xxxx\n" + " 4: TEX TEMP[0].x, IN[2].xyyy, SAMP[0], 2D\n" + " 5: MOV TEMP[1].z, TEMP[0].xxxx\n" + " 6: TEX TEMP[0].x, IN[2].zwww, SAMP[0], 2D\n" + " 7: MOV TEMP[1].w, TEMP[0].xxxx\n" + " 8: TEX TEMP[0].x, IN[0].xyyy, SAMP[0], 2D\n" + " 9: ADD TEMP[2], TEMP[0].xxxx, -TEMP[1]\n" + " 10: ABS TEMP[0], TEMP[2]\n" + " 11: SGE TEMP[2], TEMP[0], IMM[0].xxxx\n" + " 12: DP4 TEMP[0].x, TEMP[2], IMM[0].zzzz\n" + " 13: SEQ TEMP[1].x, TEMP[0].xxxx, IMM[0].yyyy\n" + " 14: IF TEMP[1].xxxx :16\n" + " 15: KILP\n" + " 16: ENDIF\n" + " 17: MOV OUT[0], TEMP[2]\n" + " 18: END\n"; + + +static const char color1fs[] = "FRAG\n" + "PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1\n" + "DCL IN[0], GENERIC[0], PERSPECTIVE\n" + "DCL IN[1], GENERIC[10], PERSPECTIVE\n" + "DCL IN[2], GENERIC[11], PERSPECTIVE\n" + "DCL OUT[0], COLOR\n" + "DCL SAMP[0]\n" + "DCL TEMP[0..2]\n" + "IMM FLT32 { 0.2126, 0.7152, 0.0722, 0.1000}\n" + "IMM FLT32 { 1.0000, 0.0000, 0.0000, 0.0000}\n" + " 0: TEX TEMP[1].xyz, IN[1].xyyy, SAMP[0], 2D\n" + " 1: DP3 TEMP[0].x, TEMP[1].xyzz, IMM[0]\n" + " 2: TEX TEMP[1].xyz, IN[1].zwww, SAMP[0], 2D\n" + " 3: DP3 TEMP[0].y, TEMP[1].xyzz, IMM[0].xyzz\n" + " 4: TEX TEMP[1].xyz, IN[2].xyyy, SAMP[0], 2D\n" + " 5: DP3 TEMP[0].z, TEMP[1].xyzz, IMM[0].xyzz\n" + " 6: TEX TEMP[1].xyz, IN[2].zwww, SAMP[0], 2D\n" + " 7: DP3 TEMP[0].w, TEMP[1].xyzz, IMM[0].xyzz\n" + " 8: TEX TEMP[1].xyz, IN[0].xyyy, SAMP[0], 2D\n" + " 9: DP3 TEMP[2].x, TEMP[1].xyzz, IMM[0].xyzz\n" + " 10: ADD TEMP[1], TEMP[2].xxxx, -TEMP[0]\n" + " 11: ABS TEMP[0], TEMP[1]\n" + " 12: SGE TEMP[2], TEMP[0], IMM[0].wwww\n" + " 13: DP4 TEMP[0].x, TEMP[2], IMM[1].xxxx\n" + " 14: SEQ TEMP[1].x, TEMP[0].xxxx, IMM[1].yyyy\n" + " 15: IF TEMP[1].xxxx :17\n" + " 16: KILP\n" + " 17: ENDIF\n" + " 18: MOV OUT[0], TEMP[2]\n" + " 19: END\n"; + + +static const char neigh3fs[] = "FRAG\n" + "PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1\n" + "DCL IN[0], GENERIC[0], PERSPECTIVE\n" + "DCL IN[1], GENERIC[10], PERSPECTIVE\n" + "DCL IN[2], GENERIC[11], PERSPECTIVE\n" + "DCL OUT[0], COLOR\n" + "DCL SAMP[0]\n" + "DCL SAMP[1]\n" + "DCL TEMP[0..8]\n" + "IMM FLT32 { 1.0000, 0.00001, 0.0000, 0.0000}\n" + " 0: TEX TEMP[0], IN[0].xyyy, SAMP[1], 2D\n" + " 1: MOV TEMP[1].x, TEMP[0].xxxx\n" + " 2: TEX TEMP[2].y, IN[2].zwww, SAMP[1], 2D\n" + " 3: MOV TEMP[1].y, TEMP[2].yyyy\n" + " 4: MOV TEMP[1].z, TEMP[0].zzzz\n" + " 5: TEX TEMP[1].w, IN[2].xyyy, SAMP[1], 2D\n" + " 6: MUL TEMP[4], TEMP[1], TEMP[1]\n" + " 7: MUL TEMP[5], TEMP[4], TEMP[1]\n" + " 8: DP4 TEMP[1].x, TEMP[5], IMM[0].xxxx\n" + " 9: SLT TEMP[4].x, TEMP[1].xxxx, IMM[0].yyyy\n" + " 10: IF TEMP[4].xxxx :12\n" + " 11: KILP\n" + " 12: ENDIF\n" + " 13: TEX TEMP[4], IN[0].xyyy, SAMP[0], 2D\n" + " 14: TEX TEMP[6], IN[1].zwww, SAMP[0], 2D\n" + " 15: ADD TEMP[7].x, IMM[0].xxxx, -TEMP[0].xxxx\n" + " 16: MUL TEMP[8], TEMP[4], TEMP[7].xxxx\n" + " 17: MAD TEMP[7], TEMP[6], TEMP[0].xxxx, TEMP[8]\n" + " 18: MUL TEMP[6], TEMP[7], TEMP[5].xxxx\n" + " 19: TEX TEMP[7], IN[2].zwww, SAMP[0], 2D\n" + " 20: ADD TEMP[8].x, IMM[0].xxxx, -TEMP[2].yyyy\n" + " 21: MUL TEMP[3], TEMP[4], TEMP[8].xxxx\n" + " 22: MAD TEMP[8], TEMP[7], TEMP[2].yyyy, TEMP[3]\n" + " 23: MAD TEMP[2], TEMP[8], TEMP[5].yyyy, TEMP[6]\n" + " 24: TEX TEMP[6], IN[1].xyyy, SAMP[0], 2D\n" + " 25: ADD TEMP[7].x, IMM[0].xxxx, -TEMP[0].zzzz\n" + " 26: MUL TEMP[8], TEMP[4], TEMP[7].xxxx\n" + " 27: MAD TEMP[7], TEMP[6], TEMP[0].zzzz, TEMP[8]\n" + " 28: MAD TEMP[0], TEMP[7], TEMP[5].zzzz, TEMP[2]\n" + " 29: TEX TEMP[2], IN[2].xyyy, SAMP[0], 2D\n" + " 30: ADD TEMP[6].x, IMM[0].xxxx, -TEMP[1].wwww\n" + " 31: MUL TEMP[7], TEMP[4], TEMP[6].xxxx\n" + " 32: MAD TEMP[4], TEMP[2], TEMP[1].wwww, TEMP[7]\n" + " 33: MAD TEMP[2], TEMP[4], TEMP[5].wwww, TEMP[0]\n" + " 34: RCP TEMP[0].x, TEMP[1].xxxx\n" + " 35: MUL OUT[0], TEMP[2], TEMP[0].xxxx\n" + " 36: END\n"; + + +static const char offsetvs[] = "VERT\n" + "DCL IN[0]\n" + "DCL IN[1]\n" + "DCL OUT[0], POSITION\n" + "DCL OUT[1], GENERIC[0]\n" + "DCL OUT[2], GENERIC[10]\n" + "DCL OUT[3], GENERIC[11]\n" + "DCL CONST[0]\n" + "IMM FLT32 { 1.0000, 0.0000, -1.0000, 0.0000}\n" + " 0: MOV OUT[0], IN[0]\n" + " 1: MOV OUT[1], IN[1]\n" + " 2: MAD OUT[2], CONST[0].xyxy, IMM[0].zyyz, IN[1].xyxy\n" + " 3: MAD OUT[3], CONST[0].xyxy, IMM[0].xyyx, IN[1].xyxy\n" + " 4: END\n"; + + +static const char blend2fs_1[] = "FRAG\n" + "PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1\n" + "DCL IN[0], GENERIC[0], PERSPECTIVE\n" + "DCL OUT[0], COLOR\n" + "DCL SAMP[0]\n" + "DCL SAMP[1]\n" + "DCL SAMP[2]\n" + "DCL CONST[0]\n" + "DCL TEMP[0..6]\n" + "IMM FLT32 { 0.0000, -0.2500, 0.00609756, 0.5000}\n" + "IMM FLT32 { -1.5000, -2.0000, 0.9000, 1.5000}\n" + "IMM FLT32 { 2.0000, 1.0000, 4.0000, 33.0000}\n"; + +static const char blend2fs_2[] = + " 0: MOV TEMP[0], IMM[0].xxxx\n" + " 1: TEX TEMP[1], IN[0].xyyy, SAMP[1], 2D\n" + " 2: MOV TEMP[2].x, TEMP[1]\n" + " 3: SNE TEMP[3].x, TEMP[1].yyyy, IMM[0].xxxx\n" + " 4: IF TEMP[3].xxxx :76\n" + " 5: MOV TEMP[1].xy, IN[0].xyxx\n" + " 6: MOV TEMP[4].x, IMM[1].xxxx\n" + " 7: BGNLOOP :24\n" + " 8: MUL TEMP[5].x, IMM[1].yyyy, IMM[3].xxxx\n" + " 9: SLE TEMP[6].x, TEMP[4].xxxx, TEMP[5].xxxx\n" + " 10: IF TEMP[6].xxxx :12\n" + " 11: BRK\n" + " 12: ENDIF\n" + " 13: MOV TEMP[4].y, IMM[0].xxxx\n" + " 14: MAD TEMP[3].xyz, CONST[0].xyyy, TEMP[4].xyyy, TEMP[1].xyyy\n" + " 15: MOV TEMP[3].w, IMM[0].xxxx\n" + " 16: TXL TEMP[5], TEMP[3], SAMP[2], 2D\n" + " 17: MOV TEMP[3].x, TEMP[5].yyyy\n" + " 18: SLT TEMP[6].x, TEMP[5].yyyy, IMM[1].zzzz\n" + " 19: IF TEMP[6].xxxx :21\n" + " 20: BRK\n" + " 21: ENDIF\n" + " 22: ADD TEMP[6].x, TEMP[4].xxxx, IMM[1].yyyy\n" + " 23: MOV TEMP[4].x, TEMP[6].xxxx\n" + " 24: ENDLOOP :7\n" + " 25: ADD TEMP[1].x, TEMP[4].xxxx, IMM[1].wwww\n" + " 26: MAD TEMP[6].x, -IMM[2].xxxx, TEMP[3].xxxx, TEMP[1].xxxx\n" + " 27: MUL TEMP[1].x, IMM[1].yyyy, IMM[3].xxxx\n" + " 28: MAX TEMP[4].x, TEMP[6].xxxx, TEMP[1].xxxx\n" + " 29: MOV TEMP[1].x, TEMP[4].xxxx\n" + " 30: MOV TEMP[3].xy, IN[0].xyxx\n" + " 31: MOV TEMP[5].x, IMM[1].wwww\n" + " 32: BGNLOOP :49\n" + " 33: MUL TEMP[6].x, IMM[2].xxxx, IMM[3].xxxx\n" + " 34: SGE TEMP[4].x, TEMP[5].xxxx, TEMP[6].xxxx\n" + " 35: IF TEMP[4].xxxx :37\n" + " 36: BRK\n" + " 37: ENDIF\n" + " 38: MOV TEMP[5].y, IMM[0].xxxx\n" + " 39: MAD TEMP[4].xyz, CONST[0].xyyy, TEMP[5].xyyy, TEMP[3].xyyy\n" + " 40: MOV TEMP[4].w, IMM[0].xxxx\n" + " 41: TXL TEMP[6].xy, TEMP[4], SAMP[2], 2D\n" + " 42: MOV TEMP[4].x, TEMP[6].yyyy\n" + " 43: SLT TEMP[0].x, TEMP[6].yyyy, IMM[1].zzzz\n" + " 44: IF TEMP[0].xxxx :46\n" + " 45: BRK\n" + " 46: ENDIF\n" + " 47: ADD TEMP[6].x, TEMP[5].xxxx, IMM[2].xxxx\n" + " 48: MOV TEMP[5].x, TEMP[6].xxxx\n" + " 49: ENDLOOP :32\n" + " 50: ADD TEMP[3].x, TEMP[5].xxxx, IMM[1].xxxx\n" + " 51: MAD TEMP[5].x, IMM[2].xxxx, TEMP[4].xxxx, TEMP[3].xxxx\n" + " 52: MUL TEMP[3].x, IMM[2].xxxx, IMM[3].xxxx\n" + " 53: MIN TEMP[4].x, TEMP[5].xxxx, TEMP[3].xxxx\n" + " 54: MOV TEMP[3].x, TEMP[1].xxxx\n" + " 55: MOV TEMP[3].y, TEMP[4].xxxx\n" + " 56: MOV TEMP[5].yw, IMM[0].yyyy\n" + " 57: MOV TEMP[5].x, TEMP[1].xxxx\n" + " 58: ADD TEMP[1].x, TEMP[4].xxxx, IMM[2].yyyy\n" + " 59: MOV TEMP[5].z, TEMP[1].xxxx\n" + " 60: MAD TEMP[1], TEMP[5], CONST[0].xyxy, IN[0].xyxy\n" + " 61: MOV TEMP[4], TEMP[1].xyyy\n" + " 62: MOV TEMP[4].w, IMM[0].xxxx\n" + " 63: TXL TEMP[5].x, TEMP[4], SAMP[2], 2D\n" + " 64: MOV TEMP[4].x, TEMP[5].xxxx\n" + " 65: MOV TEMP[5], TEMP[1].zwww\n" + " 66: MOV TEMP[5].w, IMM[0].xxxx\n" + " 67: TXL TEMP[1].x, TEMP[5], SAMP[2], 2D\n" + " 68: MOV TEMP[4].y, TEMP[1].xxxx\n" + " 69: MUL TEMP[5].xy, IMM[2].zzzz, TEMP[4].xyyy\n" + " 70: ROUND TEMP[1].xy, TEMP[5].xyyy\n" + " 71: ABS TEMP[4].xy, TEMP[3].xyyy\n" + " 72: MAD TEMP[3].xy, IMM[2].wwww, TEMP[1].xyyy, TEMP[4].xyyy\n" + " 73: MUL TEMP[5].xyz, TEMP[3].xyyy, IMM[0].zzzz\n" + " 74: MOV TEMP[5].w, IMM[0].xxxx\n" + " 75: TXL TEMP[0].xy, TEMP[5], SAMP[0], 2D\n" + " 76: ENDIF\n" + " 77: SNE TEMP[1].x, TEMP[2].xxxx, IMM[0].xxxx\n" + " 78: IF TEMP[1].xxxx :151\n" + " 79: MOV TEMP[1].xy, IN[0].xyxx\n" + " 80: MOV TEMP[3].x, IMM[1].xxxx\n" + " 81: BGNLOOP :98\n" + " 82: MUL TEMP[4].x, IMM[1].yyyy, IMM[3].xxxx\n" + " 83: SLE TEMP[5].x, TEMP[3].xxxx, TEMP[4].xxxx\n" + " 84: IF TEMP[5].xxxx :86\n" + " 85: BRK\n" + " 86: ENDIF\n" + " 87: MOV TEMP[3].y, IMM[0].xxxx\n" + " 88: MAD TEMP[5].xyz, CONST[0].xyyy, TEMP[3].yxxx, TEMP[1].xyyy\n" + " 89: MOV TEMP[5].w, IMM[0].xxxx\n" + " 90: TXL TEMP[4], TEMP[5], SAMP[2], 2D\n" + " 91: MOV TEMP[2].x, TEMP[4].xxxx\n" + " 92: SLT TEMP[5].x, TEMP[4].xxxx, IMM[1].zzzz\n" + " 93: IF TEMP[5].xxxx :95\n" + " 94: BRK\n" + " 95: ENDIF\n" + " 96: ADD TEMP[4].x, TEMP[3].xxxx, IMM[1].yyyy\n" + " 97: MOV TEMP[3].x, TEMP[4].xxxx\n" + " 98: ENDLOOP :81\n" + " 99: ADD TEMP[1].x, TEMP[3].xxxx, IMM[1].wwww\n" + "100: MAD TEMP[6].x, -IMM[2].xxxx, TEMP[2].xxxx, TEMP[1].xxxx\n" + "101: MUL TEMP[1].x, IMM[1].yyyy, IMM[3].xxxx\n" + "102: MAX TEMP[3].x, TEMP[6].xxxx, TEMP[1].xxxx\n" + "103: MOV TEMP[1].x, TEMP[3].xxxx\n" + "104: MOV TEMP[2].xy, IN[0].xyxx\n" + "105: MOV TEMP[4].x, IMM[1].wwww\n" + "106: BGNLOOP :123\n" + "107: MUL TEMP[5].x, IMM[2].xxxx, IMM[3].xxxx\n" + "108: SGE TEMP[6].x, TEMP[4].xxxx, TEMP[5].xxxx\n" + "109: IF TEMP[6].xxxx :111\n" + "110: BRK\n" + "111: ENDIF\n" + "112: MOV TEMP[4].y, IMM[0].xxxx\n" + "113: MAD TEMP[5].xyz, CONST[0].xyyy, TEMP[4].yxxx, TEMP[2].xyyy\n" + "114: MOV TEMP[5].w, IMM[0].xxxx\n" + "115: TXL TEMP[6], TEMP[5], SAMP[2], 2D\n" + "116: MOV TEMP[3].x, TEMP[6].xxxx\n" + "117: SLT TEMP[5].x, TEMP[6].xxxx, IMM[1].zzzz\n" + "118: IF TEMP[5].xxxx :120\n" + "119: BRK\n" + "120: ENDIF\n" + "121: ADD TEMP[6].x, TEMP[4].xxxx, IMM[2].xxxx\n" + "122: MOV TEMP[4].x, TEMP[6].xxxx\n" + "123: ENDLOOP :106\n" + "124: ADD TEMP[2].x, TEMP[4].xxxx, IMM[1].xxxx\n" + "125: MAD TEMP[4].x, IMM[2].xxxx, TEMP[3].xxxx, TEMP[2].xxxx\n" + "126: MUL TEMP[2].x, IMM[2].xxxx, IMM[3].xxxx\n" + "127: MIN TEMP[3].x, TEMP[4].xxxx, TEMP[2].xxxx\n" + "128: MOV TEMP[2].x, TEMP[1].xxxx\n" + "129: MOV TEMP[2].y, TEMP[3].xxxx\n" + "130: MOV TEMP[4].xz, IMM[0].yyyy\n" + "131: MOV TEMP[4].y, TEMP[1].xxxx\n" + "132: ADD TEMP[1].x, TEMP[3].xxxx, IMM[2].yyyy\n" + "133: MOV TEMP[4].w, TEMP[1].xxxx\n" + "134: MAD TEMP[1], TEMP[4], CONST[0].xyxy, IN[0].xyxy\n" + "135: MOV TEMP[3], TEMP[1].xyyy\n" + "136: MOV TEMP[3].w, IMM[0].xxxx\n" + "137: TXL TEMP[4].y, TEMP[3], SAMP[2], 2D\n" + "138: MOV TEMP[3].x, TEMP[4].yyyy\n" + "139: MOV TEMP[4], TEMP[1].zwww\n" + "140: MOV TEMP[4].w, IMM[0].xxxx\n" + "141: TXL TEMP[1].y, TEMP[4], SAMP[2], 2D\n" + "142: MOV TEMP[3].y, TEMP[1].yyyy\n" + "143: MUL TEMP[4].xy, IMM[2].zzzz, TEMP[3].xyyy\n" + "144: ROUND TEMP[1].xy, TEMP[4].xyyy\n" + "145: ABS TEMP[3].xy, TEMP[2].xyyy\n" + "146: MAD TEMP[2].xy, IMM[2].wwww, TEMP[1].xyyy, TEMP[3].xyyy\n" + "147: MUL TEMP[3].xyz, TEMP[2].xyyy, IMM[0].zzzz\n" + "148: MOV TEMP[3].w, IMM[0].xxxx\n" + "149: TXL TEMP[1].xy, TEMP[3], SAMP[0], 2D\n" + "150: MOV TEMP[0].zw, TEMP[1].yyxy\n" + "151: ENDIF\n" + "152: MOV OUT[0], TEMP[0]\n" + "153: END\n"; + +#endif diff --git a/src/gallium/auxiliary/postprocess/pp_mlaa_areamap.h b/src/gallium/auxiliary/postprocess/pp_mlaa_areamap.h new file mode 100644 index 00000000000..1446ff2cdf0 --- /dev/null +++ b/src/gallium/auxiliary/postprocess/pp_mlaa_areamap.h @@ -0,0 +1,2821 @@ +/** + * Copyright (C) 2010 Jorge Jimenez ([email protected]) + * Copyright (C) 2010 Belen Masia ([email protected]) + * Copyright (C) 2010 Jose I. Echevarria ([email protected]) + * Copyright (C) 2010 Fernando Navarro ([email protected]) + * Copyright (C) 2010 Diego Gutierrez ([email protected]) + * Copyright (C) 2011 Lauri Kasanen ([email protected]) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the following statement: + * + * "Uses Jimenez's MLAA. Copyright (C) 2010 by Jorge Jimenez, Belen Masia, + * Jose I. Echevarria, Fernando Navarro and Diego Gutierrez." + * + * Only for use in the Mesa project, this point 2 is filled by naming the + * technique Jimenez's MLAA in the Mesa config options. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS + * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * The views and conclusions contained in the software and documentation are + * those of the authors and should not be interpreted as representing official + * policies, either expressed or implied, of the copyright holders. + */ + +#ifndef PP_MLAA_AREAMAP_H +#define PP_MLAA_AREAMAP_H + +static const unsigned char areamap[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 31, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63, 0, 10, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 63, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 85, 0, 31, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85, 0, + 31, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 95, 0, 51, + 0, 21, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 95, 0, 51, 0, 21, 0, 4, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 102, 0, 63, 0, 36, 0, 15, 0, 3, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 102, 0, + 63, 0, 36, 0, 15, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 106, 0, 72, + 0, 47, 0, 28, 0, 12, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 106, 0, 72, 0, 47, 0, 28, 0, 12, 0, 2, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 109, 0, 79, 0, 56, 0, 38, 0, 23, 0, 10, 0, 2, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 109, 0, + 79, 0, 56, 0, 38, 0, 23, 0, 10, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 111, 0, 85, + 0, 63, 0, 46, 0, 31, 0, 19, 0, 9, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 111, 0, 85, 0, 63, 0, 46, 0, 31, 0, 19, 0, + 9, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 113, 0, 89, 0, 69, 0, 53, 0, 39, 0, 27, 0, 17, + 0, 7, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 113, 0, + 89, 0, 69, 0, 53, 0, 39, 0, 27, 0, 17, 0, 7, 0, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 114, 0, 92, + 0, 74, 0, 58, 0, 45, 0, 34, 0, 23, 0, 15, 0, 7, 0, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 114, 0, 92, 0, 74, 0, 58, 0, 45, 0, 34, 0, + 23, 0, 15, 0, 7, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 115, 0, 95, 0, 78, 0, 63, 0, 51, 0, 39, 0, 30, + 0, 21, 0, 13, 0, 6, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 115, 0, + 95, 0, 78, 0, 63, 0, 51, 0, 39, 0, 30, 0, 21, 0, 13, 0, 6, 0, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 116, 0, 98, + 0, 81, 0, 68, 0, 55, 0, 44, 0, 35, 0, 26, 0, 19, 0, 12, 0, 5, 0, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 116, 0, 98, 0, 81, 0, 68, 0, 55, 0, 44, 0, + 35, 0, 26, 0, 19, 0, 12, 0, 5, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 117, 0, 100, 0, 85, 0, 71, 0, 60, 0, 49, 0, 40, + 0, 31, 0, 24, 0, 17, 0, 11, 0, 5, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 117, 0, + 100, 0, 85, 0, 71, 0, 60, 0, 49, 0, 40, 0, 31, 0, 24, 0, 17, 0, 11, 0, + 5, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 118, 0, 102, + 0, 87, 0, 75, 0, 63, 0, 53, 0, 44, 0, 36, 0, 28, 0, 22, 0, 15, 0, 10, + 0, 4, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 118, 0, 102, 0, 87, 0, 75, 0, 63, 0, 53, 0, + 44, 0, 36, 0, 28, 0, 22, 0, 15, 0, 10, 0, 4, 0, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 119, 0, 103, 0, 89, 0, 77, 0, 67, 0, 57, 0, 48, + 0, 40, 0, 33, 0, 26, 0, 20, 0, 14, 0, 9, 0, 4, 0, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 119, 0, + 103, 0, 89, 0, 77, 0, 67, 0, 57, 0, 48, 0, 40, 0, 33, 0, 26, 0, 20, 0, + 14, 0, 9, 0, 4, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 119, 0, 105, + 0, 92, 0, 80, 0, 70, 0, 60, 0, 52, 0, 44, 0, 37, 0, 30, 0, 24, 0, 18, + 0, 13, 0, 8, 0, 4, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 119, 0, 105, 0, 92, 0, 80, 0, 70, 0, 60, 0, + 52, 0, 44, 0, 37, 0, 30, 0, 24, 0, 18, 0, 13, 0, 8, 0, 4, 0, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 120, 0, 106, 0, 93, 0, 82, 0, 72, 0, 63, 0, 55, + 0, 47, 0, 40, 0, 34, 0, 28, 0, 22, 0, 17, 0, 12, 0, 8, 0, 3, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 0, + 106, 0, 93, 0, 82, 0, 72, 0, 63, 0, 55, 0, 47, 0, 40, 0, 34, 0, 28, 0, + 22, 0, 17, 0, 12, 0, 8, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 0, 107, + 0, 95, 0, 85, 0, 75, 0, 66, 0, 58, 0, 51, 0, 44, 0, 37, 0, 31, 0, 26, + 0, 21, 0, 16, 0, 11, 0, 7, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 120, 0, 107, 0, 95, 0, 85, 0, 75, 0, 66, 0, + 58, 0, 51, 0, 44, 0, 37, 0, 31, 0, 26, 0, 21, 0, 16, 0, 11, 0, 7, 0, + 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 120, 0, 108, 0, 97, 0, 86, 0, 77, 0, 69, 0, 61, + 0, 53, 0, 47, 0, 40, 0, 35, 0, 29, 0, 24, 0, 19, 0, 15, 0, 11, 0, 7, + 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 0, + 108, 0, 97, 0, 86, 0, 77, 0, 69, 0, 61, 0, 53, 0, 47, 0, 40, 0, 35, 0, + 29, 0, 24, 0, 19, 0, 15, 0, 11, 0, 7, 0, 3, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 109, + 0, 98, 0, 88, 0, 79, 0, 71, 0, 63, 0, 56, 0, 50, 0, 43, 0, 38, 0, 32, + 0, 27, 0, 23, 0, 18, 0, 14, 0, 10, 0, 6, 0, 3, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 109, 0, 98, 0, 88, 0, 79, 0, 71, 0, + 63, 0, 56, 0, 50, 0, 43, 0, 38, 0, 32, 0, 27, 0, 23, 0, 18, 0, 14, 0, + 10, 0, 6, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 121, 0, 110, 0, 99, 0, 90, 0, 81, 0, 73, 0, 66, + 0, 59, 0, 52, 0, 46, 0, 41, 0, 35, 0, 30, 0, 26, 0, 21, 0, 17, 0, 13, + 0, 10, 0, 6, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, + 110, 0, 99, 0, 90, 0, 81, 0, 73, 0, 66, 0, 59, 0, 52, 0, 46, 0, 41, 0, + 35, 0, 30, 0, 26, 0, 21, 0, 17, 0, 13, 0, 10, 0, 6, 0, 3, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 110, + 0, 100, 0, 91, 0, 83, 0, 75, 0, 68, 0, 61, 0, 55, 0, 49, 0, 43, 0, 38, + 0, 33, 0, 29, 0, 24, 0, 20, 0, 16, 0, 13, 0, 9, 0, 6, 0, 3, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 110, 0, 100, 0, 91, 0, 83, 0, 75, 0, + 68, 0, 61, 0, 55, 0, 49, 0, 43, 0, 38, 0, 33, 0, 29, 0, 24, 0, 20, 0, + 16, 0, 13, 0, 9, 0, 6, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 121, 0, 111, 0, 102, 0, 93, 0, 85, 0, 77, 0, 70, + 0, 63, 0, 57, 0, 51, 0, 46, 0, 41, 0, 36, 0, 31, 0, 27, 0, 23, 0, 19, + 0, 15, 0, 12, 0, 9, 0, 5, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, + 111, 0, 102, 0, 93, 0, 85, 0, 77, 0, 70, 0, 63, 0, 57, 0, 51, 0, 46, 0, + 41, 0, 36, 0, 31, 0, 27, 0, 23, 0, 19, 0, 15, 0, 12, 0, 9, 0, 5, 0, + 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 112, + 0, 102, 0, 94, 0, 86, 0, 79, 0, 72, 0, 65, 0, 59, 0, 54, 0, 48, 0, 43, + 0, 38, 0, 34, 0, 30, 0, 26, 0, 22, 0, 18, 0, 15, 0, 11, 0, 8, 0, 5, + 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 112, 0, 102, 0, 94, 0, 86, 0, 79, 0, + 72, 0, 65, 0, 59, 0, 54, 0, 48, 0, 43, 0, 38, 0, 34, 0, 30, 0, 26, 0, + 22, 0, 18, 0, 15, 0, 11, 0, 8, 0, 5, 0, 2, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 122, 0, 112, 0, 103, 0, 95, 0, 87, 0, 80, 0, 74, + 0, 67, 0, 61, 0, 56, 0, 51, 0, 46, 0, 41, 0, 36, 0, 32, 0, 28, 0, 24, + 0, 21, 0, 17, 0, 14, 0, 11, 0, 8, 0, 5, 0, 2, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, + 112, 0, 103, 0, 95, 0, 87, 0, 80, 0, 74, 0, 67, 0, 61, 0, 56, 0, 51, 0, + 46, 0, 41, 0, 36, 0, 32, 0, 28, 0, 24, 0, 21, 0, 17, 0, 14, 0, 11, 0, + 8, 0, 5, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 113, + 0, 104, 0, 96, 0, 89, 0, 82, 0, 75, 0, 69, 0, 63, 0, 58, 0, 53, 0, 48, + 0, 43, 0, 39, 0, 35, 0, 31, 0, 27, 0, 23, 0, 20, 0, 17, 0, 13, 0, 10, + 0, 7, 0, 5, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 113, 0, 104, 0, 96, 0, 89, 0, 82, 0, + 75, 0, 69, 0, 63, 0, 58, 0, 53, 0, 48, 0, 43, 0, 39, 0, 35, 0, 31, 0, + 27, 0, 23, 0, 20, 0, 17, 0, 13, 0, 10, 0, 7, 0, 5, 0, 2, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 122, 0, 113, 0, 105, 0, 97, 0, 90, 0, 83, 0, 77, + 0, 71, 0, 65, 0, 60, 0, 55, 0, 50, 0, 45, 0, 41, 0, 37, 0, 33, 0, 29, + 0, 26, 0, 22, 0, 19, 0, 16, 0, 13, 0, 10, 0, 7, 0, 5, 0, 2, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, + 113, 0, 105, 0, 97, 0, 90, 0, 83, 0, 77, 0, 71, 0, 65, 0, 60, 0, 55, 0, + 50, 0, 45, 0, 41, 0, 37, 0, 33, 0, 29, 0, 26, 0, 22, 0, 19, 0, 16, 0, + 13, 0, 10, 0, 7, 0, 5, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 114, + 0, 106, 0, 98, 0, 91, 0, 85, 0, 78, 0, 72, 0, 67, 0, 62, 0, 57, 0, 52, + 0, 47, 0, 43, 0, 39, 0, 35, 0, 31, 0, 28, 0, 24, 0, 21, 0, 18, 0, 15, + 0, 12, 0, 10, 0, 7, 0, 4, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 114, 0, 106, 0, 98, 0, 91, 0, 85, 0, + 78, 0, 72, 0, 67, 0, 62, 0, 57, 0, 52, 0, 47, 0, 43, 0, 39, 0, 35, 0, + 31, 0, 28, 0, 24, 0, 21, 0, 18, 0, 15, 0, 12, 0, 10, 0, 7, 0, 4, 0, + 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 123, 0, 114, 0, 106, 0, 99, 0, 92, 0, 86, 0, 80, + 0, 74, 0, 68, 0, 63, 0, 58, 0, 54, 0, 49, 0, 45, 0, 41, 0, 37, 0, 34, + 0, 30, 0, 27, 0, 23, 0, 20, 0, 17, 0, 15, 0, 12, 0, 9, 0, 7, 0, 4, + 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, + 114, 0, 106, 0, 99, 0, 92, 0, 86, 0, 80, 0, 74, 0, 68, 0, 63, 0, 58, 0, + 54, 0, 49, 0, 45, 0, 41, 0, 37, 0, 34, 0, 30, 0, 27, 0, 23, 0, 20, 0, + 17, 0, 15, 0, 12, 0, 9, 0, 7, 0, 4, 0, 2, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 115, + 0, 107, 0, 100, 0, 93, 0, 87, 0, 81, 0, 75, 0, 70, 0, 65, 0, 60, 0, 55, + 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, 0, 32, 0, 29, 0, 26, 0, 22, 0, 20, + 0, 17, 0, 14, 0, 11, 0, 9, 0, 6, 0, 4, 0, 2, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 115, 0, 107, 0, 100, 0, 93, 0, 87, 0, + 81, 0, 75, 0, 70, 0, 65, 0, 60, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, + 36, 0, 32, 0, 29, 0, 26, 0, 22, 0, 20, 0, 17, 0, 14, 0, 11, 0, 9, 0, + 6, 0, 4, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 123, 0, 115, 0, 108, 0, 101, 0, 94, 0, 88, 0, 82, + 0, 77, 0, 71, 0, 66, 0, 62, 0, 57, 0, 53, 0, 49, 0, 45, 0, 41, 0, 37, + 0, 34, 0, 31, 0, 28, 0, 25, 0, 22, 0, 19, 0, 16, 0, 13, 0, 11, 0, 8, + 0, 6, 0, 4, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, + 115, 0, 108, 0, 101, 0, 94, 0, 88, 0, 82, 0, 77, 0, 71, 0, 66, 0, 62, 0, + 57, 0, 53, 0, 49, 0, 45, 0, 41, 0, 37, 0, 34, 0, 31, 0, 28, 0, 25, 0, + 22, 0, 19, 0, 16, 0, 13, 0, 11, 0, 8, 0, 6, 0, 4, 0, 2, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 115, + 0, 108, 0, 102, 0, 95, 0, 89, 0, 83, 0, 78, 0, 73, 0, 68, 0, 63, 0, 59, + 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, 0, 33, 0, 30, 0, 26, 0, 24, + 0, 21, 0, 18, 0, 15, 0, 13, 0, 10, 0, 8, 0, 6, 0, 4, 0, 2, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 115, 0, 108, 0, 102, 0, 95, 0, 89, 0, + 83, 0, 78, 0, 73, 0, 68, 0, 63, 0, 59, 0, 55, 0, 51, 0, 47, 0, 43, 0, + 39, 0, 36, 0, 33, 0, 30, 0, 26, 0, 24, 0, 21, 0, 18, 0, 15, 0, 13, 0, + 10, 0, 8, 0, 6, 0, 4, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 123, 0, 116, 0, 109, 0, 102, 0, 96, 0, 90, 0, 85, + 0, 79, 0, 74, 0, 69, 0, 65, 0, 60, 0, 56, 0, 52, 0, 48, 0, 45, 0, 41, + 0, 38, 0, 35, 0, 31, 0, 28, 0, 25, 0, 23, 0, 20, 0, 17, 0, 15, 0, 12, + 0, 10, 0, 8, 0, 6, 0, 4, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, + 116, 0, 109, 0, 102, 0, 96, 0, 90, 0, 85, 0, 79, 0, 74, 0, 69, 0, 65, 0, + 60, 0, 56, 0, 52, 0, 48, 0, 45, 0, 41, 0, 38, 0, 35, 0, 31, 0, 28, 0, + 25, 0, 23, 0, 20, 0, 17, 0, 15, 0, 12, 0, 10, 0, 8, 0, 6, 0, 4, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 31, 0, 63, 0, 85, 0, 95, 0, 102, + 0, 106, 0, 109, 0, 111, 0, 113, 0, 114, 0, 115, 0, 116, 0, 117, 0, 118, 0, + 119, + 0, 119, 0, 120, 0, 120, 0, 120, 0, 121, 0, 121, 0, 121, 0, 121, 0, 122, 0, + 122, + 0, 122, 0, 122, 0, 122, 0, 123, 0, 123, 0, 123, 0, 123, 0, 123, 0, 62, 0, + 63, + 0, 85, 0, 95, 0, 102, 0, 106, 0, 109, 0, 111, 0, 113, 0, 114, 0, 115, 0, + 116, + 0, 117, 0, 118, 0, 119, 0, 119, 0, 120, 0, 120, 0, 120, 0, 121, 0, 121, 0, + 121, + 0, 121, 0, 122, 0, 122, 0, 122, 0, 122, 0, 122, 0, 123, 0, 123, 0, 123, 0, + 123, + 0, 123, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 31, 31, 0, 63, 0, 85, 0, 95, 0, 102, 0, 106, + 0, 109, 0, 111, 0, 113, 0, 114, 0, 115, 0, 116, 0, 117, 0, 118, 0, 119, 0, + 119, + 0, 120, 0, 120, 0, 120, 0, 121, 0, 121, 0, 121, 0, 121, 0, 122, 0, 122, 0, + 122, + 0, 122, 0, 122, 0, 123, 0, 123, 0, 123, 0, 123, 0, 123, 31, 31, 0, 63, 0, + 85, + 0, 95, 0, 102, 0, 106, 0, 109, 0, 111, 0, 113, 0, 114, 0, 115, 0, 116, 0, + 117, + 0, 118, 0, 119, 0, 119, 0, 120, 0, 120, 0, 120, 0, 121, 0, 121, 0, 121, 0, + 121, + 0, 122, 0, 122, 0, 122, 0, 122, 0, 122, 0, 123, 0, 123, 0, 123, 0, 123, 0, + 123, + 0, 0, 0, 10, 0, 31, 0, 51, 0, 63, 0, 72, 0, 79, 0, 85, 0, 89, 0, 92, + 0, 95, 0, 98, 0, 100, 0, 102, 0, 103, 0, 105, 0, 106, 0, 107, 0, 108, 0, + 109, + 0, 110, 0, 110, 0, 111, 0, 112, 0, 112, 0, 113, 0, 113, 0, 114, 0, 114, 0, + 115, + 0, 115, 0, 115, 0, 116, 0, 63, 0, 20, 0, 31, 0, 51, 0, 63, 0, 72, 0, 79, + 0, 85, 0, 89, 0, 92, 0, 95, 0, 98, 0, 100, 0, 102, 0, 103, 0, 105, 0, 106, + 0, 107, 0, 108, 0, 109, 0, 110, 0, 110, 0, 111, 0, 112, 0, 112, 0, 113, 0, + 113, + 0, 114, 0, 114, 0, 115, 0, 115, 0, 115, 0, 116, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63, 0, + 10, 10, 0, 31, 0, 51, 0, 63, 0, 72, 0, 79, 0, 85, 0, 89, 0, 92, 0, 95, + 0, 98, 0, 100, 0, 102, 0, 103, 0, 105, 0, 106, 0, 107, 0, 108, 0, 109, 0, + 110, + 0, 110, 0, 111, 0, 112, 0, 112, 0, 113, 0, 113, 0, 114, 0, 114, 0, 115, 0, + 115, + 0, 115, 0, 116, 63, 0, 10, 10, 0, 31, 0, 51, 0, 63, 0, 72, 0, 79, 0, 85, + 0, 89, 0, 92, 0, 95, 0, 98, 0, 100, 0, 102, 0, 103, 0, 105, 0, 106, 0, 107, + 0, 108, 0, 109, 0, 110, 0, 110, 0, 111, 0, 112, 0, 112, 0, 113, 0, 113, 0, + 114, + 0, 114, 0, 115, 0, 115, 0, 115, 0, 116, 0, 0, 0, 0, 0, 6, 0, 21, 0, 36, + 0, 47, 0, 56, 0, 63, 0, 69, 0, 74, 0, 78, 0, 81, 0, 85, 0, 87, 0, 89, + 0, 92, 0, 93, 0, 95, 0, 97, 0, 98, 0, 99, 0, 100, 0, 102, 0, 102, 0, 103, + 0, 104, 0, 105, 0, 106, 0, 106, 0, 107, 0, 108, 0, 108, 0, 109, 0, 85, 0, + 31, + 0, 12, 0, 21, 0, 36, 0, 47, 0, 56, 0, 63, 0, 69, 0, 74, 0, 78, 0, 81, + 0, 85, 0, 87, 0, 89, 0, 92, 0, 93, 0, 95, 0, 97, 0, 98, 0, 99, 0, 100, + 0, 102, 0, 102, 0, 103, 0, 104, 0, 105, 0, 106, 0, 106, 0, 107, 0, 108, 0, + 108, + 0, 109, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 85, 0, 31, 0, 6, 6, 0, 21, 0, 36, 0, 47, + 0, 56, 0, 63, 0, 69, 0, 74, 0, 78, 0, 81, 0, 85, 0, 87, 0, 89, 0, 92, + 0, 93, 0, 95, 0, 97, 0, 98, 0, 99, 0, 100, 0, 102, 0, 102, 0, 103, 0, 104, + 0, 105, 0, 106, 0, 106, 0, 107, 0, 108, 0, 108, 0, 109, 85, 0, 31, 0, 6, 6, + 0, 21, 0, 36, 0, 47, 0, 56, 0, 63, 0, 69, 0, 74, 0, 78, 0, 81, 0, 85, + 0, 87, 0, 89, 0, 92, 0, 93, 0, 95, 0, 97, 0, 98, 0, 99, 0, 100, 0, 102, + 0, 102, 0, 103, 0, 104, 0, 105, 0, 106, 0, 106, 0, 107, 0, 108, 0, 108, 0, + 109, + 0, 0, 0, 0, 0, 0, 0, 4, 0, 15, 0, 28, 0, 38, 0, 46, 0, 53, 0, 58, + 0, 63, 0, 68, 0, 71, 0, 75, 0, 77, 0, 80, 0, 82, 0, 85, 0, 86, 0, 88, + 0, 90, 0, 91, 0, 93, 0, 94, 0, 95, 0, 96, 0, 97, 0, 98, 0, 99, 0, 100, + 0, 101, 0, 102, 0, 102, 0, 95, 0, 51, 0, 21, 0, 8, 0, 15, 0, 28, 0, 38, + 0, 46, 0, 53, 0, 58, 0, 63, 0, 68, 0, 71, 0, 75, 0, 77, 0, 80, 0, 82, + 0, 85, 0, 86, 0, 88, 0, 90, 0, 91, 0, 93, 0, 94, 0, 95, 0, 96, 0, 97, + 0, 98, 0, 99, 0, 100, 0, 101, 0, 102, 0, 102, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 95, 0, + 51, 0, 21, 0, 4, 4, 0, 15, 0, 28, 0, 38, 0, 46, 0, 53, 0, 58, 0, 63, + 0, 68, 0, 71, 0, 75, 0, 77, 0, 80, 0, 82, 0, 85, 0, 86, 0, 88, 0, 90, + 0, 91, 0, 93, 0, 94, 0, 95, 0, 96, 0, 97, 0, 98, 0, 99, 0, 100, 0, 101, + 0, 102, 0, 102, 95, 0, 51, 0, 21, 0, 4, 4, 0, 15, 0, 28, 0, 38, 0, 46, + 0, 53, 0, 58, 0, 63, 0, 68, 0, 71, 0, 75, 0, 77, 0, 80, 0, 82, 0, 85, + 0, 86, 0, 88, 0, 90, 0, 91, 0, 93, 0, 94, 0, 95, 0, 96, 0, 97, 0, 98, + 0, 99, 0, 100, 0, 101, 0, 102, 0, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, + 0, 12, 0, 23, 0, 31, 0, 39, 0, 45, 0, 51, 0, 55, 0, 60, 0, 63, 0, 67, + 0, 70, 0, 72, 0, 75, 0, 77, 0, 79, 0, 81, 0, 83, 0, 85, 0, 86, 0, 87, + 0, 89, 0, 90, 0, 91, 0, 92, 0, 93, 0, 94, 0, 95, 0, 96, 0, 102, 0, 63, + 0, 36, 0, 15, 0, 6, 0, 12, 0, 23, 0, 31, 0, 39, 0, 45, 0, 51, 0, 55, + 0, 60, 0, 63, 0, 67, 0, 70, 0, 72, 0, 75, 0, 77, 0, 79, 0, 81, 0, 83, + 0, 85, 0, 86, 0, 87, 0, 89, 0, 90, 0, 91, 0, 92, 0, 93, 0, 94, 0, 95, + 0, 96, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 102, 0, 63, 0, 36, 0, 15, 0, 3, 3, 0, 12, + 0, 23, 0, 31, 0, 39, 0, 45, 0, 51, 0, 55, 0, 60, 0, 63, 0, 67, 0, 70, + 0, 72, 0, 75, 0, 77, 0, 79, 0, 81, 0, 83, 0, 85, 0, 86, 0, 87, 0, 89, + 0, 90, 0, 91, 0, 92, 0, 93, 0, 94, 0, 95, 0, 96, 102, 0, 63, 0, 36, 0, + 15, 0, 3, 3, 0, 12, 0, 23, 0, 31, 0, 39, 0, 45, 0, 51, 0, 55, 0, 60, + 0, 63, 0, 67, 0, 70, 0, 72, 0, 75, 0, 77, 0, 79, 0, 81, 0, 83, 0, 85, + 0, 86, 0, 87, 0, 89, 0, 90, 0, 91, 0, 92, 0, 93, 0, 94, 0, 95, 0, 96, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 10, 0, 19, 0, 27, 0, 34, + 0, 39, 0, 44, 0, 49, 0, 53, 0, 57, 0, 60, 0, 63, 0, 66, 0, 69, 0, 71, + 0, 73, 0, 75, 0, 77, 0, 79, 0, 80, 0, 82, 0, 83, 0, 85, 0, 86, 0, 87, + 0, 88, 0, 89, 0, 90, 0, 106, 0, 72, 0, 47, 0, 28, 0, 12, 0, 4, 0, 10, + 0, 19, 0, 27, 0, 34, 0, 39, 0, 44, 0, 49, 0, 53, 0, 57, 0, 60, 0, 63, + 0, 66, 0, 69, 0, 71, 0, 73, 0, 75, 0, 77, 0, 79, 0, 80, 0, 82, 0, 83, + 0, 85, 0, 86, 0, 87, 0, 88, 0, 89, 0, 90, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 106, 0, + 72, 0, 47, 0, 28, 0, 12, 0, 2, 2, 0, 10, 0, 19, 0, 27, 0, 34, 0, 39, + 0, 44, 0, 49, 0, 53, 0, 57, 0, 60, 0, 63, 0, 66, 0, 69, 0, 71, 0, 73, + 0, 75, 0, 77, 0, 79, 0, 80, 0, 82, 0, 83, 0, 85, 0, 86, 0, 87, 0, 88, + 0, 89, 0, 90, 106, 0, 72, 0, 47, 0, 28, 0, 12, 0, 2, 2, 0, 10, 0, 19, + 0, 27, 0, 34, 0, 39, 0, 44, 0, 49, 0, 53, 0, 57, 0, 60, 0, 63, 0, 66, + 0, 69, 0, 71, 0, 73, 0, 75, 0, 77, 0, 79, 0, 80, 0, 82, 0, 83, 0, 85, + 0, 86, 0, 87, 0, 88, 0, 89, 0, 90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 2, 0, 9, 0, 17, 0, 23, 0, 30, 0, 35, 0, 40, 0, 44, 0, 48, + 0, 52, 0, 55, 0, 58, 0, 61, 0, 63, 0, 66, 0, 68, 0, 70, 0, 72, 0, 74, + 0, 75, 0, 77, 0, 78, 0, 80, 0, 81, 0, 82, 0, 83, 0, 85, 0, 109, 0, 79, + 0, 56, 0, 38, 0, 23, 0, 10, 0, 4, 0, 9, 0, 17, 0, 23, 0, 30, 0, 35, + 0, 40, 0, 44, 0, 48, 0, 52, 0, 55, 0, 58, 0, 61, 0, 63, 0, 66, 0, 68, + 0, 70, 0, 72, 0, 74, 0, 75, 0, 77, 0, 78, 0, 80, 0, 81, 0, 82, 0, 83, + 0, 85, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 109, 0, 79, 0, 56, 0, 38, 0, 23, 0, 10, 0, + 2, 2, 0, 9, 0, 17, 0, 23, 0, 30, 0, 35, 0, 40, 0, 44, 0, 48, 0, 52, + 0, 55, 0, 58, 0, 61, 0, 63, 0, 66, 0, 68, 0, 70, 0, 72, 0, 74, 0, 75, + 0, 77, 0, 78, 0, 80, 0, 81, 0, 82, 0, 83, 0, 85, 109, 0, 79, 0, 56, 0, + 38, 0, 23, 0, 10, 0, 2, 2, 0, 9, 0, 17, 0, 23, 0, 30, 0, 35, 0, 40, + 0, 44, 0, 48, 0, 52, 0, 55, 0, 58, 0, 61, 0, 63, 0, 66, 0, 68, 0, 70, + 0, 72, 0, 74, 0, 75, 0, 77, 0, 78, 0, 80, 0, 81, 0, 82, 0, 83, 0, 85, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 7, 0, 15, + 0, 21, 0, 26, 0, 31, 0, 36, 0, 40, 0, 44, 0, 47, 0, 51, 0, 53, 0, 56, + 0, 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 69, 0, 71, 0, 72, 0, 74, 0, 75, + 0, 77, 0, 78, 0, 79, 0, 111, 0, 85, 0, 63, 0, 46, 0, 31, 0, 19, 0, 9, + 0, 4, 0, 7, 0, 15, 0, 21, 0, 26, 0, 31, 0, 36, 0, 40, 0, 44, 0, 47, + 0, 51, 0, 53, 0, 56, 0, 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 69, 0, 71, + 0, 72, 0, 74, 0, 75, 0, 77, 0, 78, 0, 79, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 111, 0, + 85, 0, 63, 0, 46, 0, 31, 0, 19, 0, 9, 0, 2, 2, 0, 7, 0, 15, 0, 21, + 0, 26, 0, 31, 0, 36, 0, 40, 0, 44, 0, 47, 0, 51, 0, 53, 0, 56, 0, 59, + 0, 61, 0, 63, 0, 65, 0, 67, 0, 69, 0, 71, 0, 72, 0, 74, 0, 75, 0, 77, + 0, 78, 0, 79, 111, 0, 85, 0, 63, 0, 46, 0, 31, 0, 19, 0, 9, 0, 2, 2, + 0, 7, 0, 15, 0, 21, 0, 26, 0, 31, 0, 36, 0, 40, 0, 44, 0, 47, 0, 51, + 0, 53, 0, 56, 0, 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 69, 0, 71, 0, 72, + 0, 74, 0, 75, 0, 77, 0, 78, 0, 79, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 1, 0, 7, 0, 13, 0, 19, 0, 24, 0, 28, 0, 33, + 0, 37, 0, 40, 0, 44, 0, 47, 0, 50, 0, 52, 0, 55, 0, 57, 0, 59, 0, 61, + 0, 63, 0, 65, 0, 67, 0, 68, 0, 70, 0, 71, 0, 73, 0, 74, 0, 113, 0, 89, + 0, 69, 0, 53, 0, 39, 0, 27, 0, 17, 0, 7, 0, 2, 0, 7, 0, 13, 0, 19, + 0, 24, 0, 28, 0, 33, 0, 37, 0, 40, 0, 44, 0, 47, 0, 50, 0, 52, 0, 55, + 0, 57, 0, 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 68, 0, 70, 0, 71, 0, 73, + 0, 74, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 113, 0, 89, 0, 69, 0, 53, 0, 39, 0, 27, 0, + 17, 0, 7, 0, 1, 1, 0, 7, 0, 13, 0, 19, 0, 24, 0, 28, 0, 33, 0, 37, + 0, 40, 0, 44, 0, 47, 0, 50, 0, 52, 0, 55, 0, 57, 0, 59, 0, 61, 0, 63, + 0, 65, 0, 67, 0, 68, 0, 70, 0, 71, 0, 73, 0, 74, 113, 0, 89, 0, 69, 0, + 53, 0, 39, 0, 27, 0, 17, 0, 7, 0, 1, 1, 0, 7, 0, 13, 0, 19, 0, 24, + 0, 28, 0, 33, 0, 37, 0, 40, 0, 44, 0, 47, 0, 50, 0, 52, 0, 55, 0, 57, + 0, 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 68, 0, 70, 0, 71, 0, 73, 0, 74, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 0, 6, 0, 12, 0, 17, 0, 22, 0, 26, 0, 30, 0, 34, 0, 37, 0, 40, 0, 43, + 0, 46, 0, 49, 0, 51, 0, 54, 0, 56, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65, + 0, 66, 0, 68, 0, 69, 0, 114, 0, 92, 0, 74, 0, 58, 0, 45, 0, 34, 0, 23, + 0, 15, 0, 7, 0, 2, 0, 6, 0, 12, 0, 17, 0, 22, 0, 26, 0, 30, 0, 34, + 0, 37, 0, 40, 0, 43, 0, 46, 0, 49, 0, 51, 0, 54, 0, 56, 0, 58, 0, 60, + 0, 62, 0, 63, 0, 65, 0, 66, 0, 68, 0, 69, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 114, 0, + 92, 0, 74, 0, 58, 0, 45, 0, 34, 0, 23, 0, 15, 0, 7, 0, 1, 1, 0, 6, + 0, 12, 0, 17, 0, 22, 0, 26, 0, 30, 0, 34, 0, 37, 0, 40, 0, 43, 0, 46, + 0, 49, 0, 51, 0, 54, 0, 56, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65, 0, 66, + 0, 68, 0, 69, 114, 0, 92, 0, 74, 0, 58, 0, 45, 0, 34, 0, 23, 0, 15, 0, + 7, 0, 1, 1, 0, 6, 0, 12, 0, 17, 0, 22, 0, 26, 0, 30, 0, 34, 0, 37, + 0, 40, 0, 43, 0, 46, 0, 49, 0, 51, 0, 54, 0, 56, 0, 58, 0, 60, 0, 62, + 0, 63, 0, 65, 0, 66, 0, 68, 0, 69, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 5, 0, 11, 0, 15, 0, 20, + 0, 24, 0, 28, 0, 31, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 51, + 0, 53, 0, 55, 0, 57, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65, 0, 115, 0, 95, + 0, 78, 0, 63, 0, 51, 0, 39, 0, 30, 0, 21, 0, 13, 0, 6, 0, 2, 0, 5, + 0, 11, 0, 15, 0, 20, 0, 24, 0, 28, 0, 31, 0, 35, 0, 38, 0, 41, 0, 43, + 0, 46, 0, 48, 0, 51, 0, 53, 0, 55, 0, 57, 0, 58, 0, 60, 0, 62, 0, 63, + 0, 65, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 115, 0, 95, 0, 78, 0, 63, 0, 51, 0, 39, 0, + 30, 0, 21, 0, 13, 0, 6, 0, 1, 1, 0, 5, 0, 11, 0, 15, 0, 20, 0, 24, + 0, 28, 0, 31, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 51, 0, 53, + 0, 55, 0, 57, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65, 115, 0, 95, 0, 78, 0, + 63, 0, 51, 0, 39, 0, 30, 0, 21, 0, 13, 0, 6, 0, 1, 1, 0, 5, 0, 11, + 0, 15, 0, 20, 0, 24, 0, 28, 0, 31, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, + 0, 48, 0, 51, 0, 53, 0, 55, 0, 57, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1, 0, 5, 0, 10, 0, 14, 0, 18, 0, 22, 0, 26, 0, 29, 0, 32, + 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 50, 0, 52, 0, 54, 0, 55, + 0, 57, 0, 59, 0, 60, 0, 116, 0, 98, 0, 81, 0, 68, 0, 55, 0, 44, 0, 35, + 0, 26, 0, 19, 0, 12, 0, 5, 0, 2, 0, 5, 0, 10, 0, 14, 0, 18, 0, 22, + 0, 26, 0, 29, 0, 32, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 50, + 0, 52, 0, 54, 0, 55, 0, 57, 0, 59, 0, 60, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 116, 0, + 98, 0, 81, 0, 68, 0, 55, 0, 44, 0, 35, 0, 26, 0, 19, 0, 12, 0, 5, 0, + 1, 1, 0, 5, 0, 10, 0, 14, 0, 18, 0, 22, 0, 26, 0, 29, 0, 32, 0, 35, + 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 50, 0, 52, 0, 54, 0, 55, 0, 57, + 0, 59, 0, 60, 116, 0, 98, 0, 81, 0, 68, 0, 55, 0, 44, 0, 35, 0, 26, 0, + 19, 0, 12, 0, 5, 0, 1, 1, 0, 5, 0, 10, 0, 14, 0, 18, 0, 22, 0, 26, + 0, 29, 0, 32, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 50, 0, 52, + 0, 54, 0, 55, 0, 57, 0, 59, 0, 60, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 4, 0, 9, + 0, 13, 0, 17, 0, 21, 0, 24, 0, 27, 0, 30, 0, 33, 0, 36, 0, 38, 0, 41, + 0, 43, 0, 45, 0, 47, 0, 49, 0, 51, 0, 53, 0, 55, 0, 56, 0, 117, 0, 100, + 0, 85, 0, 71, 0, 60, 0, 49, 0, 40, 0, 31, 0, 24, 0, 17, 0, 11, 0, 5, + 0, 2, 0, 4, 0, 9, 0, 13, 0, 17, 0, 21, 0, 24, 0, 27, 0, 30, 0, 33, + 0, 36, 0, 38, 0, 41, 0, 43, 0, 45, 0, 47, 0, 49, 0, 51, 0, 53, 0, 55, + 0, 56, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 117, 0, 100, 0, 85, 0, 71, 0, 60, 0, 49, 0, + 40, 0, 31, 0, 24, 0, 17, 0, 11, 0, 5, 0, 1, 1, 0, 4, 0, 9, 0, 13, + 0, 17, 0, 21, 0, 24, 0, 27, 0, 30, 0, 33, 0, 36, 0, 38, 0, 41, 0, 43, + 0, 45, 0, 47, 0, 49, 0, 51, 0, 53, 0, 55, 0, 56, 117, 0, 100, 0, 85, 0, + 71, 0, 60, 0, 49, 0, 40, 0, 31, 0, 24, 0, 17, 0, 11, 0, 5, 0, 1, 1, + 0, 4, 0, 9, 0, 13, 0, 17, 0, 21, 0, 24, 0, 27, 0, 30, 0, 33, 0, 36, + 0, 38, 0, 41, 0, 43, 0, 45, 0, 47, 0, 49, 0, 51, 0, 53, 0, 55, 0, 56, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 1, 0, 4, 0, 8, 0, 12, 0, 16, 0, 19, 0, 23, + 0, 26, 0, 29, 0, 31, 0, 34, 0, 36, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, + 0, 49, 0, 51, 0, 52, 0, 118, 0, 102, 0, 87, 0, 75, 0, 63, 0, 53, 0, 44, + 0, 36, 0, 28, 0, 22, 0, 15, 0, 10, 0, 4, 0, 2, 0, 4, 0, 8, 0, 12, + 0, 16, 0, 19, 0, 23, 0, 26, 0, 29, 0, 31, 0, 34, 0, 36, 0, 39, 0, 41, + 0, 43, 0, 45, 0, 47, 0, 49, 0, 51, 0, 52, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 118, 0, + 102, 0, 87, 0, 75, 0, 63, 0, 53, 0, 44, 0, 36, 0, 28, 0, 22, 0, 15, 0, + 10, 0, 4, 0, 1, 1, 0, 4, 0, 8, 0, 12, 0, 16, 0, 19, 0, 23, 0, 26, + 0, 29, 0, 31, 0, 34, 0, 36, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0, 49, + 0, 51, 0, 52, 118, 0, 102, 0, 87, 0, 75, 0, 63, 0, 53, 0, 44, 0, 36, 0, + 28, 0, 22, 0, 15, 0, 10, 0, 4, 0, 1, 1, 0, 4, 0, 8, 0, 12, 0, 16, + 0, 19, 0, 23, 0, 26, 0, 29, 0, 31, 0, 34, 0, 36, 0, 39, 0, 41, 0, 43, + 0, 45, 0, 47, 0, 49, 0, 51, 0, 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 0, 4, 0, 8, 0, 11, 0, 15, 0, 18, 0, 21, 0, 24, 0, 27, 0, 30, 0, 32, + 0, 35, 0, 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0, 48, 0, 119, 0, 103, + 0, 89, 0, 77, 0, 67, 0, 57, 0, 48, 0, 40, 0, 33, 0, 26, 0, 20, 0, 14, + 0, 9, 0, 4, 0, 2, 0, 4, 0, 8, 0, 11, 0, 15, 0, 18, 0, 21, 0, 24, + 0, 27, 0, 30, 0, 32, 0, 35, 0, 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, + 0, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 119, 0, 103, 0, 89, 0, 77, 0, 67, 0, 57, 0, + 48, 0, 40, 0, 33, 0, 26, 0, 20, 0, 14, 0, 9, 0, 4, 0, 1, 1, 0, 4, + 0, 8, 0, 11, 0, 15, 0, 18, 0, 21, 0, 24, 0, 27, 0, 30, 0, 32, 0, 35, + 0, 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0, 48, 119, 0, 103, 0, 89, 0, + 77, 0, 67, 0, 57, 0, 48, 0, 40, 0, 33, 0, 26, 0, 20, 0, 14, 0, 9, 0, + 4, 0, 1, 1, 0, 4, 0, 8, 0, 11, 0, 15, 0, 18, 0, 21, 0, 24, 0, 27, + 0, 30, 0, 32, 0, 35, 0, 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0, 48, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 0, 7, 0, 11, 0, 14, + 0, 17, 0, 20, 0, 23, 0, 26, 0, 28, 0, 31, 0, 33, 0, 35, 0, 37, 0, 39, + 0, 41, 0, 43, 0, 45, 0, 119, 0, 105, 0, 92, 0, 80, 0, 70, 0, 60, 0, 52, + 0, 44, 0, 37, 0, 30, 0, 24, 0, 18, 0, 13, 0, 8, 0, 4, 0, 2, 0, 3, + 0, 7, 0, 11, 0, 14, 0, 17, 0, 20, 0, 23, 0, 26, 0, 28, 0, 31, 0, 33, + 0, 35, 0, 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 119, 0, + 105, 0, 92, 0, 80, 0, 70, 0, 60, 0, 52, 0, 44, 0, 37, 0, 30, 0, 24, 0, + 18, 0, 13, 0, 8, 0, 4, 0, 1, 1, 0, 3, 0, 7, 0, 11, 0, 14, 0, 17, + 0, 20, 0, 23, 0, 26, 0, 28, 0, 31, 0, 33, 0, 35, 0, 37, 0, 39, 0, 41, + 0, 43, 0, 45, 119, 0, 105, 0, 92, 0, 80, 0, 70, 0, 60, 0, 52, 0, 44, 0, + 37, 0, 30, 0, 24, 0, 18, 0, 13, 0, 8, 0, 4, 0, 1, 1, 0, 3, 0, 7, + 0, 11, 0, 14, 0, 17, 0, 20, 0, 23, 0, 26, 0, 28, 0, 31, 0, 33, 0, 35, + 0, 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 3, 0, 7, 0, 10, 0, 13, 0, 16, 0, 19, 0, 22, 0, 24, + 0, 27, 0, 29, 0, 31, 0, 34, 0, 36, 0, 37, 0, 39, 0, 41, 0, 120, 0, 106, + 0, 93, 0, 82, 0, 72, 0, 63, 0, 55, 0, 47, 0, 40, 0, 34, 0, 28, 0, 22, + 0, 17, 0, 12, 0, 8, 0, 3, 0, 0, 0, 3, 0, 7, 0, 10, 0, 13, 0, 16, + 0, 19, 0, 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 34, 0, 36, 0, 37, 0, 39, + 0, 41, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 120, 0, 106, 0, 93, 0, 82, 0, 72, 0, 63, 0, + 55, 0, 47, 0, 40, 0, 34, 0, 28, 0, 22, 0, 17, 0, 12, 0, 8, 0, 3, 0, + 0, 0, 0, 3, 0, 7, 0, 10, 0, 13, 0, 16, 0, 19, 0, 22, 0, 24, 0, 27, + 0, 29, 0, 31, 0, 34, 0, 36, 0, 37, 0, 39, 0, 41, 120, 0, 106, 0, 93, 0, + 82, 0, 72, 0, 63, 0, 55, 0, 47, 0, 40, 0, 34, 0, 28, 0, 22, 0, 17, 0, + 12, 0, 8, 0, 3, 0, 0, 0, 0, 3, 0, 7, 0, 10, 0, 13, 0, 16, 0, 19, + 0, 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 34, 0, 36, 0, 37, 0, 39, 0, 41, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 6, + 0, 10, 0, 13, 0, 15, 0, 18, 0, 21, 0, 23, 0, 26, 0, 28, 0, 30, 0, 32, + 0, 34, 0, 36, 0, 38, 0, 120, 0, 107, 0, 95, 0, 85, 0, 75, 0, 66, 0, 58, + 0, 51, 0, 44, 0, 37, 0, 31, 0, 26, 0, 21, 0, 16, 0, 11, 0, 7, 0, 3, + 0, 0, 0, 3, 0, 6, 0, 10, 0, 13, 0, 15, 0, 18, 0, 21, 0, 23, 0, 26, + 0, 28, 0, 30, 0, 32, 0, 34, 0, 36, 0, 38, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 0, + 107, 0, 95, 0, 85, 0, 75, 0, 66, 0, 58, 0, 51, 0, 44, 0, 37, 0, 31, 0, + 26, 0, 21, 0, 16, 0, 11, 0, 7, 0, 3, 0, 0, 0, 0, 3, 0, 6, 0, 10, + 0, 13, 0, 15, 0, 18, 0, 21, 0, 23, 0, 26, 0, 28, 0, 30, 0, 32, 0, 34, + 0, 36, 0, 38, 120, 0, 107, 0, 95, 0, 85, 0, 75, 0, 66, 0, 58, 0, 51, 0, + 44, 0, 37, 0, 31, 0, 26, 0, 21, 0, 16, 0, 11, 0, 7, 0, 3, 0, 0, 0, + 0, 3, 0, 6, 0, 10, 0, 13, 0, 15, 0, 18, 0, 21, 0, 23, 0, 26, 0, 28, + 0, 30, 0, 32, 0, 34, 0, 36, 0, 38, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 6, 0, 9, 0, 12, 0, 15, 0, 17, + 0, 20, 0, 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 33, 0, 35, 0, 120, 0, 108, + 0, 97, 0, 86, 0, 77, 0, 69, 0, 61, 0, 53, 0, 47, 0, 40, 0, 35, 0, 29, + 0, 24, 0, 19, 0, 15, 0, 11, 0, 7, 0, 3, 0, 0, 0, 3, 0, 6, 0, 9, + 0, 12, 0, 15, 0, 17, 0, 20, 0, 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 33, + 0, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 120, 0, 108, 0, 97, 0, 86, 0, 77, 0, 69, 0, + 61, 0, 53, 0, 47, 0, 40, 0, 35, 0, 29, 0, 24, 0, 19, 0, 15, 0, 11, 0, + 7, 0, 3, 0, 0, 0, 0, 3, 0, 6, 0, 9, 0, 12, 0, 15, 0, 17, 0, 20, + 0, 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 33, 0, 35, 120, 0, 108, 0, 97, 0, + 86, 0, 77, 0, 69, 0, 61, 0, 53, 0, 47, 0, 40, 0, 35, 0, 29, 0, 24, 0, + 19, 0, 15, 0, 11, 0, 7, 0, 3, 0, 0, 0, 0, 3, 0, 6, 0, 9, 0, 12, + 0, 15, 0, 17, 0, 20, 0, 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 33, 0, 35, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 0, 6, 0, 9, 0, 11, 0, 14, 0, 17, 0, 19, 0, 21, 0, 23, 0, 26, + 0, 28, 0, 30, 0, 31, 0, 121, 0, 109, 0, 98, 0, 88, 0, 79, 0, 71, 0, 63, + 0, 56, 0, 50, 0, 43, 0, 38, 0, 32, 0, 27, 0, 23, 0, 18, 0, 14, 0, 10, + 0, 6, 0, 3, 0, 0, 0, 3, 0, 6, 0, 9, 0, 11, 0, 14, 0, 17, 0, 19, + 0, 21, 0, 23, 0, 26, 0, 28, 0, 30, 0, 31, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, + 109, 0, 98, 0, 88, 0, 79, 0, 71, 0, 63, 0, 56, 0, 50, 0, 43, 0, 38, 0, + 32, 0, 27, 0, 23, 0, 18, 0, 14, 0, 10, 0, 6, 0, 3, 0, 0, 0, 0, 3, + 0, 6, 0, 9, 0, 11, 0, 14, 0, 17, 0, 19, 0, 21, 0, 23, 0, 26, 0, 28, + 0, 30, 0, 31, 121, 0, 109, 0, 98, 0, 88, 0, 79, 0, 71, 0, 63, 0, 56, 0, + 50, 0, 43, 0, 38, 0, 32, 0, 27, 0, 23, 0, 18, 0, 14, 0, 10, 0, 6, 0, + 3, 0, 0, 0, 0, 3, 0, 6, 0, 9, 0, 11, 0, 14, 0, 17, 0, 19, 0, 21, + 0, 23, 0, 26, 0, 28, 0, 30, 0, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 5, 0, 8, 0, 11, + 0, 13, 0, 16, 0, 18, 0, 20, 0, 22, 0, 25, 0, 26, 0, 28, 0, 121, 0, 110, + 0, 99, 0, 90, 0, 81, 0, 73, 0, 66, 0, 59, 0, 52, 0, 46, 0, 41, 0, 35, + 0, 30, 0, 26, 0, 21, 0, 17, 0, 13, 0, 10, 0, 6, 0, 3, 0, 0, 0, 3, + 0, 5, 0, 8, 0, 11, 0, 13, 0, 16, 0, 18, 0, 20, 0, 22, 0, 25, 0, 26, + 0, 28, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 110, 0, 99, 0, 90, 0, 81, 0, 73, 0, + 66, 0, 59, 0, 52, 0, 46, 0, 41, 0, 35, 0, 30, 0, 26, 0, 21, 0, 17, 0, + 13, 0, 10, 0, 6, 0, 3, 0, 0, 0, 0, 3, 0, 5, 0, 8, 0, 11, 0, 13, + 0, 16, 0, 18, 0, 20, 0, 22, 0, 25, 0, 26, 0, 28, 121, 0, 110, 0, 99, 0, + 90, 0, 81, 0, 73, 0, 66, 0, 59, 0, 52, 0, 46, 0, 41, 0, 35, 0, 30, 0, + 26, 0, 21, 0, 17, 0, 13, 0, 10, 0, 6, 0, 3, 0, 0, 0, 0, 3, 0, 5, + 0, 8, 0, 11, 0, 13, 0, 16, 0, 18, 0, 20, 0, 22, 0, 25, 0, 26, 0, 28, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 2, 0, 5, 0, 8, 0, 10, 0, 13, 0, 15, 0, 17, 0, 20, + 0, 22, 0, 24, 0, 25, 0, 121, 0, 110, 0, 100, 0, 91, 0, 83, 0, 75, 0, 68, + 0, 61, 0, 55, 0, 49, 0, 43, 0, 38, 0, 33, 0, 29, 0, 24, 0, 20, 0, 16, + 0, 13, 0, 9, 0, 6, 0, 3, 0, 0, 0, 2, 0, 5, 0, 8, 0, 10, 0, 13, + 0, 15, 0, 17, 0, 20, 0, 22, 0, 24, 0, 25, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, + 110, 0, 100, 0, 91, 0, 83, 0, 75, 0, 68, 0, 61, 0, 55, 0, 49, 0, 43, 0, + 38, 0, 33, 0, 29, 0, 24, 0, 20, 0, 16, 0, 13, 0, 9, 0, 6, 0, 3, 0, + 0, 0, 0, 2, 0, 5, 0, 8, 0, 10, 0, 13, 0, 15, 0, 17, 0, 20, 0, 22, + 0, 24, 0, 25, 121, 0, 110, 0, 100, 0, 91, 0, 83, 0, 75, 0, 68, 0, 61, 0, + 55, 0, 49, 0, 43, 0, 38, 0, 33, 0, 29, 0, 24, 0, 20, 0, 16, 0, 13, 0, + 9, 0, 6, 0, 3, 0, 0, 0, 0, 2, 0, 5, 0, 8, 0, 10, 0, 13, 0, 15, + 0, 17, 0, 20, 0, 22, 0, 24, 0, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 5, + 0, 7, 0, 10, 0, 12, 0, 15, 0, 17, 0, 19, 0, 21, 0, 23, 0, 121, 0, 111, + 0, 102, 0, 93, 0, 85, 0, 77, 0, 70, 0, 63, 0, 57, 0, 51, 0, 46, 0, 41, + 0, 36, 0, 31, 0, 27, 0, 23, 0, 19, 0, 15, 0, 12, 0, 9, 0, 5, 0, 2, + 0, 0, 0, 2, 0, 5, 0, 7, 0, 10, 0, 12, 0, 15, 0, 17, 0, 19, 0, 21, + 0, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 111, 0, 102, 0, 93, 0, 85, 0, 77, 0, + 70, 0, 63, 0, 57, 0, 51, 0, 46, 0, 41, 0, 36, 0, 31, 0, 27, 0, 23, 0, + 19, 0, 15, 0, 12, 0, 9, 0, 5, 0, 2, 0, 0, 0, 0, 2, 0, 5, 0, 7, + 0, 10, 0, 12, 0, 15, 0, 17, 0, 19, 0, 21, 0, 23, 121, 0, 111, 0, 102, 0, + 93, 0, 85, 0, 77, 0, 70, 0, 63, 0, 57, 0, 51, 0, 46, 0, 41, 0, 36, 0, + 31, 0, 27, 0, 23, 0, 19, 0, 15, 0, 12, 0, 9, 0, 5, 0, 2, 0, 0, 0, + 0, 2, 0, 5, 0, 7, 0, 10, 0, 12, 0, 15, 0, 17, 0, 19, 0, 21, 0, 23, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 5, 0, 7, 0, 10, 0, 12, 0, 14, + 0, 16, 0, 18, 0, 20, 0, 122, 0, 112, 0, 102, 0, 94, 0, 86, 0, 79, 0, 72, + 0, 65, 0, 59, 0, 54, 0, 48, 0, 43, 0, 38, 0, 34, 0, 30, 0, 26, 0, 22, + 0, 18, 0, 15, 0, 11, 0, 8, 0, 5, 0, 2, 0, 0, 0, 2, 0, 5, 0, 7, + 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, + 112, 0, 102, 0, 94, 0, 86, 0, 79, 0, 72, 0, 65, 0, 59, 0, 54, 0, 48, 0, + 43, 0, 38, 0, 34, 0, 30, 0, 26, 0, 22, 0, 18, 0, 15, 0, 11, 0, 8, 0, + 5, 0, 2, 0, 0, 0, 0, 2, 0, 5, 0, 7, 0, 10, 0, 12, 0, 14, 0, 16, + 0, 18, 0, 20, 122, 0, 112, 0, 102, 0, 94, 0, 86, 0, 79, 0, 72, 0, 65, 0, + 59, 0, 54, 0, 48, 0, 43, 0, 38, 0, 34, 0, 30, 0, 26, 0, 22, 0, 18, 0, + 15, 0, 11, 0, 8, 0, 5, 0, 2, 0, 0, 0, 0, 2, 0, 5, 0, 7, 0, 10, + 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 17, 0, 122, 0, 112, + 0, 103, 0, 95, 0, 87, 0, 80, 0, 74, 0, 67, 0, 61, 0, 56, 0, 51, 0, 46, + 0, 41, 0, 36, 0, 32, 0, 28, 0, 24, 0, 21, 0, 17, 0, 14, 0, 11, 0, 8, + 0, 5, 0, 2, 0, 0, 0, 2, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, + 0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 112, 0, 103, 0, 95, 0, 87, 0, 80, 0, + 74, 0, 67, 0, 61, 0, 56, 0, 51, 0, 46, 0, 41, 0, 36, 0, 32, 0, 28, 0, + 24, 0, 21, 0, 17, 0, 14, 0, 11, 0, 8, 0, 5, 0, 2, 0, 0, 0, 0, 2, + 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 17, 122, 0, 112, 0, 103, 0, + 95, 0, 87, 0, 80, 0, 74, 0, 67, 0, 61, 0, 56, 0, 51, 0, 46, 0, 41, 0, + 36, 0, 32, 0, 28, 0, 24, 0, 21, 0, 17, 0, 14, 0, 11, 0, 8, 0, 5, 0, + 2, 0, 0, 0, 0, 2, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 17, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 4, 0, 7, 0, 9, + 0, 11, 0, 13, 0, 15, 0, 122, 0, 113, 0, 104, 0, 96, 0, 89, 0, 82, 0, 75, + 0, 69, 0, 63, 0, 58, 0, 53, 0, 48, 0, 43, 0, 39, 0, 35, 0, 31, 0, 27, + 0, 23, 0, 20, 0, 17, 0, 13, 0, 10, 0, 7, 0, 5, 0, 2, 0, 0, 0, 2, + 0, 4, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, + 113, 0, 104, 0, 96, 0, 89, 0, 82, 0, 75, 0, 69, 0, 63, 0, 58, 0, 53, 0, + 48, 0, 43, 0, 39, 0, 35, 0, 31, 0, 27, 0, 23, 0, 20, 0, 17, 0, 13, 0, + 10, 0, 7, 0, 5, 0, 2, 0, 0, 0, 0, 2, 0, 4, 0, 7, 0, 9, 0, 11, + 0, 13, 0, 15, 122, 0, 113, 0, 104, 0, 96, 0, 89, 0, 82, 0, 75, 0, 69, 0, + 63, 0, 58, 0, 53, 0, 48, 0, 43, 0, 39, 0, 35, 0, 31, 0, 27, 0, 23, 0, + 20, 0, 17, 0, 13, 0, 10, 0, 7, 0, 5, 0, 2, 0, 0, 0, 0, 2, 0, 4, + 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 122, 0, 113, + 0, 105, 0, 97, 0, 90, 0, 83, 0, 77, 0, 71, 0, 65, 0, 60, 0, 55, 0, 50, + 0, 45, 0, 41, 0, 37, 0, 33, 0, 29, 0, 26, 0, 22, 0, 19, 0, 16, 0, 13, + 0, 10, 0, 7, 0, 5, 0, 2, 0, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, + 0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 113, 0, 105, 0, 97, 0, 90, 0, 83, 0, + 77, 0, 71, 0, 65, 0, 60, 0, 55, 0, 50, 0, 45, 0, 41, 0, 37, 0, 33, 0, + 29, 0, 26, 0, 22, 0, 19, 0, 16, 0, 13, 0, 10, 0, 7, 0, 5, 0, 2, 0, + 0, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 122, 0, 113, 0, 105, 0, + 97, 0, 90, 0, 83, 0, 77, 0, 71, 0, 65, 0, 60, 0, 55, 0, 50, 0, 45, 0, + 41, 0, 37, 0, 33, 0, 29, 0, 26, 0, 22, 0, 19, 0, 16, 0, 13, 0, 10, 0, + 7, 0, 5, 0, 2, 0, 0, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 4, + 0, 6, 0, 8, 0, 10, 0, 122, 0, 114, 0, 106, 0, 98, 0, 91, 0, 85, 0, 78, + 0, 72, 0, 67, 0, 62, 0, 57, 0, 52, 0, 47, 0, 43, 0, 39, 0, 35, 0, 31, + 0, 28, 0, 24, 0, 21, 0, 18, 0, 15, 0, 12, 0, 10, 0, 7, 0, 4, 0, 2, + 0, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, + 114, 0, 106, 0, 98, 0, 91, 0, 85, 0, 78, 0, 72, 0, 67, 0, 62, 0, 57, 0, + 52, 0, 47, 0, 43, 0, 39, 0, 35, 0, 31, 0, 28, 0, 24, 0, 21, 0, 18, 0, + 15, 0, 12, 0, 10, 0, 7, 0, 4, 0, 2, 0, 0, 0, 0, 2, 0, 4, 0, 6, + 0, 8, 0, 10, 122, 0, 114, 0, 106, 0, 98, 0, 91, 0, 85, 0, 78, 0, 72, 0, + 67, 0, 62, 0, 57, 0, 52, 0, 47, 0, 43, 0, 39, 0, 35, 0, 31, 0, 28, 0, + 24, 0, 21, 0, 18, 0, 15, 0, 12, 0, 10, 0, 7, 0, 4, 0, 2, 0, 0, 0, + 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 123, 0, 114, + 0, 106, 0, 99, 0, 92, 0, 86, 0, 80, 0, 74, 0, 68, 0, 63, 0, 58, 0, 54, + 0, 49, 0, 45, 0, 41, 0, 37, 0, 34, 0, 30, 0, 27, 0, 23, 0, 20, 0, 17, + 0, 15, 0, 12, 0, 9, 0, 7, 0, 4, 0, 2, 0, 0, 0, 2, 0, 4, 0, 6, + 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 114, 0, 106, 0, 99, 0, 92, 0, 86, 0, + 80, 0, 74, 0, 68, 0, 63, 0, 58, 0, 54, 0, 49, 0, 45, 0, 41, 0, 37, 0, + 34, 0, 30, 0, 27, 0, 23, 0, 20, 0, 17, 0, 15, 0, 12, 0, 9, 0, 7, 0, + 4, 0, 2, 0, 0, 0, 0, 2, 0, 4, 0, 6, 0, 8, 123, 0, 114, 0, 106, 0, + 99, 0, 92, 0, 86, 0, 80, 0, 74, 0, 68, 0, 63, 0, 58, 0, 54, 0, 49, 0, + 45, 0, 41, 0, 37, 0, 34, 0, 30, 0, 27, 0, 23, 0, 20, 0, 17, 0, 15, 0, + 12, 0, 9, 0, 7, 0, 4, 0, 2, 0, 0, 0, 0, 2, 0, 4, 0, 6, 0, 8, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 0, 4, 0, 6, 0, 123, 0, 115, 0, 107, 0, 100, 0, 93, 0, 87, 0, 81, + 0, 75, 0, 70, 0, 65, 0, 60, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, + 0, 32, 0, 29, 0, 26, 0, 22, 0, 20, 0, 17, 0, 14, 0, 11, 0, 9, 0, 6, + 0, 4, 0, 2, 0, 0, 0, 2, 0, 4, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, + 115, 0, 107, 0, 100, 0, 93, 0, 87, 0, 81, 0, 75, 0, 70, 0, 65, 0, 60, 0, + 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, 0, 32, 0, 29, 0, 26, 0, 22, 0, + 20, 0, 17, 0, 14, 0, 11, 0, 9, 0, 6, 0, 4, 0, 2, 0, 0, 0, 0, 2, + 0, 4, 0, 6, 123, 0, 115, 0, 107, 0, 100, 0, 93, 0, 87, 0, 81, 0, 75, 0, + 70, 0, 65, 0, 60, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, 0, 32, 0, + 29, 0, 26, 0, 22, 0, 20, 0, 17, 0, 14, 0, 11, 0, 9, 0, 6, 0, 4, 0, + 2, 0, 0, 0, 0, 2, 0, 4, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 4, 0, 123, 0, 115, + 0, 108, 0, 101, 0, 94, 0, 88, 0, 82, 0, 77, 0, 71, 0, 66, 0, 62, 0, 57, + 0, 53, 0, 49, 0, 45, 0, 41, 0, 37, 0, 34, 0, 31, 0, 28, 0, 25, 0, 22, + 0, 19, 0, 16, 0, 13, 0, 11, 0, 8, 0, 6, 0, 4, 0, 2, 0, 0, 0, 2, + 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 115, 0, 108, 0, 101, 0, 94, 0, 88, 0, + 82, 0, 77, 0, 71, 0, 66, 0, 62, 0, 57, 0, 53, 0, 49, 0, 45, 0, 41, 0, + 37, 0, 34, 0, 31, 0, 28, 0, 25, 0, 22, 0, 19, 0, 16, 0, 13, 0, 11, 0, + 8, 0, 6, 0, 4, 0, 2, 0, 0, 0, 0, 2, 0, 4, 123, 0, 115, 0, 108, 0, + 101, 0, 94, 0, 88, 0, 82, 0, 77, 0, 71, 0, 66, 0, 62, 0, 57, 0, 53, 0, + 49, 0, 45, 0, 41, 0, 37, 0, 34, 0, 31, 0, 28, 0, 25, 0, 22, 0, 19, 0, + 16, 0, 13, 0, 11, 0, 8, 0, 6, 0, 4, 0, 2, 0, 0, 0, 0, 2, 0, 4, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 1, 0, 123, 0, 115, 0, 108, 0, 102, 0, 95, 0, 89, 0, 83, + 0, 78, 0, 73, 0, 68, 0, 63, 0, 59, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, + 0, 36, 0, 33, 0, 30, 0, 26, 0, 24, 0, 21, 0, 18, 0, 15, 0, 13, 0, 10, + 0, 8, 0, 6, 0, 4, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, + 115, 0, 108, 0, 102, 0, 95, 0, 89, 0, 83, 0, 78, 0, 73, 0, 68, 0, 63, 0, + 59, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, 0, 33, 0, 30, 0, 26, 0, + 24, 0, 21, 0, 18, 0, 15, 0, 13, 0, 10, 0, 8, 0, 6, 0, 4, 0, 2, 0, + 0, 0, 0, 1, 123, 0, 115, 0, 108, 0, 102, 0, 95, 0, 89, 0, 83, 0, 78, 0, + 73, 0, 68, 0, 63, 0, 59, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, 0, + 33, 0, 30, 0, 26, 0, 24, 0, 21, 0, 18, 0, 15, 0, 13, 0, 10, 0, 8, 0, + 6, 0, 4, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 116, + 0, 109, 0, 102, 0, 96, 0, 90, 0, 85, 0, 79, 0, 74, 0, 69, 0, 65, 0, 60, + 0, 56, 0, 52, 0, 48, 0, 45, 0, 41, 0, 38, 0, 35, 0, 31, 0, 28, 0, 25, + 0, 23, 0, 20, 0, 17, 0, 15, 0, 12, 0, 10, 0, 8, 0, 6, 0, 4, 0, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 116, 0, 109, 0, 102, 0, 96, 0, 90, 0, + 85, 0, 79, 0, 74, 0, 69, 0, 65, 0, 60, 0, 56, 0, 52, 0, 48, 0, 45, 0, + 41, 0, 38, 0, 35, 0, 31, 0, 28, 0, 25, 0, 23, 0, 20, 0, 17, 0, 15, 0, + 12, 0, 10, 0, 8, 0, 6, 0, 4, 0, 1, 0, 0, 0, 123, 0, 116, 0, 109, 0, + 102, 0, 96, 0, 90, 0, 85, 0, 79, 0, 74, 0, 69, 0, 65, 0, 60, 0, 56, 0, + 52, 0, 48, 0, 45, 0, 41, 0, 38, 0, 35, 0, 31, 0, 28, 0, 25, 0, 23, 0, + 20, 0, 17, 0, 15, 0, 12, 0, 10, 0, 8, 0, 6, 0, 4, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 31, 0, 63, 0, 85, 0, 95, 0, 102, 0, + 106, 0, 109, 0, 111, 0, 113, 0, 114, 0, 115, 0, 116, 0, 117, 0, 118, 0, + 119, 0, + 119, 0, 120, 0, 120, 0, 120, 0, 121, 0, 121, 0, 121, 0, 121, 0, 122, 0, + 122, 0, + 122, 0, 122, 0, 122, 0, 123, 0, 123, 0, 123, 0, 123, 0, 123, 0, 31, 31, 63, + 0, + 85, 0, 95, 0, 102, 0, 106, 0, 109, 0, 111, 0, 113, 0, 114, 0, 115, 0, 116, + 0, + 117, 0, 118, 0, 119, 0, 119, 0, 120, 0, 120, 0, 120, 0, 121, 0, 121, 0, + 121, 0, + 121, 0, 122, 0, 122, 0, 122, 0, 122, 0, 122, 0, 123, 0, 123, 0, 123, 0, + 123, 0, + 123, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 62, 0, 63, 0, 85, 0, 95, 0, 102, 0, 106, 0, + 109, 0, 111, 0, 113, 0, 114, 0, 115, 0, 116, 0, 117, 0, 118, 0, 119, 0, + 119, 0, + 120, 0, 120, 0, 120, 0, 121, 0, 121, 0, 121, 0, 121, 0, 122, 0, 122, 0, + 122, 0, + 122, 0, 122, 0, 123, 0, 123, 0, 123, 0, 123, 0, 123, 0, 31, 31, 63, 0, 85, + 0, + 95, 0, 102, 0, 106, 0, 109, 0, 111, 0, 113, 0, 114, 0, 115, 0, 116, 0, 117, + 0, + 118, 0, 119, 0, 119, 0, 120, 0, 120, 0, 120, 0, 121, 0, 121, 0, 121, 0, + 121, 0, + 122, 0, 122, 0, 122, 0, 122, 0, 122, 0, 123, 0, 123, 0, 123, 0, 123, 0, + 123, 0, + 0, 0, 10, 0, 31, 0, 51, 0, 63, 0, 72, 0, 79, 0, 85, 0, 89, 0, 92, 0, + 95, 0, 98, 0, 100, 0, 102, 0, 103, 0, 105, 0, 106, 0, 107, 0, 108, 0, 109, + 0, + 110, 0, 110, 0, 111, 0, 112, 0, 112, 0, 113, 0, 113, 0, 114, 0, 114, 0, + 115, 0, + 115, 0, 115, 0, 116, 0, 0, 63, 10, 10, 31, 0, 51, 0, 63, 0, 72, 0, 79, 0, + 85, 0, 89, 0, 92, 0, 95, 0, 98, 0, 100, 0, 102, 0, 103, 0, 105, 0, 106, 0, + 107, 0, 108, 0, 109, 0, 110, 0, 110, 0, 111, 0, 112, 0, 112, 0, 113, 0, + 113, 0, + 114, 0, 114, 0, 115, 0, 115, 0, 115, 0, 116, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63, 0, + 20, 0, 31, 0, 51, 0, 63, 0, 72, 0, 79, 0, 85, 0, 89, 0, 92, 0, 95, 0, + 98, 0, 100, 0, 102, 0, 103, 0, 105, 0, 106, 0, 107, 0, 108, 0, 109, 0, 110, + 0, + 110, 0, 111, 0, 112, 0, 112, 0, 113, 0, 113, 0, 114, 0, 114, 0, 115, 0, + 115, 0, + 115, 0, 116, 0, 0, 63, 10, 10, 31, 0, 51, 0, 63, 0, 72, 0, 79, 0, 85, 0, + 89, 0, 92, 0, 95, 0, 98, 0, 100, 0, 102, 0, 103, 0, 105, 0, 106, 0, 107, 0, + 108, 0, 109, 0, 110, 0, 110, 0, 111, 0, 112, 0, 112, 0, 113, 0, 113, 0, + 114, 0, + 114, 0, 115, 0, 115, 0, 115, 0, 116, 0, 0, 0, 0, 0, 6, 0, 21, 0, 36, 0, + 47, 0, 56, 0, 63, 0, 69, 0, 74, 0, 78, 0, 81, 0, 85, 0, 87, 0, 89, 0, + 92, 0, 93, 0, 95, 0, 97, 0, 98, 0, 99, 0, 100, 0, 102, 0, 102, 0, 103, 0, + 104, 0, 105, 0, 106, 0, 106, 0, 107, 0, 108, 0, 108, 0, 109, 0, 0, 85, 0, + 31, + 6, 6, 21, 0, 36, 0, 47, 0, 56, 0, 63, 0, 69, 0, 74, 0, 78, 0, 81, 0, + 85, 0, 87, 0, 89, 0, 92, 0, 93, 0, 95, 0, 97, 0, 98, 0, 99, 0, 100, 0, + 102, 0, 102, 0, 103, 0, 104, 0, 105, 0, 106, 0, 106, 0, 107, 0, 108, 0, + 108, 0, + 109, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 85, 0, 31, 0, 12, 0, 21, 0, 36, 0, 47, 0, + 56, 0, 63, 0, 69, 0, 74, 0, 78, 0, 81, 0, 85, 0, 87, 0, 89, 0, 92, 0, + 93, 0, 95, 0, 97, 0, 98, 0, 99, 0, 100, 0, 102, 0, 102, 0, 103, 0, 104, 0, + 105, 0, 106, 0, 106, 0, 107, 0, 108, 0, 108, 0, 109, 0, 0, 85, 0, 31, 6, 6, + 21, 0, 36, 0, 47, 0, 56, 0, 63, 0, 69, 0, 74, 0, 78, 0, 81, 0, 85, 0, + 87, 0, 89, 0, 92, 0, 93, 0, 95, 0, 97, 0, 98, 0, 99, 0, 100, 0, 102, 0, + 102, 0, 103, 0, 104, 0, 105, 0, 106, 0, 106, 0, 107, 0, 108, 0, 108, 0, + 109, 0, + 0, 0, 0, 0, 0, 0, 4, 0, 15, 0, 28, 0, 38, 0, 46, 0, 53, 0, 58, 0, + 63, 0, 68, 0, 71, 0, 75, 0, 77, 0, 80, 0, 82, 0, 85, 0, 86, 0, 88, 0, + 90, 0, 91, 0, 93, 0, 94, 0, 95, 0, 96, 0, 97, 0, 98, 0, 99, 0, 100, 0, + 101, 0, 102, 0, 102, 0, 0, 95, 0, 51, 0, 21, 4, 4, 15, 0, 28, 0, 38, 0, + 46, 0, 53, 0, 58, 0, 63, 0, 68, 0, 71, 0, 75, 0, 77, 0, 80, 0, 82, 0, + 85, 0, 86, 0, 88, 0, 90, 0, 91, 0, 93, 0, 94, 0, 95, 0, 96, 0, 97, 0, + 98, 0, 99, 0, 100, 0, 101, 0, 102, 0, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 95, 0, + 51, 0, 21, 0, 8, 0, 15, 0, 28, 0, 38, 0, 46, 0, 53, 0, 58, 0, 63, 0, + 68, 0, 71, 0, 75, 0, 77, 0, 80, 0, 82, 0, 85, 0, 86, 0, 88, 0, 90, 0, + 91, 0, 93, 0, 94, 0, 95, 0, 96, 0, 97, 0, 98, 0, 99, 0, 100, 0, 101, 0, + 102, 0, 102, 0, 0, 95, 0, 51, 0, 21, 4, 4, 15, 0, 28, 0, 38, 0, 46, 0, + 53, 0, 58, 0, 63, 0, 68, 0, 71, 0, 75, 0, 77, 0, 80, 0, 82, 0, 85, 0, + 86, 0, 88, 0, 90, 0, 91, 0, 93, 0, 94, 0, 95, 0, 96, 0, 97, 0, 98, 0, + 99, 0, 100, 0, 101, 0, 102, 0, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, + 12, 0, 23, 0, 31, 0, 39, 0, 45, 0, 51, 0, 55, 0, 60, 0, 63, 0, 67, 0, + 70, 0, 72, 0, 75, 0, 77, 0, 79, 0, 81, 0, 83, 0, 85, 0, 86, 0, 87, 0, + 89, 0, 90, 0, 91, 0, 92, 0, 93, 0, 94, 0, 95, 0, 96, 0, 0, 102, 0, 63, + 0, 36, 0, 15, 3, 3, 12, 0, 23, 0, 31, 0, 39, 0, 45, 0, 51, 0, 55, 0, + 60, 0, 63, 0, 67, 0, 70, 0, 72, 0, 75, 0, 77, 0, 79, 0, 81, 0, 83, 0, + 85, 0, 86, 0, 87, 0, 89, 0, 90, 0, 91, 0, 92, 0, 93, 0, 94, 0, 95, 0, + 96, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 102, 0, 63, 0, 36, 0, 15, 0, 6, 0, 12, 0, + 23, 0, 31, 0, 39, 0, 45, 0, 51, 0, 55, 0, 60, 0, 63, 0, 67, 0, 70, 0, + 72, 0, 75, 0, 77, 0, 79, 0, 81, 0, 83, 0, 85, 0, 86, 0, 87, 0, 89, 0, + 90, 0, 91, 0, 92, 0, 93, 0, 94, 0, 95, 0, 96, 0, 0, 102, 0, 63, 0, 36, + 0, 15, 3, 3, 12, 0, 23, 0, 31, 0, 39, 0, 45, 0, 51, 0, 55, 0, 60, 0, + 63, 0, 67, 0, 70, 0, 72, 0, 75, 0, 77, 0, 79, 0, 81, 0, 83, 0, 85, 0, + 86, 0, 87, 0, 89, 0, 90, 0, 91, 0, 92, 0, 93, 0, 94, 0, 95, 0, 96, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 10, 0, 19, 0, 27, 0, 34, 0, + 39, 0, 44, 0, 49, 0, 53, 0, 57, 0, 60, 0, 63, 0, 66, 0, 69, 0, 71, 0, + 73, 0, 75, 0, 77, 0, 79, 0, 80, 0, 82, 0, 83, 0, 85, 0, 86, 0, 87, 0, + 88, 0, 89, 0, 90, 0, 0, 106, 0, 72, 0, 47, 0, 28, 0, 12, 2, 2, 10, 0, + 19, 0, 27, 0, 34, 0, 39, 0, 44, 0, 49, 0, 53, 0, 57, 0, 60, 0, 63, 0, + 66, 0, 69, 0, 71, 0, 73, 0, 75, 0, 77, 0, 79, 0, 80, 0, 82, 0, 83, 0, + 85, 0, 86, 0, 87, 0, 88, 0, 89, 0, 90, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 106, 0, + 72, 0, 47, 0, 28, 0, 12, 0, 4, 0, 10, 0, 19, 0, 27, 0, 34, 0, 39, 0, + 44, 0, 49, 0, 53, 0, 57, 0, 60, 0, 63, 0, 66, 0, 69, 0, 71, 0, 73, 0, + 75, 0, 77, 0, 79, 0, 80, 0, 82, 0, 83, 0, 85, 0, 86, 0, 87, 0, 88, 0, + 89, 0, 90, 0, 0, 106, 0, 72, 0, 47, 0, 28, 0, 12, 2, 2, 10, 0, 19, 0, + 27, 0, 34, 0, 39, 0, 44, 0, 49, 0, 53, 0, 57, 0, 60, 0, 63, 0, 66, 0, + 69, 0, 71, 0, 73, 0, 75, 0, 77, 0, 79, 0, 80, 0, 82, 0, 83, 0, 85, 0, + 86, 0, 87, 0, 88, 0, 89, 0, 90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2, 0, 9, 0, 17, 0, 23, 0, 30, 0, 35, 0, 40, 0, 44, 0, 48, 0, + 52, 0, 55, 0, 58, 0, 61, 0, 63, 0, 66, 0, 68, 0, 70, 0, 72, 0, 74, 0, + 75, 0, 77, 0, 78, 0, 80, 0, 81, 0, 82, 0, 83, 0, 85, 0, 0, 109, 0, 79, + 0, 56, 0, 38, 0, 23, 0, 10, 2, 2, 9, 0, 17, 0, 23, 0, 30, 0, 35, 0, + 40, 0, 44, 0, 48, 0, 52, 0, 55, 0, 58, 0, 61, 0, 63, 0, 66, 0, 68, 0, + 70, 0, 72, 0, 74, 0, 75, 0, 77, 0, 78, 0, 80, 0, 81, 0, 82, 0, 83, 0, + 85, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 109, 0, 79, 0, 56, 0, 38, 0, 23, 0, 10, 0, + 4, 0, 9, 0, 17, 0, 23, 0, 30, 0, 35, 0, 40, 0, 44, 0, 48, 0, 52, 0, + 55, 0, 58, 0, 61, 0, 63, 0, 66, 0, 68, 0, 70, 0, 72, 0, 74, 0, 75, 0, + 77, 0, 78, 0, 80, 0, 81, 0, 82, 0, 83, 0, 85, 0, 0, 109, 0, 79, 0, 56, + 0, 38, 0, 23, 0, 10, 2, 2, 9, 0, 17, 0, 23, 0, 30, 0, 35, 0, 40, 0, + 44, 0, 48, 0, 52, 0, 55, 0, 58, 0, 61, 0, 63, 0, 66, 0, 68, 0, 70, 0, + 72, 0, 74, 0, 75, 0, 77, 0, 78, 0, 80, 0, 81, 0, 82, 0, 83, 0, 85, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 7, 0, 15, 0, + 21, 0, 26, 0, 31, 0, 36, 0, 40, 0, 44, 0, 47, 0, 51, 0, 53, 0, 56, 0, + 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 69, 0, 71, 0, 72, 0, 74, 0, 75, 0, + 77, 0, 78, 0, 79, 0, 0, 111, 0, 85, 0, 63, 0, 46, 0, 31, 0, 19, 0, 9, + 2, 2, 7, 0, 15, 0, 21, 0, 26, 0, 31, 0, 36, 0, 40, 0, 44, 0, 47, 0, + 51, 0, 53, 0, 56, 0, 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 69, 0, 71, 0, + 72, 0, 74, 0, 75, 0, 77, 0, 78, 0, 79, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 111, 0, + 85, 0, 63, 0, 46, 0, 31, 0, 19, 0, 9, 0, 4, 0, 7, 0, 15, 0, 21, 0, + 26, 0, 31, 0, 36, 0, 40, 0, 44, 0, 47, 0, 51, 0, 53, 0, 56, 0, 59, 0, + 61, 0, 63, 0, 65, 0, 67, 0, 69, 0, 71, 0, 72, 0, 74, 0, 75, 0, 77, 0, + 78, 0, 79, 0, 0, 111, 0, 85, 0, 63, 0, 46, 0, 31, 0, 19, 0, 9, 2, 2, + 7, 0, 15, 0, 21, 0, 26, 0, 31, 0, 36, 0, 40, 0, 44, 0, 47, 0, 51, 0, + 53, 0, 56, 0, 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 69, 0, 71, 0, 72, 0, + 74, 0, 75, 0, 77, 0, 78, 0, 79, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 1, 0, 7, 0, 13, 0, 19, 0, 24, 0, 28, 0, 33, 0, + 37, 0, 40, 0, 44, 0, 47, 0, 50, 0, 52, 0, 55, 0, 57, 0, 59, 0, 61, 0, + 63, 0, 65, 0, 67, 0, 68, 0, 70, 0, 71, 0, 73, 0, 74, 0, 0, 113, 0, 89, + 0, 69, 0, 53, 0, 39, 0, 27, 0, 17, 0, 7, 1, 1, 7, 0, 13, 0, 19, 0, + 24, 0, 28, 0, 33, 0, 37, 0, 40, 0, 44, 0, 47, 0, 50, 0, 52, 0, 55, 0, + 57, 0, 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 68, 0, 70, 0, 71, 0, 73, 0, + 74, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 113, 0, 89, 0, 69, 0, 53, 0, 39, 0, 27, 0, + 17, 0, 7, 0, 2, 0, 7, 0, 13, 0, 19, 0, 24, 0, 28, 0, 33, 0, 37, 0, + 40, 0, 44, 0, 47, 0, 50, 0, 52, 0, 55, 0, 57, 0, 59, 0, 61, 0, 63, 0, + 65, 0, 67, 0, 68, 0, 70, 0, 71, 0, 73, 0, 74, 0, 0, 113, 0, 89, 0, 69, + 0, 53, 0, 39, 0, 27, 0, 17, 0, 7, 1, 1, 7, 0, 13, 0, 19, 0, 24, 0, + 28, 0, 33, 0, 37, 0, 40, 0, 44, 0, 47, 0, 50, 0, 52, 0, 55, 0, 57, 0, + 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 68, 0, 70, 0, 71, 0, 73, 0, 74, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, + 6, 0, 12, 0, 17, 0, 22, 0, 26, 0, 30, 0, 34, 0, 37, 0, 40, 0, 43, 0, + 46, 0, 49, 0, 51, 0, 54, 0, 56, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65, 0, + 66, 0, 68, 0, 69, 0, 0, 114, 0, 92, 0, 74, 0, 58, 0, 45, 0, 34, 0, 23, + 0, 15, 0, 7, 1, 1, 6, 0, 12, 0, 17, 0, 22, 0, 26, 0, 30, 0, 34, 0, + 37, 0, 40, 0, 43, 0, 46, 0, 49, 0, 51, 0, 54, 0, 56, 0, 58, 0, 60, 0, + 62, 0, 63, 0, 65, 0, 66, 0, 68, 0, 69, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 114, 0, + 92, 0, 74, 0, 58, 0, 45, 0, 34, 0, 23, 0, 15, 0, 7, 0, 2, 0, 6, 0, + 12, 0, 17, 0, 22, 0, 26, 0, 30, 0, 34, 0, 37, 0, 40, 0, 43, 0, 46, 0, + 49, 0, 51, 0, 54, 0, 56, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65, 0, 66, 0, + 68, 0, 69, 0, 0, 114, 0, 92, 0, 74, 0, 58, 0, 45, 0, 34, 0, 23, 0, 15, + 0, 7, 1, 1, 6, 0, 12, 0, 17, 0, 22, 0, 26, 0, 30, 0, 34, 0, 37, 0, + 40, 0, 43, 0, 46, 0, 49, 0, 51, 0, 54, 0, 56, 0, 58, 0, 60, 0, 62, 0, + 63, 0, 65, 0, 66, 0, 68, 0, 69, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 5, 0, 11, 0, 15, 0, 20, 0, + 24, 0, 28, 0, 31, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 51, 0, + 53, 0, 55, 0, 57, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65, 0, 0, 115, 0, 95, + 0, 78, 0, 63, 0, 51, 0, 39, 0, 30, 0, 21, 0, 13, 0, 6, 1, 1, 5, 0, + 11, 0, 15, 0, 20, 0, 24, 0, 28, 0, 31, 0, 35, 0, 38, 0, 41, 0, 43, 0, + 46, 0, 48, 0, 51, 0, 53, 0, 55, 0, 57, 0, 58, 0, 60, 0, 62, 0, 63, 0, + 65, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 115, 0, 95, 0, 78, 0, 63, 0, 51, 0, 39, 0, + 30, 0, 21, 0, 13, 0, 6, 0, 2, 0, 5, 0, 11, 0, 15, 0, 20, 0, 24, 0, + 28, 0, 31, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 51, 0, 53, 0, + 55, 0, 57, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65, 0, 0, 115, 0, 95, 0, 78, + 0, 63, 0, 51, 0, 39, 0, 30, 0, 21, 0, 13, 0, 6, 1, 1, 5, 0, 11, 0, + 15, 0, 20, 0, 24, 0, 28, 0, 31, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, + 48, 0, 51, 0, 53, 0, 55, 0, 57, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 5, 0, 10, 0, 14, 0, 18, 0, 22, 0, 26, 0, 29, 0, 32, 0, + 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 50, 0, 52, 0, 54, 0, 55, 0, + 57, 0, 59, 0, 60, 0, 0, 116, 0, 98, 0, 81, 0, 68, 0, 55, 0, 44, 0, 35, + 0, 26, 0, 19, 0, 12, 0, 5, 1, 1, 5, 0, 10, 0, 14, 0, 18, 0, 22, 0, + 26, 0, 29, 0, 32, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 50, 0, + 52, 0, 54, 0, 55, 0, 57, 0, 59, 0, 60, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 116, 0, + 98, 0, 81, 0, 68, 0, 55, 0, 44, 0, 35, 0, 26, 0, 19, 0, 12, 0, 5, 0, + 2, 0, 5, 0, 10, 0, 14, 0, 18, 0, 22, 0, 26, 0, 29, 0, 32, 0, 35, 0, + 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 50, 0, 52, 0, 54, 0, 55, 0, 57, 0, + 59, 0, 60, 0, 0, 116, 0, 98, 0, 81, 0, 68, 0, 55, 0, 44, 0, 35, 0, 26, + 0, 19, 0, 12, 0, 5, 1, 1, 5, 0, 10, 0, 14, 0, 18, 0, 22, 0, 26, 0, + 29, 0, 32, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 50, 0, 52, 0, + 54, 0, 55, 0, 57, 0, 59, 0, 60, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 4, 0, 9, 0, + 13, 0, 17, 0, 21, 0, 24, 0, 27, 0, 30, 0, 33, 0, 36, 0, 38, 0, 41, 0, + 43, 0, 45, 0, 47, 0, 49, 0, 51, 0, 53, 0, 55, 0, 56, 0, 0, 117, 0, 100, + 0, 85, 0, 71, 0, 60, 0, 49, 0, 40, 0, 31, 0, 24, 0, 17, 0, 11, 0, 5, + 1, 1, 4, 0, 9, 0, 13, 0, 17, 0, 21, 0, 24, 0, 27, 0, 30, 0, 33, 0, + 36, 0, 38, 0, 41, 0, 43, 0, 45, 0, 47, 0, 49, 0, 51, 0, 53, 0, 55, 0, + 56, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 117, 0, 100, 0, 85, 0, 71, 0, 60, 0, 49, 0, + 40, 0, 31, 0, 24, 0, 17, 0, 11, 0, 5, 0, 2, 0, 4, 0, 9, 0, 13, 0, + 17, 0, 21, 0, 24, 0, 27, 0, 30, 0, 33, 0, 36, 0, 38, 0, 41, 0, 43, 0, + 45, 0, 47, 0, 49, 0, 51, 0, 53, 0, 55, 0, 56, 0, 0, 117, 0, 100, 0, 85, + 0, 71, 0, 60, 0, 49, 0, 40, 0, 31, 0, 24, 0, 17, 0, 11, 0, 5, 1, 1, + 4, 0, 9, 0, 13, 0, 17, 0, 21, 0, 24, 0, 27, 0, 30, 0, 33, 0, 36, 0, + 38, 0, 41, 0, 43, 0, 45, 0, 47, 0, 49, 0, 51, 0, 53, 0, 55, 0, 56, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 1, 0, 4, 0, 8, 0, 12, 0, 16, 0, 19, 0, 23, 0, + 26, 0, 29, 0, 31, 0, 34, 0, 36, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0, + 49, 0, 51, 0, 52, 0, 0, 118, 0, 102, 0, 87, 0, 75, 0, 63, 0, 53, 0, 44, + 0, 36, 0, 28, 0, 22, 0, 15, 0, 10, 0, 4, 1, 1, 4, 0, 8, 0, 12, 0, + 16, 0, 19, 0, 23, 0, 26, 0, 29, 0, 31, 0, 34, 0, 36, 0, 39, 0, 41, 0, + 43, 0, 45, 0, 47, 0, 49, 0, 51, 0, 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 118, 0, + 102, 0, 87, 0, 75, 0, 63, 0, 53, 0, 44, 0, 36, 0, 28, 0, 22, 0, 15, 0, + 10, 0, 4, 0, 2, 0, 4, 0, 8, 0, 12, 0, 16, 0, 19, 0, 23, 0, 26, 0, + 29, 0, 31, 0, 34, 0, 36, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0, 49, 0, + 51, 0, 52, 0, 0, 118, 0, 102, 0, 87, 0, 75, 0, 63, 0, 53, 0, 44, 0, 36, + 0, 28, 0, 22, 0, 15, 0, 10, 0, 4, 1, 1, 4, 0, 8, 0, 12, 0, 16, 0, + 19, 0, 23, 0, 26, 0, 29, 0, 31, 0, 34, 0, 36, 0, 39, 0, 41, 0, 43, 0, + 45, 0, 47, 0, 49, 0, 51, 0, 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, + 4, 0, 8, 0, 11, 0, 15, 0, 18, 0, 21, 0, 24, 0, 27, 0, 30, 0, 32, 0, + 35, 0, 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0, 48, 0, 0, 119, 0, 103, + 0, 89, 0, 77, 0, 67, 0, 57, 0, 48, 0, 40, 0, 33, 0, 26, 0, 20, 0, 14, + 0, 9, 0, 4, 1, 1, 4, 0, 8, 0, 11, 0, 15, 0, 18, 0, 21, 0, 24, 0, + 27, 0, 30, 0, 32, 0, 35, 0, 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0, + 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 119, 0, 103, 0, 89, 0, 77, 0, 67, 0, 57, 0, + 48, 0, 40, 0, 33, 0, 26, 0, 20, 0, 14, 0, 9, 0, 4, 0, 2, 0, 4, 0, + 8, 0, 11, 0, 15, 0, 18, 0, 21, 0, 24, 0, 27, 0, 30, 0, 32, 0, 35, 0, + 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0, 48, 0, 0, 119, 0, 103, 0, 89, + 0, 77, 0, 67, 0, 57, 0, 48, 0, 40, 0, 33, 0, 26, 0, 20, 0, 14, 0, 9, + 0, 4, 1, 1, 4, 0, 8, 0, 11, 0, 15, 0, 18, 0, 21, 0, 24, 0, 27, 0, + 30, 0, 32, 0, 35, 0, 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0, 48, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 0, 7, 0, 11, 0, 14, 0, + 17, 0, 20, 0, 23, 0, 26, 0, 28, 0, 31, 0, 33, 0, 35, 0, 37, 0, 39, 0, + 41, 0, 43, 0, 45, 0, 0, 119, 0, 105, 0, 92, 0, 80, 0, 70, 0, 60, 0, 52, + 0, 44, 0, 37, 0, 30, 0, 24, 0, 18, 0, 13, 0, 8, 0, 4, 1, 1, 3, 0, + 7, 0, 11, 0, 14, 0, 17, 0, 20, 0, 23, 0, 26, 0, 28, 0, 31, 0, 33, 0, + 35, 0, 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 119, 0, + 105, 0, 92, 0, 80, 0, 70, 0, 60, 0, 52, 0, 44, 0, 37, 0, 30, 0, 24, 0, + 18, 0, 13, 0, 8, 0, 4, 0, 2, 0, 3, 0, 7, 0, 11, 0, 14, 0, 17, 0, + 20, 0, 23, 0, 26, 0, 28, 0, 31, 0, 33, 0, 35, 0, 37, 0, 39, 0, 41, 0, + 43, 0, 45, 0, 0, 119, 0, 105, 0, 92, 0, 80, 0, 70, 0, 60, 0, 52, 0, 44, + 0, 37, 0, 30, 0, 24, 0, 18, 0, 13, 0, 8, 0, 4, 1, 1, 3, 0, 7, 0, + 11, 0, 14, 0, 17, 0, 20, 0, 23, 0, 26, 0, 28, 0, 31, 0, 33, 0, 35, 0, + 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 3, 0, 7, 0, 10, 0, 13, 0, 16, 0, 19, 0, 22, 0, 24, 0, + 27, 0, 29, 0, 31, 0, 34, 0, 36, 0, 37, 0, 39, 0, 41, 0, 0, 120, 0, 106, + 0, 93, 0, 82, 0, 72, 0, 63, 0, 55, 0, 47, 0, 40, 0, 34, 0, 28, 0, 22, + 0, 17, 0, 12, 0, 8, 0, 3, 0, 0, 3, 0, 7, 0, 10, 0, 13, 0, 16, 0, + 19, 0, 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 34, 0, 36, 0, 37, 0, 39, 0, + 41, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 120, 0, 106, 0, 93, 0, 82, 0, 72, 0, 63, 0, + 55, 0, 47, 0, 40, 0, 34, 0, 28, 0, 22, 0, 17, 0, 12, 0, 8, 0, 3, 0, + 0, 0, 3, 0, 7, 0, 10, 0, 13, 0, 16, 0, 19, 0, 22, 0, 24, 0, 27, 0, + 29, 0, 31, 0, 34, 0, 36, 0, 37, 0, 39, 0, 41, 0, 0, 120, 0, 106, 0, 93, + 0, 82, 0, 72, 0, 63, 0, 55, 0, 47, 0, 40, 0, 34, 0, 28, 0, 22, 0, 17, + 0, 12, 0, 8, 0, 3, 0, 0, 3, 0, 7, 0, 10, 0, 13, 0, 16, 0, 19, 0, + 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 34, 0, 36, 0, 37, 0, 39, 0, 41, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 6, 0, + 10, 0, 13, 0, 15, 0, 18, 0, 21, 0, 23, 0, 26, 0, 28, 0, 30, 0, 32, 0, + 34, 0, 36, 0, 38, 0, 0, 120, 0, 107, 0, 95, 0, 85, 0, 75, 0, 66, 0, 58, + 0, 51, 0, 44, 0, 37, 0, 31, 0, 26, 0, 21, 0, 16, 0, 11, 0, 7, 0, 3, + 0, 0, 3, 0, 6, 0, 10, 0, 13, 0, 15, 0, 18, 0, 21, 0, 23, 0, 26, 0, + 28, 0, 30, 0, 32, 0, 34, 0, 36, 0, 38, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 0, + 107, 0, 95, 0, 85, 0, 75, 0, 66, 0, 58, 0, 51, 0, 44, 0, 37, 0, 31, 0, + 26, 0, 21, 0, 16, 0, 11, 0, 7, 0, 3, 0, 0, 0, 3, 0, 6, 0, 10, 0, + 13, 0, 15, 0, 18, 0, 21, 0, 23, 0, 26, 0, 28, 0, 30, 0, 32, 0, 34, 0, + 36, 0, 38, 0, 0, 120, 0, 107, 0, 95, 0, 85, 0, 75, 0, 66, 0, 58, 0, 51, + 0, 44, 0, 37, 0, 31, 0, 26, 0, 21, 0, 16, 0, 11, 0, 7, 0, 3, 0, 0, + 3, 0, 6, 0, 10, 0, 13, 0, 15, 0, 18, 0, 21, 0, 23, 0, 26, 0, 28, 0, + 30, 0, 32, 0, 34, 0, 36, 0, 38, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 6, 0, 9, 0, 12, 0, 15, 0, 17, 0, + 20, 0, 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 33, 0, 35, 0, 0, 120, 0, 108, + 0, 97, 0, 86, 0, 77, 0, 69, 0, 61, 0, 53, 0, 47, 0, 40, 0, 35, 0, 29, + 0, 24, 0, 19, 0, 15, 0, 11, 0, 7, 0, 3, 0, 0, 3, 0, 6, 0, 9, 0, + 12, 0, 15, 0, 17, 0, 20, 0, 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 33, 0, + 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 120, 0, 108, 0, 97, 0, 86, 0, 77, 0, 69, 0, + 61, 0, 53, 0, 47, 0, 40, 0, 35, 0, 29, 0, 24, 0, 19, 0, 15, 0, 11, 0, + 7, 0, 3, 0, 0, 0, 3, 0, 6, 0, 9, 0, 12, 0, 15, 0, 17, 0, 20, 0, + 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 33, 0, 35, 0, 0, 120, 0, 108, 0, 97, + 0, 86, 0, 77, 0, 69, 0, 61, 0, 53, 0, 47, 0, 40, 0, 35, 0, 29, 0, 24, + 0, 19, 0, 15, 0, 11, 0, 7, 0, 3, 0, 0, 3, 0, 6, 0, 9, 0, 12, 0, + 15, 0, 17, 0, 20, 0, 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 33, 0, 35, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 3, 0, 6, 0, 9, 0, 11, 0, 14, 0, 17, 0, 19, 0, 21, 0, 23, 0, 26, 0, + 28, 0, 30, 0, 31, 0, 0, 121, 0, 109, 0, 98, 0, 88, 0, 79, 0, 71, 0, 63, + 0, 56, 0, 50, 0, 43, 0, 38, 0, 32, 0, 27, 0, 23, 0, 18, 0, 14, 0, 10, + 0, 6, 0, 3, 0, 0, 3, 0, 6, 0, 9, 0, 11, 0, 14, 0, 17, 0, 19, 0, + 21, 0, 23, 0, 26, 0, 28, 0, 30, 0, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, + 109, 0, 98, 0, 88, 0, 79, 0, 71, 0, 63, 0, 56, 0, 50, 0, 43, 0, 38, 0, + 32, 0, 27, 0, 23, 0, 18, 0, 14, 0, 10, 0, 6, 0, 3, 0, 0, 0, 3, 0, + 6, 0, 9, 0, 11, 0, 14, 0, 17, 0, 19, 0, 21, 0, 23, 0, 26, 0, 28, 0, + 30, 0, 31, 0, 0, 121, 0, 109, 0, 98, 0, 88, 0, 79, 0, 71, 0, 63, 0, 56, + 0, 50, 0, 43, 0, 38, 0, 32, 0, 27, 0, 23, 0, 18, 0, 14, 0, 10, 0, 6, + 0, 3, 0, 0, 3, 0, 6, 0, 9, 0, 11, 0, 14, 0, 17, 0, 19, 0, 21, 0, + 23, 0, 26, 0, 28, 0, 30, 0, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 5, 0, 8, 0, 11, 0, + 13, 0, 16, 0, 18, 0, 20, 0, 22, 0, 25, 0, 26, 0, 28, 0, 0, 121, 0, 110, + 0, 99, 0, 90, 0, 81, 0, 73, 0, 66, 0, 59, 0, 52, 0, 46, 0, 41, 0, 35, + 0, 30, 0, 26, 0, 21, 0, 17, 0, 13, 0, 10, 0, 6, 0, 3, 0, 0, 3, 0, + 5, 0, 8, 0, 11, 0, 13, 0, 16, 0, 18, 0, 20, 0, 22, 0, 25, 0, 26, 0, + 28, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 110, 0, 99, 0, 90, 0, 81, 0, 73, 0, + 66, 0, 59, 0, 52, 0, 46, 0, 41, 0, 35, 0, 30, 0, 26, 0, 21, 0, 17, 0, + 13, 0, 10, 0, 6, 0, 3, 0, 0, 0, 3, 0, 5, 0, 8, 0, 11, 0, 13, 0, + 16, 0, 18, 0, 20, 0, 22, 0, 25, 0, 26, 0, 28, 0, 0, 121, 0, 110, 0, 99, + 0, 90, 0, 81, 0, 73, 0, 66, 0, 59, 0, 52, 0, 46, 0, 41, 0, 35, 0, 30, + 0, 26, 0, 21, 0, 17, 0, 13, 0, 10, 0, 6, 0, 3, 0, 0, 3, 0, 5, 0, + 8, 0, 11, 0, 13, 0, 16, 0, 18, 0, 20, 0, 22, 0, 25, 0, 26, 0, 28, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 2, 0, 5, 0, 8, 0, 10, 0, 13, 0, 15, 0, 17, 0, 20, 0, + 22, 0, 24, 0, 25, 0, 0, 121, 0, 110, 0, 100, 0, 91, 0, 83, 0, 75, 0, 68, + 0, 61, 0, 55, 0, 49, 0, 43, 0, 38, 0, 33, 0, 29, 0, 24, 0, 20, 0, 16, + 0, 13, 0, 9, 0, 6, 0, 3, 0, 0, 2, 0, 5, 0, 8, 0, 10, 0, 13, 0, + 15, 0, 17, 0, 20, 0, 22, 0, 24, 0, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, + 110, 0, 100, 0, 91, 0, 83, 0, 75, 0, 68, 0, 61, 0, 55, 0, 49, 0, 43, 0, + 38, 0, 33, 0, 29, 0, 24, 0, 20, 0, 16, 0, 13, 0, 9, 0, 6, 0, 3, 0, + 0, 0, 2, 0, 5, 0, 8, 0, 10, 0, 13, 0, 15, 0, 17, 0, 20, 0, 22, 0, + 24, 0, 25, 0, 0, 121, 0, 110, 0, 100, 0, 91, 0, 83, 0, 75, 0, 68, 0, 61, + 0, 55, 0, 49, 0, 43, 0, 38, 0, 33, 0, 29, 0, 24, 0, 20, 0, 16, 0, 13, + 0, 9, 0, 6, 0, 3, 0, 0, 2, 0, 5, 0, 8, 0, 10, 0, 13, 0, 15, 0, + 17, 0, 20, 0, 22, 0, 24, 0, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 5, 0, + 7, 0, 10, 0, 12, 0, 15, 0, 17, 0, 19, 0, 21, 0, 23, 0, 0, 121, 0, 111, + 0, 102, 0, 93, 0, 85, 0, 77, 0, 70, 0, 63, 0, 57, 0, 51, 0, 46, 0, 41, + 0, 36, 0, 31, 0, 27, 0, 23, 0, 19, 0, 15, 0, 12, 0, 9, 0, 5, 0, 2, + 0, 0, 2, 0, 5, 0, 7, 0, 10, 0, 12, 0, 15, 0, 17, 0, 19, 0, 21, 0, + 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 111, 0, 102, 0, 93, 0, 85, 0, 77, 0, + 70, 0, 63, 0, 57, 0, 51, 0, 46, 0, 41, 0, 36, 0, 31, 0, 27, 0, 23, 0, + 19, 0, 15, 0, 12, 0, 9, 0, 5, 0, 2, 0, 0, 0, 2, 0, 5, 0, 7, 0, + 10, 0, 12, 0, 15, 0, 17, 0, 19, 0, 21, 0, 23, 0, 0, 121, 0, 111, 0, 102, + 0, 93, 0, 85, 0, 77, 0, 70, 0, 63, 0, 57, 0, 51, 0, 46, 0, 41, 0, 36, + 0, 31, 0, 27, 0, 23, 0, 19, 0, 15, 0, 12, 0, 9, 0, 5, 0, 2, 0, 0, + 2, 0, 5, 0, 7, 0, 10, 0, 12, 0, 15, 0, 17, 0, 19, 0, 21, 0, 23, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 5, 0, 7, 0, 10, 0, 12, 0, 14, 0, + 16, 0, 18, 0, 20, 0, 0, 122, 0, 112, 0, 102, 0, 94, 0, 86, 0, 79, 0, 72, + 0, 65, 0, 59, 0, 54, 0, 48, 0, 43, 0, 38, 0, 34, 0, 30, 0, 26, 0, 22, + 0, 18, 0, 15, 0, 11, 0, 8, 0, 5, 0, 2, 0, 0, 2, 0, 5, 0, 7, 0, + 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, + 112, 0, 102, 0, 94, 0, 86, 0, 79, 0, 72, 0, 65, 0, 59, 0, 54, 0, 48, 0, + 43, 0, 38, 0, 34, 0, 30, 0, 26, 0, 22, 0, 18, 0, 15, 0, 11, 0, 8, 0, + 5, 0, 2, 0, 0, 0, 2, 0, 5, 0, 7, 0, 10, 0, 12, 0, 14, 0, 16, 0, + 18, 0, 20, 0, 0, 122, 0, 112, 0, 102, 0, 94, 0, 86, 0, 79, 0, 72, 0, 65, + 0, 59, 0, 54, 0, 48, 0, 43, 0, 38, 0, 34, 0, 30, 0, 26, 0, 22, 0, 18, + 0, 15, 0, 11, 0, 8, 0, 5, 0, 2, 0, 0, 2, 0, 5, 0, 7, 0, 10, 0, + 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 2, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 17, 0, 0, 122, 0, 112, + 0, 103, 0, 95, 0, 87, 0, 80, 0, 74, 0, 67, 0, 61, 0, 56, 0, 51, 0, 46, + 0, 41, 0, 36, 0, 32, 0, 28, 0, 24, 0, 21, 0, 17, 0, 14, 0, 11, 0, 8, + 0, 5, 0, 2, 0, 0, 2, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, + 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 112, 0, 103, 0, 95, 0, 87, 0, 80, 0, + 74, 0, 67, 0, 61, 0, 56, 0, 51, 0, 46, 0, 41, 0, 36, 0, 32, 0, 28, 0, + 24, 0, 21, 0, 17, 0, 14, 0, 11, 0, 8, 0, 5, 0, 2, 0, 0, 0, 2, 0, + 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 17, 0, 0, 122, 0, 112, 0, 103, + 0, 95, 0, 87, 0, 80, 0, 74, 0, 67, 0, 61, 0, 56, 0, 51, 0, 46, 0, 41, + 0, 36, 0, 32, 0, 28, 0, 24, 0, 21, 0, 17, 0, 14, 0, 11, 0, 8, 0, 5, + 0, 2, 0, 0, 2, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 17, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 4, 0, 7, 0, 9, 0, + 11, 0, 13, 0, 15, 0, 0, 122, 0, 113, 0, 104, 0, 96, 0, 89, 0, 82, 0, 75, + 0, 69, 0, 63, 0, 58, 0, 53, 0, 48, 0, 43, 0, 39, 0, 35, 0, 31, 0, 27, + 0, 23, 0, 20, 0, 17, 0, 13, 0, 10, 0, 7, 0, 5, 0, 2, 0, 0, 2, 0, + 4, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, + 113, 0, 104, 0, 96, 0, 89, 0, 82, 0, 75, 0, 69, 0, 63, 0, 58, 0, 53, 0, + 48, 0, 43, 0, 39, 0, 35, 0, 31, 0, 27, 0, 23, 0, 20, 0, 17, 0, 13, 0, + 10, 0, 7, 0, 5, 0, 2, 0, 0, 0, 2, 0, 4, 0, 7, 0, 9, 0, 11, 0, + 13, 0, 15, 0, 0, 122, 0, 113, 0, 104, 0, 96, 0, 89, 0, 82, 0, 75, 0, 69, + 0, 63, 0, 58, 0, 53, 0, 48, 0, 43, 0, 39, 0, 35, 0, 31, 0, 27, 0, 23, + 0, 20, 0, 17, 0, 13, 0, 10, 0, 7, 0, 5, 0, 2, 0, 0, 2, 0, 4, 0, + 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 0, 122, 0, 113, + 0, 105, 0, 97, 0, 90, 0, 83, 0, 77, 0, 71, 0, 65, 0, 60, 0, 55, 0, 50, + 0, 45, 0, 41, 0, 37, 0, 33, 0, 29, 0, 26, 0, 22, 0, 19, 0, 16, 0, 13, + 0, 10, 0, 7, 0, 5, 0, 2, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, + 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 113, 0, 105, 0, 97, 0, 90, 0, 83, 0, + 77, 0, 71, 0, 65, 0, 60, 0, 55, 0, 50, 0, 45, 0, 41, 0, 37, 0, 33, 0, + 29, 0, 26, 0, 22, 0, 19, 0, 16, 0, 13, 0, 10, 0, 7, 0, 5, 0, 2, 0, + 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 0, 122, 0, 113, 0, 105, + 0, 97, 0, 90, 0, 83, 0, 77, 0, 71, 0, 65, 0, 60, 0, 55, 0, 50, 0, 45, + 0, 41, 0, 37, 0, 33, 0, 29, 0, 26, 0, 22, 0, 19, 0, 16, 0, 13, 0, 10, + 0, 7, 0, 5, 0, 2, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 4, 0, + 6, 0, 8, 0, 10, 0, 0, 122, 0, 114, 0, 106, 0, 98, 0, 91, 0, 85, 0, 78, + 0, 72, 0, 67, 0, 62, 0, 57, 0, 52, 0, 47, 0, 43, 0, 39, 0, 35, 0, 31, + 0, 28, 0, 24, 0, 21, 0, 18, 0, 15, 0, 12, 0, 10, 0, 7, 0, 4, 0, 2, + 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, + 114, 0, 106, 0, 98, 0, 91, 0, 85, 0, 78, 0, 72, 0, 67, 0, 62, 0, 57, 0, + 52, 0, 47, 0, 43, 0, 39, 0, 35, 0, 31, 0, 28, 0, 24, 0, 21, 0, 18, 0, + 15, 0, 12, 0, 10, 0, 7, 0, 4, 0, 2, 0, 0, 0, 2, 0, 4, 0, 6, 0, + 8, 0, 10, 0, 0, 122, 0, 114, 0, 106, 0, 98, 0, 91, 0, 85, 0, 78, 0, 72, + 0, 67, 0, 62, 0, 57, 0, 52, 0, 47, 0, 43, 0, 39, 0, 35, 0, 31, 0, 28, + 0, 24, 0, 21, 0, 18, 0, 15, 0, 12, 0, 10, 0, 7, 0, 4, 0, 2, 0, 0, + 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 0, 123, 0, 114, + 0, 106, 0, 99, 0, 92, 0, 86, 0, 80, 0, 74, 0, 68, 0, 63, 0, 58, 0, 54, + 0, 49, 0, 45, 0, 41, 0, 37, 0, 34, 0, 30, 0, 27, 0, 23, 0, 20, 0, 17, + 0, 15, 0, 12, 0, 9, 0, 7, 0, 4, 0, 2, 0, 0, 2, 0, 4, 0, 6, 0, + 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 114, 0, 106, 0, 99, 0, 92, 0, 86, 0, + 80, 0, 74, 0, 68, 0, 63, 0, 58, 0, 54, 0, 49, 0, 45, 0, 41, 0, 37, 0, + 34, 0, 30, 0, 27, 0, 23, 0, 20, 0, 17, 0, 15, 0, 12, 0, 9, 0, 7, 0, + 4, 0, 2, 0, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 0, 123, 0, 114, 0, 106, + 0, 99, 0, 92, 0, 86, 0, 80, 0, 74, 0, 68, 0, 63, 0, 58, 0, 54, 0, 49, + 0, 45, 0, 41, 0, 37, 0, 34, 0, 30, 0, 27, 0, 23, 0, 20, 0, 17, 0, 15, + 0, 12, 0, 9, 0, 7, 0, 4, 0, 2, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 2, 0, 4, 0, 6, 0, 0, 123, 0, 115, 0, 107, 0, 100, 0, 93, 0, 87, 0, 81, + 0, 75, 0, 70, 0, 65, 0, 60, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, + 0, 32, 0, 29, 0, 26, 0, 22, 0, 20, 0, 17, 0, 14, 0, 11, 0, 9, 0, 6, + 0, 4, 0, 2, 0, 0, 2, 0, 4, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, + 115, 0, 107, 0, 100, 0, 93, 0, 87, 0, 81, 0, 75, 0, 70, 0, 65, 0, 60, 0, + 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, 0, 32, 0, 29, 0, 26, 0, 22, 0, + 20, 0, 17, 0, 14, 0, 11, 0, 9, 0, 6, 0, 4, 0, 2, 0, 0, 0, 2, 0, + 4, 0, 6, 0, 0, 123, 0, 115, 0, 107, 0, 100, 0, 93, 0, 87, 0, 81, 0, 75, + 0, 70, 0, 65, 0, 60, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, 0, 32, + 0, 29, 0, 26, 0, 22, 0, 20, 0, 17, 0, 14, 0, 11, 0, 9, 0, 6, 0, 4, + 0, 2, 0, 0, 2, 0, 4, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 4, 0, 0, 123, 0, 115, + 0, 108, 0, 101, 0, 94, 0, 88, 0, 82, 0, 77, 0, 71, 0, 66, 0, 62, 0, 57, + 0, 53, 0, 49, 0, 45, 0, 41, 0, 37, 0, 34, 0, 31, 0, 28, 0, 25, 0, 22, + 0, 19, 0, 16, 0, 13, 0, 11, 0, 8, 0, 6, 0, 4, 0, 2, 0, 0, 2, 0, + 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 115, 0, 108, 0, 101, 0, 94, 0, 88, 0, + 82, 0, 77, 0, 71, 0, 66, 0, 62, 0, 57, 0, 53, 0, 49, 0, 45, 0, 41, 0, + 37, 0, 34, 0, 31, 0, 28, 0, 25, 0, 22, 0, 19, 0, 16, 0, 13, 0, 11, 0, + 8, 0, 6, 0, 4, 0, 2, 0, 0, 0, 2, 0, 4, 0, 0, 123, 0, 115, 0, 108, + 0, 101, 0, 94, 0, 88, 0, 82, 0, 77, 0, 71, 0, 66, 0, 62, 0, 57, 0, 53, + 0, 49, 0, 45, 0, 41, 0, 37, 0, 34, 0, 31, 0, 28, 0, 25, 0, 22, 0, 19, + 0, 16, 0, 13, 0, 11, 0, 8, 0, 6, 0, 4, 0, 2, 0, 0, 2, 0, 4, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, 0, 123, 0, 115, 0, 108, 0, 102, 0, 95, 0, 89, 0, 83, + 0, 78, 0, 73, 0, 68, 0, 63, 0, 59, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, + 0, 36, 0, 33, 0, 30, 0, 26, 0, 24, 0, 21, 0, 18, 0, 15, 0, 13, 0, 10, + 0, 8, 0, 6, 0, 4, 0, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, + 115, 0, 108, 0, 102, 0, 95, 0, 89, 0, 83, 0, 78, 0, 73, 0, 68, 0, 63, 0, + 59, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, 0, 33, 0, 30, 0, 26, 0, + 24, 0, 21, 0, 18, 0, 15, 0, 13, 0, 10, 0, 8, 0, 6, 0, 4, 0, 2, 0, + 0, 0, 1, 0, 0, 123, 0, 115, 0, 108, 0, 102, 0, 95, 0, 89, 0, 83, 0, 78, + 0, 73, 0, 68, 0, 63, 0, 59, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, + 0, 33, 0, 30, 0, 26, 0, 24, 0, 21, 0, 18, 0, 15, 0, 13, 0, 10, 0, 8, + 0, 6, 0, 4, 0, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 116, + 0, 109, 0, 102, 0, 96, 0, 90, 0, 85, 0, 79, 0, 74, 0, 69, 0, 65, 0, 60, + 0, 56, 0, 52, 0, 48, 0, 45, 0, 41, 0, 38, 0, 35, 0, 31, 0, 28, 0, 25, + 0, 23, 0, 20, 0, 17, 0, 15, 0, 12, 0, 10, 0, 8, 0, 6, 0, 4, 0, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 116, 0, 109, 0, 102, 0, 96, 0, 90, 0, + 85, 0, 79, 0, 74, 0, 69, 0, 65, 0, 60, 0, 56, 0, 52, 0, 48, 0, 45, 0, + 41, 0, 38, 0, 35, 0, 31, 0, 28, 0, 25, 0, 23, 0, 20, 0, 17, 0, 15, 0, + 12, 0, 10, 0, 8, 0, 6, 0, 4, 0, 1, 0, 0, 0, 0, 123, 0, 116, 0, 109, + 0, 102, 0, 96, 0, 90, 0, 85, 0, 79, 0, 74, 0, 69, 0, 65, 0, 60, 0, 56, + 0, 52, 0, 48, 0, 45, 0, 41, 0, 38, 0, 35, 0, 31, 0, 28, 0, 25, 0, 23, + 0, 20, 0, 17, 0, 15, 0, 12, 0, 10, 0, 8, 0, 6, 0, 4, 0, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 31, 31, 63, 0, 85, 0, 95, 0, 102, 0, 106, 0, 109, 0, + 111, 0, 113, 0, 114, 0, 115, 0, 116, 0, 117, 0, 118, 0, 119, 0, 119, 0, + 120, 0, + 120, 0, 120, 0, 121, 0, 121, 0, 121, 0, 121, 0, 122, 0, 122, 0, 122, 0, + 122, 0, + 122, 0, 123, 0, 123, 0, 123, 0, 123, 0, 123, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 31, 31, + 0, 63, 0, 85, 0, 95, 0, 102, 0, 106, 0, 109, 0, 111, 0, 113, 0, 114, 0, + 115, + 0, 116, 0, 117, 0, 118, 0, 119, 0, 119, 0, 120, 0, 120, 0, 120, 0, 121, 0, + 121, + 0, 121, 0, 121, 0, 122, 0, 122, 0, 122, 0, 122, 0, 122, 0, 123, 0, 123, 0, + 123, + 0, 123, 0, 123, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63, 10, 10, + 31, 0, 51, 0, 63, 0, 72, 0, 79, 0, 85, 0, 89, 0, 92, 0, 95, 0, 98, 0, + 100, 0, 102, 0, 103, 0, 105, 0, 106, 0, 107, 0, 108, 0, 109, 0, 110, 0, + 110, 0, + 111, 0, 112, 0, 112, 0, 113, 0, 113, 0, 114, 0, 114, 0, 115, 0, 115, 0, + 115, 0, + 116, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 63, 0, 10, 10, 0, 31, 0, 51, 0, 63, 0, 72, + 0, 79, 0, 85, 0, 89, 0, 92, 0, 95, 0, 98, 0, 100, 0, 102, 0, 103, 0, 105, + 0, 106, 0, 107, 0, 108, 0, 109, 0, 110, 0, 110, 0, 111, 0, 112, 0, 112, 0, + 113, + 0, 113, 0, 114, 0, 114, 0, 115, 0, 115, 0, 115, 0, 116, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 85, 0, 31, 6, 6, 21, 0, 36, 0, 47, 0, 56, 0, + 63, 0, 69, 0, 74, 0, 78, 0, 81, 0, 85, 0, 87, 0, 89, 0, 92, 0, 93, 0, + 95, 0, 97, 0, 98, 0, 99, 0, 100, 0, 102, 0, 102, 0, 103, 0, 104, 0, 105, 0, + 106, 0, 106, 0, 107, 0, 108, 0, 108, 0, 109, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85, 0, + 31, 0, 6, 6, 0, 21, 0, 36, 0, 47, 0, 56, 0, 63, 0, 69, 0, 74, 0, 78, + 0, 81, 0, 85, 0, 87, 0, 89, 0, 92, 0, 93, 0, 95, 0, 97, 0, 98, 0, 99, + 0, 100, 0, 102, 0, 102, 0, 103, 0, 104, 0, 105, 0, 106, 0, 106, 0, 107, 0, + 108, + 0, 108, 0, 109, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 95, 0, 51, + 0, 21, 4, 4, 15, 0, 28, 0, 38, 0, 46, 0, 53, 0, 58, 0, 63, 0, 68, 0, + 71, 0, 75, 0, 77, 0, 80, 0, 82, 0, 85, 0, 86, 0, 88, 0, 90, 0, 91, 0, + 93, 0, 94, 0, 95, 0, 96, 0, 97, 0, 98, 0, 99, 0, 100, 0, 101, 0, 102, 0, + 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 95, 0, 51, 0, 21, 0, 4, 4, 0, 15, 0, 28, + 0, 38, 0, 46, 0, 53, 0, 58, 0, 63, 0, 68, 0, 71, 0, 75, 0, 77, 0, 80, + 0, 82, 0, 85, 0, 86, 0, 88, 0, 90, 0, 91, 0, 93, 0, 94, 0, 95, 0, 96, + 0, 97, 0, 98, 0, 99, 0, 100, 0, 101, 0, 102, 0, 102, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 102, 0, 63, 0, 36, 0, 15, 3, 3, 12, 0, 23, 0, + 31, 0, 39, 0, 45, 0, 51, 0, 55, 0, 60, 0, 63, 0, 67, 0, 70, 0, 72, 0, + 75, 0, 77, 0, 79, 0, 81, 0, 83, 0, 85, 0, 86, 0, 87, 0, 89, 0, 90, 0, + 91, 0, 92, 0, 93, 0, 94, 0, 95, 0, 96, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 102, 0, + 63, 0, 36, 0, 15, 0, 3, 3, 0, 12, 0, 23, 0, 31, 0, 39, 0, 45, 0, 51, + 0, 55, 0, 60, 0, 63, 0, 67, 0, 70, 0, 72, 0, 75, 0, 77, 0, 79, 0, 81, + 0, 83, 0, 85, 0, 86, 0, 87, 0, 89, 0, 90, 0, 91, 0, 92, 0, 93, 0, 94, + 0, 95, 0, 96, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 106, 0, 72, + 0, 47, 0, 28, 0, 12, 2, 2, 10, 0, 19, 0, 27, 0, 34, 0, 39, 0, 44, 0, + 49, 0, 53, 0, 57, 0, 60, 0, 63, 0, 66, 0, 69, 0, 71, 0, 73, 0, 75, 0, + 77, 0, 79, 0, 80, 0, 82, 0, 83, 0, 85, 0, 86, 0, 87, 0, 88, 0, 89, 0, + 90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 106, 0, 72, 0, 47, 0, 28, 0, 12, 0, 2, 2, + 0, 10, 0, 19, 0, 27, 0, 34, 0, 39, 0, 44, 0, 49, 0, 53, 0, 57, 0, 60, + 0, 63, 0, 66, 0, 69, 0, 71, 0, 73, 0, 75, 0, 77, 0, 79, 0, 80, 0, 82, + 0, 83, 0, 85, 0, 86, 0, 87, 0, 88, 0, 89, 0, 90, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 109, 0, 79, 0, 56, 0, 38, 0, 23, 0, 10, 2, 2, + 9, 0, 17, 0, 23, 0, 30, 0, 35, 0, 40, 0, 44, 0, 48, 0, 52, 0, 55, 0, + 58, 0, 61, 0, 63, 0, 66, 0, 68, 0, 70, 0, 72, 0, 74, 0, 75, 0, 77, 0, + 78, 0, 80, 0, 81, 0, 82, 0, 83, 0, 85, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 109, 0, + 79, 0, 56, 0, 38, 0, 23, 0, 10, 0, 2, 2, 0, 9, 0, 17, 0, 23, 0, 30, + 0, 35, 0, 40, 0, 44, 0, 48, 0, 52, 0, 55, 0, 58, 0, 61, 0, 63, 0, 66, + 0, 68, 0, 70, 0, 72, 0, 74, 0, 75, 0, 77, 0, 78, 0, 80, 0, 81, 0, 82, + 0, 83, 0, 85, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 111, 0, 85, + 0, 63, 0, 46, 0, 31, 0, 19, 0, 9, 2, 2, 7, 0, 15, 0, 21, 0, 26, 0, + 31, 0, 36, 0, 40, 0, 44, 0, 47, 0, 51, 0, 53, 0, 56, 0, 59, 0, 61, 0, + 63, 0, 65, 0, 67, 0, 69, 0, 71, 0, 72, 0, 74, 0, 75, 0, 77, 0, 78, 0, + 79, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 111, 0, 85, 0, 63, 0, 46, 0, 31, 0, 19, 0, + 9, 0, 2, 2, 0, 7, 0, 15, 0, 21, 0, 26, 0, 31, 0, 36, 0, 40, 0, 44, + 0, 47, 0, 51, 0, 53, 0, 56, 0, 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 69, + 0, 71, 0, 72, 0, 74, 0, 75, 0, 77, 0, 78, 0, 79, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 113, 0, 89, 0, 69, 0, 53, 0, 39, 0, 27, 0, 17, + 0, 7, 1, 1, 7, 0, 13, 0, 19, 0, 24, 0, 28, 0, 33, 0, 37, 0, 40, 0, + 44, 0, 47, 0, 50, 0, 52, 0, 55, 0, 57, 0, 59, 0, 61, 0, 63, 0, 65, 0, + 67, 0, 68, 0, 70, 0, 71, 0, 73, 0, 74, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 113, 0, + 89, 0, 69, 0, 53, 0, 39, 0, 27, 0, 17, 0, 7, 0, 1, 1, 0, 7, 0, 13, + 0, 19, 0, 24, 0, 28, 0, 33, 0, 37, 0, 40, 0, 44, 0, 47, 0, 50, 0, 52, + 0, 55, 0, 57, 0, 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 68, 0, 70, 0, 71, + 0, 73, 0, 74, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 114, 0, 92, + 0, 74, 0, 58, 0, 45, 0, 34, 0, 23, 0, 15, 0, 7, 1, 1, 6, 0, 12, 0, + 17, 0, 22, 0, 26, 0, 30, 0, 34, 0, 37, 0, 40, 0, 43, 0, 46, 0, 49, 0, + 51, 0, 54, 0, 56, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65, 0, 66, 0, 68, 0, + 69, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 114, 0, 92, 0, 74, 0, 58, 0, 45, 0, 34, 0, + 23, 0, 15, 0, 7, 0, 1, 1, 0, 6, 0, 12, 0, 17, 0, 22, 0, 26, 0, 30, + 0, 34, 0, 37, 0, 40, 0, 43, 0, 46, 0, 49, 0, 51, 0, 54, 0, 56, 0, 58, + 0, 60, 0, 62, 0, 63, 0, 65, 0, 66, 0, 68, 0, 69, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 115, 0, 95, 0, 78, 0, 63, 0, 51, 0, 39, 0, 30, + 0, 21, 0, 13, 0, 6, 1, 1, 5, 0, 11, 0, 15, 0, 20, 0, 24, 0, 28, 0, + 31, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 51, 0, 53, 0, 55, 0, + 57, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 115, 0, + 95, 0, 78, 0, 63, 0, 51, 0, 39, 0, 30, 0, 21, 0, 13, 0, 6, 0, 1, 1, + 0, 5, 0, 11, 0, 15, 0, 20, 0, 24, 0, 28, 0, 31, 0, 35, 0, 38, 0, 41, + 0, 43, 0, 46, 0, 48, 0, 51, 0, 53, 0, 55, 0, 57, 0, 58, 0, 60, 0, 62, + 0, 63, 0, 65, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 116, 0, 98, + 0, 81, 0, 68, 0, 55, 0, 44, 0, 35, 0, 26, 0, 19, 0, 12, 0, 5, 1, 1, + 5, 0, 10, 0, 14, 0, 18, 0, 22, 0, 26, 0, 29, 0, 32, 0, 35, 0, 38, 0, + 41, 0, 43, 0, 46, 0, 48, 0, 50, 0, 52, 0, 54, 0, 55, 0, 57, 0, 59, 0, + 60, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 116, 0, 98, 0, 81, 0, 68, 0, 55, 0, 44, 0, + 35, 0, 26, 0, 19, 0, 12, 0, 5, 0, 1, 1, 0, 5, 0, 10, 0, 14, 0, 18, + 0, 22, 0, 26, 0, 29, 0, 32, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, + 0, 50, 0, 52, 0, 54, 0, 55, 0, 57, 0, 59, 0, 60, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 117, 0, 100, 0, 85, 0, 71, 0, 60, 0, 49, 0, 40, + 0, 31, 0, 24, 0, 17, 0, 11, 0, 5, 1, 1, 4, 0, 9, 0, 13, 0, 17, 0, + 21, 0, 24, 0, 27, 0, 30, 0, 33, 0, 36, 0, 38, 0, 41, 0, 43, 0, 45, 0, + 47, 0, 49, 0, 51, 0, 53, 0, 55, 0, 56, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 117, 0, + 100, 0, 85, 0, 71, 0, 60, 0, 49, 0, 40, 0, 31, 0, 24, 0, 17, 0, 11, 0, + 5, 0, 1, 1, 0, 4, 0, 9, 0, 13, 0, 17, 0, 21, 0, 24, 0, 27, 0, 30, + 0, 33, 0, 36, 0, 38, 0, 41, 0, 43, 0, 45, 0, 47, 0, 49, 0, 51, 0, 53, + 0, 55, 0, 56, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 118, 0, 102, + 0, 87, 0, 75, 0, 63, 0, 53, 0, 44, 0, 36, 0, 28, 0, 22, 0, 15, 0, 10, + 0, 4, 1, 1, 4, 0, 8, 0, 12, 0, 16, 0, 19, 0, 23, 0, 26, 0, 29, 0, + 31, 0, 34, 0, 36, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0, 49, 0, 51, 0, + 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 118, 0, 102, 0, 87, 0, 75, 0, 63, 0, 53, 0, + 44, 0, 36, 0, 28, 0, 22, 0, 15, 0, 10, 0, 4, 0, 1, 1, 0, 4, 0, 8, + 0, 12, 0, 16, 0, 19, 0, 23, 0, 26, 0, 29, 0, 31, 0, 34, 0, 36, 0, 39, + 0, 41, 0, 43, 0, 45, 0, 47, 0, 49, 0, 51, 0, 52, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 119, 0, 103, 0, 89, 0, 77, 0, 67, 0, 57, 0, 48, + 0, 40, 0, 33, 0, 26, 0, 20, 0, 14, 0, 9, 0, 4, 1, 1, 4, 0, 8, 0, + 11, 0, 15, 0, 18, 0, 21, 0, 24, 0, 27, 0, 30, 0, 32, 0, 35, 0, 37, 0, + 39, 0, 41, 0, 43, 0, 45, 0, 47, 0, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 119, 0, + 103, 0, 89, 0, 77, 0, 67, 0, 57, 0, 48, 0, 40, 0, 33, 0, 26, 0, 20, 0, + 14, 0, 9, 0, 4, 0, 1, 1, 0, 4, 0, 8, 0, 11, 0, 15, 0, 18, 0, 21, + 0, 24, 0, 27, 0, 30, 0, 32, 0, 35, 0, 37, 0, 39, 0, 41, 0, 43, 0, 45, + 0, 47, 0, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 119, 0, 105, + 0, 92, 0, 80, 0, 70, 0, 60, 0, 52, 0, 44, 0, 37, 0, 30, 0, 24, 0, 18, + 0, 13, 0, 8, 0, 4, 1, 1, 3, 0, 7, 0, 11, 0, 14, 0, 17, 0, 20, 0, + 23, 0, 26, 0, 28, 0, 31, 0, 33, 0, 35, 0, 37, 0, 39, 0, 41, 0, 43, 0, + 45, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 119, 0, 105, 0, 92, 0, 80, 0, 70, 0, 60, 0, + 52, 0, 44, 0, 37, 0, 30, 0, 24, 0, 18, 0, 13, 0, 8, 0, 4, 0, 1, 1, + 0, 3, 0, 7, 0, 11, 0, 14, 0, 17, 0, 20, 0, 23, 0, 26, 0, 28, 0, 31, + 0, 33, 0, 35, 0, 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 120, 0, 106, 0, 93, 0, 82, 0, 72, 0, 63, 0, 55, + 0, 47, 0, 40, 0, 34, 0, 28, 0, 22, 0, 17, 0, 12, 0, 8, 0, 3, 0, 0, + 3, 0, 7, 0, 10, 0, 13, 0, 16, 0, 19, 0, 22, 0, 24, 0, 27, 0, 29, 0, + 31, 0, 34, 0, 36, 0, 37, 0, 39, 0, 41, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 0, + 106, 0, 93, 0, 82, 0, 72, 0, 63, 0, 55, 0, 47, 0, 40, 0, 34, 0, 28, 0, + 22, 0, 17, 0, 12, 0, 8, 0, 3, 0, 0, 0, 0, 3, 0, 7, 0, 10, 0, 13, + 0, 16, 0, 19, 0, 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 34, 0, 36, 0, 37, + 0, 39, 0, 41, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 0, 107, + 0, 95, 0, 85, 0, 75, 0, 66, 0, 58, 0, 51, 0, 44, 0, 37, 0, 31, 0, 26, + 0, 21, 0, 16, 0, 11, 0, 7, 0, 3, 0, 0, 3, 0, 6, 0, 10, 0, 13, 0, + 15, 0, 18, 0, 21, 0, 23, 0, 26, 0, 28, 0, 30, 0, 32, 0, 34, 0, 36, 0, + 38, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 120, 0, 107, 0, 95, 0, 85, 0, 75, 0, 66, 0, + 58, 0, 51, 0, 44, 0, 37, 0, 31, 0, 26, 0, 21, 0, 16, 0, 11, 0, 7, 0, + 3, 0, 0, 0, 0, 3, 0, 6, 0, 10, 0, 13, 0, 15, 0, 18, 0, 21, 0, 23, + 0, 26, 0, 28, 0, 30, 0, 32, 0, 34, 0, 36, 0, 38, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 120, 0, 108, 0, 97, 0, 86, 0, 77, 0, 69, 0, 61, + 0, 53, 0, 47, 0, 40, 0, 35, 0, 29, 0, 24, 0, 19, 0, 15, 0, 11, 0, 7, + 0, 3, 0, 0, 3, 0, 6, 0, 9, 0, 12, 0, 15, 0, 17, 0, 20, 0, 22, 0, + 24, 0, 27, 0, 29, 0, 31, 0, 33, 0, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 0, + 108, 0, 97, 0, 86, 0, 77, 0, 69, 0, 61, 0, 53, 0, 47, 0, 40, 0, 35, 0, + 29, 0, 24, 0, 19, 0, 15, 0, 11, 0, 7, 0, 3, 0, 0, 0, 0, 3, 0, 6, + 0, 9, 0, 12, 0, 15, 0, 17, 0, 20, 0, 22, 0, 24, 0, 27, 0, 29, 0, 31, + 0, 33, 0, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 109, + 0, 98, 0, 88, 0, 79, 0, 71, 0, 63, 0, 56, 0, 50, 0, 43, 0, 38, 0, 32, + 0, 27, 0, 23, 0, 18, 0, 14, 0, 10, 0, 6, 0, 3, 0, 0, 3, 0, 6, 0, + 9, 0, 11, 0, 14, 0, 17, 0, 19, 0, 21, 0, 23, 0, 26, 0, 28, 0, 30, 0, + 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 109, 0, 98, 0, 88, 0, 79, 0, 71, 0, + 63, 0, 56, 0, 50, 0, 43, 0, 38, 0, 32, 0, 27, 0, 23, 0, 18, 0, 14, 0, + 10, 0, 6, 0, 3, 0, 0, 0, 0, 3, 0, 6, 0, 9, 0, 11, 0, 14, 0, 17, + 0, 19, 0, 21, 0, 23, 0, 26, 0, 28, 0, 30, 0, 31, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 121, 0, 110, 0, 99, 0, 90, 0, 81, 0, 73, 0, 66, + 0, 59, 0, 52, 0, 46, 0, 41, 0, 35, 0, 30, 0, 26, 0, 21, 0, 17, 0, 13, + 0, 10, 0, 6, 0, 3, 0, 0, 3, 0, 5, 0, 8, 0, 11, 0, 13, 0, 16, 0, + 18, 0, 20, 0, 22, 0, 25, 0, 26, 0, 28, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, + 110, 0, 99, 0, 90, 0, 81, 0, 73, 0, 66, 0, 59, 0, 52, 0, 46, 0, 41, 0, + 35, 0, 30, 0, 26, 0, 21, 0, 17, 0, 13, 0, 10, 0, 6, 0, 3, 0, 0, 0, + 0, 3, 0, 5, 0, 8, 0, 11, 0, 13, 0, 16, 0, 18, 0, 20, 0, 22, 0, 25, + 0, 26, 0, 28, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 110, + 0, 100, 0, 91, 0, 83, 0, 75, 0, 68, 0, 61, 0, 55, 0, 49, 0, 43, 0, 38, + 0, 33, 0, 29, 0, 24, 0, 20, 0, 16, 0, 13, 0, 9, 0, 6, 0, 3, 0, 0, + 2, 0, 5, 0, 8, 0, 10, 0, 13, 0, 15, 0, 17, 0, 20, 0, 22, 0, 24, 0, + 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 110, 0, 100, 0, 91, 0, 83, 0, 75, 0, + 68, 0, 61, 0, 55, 0, 49, 0, 43, 0, 38, 0, 33, 0, 29, 0, 24, 0, 20, 0, + 16, 0, 13, 0, 9, 0, 6, 0, 3, 0, 0, 0, 0, 2, 0, 5, 0, 8, 0, 10, + 0, 13, 0, 15, 0, 17, 0, 20, 0, 22, 0, 24, 0, 25, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 121, 0, 111, 0, 102, 0, 93, 0, 85, 0, 77, 0, 70, + 0, 63, 0, 57, 0, 51, 0, 46, 0, 41, 0, 36, 0, 31, 0, 27, 0, 23, 0, 19, + 0, 15, 0, 12, 0, 9, 0, 5, 0, 2, 0, 0, 2, 0, 5, 0, 7, 0, 10, 0, + 12, 0, 15, 0, 17, 0, 19, 0, 21, 0, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, + 111, 0, 102, 0, 93, 0, 85, 0, 77, 0, 70, 0, 63, 0, 57, 0, 51, 0, 46, 0, + 41, 0, 36, 0, 31, 0, 27, 0, 23, 0, 19, 0, 15, 0, 12, 0, 9, 0, 5, 0, + 2, 0, 0, 0, 0, 2, 0, 5, 0, 7, 0, 10, 0, 12, 0, 15, 0, 17, 0, 19, + 0, 21, 0, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 112, + 0, 102, 0, 94, 0, 86, 0, 79, 0, 72, 0, 65, 0, 59, 0, 54, 0, 48, 0, 43, + 0, 38, 0, 34, 0, 30, 0, 26, 0, 22, 0, 18, 0, 15, 0, 11, 0, 8, 0, 5, + 0, 2, 0, 0, 2, 0, 5, 0, 7, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, + 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 112, 0, 102, 0, 94, 0, 86, 0, 79, 0, + 72, 0, 65, 0, 59, 0, 54, 0, 48, 0, 43, 0, 38, 0, 34, 0, 30, 0, 26, 0, + 22, 0, 18, 0, 15, 0, 11, 0, 8, 0, 5, 0, 2, 0, 0, 0, 0, 2, 0, 5, + 0, 7, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 122, 0, 112, 0, 103, 0, 95, 0, 87, 0, 80, 0, 74, + 0, 67, 0, 61, 0, 56, 0, 51, 0, 46, 0, 41, 0, 36, 0, 32, 0, 28, 0, 24, + 0, 21, 0, 17, 0, 14, 0, 11, 0, 8, 0, 5, 0, 2, 0, 0, 2, 0, 5, 0, + 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, + 112, 0, 103, 0, 95, 0, 87, 0, 80, 0, 74, 0, 67, 0, 61, 0, 56, 0, 51, 0, + 46, 0, 41, 0, 36, 0, 32, 0, 28, 0, 24, 0, 21, 0, 17, 0, 14, 0, 11, 0, + 8, 0, 5, 0, 2, 0, 0, 0, 0, 2, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, + 0, 15, 0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 113, + 0, 104, 0, 96, 0, 89, 0, 82, 0, 75, 0, 69, 0, 63, 0, 58, 0, 53, 0, 48, + 0, 43, 0, 39, 0, 35, 0, 31, 0, 27, 0, 23, 0, 20, 0, 17, 0, 13, 0, 10, + 0, 7, 0, 5, 0, 2, 0, 0, 2, 0, 4, 0, 7, 0, 9, 0, 11, 0, 13, 0, + 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 113, 0, 104, 0, 96, 0, 89, 0, 82, 0, + 75, 0, 69, 0, 63, 0, 58, 0, 53, 0, 48, 0, 43, 0, 39, 0, 35, 0, 31, 0, + 27, 0, 23, 0, 20, 0, 17, 0, 13, 0, 10, 0, 7, 0, 5, 0, 2, 0, 0, 0, + 0, 2, 0, 4, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 122, 0, 113, 0, 105, 0, 97, 0, 90, 0, 83, 0, 77, + 0, 71, 0, 65, 0, 60, 0, 55, 0, 50, 0, 45, 0, 41, 0, 37, 0, 33, 0, 29, + 0, 26, 0, 22, 0, 19, 0, 16, 0, 13, 0, 10, 0, 7, 0, 5, 0, 2, 0, 0, + 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, + 113, 0, 105, 0, 97, 0, 90, 0, 83, 0, 77, 0, 71, 0, 65, 0, 60, 0, 55, 0, + 50, 0, 45, 0, 41, 0, 37, 0, 33, 0, 29, 0, 26, 0, 22, 0, 19, 0, 16, 0, + 13, 0, 10, 0, 7, 0, 5, 0, 2, 0, 0, 0, 0, 2, 0, 4, 0, 6, 0, 8, + 0, 10, 0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 114, + 0, 106, 0, 98, 0, 91, 0, 85, 0, 78, 0, 72, 0, 67, 0, 62, 0, 57, 0, 52, + 0, 47, 0, 43, 0, 39, 0, 35, 0, 31, 0, 28, 0, 24, 0, 21, 0, 18, 0, 15, + 0, 12, 0, 10, 0, 7, 0, 4, 0, 2, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, + 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 114, 0, 106, 0, 98, 0, 91, 0, 85, 0, + 78, 0, 72, 0, 67, 0, 62, 0, 57, 0, 52, 0, 47, 0, 43, 0, 39, 0, 35, 0, + 31, 0, 28, 0, 24, 0, 21, 0, 18, 0, 15, 0, 12, 0, 10, 0, 7, 0, 4, 0, + 2, 0, 0, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 123, 0, 114, 0, 106, 0, 99, 0, 92, 0, 86, 0, 80, + 0, 74, 0, 68, 0, 63, 0, 58, 0, 54, 0, 49, 0, 45, 0, 41, 0, 37, 0, 34, + 0, 30, 0, 27, 0, 23, 0, 20, 0, 17, 0, 15, 0, 12, 0, 9, 0, 7, 0, 4, + 0, 2, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, + 114, 0, 106, 0, 99, 0, 92, 0, 86, 0, 80, 0, 74, 0, 68, 0, 63, 0, 58, 0, + 54, 0, 49, 0, 45, 0, 41, 0, 37, 0, 34, 0, 30, 0, 27, 0, 23, 0, 20, 0, + 17, 0, 15, 0, 12, 0, 9, 0, 7, 0, 4, 0, 2, 0, 0, 0, 0, 2, 0, 4, + 0, 6, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 115, + 0, 107, 0, 100, 0, 93, 0, 87, 0, 81, 0, 75, 0, 70, 0, 65, 0, 60, 0, 55, + 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, 0, 32, 0, 29, 0, 26, 0, 22, 0, 20, + 0, 17, 0, 14, 0, 11, 0, 9, 0, 6, 0, 4, 0, 2, 0, 0, 2, 0, 4, 0, + 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 115, 0, 107, 0, 100, 0, 93, 0, 87, 0, + 81, 0, 75, 0, 70, 0, 65, 0, 60, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, + 36, 0, 32, 0, 29, 0, 26, 0, 22, 0, 20, 0, 17, 0, 14, 0, 11, 0, 9, 0, + 6, 0, 4, 0, 2, 0, 0, 0, 0, 2, 0, 4, 0, 6, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 123, 0, 115, 0, 108, 0, 101, 0, 94, 0, 88, 0, 82, + 0, 77, 0, 71, 0, 66, 0, 62, 0, 57, 0, 53, 0, 49, 0, 45, 0, 41, 0, 37, + 0, 34, 0, 31, 0, 28, 0, 25, 0, 22, 0, 19, 0, 16, 0, 13, 0, 11, 0, 8, + 0, 6, 0, 4, 0, 2, 0, 0, 2, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, + 115, 0, 108, 0, 101, 0, 94, 0, 88, 0, 82, 0, 77, 0, 71, 0, 66, 0, 62, 0, + 57, 0, 53, 0, 49, 0, 45, 0, 41, 0, 37, 0, 34, 0, 31, 0, 28, 0, 25, 0, + 22, 0, 19, 0, 16, 0, 13, 0, 11, 0, 8, 0, 6, 0, 4, 0, 2, 0, 0, 0, + 0, 2, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 115, + 0, 108, 0, 102, 0, 95, 0, 89, 0, 83, 0, 78, 0, 73, 0, 68, 0, 63, 0, 59, + 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, 0, 33, 0, 30, 0, 26, 0, 24, + 0, 21, 0, 18, 0, 15, 0, 13, 0, 10, 0, 8, 0, 6, 0, 4, 0, 2, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 115, 0, 108, 0, 102, 0, 95, 0, 89, 0, + 83, 0, 78, 0, 73, 0, 68, 0, 63, 0, 59, 0, 55, 0, 51, 0, 47, 0, 43, 0, + 39, 0, 36, 0, 33, 0, 30, 0, 26, 0, 24, 0, 21, 0, 18, 0, 15, 0, 13, 0, + 10, 0, 8, 0, 6, 0, 4, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 123, 0, 116, 0, 109, 0, 102, 0, 96, 0, 90, 0, 85, + 0, 79, 0, 74, 0, 69, 0, 65, 0, 60, 0, 56, 0, 52, 0, 48, 0, 45, 0, 41, + 0, 38, 0, 35, 0, 31, 0, 28, 0, 25, 0, 23, 0, 20, 0, 17, 0, 15, 0, 12, + 0, 10, 0, 8, 0, 6, 0, 4, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, + 116, 0, 109, 0, 102, 0, 96, 0, 90, 0, 85, 0, 79, 0, 74, 0, 69, 0, 65, 0, + 60, 0, 56, 0, 52, 0, 48, 0, 45, 0, 41, 0, 38, 0, 35, 0, 31, 0, 28, 0, + 25, 0, 23, 0, 20, 0, 17, 0, 15, 0, 12, 0, 10, 0, 8, 0, 6, 0, 4, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +#endif diff --git a/src/gallium/auxiliary/postprocess/pp_program.c b/src/gallium/auxiliary/postprocess/pp_program.c new file mode 100644 index 00000000000..b92ac80a5db --- /dev/null +++ b/src/gallium/auxiliary/postprocess/pp_program.c @@ -0,0 +1,139 @@ +/************************************************************************** + * + * Copyright 2010 Jakob Bornecrantz + * Copyright 2011 Lauri Kasanen + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "postprocess/postprocess.h" +#include "cso_cache/cso_context.h" +#include "pipe/p_screen.h" +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "pipe/p_shader_tokens.h" +#include "util/u_inlines.h" +#include "util/u_simple_shaders.h" + +/** Initialize the internal details */ +struct program * +pp_init_prog(struct pp_queue_t *ppq, struct pipe_screen *pscreen) +{ + + struct program *p = calloc(1, sizeof(struct program)); + + pp_debug("Initializing program\n"); + if (!pscreen) + return NULL; + + if (!p) + return NULL; + + p->screen = pscreen; + p->pipe = pscreen->context_create(pscreen, NULL); + p->cso = cso_create_context(p->pipe); + + { + static const float verts[4][2][4] = { + { + {1.0f, 1.0f, 0.0f, 1.0f}, + {1.0f, 1.0f, 0.0f, 1.0f} + }, + { + {-1.0f, 1.0f, 0.0f, 1.0f}, + {0.0f, 1.0f, 0.0f, 1.0f} + }, + { + {-1.0f, -1.0f, 0.0f, 1.0f}, + {0.0f, 0.0f, 0.0f, 1.0f} + }, + { + {1.0f, -1.0f, 0.0f, 1.0f}, + {1.0f, 0.0f, 0.0f, 1.0f} + } + }; + + p->vbuf = pipe_buffer_create(pscreen, PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STATIC, sizeof(verts)); + pipe_buffer_write(p->pipe, p->vbuf, 0, sizeof(verts), verts); + } + + p->blend.rt[0].colormask = PIPE_MASK_RGBA; + p->blend.rt[0].rgb_src_factor = p->blend.rt[0].alpha_src_factor = + PIPE_BLENDFACTOR_SRC_ALPHA; + p->blend.rt[0].rgb_dst_factor = p->blend.rt[0].alpha_dst_factor = + PIPE_BLENDFACTOR_INV_SRC_ALPHA; + + p->rasterizer.cull_face = PIPE_FACE_NONE; + p->rasterizer.gl_rasterization_rules = 1; + + p->sampler.wrap_s = p->sampler.wrap_t = p->sampler.wrap_r = + PIPE_TEX_WRAP_CLAMP_TO_EDGE; + + p->sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + p->sampler.min_img_filter = p->sampler.mag_img_filter = + PIPE_TEX_FILTER_LINEAR; + p->sampler.normalized_coords = 1; + + p->sampler_point.wrap_s = p->sampler_point.wrap_t = + p->sampler_point.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + p->sampler_point.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + p->sampler_point.min_img_filter = p->sampler_point.mag_img_filter = + PIPE_TEX_FILTER_NEAREST; + p->sampler_point.normalized_coords = 1; + + p->velem[0].src_offset = 0; + p->velem[0].instance_divisor = 0; + p->velem[0].vertex_buffer_index = 0; + p->velem[0].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + p->velem[1].src_offset = 1 * 4 * sizeof(float); + p->velem[1].instance_divisor = 0; + p->velem[1].vertex_buffer_index = 0; + p->velem[1].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + + if (!p->screen->is_format_supported(p->screen, + PIPE_FORMAT_R32G32B32A32_FLOAT, + PIPE_BUFFER, 1, + PIPE_BIND_VERTEX_BUFFER)) + pp_debug("Vertex buf format fail\n"); + + + { + const uint semantic_names[] = { TGSI_SEMANTIC_POSITION, + TGSI_SEMANTIC_GENERIC + }; + const uint semantic_indexes[] = { 0, 0 }; + p->passvs = util_make_vertex_passthrough_shader(p->pipe, 2, + semantic_names, + semantic_indexes); + } + + p->framebuffer.nr_cbufs = 1; + + p->surf.usage = PIPE_BIND_RENDER_TARGET; + p->surf.format = PIPE_FORMAT_B8G8R8A8_UNORM; + + p->pipe->set_sample_mask(p->pipe, ~0); + + return p; +} diff --git a/src/gallium/auxiliary/postprocess/pp_program.h b/src/gallium/auxiliary/postprocess/pp_program.h new file mode 100644 index 00000000000..2749b35b372 --- /dev/null +++ b/src/gallium/auxiliary/postprocess/pp_program.h @@ -0,0 +1,64 @@ +/************************************************************************** + * + * Copyright 2010 Jakob Bornecrantz + * Copyright 2011 Lauri Kasanen + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef PP_PROGRAM_H +#define PP_PROGRAM_H + +#include "pipe/p_state.h" + +/** +* Internal control details. +*/ +struct program +{ + struct pipe_screen *screen; + struct pipe_context *pipe; + struct cso_context *cso; + + struct pipe_blend_state blend; + struct pipe_depth_stencil_alpha_state depthstencil; + struct pipe_rasterizer_state rasterizer; + struct pipe_sampler_state sampler; /* bilinear */ + struct pipe_sampler_state sampler_point; /* point */ + struct pipe_viewport_state viewport; + struct pipe_framebuffer_state framebuffer; + struct pipe_vertex_element velem[2]; + + float clear_color[4]; + + void *passvs; + + struct pipe_resource *vbuf; + struct pipe_surface surf; + struct pipe_sampler_view *view; + + struct blit_state *blitctx; +}; + + +#endif diff --git a/src/gallium/auxiliary/postprocess/pp_run.c b/src/gallium/auxiliary/postprocess/pp_run.c new file mode 100644 index 00000000000..ce671aea360 --- /dev/null +++ b/src/gallium/auxiliary/postprocess/pp_run.c @@ -0,0 +1,188 @@ +/************************************************************************** + * + * Copyright 2011 Lauri Kasanen + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "postprocess.h" + +#include "postprocess/pp_filters.h" +#include "util/u_blit.h" +#include "util/u_inlines.h" +#include "util/u_sampler.h" + +/** +* Main run function of the PP queue. Called on swapbuffers/flush. +* +* Runs all requested filters in order and handles shuffling the temp +* buffers in between. +*/ +void +pp_run(struct pp_queue_t *ppq, struct pipe_resource *in, + struct pipe_resource *out, struct pipe_resource *indepth) +{ + + unsigned int i; + + if (in->width0 != ppq->p->framebuffer.width || + in->height0 != ppq->p->framebuffer.height) { + pp_debug("Resizing the temp pp buffers\n"); + pp_free_fbos(ppq); + pp_init_fbos(ppq, in->width0, in->height0, indepth); + } + + if (in == out && ppq->n_filters == 1) { + /* Make a copy of in to tmp[0] in this case. */ + unsigned int w = ppq->p->framebuffer.width; + unsigned int h = ppq->p->framebuffer.height; + + util_blit_pixels(ppq->p->blitctx, in, 0, 0, 0, + w, h, 0, ppq->tmps[0], + 0, 0, w, h, 0, PIPE_TEX_MIPFILTER_NEAREST); + + in = ppq->tmp[0]; + } + + switch (ppq->n_filters) { + case 1: /* No temp buf */ + ppq->pp_queue[0] (ppq, in, out, 0); + break; + case 2: /* One temp buf */ + + ppq->pp_queue[0] (ppq, in, ppq->tmp[0], 0); + ppq->pp_queue[1] (ppq, ppq->tmp[0], out, 1); + + break; + default: /* Two temp bufs */ + ppq->pp_queue[0] (ppq, in, ppq->tmp[0], 0); + + for (i = 1; i < (ppq->n_filters - 1); i++) { + if (i % 2 == 0) + ppq->pp_queue[i] (ppq, ppq->tmp[1], ppq->tmp[0], i); + + else + ppq->pp_queue[i] (ppq, ppq->tmp[0], ppq->tmp[1], i); + } + + if (i % 2 == 0) + ppq->pp_queue[i] (ppq, ppq->tmp[1], out, i); + + else + ppq->pp_queue[i] (ppq, ppq->tmp[0], out, i); + + break; + } +} + + +/* Utility functions for the filters. You're not forced to use these if */ +/* your filter is more complicated. */ + +/** Setup this resource as the filter input. */ +void +pp_filter_setup_in(struct program *p, struct pipe_resource *in) +{ + struct pipe_sampler_view v_tmp; + u_sampler_view_default_template(&v_tmp, in, in->format); + p->view = p->pipe->create_sampler_view(p->pipe, in, &v_tmp); +} + +/** Setup this resource as the filter output. */ +void +pp_filter_setup_out(struct program *p, struct pipe_resource *out) +{ + p->surf.format = out->format; + p->surf.usage = PIPE_BIND_RENDER_TARGET; + + p->framebuffer.cbufs[0] = p->pipe->create_surface(p->pipe, out, &p->surf); +} + +/** Clean up the input and output set with the above. */ +void +pp_filter_end_pass(struct program *p) +{ + pipe_surface_reference(&p->framebuffer.cbufs[0], NULL); + pipe_sampler_view_reference(&p->view, NULL); +} + +/** +* Convert the TGSI assembly to a runnable shader. +* +* We need not care about geometry shaders. All we have is screen quads. +*/ +void * +pp_tgsi_to_state(struct pipe_context *pipe, const char *text, bool isvs, + const char *name) +{ + struct pipe_shader_state state; + struct tgsi_token tokens[PP_MAX_TOKENS]; + + if (tgsi_text_translate(text, tokens, Elements(tokens)) == FALSE) { + pp_debug("Failed to translate %s\n", name); + return NULL; + } + + state.tokens = tokens; + + if (isvs) + return pipe->create_vs_state(pipe, &state); + else + return pipe->create_fs_state(pipe, &state); +} + +/** Setup misc state for the filter. */ +void +pp_filter_misc_state(struct program *p) +{ + cso_set_blend(p->cso, &p->blend); + cso_set_depth_stencil_alpha(p->cso, &p->depthstencil); + cso_set_rasterizer(p->cso, &p->rasterizer); + cso_set_viewport(p->cso, &p->viewport); + + cso_set_vertex_elements(p->cso, 2, p->velem); +} + +/** Draw with the filter to the set output. */ +void +pp_filter_draw(struct program *p) +{ + util_draw_vertex_buffer(p->pipe, p->cso, p->vbuf, 0, + PIPE_PRIM_QUADS, 4, 2); + p->pipe->flush(p->pipe, NULL); +} + +/** Set the framebuffer as active. */ +void +pp_filter_set_fb(struct program *p) +{ + cso_set_framebuffer(p->cso, &p->framebuffer); +} + +/** Set the framebuffer as active and clear it. */ +void +pp_filter_set_clear_fb(struct program *p) +{ + cso_set_framebuffer(p->cso, &p->framebuffer); + p->pipe->clear(p->pipe, PIPE_CLEAR_COLOR, p->clear_color, 0, 0); +} diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 712e8aca794..38dc1efa551 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -1594,6 +1594,9 @@ store_dest(struct tgsi_exec_machine *mach, #define FETCH(VAL,INDEX,CHAN)\ fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT) +#define IFETCH(VAL,INDEX,CHAN)\ + fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_INT) + /** * Execute ARB-style KIL which is predicated by a src register. @@ -1921,6 +1924,86 @@ exec_txd(struct tgsi_exec_machine *mach, } +static void +exec_txf(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + struct tgsi_sampler *sampler; + const uint unit = inst->Src[1].Register.Index; + union tgsi_exec_channel r[4]; + uint chan; + float rgba[NUM_CHANNELS][QUAD_SIZE]; + int j; + + IFETCH(&r[3], 0, CHAN_W); + + switch(inst->Texture.Texture) { + case TGSI_TEXTURE_3D: + case TGSI_TEXTURE_2D_ARRAY: + IFETCH(&r[2], 0, CHAN_Z); + /* fallthrough */ + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: + case TGSI_TEXTURE_1D_ARRAY: + IFETCH(&r[1], 0, CHAN_Y); + /* fallthrough */ + case TGSI_TEXTURE_1D: + case TGSI_TEXTURE_SHADOW1D: + IFETCH(&r[0], 0, CHAN_X); + break; + default: + assert(0); + break; + } + + sampler = mach->Samplers[unit]; + sampler->get_texel(sampler, r[0].i, r[1].i, r[2].i, r[3].i, rgba); + + for (j = 0; j < QUAD_SIZE; j++) { + r[0].f[j] = rgba[0][j]; + r[1].f[j] = rgba[1][j]; + r[2].f[j] = rgba[2][j]; + r[3].f[j] = rgba[3][j]; + } + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } + } +} + +static void +exec_txq(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + struct tgsi_sampler *sampler; + const uint unit = inst->Src[1].Register.Index; + int result[4]; + union tgsi_exec_channel r[4], src; + uint chan; + int i,j; + + fetch_source(mach, &src, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_INT); + sampler = mach->Samplers[unit]; + + sampler->get_dims(sampler, src.i[0], result); + + for (i = 0; i < QUAD_SIZE; i++) { + for (j = 0; j < 4; j++) { + r[j].i[i] = result[j]; + } + } + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, + TGSI_EXEC_DATA_INT); + } + } +} static void exec_sample(struct tgsi_exec_machine *mach, @@ -2989,6 +3072,17 @@ micro_xor(union tgsi_exec_channel *dst, } static void +micro_mod(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1) +{ + dst->i[0] = src0->i[0] % src1->i[0]; + dst->i[1] = src0->i[1] % src1->i[1]; + dst->i[2] = src0->i[2] % src1->i[2]; + dst->i[3] = src0->i[3] % src1->i[3]; +} + +static void micro_f2i(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src) { @@ -3691,7 +3785,7 @@ exec_instruction( break; case TGSI_OPCODE_MOD: - assert (0); + exec_vector_binary(mach, inst, micro_mod, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); break; case TGSI_OPCODE_XOR: @@ -3703,11 +3797,11 @@ exec_instruction( break; case TGSI_OPCODE_TXF: - assert (0); + exec_txf(mach, inst); break; case TGSI_OPCODE_TXQ: - assert (0); + exec_txq(mach, inst); break; case TGSI_OPCODE_EMIT: diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index 33f33aa82c7..3f6964c17fb 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -90,6 +90,11 @@ struct tgsi_sampler const float c0[QUAD_SIZE], enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]); + void (*get_dims)(struct tgsi_sampler *sampler, int level, + int dims[4]); + void (*get_texel)(struct tgsi_sampler *sampler, const int i[QUAD_SIZE], + const int j[QUAD_SIZE], const int k[QUAD_SIZE], + const int lod[QUAD_SIZE], float rgba[NUM_CHANNELS][QUAD_SIZE]); }; #define TGSI_EXEC_NUM_TEMPS 128 @@ -400,6 +405,8 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param) return 1; case PIPE_SHADER_CAP_SUBROUTINES: return 1; + case PIPE_SHADER_CAP_INTEGERS: + return 1; default: return 0; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index 83c6ac75e54..f165f8240e6 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -200,19 +200,20 @@ tgsi_scan_shader(const struct tgsi_token *tokens, info->file_max[file] = MAX2(info->file_max[file], (int)reg); } break; + case TGSI_TOKEN_TYPE_PROPERTY: - { - const struct tgsi_full_property *fullprop - = &parse.FullToken.FullProperty; + { + const struct tgsi_full_property *fullprop + = &parse.FullToken.FullProperty; - info->properties[info->num_properties].name = - fullprop->Property.PropertyName; - memcpy(info->properties[info->num_properties].data, - fullprop->u, 8 * sizeof(unsigned));; + info->properties[info->num_properties].name = + fullprop->Property.PropertyName; + memcpy(info->properties[info->num_properties].data, + fullprop->u, 8 * sizeof(unsigned));; - ++info->num_properties; - } - break; + ++info->num_properties; + } + break; default: assert( 0 ); @@ -222,6 +223,23 @@ tgsi_scan_shader(const struct tgsi_token *tokens, info->uses_kill = (info->opcode_count[TGSI_OPCODE_KIL] || info->opcode_count[TGSI_OPCODE_KILP]); + /* extract simple properties */ + for (i = 0; i < info->num_properties; ++i) { + switch (info->properties[i].name) { + case TGSI_PROPERTY_FS_COORD_ORIGIN: + info->origin_lower_left = info->properties[i].data[0]; + break; + case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER: + info->pixel_center_integer = info->properties[i].data[0]; + break; + case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS: + info->color0_writes_all_cbufs = info->properties[i].data[0]; + break; + default: + ; + } + } + tgsi_parse_free (&parse); } diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h index 53ab3d509dd..d6e593b3968 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.h +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h @@ -68,6 +68,9 @@ struct tgsi_shader_info boolean writes_edgeflag; /**< vertex shader outputs edgeflag */ boolean uses_kill; /**< KIL or KILP instruction used? */ boolean uses_instanceid; + boolean origin_lower_left; + boolean pixel_center_integer; + boolean color0_writes_all_cbufs; /** * Bitmask indicating which register files are accessed with diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index 528f344a0f7..d8e46f07c88 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -26,8 +26,8 @@ /** * @file - * Blitter utility to facilitate acceleration of the clear, clear_render_target, clear_depth_stencil - * resource_copy_region functions. + * Blitter utility to facilitate acceleration of the clear, clear_render_target, + * clear_depth_stencil, and resource_copy_region functions. * * @author Marek Olšák */ @@ -197,8 +197,6 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) memset(&velem[0], 0, sizeof(velem[0]) * 2); for (i = 0; i < 2; i++) { velem[i].src_offset = i * 4 * sizeof(float); - velem[i].instance_divisor = 0; - velem[i].vertex_buffer_index = 0; velem[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; } ctx->velem_state = pipe->create_vertex_elements_state(pipe, 2, &velem[0]); @@ -288,26 +286,33 @@ static void blitter_restore_CSOs(struct blitter_context_priv *ctx) unsigned i; /* restore the state objects which are always required to be saved */ - pipe->bind_blend_state(pipe, ctx->base.saved_blend_state); - pipe->bind_depth_stencil_alpha_state(pipe, ctx->base.saved_dsa_state); pipe->bind_rasterizer_state(pipe, ctx->base.saved_rs_state); - pipe->bind_fs_state(pipe, ctx->base.saved_fs); pipe->bind_vs_state(pipe, ctx->base.saved_vs); pipe->bind_vertex_elements_state(pipe, ctx->base.saved_velem_state); - ctx->base.saved_blend_state = INVALID_PTR; - ctx->base.saved_dsa_state = INVALID_PTR; ctx->base.saved_rs_state = INVALID_PTR; - ctx->base.saved_fs = INVALID_PTR; ctx->base.saved_vs = INVALID_PTR; ctx->base.saved_velem_state = INVALID_PTR; + /* restore the state objects which are required to be saved for clear/copy + */ + if (ctx->base.saved_blend_state != INVALID_PTR) { + pipe->bind_blend_state(pipe, ctx->base.saved_blend_state); + ctx->base.saved_blend_state = INVALID_PTR; + } + if (ctx->base.saved_dsa_state != INVALID_PTR) { + pipe->bind_depth_stencil_alpha_state(pipe, ctx->base.saved_dsa_state); + ctx->base.saved_dsa_state = INVALID_PTR; + } + if (ctx->base.saved_fs != INVALID_PTR) { + pipe->bind_fs_state(pipe, ctx->base.saved_fs); + ctx->base.saved_fs = INVALID_PTR; + } + pipe->set_stencil_ref(pipe, &ctx->base.saved_stencil_ref); pipe->set_viewport_state(pipe, &ctx->base.saved_viewport); pipe->set_clip_state(pipe, &ctx->base.saved_clip); - /* restore the state objects which are required to be saved before copy/fill - */ if (ctx->base.saved_fb_state.nr_cbufs != ~0) { pipe->set_framebuffer_state(pipe, &ctx->base.saved_fb_state); util_unreference_framebuffer_state(&ctx->base.saved_fb_state); @@ -724,14 +729,14 @@ boolean is_overlap(unsigned sx1, unsigned sx2, unsigned sy1, unsigned sy2, return sx1 < dx2 && sx2 > dx1 && sy1 < dy2 && sy2 > dy1; } -void util_blitter_copy_region(struct blitter_context *blitter, - struct pipe_resource *dst, - unsigned dstlevel, - unsigned dstx, unsigned dsty, unsigned dstz, - struct pipe_resource *src, - unsigned srclevel, - const struct pipe_box *srcbox, - boolean ignore_stencil) +void util_blitter_copy_texture(struct blitter_context *blitter, + struct pipe_resource *dst, + unsigned dstlevel, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, + unsigned srclevel, + const struct pipe_box *srcbox, + boolean ignore_stencil) { struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; struct pipe_context *pipe = ctx->base.pipe; diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h index 41470d92bba..df6f023a638 100644 --- a/src/gallium/auxiliary/util/u_blitter.h +++ b/src/gallium/auxiliary/util/u_blitter.h @@ -126,12 +126,15 @@ struct pipe_context *util_blitter_get_pipe(struct blitter_context *blitter) } /* - * These CSOs must be saved before any of the following functions is called: + * These states must be saved before any of the following functions is called: * - blend state * - depth stencil alpha state * - rasterizer state * - vertex shader + * - any other shader??? (XXX) * - fragment shader + * - vertex buffers + * - vertex elements */ /** @@ -169,14 +172,14 @@ void util_blitter_clear_depth_custom(struct blitter_context *blitter, * - fragment sampler states * - fragment sampler textures */ -void util_blitter_copy_region(struct blitter_context *blitter, - struct pipe_resource *dst, - unsigned dstlevel, - unsigned dstx, unsigned dsty, unsigned dstz, - struct pipe_resource *src, - unsigned srclevel, - const struct pipe_box *srcbox, - boolean ignore_stencil); +void util_blitter_copy_texture(struct blitter_context *blitter, + struct pipe_resource *dst, + unsigned dstlevel, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, + unsigned srclevel, + const struct pipe_box *srcbox, + boolean ignore_stencil); /** * Clear a region of a (color) surface to a constant value. diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c index 004df439ff5..2d6193039a7 100644 --- a/src/gallium/auxiliary/util/u_debug.c +++ b/src/gallium/auxiliary/util/u_debug.c @@ -730,7 +730,7 @@ debug_dump_float_rgba_bmp(const char *filename, pixel.rgbRed = float_to_ubyte(ptr[x*4 + 0]); pixel.rgbGreen = float_to_ubyte(ptr[x*4 + 1]); pixel.rgbBlue = float_to_ubyte(ptr[x*4 + 2]); - pixel.rgbAlpha = 255; + pixel.rgbAlpha = float_to_ubyte(ptr[x*4 + 3]); os_stream_write(stream, &pixel, 4); } } diff --git a/src/gallium/auxiliary/util/u_format.c b/src/gallium/auxiliary/util/u_format.c index 9cbdd0a5b99..34922ab18ab 100644 --- a/src/gallium/auxiliary/util/u_format.c +++ b/src/gallium/auxiliary/util/u_format.c @@ -390,3 +390,53 @@ util_format_translate(enum pipe_format dst_format, FREE(tmp_row); } } + +void util_format_compose_swizzles(const unsigned char swz1[4], + const unsigned char swz2[4], + unsigned char dst[4]) +{ + unsigned i; + + for (i = 0; i < 4; i++) { + dst[i] = swz2[i] <= UTIL_FORMAT_SWIZZLE_W ? + swz1[swz2[i]] : swz2[i]; + } +} + +void util_format_swizzle_4f(float *dst, const float *src, + const unsigned char swz[4]) +{ + unsigned i; + + for (i = 0; i < 4; i++) { + if (swz[i] <= UTIL_FORMAT_SWIZZLE_W) + dst[i] = src[swz[i]]; + else if (swz[i] == UTIL_FORMAT_SWIZZLE_0) + dst[i] = 0; + else if (swz[i] == UTIL_FORMAT_SWIZZLE_1) + dst[i] = 1; + } +} + +void util_format_unswizzle_4f(float *dst, const float *src, + const unsigned char swz[4]) +{ + unsigned i; + + for (i = 0; i < 4; i++) { + switch (swz[i]) { + case UTIL_FORMAT_SWIZZLE_X: + dst[0] = src[i]; + break; + case UTIL_FORMAT_SWIZZLE_Y: + dst[1] = src[i]; + break; + case UTIL_FORMAT_SWIZZLE_Z: + dst[2] = src[i]; + break; + case UTIL_FORMAT_SWIZZLE_W: + dst[3] = src[i]; + break; + } + } +} diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv index 347e2beb8dd..a3d2aae62c8 100644 --- a/src/gallium/auxiliary/util/u_format.csv +++ b/src/gallium/auxiliary/util/u_format.csv @@ -260,10 +260,10 @@ PIPE_FORMAT_R10G10B10X2_USCALED , plain, 1, 1, u10 , u10 , u10 , x2 , xyz1, r # A.k.a. D3DDECLTYPE_DEC3N PIPE_FORMAT_R10G10B10X2_SNORM , plain, 1, 1, sn10, sn10, sn10 , x2 , xyz1, rgb -PIPE_FORMAT_YV12 , subsampled, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv -PIPE_FORMAT_YV16 , subsampled, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv -PIPE_FORMAT_IYUV , subsampled, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv -PIPE_FORMAT_NV12 , subsampled, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv -PIPE_FORMAT_NV21 , subsampled, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv -PIPE_FORMAT_IA44 , subsampled, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv -PIPE_FORMAT_AI44 , subsampled, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv +PIPE_FORMAT_YV12 , other, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv +PIPE_FORMAT_YV16 , other, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv +PIPE_FORMAT_IYUV , other, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv +PIPE_FORMAT_NV12 , other, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv +PIPE_FORMAT_NV21 , other, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv +PIPE_FORMAT_IA44 , other, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv +PIPE_FORMAT_AI44 , other, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h index bb3ed72e932..566fa79e781 100644 --- a/src/gallium/auxiliary/util/u_format.h +++ b/src/gallium/auxiliary/util/u_format.h @@ -815,6 +815,25 @@ util_format_translate(enum pipe_format dst_format, unsigned src_x, unsigned src_y, unsigned width, unsigned height); +/* + * Swizzle operations. + */ + +/* Compose two sets of swizzles. + * If V is a 4D vector and the function parameters represent functions that + * swizzle vector components, this holds: + * swz2(swz1(V)) = dst(V) + */ +void util_format_compose_swizzles(const unsigned char swz1[4], + const unsigned char swz2[4], + unsigned char dst[4]); + +void util_format_swizzle_4f(float *dst, const float *src, + const unsigned char swz[4]); + +void util_format_unswizzle_4f(float *dst, const float *src, + const unsigned char swz[4]); + #ifdef __cplusplus } // extern "C" { #endif diff --git a/src/gallium/auxiliary/util/u_format_s3tc.c b/src/gallium/auxiliary/util/u_format_s3tc.c index bb989c29d81..d8a7c0d453f 100644 --- a/src/gallium/auxiliary/util/u_format_s3tc.c +++ b/src/gallium/auxiliary/util/u_format_s3tc.c @@ -119,8 +119,15 @@ util_format_s3tc_init(void) library = util_dl_open(DXTN_LIBNAME); if (!library) { - debug_printf("couldn't open " DXTN_LIBNAME ", software DXTn " - "compression/decompression unavailable\n"); + if (getenv("force_s3tc_enable") && + !strcmp(getenv("force_s3tc_enable"), "true")) { + debug_printf("couldn't open " DXTN_LIBNAME ", enabling DXTn due to " + "force_s3tc_enable=true environment variable\n"); + util_format_s3tc_enabled = TRUE; + } else { + debug_printf("couldn't open " DXTN_LIBNAME ", software DXTn " + "compression/decompression unavailable\n"); + } return; } diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index 0b5284428eb..46d9322932a 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -199,6 +199,16 @@ roundf(float x) #endif /* _MSC_VER */ +#ifdef PIPE_OS_ANDROID + +static INLINE +double log2(double d) +{ + return log(d) * (1.0 / M_LN2); +} + +#endif + @@ -409,7 +419,7 @@ unsigned ffs( unsigned u ) return i; } -#elif defined(__MINGW32__) +#elif defined(__MINGW32__) || defined(PIPE_OS_ANDROID) #define ffs __builtin_ffs #endif diff --git a/src/gallium/auxiliary/util/u_pstipple.c b/src/gallium/auxiliary/util/u_pstipple.c index f79a6938d1d..ac0df8c1a9c 100644 --- a/src/gallium/auxiliary/util/u_pstipple.c +++ b/src/gallium/auxiliary/util/u_pstipple.c @@ -52,6 +52,7 @@ #include "tgsi/tgsi_transform.h" #include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_scan.h" /** Approx number of new tokens for instructions in pstip_transform_inst() */ #define NUM_NEW_TOKENS 50 @@ -175,6 +176,7 @@ util_pstipple_create_sampler(struct pipe_context *pipe) */ struct pstip_transform_context { struct tgsi_transform_context base; + struct tgsi_shader_info info; uint tempsUsed; /**< bitmask */ int wincoordInput; int maxInput; @@ -183,12 +185,13 @@ struct pstip_transform_context { int texTemp; /**< temp registers */ int numImmed; boolean firstInstruction; + uint coordOrigin; }; /** * TGSI declaration transform callback. - * Look for a free sampler, a free input attrib, and two free temp regs. + * Track samplers used, temps used, inputs used. */ static void pstip_transform_decl(struct tgsi_transform_context *ctx, @@ -197,10 +200,11 @@ pstip_transform_decl(struct tgsi_transform_context *ctx, struct pstip_transform_context *pctx = (struct pstip_transform_context *) ctx; + /* XXX we can use tgsi_shader_info instead of some of this */ + if (decl->Declaration.File == TGSI_FILE_SAMPLER) { uint i; - for (i = decl->Range.First; - i <= decl->Range.Last; i++) { + for (i = decl->Range.First; i <= decl->Range.Last; i++) { pctx->samplersUsed |= 1 << i; } } @@ -211,8 +215,7 @@ pstip_transform_decl(struct tgsi_transform_context *ctx, } else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { uint i; - for (i = decl->Range.First; - i <= decl->Range.Last; i++) { + for (i = decl->Range.First; i <= decl->Range.Last; i++) { pctx->tempsUsed |= (1 << i); } } @@ -243,8 +246,16 @@ free_bit(uint bitfield) /** * TGSI instruction transform callback. - * Replace writes to result.color w/ a temp reg. - * Upon END instruction, insert texture sampling code for antialiasing. + * Before the first instruction, insert our new code to sample the + * stipple texture (using the fragment coord register) then kill the + * fragment if the stipple texture bit is off. + * + * Insert: + * declare new registers + * MUL texTemp, INPUT[wincoord], 1/32; + * TEX texTemp, texTemp, sampler; + * KIL -texTemp; # if -texTemp < 0, KILL fragment + * [...original code...] */ static void pstip_transform_inst(struct tgsi_transform_context *ctx, @@ -261,7 +272,7 @@ pstip_transform_inst(struct tgsi_transform_context *ctx, uint i; int wincoordInput; - /* find free sampler */ + /* find free texture sampler */ pctx->freeSampler = free_bit(pctx->samplersUsed); if (pctx->freeSampler >= PIPE_MAX_SAMPLERS) pctx->freeSampler = PIPE_MAX_SAMPLERS - 1; @@ -271,7 +282,7 @@ pstip_transform_inst(struct tgsi_transform_context *ctx, else wincoordInput = pctx->wincoordInput; - /* find one free temp reg */ + /* find one free temp register */ for (i = 0; i < 32; i++) { if ((pctx->tempsUsed & (1 << i)) == 0) { /* found a free temp */ @@ -397,6 +408,7 @@ util_pstipple_create_fragment_shader(struct pipe_context *pipe, struct pipe_shader_state *new_fs; struct pstip_transform_context transform; const uint newLen = tgsi_num_tokens(fs->tokens) + NUM_NEW_TOKENS; + unsigned i; new_fs = MALLOC(sizeof(*new_fs)); if (!new_fs) @@ -408,22 +420,33 @@ util_pstipple_create_fragment_shader(struct pipe_context *pipe, return NULL; } + /* Setup shader transformation info/context. + */ memset(&transform, 0, sizeof(transform)); transform.wincoordInput = -1; transform.maxInput = -1; transform.texTemp = -1; transform.firstInstruction = TRUE; + transform.coordOrigin = TGSI_FS_COORD_ORIGIN_UPPER_LEFT; transform.base.transform_instruction = pstip_transform_inst; transform.base.transform_declaration = pstip_transform_decl; transform.base.transform_immediate = pstip_transform_immed; + tgsi_scan_shader(fs->tokens, &transform.info); + + /* find fragment coordinate origin property */ + for (i = 0; i < transform.info.num_properties; i++) { + if (transform.info.properties[i].name == TGSI_PROPERTY_FS_COORD_ORIGIN) + transform.coordOrigin = transform.info.properties[i].data[0]; + } + tgsi_transform_shader(fs->tokens, (struct tgsi_token *) new_fs->tokens, newLen, &transform.base); #if 0 /* DEBUG */ tgsi_dump(fs->tokens, 0); - tgsi_dump(pstip_fs.tokens, 0); + tgsi_dump(new_fs->tokens, 0); #endif assert(transform.freeSampler < PIPE_MAX_SAMPLERS); diff --git a/src/gallium/auxiliary/util/u_vbuf_mgr.c b/src/gallium/auxiliary/util/u_vbuf_mgr.c index 374fc336b83..d9b39e528bb 100644 --- a/src/gallium/auxiliary/util/u_vbuf_mgr.c +++ b/src/gallium/auxiliary/util/u_vbuf_mgr.c @@ -34,21 +34,6 @@ #include "translate/translate.h" #include "translate/translate_cache.h" -/* Hardware vertex fetcher limitations can be described by this structure. */ -struct u_vbuf_caps { - /* Vertex format CAPs. */ - /* TRUE if hardware supports it. */ - unsigned format_fixed32:1; /* PIPE_FORMAT_*32*_FIXED */ - unsigned format_float16:1; /* PIPE_FORMAT_*16*_FLOAT */ - unsigned format_float64:1; /* PIPE_FORMAT_*64*_FLOAT */ - unsigned format_norm32:1; /* PIPE_FORMAT_*32*NORM */ - unsigned format_scaled32:1; /* PIPE_FORMAT_*32*SCALED */ - - /* Whether vertex fetches don't have to be dword-aligned. */ - /* TRUE if hardware supports it. */ - unsigned fetch_dword_unaligned:1; -}; - struct u_vbuf_mgr_elements { unsigned count; struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS]; @@ -69,7 +54,6 @@ struct u_vbuf_mgr_elements { struct u_vbuf_mgr_priv { struct u_vbuf_mgr b; - struct u_vbuf_caps caps; struct pipe_context *pipe; struct translate_cache *translate_cache; @@ -79,6 +63,8 @@ struct u_vbuf_mgr_priv { void *saved_ve, *fallback_ve; boolean ve_binding_lock; + unsigned saved_buffer_offset[PIPE_MAX_ATTRIBS]; + boolean any_user_vbs; boolean incompatible_vb_layout; }; @@ -87,25 +73,25 @@ static void u_vbuf_mgr_init_format_caps(struct u_vbuf_mgr_priv *mgr) { struct pipe_screen *screen = mgr->pipe->screen; - mgr->caps.format_fixed32 = + mgr->b.caps.format_fixed32 = screen->is_format_supported(screen, PIPE_FORMAT_R32_FIXED, PIPE_BUFFER, 0, PIPE_BIND_VERTEX_BUFFER); - mgr->caps.format_float16 = + mgr->b.caps.format_float16 = screen->is_format_supported(screen, PIPE_FORMAT_R16_FLOAT, PIPE_BUFFER, 0, PIPE_BIND_VERTEX_BUFFER); - mgr->caps.format_float64 = + mgr->b.caps.format_float64 = screen->is_format_supported(screen, PIPE_FORMAT_R64_FLOAT, PIPE_BUFFER, 0, PIPE_BIND_VERTEX_BUFFER); - mgr->caps.format_norm32 = + mgr->b.caps.format_norm32 = screen->is_format_supported(screen, PIPE_FORMAT_R32_UNORM, PIPE_BUFFER, 0, PIPE_BIND_VERTEX_BUFFER) && screen->is_format_supported(screen, PIPE_FORMAT_R32_SNORM, PIPE_BUFFER, 0, PIPE_BIND_VERTEX_BUFFER); - mgr->caps.format_scaled32 = + mgr->b.caps.format_scaled32 = screen->is_format_supported(screen, PIPE_FORMAT_R32_USCALED, PIPE_BUFFER, 0, PIPE_BIND_VERTEX_BUFFER) && screen->is_format_supported(screen, PIPE_FORMAT_R32_SSCALED, PIPE_BUFFER, @@ -128,7 +114,7 @@ u_vbuf_mgr_create(struct pipe_context *pipe, upload_buffer_alignment, upload_buffer_bind); - mgr->caps.fetch_dword_unaligned = + mgr->b.caps.fetch_dword_unaligned = fetch_alignment == U_VERTEX_FETCH_BYTE_ALIGNED; u_vbuf_mgr_init_format_caps(mgr); @@ -182,7 +168,7 @@ u_vbuf_translate_begin(struct u_vbuf_mgr_priv *mgr, /* Check for support. */ if (mgr->ve->ve[i].src_format == mgr->ve->native_format[i] && - (mgr->caps.fetch_dword_unaligned || + (mgr->b.caps.fetch_dword_unaligned || (vb->buffer_offset % 4 == 0 && vb->stride % 4 == 0 && mgr->ve->ve[i].src_offset % 4 == 0))) { @@ -363,7 +349,7 @@ u_vbuf_mgr_create_vertex_elements(struct u_vbuf_mgr *mgrb, /* Choose a native format. * For now we don't care about the alignment, that's going to * be sorted out later. */ - if (!mgr->caps.format_fixed32) { + if (!mgr->b.caps.format_fixed32) { switch (format) { FORMAT_REPLACE(R32_FIXED, R32_FLOAT); FORMAT_REPLACE(R32G32_FIXED, R32G32_FLOAT); @@ -372,7 +358,7 @@ u_vbuf_mgr_create_vertex_elements(struct u_vbuf_mgr *mgrb, default:; } } - if (!mgr->caps.format_float16) { + if (!mgr->b.caps.format_float16) { switch (format) { FORMAT_REPLACE(R16_FLOAT, R32_FLOAT); FORMAT_REPLACE(R16G16_FLOAT, R32G32_FLOAT); @@ -381,7 +367,7 @@ u_vbuf_mgr_create_vertex_elements(struct u_vbuf_mgr *mgrb, default:; } } - if (!mgr->caps.format_float64) { + if (!mgr->b.caps.format_float64) { switch (format) { FORMAT_REPLACE(R64_FLOAT, R32_FLOAT); FORMAT_REPLACE(R64G64_FLOAT, R32G32_FLOAT); @@ -390,7 +376,7 @@ u_vbuf_mgr_create_vertex_elements(struct u_vbuf_mgr *mgrb, default:; } } - if (!mgr->caps.format_norm32) { + if (!mgr->b.caps.format_norm32) { switch (format) { FORMAT_REPLACE(R32_UNORM, R32_FLOAT); FORMAT_REPLACE(R32G32_UNORM, R32G32_FLOAT); @@ -403,7 +389,7 @@ u_vbuf_mgr_create_vertex_elements(struct u_vbuf_mgr *mgrb, default:; } } - if (!mgr->caps.format_scaled32) { + if (!mgr->b.caps.format_scaled32) { switch (format) { FORMAT_REPLACE(R32_USCALED, R32_FLOAT); FORMAT_REPLACE(R32G32_USCALED, R32G32_FLOAT); @@ -425,11 +411,11 @@ u_vbuf_mgr_create_vertex_elements(struct u_vbuf_mgr *mgrb, ve->incompatible_layout = ve->incompatible_layout || ve->ve[i].src_format != ve->native_format[i] || - (!mgr->caps.fetch_dword_unaligned && ve->ve[i].src_offset % 4 != 0); + (!mgr->b.caps.fetch_dword_unaligned && ve->ve[i].src_offset % 4 != 0); } /* Align the formats to the size of DWORD if needed. */ - if (!mgr->caps.fetch_dword_unaligned) { + if (!mgr->b.caps.fetch_dword_unaligned) { for (i = 0; i < count; i++) { ve->native_format_size[i] = align(ve->native_format_size[i], 4); } @@ -470,7 +456,7 @@ void u_vbuf_mgr_set_vertex_buffers(struct u_vbuf_mgr *mgrb, mgr->any_user_vbs = FALSE; mgr->incompatible_vb_layout = FALSE; - if (!mgr->caps.fetch_dword_unaligned) { + if (!mgr->b.caps.fetch_dword_unaligned) { /* Check if the strides and offsets are aligned to the size of DWORD. */ for (i = 0; i < count; i++) { if (bufs[i].buffer) { @@ -488,6 +474,7 @@ void u_vbuf_mgr_set_vertex_buffers(struct u_vbuf_mgr *mgrb, pipe_resource_reference(&mgr->b.vertex_buffer[i].buffer, vb->buffer); pipe_resource_reference(&mgr->b.real_vertex_buffer[i], NULL); + mgr->saved_buffer_offset[i] = vb->buffer_offset; if (!vb->buffer) { continue; @@ -647,6 +634,13 @@ u_vbuf_mgr_draw_begin(struct u_vbuf_mgr *mgrb, void u_vbuf_mgr_draw_end(struct u_vbuf_mgr *mgrb) { struct u_vbuf_mgr_priv *mgr = (struct u_vbuf_mgr_priv*)mgrb; + unsigned i; + + /* buffer offsets were modified in u_vbuf_upload_buffers */ + if (mgr->any_user_vbs) { + for (i = 0; i < mgr->b.nr_vertex_buffers; i++) + mgr->b.vertex_buffer[i].buffer_offset = mgr->saved_buffer_offset[i]; + } if (mgr->fallback_ve) { u_vbuf_translate_end(mgr); diff --git a/src/gallium/auxiliary/util/u_vbuf_mgr.h b/src/gallium/auxiliary/util/u_vbuf_mgr.h index 4e6372435d8..c653ca4346d 100644 --- a/src/gallium/auxiliary/util/u_vbuf_mgr.h +++ b/src/gallium/auxiliary/util/u_vbuf_mgr.h @@ -37,6 +37,21 @@ #include "pipe/p_state.h" #include "util/u_transfer.h" +/* Hardware vertex fetcher limitations can be described by this structure. */ +struct u_vbuf_caps { + /* Vertex format CAPs. */ + /* TRUE if hardware supports it. */ + unsigned format_fixed32:1; /* PIPE_FORMAT_*32*_FIXED */ + unsigned format_float16:1; /* PIPE_FORMAT_*16*_FLOAT */ + unsigned format_float64:1; /* PIPE_FORMAT_*64*_FLOAT */ + unsigned format_norm32:1; /* PIPE_FORMAT_*32*NORM */ + unsigned format_scaled32:1; /* PIPE_FORMAT_*32*SCALED */ + + /* Whether vertex fetches don't have to be dword-aligned. */ + /* TRUE if hardware supports it. */ + unsigned fetch_dword_unaligned:1; +}; + /* The manager. * This structure should also be used to access vertex buffers * from a driver. */ @@ -63,6 +78,8 @@ struct u_vbuf_mgr { * - u_upload_buffer * - u_upload_flush */ struct u_upload_mgr *uploader; + + struct u_vbuf_caps caps; }; struct u_vbuf_resource { diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c index 3bd4af2e3e0..c73f9769446 100644 --- a/src/gallium/auxiliary/vl/vl_compositor.c +++ b/src/gallium/auxiliary/vl/vl_compositor.c @@ -231,6 +231,8 @@ init_pipe_state(struct vl_compositor *c) struct pipe_rasterizer_state rast; struct pipe_sampler_state sampler; struct pipe_blend_state blend; + struct pipe_depth_stencil_alpha_state dsa; + unsigned i; assert(c); @@ -289,6 +291,24 @@ init_pipe_state(struct vl_compositor *c) c->rast = c->pipe->create_rasterizer_state(c->pipe, &rast); + memset(&dsa, 0, sizeof dsa); + dsa.depth.enabled = 0; + dsa.depth.writemask = 0; + dsa.depth.func = PIPE_FUNC_ALWAYS; + for (i = 0; i < 2; ++i) { + dsa.stencil[i].enabled = 0; + dsa.stencil[i].func = PIPE_FUNC_ALWAYS; + dsa.stencil[i].fail_op = PIPE_STENCIL_OP_KEEP; + dsa.stencil[i].zpass_op = PIPE_STENCIL_OP_KEEP; + dsa.stencil[i].zfail_op = PIPE_STENCIL_OP_KEEP; + dsa.stencil[i].valuemask = 0; + dsa.stencil[i].writemask = 0; + } + dsa.alpha.enabled = 0; + dsa.alpha.func = PIPE_FUNC_ALWAYS; + dsa.alpha.ref_value = 0; + c->dsa = c->pipe->create_depth_stencil_alpha_state(c->pipe, &dsa); + c->pipe->bind_depth_stencil_alpha_state(c->pipe, c->dsa); return true; } @@ -296,6 +316,11 @@ static void cleanup_pipe_state(struct vl_compositor *c) { assert(c); + /* Asserted in softpipe_delete_fs_state() for some reason */ + c->pipe->bind_vs_state(c->pipe, NULL); + c->pipe->bind_fs_state(c->pipe, NULL); + + c->pipe->delete_depth_stencil_alpha_state(c->pipe, c->dsa); c->pipe->delete_sampler_state(c->pipe, c->sampler_linear); c->pipe->delete_sampler_state(c->pipe, c->sampler_nearest); c->pipe->delete_blend_state(c->pipe, c->blend); @@ -648,7 +673,6 @@ vl_compositor_set_rgba_layer(struct vl_compositor *c, void vl_compositor_render(struct vl_compositor *c, - enum pipe_mpeg12_picture_type picture_type, struct pipe_surface *dst_surface, struct pipe_video_rect *dst_area, struct pipe_video_rect *dst_clip) diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h index 87ad39be1be..207510092a0 100644 --- a/src/gallium/auxiliary/vl/vl_compositor.h +++ b/src/gallium/auxiliary/vl/vl_compositor.h @@ -68,6 +68,7 @@ struct vl_compositor void *sampler_nearest; void *blend; void *rast; + void *dsa; void *vertex_elems_state; void *vs; @@ -155,7 +156,6 @@ vl_compositor_set_rgba_layer(struct vl_compositor *compositor, */ void vl_compositor_render(struct vl_compositor *compositor, - enum pipe_mpeg12_picture_type picture_type, struct pipe_surface *dst_surface, struct pipe_video_rect *dst_area, struct pipe_video_rect *dst_clip); diff --git a/src/gallium/auxiliary/vl/vl_decoder.c b/src/gallium/auxiliary/vl/vl_decoder.c index fac03359a0f..b23827d300a 100644 --- a/src/gallium/auxiliary/vl/vl_decoder.c +++ b/src/gallium/auxiliary/vl/vl_decoder.c @@ -44,6 +44,19 @@ vl_profile_supported(struct pipe_screen *screen, enum pipe_video_profile profile } } +unsigned +vl_num_buffers_desired(struct pipe_screen *screen, enum pipe_video_profile profile) +{ + assert(screen); + switch (u_reduce_video_profile(profile)) { + case PIPE_VIDEO_CODEC_MPEG12: + return 4; + + default: + return 1; + } +} + struct pipe_video_decoder * vl_create_decoder(struct pipe_context *pipe, enum pipe_video_profile profile, diff --git a/src/gallium/auxiliary/vl/vl_decoder.h b/src/gallium/auxiliary/vl/vl_decoder.h index 0e9280dbfa2..fed529c9bc7 100644 --- a/src/gallium/auxiliary/vl/vl_decoder.h +++ b/src/gallium/auxiliary/vl/vl_decoder.h @@ -38,6 +38,12 @@ bool vl_profile_supported(struct pipe_screen *screen, enum pipe_video_profile profile); /** + * the desired number of buffers for optimal operation + */ +unsigned +vl_num_buffers_desired(struct pipe_screen *screen, enum pipe_video_profile profile); + +/** * standard implementation of pipe->create_video_decoder */ struct pipe_video_decoder * diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c index 645d06a0925..ad786145392 100644 --- a/src/gallium/auxiliary/vl/vl_idct.c +++ b/src/gallium/auxiliary/vl/vl_idct.c @@ -143,7 +143,7 @@ static void * create_mismatch_vert_shader(struct vl_idct *idct) { struct ureg_program *shader; - struct ureg_src vrect, vpos; + struct ureg_src vpos; struct ureg_src scale; struct ureg_dst t_tex; struct ureg_dst o_vpos, o_addr[2]; @@ -152,7 +152,6 @@ create_mismatch_vert_shader(struct vl_idct *idct) if (!shader) return NULL; - vrect = ureg_DECL_vs_input(shader, VS_I_RECT); vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); t_tex = ureg_DECL_temporary(shader); diff --git a/src/gallium/auxiliary/vl/vl_mc.c b/src/gallium/auxiliary/vl/vl_mc.c index bd05205b52d..0b3723c9792 100644 --- a/src/gallium/auxiliary/vl/vl_mc.c +++ b/src/gallium/auxiliary/vl/vl_mc.c @@ -103,16 +103,15 @@ create_ref_vert_shader(struct vl_mc *r) { struct ureg_program *shader; struct ureg_src mv_scale; - struct ureg_src vrect, vmv[2]; + struct ureg_src vmv[2]; struct ureg_dst t_vpos; - struct ureg_dst o_vpos, o_vmv[2]; + struct ureg_dst o_vmv[2]; unsigned i; shader = ureg_create(TGSI_PROCESSOR_VERTEX); if (!shader) return NULL; - vrect = ureg_DECL_vs_input(shader, VS_I_RECT); vmv[0] = ureg_DECL_vs_input(shader, VS_I_MV_TOP); vmv[1] = ureg_DECL_vs_input(shader, VS_I_MV_BOTTOM); @@ -121,7 +120,6 @@ create_ref_vert_shader(struct vl_mc *r) (float)MACROBLOCK_HEIGHT / r->buffer_height) ); - o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); o_vmv[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP); o_vmv[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM); diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c index 9dd032e911d..db05b151f95 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c @@ -1,6 +1,7 @@ /************************************************************************** * - * Copyright 2011 Christian König. + * Copyright 2011 Maarten Lankhorst + * Copyright 2011 Christian König * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -25,1813 +26,967 @@ * **************************************************************************/ -/** - * This file is based uppon slice_xvmc.c and vlc.h from the xine project, - * which in turn is based on mpeg2dec. The following is the original copyright: - * - * Copyright (C) 2000-2002 Michel Lespinasse <[email protected]> - * Copyright (C) 1999-2000 Aaron Holtzman <[email protected]> - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <stdint.h> - -#include <pipe/p_compiler.h> -#include <pipe/p_video_state.h> +#include <pipe/p_video_decoder.h> +#include <util/u_memory.h> #include "vl_vlc.h" #include "vl_mpeg12_bitstream.h" -/* take num bits from the high part of bit_buf and zero extend them */ -#define UBITS(buf,num) (((uint32_t)(buf)) >> (32 - (num))) - -/* take num bits from the high part of bit_buf and sign extend them */ -#define SBITS(buf,num) (((int32_t)(buf)) >> (32 - (num))) - -/* macroblock modes */ -#define MACROBLOCK_INTRA 1 -#define MACROBLOCK_PATTERN 2 -#define MACROBLOCK_MOTION_BACKWARD 4 -#define MACROBLOCK_MOTION_FORWARD 8 -#define MACROBLOCK_QUANT 16 - -/* motion_type */ -#define MOTION_TYPE_MASK (3*64) -#define MOTION_TYPE_BASE 64 -#define MC_FIELD (1*64) -#define MC_FRAME (2*64) -#define MC_16X8 (2*64) -#define MC_DMV (3*64) - -/* picture structure */ -#define TOP_FIELD 1 -#define BOTTOM_FIELD 2 -#define FRAME_PICTURE 3 - -/* picture coding type (mpeg2 header) */ -#define I_TYPE 1 -#define P_TYPE 2 -#define B_TYPE 3 -#define D_TYPE 4 - -typedef struct { - uint8_t modes; - uint8_t len; -} MBtab; - -typedef struct { - uint8_t delta; - uint8_t len; -} MVtab; - -typedef struct { - int8_t dmv; - uint8_t len; -} DMVtab; - -typedef struct { - uint8_t cbp; - uint8_t len; -} CBPtab; - -typedef struct { - uint8_t size; - uint8_t len; -} DCtab; - -typedef struct { - uint8_t run; - uint8_t level; - uint8_t len; -} DCTtab; - -typedef struct { - uint8_t mba; - uint8_t len; -} MBAtab; - -#define INTRA MACROBLOCK_INTRA -#define QUANT MACROBLOCK_QUANT -#define MC MACROBLOCK_MOTION_FORWARD -#define CODED MACROBLOCK_PATTERN -#define FWD MACROBLOCK_MOTION_FORWARD -#define BWD MACROBLOCK_MOTION_BACKWARD -#define INTER MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD - -static const MBtab MB_I [] = { - {INTRA|QUANT, 2}, {INTRA, 1} -}; - -static const MBtab MB_P [] = { - {INTRA|QUANT, 6}, {CODED|QUANT, 5}, {MC|CODED|QUANT, 5}, {INTRA, 5}, - {MC, 3}, {MC, 3}, {MC, 3}, {MC, 3}, - {CODED, 2}, {CODED, 2}, {CODED, 2}, {CODED, 2}, - {CODED, 2}, {CODED, 2}, {CODED, 2}, {CODED, 2}, - {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, - {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, - {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, - {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1} -}; - -static const MBtab MB_B [] = { - {0, 0}, {INTRA|QUANT, 6}, - {BWD|CODED|QUANT, 6}, {FWD|CODED|QUANT, 6}, - {INTER|CODED|QUANT, 5}, {INTER|CODED|QUANT, 5}, - {INTRA, 5}, {INTRA, 5}, - {FWD, 4}, {FWD, 4}, {FWD, 4}, {FWD, 4}, - {FWD|CODED, 4}, {FWD|CODED, 4}, {FWD|CODED, 4}, {FWD|CODED, 4}, - {BWD, 3}, {BWD, 3}, {BWD, 3}, {BWD, 3}, - {BWD, 3}, {BWD, 3}, {BWD, 3}, {BWD, 3}, - {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, - {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, - {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, - {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, - {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, - {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, - {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, - {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, - {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, - {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2} -}; - -#undef INTRA -#undef QUANT -#undef MC -#undef CODED -#undef FWD -#undef BWD -#undef INTER - -static const MVtab MV_4 [] = { - { 3, 6}, { 2, 4}, { 1, 3}, { 1, 3}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2} -}; - -static const MVtab MV_10 [] = { - { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, - { 0,10}, { 0,10}, { 0,10}, { 0,10}, {15,10}, {14,10}, {13,10}, {12,10}, - {11,10}, {10,10}, { 9, 9}, { 9, 9}, { 8, 9}, { 8, 9}, { 7, 9}, { 7, 9}, - { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, - { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, - { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7} -}; - -static const DMVtab DMV_2 [] = { - { 0, 1}, { 0, 1}, { 1, 2}, {-1, 2} -}; - -static const CBPtab CBP_7 [] = { - {0x22, 7}, {0x12, 7}, {0x0a, 7}, {0x06, 7}, - {0x21, 7}, {0x11, 7}, {0x09, 7}, {0x05, 7}, - {0x3f, 6}, {0x3f, 6}, {0x03, 6}, {0x03, 6}, - {0x24, 6}, {0x24, 6}, {0x18, 6}, {0x18, 6}, - {0x3e, 5}, {0x3e, 5}, {0x3e, 5}, {0x3e, 5}, - {0x02, 5}, {0x02, 5}, {0x02, 5}, {0x02, 5}, - {0x3d, 5}, {0x3d, 5}, {0x3d, 5}, {0x3d, 5}, - {0x01, 5}, {0x01, 5}, {0x01, 5}, {0x01, 5}, - {0x38, 5}, {0x38, 5}, {0x38, 5}, {0x38, 5}, - {0x34, 5}, {0x34, 5}, {0x34, 5}, {0x34, 5}, - {0x2c, 5}, {0x2c, 5}, {0x2c, 5}, {0x2c, 5}, - {0x1c, 5}, {0x1c, 5}, {0x1c, 5}, {0x1c, 5}, - {0x28, 5}, {0x28, 5}, {0x28, 5}, {0x28, 5}, - {0x14, 5}, {0x14, 5}, {0x14, 5}, {0x14, 5}, - {0x30, 5}, {0x30, 5}, {0x30, 5}, {0x30, 5}, - {0x0c, 5}, {0x0c, 5}, {0x0c, 5}, {0x0c, 5}, - {0x20, 4}, {0x20, 4}, {0x20, 4}, {0x20, 4}, - {0x20, 4}, {0x20, 4}, {0x20, 4}, {0x20, 4}, - {0x10, 4}, {0x10, 4}, {0x10, 4}, {0x10, 4}, - {0x10, 4}, {0x10, 4}, {0x10, 4}, {0x10, 4}, - {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4}, - {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4}, - {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4}, - {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4}, - {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, - {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, - {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, - {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3} -}; - -static const CBPtab CBP_9 [] = { - {0, 0}, {0x00, 9}, {0x27, 9}, {0x1b, 9}, - {0x3b, 9}, {0x37, 9}, {0x2f, 9}, {0x1f, 9}, - {0x3a, 8}, {0x3a, 8}, {0x36, 8}, {0x36, 8}, - {0x2e, 8}, {0x2e, 8}, {0x1e, 8}, {0x1e, 8}, - {0x39, 8}, {0x39, 8}, {0x35, 8}, {0x35, 8}, - {0x2d, 8}, {0x2d, 8}, {0x1d, 8}, {0x1d, 8}, - {0x26, 8}, {0x26, 8}, {0x1a, 8}, {0x1a, 8}, - {0x25, 8}, {0x25, 8}, {0x19, 8}, {0x19, 8}, - {0x2b, 8}, {0x2b, 8}, {0x17, 8}, {0x17, 8}, - {0x33, 8}, {0x33, 8}, {0x0f, 8}, {0x0f, 8}, - {0x2a, 8}, {0x2a, 8}, {0x16, 8}, {0x16, 8}, - {0x32, 8}, {0x32, 8}, {0x0e, 8}, {0x0e, 8}, - {0x29, 8}, {0x29, 8}, {0x15, 8}, {0x15, 8}, - {0x31, 8}, {0x31, 8}, {0x0d, 8}, {0x0d, 8}, - {0x23, 8}, {0x23, 8}, {0x13, 8}, {0x13, 8}, - {0x0b, 8}, {0x0b, 8}, {0x07, 8}, {0x07, 8} +enum { + dct_End_of_Block = 0xFF, + dct_Escape = 0xFE, + dct_DC = 0xFD, + dct_AC = 0xFC }; -static const DCtab DC_lum_5 [] = { - {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, - {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, - {0, 3}, {0, 3}, {0, 3}, {0, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3}, - {4, 3}, {4, 3}, {4, 3}, {4, 3}, {5, 4}, {5, 4}, {6, 5} +struct dct_coeff +{ + uint8_t length; + uint8_t run; + int16_t level; }; -static const DCtab DC_chrom_5 [] = { - {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, - {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, - {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, - {3, 3}, {3, 3}, {3, 3}, {3, 3}, {4, 4}, {4, 4}, {5, 5} +struct dct_coeff_compressed +{ + uint32_t bitcode; + struct dct_coeff coeff; }; -static const DCtab DC_long [] = { - {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5}, - {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5}, - {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, { 7, 6}, { 7, 6}, - {8, 7}, {8, 7}, {8, 7}, {8, 7}, {9, 8}, {9, 8}, {10, 9}, {11, 9} +/* coding table as found in the spec annex B.5 table B-1 */ +static const struct vl_vlc_compressed macroblock_address_increment[] = { + { 0x8000, { 1, 1 } }, + { 0x6000, { 3, 2 } }, + { 0x4000, { 3, 3 } }, + { 0x3000, { 4, 4 } }, + { 0x2000, { 4, 5 } }, + { 0x1800, { 5, 6 } }, + { 0x1000, { 5, 7 } }, + { 0x0e00, { 7, 8 } }, + { 0x0c00, { 7, 9 } }, + { 0x0b00, { 8, 10 } }, + { 0x0a00, { 8, 11 } }, + { 0x0900, { 8, 12 } }, + { 0x0800, { 8, 13 } }, + { 0x0700, { 8, 14 } }, + { 0x0600, { 8, 15 } }, + { 0x05c0, { 10, 16 } }, + { 0x0580, { 10, 17 } }, + { 0x0540, { 10, 18 } }, + { 0x0500, { 10, 19 } }, + { 0x04c0, { 10, 20 } }, + { 0x0480, { 10, 21 } }, + { 0x0460, { 11, 22 } }, + { 0x0440, { 11, 23 } }, + { 0x0420, { 11, 24 } }, + { 0x0400, { 11, 25 } }, + { 0x03e0, { 11, 26 } }, + { 0x03c0, { 11, 27 } }, + { 0x03a0, { 11, 28 } }, + { 0x0380, { 11, 29 } }, + { 0x0360, { 11, 30 } }, + { 0x0340, { 11, 31 } }, + { 0x0320, { 11, 32 } }, + { 0x0300, { 11, 33 } } }; -static const DCTtab DCT_16 [] = { - {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, - {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, - {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, - {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, - { 2,18, 0}, { 2,17, 0}, { 2,16, 0}, { 2,15, 0}, - { 7, 3, 0}, { 17, 2, 0}, { 16, 2, 0}, { 15, 2, 0}, - { 14, 2, 0}, { 13, 2, 0}, { 12, 2, 0}, { 32, 1, 0}, - { 31, 1, 0}, { 30, 1, 0}, { 29, 1, 0}, { 28, 1, 0} -}; +#define Q PIPE_MPEG12_MB_TYPE_QUANT +#define F PIPE_MPEG12_MB_TYPE_MOTION_FORWARD +#define B PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD +#define P PIPE_MPEG12_MB_TYPE_PATTERN +#define I PIPE_MPEG12_MB_TYPE_INTRA -static const DCTtab DCT_15 [] = { - { 1,40,15}, { 1,39,15}, { 1,38,15}, { 1,37,15}, - { 1,36,15}, { 1,35,15}, { 1,34,15}, { 1,33,15}, - { 1,32,15}, { 2,14,15}, { 2,13,15}, { 2,12,15}, - { 2,11,15}, { 2,10,15}, { 2, 9,15}, { 2, 8,15}, - { 1,31,14}, { 1,31,14}, { 1,30,14}, { 1,30,14}, - { 1,29,14}, { 1,29,14}, { 1,28,14}, { 1,28,14}, - { 1,27,14}, { 1,27,14}, { 1,26,14}, { 1,26,14}, - { 1,25,14}, { 1,25,14}, { 1,24,14}, { 1,24,14}, - { 1,23,14}, { 1,23,14}, { 1,22,14}, { 1,22,14}, - { 1,21,14}, { 1,21,14}, { 1,20,14}, { 1,20,14}, - { 1,19,14}, { 1,19,14}, { 1,18,14}, { 1,18,14}, - { 1,17,14}, { 1,17,14}, { 1,16,14}, { 1,16,14} +/* coding table as found in the spec annex B.5 table B-2 */ +static const struct vl_vlc_compressed macroblock_type_i[] = { + { 0x8000, { 1, I } }, + { 0x4000, { 2, Q|I } } }; -static const DCTtab DCT_13 [] = { - { 11, 2,13}, { 10, 2,13}, { 6, 3,13}, { 4, 4,13}, - { 3, 5,13}, { 2, 7,13}, { 2, 6,13}, { 1,15,13}, - { 1,14,13}, { 1,13,13}, { 1,12,13}, { 27, 1,13}, - { 26, 1,13}, { 25, 1,13}, { 24, 1,13}, { 23, 1,13}, - { 1,11,12}, { 1,11,12}, { 9, 2,12}, { 9, 2,12}, - { 5, 3,12}, { 5, 3,12}, { 1,10,12}, { 1,10,12}, - { 3, 4,12}, { 3, 4,12}, { 8, 2,12}, { 8, 2,12}, - { 22, 1,12}, { 22, 1,12}, { 21, 1,12}, { 21, 1,12}, - { 1, 9,12}, { 1, 9,12}, { 20, 1,12}, { 20, 1,12}, - { 19, 1,12}, { 19, 1,12}, { 2, 5,12}, { 2, 5,12}, - { 4, 3,12}, { 4, 3,12}, { 1, 8,12}, { 1, 8,12}, - { 7, 2,12}, { 7, 2,12}, { 18, 1,12}, { 18, 1,12} +/* coding table as found in the spec annex B.5 table B-3 */ +static const struct vl_vlc_compressed macroblock_type_p[] = { + { 0x8000, { 1, F|P } }, + { 0x4000, { 2, P } }, + { 0x2000, { 3, F } }, + { 0x1800, { 5, I } }, + { 0x1000, { 5, Q|F|P } }, + { 0x0800, { 5, Q|P } }, + { 0x0400, { 6, Q|I } } }; -static const DCTtab DCT_B14_10 [] = { - { 17, 1,10}, { 6, 2,10}, { 1, 7,10}, { 3, 3,10}, - { 2, 4,10}, { 16, 1,10}, { 15, 1,10}, { 5, 2,10} +/* coding table as found in the spec annex B.5 table B-4 */ +static const struct vl_vlc_compressed macroblock_type_b[] = { + { 0x8000, { 2, F|B } }, + { 0xC000, { 2, F|B|P } }, + { 0x4000, { 3, B } }, + { 0x6000, { 3, B|P } }, + { 0x2000, { 4, F } }, + { 0x3000, { 4, F|P } }, + { 0x1800, { 5, I } }, + { 0x1000, { 5, Q|F|B|P } }, + { 0x0C00, { 6, Q|F|P } }, + { 0x0800, { 6, Q|B|P } }, + { 0x0400, { 6, Q|I } } }; -static const DCTtab DCT_B14_8 [] = { - { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, - { 3, 2, 7}, { 3, 2, 7}, { 10, 1, 7}, { 10, 1, 7}, - { 1, 4, 7}, { 1, 4, 7}, { 9, 1, 7}, { 9, 1, 7}, - { 8, 1, 6}, { 8, 1, 6}, { 8, 1, 6}, { 8, 1, 6}, - { 7, 1, 6}, { 7, 1, 6}, { 7, 1, 6}, { 7, 1, 6}, - { 2, 2, 6}, { 2, 2, 6}, { 2, 2, 6}, { 2, 2, 6}, - { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, - { 14, 1, 8}, { 1, 6, 8}, { 13, 1, 8}, { 12, 1, 8}, - { 4, 2, 8}, { 2, 3, 8}, { 1, 5, 8}, { 11, 1, 8} +#undef Q +#undef F +#undef B +#undef P +#undef I + +/* coding table as found in the spec annex B.5 table B-9 */ +static const struct vl_vlc_compressed coded_block_pattern[] = { + { 0xE000, { 3, 60 } }, + { 0xD000, { 4, 4 } }, + { 0xC000, { 4, 8 } }, + { 0xB000, { 4, 16 } }, + { 0xA000, { 4, 32 } }, + { 0x9800, { 5, 12 } }, + { 0x9000, { 5, 48 } }, + { 0x8800, { 5, 20 } }, + { 0x8000, { 5, 40 } }, + { 0x7800, { 5, 28 } }, + { 0x7000, { 5, 44 } }, + { 0x6800, { 5, 52 } }, + { 0x6000, { 5, 56 } }, + { 0x5800, { 5, 1 } }, + { 0x5000, { 5, 61 } }, + { 0x4800, { 5, 2 } }, + { 0x4000, { 5, 62 } }, + { 0x3C00, { 6, 24 } }, + { 0x3800, { 6, 36 } }, + { 0x3400, { 6, 3 } }, + { 0x3000, { 6, 63 } }, + { 0x2E00, { 7, 5 } }, + { 0x2C00, { 7, 9 } }, + { 0x2A00, { 7, 17 } }, + { 0x2800, { 7, 33 } }, + { 0x2600, { 7, 6 } }, + { 0x2400, { 7, 10 } }, + { 0x2200, { 7, 18 } }, + { 0x2000, { 7, 34 } }, + { 0x1F00, { 8, 7 } }, + { 0x1E00, { 8, 11 } }, + { 0x1D00, { 8, 19 } }, + { 0x1C00, { 8, 35 } }, + { 0x1B00, { 8, 13 } }, + { 0x1A00, { 8, 49 } }, + { 0x1900, { 8, 21 } }, + { 0x1800, { 8, 41 } }, + { 0x1700, { 8, 14 } }, + { 0x1600, { 8, 50 } }, + { 0x1500, { 8, 22 } }, + { 0x1400, { 8, 42 } }, + { 0x1300, { 8, 15 } }, + { 0x1200, { 8, 51 } }, + { 0x1100, { 8, 23 } }, + { 0x1000, { 8, 43 } }, + { 0x0F00, { 8, 25 } }, + { 0x0E00, { 8, 37 } }, + { 0x0D00, { 8, 26 } }, + { 0x0C00, { 8, 38 } }, + { 0x0B00, { 8, 29 } }, + { 0x0A00, { 8, 45 } }, + { 0x0900, { 8, 53 } }, + { 0x0800, { 8, 57 } }, + { 0x0700, { 8, 30 } }, + { 0x0600, { 8, 46 } }, + { 0x0500, { 8, 54 } }, + { 0x0400, { 8, 58 } }, + { 0x0380, { 9, 31 } }, + { 0x0300, { 9, 47 } }, + { 0x0280, { 9, 55 } }, + { 0x0200, { 9, 59 } }, + { 0x0180, { 9, 27 } }, + { 0x0100, { 9, 39 } }, + { 0x0080, { 9, 0 } } }; -static const DCTtab DCT_B14AC_5 [] = { - { 1, 3, 5}, { 5, 1, 5}, { 4, 1, 5}, - { 1, 2, 4}, { 1, 2, 4}, { 3, 1, 4}, { 3, 1, 4}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, - {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2} +/* coding table as found in the spec annex B.5 table B-10 */ +static const struct vl_vlc_compressed motion_code[] = { + { 0x0320, { 11, -16 } }, + { 0x0360, { 11, -15 } }, + { 0x03a0, { 11, -14 } }, + { 0x03e0, { 11, -13 } }, + { 0x0420, { 11, -12 } }, + { 0x0460, { 11, -11 } }, + { 0x04c0, { 10, -10 } }, + { 0x0540, { 10, -9 } }, + { 0x05c0, { 10, -8 } }, + { 0x0700, { 8, -7 } }, + { 0x0900, { 8, -6 } }, + { 0x0b00, { 8, -5 } }, + { 0x0e00, { 7, -4 } }, + { 0x1800, { 5, -3 } }, + { 0x3000, { 4, -2 } }, + { 0x6000, { 3, -1 } }, + { 0x8000, { 1, 0 } }, + { 0x4000, { 3, 1 } }, + { 0x2000, { 4, 2 } }, + { 0x1000, { 5, 3 } }, + { 0x0c00, { 7, 4 } }, + { 0x0a00, { 8, 5 } }, + { 0x0800, { 8, 6 } }, + { 0x0600, { 8, 7 } }, + { 0x0580, { 10, 8 } }, + { 0x0500, { 10, 9 } }, + { 0x0480, { 10, 10 } }, + { 0x0440, { 11, 11 } }, + { 0x0400, { 11, 12 } }, + { 0x03c0, { 11, 13 } }, + { 0x0380, { 11, 14 } }, + { 0x0340, { 11, 15 } }, + { 0x0300, { 11, 16 } } }; -static const DCTtab DCT_B14DC_5 [] = { - { 1, 3, 5}, { 5, 1, 5}, { 4, 1, 5}, - { 1, 2, 4}, { 1, 2, 4}, { 3, 1, 4}, { 3, 1, 4}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, - { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, - { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, - { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1} +/* coding table as found in the spec annex B.5 table B-11 */ +static const struct vl_vlc_compressed dmvector[] = { + { 0x0000, { 1, 0 } }, + { 0x8000, { 2, 1 } }, + { 0xc000, { 2, -1 } } }; -static const DCTtab DCT_B15_10 [] = { - { 6, 2, 9}, { 6, 2, 9}, { 15, 1, 9}, { 15, 1, 9}, - { 3, 4,10}, { 17, 1,10}, { 16, 1, 9}, { 16, 1, 9} +/* coding table as found in the spec annex B.5 table B-12 */ +static const struct vl_vlc_compressed dct_dc_size_luminance[] = { + { 0x8000, { 3, 0 } }, + { 0x0000, { 2, 1 } }, + { 0x4000, { 2, 2 } }, + { 0xA000, { 3, 3 } }, + { 0xC000, { 3, 4 } }, + { 0xE000, { 4, 5 } }, + { 0xF000, { 5, 6 } }, + { 0xF800, { 6, 7 } }, + { 0xFC00, { 7, 8 } }, + { 0xFE00, { 8, 9 } }, + { 0xFF00, { 9, 10 } }, + { 0xFF80, { 9, 11 } } }; -static const DCTtab DCT_B15_8 [] = { - { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, - { 8, 1, 7}, { 8, 1, 7}, { 9, 1, 7}, { 9, 1, 7}, - { 7, 1, 7}, { 7, 1, 7}, { 3, 2, 7}, { 3, 2, 7}, - { 1, 7, 6}, { 1, 7, 6}, { 1, 7, 6}, { 1, 7, 6}, - { 1, 6, 6}, { 1, 6, 6}, { 1, 6, 6}, { 1, 6, 6}, - { 5, 1, 6}, { 5, 1, 6}, { 5, 1, 6}, { 5, 1, 6}, - { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, - { 2, 5, 8}, { 12, 1, 8}, { 1,11, 8}, { 1,10, 8}, - { 14, 1, 8}, { 13, 1, 8}, { 4, 2, 8}, { 2, 4, 8}, - { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, - { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, - { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, - { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, - { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, - { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, - {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, - {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, - {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, - { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, - { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, - { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, - { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, - { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, - { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, - { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, - { 10, 1, 7}, { 10, 1, 7}, { 2, 3, 7}, { 2, 3, 7}, - { 11, 1, 7}, { 11, 1, 7}, { 1, 8, 7}, { 1, 8, 7}, - { 1, 9, 7}, { 1, 9, 7}, { 1,12, 8}, { 1,13, 8}, - { 3, 3, 8}, { 5, 2, 8}, { 1,14, 8}, { 1,15, 8} +/* coding table as found in the spec annex B.5 table B-13 */ +static const struct vl_vlc_compressed dct_dc_size_chrominance[] = { + { 0x0000, { 2, 0 } }, + { 0x4000, { 2, 1 } }, + { 0x8000, { 2, 2 } }, + { 0xC000, { 3, 3 } }, + { 0xE000, { 4, 4 } }, + { 0xF000, { 5, 5 } }, + { 0xF800, { 6, 6 } }, + { 0xFC00, { 7, 7 } }, + { 0xFE00, { 8, 8 } }, + { 0xFF00, { 9, 9 } }, + { 0xFF80, { 10, 10 } }, + { 0xFFC0, { 10, 11 } } }; -static const MBAtab MBA_5 [] = { - {6, 5}, {5, 5}, {4, 4}, {4, 4}, {3, 4}, {3, 4}, - {2, 3}, {2, 3}, {2, 3}, {2, 3}, {1, 3}, {1, 3}, {1, 3}, {1, 3}, - {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, - {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1} +/* coding table as found in the spec annex B.5 table B-14 */ +static const struct dct_coeff_compressed dct_coeff_tbl_zero[] = { + { 0x8000, { 2, dct_End_of_Block, 0 } }, + { 0x8000, { 1, dct_DC, 1 } }, + { 0xC000, { 2, dct_AC, 1 } }, + { 0x6000, { 3, 1, 1 } }, + { 0x4000, { 4, 0, 2 } }, + { 0x5000, { 4, 2, 1 } }, + { 0x2800, { 5, 0, 3 } }, + { 0x3800, { 5, 3, 1 } }, + { 0x3000, { 5, 4, 1 } }, + { 0x1800, { 6, 1, 2 } }, + { 0x1C00, { 6, 5, 1 } }, + { 0x1400, { 6, 6, 1 } }, + { 0x1000, { 6, 7, 1 } }, + { 0x0C00, { 7, 0, 4 } }, + { 0x0800, { 7, 2, 2 } }, + { 0x0E00, { 7, 8, 1 } }, + { 0x0A00, { 7, 9, 1 } }, + { 0x0400, { 6, dct_Escape, 0 } }, + { 0x2600, { 8, 0, 5 } }, + { 0x2100, { 8, 0, 6 } }, + { 0x2500, { 8, 1, 3 } }, + { 0x2400, { 8, 3, 2 } }, + { 0x2700, { 8, 10, 1 } }, + { 0x2300, { 8, 11, 1 } }, + { 0x2200, { 8, 12, 1 } }, + { 0x2000, { 8, 13, 1 } }, + { 0x0280, { 10, 0, 7 } }, + { 0x0300, { 10, 1, 4 } }, + { 0x02C0, { 10, 2, 3 } }, + { 0x03C0, { 10, 4, 2 } }, + { 0x0240, { 10, 5, 2 } }, + { 0x0380, { 10, 14, 1 } }, + { 0x0340, { 10, 15, 1 } }, + { 0x0200, { 10, 16, 1 } }, + { 0x01D0, { 12, 0, 8 } }, + { 0x0180, { 12, 0, 9 } }, + { 0x0130, { 12, 0, 10 } }, + { 0x0100, { 12, 0, 11 } }, + { 0x01B0, { 12, 1, 5 } }, + { 0x0140, { 12, 2, 4 } }, + { 0x01C0, { 12, 3, 3 } }, + { 0x0120, { 12, 4, 3 } }, + { 0x01E0, { 12, 6, 2 } }, + { 0x0150, { 12, 7, 2 } }, + { 0x0110, { 12, 8, 2 } }, + { 0x01F0, { 12, 17, 1 } }, + { 0x01A0, { 12, 18, 1 } }, + { 0x0190, { 12, 19, 1 } }, + { 0x0170, { 12, 20, 1 } }, + { 0x0160, { 12, 21, 1 } }, + { 0x00D0, { 13, 0, 12 } }, + { 0x00C8, { 13, 0, 13 } }, + { 0x00C0, { 13, 0, 14 } }, + { 0x00B8, { 13, 0, 15 } }, + { 0x00B0, { 13, 1, 6 } }, + { 0x00A8, { 13, 1, 7 } }, + { 0x00A0, { 13, 2, 5 } }, + { 0x0098, { 13, 3, 4 } }, + { 0x0090, { 13, 5, 3 } }, + { 0x0088, { 13, 9, 2 } }, + { 0x0080, { 13, 10, 2 } }, + { 0x00F8, { 13, 22, 1 } }, + { 0x00F0, { 13, 23, 1 } }, + { 0x00E8, { 13, 24, 1 } }, + { 0x00E0, { 13, 25, 1 } }, + { 0x00D8, { 13, 26, 1 } }, + { 0x007C, { 14, 0, 16 } }, + { 0x0078, { 14, 0, 17 } }, + { 0x0074, { 14, 0, 18 } }, + { 0x0070, { 14, 0, 19 } }, + { 0x006C, { 14, 0, 20 } }, + { 0x0068, { 14, 0, 21 } }, + { 0x0064, { 14, 0, 22 } }, + { 0x0060, { 14, 0, 23 } }, + { 0x005C, { 14, 0, 24 } }, + { 0x0058, { 14, 0, 25 } }, + { 0x0054, { 14, 0, 26 } }, + { 0x0050, { 14, 0, 27 } }, + { 0x004C, { 14, 0, 28 } }, + { 0x0048, { 14, 0, 29 } }, + { 0x0044, { 14, 0, 30 } }, + { 0x0040, { 14, 0, 31 } }, + { 0x0030, { 15, 0, 32 } }, + { 0x002E, { 15, 0, 33 } }, + { 0x002C, { 15, 0, 34 } }, + { 0x002A, { 15, 0, 35 } }, + { 0x0028, { 15, 0, 36 } }, + { 0x0026, { 15, 0, 37 } }, + { 0x0024, { 15, 0, 38 } }, + { 0x0022, { 15, 0, 39 } }, + { 0x0020, { 15, 0, 40 } }, + { 0x003E, { 15, 1, 8 } }, + { 0x003C, { 15, 1, 9 } }, + { 0x003A, { 15, 1, 10 } }, + { 0x0038, { 15, 1, 11 } }, + { 0x0036, { 15, 1, 12 } }, + { 0x0034, { 15, 1, 13 } }, + { 0x0032, { 15, 1, 14 } }, + { 0x0013, { 16, 1, 15 } }, + { 0x0012, { 16, 1, 16 } }, + { 0x0011, { 16, 1, 17 } }, + { 0x0010, { 16, 1, 18 } }, + { 0x0014, { 16, 6, 3 } }, + { 0x001A, { 16, 11, 2 } }, + { 0x0019, { 16, 12, 2 } }, + { 0x0018, { 16, 13, 2 } }, + { 0x0017, { 16, 14, 2 } }, + { 0x0016, { 16, 15, 2 } }, + { 0x0015, { 16, 16, 2 } }, + { 0x001F, { 16, 27, 1 } }, + { 0x001E, { 16, 28, 1 } }, + { 0x001D, { 16, 29, 1 } }, + { 0x001C, { 16, 30, 1 } }, + { 0x001B, { 16, 31, 1 } } }; -static const MBAtab MBA_11 [] = { - {32, 11}, {31, 11}, {30, 11}, {29, 11}, - {28, 11}, {27, 11}, {26, 11}, {25, 11}, - {24, 11}, {23, 11}, {22, 11}, {21, 11}, - {20, 10}, {20, 10}, {19, 10}, {19, 10}, - {18, 10}, {18, 10}, {17, 10}, {17, 10}, - {16, 10}, {16, 10}, {15, 10}, {15, 10}, - {14, 8}, {14, 8}, {14, 8}, {14, 8}, - {14, 8}, {14, 8}, {14, 8}, {14, 8}, - {13, 8}, {13, 8}, {13, 8}, {13, 8}, - {13, 8}, {13, 8}, {13, 8}, {13, 8}, - {12, 8}, {12, 8}, {12, 8}, {12, 8}, - {12, 8}, {12, 8}, {12, 8}, {12, 8}, - {11, 8}, {11, 8}, {11, 8}, {11, 8}, - {11, 8}, {11, 8}, {11, 8}, {11, 8}, - {10, 8}, {10, 8}, {10, 8}, {10, 8}, - {10, 8}, {10, 8}, {10, 8}, {10, 8}, - { 9, 8}, { 9, 8}, { 9, 8}, { 9, 8}, - { 9, 8}, { 9, 8}, { 9, 8}, { 9, 8}, - { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, - { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, - { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, - { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, - { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, - { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, - { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, - { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7} +/* coding table as found in the spec annex B.5 table B-15 */ +static const struct dct_coeff_compressed dct_coeff_tbl_one[] = { + { 0x6000, { 4, dct_End_of_Block, 0 } }, + { 0x8000, { 2, 0, 1 } }, + { 0x4000, { 3, 1, 1 } }, + { 0xC000, { 3, 0, 2 } }, + { 0x2800, { 5, 2, 1 } }, + { 0x7000, { 4, 0, 3 } }, + { 0x3800, { 5, 3, 1 } }, + { 0x1800, { 6, 4, 1 } }, + { 0x3000, { 5, 1, 2 } }, + { 0x1C00, { 6, 5, 1 } }, + { 0x0C00, { 7, 6, 1 } }, + { 0x0800, { 7, 7, 1 } }, + { 0xE000, { 5, 0, 4 } }, + { 0x0E00, { 7, 2, 2 } }, + { 0x0A00, { 7, 8, 1 } }, + { 0xF000, { 7, 9, 1 } }, + { 0x0400, { 6, dct_Escape, 0 } }, + { 0xE800, { 5, 0, 5 } }, + { 0x1400, { 6, 0, 6 } }, + { 0xF200, { 7, 1, 3 } }, + { 0x2600, { 8, 3, 2 } }, + { 0xF400, { 7, 10, 1 } }, + { 0x2100, { 8, 11, 1 } }, + { 0x2500, { 8, 12, 1 } }, + { 0x2400, { 8, 13, 1 } }, + { 0x1000, { 6, 0, 7 } }, + { 0x2700, { 8, 1, 4 } }, + { 0xFC00, { 8, 2, 3 } }, + { 0xFD00, { 8, 4, 2 } }, + { 0x0200, { 9, 5, 2 } }, + { 0x0280, { 9, 14, 1 } }, + { 0x0380, { 9, 15, 1 } }, + { 0x0340, { 10, 16, 1 } }, + { 0xF600, { 7, 0, 8 } }, + { 0xF800, { 7, 0, 9 } }, + { 0x2300, { 8, 0, 10 } }, + { 0x2200, { 8, 0, 11 } }, + { 0x2000, { 8, 1, 5 } }, + { 0x0300, { 10, 2, 4 } }, + { 0x01C0, { 12, 3, 3 } }, + { 0x0120, { 12, 4, 3 } }, + { 0x01E0, { 12, 6, 2 } }, + { 0x0150, { 12, 7, 2 } }, + { 0x0110, { 12, 8, 2 } }, + { 0x01F0, { 12, 17, 1 } }, + { 0x01A0, { 12, 18, 1 } }, + { 0x0190, { 12, 19, 1 } }, + { 0x0170, { 12, 20, 1 } }, + { 0x0160, { 12, 21, 1 } }, + { 0xFA00, { 8, 0, 12 } }, + { 0xFB00, { 8, 0, 13 } }, + { 0xFE00, { 8, 0, 14 } }, + { 0xFF00, { 8, 0, 15 } }, + { 0x00B0, { 13, 1, 6 } }, + { 0x00A8, { 13, 1, 7 } }, + { 0x00A0, { 13, 2, 5 } }, + { 0x0098, { 13, 3, 4 } }, + { 0x0090, { 13, 5, 3 } }, + { 0x0088, { 13, 9, 2 } }, + { 0x0080, { 13, 10, 2 } }, + { 0x00F8, { 13, 22, 1 } }, + { 0x00F0, { 13, 23, 1 } }, + { 0x00E8, { 13, 24, 1 } }, + { 0x00E0, { 13, 25, 1 } }, + { 0x00D8, { 13, 26, 1 } }, + { 0x007C, { 14, 0, 16 } }, + { 0x0078, { 14, 0, 17 } }, + { 0x0074, { 14, 0, 18 } }, + { 0x0070, { 14, 0, 19 } }, + { 0x006C, { 14, 0, 20 } }, + { 0x0068, { 14, 0, 21 } }, + { 0x0064, { 14, 0, 22 } }, + { 0x0060, { 14, 0, 23 } }, + { 0x005C, { 14, 0, 24 } }, + { 0x0058, { 14, 0, 25 } }, + { 0x0054, { 14, 0, 26 } }, + { 0x0050, { 14, 0, 27 } }, + { 0x004C, { 14, 0, 28 } }, + { 0x0048, { 14, 0, 29 } }, + { 0x0044, { 14, 0, 30 } }, + { 0x0040, { 14, 0, 31 } }, + { 0x0030, { 15, 0, 32 } }, + { 0x002E, { 15, 0, 33 } }, + { 0x002C, { 15, 0, 34 } }, + { 0x002A, { 15, 0, 35 } }, + { 0x0028, { 15, 0, 36 } }, + { 0x0026, { 15, 0, 37 } }, + { 0x0024, { 15, 0, 38 } }, + { 0x0022, { 15, 0, 39 } }, + { 0x0020, { 15, 0, 40 } }, + { 0x003E, { 15, 1, 8 } }, + { 0x003C, { 15, 1, 9 } }, + { 0x003A, { 15, 1, 10 } }, + { 0x0038, { 15, 1, 11 } }, + { 0x0036, { 15, 1, 12 } }, + { 0x0034, { 15, 1, 13 } }, + { 0x0032, { 15, 1, 14 } }, + { 0x0013, { 16, 1, 15 } }, + { 0x0012, { 16, 1, 16 } }, + { 0x0011, { 16, 1, 17 } }, + { 0x0010, { 16, 1, 18 } }, + { 0x0014, { 16, 6, 3 } }, + { 0x001A, { 16, 11, 2 } }, + { 0x0019, { 16, 12, 2 } }, + { 0x0018, { 16, 13, 2 } }, + { 0x0017, { 16, 14, 2 } }, + { 0x0016, { 16, 15, 2 } }, + { 0x0015, { 16, 16, 2 } }, + { 0x001F, { 16, 27, 1 } }, + { 0x001E, { 16, 28, 1 } }, + { 0x001D, { 16, 29, 1 } }, + { 0x001C, { 16, 30, 1 } }, + { 0x001B, { 16, 31, 1 } } }; -static const int non_linear_quantizer_scale[] = { - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 10, 12, 14, 16, 18, 20, 22, - 24, 28, 32, 36, 40, 44, 48, 52, - 56, 64, 72, 80, 88, 96, 104, 112 +/* q_scale_type */ +static const unsigned quant_scale[2][32] = { + { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, + 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62 }, + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 20, 22, 24, + 28, 32, 36, 40, 44, 48, 52, 56, 64, 72, 80, 88, 96, 104, 112 } }; -static INLINE int -get_macroblock_modes(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture) -{ - int macroblock_modes; - const MBtab * tab; - - switch (picture->picture_coding_type) { - case I_TYPE: - - tab = MB_I + vl_vlc_ubits(&bs->vlc, 1); - vl_vlc_dumpbits(&bs->vlc, tab->len); - macroblock_modes = tab->modes; - - return macroblock_modes; - - case P_TYPE: - - tab = MB_P + vl_vlc_ubits(&bs->vlc, 5); - vl_vlc_dumpbits(&bs->vlc, tab->len); - macroblock_modes = tab->modes; - - if (picture->picture_structure != FRAME_PICTURE) { - if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) { - macroblock_modes |= vl_vlc_ubits(&bs->vlc, 2) * MOTION_TYPE_BASE; - vl_vlc_dumpbits(&bs->vlc, 2); - } - return macroblock_modes; - } else if (picture->frame_pred_frame_dct) { - if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) - macroblock_modes |= MC_FRAME; - return macroblock_modes; - } else { - if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) { - macroblock_modes |= vl_vlc_ubits(&bs->vlc, 2) * MOTION_TYPE_BASE; - vl_vlc_dumpbits(&bs->vlc, 2); - } - return macroblock_modes; - } - - case B_TYPE: - - tab = MB_B + vl_vlc_ubits(&bs->vlc, 6); - vl_vlc_dumpbits(&bs->vlc, tab->len); - macroblock_modes = tab->modes; - - if (picture->picture_structure != FRAME_PICTURE) { - if (! (macroblock_modes & MACROBLOCK_INTRA)) { - macroblock_modes |= vl_vlc_ubits(&bs->vlc, 2) * MOTION_TYPE_BASE; - vl_vlc_dumpbits(&bs->vlc, 2); - } - } else if (picture->frame_pred_frame_dct) { - macroblock_modes |= MC_FRAME; - } else if (!(macroblock_modes & MACROBLOCK_INTRA)) { - macroblock_modes |= vl_vlc_ubits(&bs->vlc, 2) * MOTION_TYPE_BASE; - vl_vlc_dumpbits(&bs->vlc, 2); - } - return macroblock_modes; - - case D_TYPE: +static struct vl_vlc_entry tbl_B1[1 << 11]; +static struct vl_vlc_entry tbl_B2[1 << 2]; +static struct vl_vlc_entry tbl_B3[1 << 6]; +static struct vl_vlc_entry tbl_B4[1 << 6]; +static struct vl_vlc_entry tbl_B9[1 << 9]; +static struct vl_vlc_entry tbl_B10[1 << 11]; +static struct vl_vlc_entry tbl_B11[1 << 2]; +static struct vl_vlc_entry tbl_B12[1 << 10]; +static struct vl_vlc_entry tbl_B13[1 << 10]; +static struct dct_coeff tbl_B14_DC[1 << 17]; +static struct dct_coeff tbl_B14_AC[1 << 17]; +static struct dct_coeff tbl_B15[1 << 17]; - vl_vlc_dumpbits(&bs->vlc, 1); - return MACROBLOCK_INTRA; - - default: - return 0; - } -} - -static INLINE enum pipe_mpeg12_dct_type -get_dct_type(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, int macroblock_modes) +static INLINE void +init_dct_coeff_table(struct dct_coeff *dst, const struct dct_coeff_compressed *src, + unsigned size, bool is_DC) { - enum pipe_mpeg12_dct_type dct_type = PIPE_MPEG12_DCT_TYPE_FRAME; - - if ((picture->picture_structure == FRAME_PICTURE) && - (!picture->frame_pred_frame_dct) && - (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN))) { + unsigned i; - dct_type = vl_vlc_ubits(&bs->vlc, 1) ? PIPE_MPEG12_DCT_TYPE_FIELD : PIPE_MPEG12_DCT_TYPE_FRAME; - vl_vlc_dumpbits(&bs->vlc, 1); + for (i=0;i<(1<<17);++i) { + dst[i].length = 0; + dst[i].level = 0; + dst[i].run = dct_End_of_Block; } - return dct_type; -} - -static INLINE int -get_quantizer_scale(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture) -{ - int quantizer_scale_code; - quantizer_scale_code = vl_vlc_ubits(&bs->vlc, 5); - vl_vlc_dumpbits(&bs->vlc, 5); + for(; size > 0; --size, ++src) { + struct dct_coeff coeff = src->coeff; + bool has_sign = true; - if (picture->q_scale_type) - return non_linear_quantizer_scale[quantizer_scale_code]; - else - return quantizer_scale_code << 1; -} - -static INLINE int -get_motion_delta(struct vl_mpg12_bs *bs, unsigned f_code) -{ - int delta; - int sign; - const MVtab * tab; + switch (coeff.run) { + case dct_End_of_Block: + if (is_DC) + continue; - if (bs->vlc.buf & 0x80000000) { - vl_vlc_dumpbits(&bs->vlc, 1); - return 0; - } else if (bs->vlc.buf >= 0x0c000000) { + has_sign = false; + break; - tab = MV_4 + vl_vlc_ubits(&bs->vlc, 4); - delta = (tab->delta << f_code) + 1; - bs->vlc.bits += tab->len + f_code + 1; - bs->vlc.buf <<= tab->len; + case dct_Escape: + has_sign = false; + break; - sign = vl_vlc_sbits(&bs->vlc, 1); - bs->vlc.buf <<= 1; + case dct_DC: + if (!is_DC) + continue; - if (f_code) - delta += vl_vlc_ubits(&bs->vlc, f_code); - bs->vlc.buf <<= f_code; + coeff.length += 1; + coeff.run = 1; + break; - return (delta ^ sign) - sign; + case dct_AC: + if (is_DC) + continue; - } else { + coeff.length += 1; + coeff.run = 1; + break; - tab = MV_10 + vl_vlc_ubits(&bs->vlc, 10); - delta = (tab->delta << f_code) + 1; - bs->vlc.bits += tab->len + 1; - bs->vlc.buf <<= tab->len; + default: + coeff.length += 1; + coeff.run += 1; + break; + } - sign = vl_vlc_sbits(&bs->vlc, 1); - bs->vlc.buf <<= 1; + for(i=0; i<(1 << (17 - coeff.length)); ++i) + dst[src->bitcode << 1 | i] = coeff; - if (f_code) { - vl_vlc_needbits(&bs->vlc); - delta += vl_vlc_ubits(&bs->vlc, f_code); - vl_vlc_dumpbits(&bs->vlc, f_code); + if (has_sign) { + coeff.level = -coeff.level; + for(; i<(1 << (18 - coeff.length)); ++i) + dst[src->bitcode << 1 | i] = coeff; } - - return (delta ^ sign) - sign; } } -static INLINE int -bound_motion_vector(int vec, unsigned f_code) +static INLINE void +init_tables() { -#if 1 - unsigned int limit; - int sign; - - limit = 16 << f_code; - - if ((unsigned int)(vec + limit) < 2 * limit) - return vec; - else { - sign = ((int32_t)vec) >> 31; - return vec - ((2 * limit) ^ sign) + sign; - } -#else - return ((int32_t)vec << (28 - f_code)) >> (28 - f_code); -#endif + vl_vlc_init_table(tbl_B1, Elements(tbl_B1), macroblock_address_increment, Elements(macroblock_address_increment)); + vl_vlc_init_table(tbl_B2, Elements(tbl_B2), macroblock_type_i, Elements(macroblock_type_i)); + vl_vlc_init_table(tbl_B3, Elements(tbl_B3), macroblock_type_p, Elements(macroblock_type_p)); + vl_vlc_init_table(tbl_B4, Elements(tbl_B4), macroblock_type_b, Elements(macroblock_type_b)); + vl_vlc_init_table(tbl_B9, Elements(tbl_B9), coded_block_pattern, Elements(coded_block_pattern)); + vl_vlc_init_table(tbl_B10, Elements(tbl_B10), motion_code, Elements(motion_code)); + vl_vlc_init_table(tbl_B11, Elements(tbl_B11), dmvector, Elements(dmvector)); + vl_vlc_init_table(tbl_B12, Elements(tbl_B12), dct_dc_size_luminance, Elements(dct_dc_size_luminance)); + vl_vlc_init_table(tbl_B13, Elements(tbl_B13), dct_dc_size_chrominance, Elements(dct_dc_size_chrominance)); + init_dct_coeff_table(tbl_B14_DC, dct_coeff_tbl_zero, Elements(dct_coeff_tbl_zero), true); + init_dct_coeff_table(tbl_B14_AC, dct_coeff_tbl_zero, Elements(dct_coeff_tbl_zero), false); + init_dct_coeff_table(tbl_B15, dct_coeff_tbl_one, Elements(dct_coeff_tbl_one), false); } static INLINE int -get_dmv(struct vl_mpg12_bs *bs) +DIV2DOWN(int todiv) { - const DMVtab * tab; - - tab = DMV_2 + vl_vlc_ubits(&bs->vlc, 2); - vl_vlc_dumpbits(&bs->vlc, tab->len); - return tab->dmv; + return (todiv&~1)/2; } static INLINE int -get_coded_block_pattern(struct vl_mpg12_bs *bs) +DIV2UP(int todiv) { - const CBPtab * tab; - - vl_vlc_needbits(&bs->vlc); - - if (bs->vlc.buf >= 0x20000000) { - - tab = CBP_7 + (vl_vlc_ubits(&bs->vlc, 7) - 16); - vl_vlc_dumpbits(&bs->vlc, tab->len); - return tab->cbp; - - } else { - - tab = CBP_9 + vl_vlc_ubits(&bs->vlc, 9); - vl_vlc_dumpbits(&bs->vlc, tab->len); - return tab->cbp; - } + return (todiv+1)/2; } -static INLINE int -get_luma_dc_dct_diff(struct vl_mpg12_bs *bs) +static INLINE void +motion_vector(struct vl_mpg12_bs *bs, int r, int s, int dmv, short delta[2], short dmvector[2]) { - const DCtab * tab; - int size; - int dc_diff; - - if (bs->vlc.buf < 0xf8000000) { - tab = DC_lum_5 + vl_vlc_ubits(&bs->vlc, 5); - size = tab->size; - if (size) { - bs->vlc.bits += tab->len + size; - bs->vlc.buf <<= tab->len; - dc_diff = vl_vlc_ubits(&bs->vlc, size) - UBITS (SBITS (~bs->vlc.buf, 1), size); - bs->vlc.buf <<= size; - return dc_diff; - } else { - vl_vlc_dumpbits(&bs->vlc, 3); - return 0; - } - } else { - tab = DC_long + (vl_vlc_ubits(&bs->vlc, 9) - 0x1e0); - size = tab->size; - vl_vlc_dumpbits(&bs->vlc, tab->len); - vl_vlc_needbits(&bs->vlc); - dc_diff = vl_vlc_ubits(&bs->vlc, size) - UBITS (SBITS (~bs->vlc.buf, 1), size); - vl_vlc_dumpbits(&bs->vlc, size); - return dc_diff; + int t; + for (t = 0; t < 2; ++t) { + int motion_code; + int r_size = bs->desc.f_code[s][t]; + + vl_vlc_fillbits(&bs->vlc); + motion_code = vl_vlc_get_vlclbf(&bs->vlc, tbl_B10, 11); + + assert(r_size >= 0); + if (r_size && motion_code) { + int residual = vl_vlc_get_uimsbf(&bs->vlc, r_size) + 1; + delta[t] = ((abs(motion_code) - 1) << r_size) + residual; + if (motion_code < 0) + delta[t] = -delta[t]; + } else + delta[t] = motion_code; + if (dmv) + dmvector[t] = vl_vlc_get_vlclbf(&bs->vlc, tbl_B11, 2); } } static INLINE int -get_chroma_dc_dct_diff(struct vl_mpg12_bs *bs) +wrap(short f, int shift) { - const DCtab * tab; - int size; - int dc_diff; - - if (bs->vlc.buf < 0xf8000000) { - tab = DC_chrom_5 + vl_vlc_ubits(&bs->vlc, 5); - size = tab->size; - if (size) { - bs->vlc.bits += tab->len + size; - bs->vlc.buf <<= tab->len; - dc_diff = vl_vlc_ubits(&bs->vlc, size) - UBITS (SBITS (~bs->vlc.buf, 1), size); - bs->vlc.buf <<= size; - return dc_diff; - } else { - vl_vlc_dumpbits(&bs->vlc, 2); - return 0; - } - } else { - tab = DC_long + (vl_vlc_ubits(&bs->vlc, 10) - 0x3e0); - size = tab->size; - vl_vlc_dumpbits(&bs->vlc, tab->len + 1); - vl_vlc_needbits(&bs->vlc); - dc_diff = vl_vlc_ubits(&bs->vlc, size) - UBITS (SBITS (~bs->vlc.buf, 1), size); - vl_vlc_dumpbits(&bs->vlc, size); - return dc_diff; - } + if (f < (-16 << shift)) + return f + (32 << shift); + else if (f >= 16 << shift) + return f - (32 << shift); + else + return f; } static INLINE void -get_intra_block_B14(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest) +motion_vector_frame(struct vl_mpg12_bs *bs, int s, struct pipe_mpeg12_macroblock *mb) { - int i, val; - const DCTtab *tab; - - i = 0; - - vl_vlc_needbits(&bs->vlc); - - while (1) { - if (bs->vlc.buf >= 0x28000000) { - - tab = DCT_B14AC_5 + (vl_vlc_ubits(&bs->vlc, 5) - 5); - - i += tab->run; - if (i >= 64) - break; /* end of block */ + int dmv = mb->macroblock_modes.bits.frame_motion_type == PIPE_MPEG12_MO_TYPE_DUAL_PRIME; + short dmvector[2], delta[2]; - normal_code: - bs->vlc.buf <<= tab->len; - bs->vlc.bits += tab->len + 1; - val = tab->level * quantizer_scale; + if (mb->macroblock_modes.bits.frame_motion_type == PIPE_MPEG12_MO_TYPE_FIELD) { + mb->motion_vertical_field_select |= vl_vlc_get_uimsbf(&bs->vlc, 1) << s; + motion_vector(bs, 0, s, dmv, delta, dmvector); + mb->PMV[0][s][0] = wrap(mb->PMV[0][s][0] + delta[0], bs->desc.f_code[s][0]); + mb->PMV[0][s][1] = wrap(DIV2DOWN(mb->PMV[0][s][1]) + delta[1], bs->desc.f_code[s][1]) * 2; - val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1); + mb->motion_vertical_field_select |= vl_vlc_get_uimsbf(&bs->vlc, 1) << (s + 2); + motion_vector(bs, 1, s, dmv, delta, dmvector); + mb->PMV[1][s][0] = wrap(mb->PMV[1][s][0] + delta[0], bs->desc.f_code[s][0]); + mb->PMV[1][s][1] = wrap(DIV2DOWN(mb->PMV[1][s][1]) + delta[1], bs->desc.f_code[s][1]) * 2; - dest[i] = val; - - bs->vlc.buf <<= 1; - vl_vlc_needbits(&bs->vlc); - - continue; - - } else if (bs->vlc.buf >= 0x04000000) { - - tab = DCT_B14_8 + (vl_vlc_ubits(&bs->vlc, 8) - 4); - - i += tab->run; - if (i < 64) - goto normal_code; - - /* escape code */ - - i += UBITS(bs->vlc.buf << 6, 6) - 64; - if (i >= 64) - break; /* illegal, check needed to avoid buffer overflow */ - - vl_vlc_dumpbits(&bs->vlc, 12); - vl_vlc_needbits(&bs->vlc); - val = vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale; - - dest[i] = val; - - vl_vlc_dumpbits(&bs->vlc, 12); - vl_vlc_needbits(&bs->vlc); - - continue; - - } else if (bs->vlc.buf >= 0x02000000) { - tab = DCT_B14_10 + (vl_vlc_ubits(&bs->vlc, 10) - 8); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bs->vlc.buf >= 0x00800000) { - tab = DCT_13 + (vl_vlc_ubits(&bs->vlc, 13) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bs->vlc.buf >= 0x00200000) { - tab = DCT_15 + (vl_vlc_ubits(&bs->vlc, 15) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else { - tab = DCT_16 + vl_vlc_ubits(&bs->vlc, 16); - bs->vlc.buf <<= 16; - vl_vlc_getword(&bs->vlc, bs->vlc.bits + 16); - i += tab->run; - if (i < 64) - goto normal_code; - } - break; /* illegal, check needed to avoid buffer overflow */ + } else { + motion_vector(bs, 0, s, dmv, delta, dmvector); + mb->PMV[0][s][0] = wrap(mb->PMV[0][s][0] + delta[0], bs->desc.f_code[s][0]); + mb->PMV[0][s][1] = wrap(mb->PMV[0][s][1] + delta[1], bs->desc.f_code[s][1]); } - - vl_vlc_dumpbits(&bs->vlc, 2); /* dump end of block code */ } static INLINE void -get_intra_block_B15(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest) +motion_vector_field(struct vl_mpg12_bs *bs, int s, struct pipe_mpeg12_macroblock *mb) { - int i, val; - const DCTtab * tab; - - i = 0; - - vl_vlc_needbits(&bs->vlc); - - while (1) { - if (bs->vlc.buf >= 0x04000000) { - - tab = DCT_B15_8 + (vl_vlc_ubits(&bs->vlc, 8) - 4); - - i += tab->run; - if (i < 64) { - - normal_code: - bs->vlc.buf <<= tab->len; - bs->vlc.bits += tab->len + 1; - val = tab->level * quantizer_scale; - - val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1); + int dmv = mb->macroblock_modes.bits.field_motion_type == PIPE_MPEG12_MO_TYPE_DUAL_PRIME; + short dmvector[2], delta[2]; - dest[i] = val; + if (mb->macroblock_modes.bits.field_motion_type == PIPE_MPEG12_MO_TYPE_16x8) { + mb->motion_vertical_field_select |= vl_vlc_get_uimsbf(&bs->vlc, 1) << s; + motion_vector(bs, 0, s, dmv, delta, dmvector); - bs->vlc.buf <<= 1; - vl_vlc_needbits(&bs->vlc); - - continue; - - } else { - - /* end of block. I commented out this code because if we */ - /* dont exit here we will still exit at the later test :) */ - - /* if (i >= 128) break; */ /* end of block */ - - /* escape code */ - - i += UBITS(bs->vlc.buf << 6, 6) - 64; - if (i >= 64) - break; /* illegal, check against buffer overflow */ - - vl_vlc_dumpbits(&bs->vlc, 12); - vl_vlc_needbits(&bs->vlc); - val = vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale; - - dest[i] = val; - - vl_vlc_dumpbits(&bs->vlc, 12); - vl_vlc_needbits(&bs->vlc); - - continue; - - } - } else if (bs->vlc.buf >= 0x02000000) { - tab = DCT_B15_10 + (vl_vlc_ubits(&bs->vlc, 10) - 8); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bs->vlc.buf >= 0x00800000) { - tab = DCT_13 + (vl_vlc_ubits(&bs->vlc, 13) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bs->vlc.buf >= 0x00200000) { - tab = DCT_15 + (vl_vlc_ubits(&bs->vlc, 15) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else { - tab = DCT_16 + vl_vlc_ubits(&bs->vlc, 16); - bs->vlc.buf <<= 16; - vl_vlc_getword(&bs->vlc, bs->vlc.bits + 16); - i += tab->run; - if (i < 64) - goto normal_code; - } - break; /* illegal, check needed to avoid buffer overflow */ + mb->motion_vertical_field_select |= vl_vlc_get_uimsbf(&bs->vlc, 1) << (s + 2); + motion_vector(bs, 1, s, dmv, delta, dmvector); + } else { + if (!dmv) + mb->motion_vertical_field_select |= vl_vlc_get_uimsbf(&bs->vlc, 1) << s; + motion_vector(bs, 0, s, dmv, delta, dmvector); } - - vl_vlc_dumpbits(&bs->vlc, 4); /* dump end of block code */ } static INLINE void -get_non_intra_block(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest) -{ - int i, val; - const DCTtab *tab; - - i = -1; - - vl_vlc_needbits(&bs->vlc); - if (bs->vlc.buf >= 0x28000000) { - tab = DCT_B14DC_5 + (vl_vlc_ubits(&bs->vlc, 5) - 5); - goto entry_1; - } else - goto entry_2; - - while (1) { - if (bs->vlc.buf >= 0x28000000) { - - tab = DCT_B14AC_5 + (vl_vlc_ubits(&bs->vlc, 5) - 5); - - entry_1: - i += tab->run; - if (i >= 64) - break; /* end of block */ - - normal_code: - bs->vlc.buf <<= tab->len; - bs->vlc.bits += tab->len + 1; - val = ((2*tab->level+1) * quantizer_scale) >> 1; - - val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1); - - dest[i] = val; - - bs->vlc.buf <<= 1; - vl_vlc_needbits(&bs->vlc); - - continue; - - } - - entry_2: - if (bs->vlc.buf >= 0x04000000) { - - tab = DCT_B14_8 + (vl_vlc_ubits(&bs->vlc, 8) - 4); - - i += tab->run; - if (i < 64) - goto normal_code; - - /* escape code */ - - i += UBITS(bs->vlc.buf << 6, 6) - 64; - if (i >= 64) - break; /* illegal, check needed to avoid buffer overflow */ - - vl_vlc_dumpbits(&bs->vlc, 12); - vl_vlc_needbits(&bs->vlc); - val = 2 * (vl_vlc_sbits(&bs->vlc, 12) + vl_vlc_sbits(&bs->vlc, 1)) + 1; - val = (val * quantizer_scale) / 2; - - dest[i] = val; - - vl_vlc_dumpbits(&bs->vlc, 12); - vl_vlc_needbits(&bs->vlc); - - continue; - - } else if (bs->vlc.buf >= 0x02000000) { - tab = DCT_B14_10 + (vl_vlc_ubits(&bs->vlc, 10) - 8); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bs->vlc.buf >= 0x00800000) { - tab = DCT_13 + (vl_vlc_ubits(&bs->vlc, 13) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bs->vlc.buf >= 0x00200000) { - tab = DCT_15 + (vl_vlc_ubits(&bs->vlc, 15) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else { - tab = DCT_16 + vl_vlc_ubits(&bs->vlc, 16); - bs->vlc.buf <<= 16; - vl_vlc_getword(&bs->vlc, bs->vlc.bits + 16); - i += tab->run; - if (i < 64) - goto normal_code; - } - break; /* illegal, check needed to avoid buffer overflow */ - } - vl_vlc_dumpbits(&bs->vlc, 2); /* dump end of block code */ +reset_predictor(struct vl_mpg12_bs *bs) { + bs->pred_dc[0] = bs->pred_dc[1] = bs->pred_dc[2] = 0; } static INLINE void -get_mpeg1_intra_block(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest) +decode_dct(struct vl_mpg12_bs *bs, struct pipe_mpeg12_macroblock *mb, int scale) { - int i, val; - const DCTtab * tab; - - i = 0; + static const unsigned blk2cc[] = { 0, 0, 0, 0, 1, 2 }; + static const struct vl_vlc_entry *blk2dcsize[] = { + tbl_B12, tbl_B12, tbl_B12, tbl_B12, tbl_B13, tbl_B13 + }; - vl_vlc_needbits(&bs->vlc); + bool intra = mb->macroblock_type & PIPE_MPEG12_MB_TYPE_INTRA; + const struct dct_coeff *table = intra ? bs->intra_dct_tbl : tbl_B14_AC; + const struct dct_coeff *entry; + int i, cbp, blk = 0; + short *dst = mb->blocks; - while (1) { - if (bs->vlc.buf >= 0x28000000) { + vl_vlc_fillbits(&bs->vlc); + mb->coded_block_pattern = cbp = intra ? 0x3F : vl_vlc_get_vlclbf(&bs->vlc, tbl_B9, 9); - tab = DCT_B14AC_5 + (vl_vlc_ubits(&bs->vlc, 5) - 5); + goto entry; - i += tab->run; - if (i >= 64) - break; /* end of block */ + while(1) { + vl_vlc_eatbits(&bs->vlc, entry->length); + if (entry->run == dct_End_of_Block) { - normal_code: - bs->vlc.buf <<= tab->len; - bs->vlc.bits += tab->len + 1; - val = tab->level * quantizer_scale; + dst += 64; + cbp <<= 1; + cbp &= 0x3F; + blk++; - /* oddification */ - val = (val - 1) | 1; - - /* if (bitstream_get (1)) val = -val; */ - val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1); - - dest[i] = val; - - bs->vlc.buf <<= 1; - vl_vlc_needbits(&bs->vlc); - - continue; - - } else if (bs->vlc.buf >= 0x04000000) { - - tab = DCT_B14_8 + (vl_vlc_ubits(&bs->vlc, 8) - 4); - - i += tab->run; - if (i < 64) - goto normal_code; - - /* escape code */ - - i += UBITS(bs->vlc.buf << 6, 6) - 64; - if (i >= 64) - break; /* illegal, check needed to avoid buffer overflow */ +entry: + if (!cbp) + break; - vl_vlc_dumpbits(&bs->vlc, 12); - vl_vlc_needbits(&bs->vlc); - val = vl_vlc_sbits(&bs->vlc, 8); - if (! (val & 0x7f)) { - vl_vlc_dumpbits(&bs->vlc, 8); - val = vl_vlc_ubits(&bs->vlc, 8) + 2 * val; + while(!(cbp & 0x20)) { + cbp <<= 1; + blk++; } - val = val * quantizer_scale; - - /* oddification */ - val = (val + ~SBITS (val, 1)) | 1; - - dest[i] = val; - - vl_vlc_dumpbits(&bs->vlc, 8); - vl_vlc_needbits(&bs->vlc); - - continue; - - } else if (bs->vlc.buf >= 0x02000000) { - tab = DCT_B14_10 + (vl_vlc_ubits(&bs->vlc, 10) - 8); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bs->vlc.buf >= 0x00800000) { - tab = DCT_13 + (vl_vlc_ubits(&bs->vlc, 13) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bs->vlc.buf >= 0x00200000) { - tab = DCT_15 + (vl_vlc_ubits(&bs->vlc, 15) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else { - tab = DCT_16 + vl_vlc_ubits(&bs->vlc, 16); - bs->vlc.buf <<= 16; - vl_vlc_getword(&bs->vlc, bs->vlc.bits + 16); - i += tab->run; - if (i < 64) - goto normal_code; - } - break; /* illegal, check needed to avoid buffer overflow */ - } - vl_vlc_dumpbits(&bs->vlc, 2); /* dump end of block code */ -} - -static INLINE void -get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest) -{ - int i, val; - const DCTtab * tab; - - i = -1; - - vl_vlc_needbits(&bs->vlc); - if (bs->vlc.buf >= 0x28000000) { - tab = DCT_B14DC_5 + (vl_vlc_ubits(&bs->vlc, 5) - 5); - goto entry_1; - } else - goto entry_2; - - while (1) { - if (bs->vlc.buf >= 0x28000000) { - - tab = DCT_B14AC_5 + (vl_vlc_ubits(&bs->vlc, 5) - 5); - - entry_1: - i += tab->run; - if (i >= 64) - break; /* end of block */ - normal_code: - bs->vlc.buf <<= tab->len; - bs->vlc.bits += tab->len + 1; - val = ((2*tab->level+1) * quantizer_scale) >> 1; + vl_vlc_fillbits(&bs->vlc); - /* oddification */ - val = (val - 1) | 1; + if (intra) { + unsigned cc = blk2cc[blk]; + unsigned size = vl_vlc_get_vlclbf(&bs->vlc, blk2dcsize[blk], 10); - /* if (bitstream_get (1)) val = -val; */ - val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1); - - dest[i] = val; - - bs->vlc.buf <<= 1; - vl_vlc_needbits(&bs->vlc); - - continue; - - } - - entry_2: - if (bs->vlc.buf >= 0x04000000) { - - tab = DCT_B14_8 + (vl_vlc_ubits(&bs->vlc, 8) - 4); - - i += tab->run; - if (i < 64) - goto normal_code; - - /* escape code */ + if (size) { + int dct_diff = vl_vlc_get_uimsbf(&bs->vlc, size); + int half_range = 1 << (size - 1); + if (dct_diff < half_range) + dct_diff = (dct_diff + 1) - (2 * half_range); + bs->pred_dc[cc] += dct_diff; + } - i += UBITS(bs->vlc.buf << 6, 6) - 64; - if (i >= 64) - break; /* illegal, check needed to avoid buffer overflow */ + dst[0] = bs->pred_dc[cc]; + i = 0; - vl_vlc_dumpbits(&bs->vlc, 12); - vl_vlc_needbits(&bs->vlc); - val = vl_vlc_sbits(&bs->vlc, 8); - if (! (val & 0x7f)) { - vl_vlc_dumpbits(&bs->vlc, 8); - val = vl_vlc_ubits(&bs->vlc, 8) + 2 * val; + } else { + entry = tbl_B14_DC + vl_vlc_peekbits(&bs->vlc, 17); + i = -1; + continue; } - val = 2 * (val + SBITS (val, 1)) + 1; - val = (val * quantizer_scale) / 2; - - /* oddification */ - val = (val + ~SBITS (val, 1)) | 1; - - dest[i] = val; - - vl_vlc_dumpbits(&bs->vlc, 8); - vl_vlc_needbits(&bs->vlc); - - continue; - - } else if (bs->vlc.buf >= 0x02000000) { - tab = DCT_B14_10 + (vl_vlc_ubits(&bs->vlc, 10) - 8); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bs->vlc.buf >= 0x00800000) { - tab = DCT_13 + (vl_vlc_ubits(&bs->vlc, 13) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bs->vlc.buf >= 0x00200000) { - tab = DCT_15 + (vl_vlc_ubits(&bs->vlc, 15) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else { - tab = DCT_16 + vl_vlc_ubits(&bs->vlc, 16); - bs->vlc.buf <<= 16; - vl_vlc_getword(&bs->vlc, bs->vlc.bits + 16); - i += tab->run; - if (i < 64) - goto normal_code; - } - break; /* illegal, check needed to avoid buffer overflow */ - } - vl_vlc_dumpbits(&bs->vlc, 2); /* dump end of block code */ -} - -static INLINE void -slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, int cc, - unsigned x, unsigned y, enum pipe_mpeg12_dct_type coding, int quantizer_scale, int dc_dct_pred[3]) -{ - short dest[64]; - - bs->ycbcr_stream[cc]->x = x; - bs->ycbcr_stream[cc]->y = y; - bs->ycbcr_stream[cc]->intra = PIPE_MPEG12_DCT_INTRA; - bs->ycbcr_stream[cc]->coding = coding; - - vl_vlc_needbits(&bs->vlc); - - /* Get the intra DC coefficient and inverse quantize it */ - if (cc == 0) - dc_dct_pred[0] += get_luma_dc_dct_diff(bs); - else - dc_dct_pred[cc] += get_chroma_dc_dct_diff(bs); - - memset(dest, 0, sizeof(int16_t) * 64); - dest[0] = dc_dct_pred[cc]; - if (picture->base.profile == PIPE_VIDEO_PROFILE_MPEG1) { - if (picture->picture_coding_type != D_TYPE) - get_mpeg1_intra_block(bs, quantizer_scale, dest); - } else if (picture->intra_vlc_format) - get_intra_block_B15(bs, quantizer_scale, dest); - else - get_intra_block_B14(bs, quantizer_scale, dest); - - memcpy(bs->ycbcr_buffer[cc], dest, sizeof(int16_t) * 64); - - bs->num_ycbcr_blocks[cc]++; - bs->ycbcr_stream[cc]++; - bs->ycbcr_buffer[cc] += 64; -} - -static INLINE void -slice_non_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, int cc, - unsigned x, unsigned y, enum pipe_mpeg12_dct_type coding, int quantizer_scale) -{ - short dest[64]; - - bs->ycbcr_stream[cc]->x = x; - bs->ycbcr_stream[cc]->y = y; - bs->ycbcr_stream[cc]->intra = PIPE_MPEG12_DCT_DELTA; - bs->ycbcr_stream[cc]->coding = coding; - - memset(dest, 0, sizeof(int16_t) * 64); - if (picture->base.profile == PIPE_VIDEO_PROFILE_MPEG1) - get_mpeg1_non_intra_block(bs, quantizer_scale, dest); - else - get_non_intra_block(bs, quantizer_scale, dest); - - memcpy(bs->ycbcr_buffer[cc], dest, sizeof(int16_t) * 64); - - bs->num_ycbcr_blocks[cc]++; - bs->ycbcr_stream[cc]++; - bs->ycbcr_buffer[cc] += 64; -} - -static INLINE void -motion_mp1(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv) -{ - int motion_x, motion_y; - - mv->top.field_select = mv->bottom.field_select = PIPE_VIDEO_FRAME; - - vl_vlc_needbits(&bs->vlc); - motion_x = (mv->top.x + (get_motion_delta(bs, f_code[0]) << f_code[1])); - motion_x = bound_motion_vector (motion_x, f_code[0] + f_code[1]); - mv->top.x = mv->bottom.x = motion_x; - - vl_vlc_needbits(&bs->vlc); - motion_y = (mv->top.y + (get_motion_delta(bs, f_code[0]) << f_code[1])); - motion_y = bound_motion_vector (motion_y, f_code[0] + f_code[1]); - mv->top.y = mv->bottom.y = motion_y; -} - -static INLINE void -motion_fr_frame(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv) -{ - int motion_x, motion_y; - - mv->top.field_select = mv->bottom.field_select = PIPE_VIDEO_FRAME; - - vl_vlc_needbits(&bs->vlc); - motion_x = mv->top.x + get_motion_delta(bs, f_code[0]); - motion_x = bound_motion_vector(motion_x, f_code[0]); - mv->top.x = mv->bottom.x = motion_x; - - vl_vlc_needbits(&bs->vlc); - motion_y = mv->top.y + get_motion_delta(bs, f_code[1]); - motion_y = bound_motion_vector(motion_y, f_code[1]); - mv->top.y = mv->bottom.y = motion_y; -} - -static INLINE void -motion_fr_field(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv) -{ - int motion_x, motion_y; - - vl_vlc_needbits(&bs->vlc); - mv->top.field_select = vl_vlc_ubits(&bs->vlc, 1) ? - PIPE_VIDEO_BOTTOM_FIELD : PIPE_VIDEO_TOP_FIELD; - vl_vlc_dumpbits(&bs->vlc, 1); - - motion_x = mv->top.x + get_motion_delta(bs, f_code[0]); - motion_x = bound_motion_vector (motion_x, f_code[0]); - mv->top.x = motion_x; - - vl_vlc_needbits(&bs->vlc); - motion_y = (mv->top.y >> 1) + get_motion_delta(bs, f_code[1]); - /* motion_y = bound_motion_vector (motion_y, f_code[1]); */ - mv->top.y = motion_y << 1; - - vl_vlc_needbits(&bs->vlc); - mv->bottom.field_select = vl_vlc_ubits(&bs->vlc, 1) ? - PIPE_VIDEO_BOTTOM_FIELD : PIPE_VIDEO_TOP_FIELD; - vl_vlc_dumpbits(&bs->vlc, 1); - - motion_x = mv->bottom.x + get_motion_delta(bs, f_code[0]); - motion_x = bound_motion_vector (motion_x, f_code[0]); - mv->bottom.x = motion_x; - - vl_vlc_needbits(&bs->vlc); - motion_y = (mv->bottom.y >> 1) + get_motion_delta(bs, f_code[1]); - /* motion_y = bound_motion_vector (motion_y, f_code[1]); */ - mv->bottom.y = motion_y << 1; -} - -static INLINE void -motion_fr_dmv(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv) -{ - int motion_x, motion_y; - - // TODO Implement dmv - mv->top.field_select = mv->bottom.field_select = PIPE_VIDEO_FRAME; - - vl_vlc_needbits(&bs->vlc); - motion_x = mv->top.x + get_motion_delta(bs, f_code[0]); - motion_x = bound_motion_vector(motion_x, f_code[0]); - mv->top.x = mv->bottom.x = motion_x; - - vl_vlc_needbits(&bs->vlc); - motion_y = (mv->top.y >> 1) + get_motion_delta(bs, f_code[1]); - /* motion_y = bound_motion_vector (motion_y, f_code[1]); */ - mv->top.y = mv->bottom.y = motion_y << 1; -} - -/* like motion_frame, but parsing without actual motion compensation */ -static INLINE void -motion_fr_conceal(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv) -{ - int tmp; - - mv->top.field_select = mv->bottom.field_select = PIPE_VIDEO_FRAME; - - vl_vlc_needbits(&bs->vlc); - tmp = (mv->top.x + get_motion_delta(bs, f_code[0])); - tmp = bound_motion_vector (tmp, f_code[0]); - mv->top.x = mv->bottom.x = tmp; - - vl_vlc_needbits(&bs->vlc); - tmp = (mv->top.y + get_motion_delta(bs, f_code[1])); - tmp = bound_motion_vector (tmp, f_code[1]); - mv->top.y = mv->bottom.y = tmp; - - vl_vlc_dumpbits(&bs->vlc, 1); /* remove marker_bit */ -} - -static INLINE void -motion_fi_field(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv) -{ - int motion_x, motion_y; - - vl_vlc_needbits(&bs->vlc); - - // ref_field - //vl_vlc_ubits(&bs->vlc, 1); - - // TODO field select may need to do something here for bob (weave ok) - mv->top.field_select = mv->bottom.field_select = PIPE_VIDEO_FRAME; - vl_vlc_dumpbits(&bs->vlc, 1); - - motion_x = mv->top.x + get_motion_delta(bs, f_code[0]); - motion_x = bound_motion_vector (motion_x, f_code[0]); - mv->top.x = mv->bottom.x = motion_x; - - vl_vlc_needbits(&bs->vlc); - motion_y = mv->top.y + get_motion_delta(bs, f_code[1]); - motion_y = bound_motion_vector (motion_y, f_code[1]); - mv->top.y = mv->bottom.y = motion_y; -} - -static INLINE void -motion_fi_16x8(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv) -{ - int motion_x, motion_y; - - vl_vlc_needbits(&bs->vlc); - - // ref_field - //vl_vlc_ubits(&bs->vlc, 1); - - // TODO field select may need to do something here bob (weave ok) - mv->top.field_select = PIPE_VIDEO_FRAME; - vl_vlc_dumpbits(&bs->vlc, 1); - - motion_x = mv->top.x + get_motion_delta(bs, f_code[0]); - motion_x = bound_motion_vector (motion_x, f_code[0]); - mv->top.x = motion_x; - - vl_vlc_needbits(&bs->vlc); - motion_y = mv->top.y + get_motion_delta(bs, f_code[1]); - motion_y = bound_motion_vector (motion_y, f_code[1]); - mv->top.y = motion_y; - - vl_vlc_needbits(&bs->vlc); - // ref_field - //vl_vlc_ubits(&bs->vlc, 1); - - // TODO field select may need to do something here for bob (weave ok) - mv->bottom.field_select = PIPE_VIDEO_FRAME; - vl_vlc_dumpbits(&bs->vlc, 1); - - motion_x = mv->bottom.x + get_motion_delta(bs, f_code[0]); - motion_x = bound_motion_vector (motion_x, f_code[0]); - mv->bottom.x = motion_x; - - vl_vlc_needbits(&bs->vlc); - motion_y = mv->bottom.y + get_motion_delta(bs, f_code[1]); - motion_y = bound_motion_vector (motion_y, f_code[1]); - mv->bottom.y = motion_y; -} - -static INLINE void -motion_fi_dmv(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv) -{ - int motion_x, motion_y; - - // TODO field select may need to do something here for bob (weave ok) - mv->top.field_select = mv->bottom.field_select = PIPE_VIDEO_FRAME; - - vl_vlc_needbits(&bs->vlc); - motion_x = mv->top.x + get_motion_delta(bs, f_code[0]); - motion_x = bound_motion_vector (motion_x, f_code[0]); - mv->top.x = mv->bottom.x = motion_x; - - vl_vlc_needbits(&bs->vlc); - motion_y = mv->top.y + get_motion_delta(bs, f_code[1]); - motion_y = bound_motion_vector (motion_y, f_code[1]); - mv->top.y = mv->bottom.y = motion_y; -} - - -static INLINE void -motion_fi_conceal(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv) -{ - int tmp; - vl_vlc_needbits(&bs->vlc); - vl_vlc_dumpbits(&bs->vlc, 1); /* remove field_select */ - - tmp = (mv->top.x + get_motion_delta(bs, f_code[0])); - tmp = bound_motion_vector(tmp, f_code[0]); - mv->top.x = mv->bottom.x = tmp; - - vl_vlc_needbits(&bs->vlc); - tmp = (mv->top.y + get_motion_delta(bs, f_code[1])); - tmp = bound_motion_vector(tmp, f_code[1]); - mv->top.y = mv->bottom.y = tmp; - - vl_vlc_dumpbits(&bs->vlc, 1); /* remove marker_bit */ -} - -#define MOTION_CALL(routine, macroblock_modes) \ -do { \ - if ((macroblock_modes) & MACROBLOCK_MOTION_FORWARD) \ - routine(bs, picture->f_code[0], &mv_fwd); \ - if ((macroblock_modes) & MACROBLOCK_MOTION_BACKWARD) \ - routine(bs, picture->f_code[1], &mv_bwd); \ -} while (0) - -static INLINE void -store_motionvectors(struct vl_mpg12_bs *bs, unsigned *mv_pos, - struct pipe_motionvector *mv_fwd, - struct pipe_motionvector *mv_bwd) -{ - bs->mv_stream[0][*mv_pos].top = mv_fwd->top; - bs->mv_stream[0][*mv_pos].bottom = - mv_fwd->top.field_select == PIPE_VIDEO_FRAME ? - mv_fwd->top : mv_fwd->bottom; - - bs->mv_stream[1][*mv_pos].top = mv_bwd->top; - bs->mv_stream[1][*mv_pos].bottom = - mv_bwd->top.field_select == PIPE_VIDEO_FRAME ? - mv_bwd->top : mv_bwd->bottom; - - (*mv_pos)++; -} - -static INLINE bool -slice_init(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, - int *quantizer_scale, unsigned *x, unsigned *y, unsigned *mv_pos) -{ - const MBAtab * mba; - - vl_vlc_need32bits(&bs->vlc); - while(bs->vlc.buf < 0x101 || bs->vlc.buf > 0x1AF) { - if(!vl_vlc_getbyte(&bs->vlc)) - return false; - } - *y = (bs->vlc.buf & 0xFF) - 1; - vl_vlc_restart(&bs->vlc); + } else if (entry->run == dct_Escape) { + i += vl_vlc_get_uimsbf(&bs->vlc, 6) + 1; + if (i > 64) + break; - *quantizer_scale = get_quantizer_scale(bs, picture); + dst[i] = vl_vlc_get_simsbf(&bs->vlc, 12) * scale; - /* ignore intra_slice and all the extra data */ - while (bs->vlc.buf & 0x80000000) { - vl_vlc_dumpbits(&bs->vlc, 9); - vl_vlc_needbits(&bs->vlc); - } + } else { + i += entry->run; + if (i > 64) + break; - /* decode initial macroblock address increment */ - *x = 0; - while (1) { - if (bs->vlc.buf >= 0x08000000) { - mba = MBA_5 + (vl_vlc_ubits(&bs->vlc, 6) - 2); - break; - } else if (bs->vlc.buf >= 0x01800000) { - mba = MBA_11 + (vl_vlc_ubits(&bs->vlc, 12) - 24); - break; - } else switch (vl_vlc_ubits(&bs->vlc, 12)) { - case 8: /* macroblock_escape */ - *x += 33; - vl_vlc_dumpbits(&bs->vlc, 11); - vl_vlc_needbits(&bs->vlc); - continue; - case 15: /* macroblock_stuffing (MPEG1 only) */ - bs->vlc.buf &= 0xfffff; - vl_vlc_dumpbits(&bs->vlc, 11); - vl_vlc_needbits(&bs->vlc); - continue; - default: /* error */ - return false; + dst[i] = entry->level * scale; } - } - vl_vlc_dumpbits(&bs->vlc, mba->len + 1); - *x += mba->mba; - while (*x >= bs->width) { - *x -= bs->width; - (*y)++; + vl_vlc_fillbits(&bs->vlc); + entry = table + vl_vlc_peekbits(&bs->vlc, 17); } - if (*y > bs->height) - return false; - - *mv_pos = *x + *y * bs->width; - - return true; } static INLINE bool -decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture) +decode_slice(struct vl_mpg12_bs *bs) { - enum pipe_video_field_select default_field_select; - struct pipe_motionvector mv_fwd, mv_bwd; - enum pipe_mpeg12_dct_type dct_type; - - /* predictor for DC coefficients in intra blocks */ - int dc_dct_pred[3] = { 0, 0, 0 }; - int quantizer_scale; + struct pipe_mpeg12_macroblock mb; + short dct_blocks[64*6]; + unsigned dct_scale; + signed x = -1; - unsigned x, y, mv_pos; + memset(&mb, 0, sizeof(mb)); + mb.base.codec = PIPE_VIDEO_CODEC_MPEG12; + mb.y = vl_vlc_get_uimsbf(&bs->vlc, 8) - 1; + mb.blocks = dct_blocks; - switch(picture->picture_structure) { - case TOP_FIELD: - default_field_select = PIPE_VIDEO_TOP_FIELD; - break; + reset_predictor(bs); + dct_scale = quant_scale[bs->desc.q_scale_type][vl_vlc_get_uimsbf(&bs->vlc, 5)]; - case BOTTOM_FIELD: - default_field_select = PIPE_VIDEO_BOTTOM_FIELD; - break; + if (vl_vlc_get_uimsbf(&bs->vlc, 1)) + while (vl_vlc_get_uimsbf(&bs->vlc, 9) & 1) + vl_vlc_fillbits(&bs->vlc); - default: - default_field_select = PIPE_VIDEO_FRAME; - break; - } - - if (!slice_init(bs, picture, &quantizer_scale, &x, &y, &mv_pos)) - return false; + do { + int inc = 0; - mv_fwd.top.x = mv_fwd.top.y = mv_fwd.bottom.x = mv_fwd.bottom.y = 0; - mv_fwd.top.field_select = mv_fwd.bottom.field_select = default_field_select; + vl_vlc_fillbits(&bs->vlc); - mv_bwd.top.x = mv_bwd.top.y = mv_bwd.bottom.x = mv_bwd.bottom.y = 0; - mv_bwd.top.field_select = mv_bwd.bottom.field_select = default_field_select; + while (vl_vlc_peekbits(&bs->vlc, 11) == 15) { + vl_vlc_eatbits(&bs->vlc, 11); + vl_vlc_fillbits(&bs->vlc); + } - while (1) { - int macroblock_modes; - int mba_inc; - const MBAtab * mba; + while (vl_vlc_peekbits(&bs->vlc, 11) == 8) { + vl_vlc_eatbits(&bs->vlc, 11); + vl_vlc_fillbits(&bs->vlc); + inc += 33; + } + inc += vl_vlc_get_vlclbf(&bs->vlc, tbl_B1, 11); + if (x != -1) { + mb.num_skipped_macroblocks = inc - 1; + bs->decoder->decode_macroblock(bs->decoder, &mb.base, 1); + } + mb.x = x += inc; - vl_vlc_needbits(&bs->vlc); + switch (bs->desc.picture_coding_type) { + case PIPE_MPEG12_PICTURE_CODING_TYPE_I: + mb.macroblock_type = vl_vlc_get_vlclbf(&bs->vlc, tbl_B2, 2); + break; - macroblock_modes = get_macroblock_modes(bs, picture); - dct_type = get_dct_type(bs, picture, macroblock_modes); + case PIPE_MPEG12_PICTURE_CODING_TYPE_P: + mb.macroblock_type = vl_vlc_get_vlclbf(&bs->vlc, tbl_B3, 6); + break; - switch(macroblock_modes & (MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD)) { - case (MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD): - mv_fwd.top.weight = mv_fwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_HALF; - mv_bwd.top.weight = mv_bwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_HALF; + case PIPE_MPEG12_PICTURE_CODING_TYPE_B: + mb.macroblock_type = vl_vlc_get_vlclbf(&bs->vlc, tbl_B4, 6); break; default: - mv_fwd.top.field_select = mv_fwd.bottom.field_select = default_field_select; - mv_bwd.top.field_select = mv_bwd.bottom.field_select = default_field_select; + mb.macroblock_type = 0; + /* dumb gcc */ + assert(0); + } - /* fall through */ - case MACROBLOCK_MOTION_FORWARD: - mv_fwd.top.weight = mv_fwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MAX; - mv_bwd.top.weight = mv_bwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN; - break; + mb.macroblock_modes.value = 0; + if (mb.macroblock_type & (PIPE_MPEG12_MB_TYPE_MOTION_FORWARD | PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD)) { + if (bs->desc.picture_structure == PIPE_MPEG12_PICTURE_STRUCTURE_FRAME) { + if (bs->desc.frame_pred_frame_dct == 0) + mb.macroblock_modes.bits.frame_motion_type = vl_vlc_get_uimsbf(&bs->vlc, 2); + else + mb.macroblock_modes.bits.frame_motion_type = 2; + } else + mb.macroblock_modes.bits.field_motion_type = vl_vlc_get_uimsbf(&bs->vlc, 2); - case MACROBLOCK_MOTION_BACKWARD: - mv_fwd.top.weight = mv_fwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN; - mv_bwd.top.weight = mv_bwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MAX; - break; + } else if ((mb.macroblock_type & PIPE_MPEG12_MB_TYPE_INTRA) && bs->desc.concealment_motion_vectors) { + if (bs->desc.picture_structure == PIPE_MPEG12_PICTURE_STRUCTURE_FRAME) + mb.macroblock_modes.bits.frame_motion_type = 2; + else + mb.macroblock_modes.bits.field_motion_type = 1; } - /* maybe integrate MACROBLOCK_QUANT test into get_macroblock_modes ? */ - if (macroblock_modes & MACROBLOCK_QUANT) - quantizer_scale = get_quantizer_scale(bs, picture); - - if (macroblock_modes & MACROBLOCK_INTRA) { + if (bs->desc.picture_structure == PIPE_MPEG12_PICTURE_STRUCTURE_FRAME && + bs->desc.frame_pred_frame_dct == 0 && + mb.macroblock_type & (PIPE_MPEG12_MB_TYPE_INTRA | PIPE_MPEG12_MB_TYPE_PATTERN)) + mb.macroblock_modes.bits.dct_type = vl_vlc_get_uimsbf(&bs->vlc, 1); - if (picture->concealment_motion_vectors) { - if (picture->picture_structure == FRAME_PICTURE) - motion_fr_conceal(bs, picture->f_code[0], &mv_fwd); - else - motion_fi_conceal(bs, picture->f_code[0], &mv_fwd); + if (mb.macroblock_type & PIPE_MPEG12_MB_TYPE_QUANT) + dct_scale = quant_scale[bs->desc.q_scale_type][vl_vlc_get_uimsbf(&bs->vlc, 5)]; - } else { - mv_fwd.top.x = mv_fwd.top.y = mv_fwd.bottom.x = mv_fwd.bottom.y = 0; - mv_bwd.top.x = mv_bwd.top.y = mv_bwd.bottom.x = mv_bwd.bottom.y = 0; - } - mv_fwd.top.weight = mv_fwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN; - mv_bwd.top.weight = mv_bwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN; - - // unravaled loop of 6 block(i) calls in macroblock() - slice_intra_DCT(bs, picture, 0, x*2+0, y*2+0, dct_type, quantizer_scale, dc_dct_pred); - slice_intra_DCT(bs, picture, 0, x*2+1, y*2+0, dct_type, quantizer_scale, dc_dct_pred); - slice_intra_DCT(bs, picture, 0, x*2+0, y*2+1, dct_type, quantizer_scale, dc_dct_pred); - slice_intra_DCT(bs, picture, 0, x*2+1, y*2+1, dct_type, quantizer_scale, dc_dct_pred); - slice_intra_DCT(bs, picture, 1, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale, dc_dct_pred); - slice_intra_DCT(bs, picture, 2, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale, dc_dct_pred); - - if (picture->picture_coding_type == D_TYPE) { - vl_vlc_needbits(&bs->vlc); - vl_vlc_dumpbits(&bs->vlc, 1); - } + if (inc > 1 && bs->desc.picture_coding_type == PIPE_MPEG12_PICTURE_CODING_TYPE_P) + memset(mb.PMV, 0, sizeof(mb.PMV)); - } else { - if (picture->picture_structure == FRAME_PICTURE) - switch (macroblock_modes & MOTION_TYPE_MASK) { - case MC_FRAME: - if (picture->base.profile == PIPE_VIDEO_PROFILE_MPEG1) { - MOTION_CALL(motion_mp1, macroblock_modes); - } else { - MOTION_CALL(motion_fr_frame, macroblock_modes); - } - break; - - case MC_FIELD: - MOTION_CALL (motion_fr_field, macroblock_modes); - break; - - case MC_DMV: - MOTION_CALL (motion_fr_dmv, MACROBLOCK_MOTION_FORWARD); - break; - - case 0: - /* non-intra mb without forward mv in a P picture */ - mv_fwd.top.x = mv_fwd.top.y = mv_fwd.bottom.x = mv_fwd.bottom.y = 0; - mv_bwd.top.x = mv_bwd.top.y = mv_bwd.bottom.x = mv_bwd.bottom.y = 0; - break; - } + mb.motion_vertical_field_select = 0; + if ((mb.macroblock_type & PIPE_MPEG12_MB_TYPE_MOTION_FORWARD) || + (mb.macroblock_type & PIPE_MPEG12_MB_TYPE_INTRA && bs->desc.concealment_motion_vectors)) { + if (bs->desc.picture_structure == PIPE_MPEG12_PICTURE_STRUCTURE_FRAME) + motion_vector_frame(bs, 0, &mb); else - switch (macroblock_modes & MOTION_TYPE_MASK) { - case MC_FIELD: - MOTION_CALL (motion_fi_field, macroblock_modes); - break; - - case MC_16X8: - MOTION_CALL (motion_fi_16x8, macroblock_modes); - break; - - case MC_DMV: - MOTION_CALL (motion_fi_dmv, MACROBLOCK_MOTION_FORWARD); - break; - - case 0: - /* non-intra mb without forward mv in a P picture */ - mv_fwd.top.x = mv_fwd.top.y = mv_fwd.bottom.x = mv_fwd.bottom.y = 0; - mv_bwd.top.x = mv_bwd.top.y = mv_bwd.bottom.x = mv_bwd.bottom.y = 0; - break; - } - - if (macroblock_modes & MACROBLOCK_PATTERN) { - int coded_block_pattern = get_coded_block_pattern(bs); - - // TODO optimize not fully used for idct accel only mc. - if (coded_block_pattern & 0x20) - slice_non_intra_DCT(bs, picture, 0, x*2+0, y*2+0, dct_type, quantizer_scale); // cc0 luma 0 - if (coded_block_pattern & 0x10) - slice_non_intra_DCT(bs, picture, 0, x*2+1, y*2+0, dct_type, quantizer_scale); // cc0 luma 1 - if (coded_block_pattern & 0x08) - slice_non_intra_DCT(bs, picture, 0, x*2+0, y*2+1, dct_type, quantizer_scale); // cc0 luma 2 - if (coded_block_pattern & 0x04) - slice_non_intra_DCT(bs, picture, 0, x*2+1, y*2+1, dct_type, quantizer_scale); // cc0 luma 3 - if (coded_block_pattern & 0x2) - slice_non_intra_DCT(bs, picture, 1, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale); // cc1 croma - if (coded_block_pattern & 0x1) - slice_non_intra_DCT(bs, picture, 2, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale); // cc2 croma - } - - dc_dct_pred[0] = dc_dct_pred[1] = dc_dct_pred[2] = 0; + motion_vector_field(bs, 0, &mb); } - store_motionvectors(bs, &mv_pos, &mv_fwd, &mv_bwd); - if (++x >= bs->width) { - ++y; - if (y >= bs->height) - return false; - x -= bs->width; + if (mb.macroblock_type & PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD) { + if (bs->desc.picture_structure == PIPE_MPEG12_PICTURE_STRUCTURE_FRAME) + motion_vector_frame(bs, 1, &mb); + else + motion_vector_field(bs, 1, &mb); } - vl_vlc_needbits(&bs->vlc); - mba_inc = 0; - while (1) { - if (bs->vlc.buf >= 0x10000000) { - mba = MBA_5 + (vl_vlc_ubits(&bs->vlc, 5) - 2); - break; - } else if (bs->vlc.buf >= 0x03000000) { - mba = MBA_11 + (vl_vlc_ubits(&bs->vlc, 11) - 24); - break; - } else switch (vl_vlc_ubits(&bs->vlc, 11)) { - case 8: /* macroblock_escape */ - mba_inc += 33; - /* pass through */ - case 15: /* macroblock_stuffing (MPEG1 only) */ - vl_vlc_dumpbits(&bs->vlc, 11); - vl_vlc_needbits(&bs->vlc); - continue; - default: /* end of slice, or error */ - return true; - } + if (mb.macroblock_type & PIPE_MPEG12_MB_TYPE_INTRA && bs->desc.concealment_motion_vectors) { + unsigned extra = vl_vlc_get_uimsbf(&bs->vlc, 1); + mb.PMV[1][0][0] = mb.PMV[0][0][0]; + mb.PMV[1][0][1] = mb.PMV[0][0][1]; + assert(extra); + } else if (mb.macroblock_type & PIPE_MPEG12_MB_TYPE_INTRA || + !(mb.macroblock_type & (PIPE_MPEG12_MB_TYPE_MOTION_FORWARD | + PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD))) { + memset(mb.PMV, 0, sizeof(mb.PMV)); } - vl_vlc_dumpbits(&bs->vlc, mba->len); - mba_inc += mba->mba; - if (mba_inc) { - //TODO conversion to signed format signed format - dc_dct_pred[0] = dc_dct_pred[1] = dc_dct_pred[2] = 0; - - mv_fwd.top.field_select = mv_fwd.bottom.field_select = default_field_select; - mv_bwd.top.field_select = mv_bwd.bottom.field_select = default_field_select; - - if (picture->picture_coding_type == P_TYPE) { - mv_fwd.top.x = mv_fwd.top.y = mv_fwd.bottom.x = mv_fwd.bottom.y = 0; - mv_fwd.top.weight = mv_fwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MAX; - } - x += mba_inc; - do { - store_motionvectors(bs, &mv_pos, &mv_fwd, &mv_bwd); - } while (--mba_inc); + if ((mb.macroblock_type & PIPE_MPEG12_MB_TYPE_MOTION_FORWARD && + mb.macroblock_modes.bits.frame_motion_type == 2) || + (mb.macroblock_modes.bits.frame_motion_type == 3)) { + mb.PMV[1][0][0] = mb.PMV[0][0][0]; + mb.PMV[1][0][1] = mb.PMV[0][0][1]; } - while (x >= bs->width) { - ++y; - if (y >= bs->height) - return false; - x -= bs->width; + + if (mb.macroblock_type & PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD && + mb.macroblock_modes.bits.frame_motion_type == 2) { + mb.PMV[1][1][0] = mb.PMV[0][1][0]; + mb.PMV[1][1][1] = mb.PMV[0][1][1]; } - } + + if (inc > 1 || !(mb.macroblock_type & PIPE_MPEG12_MB_TYPE_INTRA)) + reset_predictor(bs); + + if (mb.macroblock_type & (PIPE_MPEG12_MB_TYPE_INTRA | PIPE_MPEG12_MB_TYPE_PATTERN)) { + memset(dct_blocks, 0, sizeof(dct_blocks)); + decode_dct(bs, &mb, dct_scale); + } else + mb.coded_block_pattern = 0; + + } while (vl_vlc_bytes_left(&bs->vlc) && vl_vlc_peekbits(&bs->vlc, 23)); + + mb.num_skipped_macroblocks = 0; + bs->decoder->decode_macroblock(bs->decoder, &mb.base, 1); + return true; } void -vl_mpg12_bs_init(struct vl_mpg12_bs *bs, unsigned width, unsigned height) +vl_mpg12_bs_init(struct vl_mpg12_bs *bs, struct pipe_video_decoder *decoder) { + static bool tables_initialized = false; + assert(bs); memset(bs, 0, sizeof(struct vl_mpg12_bs)); - bs->width = width; - bs->height = height; + bs->decoder = decoder; + + if (!tables_initialized) { + init_tables(); + tables_initialized = true; + } } void -vl_mpg12_bs_set_buffers(struct vl_mpg12_bs *bs, struct pipe_ycbcr_block *ycbcr_stream[VL_MAX_PLANES], - short *ycbcr_buffer[VL_MAX_PLANES], struct pipe_motionvector *mv_stream[VL_MAX_REF_FRAMES]) +vl_mpg12_bs_set_picture_desc(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture) { - unsigned i; - - assert(bs); - assert(ycbcr_stream && ycbcr_buffer); - assert(mv_stream); - - for (i = 0; i < VL_MAX_PLANES; ++i) { - bs->ycbcr_stream[i] = ycbcr_stream[i]; - bs->ycbcr_buffer[i] = ycbcr_buffer[i]; - } - for (i = 0; i < VL_MAX_REF_FRAMES; ++i) - bs->mv_stream[i] = mv_stream[i]; - - // TODO - for (i = 0; i < bs->width*bs->height; ++i) { - bs->mv_stream[0][i].top.x = bs->mv_stream[0][i].top.y = 0; - bs->mv_stream[0][i].top.field_select = PIPE_VIDEO_FRAME; - bs->mv_stream[0][i].top.weight = PIPE_VIDEO_MV_WEIGHT_MAX; - bs->mv_stream[0][i].bottom.x = bs->mv_stream[0][i].bottom.y = 0; - bs->mv_stream[0][i].bottom.field_select = PIPE_VIDEO_FRAME; - bs->mv_stream[0][i].bottom.weight = PIPE_VIDEO_MV_WEIGHT_MAX; - - bs->mv_stream[1][i].top.x = bs->mv_stream[1][i].top.y = 0; - bs->mv_stream[1][i].top.field_select = PIPE_VIDEO_FRAME; - bs->mv_stream[1][i].top.weight = PIPE_VIDEO_MV_WEIGHT_MIN; - bs->mv_stream[1][i].bottom.x = bs->mv_stream[1][i].bottom.y = 0; - bs->mv_stream[1][i].bottom.field_select = PIPE_VIDEO_FRAME; - bs->mv_stream[1][i].bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN; - } + bs->desc = *picture; + bs->intra_dct_tbl = picture->intra_vlc_format ? tbl_B15 : tbl_B14_AC; } void -vl_mpg12_bs_decode(struct vl_mpg12_bs *bs, unsigned num_bytes, const void *buffer, - struct pipe_mpeg12_picture_desc *picture, unsigned num_ycbcr_blocks[3]) +vl_mpg12_bs_decode(struct vl_mpg12_bs *bs, unsigned num_bytes, const uint8_t *buffer) { assert(bs); - assert(num_ycbcr_blocks); assert(buffer && num_bytes); - bs->num_ycbcr_blocks = num_ycbcr_blocks; + while(num_bytes > 2) { + if (buffer[0] == 0x00 && buffer[1] == 0x00 && buffer[2] == 0x01 && + buffer[3] >= 0x01 && buffer[3] < 0xAF) { + unsigned consumed; + + buffer += 3; + num_bytes -= 3; + + vl_vlc_init(&bs->vlc, buffer, num_bytes); + + if (!decode_slice(bs)) + return; - vl_vlc_init(&bs->vlc, buffer, num_bytes); + /* it's possible for the vlc to consume up to eight extra bytes */ + consumed = num_bytes - vl_vlc_bytes_left(&bs->vlc); + consumed = consumed > 8 ? consumed - 8 : 0; - while(decode_slice(bs, picture)); + /* crap, this is a bug we have consumed more bytes than left in the buffer */ + assert(consumed <= num_bytes); + + num_bytes -= consumed; + buffer += consumed; + + } else { + ++buffer; + --num_bytes; + } + } } diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.h b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.h index 4e48a9faa2f..c3f14a17932 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.h +++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.h @@ -33,27 +33,22 @@ struct vl_mpg12_bs { - unsigned width, height; + struct pipe_video_decoder *decoder; - struct vl_vlc vlc; - - unsigned *num_ycbcr_blocks; + struct pipe_mpeg12_picture_desc desc; + struct dct_coeff *intra_dct_tbl; - struct pipe_ycbcr_block *ycbcr_stream[VL_MAX_PLANES]; - short *ycbcr_buffer[VL_MAX_PLANES]; - - struct pipe_motionvector *mv_stream[VL_MAX_REF_FRAMES]; + struct vl_vlc vlc; + short pred_dc[3]; }; void -vl_mpg12_bs_init(struct vl_mpg12_bs *bs, unsigned width, unsigned height); +vl_mpg12_bs_init(struct vl_mpg12_bs *bs, struct pipe_video_decoder *decoder); void -vl_mpg12_bs_set_buffers(struct vl_mpg12_bs *bs, struct pipe_ycbcr_block *ycbcr_stream[VL_MAX_PLANES], - short *ycbcr_buffer[VL_MAX_PLANES], struct pipe_motionvector *mv_stream[VL_MAX_REF_FRAMES]); +vl_mpg12_bs_set_picture_desc(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture); void -vl_mpg12_bs_decode(struct vl_mpg12_bs *bs, unsigned num_bytes, const void *buffer, - struct pipe_mpeg12_picture_desc *picture, unsigned num_ycbcr_blocks[3]); +vl_mpg12_bs_decode(struct vl_mpg12_bs *bs, unsigned num_bytes, const uint8_t *buffer); #endif /* vl_mpeg12_bitstream_h */ diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c index 61d947ca4c8..7d53168afe5 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c @@ -30,6 +30,7 @@ #include <util/u_memory.h> #include <util/u_rect.h> +#include <util/u_sampler.h> #include <util/u_video.h> #include "vl_mpeg12_decoder.h" @@ -75,36 +76,44 @@ static const struct format_config mc_format_config[] = { static const unsigned num_mc_format_configs = sizeof(mc_format_config) / sizeof(struct format_config); +static const unsigned const_empty_block_mask_420[3][2][2] = { + { { 0x20, 0x10 }, { 0x08, 0x04 } }, + { { 0x02, 0x02 }, { 0x02, 0x02 } }, + { { 0x01, 0x01 }, { 0x01, 0x01 } } +}; + static bool -init_zscan_buffer(struct vl_mpeg12_buffer *buffer) +init_zscan_buffer(struct vl_mpeg12_decoder *dec, struct vl_mpeg12_buffer *buffer) { - enum pipe_format formats[3]; - - struct pipe_sampler_view **source; + struct pipe_resource *res, res_tmpl; + struct pipe_sampler_view sv_tmpl; struct pipe_surface **destination; - struct vl_mpeg12_decoder *dec; - unsigned i; - assert(buffer); - - dec = (struct vl_mpeg12_decoder*)buffer->base.decoder; + assert(dec && buffer); - formats[0] = formats[1] = formats[2] = dec->zscan_source_format; - buffer->zscan_source = vl_video_buffer_create_ex - ( - dec->base.context, - dec->blocks_per_line * BLOCK_WIDTH * BLOCK_HEIGHT, - align(dec->num_blocks, dec->blocks_per_line) / dec->blocks_per_line, - 1, PIPE_VIDEO_CHROMA_FORMAT_444, formats, PIPE_USAGE_STATIC - ); + memset(&res_tmpl, 0, sizeof(res_tmpl)); + res_tmpl.target = PIPE_TEXTURE_2D; + res_tmpl.format = dec->zscan_source_format; + res_tmpl.width0 = dec->blocks_per_line * BLOCK_WIDTH * BLOCK_HEIGHT; + res_tmpl.height0 = align(dec->num_blocks, dec->blocks_per_line) / dec->blocks_per_line; + res_tmpl.depth0 = 1; + res_tmpl.array_size = 1; + res_tmpl.usage = PIPE_USAGE_STREAM; + res_tmpl.bind = PIPE_BIND_SAMPLER_VIEW; - if (!buffer->zscan_source) + res = dec->base.context->screen->resource_create(dec->base.context->screen, &res_tmpl); + if (!res) goto error_source; - source = buffer->zscan_source->get_sampler_view_planes(buffer->zscan_source); - if (!source) + + memset(&sv_tmpl, 0, sizeof(sv_tmpl)); + u_sampler_view_default_template(&sv_tmpl, res, res->format); + sv_tmpl.swizzle_r = sv_tmpl.swizzle_g = sv_tmpl.swizzle_b = sv_tmpl.swizzle_a = PIPE_SWIZZLE_RED; + buffer->zscan_source = dec->base.context->create_sampler_view(dec->base.context, res, &sv_tmpl); + pipe_resource_reference(&res, NULL); + if (!buffer->zscan_source) goto error_sampler; if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) @@ -117,7 +126,7 @@ init_zscan_buffer(struct vl_mpeg12_buffer *buffer) for (i = 0; i < VL_MAX_PLANES; ++i) if (!vl_zscan_init_buffer(i == 0 ? &dec->zscan_y : &dec->zscan_c, - &buffer->zscan[i], source[i], destination[i])) + &buffer->zscan[i], buffer->zscan_source, destination[i])) goto error_plane; return true; @@ -128,7 +137,7 @@ error_plane: error_surface: error_sampler: - buffer->zscan_source->destroy(buffer->zscan_source); + pipe_sampler_view_reference(&buffer->zscan_source, NULL); error_source: return false; @@ -143,21 +152,18 @@ cleanup_zscan_buffer(struct vl_mpeg12_buffer *buffer) for (i = 0; i < VL_MAX_PLANES; ++i) vl_zscan_cleanup_buffer(&buffer->zscan[i]); - buffer->zscan_source->destroy(buffer->zscan_source); + + pipe_sampler_view_reference(&buffer->zscan_source, NULL); } static bool -init_idct_buffer(struct vl_mpeg12_buffer *buffer) +init_idct_buffer(struct vl_mpeg12_decoder *dec, struct vl_mpeg12_buffer *buffer) { struct pipe_sampler_view **idct_source_sv, **mc_source_sv; - struct vl_mpeg12_decoder *dec; - unsigned i; - assert(buffer); - - dec = (struct vl_mpeg12_decoder*)buffer->base.decoder; + assert(dec && buffer); idct_source_sv = dec->idct_source->get_sampler_view_planes(dec->idct_source); if (!idct_source_sv) @@ -187,27 +193,18 @@ error_source_sv: static void cleanup_idct_buffer(struct vl_mpeg12_buffer *buf) { - struct vl_mpeg12_decoder *dec; unsigned i; assert(buf); - dec = (struct vl_mpeg12_decoder*)buf->base.decoder; - assert(dec); - for (i = 0; i < 3; ++i) vl_idct_cleanup_buffer(&buf->idct[0]); } static bool -init_mc_buffer(struct vl_mpeg12_buffer *buf) +init_mc_buffer(struct vl_mpeg12_decoder *dec, struct vl_mpeg12_buffer *buf) { - struct vl_mpeg12_decoder *dec; - - assert(buf); - - dec = (struct vl_mpeg12_decoder*)buf->base.decoder; - assert(dec); + assert(dec && buf); if(!vl_mc_init_buffer(&dec->mc_y, &buf->mc[0])) goto error_mc_y; @@ -241,183 +238,148 @@ cleanup_mc_buffer(struct vl_mpeg12_buffer *buf) vl_mc_cleanup_buffer(&buf->mc[i]); } -static void -vl_mpeg12_buffer_destroy(struct pipe_video_decode_buffer *buffer) +static INLINE void +MacroBlockTypeToPipeWeights(const struct pipe_mpeg12_macroblock *mb, unsigned weights[2]) { - struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer; - struct vl_mpeg12_decoder *dec; + assert(mb); - assert(buf); - - dec = (struct vl_mpeg12_decoder*)buf->base.decoder; - assert(dec); - - cleanup_zscan_buffer(buf); - - if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) - cleanup_idct_buffer(buf); - - cleanup_mc_buffer(buf); - - vl_vb_cleanup(&buf->vertex_stream); - - FREE(buf); -} - -static void -vl_mpeg12_buffer_begin_frame(struct pipe_video_decode_buffer *buffer) -{ - struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer; - struct vl_mpeg12_decoder *dec; - - struct pipe_sampler_view **sampler_views; - unsigned i; - - assert(buf); - - dec = (struct vl_mpeg12_decoder *)buf->base.decoder; - assert(dec); - - vl_vb_map(&buf->vertex_stream, dec->base.context); - - sampler_views = buf->zscan_source->get_sampler_view_planes(buf->zscan_source); - - assert(sampler_views); - - for (i = 0; i < VL_MAX_PLANES; ++i) { - struct pipe_resource *tex = sampler_views[i]->texture; - struct pipe_box rect = - { - 0, 0, 0, - tex->width0, - tex->height0, - 1 - }; - - buf->tex_transfer[i] = dec->base.context->get_transfer - ( - dec->base.context, tex, - 0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, - &rect - ); - - buf->texels[i] = dec->base.context->transfer_map(dec->base.context, buf->tex_transfer[i]); - } - - if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) { - struct pipe_ycbcr_block *ycbcr_stream[VL_MAX_PLANES]; - struct pipe_motionvector *mv_stream[VL_MAX_REF_FRAMES]; - - for (i = 0; i < VL_MAX_PLANES; ++i) - ycbcr_stream[i] = vl_vb_get_ycbcr_stream(&buf->vertex_stream, i); + switch (mb->macroblock_type & (PIPE_MPEG12_MB_TYPE_MOTION_FORWARD | PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD)) { + case PIPE_MPEG12_MB_TYPE_MOTION_FORWARD: + weights[0] = PIPE_VIDEO_MV_WEIGHT_MAX; + weights[1] = PIPE_VIDEO_MV_WEIGHT_MIN; + break; - for (i = 0; i < VL_MAX_REF_FRAMES; ++i) - mv_stream[i] = vl_vb_get_mv_stream(&buf->vertex_stream, i); + case (PIPE_MPEG12_MB_TYPE_MOTION_FORWARD | PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD): + weights[0] = PIPE_VIDEO_MV_WEIGHT_HALF; + weights[1] = PIPE_VIDEO_MV_WEIGHT_HALF; + break; - vl_mpg12_bs_set_buffers(&buf->bs, ycbcr_stream, buf->texels, mv_stream); - } else { + case PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD: + weights[0] = PIPE_VIDEO_MV_WEIGHT_MIN; + weights[1] = PIPE_VIDEO_MV_WEIGHT_MAX; + break; - for (i = 0; i < VL_MAX_PLANES; ++i) - vl_zscan_set_layout(&buf->zscan[i], dec->zscan_linear); + default: + if (mb->macroblock_type & PIPE_MPEG12_MB_TYPE_PATTERN) { + /* patern without a motion vector, just copy the old frame content */ + weights[0] = PIPE_VIDEO_MV_WEIGHT_MAX; + weights[1] = PIPE_VIDEO_MV_WEIGHT_MIN; + } else { + weights[0] = PIPE_VIDEO_MV_WEIGHT_MIN; + weights[1] = PIPE_VIDEO_MV_WEIGHT_MIN; + } + break; } } -static void -vl_mpeg12_buffer_set_quant_matrix(struct pipe_video_decode_buffer *buffer, - const uint8_t intra_matrix[64], - const uint8_t non_intra_matrix[64]) +static INLINE struct vl_motionvector +MotionVectorToPipe(const struct pipe_mpeg12_macroblock *mb, unsigned vector, + unsigned field_select_mask, unsigned weight) { - struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer; - unsigned i; + struct vl_motionvector mv; + + assert(mb); + + if (mb->macroblock_type & (PIPE_MPEG12_MB_TYPE_MOTION_FORWARD | PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD)) { + switch (mb->macroblock_modes.bits.frame_motion_type) { + case PIPE_MPEG12_MO_TYPE_FRAME: + mv.top.x = mb->PMV[0][vector][0]; + mv.top.y = mb->PMV[0][vector][1]; + mv.top.field_select = PIPE_VIDEO_FRAME; + mv.top.weight = weight; + + mv.bottom.x = mb->PMV[0][vector][0]; + mv.bottom.y = mb->PMV[0][vector][1]; + mv.bottom.weight = weight; + mv.bottom.field_select = PIPE_VIDEO_FRAME; + break; + + case PIPE_MPEG12_MO_TYPE_FIELD: + mv.top.x = mb->PMV[0][vector][0]; + mv.top.y = mb->PMV[0][vector][1]; + mv.top.field_select = (mb->motion_vertical_field_select & field_select_mask) ? + PIPE_VIDEO_BOTTOM_FIELD : PIPE_VIDEO_TOP_FIELD; + mv.top.weight = weight; + + mv.bottom.x = mb->PMV[1][vector][0]; + mv.bottom.y = mb->PMV[1][vector][1]; + mv.bottom.field_select = (mb->motion_vertical_field_select & (field_select_mask << 2)) ? + PIPE_VIDEO_BOTTOM_FIELD : PIPE_VIDEO_TOP_FIELD; + mv.bottom.weight = weight; + break; + + default: // TODO: Support DUALPRIME and 16x8 + break; + } + } else { + mv.top.x = mv.top.y = 0; + mv.top.field_select = PIPE_VIDEO_FRAME; + mv.top.weight = weight; - for (i = 0; i < VL_MAX_PLANES; ++i) { - vl_zscan_upload_quant(&buf->zscan[i], intra_matrix, true); - vl_zscan_upload_quant(&buf->zscan[i], non_intra_matrix, false); + mv.bottom.x = mv.bottom.y = 0; + mv.bottom.field_select = PIPE_VIDEO_FRAME; + mv.bottom.weight = weight; } + return mv; } -static struct pipe_ycbcr_block * -vl_mpeg12_buffer_get_ycbcr_stream(struct pipe_video_decode_buffer *buffer, int component) -{ - struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer; - - assert(buf); - - return vl_vb_get_ycbcr_stream(&buf->vertex_stream, component); -} - -static short * -vl_mpeg12_buffer_get_ycbcr_buffer(struct pipe_video_decode_buffer *buffer, int component) -{ - struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer; - - assert(buf); - assert(component < VL_MAX_PLANES); - - return buf->texels[component]; -} - -static unsigned -vl_mpeg12_buffer_get_mv_stream_stride(struct pipe_video_decode_buffer *buffer) +static INLINE void +UploadYcbcrBlocks(struct vl_mpeg12_decoder *dec, + struct vl_mpeg12_buffer *buf, + const struct pipe_mpeg12_macroblock *mb) { - struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer; + unsigned intra; + unsigned tb, x, y, num_blocks = 0; - assert(buf); + assert(dec && buf); + assert(mb); - return vl_vb_get_mv_stream_stride(&buf->vertex_stream); -} - -static struct pipe_motionvector * -vl_mpeg12_buffer_get_mv_stream(struct pipe_video_decode_buffer *buffer, int ref_frame) -{ - struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer; - - assert(buf); + if (!mb->coded_block_pattern) + return; - return vl_vb_get_mv_stream(&buf->vertex_stream, ref_frame); -} + intra = mb->macroblock_type & PIPE_MPEG12_MB_TYPE_INTRA ? 1 : 0; -static void -vl_mpeg12_buffer_decode_bitstream(struct pipe_video_decode_buffer *buffer, - unsigned num_bytes, const void *data, - struct pipe_picture_desc *picture, - unsigned num_ycbcr_blocks[3]) -{ - struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer; - struct pipe_mpeg12_picture_desc *pic = (struct pipe_mpeg12_picture_desc *)picture; - - struct vl_mpeg12_decoder *dec; - unsigned i; + for (y = 0; y < 2; ++y) { + for (x = 0; x < 2; ++x, ++tb) { + if (mb->coded_block_pattern & const_empty_block_mask_420[0][y][x]) { - assert(buf); + struct vl_ycbcr_block *stream = buf->ycbcr_stream[0]; + stream->x = mb->x * 2 + x; + stream->y = mb->y * 2 + y; + stream->intra = intra; + stream->coding = mb->macroblock_modes.bits.dct_type; + stream->block_num = buf->block_num++; - dec = (struct vl_mpeg12_decoder *)buf->base.decoder; - assert(dec); + buf->num_ycbcr_blocks[0]++; + buf->ycbcr_stream[0]++; - for (i = 0; i < VL_MAX_PLANES; ++i) - vl_zscan_set_layout(&buf->zscan[i], pic->alternate_scan ? dec->zscan_alternate : dec->zscan_normal); - - vl_mpg12_bs_decode(&buf->bs, num_bytes, data, pic, num_ycbcr_blocks); -} + num_blocks++; + } + } + } -static void -vl_mpeg12_buffer_end_frame(struct pipe_video_decode_buffer *buffer) -{ - struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer; - struct vl_mpeg12_decoder *dec; - unsigned i; + /* TODO: Implement 422, 444 */ + //assert(ctx->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420); - assert(buf); + for (tb = 1; tb < 3; ++tb) { + if (mb->coded_block_pattern & const_empty_block_mask_420[tb][0][0]) { - dec = (struct vl_mpeg12_decoder *)buf->base.decoder; - assert(dec); + struct vl_ycbcr_block *stream = buf->ycbcr_stream[tb]; + stream->x = mb->x; + stream->y = mb->y; + stream->intra = intra; + stream->coding = 0; + stream->block_num = buf->block_num++; - vl_vb_unmap(&buf->vertex_stream, dec->base.context); + buf->num_ycbcr_blocks[tb]++; + buf->ycbcr_stream[tb]++; - for (i = 0; i < VL_MAX_PLANES; ++i) { - dec->base.context->transfer_unmap(dec->base.context, buf->tex_transfer[i]); - dec->base.context->transfer_destroy(dec->base.context, buf->tex_transfer[i]); + num_blocks++; + } } + + memcpy(buf->texels, mb->blocks, 64 * sizeof(short) * num_blocks); + buf->texels += 64 * num_blocks; } static void @@ -452,7 +414,6 @@ vl_mpeg12_destroy(struct pipe_video_decoder *decoder) pipe_resource_reference(&dec->quads.buffer, NULL); pipe_resource_reference(&dec->pos.buffer, NULL); - pipe_resource_reference(&dec->block_num.buffer, NULL); pipe_sampler_view_reference(&dec->zscan_linear, NULL); pipe_sampler_view_reference(&dec->zscan_normal, NULL); @@ -461,7 +422,7 @@ vl_mpeg12_destroy(struct pipe_video_decoder *decoder) FREE(dec); } -static struct pipe_video_decode_buffer * +static void * vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder) { struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder*)decoder; @@ -473,38 +434,25 @@ vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder) if (buffer == NULL) return NULL; - buffer->base.decoder = decoder; - buffer->base.destroy = vl_mpeg12_buffer_destroy; - buffer->base.begin_frame = vl_mpeg12_buffer_begin_frame; - buffer->base.set_quant_matrix = vl_mpeg12_buffer_set_quant_matrix; - buffer->base.get_ycbcr_stream = vl_mpeg12_buffer_get_ycbcr_stream; - buffer->base.get_ycbcr_buffer = vl_mpeg12_buffer_get_ycbcr_buffer; - buffer->base.get_mv_stream_stride = vl_mpeg12_buffer_get_mv_stream_stride; - buffer->base.get_mv_stream = vl_mpeg12_buffer_get_mv_stream; - buffer->base.decode_bitstream = vl_mpeg12_buffer_decode_bitstream; - buffer->base.end_frame = vl_mpeg12_buffer_end_frame; - if (!vl_vb_init(&buffer->vertex_stream, dec->base.context, dec->base.width / MACROBLOCK_WIDTH, dec->base.height / MACROBLOCK_HEIGHT)) goto error_vertex_buffer; - if (!init_mc_buffer(buffer)) + if (!init_mc_buffer(dec, buffer)) goto error_mc; if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) - if (!init_idct_buffer(buffer)) + if (!init_idct_buffer(dec, buffer)) goto error_idct; - if (!init_zscan_buffer(buffer)) + if (!init_zscan_buffer(dec, buffer)) goto error_zscan; if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) - vl_mpg12_bs_init(&buffer->bs, - dec->base.width / MACROBLOCK_WIDTH, - dec->base.height / MACROBLOCK_HEIGHT); + vl_mpg12_bs_init(&buffer->bs, decoder); - return &buffer->base; + return buffer; error_zscan: if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) @@ -522,76 +470,307 @@ error_vertex_buffer: } static void -vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer, - unsigned num_ycbcr_blocks[3], - struct pipe_video_buffer *refs[2], - struct pipe_video_buffer *dst) +vl_mpeg12_destroy_buffer(struct pipe_video_decoder *decoder, void *buffer) { - struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer *)buffer; - struct vl_mpeg12_decoder *dec; + struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder*)decoder; + struct vl_mpeg12_buffer *buf = buffer; + + assert(dec && buf); + + cleanup_zscan_buffer(buf); + + if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) + cleanup_idct_buffer(buf); + + cleanup_mc_buffer(buf); + + vl_vb_cleanup(&buf->vertex_stream); + + FREE(buf); +} + +static void +vl_mpeg12_set_decode_buffer(struct pipe_video_decoder *decoder, void *buffer) +{ + struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder; + + assert(dec && buffer); + + dec->current_buffer = buffer; +} + +static void +vl_mpeg12_set_picture_parameters(struct pipe_video_decoder *decoder, + struct pipe_picture_desc *picture) +{ + struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder; + struct pipe_mpeg12_picture_desc *pic = (struct pipe_mpeg12_picture_desc *)picture; + + assert(dec && pic); + + dec->picture_desc = *pic; +} + +static void +vl_mpeg12_set_quant_matrix(struct pipe_video_decoder *decoder, + const struct pipe_quant_matrix *matrix) +{ + struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder; + const struct pipe_mpeg12_quant_matrix *m = (const struct pipe_mpeg12_quant_matrix *)matrix; + + assert(dec); + assert(matrix->codec == PIPE_VIDEO_CODEC_MPEG12); + + memcpy(dec->intra_matrix, m->intra_matrix, 64); + memcpy(dec->non_intra_matrix, m->non_intra_matrix, 64); +} - struct pipe_sampler_view **sv[VL_MAX_REF_FRAMES], **mc_source_sv; +static void +vl_mpeg12_set_decode_target(struct pipe_video_decoder *decoder, + struct pipe_video_buffer *target) +{ + struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder; struct pipe_surface **surfaces; + unsigned i; + + assert(dec); + + surfaces = target->get_surfaces(target); + for (i = 0; i < VL_MAX_PLANES; ++i) + pipe_surface_reference(&dec->target_surfaces[i], surfaces[i]); +} + +static void +vl_mpeg12_set_reference_frames(struct pipe_video_decoder *decoder, + struct pipe_video_buffer **ref_frames, + unsigned num_ref_frames) +{ + struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder; + struct pipe_sampler_view **sv; + unsigned i,j; + + assert(dec); + assert(num_ref_frames <= VL_MAX_REF_FRAMES); + + for (i = 0; i < num_ref_frames; ++i) { + sv = ref_frames[i]->get_sampler_view_planes(ref_frames[i]); + for (j = 0; j < VL_MAX_PLANES; ++j) + pipe_sampler_view_reference(&dec->ref_frames[i][j], sv[j]); + } + + for (; i < VL_MAX_REF_FRAMES; ++i) + for (j = 0; j < VL_MAX_PLANES; ++j) + pipe_sampler_view_reference(&dec->ref_frames[i][j], NULL); +} + +static void +vl_mpeg12_begin_frame(struct pipe_video_decoder *decoder) +{ + struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder; + struct vl_mpeg12_buffer *buf; + + struct pipe_resource *tex; + struct pipe_box rect = { 0, 0, 0, 1, 1, 1 }; + + unsigned i; + + assert(dec); + + buf = dec->current_buffer; + assert(buf); + + if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) + dec->intra_matrix[0] = 1 << (7 - dec->picture_desc.intra_dc_precision); + + for (i = 0; i < VL_MAX_PLANES; ++i) { + vl_zscan_upload_quant(&buf->zscan[i], dec->intra_matrix, true); + vl_zscan_upload_quant(&buf->zscan[i], dec->non_intra_matrix, false); + } + + vl_vb_map(&buf->vertex_stream, dec->base.context); + + tex = buf->zscan_source->texture; + rect.width = tex->width0; + rect.height = tex->height0; + + buf->tex_transfer = dec->base.context->get_transfer + ( + dec->base.context, tex, + 0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, + &rect + ); + + buf->block_num = 0; + buf->texels = dec->base.context->transfer_map(dec->base.context, buf->tex_transfer); + + for (i = 0; i < VL_MAX_PLANES; ++i) { + buf->ycbcr_stream[i] = vl_vb_get_ycbcr_stream(&buf->vertex_stream, i); + buf->num_ycbcr_blocks[i] = 0; + } + + for (i = 0; i < VL_MAX_REF_FRAMES; ++i) + buf->mv_stream[i] = vl_vb_get_mv_stream(&buf->vertex_stream, i); + + if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) { + vl_mpg12_bs_set_picture_desc(&buf->bs, &dec->picture_desc); + + } else { + + for (i = 0; i < VL_MAX_PLANES; ++i) + vl_zscan_set_layout(&buf->zscan[i], dec->zscan_linear); + } +} + +static void +vl_mpeg12_decode_macroblock(struct pipe_video_decoder *decoder, + const struct pipe_macroblock *macroblocks, + unsigned num_macroblocks) +{ + struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder; + const struct pipe_mpeg12_macroblock *mb = (const struct pipe_mpeg12_macroblock *)macroblocks; + struct vl_mpeg12_buffer *buf; + + unsigned i, j, mv_weights[2]; + + assert(dec && dec->current_buffer); + assert(macroblocks && macroblocks->codec == PIPE_VIDEO_CODEC_MPEG12); + + buf = dec->current_buffer; + assert(buf); + + for (; num_macroblocks > 0; --num_macroblocks) { + unsigned mb_addr = mb->y * dec->width_in_macroblocks + mb->x; + + if (mb->macroblock_type & (PIPE_MPEG12_MB_TYPE_PATTERN | PIPE_MPEG12_MB_TYPE_INTRA)) + UploadYcbcrBlocks(dec, buf, mb); + + MacroBlockTypeToPipeWeights(mb, mv_weights); + + for (i = 0; i < 2; ++i) { + if (!dec->ref_frames[i][0]) continue; + + buf->mv_stream[i][mb_addr] = MotionVectorToPipe + ( + mb, i, + i ? PIPE_MPEG12_FS_FIRST_BACKWARD : PIPE_MPEG12_FS_FIRST_FORWARD, + mv_weights[i] + ); + } + + /* see section 7.6.6 of the spec */ + if (mb->num_skipped_macroblocks > 0) { + struct vl_motionvector skipped_mv[2]; + + if (dec->ref_frames[0][0] && !dec->ref_frames[1][0]) { + skipped_mv[0].top.x = skipped_mv[0].top.y = 0; + skipped_mv[0].top.weight = PIPE_VIDEO_MV_WEIGHT_MAX; + } else { + skipped_mv[0] = buf->mv_stream[0][mb_addr]; + skipped_mv[1] = buf->mv_stream[1][mb_addr]; + } + skipped_mv[0].top.field_select = PIPE_VIDEO_FRAME; + skipped_mv[1].top.field_select = PIPE_VIDEO_FRAME; + + skipped_mv[0].bottom = skipped_mv[0].top; + skipped_mv[1].bottom = skipped_mv[1].top; + + ++mb_addr; + for (i = 0; i < mb->num_skipped_macroblocks; ++i, ++mb_addr) { + for (j = 0; j < 2; ++j) { + if (!dec->ref_frames[j][0]) continue; + buf->mv_stream[j][mb_addr] = skipped_mv[j]; + + } + } + } + + ++mb; + } +} + +static void +vl_mpeg12_decode_bitstream(struct pipe_video_decoder *decoder, + unsigned num_bytes, const void *data) +{ + struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder; + struct vl_mpeg12_buffer *buf; + + unsigned i; + + assert(dec && dec->current_buffer); + buf = dec->current_buffer; + assert(buf); + + for (i = 0; i < VL_MAX_PLANES; ++i) + vl_zscan_set_layout(&buf->zscan[i], dec->picture_desc.alternate_scan ? + dec->zscan_alternate : dec->zscan_normal); + + vl_mpg12_bs_decode(&buf->bs, num_bytes, data); +} + +static void +vl_mpeg12_end_frame(struct pipe_video_decoder *decoder) +{ + struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder; + struct pipe_sampler_view **mc_source_sv; struct pipe_vertex_buffer vb[3]; + struct vl_mpeg12_buffer *buf; unsigned i, j, component; unsigned nr_components; - assert(buf); + assert(dec && dec->current_buffer); - dec = (struct vl_mpeg12_decoder *)buf->base.decoder; - assert(dec); + buf = dec->current_buffer; + + vl_vb_unmap(&buf->vertex_stream, dec->base.context); - for (i = 0; i < 2; ++i) - sv[i] = refs[i] ? refs[i]->get_sampler_view_planes(refs[i]) : NULL; + dec->base.context->transfer_unmap(dec->base.context, buf->tex_transfer); + dec->base.context->transfer_destroy(dec->base.context, buf->tex_transfer); vb[0] = dec->quads; vb[1] = dec->pos; - surfaces = dst->get_surfaces(dst); - dec->base.context->bind_vertex_elements_state(dec->base.context, dec->ves_mv); for (i = 0; i < VL_MAX_PLANES; ++i) { - if (!surfaces[i]) continue; + if (!dec->target_surfaces[i]) continue; - vl_mc_set_surface(&buf->mc[i], surfaces[i]); + vl_mc_set_surface(&buf->mc[i], dec->target_surfaces[i]); for (j = 0; j < VL_MAX_REF_FRAMES; ++j) { - if (!sv[j]) continue; + if (!dec->ref_frames[j][i]) continue; vb[2] = vl_vb_get_mv(&buf->vertex_stream, j);; dec->base.context->set_vertex_buffers(dec->base.context, 3, vb); - vl_mc_render_ref(&buf->mc[i], sv[j][i]); + vl_mc_render_ref(&buf->mc[i], dec->ref_frames[j][i]); } } - vb[2] = dec->block_num; - dec->base.context->bind_vertex_elements_state(dec->base.context, dec->ves_ycbcr); for (i = 0; i < VL_MAX_PLANES; ++i) { - if (!num_ycbcr_blocks[i]) continue; + if (!buf->num_ycbcr_blocks[i]) continue; vb[1] = vl_vb_get_ycbcr(&buf->vertex_stream, i); - dec->base.context->set_vertex_buffers(dec->base.context, 3, vb); + dec->base.context->set_vertex_buffers(dec->base.context, 2, vb); - vl_zscan_render(&buf->zscan[i] , num_ycbcr_blocks[i]); + vl_zscan_render(&buf->zscan[i] , buf->num_ycbcr_blocks[i]); if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) - vl_idct_flush(&buf->idct[i], num_ycbcr_blocks[i]); + vl_idct_flush(&buf->idct[i], buf->num_ycbcr_blocks[i]); } mc_source_sv = dec->mc_source->get_sampler_view_planes(dec->mc_source); for (i = 0, component = 0; i < VL_MAX_PLANES; ++i) { - if (!surfaces[i]) continue; + if (!dec->target_surfaces[i]) continue; - nr_components = util_format_get_nr_components(surfaces[i]->texture->format); + nr_components = util_format_get_nr_components(dec->target_surfaces[i]->texture->format); for (j = 0; j < nr_components; ++j, ++component) { - if (!num_ycbcr_blocks[i]) continue; + if (!buf->num_ycbcr_blocks[i]) continue; vb[1] = vl_vb_get_ycbcr(&buf->vertex_stream, component); - dec->base.context->set_vertex_buffers(dec->base.context, 3, vb); + dec->base.context->set_vertex_buffers(dec->base.context, 2, vb); if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) vl_idct_prepare_stage2(&buf->idct[component]); @@ -599,11 +778,19 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer, dec->base.context->set_fragment_sampler_views(dec->base.context, 1, &mc_source_sv[component]); dec->base.context->bind_fragment_sampler_states(dec->base.context, 1, &dec->sampler_ycbcr); } - vl_mc_render_ycbcr(&buf->mc[i], j, num_ycbcr_blocks[component]); + vl_mc_render_ycbcr(&buf->mc[i], j, buf->num_ycbcr_blocks[component]); } } } +static void +vl_mpeg12_flush(struct pipe_video_decoder *decoder) +{ + assert(decoder); + + //Noop, for shaders it is much faster to flush everything in end_frame +} + static bool init_pipe_state(struct vl_mpeg12_decoder *dec) { @@ -870,21 +1057,21 @@ vl_create_mpeg12_decoder(struct pipe_context *context, dec->base.destroy = vl_mpeg12_destroy; dec->base.create_buffer = vl_mpeg12_create_buffer; - dec->base.flush_buffer = vl_mpeg12_decoder_flush_buffer; + dec->base.destroy_buffer = vl_mpeg12_destroy_buffer; + dec->base.set_decode_buffer = vl_mpeg12_set_decode_buffer; + dec->base.set_picture_parameters = vl_mpeg12_set_picture_parameters; + dec->base.set_quant_matrix = vl_mpeg12_set_quant_matrix; + dec->base.set_decode_target = vl_mpeg12_set_decode_target; + dec->base.set_reference_frames = vl_mpeg12_set_reference_frames; + dec->base.begin_frame = vl_mpeg12_begin_frame; + dec->base.decode_macroblock = vl_mpeg12_decode_macroblock; + dec->base.decode_bitstream = vl_mpeg12_decode_bitstream; + dec->base.end_frame = vl_mpeg12_end_frame; + dec->base.flush = vl_mpeg12_flush; dec->blocks_per_line = MAX2(util_next_power_of_two(dec->base.width) / block_size_pixels, 4); dec->num_blocks = (dec->base.width * dec->base.height) / block_size_pixels; - - dec->quads = vl_vb_upload_quads(dec->base.context); - dec->pos = vl_vb_upload_pos( - dec->base.context, - dec->base.width / MACROBLOCK_WIDTH, - dec->base.height / MACROBLOCK_HEIGHT - ); - dec->block_num = vl_vb_upload_block_num(dec->base.context, dec->num_blocks); - - dec->ves_ycbcr = vl_vb_get_ves_ycbcr(dec->base.context); - dec->ves_mv = vl_vb_get_ves_mv(dec->base.context); + dec->width_in_macroblocks = align(dec->base.width, MACROBLOCK_WIDTH) / MACROBLOCK_WIDTH; /* TODO: Implement 422, 444 */ assert(dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420); @@ -892,14 +1079,27 @@ vl_create_mpeg12_decoder(struct pipe_context *context, if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) { dec->chroma_width = dec->base.width / 2; dec->chroma_height = dec->base.height / 2; + dec->num_blocks = dec->num_blocks * 2; } else if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) { dec->chroma_width = dec->base.width; dec->chroma_height = dec->base.height / 2; + dec->num_blocks = dec->num_blocks * 2 + dec->num_blocks; } else { dec->chroma_width = dec->base.width; dec->chroma_height = dec->base.height; + dec->num_blocks = dec->num_blocks * 3; } + dec->quads = vl_vb_upload_quads(dec->base.context); + dec->pos = vl_vb_upload_pos( + dec->base.context, + dec->base.width / MACROBLOCK_WIDTH, + dec->base.height / MACROBLOCK_HEIGHT + ); + + dec->ves_ycbcr = vl_vb_get_ves_ycbcr(dec->base.context); + dec->ves_mv = vl_vb_get_ves_mv(dec->base.context); + switch (entrypoint) { case PIPE_VIDEO_ENTRYPOINT_BITSTREAM: format_config = find_format_config(dec, bitstream_format_config, num_bitstream_format_configs); @@ -946,6 +1146,9 @@ vl_create_mpeg12_decoder(struct pipe_context *context, if (!init_pipe_state(dec)) goto error_pipe_state; + memset(dec->intra_matrix, 0x10, 64); + memset(dec->non_intra_matrix, 0x10, 64); + return &dec->base; error_pipe_state: diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h index 01265e368a3..4a8d65335f6 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h +++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h @@ -49,12 +49,12 @@ struct vl_mpeg12_decoder unsigned blocks_per_line; unsigned num_blocks; + unsigned width_in_macroblocks; enum pipe_format zscan_source_format; struct pipe_vertex_buffer quads; struct pipe_vertex_buffer pos; - struct pipe_vertex_buffer block_num; void *ves_ycbcr; void *ves_mv; @@ -73,23 +73,34 @@ struct vl_mpeg12_decoder struct vl_mc mc_y, mc_c; void *dsa; + + struct vl_mpeg12_buffer *current_buffer; + struct pipe_mpeg12_picture_desc picture_desc; + uint8_t intra_matrix[64]; + uint8_t non_intra_matrix[64]; + struct pipe_sampler_view *ref_frames[VL_MAX_REF_FRAMES][VL_MAX_PLANES]; + struct pipe_surface *target_surfaces[VL_MAX_PLANES]; }; struct vl_mpeg12_buffer { - struct pipe_video_decode_buffer base; - struct vl_vertex_buffer vertex_stream; - struct pipe_video_buffer *zscan_source; + unsigned block_num; + unsigned num_ycbcr_blocks[3]; + + struct pipe_sampler_view *zscan_source; struct vl_mpg12_bs bs; struct vl_zscan_buffer zscan[VL_MAX_PLANES]; struct vl_idct_buffer idct[VL_MAX_PLANES]; struct vl_mc_buffer mc[VL_MAX_PLANES]; - struct pipe_transfer *tex_transfer[VL_MAX_PLANES]; - short *texels[VL_MAX_PLANES]; + struct pipe_transfer *tex_transfer; + short *texels; + + struct vl_ycbcr_block *ycbcr_stream[VL_MAX_PLANES]; + struct vl_motionvector *mv_stream[VL_MAX_REF_FRAMES]; }; /** diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c index c0f1449bf80..281db8018eb 100644 --- a/src/gallium/auxiliary/vl/vl_vertex_buffers.c +++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c @@ -125,49 +125,6 @@ vl_vb_upload_pos(struct pipe_context *pipe, unsigned width, unsigned height) return pos; } -struct pipe_vertex_buffer -vl_vb_upload_block_num(struct pipe_context *pipe, unsigned num_blocks) -{ - struct pipe_vertex_buffer buf; - struct pipe_transfer *buf_transfer; - struct vertex2s *v; - unsigned i; - - assert(pipe); - - /* create buffer */ - buf.stride = sizeof(struct vertex2s); - buf.buffer_offset = 0; - buf.buffer = pipe_buffer_create - ( - pipe->screen, - PIPE_BIND_VERTEX_BUFFER, - PIPE_USAGE_STATIC, - sizeof(struct vertex2s) * num_blocks - ); - - if(!buf.buffer) - return buf; - - /* and fill it */ - v = pipe_buffer_map - ( - pipe, - buf.buffer, - PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, - &buf_transfer - ); - - for ( i = 0; i < num_blocks; ++i, ++v) { - v->x = i; - v->y = i; - } - - pipe_buffer_unmap(pipe, buf_transfer); - - return buf; -} - static struct pipe_vertex_element vl_vb_get_quad_vertex_element(void) { @@ -211,12 +168,10 @@ vl_vb_get_ves_ycbcr(struct pipe_context *pipe) /* Position element */ vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R8G8B8A8_USCALED; - vl_vb_element_helper(&vertex_elems[VS_I_VPOS], 1, 1); - /* block num element */ - vertex_elems[VS_I_BLOCK_NUM].src_format = PIPE_FORMAT_R16G16_SSCALED; + vertex_elems[VS_I_BLOCK_NUM].src_format = PIPE_FORMAT_R32_FLOAT; - vl_vb_element_helper(&vertex_elems[VS_I_BLOCK_NUM], 1, 2); + vl_vb_element_helper(&vertex_elems[VS_I_VPOS], 2, 1); return pipe->create_vertex_elements_state(pipe, 3, vertex_elems); } @@ -266,7 +221,7 @@ vl_vb_init(struct vl_vertex_buffer *buffer, struct pipe_context *pipe, pipe->screen, PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_STREAM, - sizeof(struct pipe_ycbcr_block) * size * 4 + sizeof(struct vl_ycbcr_block) * size * 4 ); if (!buffer->ycbcr[i].resource) goto error_ycbcr; @@ -278,7 +233,7 @@ vl_vb_init(struct vl_vertex_buffer *buffer, struct pipe_context *pipe, pipe->screen, PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_STREAM, - sizeof(struct pipe_motionvector) * size + sizeof(struct vl_motionvector) * size ); if (!buffer->mv[i].resource) goto error_mv; @@ -310,7 +265,7 @@ vl_vb_get_ycbcr(struct vl_vertex_buffer *buffer, int component) assert(buffer); - buf.stride = sizeof(struct pipe_ycbcr_block); + buf.stride = sizeof(struct vl_ycbcr_block); buf.buffer_offset = 0; buf.buffer = buffer->ycbcr[component].resource; @@ -324,7 +279,7 @@ vl_vb_get_mv(struct vl_vertex_buffer *buffer, int motionvector) assert(buffer); - buf.stride = sizeof(struct pipe_motionvector); + buf.stride = sizeof(struct vl_motionvector); buf.buffer_offset = 0; buf.buffer = buffer->mv[motionvector].resource; @@ -360,7 +315,7 @@ vl_vb_map(struct vl_vertex_buffer *buffer, struct pipe_context *pipe) } -struct pipe_ycbcr_block * +struct vl_ycbcr_block * vl_vb_get_ycbcr_stream(struct vl_vertex_buffer *buffer, int component) { assert(buffer); @@ -377,7 +332,7 @@ vl_vb_get_mv_stream_stride(struct vl_vertex_buffer *buffer) return buffer->width; } -struct pipe_motionvector * +struct vl_motionvector * vl_vb_get_mv_stream(struct vl_vertex_buffer *buffer, int ref_frame) { assert(buffer); diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.h b/src/gallium/auxiliary/vl/vl_vertex_buffers.h index 74845a42b69..874ecce9041 100644 --- a/src/gallium/auxiliary/vl/vl_vertex_buffers.h +++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.h @@ -52,20 +52,56 @@ enum VS_INPUT NUM_VS_INPUTS = 4 }; +enum vl_mv_weight +{ + PIPE_VIDEO_MV_WEIGHT_MIN = 0, + PIPE_VIDEO_MV_WEIGHT_HALF = 128, + PIPE_VIDEO_MV_WEIGHT_MAX = 256 +}; + +enum vl_field_select +{ + PIPE_VIDEO_FRAME = 0, + PIPE_VIDEO_TOP_FIELD = 1, + PIPE_VIDEO_BOTTOM_FIELD = 3, + + /* TODO + PIPE_VIDEO_DUALPRIME + PIPE_VIDEO_16x8 + */ +}; + +struct vl_motionvector +{ + struct { + int16_t x, y; + int16_t field_select; /**< enum pipe_video_field_select */ + int16_t weight; /**< enum pipe_video_mv_weight */ + } top, bottom; +}; + +struct vl_ycbcr_block +{ + uint8_t x, y; + uint8_t intra; + uint8_t coding; + float block_num; +}; + struct vl_vertex_buffer { unsigned width, height; struct { - struct pipe_resource *resource; - struct pipe_transfer *transfer; - struct pipe_ycbcr_block *vertex_stream; + struct pipe_resource *resource; + struct pipe_transfer *transfer; + struct vl_ycbcr_block *vertex_stream; } ycbcr[VL_MAX_PLANES]; struct { - struct pipe_resource *resource; - struct pipe_transfer *transfer; - struct pipe_motionvector *vertex_stream; + struct pipe_resource *resource; + struct pipe_transfer *transfer; + struct vl_motionvector *vertex_stream; } mv[VL_MAX_REF_FRAMES]; }; @@ -73,8 +109,6 @@ struct pipe_vertex_buffer vl_vb_upload_quads(struct pipe_context *pipe); struct pipe_vertex_buffer vl_vb_upload_pos(struct pipe_context *pipe, unsigned width, unsigned height); -struct pipe_vertex_buffer vl_vb_upload_block_num(struct pipe_context *pipe, unsigned num_blocks); - void *vl_vb_get_ves_ycbcr(struct pipe_context *pipe); void *vl_vb_get_ves_mv(struct pipe_context *pipe); @@ -89,13 +123,13 @@ void vl_vb_map(struct vl_vertex_buffer *buffer, struct pipe_context *pipe); struct pipe_vertex_buffer vl_vb_get_ycbcr(struct vl_vertex_buffer *buffer, int component); -struct pipe_ycbcr_block *vl_vb_get_ycbcr_stream(struct vl_vertex_buffer *buffer, int component); +struct vl_ycbcr_block *vl_vb_get_ycbcr_stream(struct vl_vertex_buffer *buffer, int component); struct pipe_vertex_buffer vl_vb_get_mv(struct vl_vertex_buffer *buffer, int ref_frame); unsigned vl_vb_get_mv_stream_stride(struct vl_vertex_buffer *buffer); -struct pipe_motionvector *vl_vb_get_mv_stream(struct vl_vertex_buffer *buffer, int ref_frame); +struct vl_motionvector *vl_vb_get_mv_stream(struct vl_vertex_buffer *buffer, int ref_frame); void vl_vb_unmap(struct vl_vertex_buffer *buffer, struct pipe_context *pipe); diff --git a/src/gallium/auxiliary/vl/vl_vlc.h b/src/gallium/auxiliary/vl/vl_vlc.h index e81b1e9afd2..4db1334d6a4 100644 --- a/src/gallium/auxiliary/vl/vl_vlc.h +++ b/src/gallium/auxiliary/vl/vl_vlc.h @@ -25,116 +25,148 @@ * **************************************************************************/ -/** - * This file is based uppon slice_xvmc.c and vlc.h from the xine project, - * which in turn is based on mpeg2dec. The following is the original copyright: - * - * Copyright (C) 2000-2002 Michel Lespinasse <[email protected]> - * Copyright (C) 1999-2000 Aaron Holtzman <[email protected]> - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - #ifndef vl_vlc_h #define vl_vlc_h -#include "pipe/p_compiler.h" +#include <assert.h> + +#include <pipe/p_compiler.h> + +#include <util/u_math.h> +#include "util/u_pointer.h" struct vl_vlc { - uint32_t buf; /* current 32 bit working set of buffer */ - int bits; /* used bits in working set */ - const uint8_t *ptr; /* buffer with stream data */ - const uint8_t *max; /* ptr+len of buffer */ + uint64_t buffer; + unsigned valid_bits; + uint32_t *data; + uint32_t *end; +}; + +struct vl_vlc_entry +{ + int8_t length; + int8_t value; +}; + +struct vl_vlc_compressed +{ + uint16_t bitcode; + struct vl_vlc_entry entry; }; static INLINE void -vl_vlc_restart(struct vl_vlc *vlc) +vl_vlc_init_table(struct vl_vlc_entry *dst, unsigned dst_size, const struct vl_vlc_compressed *src, unsigned src_size) { - vlc->buf = (vlc->ptr[0] << 24) | (vlc->ptr[1] << 16) | (vlc->ptr[2] << 8) | vlc->ptr[3]; - vlc->bits = -16; - vlc->ptr += 4; + unsigned i, bits = util_logbase2(dst_size); + + for (i=0;i<dst_size;++i) { + dst[i].length = 0; + dst[i].value = 0; + } + + for(; src_size > 0; --src_size, ++src) { + for(i=0; i<(1 << (bits - src->entry.length)); ++i) + dst[src->bitcode >> (16 - bits) | i] = src->entry; + } +} + +static INLINE void +vl_vlc_fillbits(struct vl_vlc *vlc) +{ + if (vlc->valid_bits < 32) { + uint32_t value = *vlc->data; + + //assert(vlc->data <= vlc->end); + +#ifndef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + vlc->buffer |= (uint64_t)value << (32 - vlc->valid_bits); + ++vlc->data; + vlc->valid_bits += 32; + } } static INLINE void vl_vlc_init(struct vl_vlc *vlc, const uint8_t *data, unsigned len) { - vlc->ptr = data; - vlc->max = data + len; - vl_vlc_restart(vlc); + assert(vlc); + assert(data && len); + + vlc->buffer = 0; + vlc->valid_bits = 0; + + /* align the data pointer */ + while (pointer_to_uintptr(data) & 3) { + vlc->buffer |= (uint64_t)*data << (56 - vlc->valid_bits); + ++data; + --len; + vlc->valid_bits += 8; + } + vlc->data = (uint32_t*)data; + vlc->end = (uint32_t*)(data + len); + + vl_vlc_fillbits(vlc); + vl_vlc_fillbits(vlc); +} + +static INLINE unsigned +vl_vlc_bytes_left(struct vl_vlc *vlc) +{ + return ((uint8_t*)vlc->end)-((uint8_t*)vlc->data); } -static INLINE bool -vl_vlc_getbyte(struct vl_vlc *vlc) +static INLINE unsigned +vl_vlc_peekbits(struct vl_vlc *vlc, unsigned num_bits) { - vlc->buf <<= 8; - vlc->buf |= vlc->ptr[0]; - vlc->ptr++; - return vlc->ptr < vlc->max; + //assert(vlc->valid_bits >= num_bits); + + return vlc->buffer >> (64 - num_bits); } -#define vl_vlc_getword(vlc, shift) \ -do { \ - (vlc)->buf |= (((vlc)->ptr[0] << 8) | (vlc)->ptr[1]) << (shift); \ - (vlc)->ptr += 2; \ -} while (0) - -/* make sure that there are at least 16 valid bits in bit_buf */ -#define vl_vlc_needbits(vlc) \ -do { \ - if ((vlc)->bits >= 0) { \ - vl_vlc_getword(vlc, (vlc)->bits); \ - (vlc)->bits -= 16; \ - } \ -} while (0) - -/* make sure that the full 32 bit of the buffer are valid */ static INLINE void -vl_vlc_need32bits(struct vl_vlc *vlc) +vl_vlc_eatbits(struct vl_vlc *vlc, unsigned num_bits) { - vl_vlc_needbits(vlc); - if (vlc->bits > -8) { - unsigned n = -vlc->bits; - vlc->buf <<= n; - vlc->buf |= *vlc->ptr << 8; - vlc->bits = -8; - vlc->ptr++; - } - if (vlc->bits > -16) { - unsigned n = -vlc->bits - 8; - vlc->buf <<= n; - vlc->buf |= *vlc->ptr; - vlc->bits = -16; - vlc->ptr++; - } + //assert(vlc->valid_bits > num_bits); + + vlc->buffer <<= num_bits; + vlc->valid_bits -= num_bits; } -/* remove num valid bits from bit_buf */ -#define vl_vlc_dumpbits(vlc, num) \ -do { \ - (vlc)->buf <<= (num); \ - (vlc)->bits += (num); \ -} while (0) +static INLINE unsigned +vl_vlc_get_uimsbf(struct vl_vlc *vlc, unsigned num_bits) +{ + unsigned value; + + //assert(vlc->valid_bits >= num_bits); + + value = vlc->buffer >> (64 - num_bits); + vl_vlc_eatbits(vlc, num_bits); + + return value; +} + +static INLINE signed +vl_vlc_get_simsbf(struct vl_vlc *vlc, unsigned num_bits) +{ + signed value; + + //assert(vlc->valid_bits >= num_bits); + + value = ((int64_t)vlc->buffer) >> (64 - num_bits); + vl_vlc_eatbits(vlc, num_bits); -/* take num bits from the high part of bit_buf and zero extend them */ -#define vl_vlc_ubits(vlc, num) (((uint32_t)((vlc)->buf)) >> (32 - (num))) + return value; +} -/* take num bits from the high part of bit_buf and sign extend them */ -#define vl_vlc_sbits(vlc, num) (((int32_t)((vlc)->buf)) >> (32 - (num))) +static INLINE int8_t +vl_vlc_get_vlclbf(struct vl_vlc *vlc, const struct vl_vlc_entry *tbl, unsigned num_bits) +{ + tbl += vl_vlc_peekbits(vlc, num_bits); + vl_vlc_eatbits(vlc, tbl->length); + return tbl->value; +} #endif /* vl_vlc_h */ diff --git a/src/gallium/docs/source/context.rst b/src/gallium/docs/source/context.rst index 25a3245066c..3faf801b4b1 100644 --- a/src/gallium/docs/source/context.rst +++ b/src/gallium/docs/source/context.rst @@ -329,8 +329,15 @@ textured quad blitter.. The source and destination may be the same resource, but overlapping blits are not permitted. ``resource_resolve`` resolves a multisampled resource into a non-multisampled -one. Formats and dimensions must match. This function must be present if a driver +one. Their formats must match. This function must be present if a driver supports multisampling. +The region that is to be resolved is described by ``pipe_resolve_info``, which +provides a source and a destination rectangle. +The source rectangle may be vertically flipped, but otherwise the dimensions +of the rectangles must match, unless PIPE_CAP_SCALED_RESOLVE is supported, +in which case scaling and horizontal flipping are allowed as well. +The result of resolving depth/stencil values may be any function of the values at +the sample points, but returning the value of the centermost sample is preferred. The interfaces to these calls are likely to change to make it easier for a driver to batch multiple blits with the same source and diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst index 4debcc6ecc4..039cb1c03d5 100644 --- a/src/gallium/docs/source/tgsi.rst +++ b/src/gallium/docs/source/tgsi.rst @@ -1031,9 +1031,21 @@ XXX so let's discuss it, yeah? TBD -.. opcode:: TXQ - Texture Size Query +.. opcode:: TXQ - Texture Size Query (as per NV_gpu_program4) + retrieve the dimensions of the texture + depending on the target. For 1D (width), 2D/RECT/CUBE + (width, height), 3D (width, height, depth), + 1D array (width, layers), 2D array (width, height, layers) - TBD +.. math:: + + lod = src0 + + dst.x = texture_width(unit, lod) + + dst.y = texture_height(unit, lod) + + dst.z = texture_depth(unit, lod) .. opcode:: CONT - Continue diff --git a/src/gallium/drivers/i915/i915_batch.h b/src/gallium/drivers/i915/i915_batch.h index a1f8bcae802..56d331f3e7a 100644 --- a/src/gallium/drivers/i915/i915_batch.h +++ b/src/gallium/drivers/i915/i915_batch.h @@ -64,11 +64,16 @@ static INLINE void i915_flush_heuristically(struct i915_context* i915, int num_vertex) { struct i915_winsys *iws = i915->iws; - i915->vertices_since_last_flush += num_vertex; - if ( i915->vertices_since_last_flush > 4096 - || ( i915->vertices_since_last_flush > 256 && - !iws->buffer_is_busy(iws, i915->current.cbuf_bo)) ) + + i915->queued_vertices += num_vertex; + + /* fire if we have more than 1/20th of the last frame's vertices */ + if (i915->queued_vertices > i915->last_fired_vertices / 20) { FLUSH_BATCH(NULL); + i915->fired_vertices += i915->queued_vertices; + i915->queued_vertices = 0; + return; + } } diff --git a/src/gallium/drivers/i915/i915_clear.c b/src/gallium/drivers/i915/i915_clear.c index e1d6a749cdc..4f9aa2c3120 100644 --- a/src/gallium/drivers/i915/i915_clear.c +++ b/src/gallium/drivers/i915/i915_clear.c @@ -125,6 +125,9 @@ i915_clear_emit(struct pipe_context *pipe, unsigned buffers, const float *rgba, * This is not required, just a heuristic */ FLUSH_BATCH(NULL); + + i915->last_fired_vertices = i915->fired_vertices; + i915->fired_vertices = 0; } /** diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h index 84862351ffe..fca8688a526 100644 --- a/src/gallium/drivers/i915/i915_context.h +++ b/src/gallium/drivers/i915/i915_context.h @@ -264,7 +264,10 @@ struct i915_context { struct util_slab_mempool transfer_pool; struct util_slab_mempool texture_transfer_pool; - int vertices_since_last_flush; + /* state for tracking flushes */ + int last_fired_vertices; + int fired_vertices; + int queued_vertices; /** blitter/hw-clear */ struct blitter_context* blitter; diff --git a/src/gallium/drivers/i915/i915_flush.c b/src/gallium/drivers/i915/i915_flush.c index 6d76afa9dbc..5d8e3c8274f 100644 --- a/src/gallium/drivers/i915/i915_flush.c +++ b/src/gallium/drivers/i915/i915_flush.c @@ -77,5 +77,6 @@ void i915_flush(struct i915_context *i915, struct pipe_fence_handle **fence) i915->static_dirty = ~0; /* kernel emits flushes in between batchbuffers */ i915->flush_dirty = 0; - i915->vertices_since_last_flush = 0; + i915->fired_vertices += i915->queued_vertices; + i915->queued_vertices = 0; } diff --git a/src/gallium/drivers/i915/i915_fpc.h b/src/gallium/drivers/i915/i915_fpc.h index b760bc461a1..b2683c82033 100644 --- a/src/gallium/drivers/i915/i915_fpc.h +++ b/src/gallium/drivers/i915/i915_fpc.h @@ -39,9 +39,9 @@ #define I915_PROGRAM_SIZE 192 -/* Use those indices for pos/face routing, must be >= I915_TEX_UNITS */ -#define I915_SEMANTIC_POS 10 -#define I915_SEMANTIC_FACE 11 +/* Use those indices for pos/face routing, must be >= num of inputs */ +#define I915_SEMANTIC_POS 100 +#define I915_SEMANTIC_FACE 101 /** diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index e743f6031eb..c108c702983 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -221,6 +221,8 @@ i915_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_sha return 1; case PIPE_SHADER_CAP_SUBROUTINES: return 0; + case PIPE_SHADER_CAP_INTEGERS: + return 0; default: debug_printf("%s: Unknown cap %u.\n", __FUNCTION__, cap); return 0; diff --git a/src/gallium/drivers/i915/i915_state_dynamic.c b/src/gallium/drivers/i915/i915_state_dynamic.c index 204cee6fe9e..1a21433eb9e 100644 --- a/src/gallium/drivers/i915/i915_state_dynamic.c +++ b/src/gallium/drivers/i915/i915_state_dynamic.c @@ -268,8 +268,8 @@ static void upload_SCISSOR_RECT(struct i915_context *i915) { unsigned x1 = i915->scissor.minx; unsigned y1 = i915->scissor.miny; - unsigned x2 = i915->scissor.maxx; - unsigned y2 = i915->scissor.maxy; + unsigned x2 = i915->scissor.maxx - 1; + unsigned y2 = i915->scissor.maxy - 1; unsigned sc[3]; sc[0] = _3DSTATE_SCISSOR_RECT_0_CMD; diff --git a/src/gallium/drivers/i915/i915_surface.c b/src/gallium/drivers/i915/i915_surface.c index ac6e94500c8..41146be9311 100644 --- a/src/gallium/drivers/i915/i915_surface.c +++ b/src/gallium/drivers/i915/i915_surface.c @@ -80,7 +80,7 @@ i915_surface_copy_render(struct pipe_context *pipe, i915->saved_nr_sampler_views, i915->saved_sampler_views); - util_blitter_copy_region(i915->blitter, dst, dst_level, dstx, dsty, dstz, + util_blitter_copy_texture(i915->blitter, dst, dst_level, dstx, dsty, dstz, src, src_level, src_box, TRUE); } diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c index 9178dfa8f69..39e9e2fa6ac 100644 --- a/src/gallium/drivers/i965/brw_screen.c +++ b/src/gallium/drivers/i965/brw_screen.c @@ -243,6 +243,8 @@ brw_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_shad return 1; case PIPE_SHADER_CAP_SUBROUTINES: return 1; + case PIPE_SHADER_CAP_INTEGERS: + return 0; default: assert(0); return 0; diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index ba9705bebee..f9301354fc5 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -51,6 +51,7 @@ C_SOURCES = \ CPP_SOURCES = \ PROGS := lp_test_format \ + lp_test_arit \ lp_test_blend \ lp_test_conv \ lp_test_printf \ diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index d6b20ceb5ce..129de297824 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -88,11 +88,12 @@ if not env['embedded']: 'format', 'blend', 'conv', - 'printf', - 'sincos', + 'printf', + 'sincos', ] if not env['msvc']: + tests.append('arit') tests.append('round') for test in tests: diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index 268f0fa034b..ce92a80721a 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -68,10 +68,17 @@ lp_jit_create_types(struct llvmpipe_context *lp) elem_types[LP_JIT_TEXTURE_BORDER_COLOR] = LLVMArrayType(LLVMFloatTypeInContext(lc), 4); +#if HAVE_LLVM >= 0x0300 + texture_type = LLVMStructCreateNamed(gallivm->context, "texture"); + LLVMStructSetBody(texture_type, elem_types, + Elements(elem_types), 0); +#else texture_type = LLVMStructTypeInContext(lc, elem_types, Elements(elem_types), 0); + LLVMAddTypeName(gallivm->module, "texture", texture_type); LLVMInvalidateStructLayout(gallivm->target, texture_type); +#endif LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, width, gallivm->target, texture_type, @@ -112,8 +119,6 @@ lp_jit_create_types(struct llvmpipe_context *lp) LP_CHECK_STRUCT_SIZE(struct lp_jit_texture, gallivm->target, texture_type); - - LLVMAddTypeName(gallivm->module, "texture", texture_type); } /* struct lp_jit_context */ @@ -129,11 +134,19 @@ lp_jit_create_types(struct llvmpipe_context *lp) elem_types[LP_JIT_CTX_TEXTURES] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); +#if HAVE_LLVM >= 0x0300 + context_type = LLVMStructCreateNamed(gallivm->context, "context"); + LLVMStructSetBody(context_type, elem_types, + Elements(elem_types), 0); +#else context_type = LLVMStructTypeInContext(lc, elem_types, Elements(elem_types), 0); LLVMInvalidateStructLayout(gallivm->target, context_type); + LLVMAddTypeName(gallivm->module, "context", context_type); +#endif + LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, constants, gallivm->target, context_type, LP_JIT_CTX_CONSTANTS); @@ -155,8 +168,6 @@ lp_jit_create_types(struct llvmpipe_context *lp) LP_CHECK_STRUCT_SIZE(struct lp_jit_context, gallivm->target, context_type); - LLVMAddTypeName(gallivm->module, "context", context_type); - lp->jit_context_ptr_type = LLVMPointerType(context_type, 0); } diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 4b2ae1436ea..9e2a45caad6 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -30,6 +30,7 @@ #include "util/u_math.h" #include "util/u_cpu_detect.h" #include "util/u_format.h" +#include "util/u_string.h" #include "util/u_format_s3tc.h" #include "pipe/p_defines.h" #include "pipe/p_screen.h" @@ -93,7 +94,9 @@ llvmpipe_get_vendor(struct pipe_screen *screen) static const char * llvmpipe_get_name(struct pipe_screen *screen) { - return "llvmpipe"; + static char buf[100]; + util_snprintf(buf, sizeof(buf), "llvmpipe (LLVM 0x%x)", HAVE_LLVM); + return buf; } diff --git a/src/gallium/drivers/llvmpipe/lp_test_arit.c b/src/gallium/drivers/llvmpipe/lp_test_arit.c new file mode 100644 index 00000000000..ea2a659142f --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_test_arit.c @@ -0,0 +1,295 @@ +/************************************************************************** + * + * Copyright 2011 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> + +#include "util/u_pointer.h" +#include "util/u_memory.h" +#include "util/u_math.h" + +#include "gallivm/lp_bld.h" +#include "gallivm/lp_bld_init.h" +#include "gallivm/lp_bld_arit.h" + +#include "lp_test.h" + + +void +write_tsv_header(FILE *fp) +{ + fprintf(fp, + "result\t" + "format\n"); + + fflush(fp); +} + + +typedef float (*unary_func_t)(float); + + +/** + * Describe a test case of one unary function. + */ +struct unary_test_t +{ + /* + * Test name -- name of the mathematical function under test. + */ + + const char *name; + + LLVMValueRef + (*builder)(struct lp_build_context *bld, LLVMValueRef a); + + /* + * Reference (pure-C) function. + */ + float + (*ref)(float a); + + /* + * Test values. + */ + const float *values; + unsigned num_values; +}; + + +const float exp2_values[] = { + -60, + -4, + -2, + -1, + -1e-007, + 0, + 1e-007, + 1, + 2, + 4, + 60 +}; + + +const float log2_values[] = { +#if 0 + /* + * Smallest denormalized number; meant just for experimentation, but not + * validation. + */ + 1.4012984643248171e-45, +#endif + 1e-007, + 0.5, + 1, + 2, + 4, + 100000, + 1e+018 +}; + + +static float rsqrtf(float x) +{ + return 1.0/sqrt(x); +} + + +const float rsqrt_values[] = { + -1, -1e-007, + 1e-007, 1, + -4, -1, + 1, 4, + -1e+035, -100000, + 100000, 1e+035, +}; + + +const float sincos_values[] = { + -5*M_PI/4, + -4*M_PI/4, + -4*M_PI/4, + -3*M_PI/4, + -2*M_PI/4, + -1*M_PI/4, + 1*M_PI/4, + 2*M_PI/4, + 3*M_PI/4, + 4*M_PI/4, + 5*M_PI/4, +}; + + +/* + * Unary test cases. + */ + +static const struct unary_test_t unary_tests[] = { + {"exp2", &lp_build_exp2, &exp2f, exp2_values, Elements(exp2_values)}, + {"log2", &lp_build_log2, &log2f, log2_values, Elements(log2_values)}, + {"exp", &lp_build_exp, &expf, exp2_values, Elements(exp2_values)}, + {"log", &lp_build_log, &logf, log2_values, Elements(log2_values)}, + {"rsqrt", &lp_build_rsqrt, &rsqrtf, rsqrt_values, Elements(rsqrt_values)}, + {"sin", &lp_build_sin, &sinf, sincos_values, Elements(sincos_values)}, + {"cos", &lp_build_cos, &cosf, sincos_values, Elements(sincos_values)}, +}; + + +/* + * Build LLVM function that exercises the unary operator builder. + */ +static LLVMValueRef +build_unary_test_func(struct gallivm_state *gallivm, + LLVMModuleRef module, + LLVMContextRef context, + const struct unary_test_t *test) +{ + LLVMTypeRef i32t = LLVMInt32TypeInContext(context); + LLVMTypeRef f32t = LLVMFloatTypeInContext(context); + LLVMTypeRef v4f32t = LLVMVectorType(f32t, 4); + LLVMTypeRef args[1] = { f32t }; + LLVMValueRef func = LLVMAddFunction(module, test->name, LLVMFunctionType(f32t, args, Elements(args), 0)); + LLVMValueRef arg1 = LLVMGetParam(func, 0); + LLVMBuilderRef builder = gallivm->builder; + LLVMBasicBlockRef block = LLVMAppendBasicBlockInContext(context, func, "entry"); + LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0); + LLVMValueRef ret; + + struct lp_build_context bld; + + lp_build_context_init(&bld, gallivm, lp_float32_vec4_type()); + + LLVMSetFunctionCallConv(func, LLVMCCallConv); + + LLVMPositionBuilderAtEnd(builder, block); + + /* scalar to vector */ + arg1 = LLVMBuildInsertElement(builder, LLVMGetUndef(v4f32t), arg1, index0, ""); + + ret = test->builder(&bld, arg1); + + /* vector to scalar */ + ret = LLVMBuildExtractElement(builder, ret, index0, ""); + + LLVMBuildRet(builder, ret); + return func; +} + + +/* + * Test one LLVM unary arithmetic builder function. + */ +static boolean +test_unary(struct gallivm_state *gallivm, unsigned verbose, FILE *fp, const struct unary_test_t *test) +{ + LLVMModuleRef module = gallivm->module; + LLVMValueRef test_func; + LLVMExecutionEngineRef engine = gallivm->engine; + LLVMContextRef context = gallivm->context; + char *error = NULL; + unary_func_t test_func_jit; + boolean success = TRUE; + int i; + + test_func = build_unary_test_func(gallivm, module, context, test); + + if (LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) { + printf("LLVMVerifyModule: %s\n", error); + LLVMDumpModule(module); + abort(); + } + LLVMDisposeMessage(error); + + test_func_jit = (unary_func_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_func)); + + for (i = 0; i < test->num_values; ++i) { + float value = test->values[i]; + float ref = test->ref(value); + float src = test_func_jit(value); + + double error = fabs(src - ref); + double precision = error ? -log2(error/fabs(ref)) : FLT_MANT_DIG; + + bool pass = precision >= 20.0; + + if (isnan(ref)) { + continue; + } + + if (!pass || verbose) { + printf("%s(%.9g): ref = %.9g, src = %.9g, precision = %f bits, %s\n", + test->name, value, ref, src, precision, + pass ? "PASS" : "FAIL"); + } + + if (!pass) { + success = FALSE; + } + } + + LLVMFreeMachineCodeForFunction(engine, test_func); + + return success; +} + + +boolean +test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) +{ + boolean success = TRUE; + int i; + + for (i = 0; i < Elements(unary_tests); ++i) { + if (!test_unary(gallivm, verbose, fp, &unary_tests[i])) { + success = FALSE; + } + } + + return success; +} + + +boolean +test_some(struct gallivm_state *gallivm, unsigned verbose, FILE *fp, + unsigned long n) +{ + /* + * Not randomly generated test cases, so test all. + */ + + return test_all(gallivm, verbose, fp); +} + + +boolean +test_single(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) +{ + return TRUE; +} diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py index f4324e69971..a2795b604d2 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.py +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py @@ -75,7 +75,7 @@ def generate_format_read(format, dst_channel, dst_native_type, dst_suffix): src_native_type = native_type(format) print 'static void' - print 'lp_tile_%s_swizzle_%s(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)' % (name, dst_suffix, dst_native_type) + print 'lp_tile_%s_swizzle_%s(%s * restrict dst, const uint8_t * restrict src, unsigned src_stride, unsigned x0, unsigned y0)' % (name, dst_suffix, dst_native_type) print '{' print ' unsigned x, y;' print ' const uint8_t *src_row = src + y0*src_stride;' @@ -273,7 +273,7 @@ def generate_format_write(format, src_channel, src_native_type, src_suffix): name = format.short_name() print 'static void' - print 'lp_tile_%s_unswizzle_%s(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)' % (name, src_suffix, src_native_type) + print 'lp_tile_%s_unswizzle_%s(const %s * restrict src, uint8_t * restrict dst, unsigned dst_stride, unsigned x0, unsigned y0)' % (name, src_suffix, src_native_type) print '{' if format.layout == PLAIN \ and format.colorspace == 'rgb' \ @@ -501,7 +501,7 @@ def generate_swizzle(formats, dst_channel, dst_native_type, dst_suffix): print 'void' print 'lp_tile_swizzle_%s(enum pipe_format format, %s *dst, const void *src, unsigned src_stride, unsigned x, unsigned y)' % (dst_suffix, dst_native_type) print '{' - print ' void (*func)(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0);' % dst_native_type + print ' void (*func)(%s * restrict dst, const uint8_t * restrict src, unsigned src_stride, unsigned x0, unsigned y0);' % dst_native_type print '#ifdef DEBUG' print ' lp_tile_swizzle_count += 1;' print '#endif' @@ -539,7 +539,7 @@ def generate_unswizzle(formats, src_channel, src_native_type, src_suffix): print 'lp_tile_unswizzle_%s(enum pipe_format format, const %s *src, void *dst, unsigned dst_stride, unsigned x, unsigned y)' % (src_suffix, src_native_type) print '{' - print ' void (*func)(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0);' % src_native_type + print ' void (*func)(const %s * restrict src, uint8_t * restrict dst, unsigned dst_stride, unsigned x0, unsigned y0);' % src_native_type print '#ifdef DEBUG' print ' lp_tile_unswizzle_count += 1;' print '#endif' diff --git a/src/gallium/drivers/noop/noop_pipe.c b/src/gallium/drivers/noop/noop_pipe.c index 18308b796f3..ffc444e37d1 100644 --- a/src/gallium/drivers/noop/noop_pipe.c +++ b/src/gallium/drivers/noop/noop_pipe.c @@ -324,131 +324,34 @@ static const char *noop_get_name(struct pipe_screen* pscreen) static int noop_get_param(struct pipe_screen* pscreen, enum pipe_cap param) { - switch (param) { - /* Supported features (boolean caps). */ - case PIPE_CAP_NPOT_TEXTURES: - case PIPE_CAP_TWO_SIDED_STENCIL: - case PIPE_CAP_GLSL: - case PIPE_CAP_OCCLUSION_QUERY: - case PIPE_CAP_POINT_SPRITE: - case PIPE_CAP_ANISOTROPIC_FILTER: - case PIPE_CAP_TEXTURE_MIRROR_CLAMP: - case PIPE_CAP_TEXTURE_MIRROR_REPEAT: - case PIPE_CAP_TEXTURE_SHADOW_MAP: - case PIPE_CAP_TEXTURE_SWIZZLE: - case PIPE_CAP_BLEND_EQUATION_SEPARATE: - case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: - - return 1; - case PIPE_CAP_DUAL_SOURCE_BLEND: - - case PIPE_CAP_SM3: - case PIPE_CAP_INDEP_BLEND_ENABLE: - case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: - case PIPE_CAP_DEPTH_CLAMP: - case PIPE_CAP_SHADER_STENCIL_EXPORT: - case PIPE_CAP_TIMER_QUERY: - case PIPE_CAP_STREAM_OUTPUT: - case PIPE_CAP_PRIMITIVE_RESTART: - case PIPE_CAP_INDEP_BLEND_FUNC: - return 0; - - /* Texturing. */ - case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 14; - case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: - return 16; - case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - case PIPE_CAP_MAX_COMBINED_SAMPLERS: - return 16; - - /* Render targets. */ - case PIPE_CAP_MAX_RENDER_TARGETS: - return 8; - - /* Fragment coordinate conventions. */ - case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: - case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: - return 1; - case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: - case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: - return 0; - - default: - return 0; - } + struct pipe_screen *screen = ((struct noop_pipe_screen*)pscreen)->oscreen; + + return screen->get_param(screen, param); } static float noop_get_paramf(struct pipe_screen* pscreen, enum pipe_cap param) { - switch (param) { - case PIPE_CAP_MAX_LINE_WIDTH: - case PIPE_CAP_MAX_LINE_WIDTH_AA: - case PIPE_CAP_MAX_POINT_WIDTH: - case PIPE_CAP_MAX_POINT_WIDTH_AA: - return 8192.0f; - case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: - return 16.0f; - case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: - return 16.0f; - default: - return 0.0f; - } + struct pipe_screen *screen = ((struct noop_pipe_screen*)pscreen)->oscreen; + + return screen->get_paramf(screen, param); } static int noop_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enum pipe_shader_cap param) { - switch(shader) - { - case PIPE_SHADER_FRAGMENT: - case PIPE_SHADER_VERTEX: - case PIPE_SHADER_GEOMETRY: - break; - default: - return 0; - } + struct pipe_screen *screen = ((struct noop_pipe_screen*)pscreen)->oscreen; - switch (param) { - case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: - case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: - case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: - case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: - return 16384; - case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: - return 8; - case PIPE_SHADER_CAP_MAX_INPUTS: - return 16; - case PIPE_SHADER_CAP_MAX_TEMPS: - return 256; - case PIPE_SHADER_CAP_MAX_ADDRS: - return 1; - case PIPE_SHADER_CAP_MAX_CONSTS: - return 256; - case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: - return 1; - case PIPE_SHADER_CAP_MAX_PREDS: - return 0; - case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: - return 1; - case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: - case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: - case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: - case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: - return 1; - default: - return 0; - } + return screen->get_shader_param(screen, shader, param); } -static boolean noop_is_format_supported(struct pipe_screen* screen, +static boolean noop_is_format_supported(struct pipe_screen* pscreen, enum pipe_format format, enum pipe_texture_target target, unsigned sample_count, unsigned usage) { - return true; + struct pipe_screen *screen = ((struct noop_pipe_screen*)pscreen)->oscreen; + + return screen->is_format_supported(screen, format, target, sample_count, usage); } static void noop_destroy_screen(struct pipe_screen *screen) diff --git a/src/gallium/drivers/nouveau/Makefile b/src/gallium/drivers/nouveau/Makefile index 3210d1ff77b..aae6d9889bb 100644 --- a/src/gallium/drivers/nouveau/Makefile +++ b/src/gallium/drivers/nouveau/Makefile @@ -10,6 +10,7 @@ LIBRARY_INCLUDES = \ C_SOURCES = nouveau_screen.c \ nouveau_fence.c \ nouveau_mm.c \ - nouveau_buffer.c + nouveau_buffer.c \ + nouveau_video.c include ../../Makefile.template diff --git a/src/gallium/drivers/nouveau/nouveau_context.h b/src/gallium/drivers/nouveau/nouveau_context.h index 696e0d3f24e..19bf7c84ac7 100644 --- a/src/gallium/drivers/nouveau/nouveau_context.h +++ b/src/gallium/drivers/nouveau/nouveau_context.h @@ -23,4 +23,7 @@ nouveau_context(struct pipe_context *pipe) return (struct nouveau_context *)pipe; } +void +nouveau_context_init_vdec(struct nouveau_context *); + #endif diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h index d910809a0ec..cf291c6c595 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.h +++ b/src/gallium/drivers/nouveau/nouveau_screen.h @@ -76,6 +76,7 @@ nouveau_screen_bo_from_handle(struct pipe_screen *pscreen, int nouveau_screen_init(struct nouveau_screen *, struct nouveau_device *); void nouveau_screen_fini(struct nouveau_screen *); +void nouveau_screen_init_vdec(struct nouveau_screen *); #ifndef NOUVEAU_NVC0 diff --git a/src/gallium/drivers/nouveau/nouveau_video.c b/src/gallium/drivers/nouveau/nouveau_video.c new file mode 100644 index 00000000000..620c030e112 --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_video.c @@ -0,0 +1,41 @@ + +#include "vl/vl_decoder.h" +#include "vl/vl_video_buffer.h" + +#include "nouveau/nouveau_screen.h" +#include "nouveau/nouveau_context.h" + +static int +nouveau_screen_get_video_param(struct pipe_screen *pscreen, + enum pipe_video_profile profile, + enum pipe_video_cap param) +{ + switch (param) { + case PIPE_VIDEO_CAP_SUPPORTED: + return vl_profile_supported(pscreen, profile); + case PIPE_VIDEO_CAP_NPOT_TEXTURES: + return 1; + case PIPE_VIDEO_CAP_MAX_WIDTH: + case PIPE_VIDEO_CAP_MAX_HEIGHT: + return vl_video_buffer_max_size(pscreen); + case PIPE_VIDEO_CAP_NUM_BUFFERS_DESIRED: + return vl_num_buffers_desired(pscreen, profile); + default: + debug_printf("unknown video param: %d\n", param); + return 0; + } +} + +void +nouveau_screen_init_vdec(struct nouveau_screen *screen) +{ + screen->base.get_video_param = nouveau_screen_get_video_param; + screen->base.is_video_format_supported = vl_video_buffer_is_format_supported; +} + +void +nouveau_context_init_vdec(struct nouveau_context *nv) +{ + nv->pipe.create_video_decoder = vl_create_decoder; + nv->pipe.create_video_buffer = vl_video_buffer_create; +} diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index ac3e361a446..0d464063b5b 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -149,6 +149,8 @@ nv50_create(struct pipe_screen *pscreen, void *priv) assert(nv50->draw); draw_set_rasterize_stage(nv50->draw, nv50_draw_render_stage(nv50)); + nouveau_context_init_vdec(&nv50->base); + return pipe; } diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index c1226d5eb26..284db69e312 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -171,7 +171,8 @@ void nv50_validate_derived_rs(struct nv50_context *); extern void nv50_init_state_functions(struct nv50_context *); /* nv50_state_validate.c */ -extern boolean nv50_state_validate(struct nv50_context *); +/* @words: check for space before emitting relocs */ +extern boolean nv50_state_validate(struct nv50_context *, unsigned words); /* nv50_surface.c */ extern void nv50_clear(struct pipe_context *, unsigned buffers, diff --git a/src/gallium/drivers/nv50/nv50_formats.c b/src/gallium/drivers/nv50/nv50_formats.c index be43147468a..34502d0a397 100644 --- a/src/gallium/drivers/nv50/nv50_formats.c +++ b/src/gallium/drivers/nv50/nv50_formats.c @@ -116,7 +116,7 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = SAMPLER_VIEW | DEPTH_STENCIL }, [PIPE_FORMAT_Z24_UNORM_S8_USCALED] = { NV50_ZETA_FORMAT_Z24_S8_UNORM, - B_(C0, C0, C0, ONE_FLOAT, UNORM, UINT, UINT, UINT, Z24_S8, 0), + B_(C0, C1, C0, ONE_FLOAT, UNORM, UINT, UINT, UINT, Z24_S8, 0), SAMPLER_VIEW | DEPTH_STENCIL }, [PIPE_FORMAT_Z24X8_UNORM] = { NV50_ZETA_FORMAT_Z24_X8_UNORM, @@ -124,7 +124,7 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = SAMPLER_VIEW | DEPTH_STENCIL }, [PIPE_FORMAT_S8_USCALED_Z24_UNORM] = { NV50_ZETA_FORMAT_S8_Z24_UNORM, - B_(C1, C1, C1, ONE_FLOAT, UINT, UNORM, UINT, UINT, S8_Z24, 0), + B_(C1, C0, C1, ONE_FLOAT, UINT, UNORM, UINT, UINT, S8_Z24, 0), SAMPLER_VIEW | DEPTH_STENCIL }, [PIPE_FORMAT_Z32_FLOAT] = { NV50_ZETA_FORMAT_Z32_FLOAT, diff --git a/src/gallium/drivers/nv50/nv50_pc_optimize.c b/src/gallium/drivers/nv50/nv50_pc_optimize.c index d72b23c137a..4271731efa7 100644 --- a/src/gallium/drivers/nv50/nv50_pc_optimize.c +++ b/src/gallium/drivers/nv50/nv50_pc_optimize.c @@ -462,6 +462,10 @@ nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b) mi->saturate = 1; mi->def[0] = nvi->def[0]; mi->def[0]->insn = mi; + if (nvi->flags_def) { + mi->flags_def = nvi->flags_def; + mi->flags_def->insn = mi; + } nv_nvi_delete(nvi); } } diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index a697ff5ecf7..581aad19627 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -91,6 +91,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_SHADOW_MAP: case PIPE_CAP_NPOT_TEXTURES: case PIPE_CAP_ANISOTROPIC_FILTER: + case PIPE_CAP_SCALED_RESOLVE: return 1; case PIPE_CAP_SEAMLESS_CUBE_MAP: return nv50_screen(pscreen)->tesla->grclass >= NVA0_3D; @@ -182,6 +183,8 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, return 1; case PIPE_SHADER_CAP_SUBROUTINES: return 0; /* please inline, or provide function declarations */ + case PIPE_SHADER_CAP_INTEGERS: + return 0; default: NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param); return 0; @@ -315,6 +318,8 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) nv50_screen_init_resource_functions(pscreen); + nouveau_screen_init_vdec(&screen->base); + ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096, &screen->fence.bo); if (ret) @@ -602,6 +607,9 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) screen->mm_VRAM_fe0 = nouveau_mm_create(dev, NOUVEAU_BO_VRAM, 0xfe0); + if (!nv50_blitctx_create(screen)) + goto fail; + nouveau_fence_new(&screen->base, &screen->base.fence.current, FALSE); return pscreen; diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h index 64ad209a728..315ca80c0d2 100644 --- a/src/gallium/drivers/nv50/nv50_screen.h +++ b/src/gallium/drivers/nv50/nv50_screen.h @@ -21,6 +21,8 @@ struct nv50_context; #define NV50_SCREEN_RESIDENT_BO_COUNT 5 +struct nv50_blitctx; + struct nv50_screen { struct nouveau_screen base; struct nouveau_winsys *nvws; @@ -39,6 +41,8 @@ struct nv50_screen { struct nouveau_resource *gp_code_heap; struct nouveau_resource *fp_code_heap; + struct nv50_blitctx *blitctx; + struct { void **entries; int next; @@ -71,6 +75,8 @@ nv50_screen(struct pipe_screen *screen) return (struct nv50_screen *)screen; } +boolean nv50_blitctx_create(struct nv50_screen *); + void nv50_screen_make_buffers_resident(struct nv50_screen *); int nv50_screen_tic_alloc(struct nv50_screen *, void *); diff --git a/src/gallium/drivers/nv50/nv50_shader_state.c b/src/gallium/drivers/nv50/nv50_shader_state.c index e5b10c37bef..d73f7c7f213 100644 --- a/src/gallium/drivers/nv50/nv50_shader_state.c +++ b/src/gallium/drivers/nv50/nv50_shader_state.c @@ -130,13 +130,14 @@ nv50_program_validate(struct nv50_context *nv50, struct nv50_program *prog) int ret; unsigned size; - if (prog->translated) + if (!prog->translated) { + prog->translated = nv50_program_translate(prog); + if (!prog->translated) + return FALSE; + } else + if (prog->res) return TRUE; - prog->translated = nv50_program_translate(prog); - if (!prog->translated) - return FALSE; - if (prog->type == PIPE_SHADER_FRAGMENT) heap = nv50->screen->fp_code_heap; else if (prog->type == PIPE_SHADER_GEOMETRY) heap = nv50->screen->gp_code_heap; diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 8b0b08f8e93..44f2d25c1a7 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -350,7 +350,7 @@ static struct state_validate { #define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0])) boolean -nv50_state_validate(struct nv50_context *nv50) +nv50_state_validate(struct nv50_context *nv50, unsigned words) { unsigned i; @@ -367,6 +367,8 @@ nv50_state_validate(struct nv50_context *nv50) nv50->dirty = 0; } + MARK_RING(nv50->screen->base.channel, words, 0); + nv50_bufctx_emit_relocs(nv50); return TRUE; diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index eefbaad6483..8bca900e1ff 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -198,6 +198,7 @@ nv50_resource_copy_region(struct pipe_context *pipe, { struct nv50_screen *screen = nv50_context(pipe)->screen; int ret; + boolean m2mf; unsigned dst_layer = dstz, src_layer = src_box->z; /* Fallback for buffers. */ @@ -207,9 +208,15 @@ nv50_resource_copy_region(struct pipe_context *pipe, return; } + assert(src->nr_samples == dst->nr_samples); + + m2mf = (src->format == dst->format) || + (util_format_get_blocksizebits(src->format) == + util_format_get_blocksizebits(dst->format)); + nv04_resource(dst)->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; - if (src->format == dst->format && src->nr_samples == dst->nr_samples) { + if (m2mf) { struct nv50_m2mf_rect drect, srect; unsigned i; unsigned nx = util_format_get_nblocksx(src->format, src_box->width); @@ -368,7 +375,7 @@ nv50_clear(struct pipe_context *pipe, unsigned buffers, /* don't need NEW_BLEND, COLOR_MASK doesn't affect CLEAR_BUFFERS */ nv50->dirty &= NV50_NEW_FRAMEBUFFER; - if (!nv50_state_validate(nv50)) + if (!nv50_state_validate(nv50, 9 + (fb->nr_cbufs * 2))) return; if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) { @@ -405,12 +412,546 @@ nv50_clear(struct pipe_context *pipe, unsigned buffers, nv50->dirty = dirty & ~NV50_NEW_FRAMEBUFFER; } + +struct nv50_blitctx +{ + struct nv50_screen *screen; + struct { + struct pipe_framebuffer_state fb; + struct nv50_program *vp; + struct nv50_program *gp; + struct nv50_program *fp; + unsigned num_textures[3]; + unsigned num_samplers[3]; + struct pipe_sampler_view *texture; + struct nv50_tsc_entry *sampler; + unsigned dirty; + unsigned clip_nr; + } saved; + struct nv50_program vp; + struct nv50_program fp; + struct nv50_tsc_entry sampler[2]; /* nearest, bilinear */ + uint32_t fp_offset; + uint16_t color_mask; + uint8_t filter; +}; + +static void +nv50_blitctx_make_vp(struct nv50_blitctx *blit) +{ + static const uint32_t code[] = + { + 0x10000001, /* mov b32 o[0x00] s[0x00] */ /* HPOS.x */ + 0x0423c788, + 0x10000205, /* mov b32 o[0x04] s[0x04] */ /* HPOS.y */ + 0x0423c788, + 0x10000409, /* mov b32 o[0x08] s[0x08] */ /* TEXC.x */ + 0x0423c788, + 0x1000060d, /* mov b32 o[0x0c] s[0x0c] */ /* TEXC.y */ + 0x0423c788, + 0x10000811, /* exit mov b32 o[0x10] s[0x10] */ /* TEXC.z */ + 0x0423c789, + }; + + blit->vp.type = PIPE_SHADER_VERTEX; + blit->vp.translated = TRUE; + blit->vp.code = (uint32_t *)code; /* const_cast */ + blit->vp.code_size = sizeof(code); + blit->vp.max_gpr = 4; + blit->vp.max_out = 5; + blit->vp.out_nr = 2; + blit->vp.out[0].mask = 0x3; + blit->vp.out[0].sn = TGSI_SEMANTIC_POSITION; + blit->vp.out[1].hw = 2; + blit->vp.out[1].mask = 0x7; + blit->vp.out[1].sn = TGSI_SEMANTIC_GENERIC; + blit->vp.vp.attrs[0] = 0x73; + blit->vp.vp.psiz = 0x40; + blit->vp.vp.edgeflag = 0x40; +} + +static void +nv50_blitctx_make_fp(struct nv50_blitctx *blit) +{ + static const uint32_t code[] = + { + /* 3 coords RGBA in, RGBA out, also for Z32_FLOAT(_S8X24_USCALED) */ + 0x80000000, /* interp $r0 v[0x0] */ + 0x80010004, /* interp $r1 v[0x4] */ + 0x80020009, /* interp $r2 flat v[0x8] */ + 0x00040780, + 0xf6800001, /* texauto live { $r0,1,2,3 } $t0 $s0 { $r0,1,2 } */ + 0x0000c785, /* exit */ + + /* 3 coords ZS in, S encoded in R, Z encoded in GBA (8_UNORM) */ + 0x80000000, /* interp $r0 v[0x00] */ + 0x80010004, /* interp $r1 v[0x04] */ + 0x80020009, /* interp $r2 flat v[0x8] */ + 0x00040780, + 0xf6800001, /* texauto live { $r0,1,#,# } $t0 $s0 { $r0,1,2 } */ + 0x00000784, + 0xc03f0009, /* mul f32 $r2 $r0 (2^24 - 1) */ + 0x04b7ffff, + 0xa0000201, /* cvt f32 $r0 s32 $r1 */ + 0x44014780, + 0xa0000409, /* cvt rni s32 $r2 f32 $r2 */ + 0x8c004780, + 0xc0010001, /* mul f32 $r0 $r0 1/0xff */ + 0x03b8080b, + 0xd03f0405, /* and b32 $r1 $r2 0x0000ff */ + 0x0000000f, + 0xd000040d, /* and b32 $r3 $r2 0xff0000 */ + 0x000ff003, + 0xd0000409, /* and b32 $r2 $r2 0x00ff00 */ + 0x00000ff3, + 0xa0000205, /* cvt f32 $r1 s32 $r1 */ + 0x44014780, + 0xa000060d, /* cvt f32 $r3 s32 $r3 */ + 0x44014780, + 0xa0000409, /* cvt f32 $r2 s32 $r2 */ + 0x44014780, + 0xc0010205, /* mul f32 $r1 $r1 1/0x0000ff */ + 0x03b8080b, + 0xc001060d, /* mul f32 $r3 $r3 1/0x00ff00 */ + 0x0338080b, + 0xc0010409, /* mul f32 $r2 $r2 1/0xff0000 */ + 0x0378080b, + 0xf0000001, /* exit never nop */ + 0xe0000001, + + /* 3 coords ZS in, Z encoded in RGB, S encoded in A (U8_UNORM) */ + 0x80000000, /* interp $r0 v[0x00] */ + 0x80010004, /* interp $r1 v[0x04] */ + 0x80020009, /* interp $r2 flat v[0x8] */ + 0x00040780, + 0xf6800001, /* texauto live { $r0,1,#,# } $t0 $s0 { $r0,1,2 } */ + 0x00000784, + 0xc03f0009, /* mul f32 $r2 $r0 (2^24 - 1) */ + 0x04b7ffff, + 0xa0000281, /* cvt f32 $r3 s32 $r1 */ + 0x44014780, + 0xa0000409, /* cvt rni s32 $r2 f32 $r2 */ + 0x8c004780, + 0xc001060d, /* mul f32 $r3 $r3 1/0xff */ + 0x03b8080b, + 0xd03f0401, /* and b32 $r0 $r2 0x0000ff */ + 0x0000000f, + 0xd0000405, /* and b32 $r1 $r2 0x00ff00 */ + 0x00000ff3, + 0xd0000409, /* and b32 $r2 $r2 0xff0000 */ + 0x000ff003, + 0xa0000001, /* cvt f32 $r0 s32 $r0 */ + 0x44014780, + 0xa0000205, /* cvt f32 $r1 s32 $r1 */ + 0x44014780, + 0xa0000409, /* cvt f32 $r2 s32 $r2 */ + 0x44014780, + 0xc0010001, /* mul f32 $r0 $r0 1/0x0000ff */ + 0x03b8080b, + 0xc0010205, /* mul f32 $r1 $r1 1/0x00ff00 */ + 0x0378080b, + 0xc0010409, /* mul f32 $r2 $r2 1/0xff0000 */ + 0x0338080b, + 0xf0000001, /* exit never nop */ + 0xe0000001 + }; + + blit->fp.type = PIPE_SHADER_FRAGMENT; + blit->fp.translated = TRUE; + blit->fp.code = (uint32_t *)code; /* const_cast */ + blit->fp.code_size = sizeof(code); + blit->fp.max_gpr = 4; + blit->fp.max_out = 4; + blit->fp.in_nr = 1; + blit->fp.in[0].mask = 0x7; /* last component flat */ + blit->fp.in[0].linear = 1; + blit->fp.in[0].sn = TGSI_SEMANTIC_GENERIC; + blit->fp.out_nr = 1; + blit->fp.out[0].mask = 0xf; + blit->fp.out[0].sn = TGSI_SEMANTIC_COLOR; + blit->fp.fp.interp = 0x00020403; + blit->fp.gp.primid = 0x80; +} + +static void +nv50_blitctx_make_sampler(struct nv50_blitctx *blit) +{ + /* clamp to edge, min/max lod = 0, nearest filtering */ + + blit->sampler[0].id = -1; + + blit->sampler[0].tsc[0] = 0x00000092; + blit->sampler[0].tsc[1] = 0x00000051; + + /* clamp to edge, min/max lod = 0, bilinear filtering */ + + blit->sampler[1].id = -1; + + blit->sampler[1].tsc[0] = 0x00000092; + blit->sampler[1].tsc[1] = 0x00000062; +} + +/* Since shaders cannot export stencil, we cannot copy stencil values when + * rendering to ZETA, so we attach the ZS surface to a colour render target. + */ +static INLINE enum pipe_format +nv50_blit_zeta_to_colour_format(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_Z16_UNORM: return PIPE_FORMAT_R16_UNORM; + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + case PIPE_FORMAT_Z24X8_UNORM: return PIPE_FORMAT_R8G8B8A8_UNORM; + case PIPE_FORMAT_Z32_FLOAT: return PIPE_FORMAT_R32_FLOAT; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: return PIPE_FORMAT_R32G32_FLOAT; + default: + assert(0); + return PIPE_FORMAT_NONE; + } +} + +static void +nv50_blitctx_get_color_mask_and_fp(struct nv50_blitctx *blit, + enum pipe_format format, uint8_t mask) +{ + blit->color_mask = 0; + + switch (format) { + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + blit->fp_offset = 160; + if (mask & PIPE_MASK_Z) + blit->color_mask |= 0x0111; + if (mask & PIPE_MASK_S) + blit->color_mask |= 0x1000; + break; + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + blit->fp_offset = 24; + if (mask & PIPE_MASK_Z) + blit->color_mask |= 0x1110; + if (mask & PIPE_MASK_S) + blit->color_mask |= 0x0001; + break; + default: + blit->fp_offset = 0; + if (mask & (PIPE_MASK_R | PIPE_MASK_Z)) blit->color_mask |= 0x0001; + if (mask & (PIPE_MASK_G | PIPE_MASK_S)) blit->color_mask |= 0x0010; + if (mask & PIPE_MASK_B) blit->color_mask |= 0x0100; + if (mask & PIPE_MASK_A) blit->color_mask |= 0x1000; + break; + } +} + +static void +nv50_blit_set_dst(struct nv50_context *nv50, + struct pipe_resource *res, unsigned level, unsigned layer) +{ + struct pipe_context *pipe = &nv50->base.pipe; + struct pipe_surface templ; + + if (util_format_is_depth_or_stencil(res->format)) + templ.format = nv50_blit_zeta_to_colour_format(res->format); + else + templ.format = res->format; + + templ.usage = PIPE_USAGE_STREAM; + templ.u.tex.level = level; + templ.u.tex.first_layer = templ.u.tex.last_layer = layer; + + nv50->framebuffer.cbufs[0] = nv50_miptree_surface_new(pipe, res, &templ); + nv50->framebuffer.nr_cbufs = 1; + nv50->framebuffer.zsbuf = NULL; + nv50->framebuffer.width = nv50->framebuffer.cbufs[0]->width; + nv50->framebuffer.height = nv50->framebuffer.cbufs[0]->height; +} + +static INLINE void +nv50_blit_fixup_tic_entry(struct pipe_sampler_view *view) +{ + struct nv50_tic_entry *ent = nv50_tic_entry(view); + + ent->tic[2] &= ~(1 << 31); /* scaled coordinates, ok with 3d textures ? */ + + /* magic: */ + + ent->tic[3] = 0x20000000; /* affects quality of near vertical edges in MS8 */ +} + +static void +nv50_blit_set_src(struct nv50_context *nv50, + struct pipe_resource *res, unsigned level, unsigned layer) +{ + struct pipe_context *pipe = &nv50->base.pipe; + struct pipe_sampler_view templ; + + templ.format = res->format; + templ.u.tex.first_layer = templ.u.tex.last_layer = layer; + templ.u.tex.first_level = templ.u.tex.last_level = level; + templ.swizzle_r = PIPE_SWIZZLE_RED; + templ.swizzle_g = PIPE_SWIZZLE_GREEN; + templ.swizzle_b = PIPE_SWIZZLE_BLUE; + templ.swizzle_a = PIPE_SWIZZLE_ALPHA; + + nv50->textures[2][0] = nv50_create_sampler_view(pipe, res, &templ); + + nv50_blit_fixup_tic_entry(nv50->textures[2][0]); + + nv50->num_textures[0] = nv50->num_textures[1] = 0; + nv50->num_textures[2] = 1; +} + +static void +nv50_blitctx_prepare_state(struct nv50_blitctx *blit) +{ + struct nouveau_channel *chan = blit->screen->base.channel; + + /* blend state */ + BEGIN_RING(chan, RING_3D(COLOR_MASK(0)), 1); + OUT_RING (chan, blit->color_mask); + BEGIN_RING(chan, RING_3D(BLEND_ENABLE(0)), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(LOGIC_OP_ENABLE), 1); + OUT_RING (chan, 0); + + /* rasterizer state */ +#ifndef NV50_SCISSORS_CLIPPING + BEGIN_RING(chan, RING_3D(SCISSOR_ENABLE(0)), 1); + OUT_RING (chan, 1); +#endif + BEGIN_RING(chan, RING_3D(VERTEX_TWO_SIDE_ENABLE), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(FRAG_COLOR_CLAMP_EN), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(MULTISAMPLE_ENABLE), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(MSAA_MASK(0)), 4); + OUT_RING (chan, 0xffff); + OUT_RING (chan, 0xffff); + OUT_RING (chan, 0xffff); + OUT_RING (chan, 0xffff); + BEGIN_RING(chan, RING_3D(POLYGON_MODE_FRONT), 3); + OUT_RING (chan, NV50_3D_POLYGON_MODE_FRONT_FILL); + OUT_RING (chan, NV50_3D_POLYGON_MODE_BACK_FILL); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(CULL_FACE_ENABLE), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(POLYGON_STIPPLE_ENABLE), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(POLYGON_OFFSET_FILL_ENABLE), 1); + OUT_RING (chan, 0); + + /* zsa state */ + BEGIN_RING(chan, RING_3D(DEPTH_TEST_ENABLE), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(STENCIL_ENABLE), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(ALPHA_TEST_ENABLE), 1); + OUT_RING (chan, 0); +} + +static void +nv50_blitctx_pre_blit(struct nv50_blitctx *blit, struct nv50_context *nv50) +{ + int s; + + blit->saved.fb.width = nv50->framebuffer.width; + blit->saved.fb.height = nv50->framebuffer.height; + blit->saved.fb.nr_cbufs = nv50->framebuffer.nr_cbufs; + blit->saved.fb.cbufs[0] = nv50->framebuffer.cbufs[0]; + blit->saved.fb.zsbuf = nv50->framebuffer.zsbuf; + + blit->saved.vp = nv50->vertprog; + blit->saved.gp = nv50->gmtyprog; + blit->saved.fp = nv50->fragprog; + + nv50->vertprog = &blit->vp; + nv50->gmtyprog = NULL; + nv50->fragprog = &blit->fp; + + blit->saved.clip_nr = nv50->clip.nr; + + nv50->clip.nr = 0; + + for (s = 0; s < 3; ++s) { + blit->saved.num_textures[s] = nv50->num_textures[s]; + blit->saved.num_samplers[s] = nv50->num_samplers[s]; + } + blit->saved.texture = nv50->textures[2][0]; + blit->saved.sampler = nv50->samplers[2][0]; + + nv50->samplers[2][0] = &blit->sampler[blit->filter]; + + nv50->num_samplers[0] = nv50->num_samplers[1] = 0; + nv50->num_samplers[2] = 1; + + blit->saved.dirty = nv50->dirty; + + nv50->dirty = + NV50_NEW_FRAMEBUFFER | + NV50_NEW_VERTPROG | NV50_NEW_FRAGPROG | NV50_NEW_GMTYPROG | + NV50_NEW_TEXTURES | NV50_NEW_SAMPLERS; +} + +static void +nv50_blitctx_post_blit(struct nv50_context *nv50, struct nv50_blitctx *blit) +{ + int s; + + pipe_surface_reference(&nv50->framebuffer.cbufs[0], NULL); + + nv50->framebuffer.width = blit->saved.fb.width; + nv50->framebuffer.height = blit->saved.fb.height; + nv50->framebuffer.nr_cbufs = blit->saved.fb.nr_cbufs; + nv50->framebuffer.cbufs[0] = blit->saved.fb.cbufs[0]; + nv50->framebuffer.zsbuf = blit->saved.fb.zsbuf; + + nv50->vertprog = blit->saved.vp; + nv50->gmtyprog = blit->saved.gp; + nv50->fragprog = blit->saved.fp; + + nv50->clip.nr = blit->saved.clip_nr; + + pipe_sampler_view_reference(&nv50->textures[2][0], NULL); + + for (s = 0; s < 3; ++s) { + nv50->num_textures[s] = blit->saved.num_textures[s]; + nv50->num_samplers[s] = blit->saved.num_samplers[s]; + } + nv50->textures[2][0] = blit->saved.texture; + nv50->samplers[2][0] = blit->saved.sampler; + + nv50->dirty = blit->saved.dirty | + (NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR | NV50_NEW_SAMPLE_MASK | + NV50_NEW_RASTERIZER | NV50_NEW_ZSA | NV50_NEW_BLEND | + NV50_NEW_TEXTURES | NV50_NEW_SAMPLERS | + NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG | NV50_NEW_FRAGPROG); +} + +static void +nv50_resource_resolve(struct pipe_context *pipe, + const struct pipe_resolve_info *info) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nv50_screen *screen = nv50->screen; + struct nv50_blitctx *blit = screen->blitctx; + struct nouveau_channel *chan = screen->base.channel; + struct pipe_resource *src = info->src.res; + struct pipe_resource *dst = info->dst.res; + float x0, x1, y0, y1, z; + float x_range, y_range; + + nv50_blitctx_get_color_mask_and_fp(blit, dst->format, info->mask); + + blit->filter = util_format_is_depth_or_stencil(dst->format) ? 0 : 1; + + nv50_blitctx_pre_blit(blit, nv50); + + nv50_blit_set_dst(nv50, dst, info->dst.level, info->dst.layer); + nv50_blit_set_src(nv50, src, 0, info->src.layer); + + nv50_blitctx_prepare_state(blit); + + nv50_state_validate(nv50, 36); + + x_range = + (float)(info->src.x1 - info->src.x0) / + (float)(info->dst.x1 - info->dst.x0); + y_range = + (float)(info->src.y1 - info->src.y0) / + (float)(info->dst.y1 - info->dst.y0); + + x0 = (float)info->src.x0 - x_range * (float)info->dst.x0; + y0 = (float)info->src.y0 - y_range * (float)info->dst.y0; + + x1 = x0 + 16384.0f * x_range; + y1 = y0 + 16384.0f * y_range; + + x0 *= (float)(1 << nv50_miptree(src)->ms_x); + x1 *= (float)(1 << nv50_miptree(src)->ms_x); + y0 *= (float)(1 << nv50_miptree(src)->ms_y); + y1 *= (float)(1 << nv50_miptree(src)->ms_y); + + z = (float)info->src.layer; + + BEGIN_RING(chan, RING_3D(FP_START_ID), 1); + OUT_RING (chan, + blit->fp.code_base + blit->fp_offset); + + BEGIN_RING(chan, RING_3D(VIEWPORT_TRANSFORM_EN), 1); + OUT_RING (chan, 0); + + /* Draw a large triangle in screen coordinates covering the whole + * render target, with scissors defining the destination region. + * The vertex is supplied with non-normalized texture coordinates + * arranged in a way to yield the desired offset and scale. + */ + + BEGIN_RING(chan, RING_3D(SCISSOR_HORIZ(0)), 2); + OUT_RING (chan, (info->dst.x1 << 16) | info->dst.x0); + OUT_RING (chan, (info->dst.y1 << 16) | info->dst.y0); + + BEGIN_RING(chan, RING_3D(VERTEX_BEGIN_GL), 1); + OUT_RING (chan, NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES); + BEGIN_RING(chan, RING_3D(VTX_ATTR_3F_X(1)), 3); + OUT_RINGf (chan, x0); + OUT_RINGf (chan, y0); + OUT_RINGf (chan, z); + BEGIN_RING(chan, RING_3D(VTX_ATTR_2F_X(0)), 2); + OUT_RINGf (chan, 0.0f); + OUT_RINGf (chan, 0.0f); + BEGIN_RING(chan, RING_3D(VTX_ATTR_3F_X(1)), 3); + OUT_RINGf (chan, x1); + OUT_RINGf (chan, y0); + OUT_RINGf (chan, z); + BEGIN_RING(chan, RING_3D(VTX_ATTR_2F_X(0)), 2); + OUT_RINGf (chan, 16384 << nv50_miptree(dst)->ms_x); + OUT_RINGf (chan, 0.0f); + BEGIN_RING(chan, RING_3D(VTX_ATTR_3F_X(1)), 3); + OUT_RINGf (chan, x0); + OUT_RINGf (chan, y1); + OUT_RINGf (chan, z); + BEGIN_RING(chan, RING_3D(VTX_ATTR_2F_X(0)), 2); + OUT_RINGf (chan, 0.0f); + OUT_RINGf (chan, 16384 << nv50_miptree(dst)->ms_y); + BEGIN_RING(chan, RING_3D(VERTEX_END_GL), 1); + OUT_RING (chan, 0); + + /* re-enable normally constant state */ + + BEGIN_RING(chan, RING_3D(VIEWPORT_TRANSFORM_EN), 1); + OUT_RING (chan, 1); + + nv50_blitctx_post_blit(nv50, blit); +} + +boolean +nv50_blitctx_create(struct nv50_screen *screen) +{ + screen->blitctx = CALLOC_STRUCT(nv50_blitctx); + if (!screen->blitctx) { + NOUVEAU_ERR("failed to allocate blit context\n"); + return FALSE; + } + + screen->blitctx->screen = screen; + + nv50_blitctx_make_vp(screen->blitctx); + nv50_blitctx_make_fp(screen->blitctx); + + nv50_blitctx_make_sampler(screen->blitctx); + + screen->blitctx->color_mask = 0x1111; + + return TRUE; +} + void nv50_init_surface_functions(struct nv50_context *nv50) { struct pipe_context *pipe = &nv50->base.pipe; pipe->resource_copy_region = nv50_resource_copy_region; + pipe->resource_resolve = nv50_resource_resolve; pipe->clear_render_target = nv50_clear_render_target; pipe->clear_depth_stencil = nv50_clear_depth_stencil; } diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index f23008ae4cf..1c8347a793a 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -647,7 +647,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) if (nv50->vbo_user && !(nv50->dirty & (NV50_NEW_VERTEX | NV50_NEW_ARRAYS))) nv50_update_user_vbufs(nv50); - nv50_state_validate(nv50); + nv50_state_validate(nv50, 8); /* 8 as minimum, we use flush_notify here */ chan->flush_notify = nv50_draw_vbo_flush_notify; diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c index 983db23eedb..360afbb943e 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.c +++ b/src/gallium/drivers/nvc0/nvc0_context.c @@ -150,6 +150,8 @@ nvc0_create(struct pipe_screen *pscreen, void *priv) assert(nvc0->draw); draw_set_rasterize_stage(nvc0->draw, nvc0_draw_render_stage(nvc0)); + nouveau_context_init_vdec(&nvc0->base); + return pipe; } diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c index 605a0b04018..c79256a6ba2 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nvc0/nvc0_screen.c @@ -24,6 +24,9 @@ #include "util/u_format_s3tc.h" #include "pipe/p_screen.h" +#include "vl/vl_decoder.h" +#include "vl/vl_video_buffer.h" + #include "nvc0_context.h" #include "nvc0_screen.h" @@ -167,6 +170,8 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, return 1; case PIPE_SHADER_CAP_SUBROUTINES: return 0; /* please inline, or provide function declarations */ + case PIPE_SHADER_CAP_INTEGERS: + return 0; default: NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param); return 0; @@ -373,6 +378,8 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) nvc0_screen_init_resource_functions(pscreen); + nouveau_screen_init_vdec(&screen->base); + ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096, &screen->fence.bo); if (ret) diff --git a/src/gallium/drivers/nvc0/nvc0_surface.c b/src/gallium/drivers/nvc0/nvc0_surface.c index 67bba3c6cc3..a4fd17e5324 100644 --- a/src/gallium/drivers/nvc0/nvc0_surface.c +++ b/src/gallium/drivers/nvc0/nvc0_surface.c @@ -205,6 +205,7 @@ nvc0_resource_copy_region(struct pipe_context *pipe, { struct nvc0_screen *screen = nvc0_context(pipe)->screen; int ret; + boolean m2mf; unsigned dst_layer = dstz, src_layer = src_box->z; /* Fallback for buffers. */ @@ -214,9 +215,15 @@ nvc0_resource_copy_region(struct pipe_context *pipe, return; } + assert(src->nr_samples == dst->nr_samples); + + m2mf = (src->format == dst->format) || + (util_format_get_blocksizebits(src->format) == + util_format_get_blocksizebits(dst->format)); + nv04_resource(dst)->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; - if (src->format == dst->format && src->nr_samples == dst->nr_samples) { + if (m2mf) { struct nv50_m2mf_rect drect, srect; unsigned i; unsigned nx = util_format_get_nblocksx(src->format, src_box->width); diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c index 8cb6cd0938e..3b77c9600c6 100644 --- a/src/gallium/drivers/nvfx/nvfx_screen.c +++ b/src/gallium/drivers/nvfx/nvfx_screen.c @@ -179,6 +179,8 @@ nvfx_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, enum return 1; case PIPE_SHADER_CAP_SUBROUTINES: return 1; + case PIPE_SHADER_CAP_INTEGERS: + return 0; default: break; } @@ -224,6 +226,8 @@ nvfx_screen_get_video_param(struct pipe_screen *screen, case PIPE_VIDEO_CAP_MAX_WIDTH: case PIPE_VIDEO_CAP_MAX_HEIGHT: return vl_video_buffer_max_size(screen); + case PIPE_VIDEO_CAP_NUM_BUFFERS_DESIRED: + return vl_num_buffers_desired(screen, profile); default: return 0; } diff --git a/src/gallium/drivers/nvfx/nvfx_surface.c b/src/gallium/drivers/nvfx/nvfx_surface.c index 339906e6a63..04b0304b44f 100644 --- a/src/gallium/drivers/nvfx/nvfx_surface.c +++ b/src/gallium/drivers/nvfx/nvfx_surface.c @@ -288,7 +288,7 @@ nvfx_resource_copy_region(struct pipe_context *pipe, * TODO: perhaps support reinterpreting the formats */ struct blitter_context* blitter = nvfx_get_blitter(pipe, 1); - util_blitter_copy_region(blitter, dstr, dst_level, dstx, dsty, dstz, srcr, src_level, src_box, TRUE); + util_blitter_copy_texture(blitter, dstr, dst_level, dstx, dsty, dstz, srcr, src_level, src_box, TRUE); nvfx_put_blitter(pipe, blitter); } else diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile index 4088216adcb..4f021276a8f 100644 --- a/src/gallium/drivers/r300/Makefile +++ b/src/gallium/drivers/r300/Makefile @@ -26,19 +26,51 @@ C_SOURCES = \ r300_texture.c \ r300_texture_desc.c \ r300_tgsi_to_rc.c \ - r300_transfer.c + r300_transfer.c \ + \ + compiler/radeon_code.c \ + compiler/radeon_compiler.c \ + compiler/radeon_compiler_util.c \ + compiler/radeon_emulate_branches.c \ + compiler/radeon_emulate_loops.c \ + compiler/radeon_program.c \ + compiler/radeon_program_print.c \ + compiler/radeon_opcodes.c \ + compiler/radeon_program_alu.c \ + compiler/radeon_program_pair.c \ + compiler/radeon_program_tex.c \ + compiler/radeon_pair_translate.c \ + compiler/radeon_pair_schedule.c \ + compiler/radeon_pair_regalloc.c \ + compiler/radeon_pair_dead_sources.c \ + compiler/radeon_dataflow.c \ + compiler/radeon_dataflow_deadcode.c \ + compiler/radeon_dataflow_swizzles.c \ + compiler/radeon_list.c \ + compiler/radeon_optimize.c \ + compiler/radeon_remove_constants.c \ + compiler/radeon_rename_regs.c \ + compiler/radeon_variable.c \ + compiler/r3xx_fragprog.c \ + compiler/r300_fragprog.c \ + compiler/r300_fragprog_swizzle.c \ + compiler/r300_fragprog_emit.c \ + compiler/r500_fragprog.c \ + compiler/r500_fragprog_emit.c \ + compiler/r3xx_vertprog.c \ + compiler/r3xx_vertprog_dump.c \ + compiler/memory_pool.c \ + \ + $(TOP)/src/glsl/ralloc.c \ + $(TOP)/src/mesa/program/register_allocate.c -LIBRARY_INCLUDES = \ - -I$(TOP)/src/mesa/drivers/dri/r300/compiler \ - -I$(TOP)/include - -COMPILER_ARCHIVE = $(TOP)/src/mesa/drivers/dri/r300/compiler/libr300compiler.a -EXTRA_OBJECTS = \ - $(COMPILER_ARCHIVE) +LIBRARY_INCLUDES = \ + -I$(TOP)/include \ + -I$(TOP)/src/mesa \ + -I$(TOP)/src/glsl include ../../Makefile.template -.PHONY: $(COMPILER_ARCHIVE) -$(COMPILER_ARCHIVE): - $(MAKE) -C $(TOP)/src/mesa/drivers/dri/r300/compiler +test: default + @$(MAKE) -s -C compiler/tests/ diff --git a/src/gallium/drivers/r300/SConscript b/src/gallium/drivers/r300/SConscript index 3af157a7956..7ffd1c27c96 100644 --- a/src/gallium/drivers/r300/SConscript +++ b/src/gallium/drivers/r300/SConscript @@ -1,13 +1,11 @@ Import('*') -r300compiler = SConscript('#/src/mesa/drivers/dri/r300/compiler/SConscript') - env = env.Clone() -# add the paths for r300compiler env.Append(CPPPATH = [ - '#/src/mesa/drivers/dri/r300/compiler', '#/include', '#/src/mesa', + '#/src/glsl', + '#/src/mapi', ]) r300 = env.ConvenienceLibrary( @@ -36,7 +34,41 @@ r300 = env.ConvenienceLibrary( 'r300_texture_desc.c', 'r300_tgsi_to_rc.c', 'r300_transfer.c', - ] + r300compiler) + r300compiler + 'compiler/radeon_code.c', + 'compiler/radeon_compiler.c', + 'compiler/radeon_compiler_util.c', + 'compiler/radeon_program.c', + 'compiler/radeon_program_print.c', + 'compiler/radeon_opcodes.c', + 'compiler/radeon_program_alu.c', + 'compiler/radeon_program_pair.c', + 'compiler/radeon_program_tex.c', + 'compiler/radeon_pair_translate.c', + 'compiler/radeon_pair_schedule.c', + 'compiler/radeon_pair_regalloc.c', + 'compiler/radeon_pair_dead_sources.c', + 'compiler/radeon_optimize.c', + 'compiler/radeon_remove_constants.c', + 'compiler/radeon_rename_regs.c', + 'compiler/radeon_emulate_branches.c', + 'compiler/radeon_emulate_loops.c', + 'compiler/radeon_dataflow.c', + 'compiler/radeon_dataflow_deadcode.c', + 'compiler/radeon_dataflow_swizzles.c', + 'compiler/radeon_variable.c', + 'compiler/radeon_list.c', + 'compiler/r3xx_fragprog.c', + 'compiler/r300_fragprog.c', + 'compiler/r300_fragprog_swizzle.c', + 'compiler/r300_fragprog_emit.c', + 'compiler/r500_fragprog.c', + 'compiler/r500_fragprog_emit.c', + 'compiler/r3xx_vertprog.c', + 'compiler/r3xx_vertprog_dump.c', + 'compiler/memory_pool.c', + '#/src/glsl/ralloc.c', + '#/src/mesa/program/register_allocate.c' + ]) env.Alias('r300', r300) diff --git a/src/gallium/drivers/r300/compiler/memory_pool.c b/src/gallium/drivers/r300/compiler/memory_pool.c new file mode 100644 index 00000000000..ddcdddf9e3c --- /dev/null +++ b/src/gallium/drivers/r300/compiler/memory_pool.c @@ -0,0 +1,97 @@ +/* + * Copyright 2009 Nicolai Hähnle <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "memory_pool.h" + +#include <assert.h> +#include <stdlib.h> +#include <string.h> + + +#define POOL_LARGE_ALLOC 4096 +#define POOL_ALIGN 8 + + +struct memory_block { + struct memory_block * next; +}; + +void memory_pool_init(struct memory_pool * pool) +{ + memset(pool, 0, sizeof(struct memory_pool)); +} + + +void memory_pool_destroy(struct memory_pool * pool) +{ + while(pool->blocks) { + struct memory_block * block = pool->blocks; + pool->blocks = block->next; + free(block); + } +} + +static void refill_pool(struct memory_pool * pool) +{ + unsigned int blocksize = pool->total_allocated; + struct memory_block * newblock; + + if (!blocksize) + blocksize = 2*POOL_LARGE_ALLOC; + + newblock = (struct memory_block*)malloc(blocksize); + newblock->next = pool->blocks; + pool->blocks = newblock; + + pool->head = (unsigned char*)(newblock + 1); + pool->end = ((unsigned char*)newblock) + blocksize; + pool->total_allocated += blocksize; +} + + +void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes) +{ + if (bytes < POOL_LARGE_ALLOC) { + void * ptr; + + if (pool->head + bytes > pool->end) + refill_pool(pool); + + assert(pool->head + bytes <= pool->end); + + ptr = pool->head; + + pool->head += bytes; + pool->head = (unsigned char*)(((unsigned long)pool->head + POOL_ALIGN - 1) & ~(POOL_ALIGN - 1)); + + return ptr; + } else { + struct memory_block * block = (struct memory_block*)malloc(bytes + sizeof(struct memory_block)); + + block->next = pool->blocks; + pool->blocks = block; + + return (block + 1); + } +} + + diff --git a/src/gallium/drivers/r300/compiler/memory_pool.h b/src/gallium/drivers/r300/compiler/memory_pool.h new file mode 100644 index 00000000000..42344d0e3ba --- /dev/null +++ b/src/gallium/drivers/r300/compiler/memory_pool.h @@ -0,0 +1,80 @@ +/* + * Copyright 2009 Nicolai Hähnle <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef MEMORY_POOL_H +#define MEMORY_POOL_H + +struct memory_block; + +/** + * Provides a pool of memory that can quickly be allocated from, at the + * cost of being unable to explicitly free one of the allocated blocks. + * Instead, the entire pool can be freed at once. + * + * The idea is to allow one to quickly allocate a flexible amount of + * memory during operations like shader compilation while avoiding + * reference counting headaches. + */ +struct memory_pool { + unsigned char * head; + unsigned char * end; + unsigned int total_allocated; + struct memory_block * blocks; +}; + + +void memory_pool_init(struct memory_pool * pool); +void memory_pool_destroy(struct memory_pool * pool); +void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes); + + +/** + * Generic helper for growing an array that has separate size/count + * and reserved counters to accomodate up to num new element. + * + * type * Array; + * unsigned int Size; + * unsigned int Reserved; + * + * memory_pool_array_reserve(pool, type, Array, Size, Reserved, k); + * assert(Size + k < Reserved); + * + * \note Size is not changed by this macro. + * + * \warning Array, Size, Reserved have to be lvalues and may be evaluated + * several times. + */ +#define memory_pool_array_reserve(pool, type, array, size, reserved, num) do { \ + unsigned int _num = (num); \ + if ((size) + _num > (reserved)) { \ + unsigned int newreserve = (reserved) * 2; \ + type * newarray; \ + if (newreserve < _num) \ + newreserve = 4 * _num; /* arbitrary heuristic */ \ + newarray = memory_pool_malloc((pool), newreserve * sizeof(type)); \ + memcpy(newarray, (array), (size) * sizeof(type)); \ + (array) = newarray; \ + (reserved) = newreserve; \ + } \ +} while(0) + +#endif /* MEMORY_POOL_H */ diff --git a/src/gallium/drivers/r300/compiler/r300_fragprog.c b/src/gallium/drivers/r300/compiler/r300_fragprog.c new file mode 100644 index 00000000000..deba9ca834d --- /dev/null +++ b/src/gallium/drivers/r300/compiler/r300_fragprog.c @@ -0,0 +1,338 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "r300_fragprog.h" + +#include <stdio.h> + +#include "../r300_reg.h" + +static void presub_string(char out[10], unsigned int inst) +{ + switch(inst & 0x600000){ + case R300_ALU_SRCP_1_MINUS_2_SRC0: + sprintf(out, "bias"); + break; + case R300_ALU_SRCP_SRC1_MINUS_SRC0: + sprintf(out, "sub"); + break; + case R300_ALU_SRCP_SRC1_PLUS_SRC0: + sprintf(out, "add"); + break; + case R300_ALU_SRCP_1_MINUS_SRC0: + sprintf(out, "inv "); + break; + } +} + +static int get_msb(unsigned int bit, unsigned int r400_ext_addr) +{ + return (r400_ext_addr & bit) ? 1 << 5 : 0; +} + +/* just some random things... */ +void r300FragmentProgramDump(struct radeon_compiler *c, void *user) +{ + struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c; + struct r300_fragment_program_code *code = &compiler->code->code.r300; + int n, i, j; + static int pc = 0; + + fprintf(stderr, "pc=%d*************************************\n", pc++); + + fprintf(stderr, "Hardware program\n"); + fprintf(stderr, "----------------\n"); + if (c->is_r400) { + fprintf(stderr, "code_offset_ext: %08x\n", code->r400_code_offset_ext); + } + + for (n = 0; n <= (code->config & 3); n++) { + uint32_t code_addr = code->code_addr[3 - (code->config & 3) + n]; + unsigned int alu_offset = ((code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT) + + (((code->r400_code_offset_ext >> (24 - (n * 6))) & 0x7) << 6); + unsigned int alu_end = ((code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT) + + (((code->r400_code_offset_ext >> (27 - (n * 6))) & 0x7) << 6); + int tex_offset = (code_addr & R300_TEX_START_MASK) >> R300_TEX_START_SHIFT; + int tex_end = (code_addr & R300_TEX_SIZE_MASK) >> R300_TEX_SIZE_SHIFT; + + fprintf(stderr, "NODE %d: alu_offset: %u, tex_offset: %d, " + "alu_end: %u, tex_end: %d (code_addr: %08x)\n", n, + alu_offset, tex_offset, alu_end, tex_end, code_addr); + + if (n > 0 || (code->config & R300_PFS_CNTL_FIRST_NODE_HAS_TEX)) { + fprintf(stderr, " TEX:\n"); + for (i = tex_offset; + i <= tex_offset + tex_end; + ++i) { + const char *instr; + + switch ((code->tex. + inst[i] >> R300_TEX_INST_SHIFT) & + 15) { + case R300_TEX_OP_LD: + instr = "TEX"; + break; + case R300_TEX_OP_KIL: + instr = "KIL"; + break; + case R300_TEX_OP_TXP: + instr = "TXP"; + break; + case R300_TEX_OP_TXB: + instr = "TXB"; + break; + default: + instr = "UNKNOWN"; + } + + fprintf(stderr, + " %s t%i, %c%i, texture[%i] (%08x)\n", + instr, + (code->tex. + inst[i] >> R300_DST_ADDR_SHIFT) & 31, + 't', + (code->tex. + inst[i] >> R300_SRC_ADDR_SHIFT) & 31, + (code->tex. + inst[i] & R300_TEX_ID_MASK) >> + R300_TEX_ID_SHIFT, + code->tex.inst[i]); + } + } + + for (i = alu_offset; + i <= alu_offset + alu_end; ++i) { + char srcc[4][10], dstc[20]; + char srca[4][10], dsta[20]; + char argc[3][20]; + char arga[3][20]; + char flags[5], tmp[10]; + + for (j = 0; j < 3; ++j) { + int regc = code->alu.inst[i].rgb_addr >> (j * 6); + int rega = code->alu.inst[i].alpha_addr >> (j * 6); + int msbc = get_msb(R400_ADDR_EXT_RGB_MSB_BIT(j), + code->alu.inst[i].r400_ext_addr); + int msba = get_msb(R400_ADDR_EXT_A_MSB_BIT(j), + code->alu.inst[i].r400_ext_addr); + + sprintf(srcc[j], "%c%i", + (regc & 32) ? 'c' : 't', (regc & 31) | msbc); + sprintf(srca[j], "%c%i", + (rega & 32) ? 'c' : 't', (rega & 31) | msba); + } + + dstc[0] = 0; + sprintf(flags, "%s%s%s", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_REG_X) ? "x" : "", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_REG_Y) ? "y" : "", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_REG_Z) ? "z" : ""); + if (flags[0] != 0) { + unsigned int msb = get_msb( + R400_ADDRD_EXT_RGB_MSB_BIT, + code->alu.inst[i].r400_ext_addr); + + sprintf(dstc, "t%i.%s ", + ((code->alu.inst[i]. + rgb_addr >> R300_ALU_DSTC_SHIFT) + & 31) | msb, + flags); + } + sprintf(flags, "%s%s%s", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_OUTPUT_X) ? "x" : "", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_OUTPUT_Z) ? "z" : ""); + if (flags[0] != 0) { + sprintf(tmp, "o%i.%s", + (code->alu.inst[i]. + rgb_addr >> 29) & 3, + flags); + strcat(dstc, tmp); + } + /* Presub */ + presub_string(srcc[3], code->alu.inst[i].rgb_inst); + presub_string(srca[3], code->alu.inst[i].alpha_inst); + + dsta[0] = 0; + if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_REG) { + unsigned int msb = get_msb( + R400_ADDRD_EXT_A_MSB_BIT, + code->alu.inst[i].r400_ext_addr); + sprintf(dsta, "t%i.w ", + ((code->alu.inst[i]. + alpha_addr >> R300_ALU_DSTA_SHIFT) & 31) + | msb); + } + if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_OUTPUT) { + sprintf(tmp, "o%i.w ", + (code->alu.inst[i]. + alpha_addr >> 25) & 3); + strcat(dsta, tmp); + } + if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_DEPTH) { + strcat(dsta, "Z"); + } + + fprintf(stderr, + "%3i: xyz: %3s %3s %3s %5s-> %-20s (%08x)\n" + " w: %3s %3s %3s %5s-> %-20s (%08x)\n", i, + srcc[0], srcc[1], srcc[2], srcc[3], dstc, + code->alu.inst[i].rgb_addr, srca[0], srca[1], + srca[2], srca[3], dsta, + code->alu.inst[i].alpha_addr); + + for (j = 0; j < 3; ++j) { + int regc = code->alu.inst[i].rgb_inst >> (j * 7); + int rega = code->alu.inst[i].alpha_inst >> (j * 7); + int d; + char buf[20]; + + d = regc & 31; + if (d < 12) { + switch (d % 4) { + case R300_ALU_ARGC_SRC0C_XYZ: + sprintf(buf, "%s.xyz", + srcc[d / 4]); + break; + case R300_ALU_ARGC_SRC0C_XXX: + sprintf(buf, "%s.xxx", + srcc[d / 4]); + break; + case R300_ALU_ARGC_SRC0C_YYY: + sprintf(buf, "%s.yyy", + srcc[d / 4]); + break; + case R300_ALU_ARGC_SRC0C_ZZZ: + sprintf(buf, "%s.zzz", + srcc[d / 4]); + break; + } + } else if (d < 15) { + sprintf(buf, "%s.www", srca[d - 12]); + } else if (d < 20 ) { + switch(d) { + case R300_ALU_ARGC_SRCP_XYZ: + sprintf(buf, "srcp.xyz"); + break; + case R300_ALU_ARGC_SRCP_XXX: + sprintf(buf, "srcp.xxx"); + break; + case R300_ALU_ARGC_SRCP_YYY: + sprintf(buf, "srcp.yyy"); + break; + case R300_ALU_ARGC_SRCP_ZZZ: + sprintf(buf, "srcp.zzz"); + break; + case R300_ALU_ARGC_SRCP_WWW: + sprintf(buf, "srcp.www"); + break; + } + } else if (d == 20) { + sprintf(buf, "0.0"); + } else if (d == 21) { + sprintf(buf, "1.0"); + } else if (d == 22) { + sprintf(buf, "0.5"); + } else if (d >= 23 && d < 32) { + d -= 23; + switch (d / 3) { + case 0: + sprintf(buf, "%s.yzx", + srcc[d % 3]); + break; + case 1: + sprintf(buf, "%s.zxy", + srcc[d % 3]); + break; + case 2: + sprintf(buf, "%s.Wzy", + srcc[d % 3]); + break; + } + } else { + sprintf(buf, "%i", d); + } + + sprintf(argc[j], "%s%s%s%s", + (regc & 32) ? "-" : "", + (regc & 64) ? "|" : "", + buf, (regc & 64) ? "|" : ""); + + d = rega & 31; + if (d < 9) { + sprintf(buf, "%s.%c", srcc[d / 3], + 'x' + (char)(d % 3)); + } else if (d < 12) { + sprintf(buf, "%s.w", srca[d - 9]); + } else if (d < 16) { + switch(d) { + case R300_ALU_ARGA_SRCP_X: + sprintf(buf, "srcp.x"); + break; + case R300_ALU_ARGA_SRCP_Y: + sprintf(buf, "srcp.y"); + break; + case R300_ALU_ARGA_SRCP_Z: + sprintf(buf, "srcp.z"); + break; + case R300_ALU_ARGA_SRCP_W: + sprintf(buf, "srcp.w"); + break; + } + } else if (d == 16) { + sprintf(buf, "0.0"); + } else if (d == 17) { + sprintf(buf, "1.0"); + } else if (d == 18) { + sprintf(buf, "0.5"); + } else { + sprintf(buf, "%i", d); + } + + sprintf(arga[j], "%s%s%s%s", + (rega & 32) ? "-" : "", + (rega & 64) ? "|" : "", + buf, (rega & 64) ? "|" : ""); + } + + fprintf(stderr, " xyz: %8s %8s %8s op: %08x %s\n" + " w: %8s %8s %8s op: %08x\n", + argc[0], argc[1], argc[2], + code->alu.inst[i].rgb_inst, + code->alu.inst[i].rgb_inst & R300_ALU_INSERT_NOP ? + "NOP" : "", + arga[0], arga[1],arga[2], + code->alu.inst[i].alpha_inst); + } + } +} diff --git a/src/gallium/drivers/r300/compiler/r300_fragprog.h b/src/gallium/drivers/r300/compiler/r300_fragprog.h new file mode 100644 index 00000000000..0c88bab2f33 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/r300_fragprog.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/* + * Authors: + * Ben Skeggs <[email protected]> + * Jerome Glisse <[email protected]> + */ +#ifndef __R300_FRAGPROG_H_ +#define __R300_FRAGPROG_H_ + +#include "radeon_compiler.h" +#include "radeon_program.h" + + +extern void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user); + +extern void r300FragmentProgramDump(struct radeon_compiler *c, void *user); + +#endif diff --git a/src/gallium/drivers/r300/compiler/r300_fragprog_emit.c b/src/gallium/drivers/r300/compiler/r300_fragprog_emit.c new file mode 100644 index 00000000000..e6fd1fde62d --- /dev/null +++ b/src/gallium/drivers/r300/compiler/r300_fragprog_emit.c @@ -0,0 +1,536 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + * + * Emit the r300_fragment_program_code that can be understood by the hardware. + * Input is a pre-transformed radeon_program. + * + * \author Ben Skeggs <[email protected]> + * + * \author Jerome Glisse <[email protected]> + */ + +#include "r300_fragprog.h" + +#include "../r300_reg.h" + +#include "radeon_program_pair.h" +#include "r300_fragprog_swizzle.h" + + +struct r300_emit_state { + struct r300_fragment_program_compiler * compiler; + + unsigned current_node : 2; + unsigned node_first_tex : 8; + unsigned node_first_alu : 8; + uint32_t node_flags; +}; + +#define PROG_CODE \ + struct r300_fragment_program_compiler *c = emit->compiler; \ + struct r300_fragment_program_code *code = &c->code->code.r300 + +#define error(fmt, args...) do { \ + rc_error(&c->Base, "%s::%s(): " fmt "\n", \ + __FILE__, __FUNCTION__, ##args); \ + } while(0) + +static unsigned int get_msbs_alu(unsigned int bits) +{ + return (bits >> 6) & 0x7; +} + +/** + * @param lsbs The number of least significant bits + */ +static unsigned int get_msbs_tex(unsigned int bits, unsigned int lsbs) +{ + return (bits >> lsbs) & 0x15; +} + +#define R400_EXT_GET_MSBS(x, lsbs, mask) (((x) >> lsbs) & mask) + +/** + * Mark a temporary register as used. + */ +static void use_temporary(struct r300_fragment_program_code *code, unsigned int index) +{ + if (index > code->pixsize) + code->pixsize = index; +} + +static unsigned int use_source(struct r300_fragment_program_code* code, struct rc_pair_instruction_source src) +{ + if (!src.Used) + return 0; + + if (src.File == RC_FILE_CONSTANT) { + return src.Index | (1 << 5); + } else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) { + use_temporary(code, src.Index); + return src.Index & 0x1f; + } + + return 0; +} + + +static unsigned int translate_rgb_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode) +{ + switch(opcode) { + case RC_OPCODE_CMP: return R300_ALU_OUTC_CMP; + case RC_OPCODE_CND: return R300_ALU_OUTC_CND; + case RC_OPCODE_DP3: return R300_ALU_OUTC_DP3; + case RC_OPCODE_DP4: return R300_ALU_OUTC_DP4; + case RC_OPCODE_FRC: return R300_ALU_OUTC_FRC; + default: + error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name); + /* fall through */ + case RC_OPCODE_NOP: + /* fall through */ + case RC_OPCODE_MAD: return R300_ALU_OUTC_MAD; + case RC_OPCODE_MAX: return R300_ALU_OUTC_MAX; + case RC_OPCODE_MIN: return R300_ALU_OUTC_MIN; + case RC_OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA; + } +} + +static unsigned int translate_alpha_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode) +{ + switch(opcode) { + case RC_OPCODE_CMP: return R300_ALU_OUTA_CMP; + case RC_OPCODE_CND: return R300_ALU_OUTA_CND; + case RC_OPCODE_DP3: return R300_ALU_OUTA_DP4; + case RC_OPCODE_DP4: return R300_ALU_OUTA_DP4; + case RC_OPCODE_EX2: return R300_ALU_OUTA_EX2; + case RC_OPCODE_FRC: return R300_ALU_OUTA_FRC; + case RC_OPCODE_LG2: return R300_ALU_OUTA_LG2; + default: + error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name); + /* fall through */ + case RC_OPCODE_NOP: + /* fall through */ + case RC_OPCODE_MAD: return R300_ALU_OUTA_MAD; + case RC_OPCODE_MAX: return R300_ALU_OUTA_MAX; + case RC_OPCODE_MIN: return R300_ALU_OUTA_MIN; + case RC_OPCODE_RCP: return R300_ALU_OUTA_RCP; + case RC_OPCODE_RSQ: return R300_ALU_OUTA_RSQ; + } +} + +/** + * Emit one paired ALU instruction. + */ +static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* inst) +{ + int ip; + int j; + PROG_CODE; + + if (code->alu.length >= c->Base.max_alu_insts) { + error("Too many ALU instructions"); + return 0; + } + + ip = code->alu.length++; + + code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode); + code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode); + + for(j = 0; j < 3; ++j) { + /* Set the RGB address */ + unsigned int src = use_source(code, inst->RGB.Src[j]); + unsigned int arg; + if (inst->RGB.Src[j].Index >= R300_PFS_NUM_TEMP_REGS) + code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_RGB_MSB_BIT(j); + + code->alu.inst[ip].rgb_addr |= src << (6*j); + + /* Set the Alpha address */ + src = use_source(code, inst->Alpha.Src[j]); + if (inst->Alpha.Src[j].Index >= R300_PFS_NUM_TEMP_REGS) + code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_A_MSB_BIT(j); + + code->alu.inst[ip].alpha_addr |= src << (6*j); + + arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle); + arg |= inst->RGB.Arg[j].Abs << 6; + arg |= inst->RGB.Arg[j].Negate << 5; + code->alu.inst[ip].rgb_inst |= arg << (7*j); + + arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle); + arg |= inst->Alpha.Arg[j].Abs << 6; + arg |= inst->Alpha.Arg[j].Negate << 5; + code->alu.inst[ip].alpha_inst |= arg << (7*j); + } + + /* Presubtract */ + if (inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) { + switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) { + case RC_PRESUB_BIAS: + code->alu.inst[ip].rgb_inst |= + R300_ALU_SRCP_1_MINUS_2_SRC0; + break; + case RC_PRESUB_ADD: + code->alu.inst[ip].rgb_inst |= + R300_ALU_SRCP_SRC1_PLUS_SRC0; + break; + case RC_PRESUB_SUB: + code->alu.inst[ip].rgb_inst |= + R300_ALU_SRCP_SRC1_MINUS_SRC0; + break; + case RC_PRESUB_INV: + code->alu.inst[ip].rgb_inst |= + R300_ALU_SRCP_1_MINUS_SRC0; + break; + default: + break; + } + } + + if (inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) { + switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) { + case RC_PRESUB_BIAS: + code->alu.inst[ip].alpha_inst |= + R300_ALU_SRCP_1_MINUS_2_SRC0; + break; + case RC_PRESUB_ADD: + code->alu.inst[ip].alpha_inst |= + R300_ALU_SRCP_SRC1_PLUS_SRC0; + break; + case RC_PRESUB_SUB: + code->alu.inst[ip].alpha_inst |= + R300_ALU_SRCP_SRC1_MINUS_SRC0; + break; + case RC_PRESUB_INV: + code->alu.inst[ip].alpha_inst |= + R300_ALU_SRCP_1_MINUS_SRC0; + break; + default: + break; + } + } + + if (inst->RGB.Saturate) + code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP; + if (inst->Alpha.Saturate) + code->alu.inst[ip].alpha_inst |= R300_ALU_OUTA_CLAMP; + + if (inst->RGB.WriteMask) { + use_temporary(code, inst->RGB.DestIndex); + if (inst->RGB.DestIndex >= R300_PFS_NUM_TEMP_REGS) + code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_RGB_MSB_BIT; + code->alu.inst[ip].rgb_addr |= + ((inst->RGB.DestIndex & 0x1f) << R300_ALU_DSTC_SHIFT) | + (inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT); + } + if (inst->RGB.OutputWriteMask) { + code->alu.inst[ip].rgb_addr |= + (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT) | + R300_RGB_TARGET(inst->RGB.Target); + emit->node_flags |= R300_RGBA_OUT; + } + + if (inst->Alpha.WriteMask) { + use_temporary(code, inst->Alpha.DestIndex); + if (inst->Alpha.DestIndex >= R300_PFS_NUM_TEMP_REGS) + code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_A_MSB_BIT; + code->alu.inst[ip].alpha_addr |= + ((inst->Alpha.DestIndex & 0x1f) << R300_ALU_DSTA_SHIFT) | + R300_ALU_DSTA_REG; + } + if (inst->Alpha.OutputWriteMask) { + code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT | + R300_ALPHA_TARGET(inst->Alpha.Target); + emit->node_flags |= R300_RGBA_OUT; + } + if (inst->Alpha.DepthWriteMask) { + code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH; + emit->node_flags |= R300_W_OUT; + c->code->writes_depth = 1; + } + if (inst->Nop) + code->alu.inst[ip].rgb_inst |= R300_ALU_INSERT_NOP; + + return 1; +} + + +/** + * Finish the current node without advancing to the next one. + */ +static int finish_node(struct r300_emit_state * emit) +{ + struct r300_fragment_program_compiler * c = emit->compiler; + struct r300_fragment_program_code *code = &emit->compiler->code->code.r300; + unsigned alu_offset; + unsigned alu_end; + unsigned tex_offset; + unsigned tex_end; + + unsigned int alu_offset_msbs, alu_end_msbs; + + if (code->alu.length == emit->node_first_alu) { + /* Generate a single NOP for this node */ + struct rc_pair_instruction inst; + memset(&inst, 0, sizeof(inst)); + if (!emit_alu(emit, &inst)) + return 0; + } + + alu_offset = emit->node_first_alu; + alu_end = code->alu.length - alu_offset - 1; + tex_offset = emit->node_first_tex; + tex_end = code->tex.length - tex_offset - 1; + + if (code->tex.length == emit->node_first_tex) { + if (emit->current_node > 0) { + error("Node %i has no TEX instructions", emit->current_node); + return 0; + } + + tex_end = 0; + } else { + if (emit->current_node == 0) + code->config |= R300_PFS_CNTL_FIRST_NODE_HAS_TEX; + } + + /* Write the config register. + * Note: The order in which the words for each node are written + * is not correct here and needs to be fixed up once we're entirely + * done + * + * Also note that the register specification from AMD is slightly + * incorrect in its description of this register. */ + code->code_addr[emit->current_node] = + ((alu_offset << R300_ALU_START_SHIFT) + & R300_ALU_START_MASK) + | ((alu_end << R300_ALU_SIZE_SHIFT) + & R300_ALU_SIZE_MASK) + | ((tex_offset << R300_TEX_START_SHIFT) + & R300_TEX_START_MASK) + | ((tex_end << R300_TEX_SIZE_SHIFT) + & R300_TEX_SIZE_MASK) + | emit->node_flags + | (get_msbs_tex(tex_offset, 5) + << R400_TEX_START_MSB_SHIFT) + | (get_msbs_tex(tex_end, 5) + << R400_TEX_SIZE_MSB_SHIFT) + ; + + /* Write r400 extended instruction fields. These will be ignored on + * r300 cards. */ + alu_offset_msbs = get_msbs_alu(alu_offset); + alu_end_msbs = get_msbs_alu(alu_end); + switch(emit->current_node) { + case 0: + code->r400_code_offset_ext |= + alu_offset_msbs << R400_ALU_START3_MSB_SHIFT + | alu_end_msbs << R400_ALU_SIZE3_MSB_SHIFT; + break; + case 1: + code->r400_code_offset_ext |= + alu_offset_msbs << R400_ALU_START2_MSB_SHIFT + | alu_end_msbs << R400_ALU_SIZE2_MSB_SHIFT; + break; + case 2: + code->r400_code_offset_ext |= + alu_offset_msbs << R400_ALU_START1_MSB_SHIFT + | alu_end_msbs << R400_ALU_SIZE1_MSB_SHIFT; + break; + case 3: + code->r400_code_offset_ext |= + alu_offset_msbs << R400_ALU_START0_MSB_SHIFT + | alu_end_msbs << R400_ALU_SIZE0_MSB_SHIFT; + break; + } + return 1; +} + + +/** + * Begin a block of texture instructions. + * Create the necessary indirection. + */ +static int begin_tex(struct r300_emit_state * emit) +{ + PROG_CODE; + + if (code->alu.length == emit->node_first_alu && + code->tex.length == emit->node_first_tex) { + return 1; + } + + if (emit->current_node == 3) { + error("Too many texture indirections"); + return 0; + } + + if (!finish_node(emit)) + return 0; + + emit->current_node++; + emit->node_first_tex = code->tex.length; + emit->node_first_alu = code->alu.length; + emit->node_flags = 0; + return 1; +} + + +static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst) +{ + unsigned int unit; + unsigned int dest; + unsigned int opcode; + PROG_CODE; + + if (code->tex.length >= emit->compiler->Base.max_tex_insts) { + error("Too many TEX instructions"); + return 0; + } + + unit = inst->U.I.TexSrcUnit; + dest = inst->U.I.DstReg.Index; + + switch(inst->U.I.Opcode) { + case RC_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break; + case RC_OPCODE_TEX: opcode = R300_TEX_OP_LD; break; + case RC_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break; + case RC_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break; + default: + error("Unknown texture opcode %s", rc_get_opcode_info(inst->U.I.Opcode)->Name); + return 0; + } + + if (inst->U.I.Opcode == RC_OPCODE_KIL) { + unit = 0; + dest = 0; + } else { + use_temporary(code, dest); + } + + use_temporary(code, inst->U.I.SrcReg[0].Index); + + code->tex.inst[code->tex.length++] = + ((inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT) + & R300_SRC_ADDR_MASK) + | ((dest << R300_DST_ADDR_SHIFT) + & R300_DST_ADDR_MASK) + | (unit << R300_TEX_ID_SHIFT) + | (opcode << R300_TEX_INST_SHIFT) + | (inst->U.I.SrcReg[0].Index >= R300_PFS_NUM_TEMP_REGS ? + R400_SRC_ADDR_EXT_BIT : 0) + | (dest >= R300_PFS_NUM_TEMP_REGS ? + R400_DST_ADDR_EXT_BIT : 0) + ; + return 1; +} + + +/** + * Final compilation step: Turn the intermediate radeon_program into + * machine-readable instructions. + */ +void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user) +{ + struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c; + struct r300_emit_state emit; + struct r300_fragment_program_code *code = &compiler->code->code.r300; + unsigned int tex_end; + + memset(&emit, 0, sizeof(emit)); + emit.compiler = compiler; + + memset(code, 0, sizeof(struct r300_fragment_program_code)); + + for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next; + inst != &compiler->Base.Program.Instructions && !compiler->Base.Error; + inst = inst->Next) { + if (inst->Type == RC_INSTRUCTION_NORMAL) { + if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) { + begin_tex(&emit); + continue; + } + + emit_tex(&emit, inst); + } else { + emit_alu(&emit, &inst->U.P); + } + } + + if (code->pixsize >= compiler->Base.max_temp_regs) + rc_error(&compiler->Base, "Too many hardware temporaries used.\n"); + + if (compiler->Base.Error) + return; + + /* Finish the program */ + finish_node(&emit); + + code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */ + + /* Set r400 extended instruction fields. These values will be ignored + * on r300 cards. */ + code->r400_code_offset_ext |= + (get_msbs_alu(0) + << R400_ALU_OFFSET_MSB_SHIFT) + | (get_msbs_alu(code->alu.length - 1) + << R400_ALU_SIZE_MSB_SHIFT); + + tex_end = code->tex.length ? code->tex.length - 1 : 0; + code->code_offset = + ((0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) + & R300_PFS_CNTL_ALU_OFFSET_MASK) + | (((code->alu.length - 1) << R300_PFS_CNTL_ALU_END_SHIFT) + & R300_PFS_CNTL_ALU_END_MASK) + | ((0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) + & R300_PFS_CNTL_TEX_OFFSET_MASK) + | ((tex_end << R300_PFS_CNTL_TEX_END_SHIFT) + & R300_PFS_CNTL_TEX_END_MASK) + | (get_msbs_tex(0, 5) << R400_TEX_START_MSB_SHIFT) + | (get_msbs_tex(tex_end, 6) << R400_TEX_SIZE_MSB_SHIFT) + ; + + if (emit.current_node < 3) { + int shift = 3 - emit.current_node; + int i; + for(i = emit.current_node; i >= 0; --i) + code->code_addr[shift + i] = code->code_addr[i]; + for(i = 0; i < shift; ++i) + code->code_addr[i] = 0; + } + + if (code->pixsize >= R300_PFS_NUM_TEMP_REGS + || code->alu.length > R300_PFS_MAX_ALU_INST + || code->tex.length > R300_PFS_MAX_TEX_INST) { + + code->r390_mode = 1; + } +} diff --git a/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.c b/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.c new file mode 100644 index 00000000000..b7bca8c0cfa --- /dev/null +++ b/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.c @@ -0,0 +1,243 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * @file + * Utilities to deal with the somewhat odd restriction on R300 fragment + * program swizzles. + */ + +#include "r300_fragprog_swizzle.h" + +#include <stdio.h> + +#include "../r300_reg.h" +#include "radeon_compiler.h" + +#define MAKE_SWZ3(x, y, z) (RC_MAKE_SWIZZLE(RC_SWIZZLE_##x, RC_SWIZZLE_##y, RC_SWIZZLE_##z, RC_SWIZZLE_ZERO)) + +struct swizzle_data { + unsigned int hash; /**< swizzle value this matches */ + unsigned int base; /**< base value for hw swizzle */ + unsigned int stride; /**< difference in base between arg0/1/2 */ + unsigned int srcp_stride; /**< difference in base between arg0/scrp */ +}; + +static const struct swizzle_data native_swizzles[] = { + {MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4, 15}, + {MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4, 15}, + {MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4, 15}, + {MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4, 15}, + {MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1, 7}, + {MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1, 0}, + {MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1, 0}, + {MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1, 0}, + {MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0, 0}, + {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0, 0}, + {MAKE_SWZ3(HALF, HALF, HALF), R300_ALU_ARGC_HALF, 0, 0} +}; + +static const int num_native_swizzles = sizeof(native_swizzles)/sizeof(native_swizzles[0]); + +/** + * Find a native RGB swizzle that matches the given swizzle. + * Returns 0 if none found. + */ +static const struct swizzle_data* lookup_native_swizzle(unsigned int swizzle) +{ + int i, comp; + + for(i = 0; i < num_native_swizzles; ++i) { + const struct swizzle_data* sd = &native_swizzles[i]; + for(comp = 0; comp < 3; ++comp) { + unsigned int swz = GET_SWZ(swizzle, comp); + if (swz == RC_SWIZZLE_UNUSED) + continue; + if (swz != GET_SWZ(sd->hash, comp)) + break; + } + if (comp == 3) + return sd; + } + + return 0; +} + +/** + * Determines if the given swizzle is valid for r300/r400. In most situations + * it is better to use r300_swizzle_is_native() which can be accesed via + * struct radeon_compiler *c; c->SwizzleCaps->IsNative(). + */ +int r300_swizzle_is_native_basic(unsigned int swizzle) +{ + if(lookup_native_swizzle(swizzle)) + return 1; + else + return 0; +} + +/** + * Check whether the given instruction supports the swizzle and negate + * combinations in the given source register. + */ +static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) +{ + const struct swizzle_data* sd; + unsigned int relevant; + int j; + + if (opcode == RC_OPCODE_KIL || + opcode == RC_OPCODE_TEX || + opcode == RC_OPCODE_TXB || + opcode == RC_OPCODE_TXP) { + if (reg.Abs || reg.Negate) + return 0; + + for(j = 0; j < 4; ++j) { + unsigned int swz = GET_SWZ(reg.Swizzle, j); + if (swz == RC_SWIZZLE_UNUSED) + continue; + if (swz != j) + return 0; + } + + return 1; + } + + relevant = 0; + + for(j = 0; j < 3; ++j) + if (GET_SWZ(reg.Swizzle, j) != RC_SWIZZLE_UNUSED) + relevant |= 1 << j; + + if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) + return 0; + + sd = lookup_native_swizzle(reg.Swizzle); + if (!sd || (reg.File == RC_FILE_PRESUB && sd->srcp_stride == 0)) + return 0; + + return 1; +} + + +static void r300_swizzle_split( + struct rc_src_register src, unsigned int mask, + struct rc_swizzle_split * split) +{ + split->NumPhases = 0; + + while(mask) { + unsigned int best_matchcount = 0; + unsigned int best_matchmask = 0; + int i, comp; + + for(i = 0; i < num_native_swizzles; ++i) { + const struct swizzle_data *sd = &native_swizzles[i]; + unsigned int matchcount = 0; + unsigned int matchmask = 0; + for(comp = 0; comp < 3; ++comp) { + unsigned int swz; + if (!GET_BIT(mask, comp)) + continue; + swz = GET_SWZ(src.Swizzle, comp); + if (swz == RC_SWIZZLE_UNUSED) + continue; + if (swz == GET_SWZ(sd->hash, comp)) { + /* check if the negate bit of current component + * is the same for already matched components */ + if (matchmask && (!!(src.Negate & matchmask) != !!(src.Negate & (1 << comp)))) + continue; + + matchcount++; + matchmask |= 1 << comp; + } + } + if (matchcount > best_matchcount) { + best_matchcount = matchcount; + best_matchmask = matchmask; + if (matchmask == (mask & RC_MASK_XYZ)) + break; + } + } + + if (mask & RC_MASK_W) + best_matchmask |= RC_MASK_W; + + split->Phase[split->NumPhases++] = best_matchmask; + mask &= ~best_matchmask; + } +} + +struct rc_swizzle_caps r300_swizzle_caps = { + .IsNative = r300_swizzle_is_native, + .Split = r300_swizzle_split +}; + + +/** + * Translate an RGB (XYZ) swizzle into the hardware code for the given + * instruction source. + */ +unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle) +{ + const struct swizzle_data* sd = lookup_native_swizzle(swizzle); + + if (!sd || (src == RC_PAIR_PRESUB_SRC && sd->srcp_stride == 0)) { + fprintf(stderr, "Not a native swizzle: %08x\n", swizzle); + return 0; + } + + if (src == RC_PAIR_PRESUB_SRC) { + return sd->base + sd->srcp_stride; + } else { + return sd->base + src*sd->stride; + } +} + + +/** + * Translate an Alpha (W) swizzle into the hardware code for the given + * instruction source. + */ +unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle) +{ + unsigned int swz = GET_SWZ(swizzle, 0); + if (src == RC_PAIR_PRESUB_SRC) { + return R300_ALU_ARGA_SRCP_X + swz; + } + if (swz < 3) + return swz + 3*src; + + switch(swz) { + case RC_SWIZZLE_W: return R300_ALU_ARGA_SRC0A + src; + case RC_SWIZZLE_ONE: return R300_ALU_ARGA_ONE; + case RC_SWIZZLE_ZERO: return R300_ALU_ARGA_ZERO; + case RC_SWIZZLE_HALF: return R300_ALU_ARGA_HALF; + default: return R300_ALU_ARGA_ONE; + } +} diff --git a/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.h b/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.h new file mode 100644 index 00000000000..f2635be140d --- /dev/null +++ b/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.h @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __R300_FRAGPROG_SWIZZLE_H_ +#define __R300_FRAGPROG_SWIZZLE_H_ + +#include "radeon_swizzle.h" + +extern struct rc_swizzle_caps r300_swizzle_caps; + +unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle); +unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle); +int r300_swizzle_is_native_basic(unsigned int swizzle); + +#endif /* __R300_FRAGPROG_SWIZZLE_H_ */ diff --git a/src/gallium/drivers/r300/compiler/r3xx_fragprog.c b/src/gallium/drivers/r300/compiler/r3xx_fragprog.c new file mode 100644 index 00000000000..bb6c010e8e3 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/r3xx_fragprog.c @@ -0,0 +1,172 @@ +/* + * Copyright 2009 Nicolai Hähnle <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_compiler.h" + +#include <stdio.h> + +#include "radeon_compiler_util.h" +#include "radeon_dataflow.h" +#include "radeon_emulate_branches.h" +#include "radeon_emulate_loops.h" +#include "radeon_program_alu.h" +#include "radeon_program_tex.h" +#include "radeon_rename_regs.h" +#include "radeon_remove_constants.h" +#include "r300_fragprog.h" +#include "r300_fragprog_swizzle.h" +#include "r500_fragprog.h" + + +static void dataflow_outputs_mark_use(void * userdata, void * data, + void (*callback)(void *, unsigned int, unsigned int)) +{ + struct r300_fragment_program_compiler * c = userdata; + callback(data, c->OutputColor[0], RC_MASK_XYZW); + callback(data, c->OutputColor[1], RC_MASK_XYZW); + callback(data, c->OutputColor[2], RC_MASK_XYZW); + callback(data, c->OutputColor[3], RC_MASK_XYZW); + callback(data, c->OutputDepth, RC_MASK_W); +} + +static void rc_rewrite_depth_out(struct radeon_compiler *cc, void *user) +{ + struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc; + struct rc_instruction *rci; + + for (rci = c->Base.Program.Instructions.Next; rci != &c->Base.Program.Instructions; rci = rci->Next) { + struct rc_sub_instruction * inst = &rci->U.I; + unsigned i; + const struct rc_opcode_info *info = rc_get_opcode_info(inst->Opcode); + + if (inst->DstReg.File != RC_FILE_OUTPUT || inst->DstReg.Index != c->OutputDepth) + continue; + + if (inst->DstReg.WriteMask & RC_MASK_Z) { + inst->DstReg.WriteMask = RC_MASK_W; + } else { + inst->DstReg.WriteMask = 0; + continue; + } + + if (!info->IsComponentwise) { + continue; + } + + for (i = 0; i < info->NumSrcRegs; i++) { + inst->SrcReg[i] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[i]); + } + } +} + +static int radeon_saturate_output( + struct radeon_compiler * c, + struct rc_instruction * inst, + void* data) +{ + const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode); + + if (!info->HasDstReg || inst->U.I.DstReg.File != RC_FILE_OUTPUT) + return 0; + + inst->U.I.SaturateMode = RC_SATURATE_ZERO_ONE; + return 1; +} + +void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) +{ + int is_r500 = c->Base.is_r500; + int opt = !c->Base.disable_optimizations; + int sat_out = c->state.frag_clamp; + + /* Lists of instruction transformations. */ + struct radeon_program_transformation saturate_output[] = { + { &radeon_saturate_output, c }, + { 0, 0 } + }; + + struct radeon_program_transformation rewrite_tex[] = { + { &radeonTransformTEX, c }, + { 0, 0 } + }; + + struct radeon_program_transformation rewrite_if[] = { + { &r500_transform_IF, 0 }, + {0, 0} + }; + + struct radeon_program_transformation native_rewrite_r500[] = { + { &radeonTransformALU, 0 }, + { &radeonTransformDeriv, 0 }, + { &radeonTransformTrigScale, 0 }, + { 0, 0 } + }; + + struct radeon_program_transformation native_rewrite_r300[] = { + { &radeonTransformALU, 0 }, + { &r300_transform_trig_simple, 0 }, + { 0, 0 } + }; + + /* List of compiler passes. */ + struct radeon_compiler_pass fs_list[] = { + /* NAME DUMP PREDICATE FUNCTION PARAM */ + {"rewrite depth out", 1, 1, rc_rewrite_depth_out, NULL}, + /* This transformation needs to be done before any of the IF + * instructions are modified. */ + {"transform KILP", 1, 1, rc_transform_KILP, NULL}, + {"unroll loops", 1, is_r500, rc_unroll_loops, NULL}, + {"transform loops", 1, !is_r500, rc_transform_loops, NULL}, + {"emulate branches", 1, !is_r500, rc_emulate_branches, NULL}, + {"saturate output writes", 1, sat_out, rc_local_transform, saturate_output}, + {"transform TEX", 1, 1, rc_local_transform, rewrite_tex}, + {"transform IF", 1, is_r500, rc_local_transform, rewrite_if}, + {"native rewrite", 1, is_r500, rc_local_transform, native_rewrite_r500}, + {"native rewrite", 1, !is_r500, rc_local_transform, native_rewrite_r300}, + {"deadcode", 1, opt, rc_dataflow_deadcode, dataflow_outputs_mark_use}, + {"emulate loops", 1, !is_r500, rc_emulate_loops, NULL}, + {"dataflow optimize", 1, opt, rc_optimize, NULL}, + {"dataflow swizzles", 1, 1, rc_dataflow_swizzles, NULL}, + {"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table}, + /* This pass makes it easier for the scheduler to group TEX + * instructions and reduces the chances of creating too + * many texture indirections.*/ + {"register rename", 1, !is_r500, rc_rename_regs, NULL}, + {"pair translate", 1, 1, rc_pair_translate, NULL}, + {"pair scheduling", 1, 1, rc_pair_schedule, NULL}, + {"dead sources", 1, 1, rc_pair_remove_dead_sources, NULL}, + {"register allocation", 1, 1, rc_pair_regalloc, &opt}, + {"final code validation", 0, 1, rc_validate_final_shader, NULL}, + {"machine code generation", 0, is_r500, r500BuildFragmentProgramHwCode, NULL}, + {"machine code generation", 0, !is_r500, r300BuildFragmentProgramHwCode, NULL}, + {"dump machine code", 0, is_r500 && (c->Base.Debug & RC_DBG_LOG), r500FragmentProgramDump, NULL}, + {"dump machine code", 0, !is_r500 && (c->Base.Debug & RC_DBG_LOG), r300FragmentProgramDump, NULL}, + {NULL, 0, 0, NULL, NULL} + }; + + c->Base.type = RC_FRAGMENT_PROGRAM; + c->Base.SwizzleCaps = c->Base.is_r500 ? &r500_swizzle_caps : &r300_swizzle_caps; + + rc_run_compiler(&c->Base, fs_list); + + rc_constants_copy(&c->code->constants, &c->Base.Program.Constants); +} diff --git a/src/gallium/drivers/r300/compiler/r3xx_vertprog.c b/src/gallium/drivers/r300/compiler/r3xx_vertprog.c new file mode 100644 index 00000000000..654f9a070d5 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/r3xx_vertprog.c @@ -0,0 +1,1045 @@ +/* + * Copyright 2009 Nicolai Hähnle <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_compiler.h" + +#include <stdio.h> + +#include "../r300_reg.h" + +#include "radeon_compiler_util.h" +#include "radeon_dataflow.h" +#include "radeon_program_alu.h" +#include "radeon_swizzle.h" +#include "radeon_emulate_branches.h" +#include "radeon_emulate_loops.h" +#include "radeon_remove_constants.h" + +struct loop { + int BgnLoop; + +}; + +/* + * Take an already-setup and valid source then swizzle it appropriately to + * obtain a constant ZERO or ONE source. + */ +#define __CONST(x, y) \ + (PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[x]), \ + t_swizzle(y), \ + t_swizzle(y), \ + t_swizzle(y), \ + t_swizzle(y), \ + t_src_class(vpi->SrcReg[x].File), \ + RC_MASK_NONE) | (vpi->SrcReg[x].RelAddr << 4)) + + +static unsigned long t_dst_mask(unsigned int mask) +{ + /* RC_MASK_* is equivalent to VSF_FLAG_* */ + return mask & RC_MASK_XYZW; +} + +static unsigned long t_dst_class(rc_register_file file) +{ + switch (file) { + default: + fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file); + /* fall-through */ + case RC_FILE_TEMPORARY: + return PVS_DST_REG_TEMPORARY; + case RC_FILE_OUTPUT: + return PVS_DST_REG_OUT; + case RC_FILE_ADDRESS: + return PVS_DST_REG_A0; + } +} + +static unsigned long t_dst_index(struct r300_vertex_program_code *vp, + struct rc_dst_register *dst) +{ + if (dst->File == RC_FILE_OUTPUT) + return vp->outputs[dst->Index]; + + return dst->Index; +} + +static unsigned long t_src_class(rc_register_file file) +{ + switch (file) { + default: + fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file); + /* fall-through */ + case RC_FILE_NONE: + case RC_FILE_TEMPORARY: + return PVS_SRC_REG_TEMPORARY; + case RC_FILE_INPUT: + return PVS_SRC_REG_INPUT; + case RC_FILE_CONSTANT: + return PVS_SRC_REG_CONSTANT; + } +} + +static int t_src_conflict(struct rc_src_register a, struct rc_src_register b) +{ + unsigned long aclass = t_src_class(a.File); + unsigned long bclass = t_src_class(b.File); + + if (aclass != bclass) + return 0; + if (aclass == PVS_SRC_REG_TEMPORARY) + return 0; + + if (a.RelAddr || b.RelAddr) + return 1; + if (a.Index != b.Index) + return 1; + + return 0; +} + +static inline unsigned long t_swizzle(unsigned int swizzle) +{ + /* this is in fact a NOP as the Mesa RC_SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */ + return swizzle; +} + +static unsigned long t_src_index(struct r300_vertex_program_code *vp, + struct rc_src_register *src) +{ + if (src->File == RC_FILE_INPUT) { + assert(vp->inputs[src->Index] != -1); + return vp->inputs[src->Index]; + } else { + if (src->Index < 0) { + fprintf(stderr, + "negative offsets for indirect addressing do not work.\n"); + return 0; + } + return src->Index; + } +} + +/* these two functions should probably be merged... */ + +static unsigned long t_src(struct r300_vertex_program_code *vp, + struct rc_src_register *src) +{ + /* src->Negate uses the RC_MASK_ flags from program_instruction.h, + * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. + */ + return PVS_SRC_OPERAND(t_src_index(vp, src), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 1)), + t_swizzle(GET_SWZ(src->Swizzle, 2)), + t_swizzle(GET_SWZ(src->Swizzle, 3)), + t_src_class(src->File), + src->Negate) | + (src->RelAddr << 4) | (src->Abs << 3); +} + +static unsigned long t_src_scalar(struct r300_vertex_program_code *vp, + struct rc_src_register *src) +{ + /* src->Negate uses the RC_MASK_ flags from program_instruction.h, + * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. + */ + return PVS_SRC_OPERAND(t_src_index(vp, src), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_src_class(src->File), + src->Negate ? RC_MASK_XYZW : RC_MASK_NONE) | + (src->RelAddr << 4) | (src->Abs << 3); +} + +static int valid_dst(struct r300_vertex_program_code *vp, + struct rc_dst_register *dst) +{ + if (dst->File == RC_FILE_OUTPUT && vp->outputs[dst->Index] == -1) { + return 0; + } else if (dst->File == RC_FILE_ADDRESS) { + assert(dst->Index == 0); + } + + return 1; +} + +static void ei_vector1(struct r300_vertex_program_code *vp, + unsigned int hw_opcode, + struct rc_sub_instruction *vpi, + unsigned int * inst) +{ + inst[0] = PVS_OP_DST_OPERAND(hw_opcode, + 0, + 0, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &vpi->SrcReg[0]); + inst[2] = __CONST(0, RC_SWIZZLE_ZERO); + inst[3] = __CONST(0, RC_SWIZZLE_ZERO); +} + +static void ei_vector2(struct r300_vertex_program_code *vp, + unsigned int hw_opcode, + struct rc_sub_instruction *vpi, + unsigned int * inst) +{ + inst[0] = PVS_OP_DST_OPERAND(hw_opcode, + 0, + 0, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &vpi->SrcReg[0]); + inst[2] = t_src(vp, &vpi->SrcReg[1]); + inst[3] = __CONST(1, RC_SWIZZLE_ZERO); +} + +static void ei_math1(struct r300_vertex_program_code *vp, + unsigned int hw_opcode, + struct rc_sub_instruction *vpi, + unsigned int * inst) +{ + inst[0] = PVS_OP_DST_OPERAND(hw_opcode, + 1, + 0, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]); + inst[2] = __CONST(0, RC_SWIZZLE_ZERO); + inst[3] = __CONST(0, RC_SWIZZLE_ZERO); +} + +static void ei_lit(struct r300_vertex_program_code *vp, + struct rc_sub_instruction *vpi, + unsigned int * inst) +{ + //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W} + + inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX, + 1, + 0, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + /* NOTE: Users swizzling might not work. */ + inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W + PVS_SRC_SELECT_FORCE_0, // Z + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y + t_src_class(vpi->SrcReg[0].File), + vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) | + (vpi->SrcReg[0].RelAddr << 4); + inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W + PVS_SRC_SELECT_FORCE_0, // Z + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X + t_src_class(vpi->SrcReg[0].File), + vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) | + (vpi->SrcReg[0].RelAddr << 4); + inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X + PVS_SRC_SELECT_FORCE_0, // Z + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W + t_src_class(vpi->SrcReg[0].File), + vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) | + (vpi->SrcReg[0].RelAddr << 4); +} + +static void ei_mad(struct r300_vertex_program_code *vp, + struct rc_sub_instruction *vpi, + unsigned int * inst) +{ + unsigned int i; + /* Remarks about hardware limitations of MAD + * (please preserve this comment, as this information is _NOT_ + * in the documentation provided by AMD). + * + * As described in the documentation, MAD with three unique temporary + * source registers requires the use of the macro version. + * + * However (and this is not mentioned in the documentation), apparently + * the macro version is _NOT_ a full superset of the normal version. + * In particular, the macro version does not always work when relative + * addressing is used in the source operands. + * + * This limitation caused incorrect rendering in Sauerbraten's OpenGL + * assembly shader path when using medium quality animations + * (i.e. animations with matrix blending instead of quaternion blending). + * + * Unfortunately, I (nha) have been unable to extract a Piglit regression + * test for this issue - for some reason, it is possible to have vertex + * programs whose prefix is *exactly* the same as the prefix of the + * offending program in Sauerbraten up to the offending instruction + * without causing any trouble. + * + * Bottom line: Only use the macro version only when really necessary; + * according to AMD docs, this should improve performance by one clock + * as a nice side bonus. + */ + if (vpi->SrcReg[0].File == RC_FILE_TEMPORARY && + vpi->SrcReg[1].File == RC_FILE_TEMPORARY && + vpi->SrcReg[2].File == RC_FILE_TEMPORARY && + vpi->SrcReg[0].Index != vpi->SrcReg[1].Index && + vpi->SrcReg[0].Index != vpi->SrcReg[2].Index && + vpi->SrcReg[1].Index != vpi->SrcReg[2].Index) { + inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD, + 0, + 1, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + } else { + inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, + 0, + 0, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + /* Arguments with constant swizzles still count as a unique + * temporary, so we should make sure these arguments share a + * register index with one of the other arguments. */ + for (i = 0; i < 3; i++) { + unsigned int j; + if (vpi->SrcReg[i].File != RC_FILE_NONE) + continue; + + for (j = 0; j < 3; j++) { + if (i != j) { + vpi->SrcReg[i].Index = + vpi->SrcReg[j].Index; + break; + } + } + } + } + inst[1] = t_src(vp, &vpi->SrcReg[0]); + inst[2] = t_src(vp, &vpi->SrcReg[1]); + inst[3] = t_src(vp, &vpi->SrcReg[2]); +} + +static void ei_pow(struct r300_vertex_program_code *vp, + struct rc_sub_instruction *vpi, + unsigned int * inst) +{ + inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF, + 1, + 0, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]); + inst[2] = __CONST(0, RC_SWIZZLE_ZERO); + inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]); +} + +static void mark_write(void * userdata, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + unsigned int * writemasks = userdata; + + if (file != RC_FILE_TEMPORARY) + return; + + if (index >= R300_VS_MAX_TEMPS) + return; + + writemasks[index] |= mask; +} + +static unsigned long t_pred_src(struct r300_vertex_program_compiler * compiler) +{ + return PVS_SRC_OPERAND(compiler->PredicateIndex, + t_swizzle(RC_SWIZZLE_ZERO), + t_swizzle(RC_SWIZZLE_ZERO), + t_swizzle(RC_SWIZZLE_ZERO), + t_swizzle(RC_SWIZZLE_W), + t_src_class(RC_FILE_TEMPORARY), + 0); +} + +static unsigned long t_pred_dst(struct r300_vertex_program_compiler * compiler, + unsigned int hw_opcode, int is_math) +{ + return PVS_OP_DST_OPERAND(hw_opcode, + is_math, + 0, + compiler->PredicateIndex, + RC_MASK_W, + t_dst_class(RC_FILE_TEMPORARY)); + +} + +static void ei_if(struct r300_vertex_program_compiler * compiler, + struct rc_instruction *rci, + unsigned int * inst, + unsigned int branch_depth) +{ + unsigned int predicate_opcode; + int is_math = 0; + + if (!compiler->Base.is_r500) { + rc_error(&compiler->Base,"Opcode IF not supported\n"); + return; + } + + /* Reserve a temporary to use as our predicate stack counter, if we + * don't already have one. */ + if (!compiler->PredicateMask) { + unsigned int writemasks[RC_REGISTER_MAX_INDEX]; + struct rc_instruction * inst; + unsigned int i; + memset(writemasks, 0, sizeof(writemasks)); + for(inst = compiler->Base.Program.Instructions.Next; + inst != &compiler->Base.Program.Instructions; + inst = inst->Next) { + rc_for_all_writes_mask(inst, mark_write, writemasks); + } + for(i = 0; i < compiler->Base.max_temp_regs; i++) { + unsigned int mask = ~writemasks[i] & RC_MASK_XYZW; + /* Only the W component can be used fo the predicate + * stack counter. */ + if (mask & RC_MASK_W) { + compiler->PredicateMask = RC_MASK_W; + compiler->PredicateIndex = i; + break; + } + } + if (i == compiler->Base.max_temp_regs) { + rc_error(&compiler->Base, "No free temporary to use for" + " predicate stack counter.\n"); + return; + } + } + predicate_opcode = + branch_depth ? VE_PRED_SET_NEQ_PUSH : ME_PRED_SET_NEQ; + + rci->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(rci->U.I.SrcReg[0].Swizzle,0)); + if (branch_depth == 0) { + is_math = 1; + predicate_opcode = ME_PRED_SET_NEQ; + inst[1] = t_src(compiler->code, &rci->U.I.SrcReg[0]); + inst[2] = 0; + } else { + predicate_opcode = VE_PRED_SET_NEQ_PUSH; + inst[1] = t_pred_src(compiler); + inst[2] = t_src(compiler->code, &rci->U.I.SrcReg[0]); + } + + inst[0] = t_pred_dst(compiler, predicate_opcode, is_math); + inst[3] = 0; + +} + +static void ei_else(struct r300_vertex_program_compiler * compiler, + unsigned int * inst) +{ + if (!compiler->Base.is_r500) { + rc_error(&compiler->Base,"Opcode ELSE not supported\n"); + return; + } + inst[0] = t_pred_dst(compiler, ME_PRED_SET_INV, 1); + inst[1] = t_pred_src(compiler); + inst[2] = 0; + inst[3] = 0; +} + +static void ei_endif(struct r300_vertex_program_compiler *compiler, + unsigned int * inst) +{ + if (!compiler->Base.is_r500) { + rc_error(&compiler->Base,"Opcode ENDIF not supported\n"); + return; + } + inst[0] = t_pred_dst(compiler, ME_PRED_SET_POP, 1); + inst[1] = t_pred_src(compiler); + inst[2] = 0; + inst[3] = 0; +} + +static void translate_vertex_program(struct radeon_compiler *c, void *user) +{ + struct r300_vertex_program_compiler *compiler = (struct r300_vertex_program_compiler*)c; + struct rc_instruction *rci; + + struct loop * loops = NULL; + int current_loop_depth = 0; + int loops_reserved = 0; + + unsigned int branch_depth = 0; + + compiler->code->pos_end = 0; /* Not supported yet */ + compiler->code->length = 0; + compiler->code->num_temporaries = 0; + + compiler->SetHwInputOutput(compiler); + + for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) { + struct rc_sub_instruction *vpi = &rci->U.I; + unsigned int *inst = compiler->code->body.d + compiler->code->length; + const struct rc_opcode_info *info = rc_get_opcode_info(vpi->Opcode); + + /* Skip instructions writing to non-existing destination */ + if (!valid_dst(compiler->code, &vpi->DstReg)) + continue; + + if (info->HasDstReg) { + /* Neither is Saturate. */ + if (vpi->SaturateMode != RC_SATURATE_NONE) { + rc_error(&compiler->Base, "Vertex program does not support the Saturate " + "modifier (yet).\n"); + } + } + + if (compiler->code->length >= c->max_alu_insts * 4) { + rc_error(&compiler->Base, "Vertex program has too many instructions\n"); + return; + } + + assert(compiler->Base.is_r500 || + (vpi->Opcode != RC_OPCODE_SEQ && + vpi->Opcode != RC_OPCODE_SNE)); + + switch (vpi->Opcode) { + case RC_OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break; + case RC_OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break; + case RC_OPCODE_COS: ei_math1(compiler->code, ME_COS, vpi, inst); break; + case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break; + case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break; + case RC_OPCODE_ELSE: ei_else(compiler, inst); break; + case RC_OPCODE_ENDIF: ei_endif(compiler, inst); branch_depth--; break; + case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break; + case RC_OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break; + case RC_OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break; + case RC_OPCODE_IF: ei_if(compiler, rci, inst, branch_depth); branch_depth++; break; + case RC_OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break; + case RC_OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break; + case RC_OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break; + case RC_OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break; + case RC_OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break; + case RC_OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break; + case RC_OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break; + case RC_OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break; + case RC_OPCODE_POW: ei_pow(compiler->code, vpi, inst); break; + case RC_OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break; + case RC_OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break; + case RC_OPCODE_SEQ: ei_vector2(compiler->code, VE_SET_EQUAL, vpi, inst); break; + case RC_OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break; + case RC_OPCODE_SIN: ei_math1(compiler->code, ME_SIN, vpi, inst); break; + case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break; + case RC_OPCODE_SNE: ei_vector2(compiler->code, VE_SET_NOT_EQUAL, vpi, inst); break; + case RC_OPCODE_BGNLOOP: + { + struct loop * l; + + if ((!compiler->Base.is_r500 + && loops_reserved >= R300_VS_MAX_LOOP_DEPTH) + || loops_reserved >= R500_VS_MAX_FC_DEPTH) { + rc_error(&compiler->Base, + "Loops are nested too deep."); + return; + } + memory_pool_array_reserve(&compiler->Base.Pool, + struct loop, loops, current_loop_depth, + loops_reserved, 1); + l = &loops[current_loop_depth++]; + memset(l , 0, sizeof(struct loop)); + l->BgnLoop = (compiler->code->length / 4); + continue; + } + case RC_OPCODE_ENDLOOP: + { + struct loop * l; + unsigned int act_addr; + unsigned int last_addr; + unsigned int ret_addr; + + assert(loops); + l = &loops[current_loop_depth - 1]; + act_addr = l->BgnLoop - 1; + last_addr = (compiler->code->length / 4) - 1; + ret_addr = l->BgnLoop; + + if (loops_reserved >= R300_VS_MAX_FC_OPS) { + rc_error(&compiler->Base, + "Too many flow control instructions."); + return; + } + if (compiler->Base.is_r500) { + compiler->code->fc_op_addrs.r500 + [compiler->code->num_fc_ops].lw = + R500_PVS_FC_ACT_ADRS(act_addr) + | R500_PVS_FC_LOOP_CNT_JMP_INST(0xffff) + ; + compiler->code->fc_op_addrs.r500 + [compiler->code->num_fc_ops].uw = + R500_PVS_FC_LAST_INST(last_addr) + | R500_PVS_FC_RTN_INST(ret_addr) + ; + } else { + compiler->code->fc_op_addrs.r300 + [compiler->code->num_fc_ops] = + R300_PVS_FC_ACT_ADRS(act_addr) + | R300_PVS_FC_LOOP_CNT_JMP_INST(0xff) + | R300_PVS_FC_LAST_INST(last_addr) + | R300_PVS_FC_RTN_INST(ret_addr) + ; + } + compiler->code->fc_loop_index[compiler->code->num_fc_ops] = + R300_PVS_FC_LOOP_INIT_VAL(0x0) + | R300_PVS_FC_LOOP_STEP_VAL(0x1) + ; + compiler->code->fc_ops |= R300_VAP_PVS_FC_OPC_LOOP( + compiler->code->num_fc_ops); + compiler->code->num_fc_ops++; + current_loop_depth--; + continue; + } + + default: + rc_error(&compiler->Base, "Unknown opcode %s\n", info->Name); + return; + } + + /* Non-flow control instructions that are inside an if statement + * need to pay attention to the predicate bit. */ + if (branch_depth + && vpi->Opcode != RC_OPCODE_IF + && vpi->Opcode != RC_OPCODE_ELSE + && vpi->Opcode != RC_OPCODE_ENDIF) { + + inst[0] |= (PVS_DST_PRED_ENABLE_MASK + << PVS_DST_PRED_ENABLE_SHIFT); + inst[0] |= (PVS_DST_PRED_SENSE_MASK + << PVS_DST_PRED_SENSE_SHIFT); + } + + /* Update the number of temporaries. */ + if (info->HasDstReg && vpi->DstReg.File == RC_FILE_TEMPORARY && + vpi->DstReg.Index >= compiler->code->num_temporaries) + compiler->code->num_temporaries = vpi->DstReg.Index + 1; + + for (unsigned i = 0; i < info->NumSrcRegs; i++) + if (vpi->SrcReg[i].File == RC_FILE_TEMPORARY && + vpi->SrcReg[i].Index >= compiler->code->num_temporaries) + compiler->code->num_temporaries = vpi->SrcReg[i].Index + 1; + + if (compiler->PredicateMask) + if (compiler->PredicateIndex >= compiler->code->num_temporaries) + compiler->code->num_temporaries = compiler->PredicateIndex + 1; + + if (compiler->code->num_temporaries > compiler->Base.max_temp_regs) { + rc_error(&compiler->Base, "Too many temporaries.\n"); + return; + } + + compiler->code->length += 4; + + if (compiler->Base.Error) + return; + } +} + +struct temporary_allocation { + unsigned int Allocated:1; + unsigned int HwTemp:15; + struct rc_instruction * LastRead; +}; + +static void allocate_temporary_registers(struct radeon_compiler *c, void *user) +{ + struct r300_vertex_program_compiler *compiler = (struct r300_vertex_program_compiler*)c; + struct rc_instruction *inst; + struct rc_instruction *end_loop = NULL; + unsigned int num_orig_temps = 0; + char hwtemps[RC_REGISTER_MAX_INDEX]; + struct temporary_allocation * ta; + unsigned int i, j; + + memset(hwtemps, 0, sizeof(hwtemps)); + + rc_recompute_ips(c); + + /* Pass 1: Count original temporaries. */ + for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + for (i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { + if (inst->U.I.SrcReg[i].Index >= num_orig_temps) + num_orig_temps = inst->U.I.SrcReg[i].Index + 1; + } + } + + if (opcode->HasDstReg) { + if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) { + if (inst->U.I.DstReg.Index >= num_orig_temps) + num_orig_temps = inst->U.I.DstReg.Index + 1; + } + } + } + + ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool, + sizeof(struct temporary_allocation) * num_orig_temps); + memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps); + + /* Pass 2: Determine original temporary lifetimes */ + for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + /* Instructions inside of loops need to use the ENDLOOP + * instruction as their LastRead. */ + if (!end_loop && inst->U.I.Opcode == RC_OPCODE_BGNLOOP) { + int endloops = 1; + struct rc_instruction * ptr; + for(ptr = inst->Next; + ptr != &compiler->Base.Program.Instructions; + ptr = ptr->Next){ + if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) { + endloops++; + } else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) { + endloops--; + if (endloops <= 0) { + end_loop = ptr; + break; + } + } + } + } + + if (inst == end_loop) { + end_loop = NULL; + continue; + } + + for (i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { + ta[inst->U.I.SrcReg[i].Index].LastRead = end_loop ? end_loop : inst; + } + } + } + + /* Pass 3: Register allocation */ + for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + for (i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { + unsigned int orig = inst->U.I.SrcReg[i].Index; + inst->U.I.SrcReg[i].Index = ta[orig].HwTemp; + + if (ta[orig].Allocated && inst == ta[orig].LastRead) + hwtemps[ta[orig].HwTemp] = 0; + } + } + + if (opcode->HasDstReg) { + if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) { + unsigned int orig = inst->U.I.DstReg.Index; + + if (!ta[orig].Allocated) { + for(j = 0; j < c->max_temp_regs; ++j) { + if (!hwtemps[j]) + break; + } + ta[orig].Allocated = 1; + ta[orig].HwTemp = j; + hwtemps[ta[orig].HwTemp] = 1; + } + + inst->U.I.DstReg.Index = ta[orig].HwTemp; + } + } + } +} + +/** + * R3xx-R4xx vertex engine does not support the Absolute source operand modifier + * and the Saturate opcode modifier. Only Absolute is currently transformed. + */ +static int transform_nonnative_modifiers( + struct radeon_compiler *c, + struct rc_instruction *inst, + void* unused) +{ + const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned i; + + /* Transform ABS(a) to MAX(a, -a). */ + for (i = 0; i < opcode->NumSrcRegs; i++) { + if (inst->U.I.SrcReg[i].Abs) { + struct rc_instruction *new_inst; + unsigned temp; + + inst->U.I.SrcReg[i].Abs = 0; + + temp = rc_find_free_temporary(c); + + new_inst = rc_insert_new_instruction(c, inst->Prev); + new_inst->U.I.Opcode = RC_OPCODE_MAX; + new_inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + new_inst->U.I.DstReg.Index = temp; + new_inst->U.I.SrcReg[0] = inst->U.I.SrcReg[i]; + new_inst->U.I.SrcReg[1] = inst->U.I.SrcReg[i]; + new_inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; + + memset(&inst->U.I.SrcReg[i], 0, sizeof(inst->U.I.SrcReg[i])); + inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[i].Index = temp; + inst->U.I.SrcReg[i].Swizzle = RC_SWIZZLE_XYZW; + } + } + return 1; +} + +/** + * Vertex engine cannot read two inputs or two constants at the same time. + * Introduce intermediate MOVs to temporary registers to account for this. + */ +static int transform_source_conflicts( + struct radeon_compiler *c, + struct rc_instruction* inst, + void* unused) +{ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (opcode->NumSrcRegs == 3) { + if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[2]) + || t_src_conflict(inst->U.I.SrcReg[0], inst->U.I.SrcReg[2])) { + int tmpreg = rc_find_free_temporary(c); + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = tmpreg; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; + + reset_srcreg(&inst->U.I.SrcReg[2]); + inst->U.I.SrcReg[2].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[2].Index = tmpreg; + } + } + + if (opcode->NumSrcRegs >= 2) { + if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[0])) { + int tmpreg = rc_find_free_temporary(c); + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = tmpreg; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; + + reset_srcreg(&inst->U.I.SrcReg[1]); + inst->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[1].Index = tmpreg; + } + } + + return 1; +} + +static void rc_vs_add_artificial_outputs(struct radeon_compiler *c, void *user) +{ + struct r300_vertex_program_compiler * compiler = (struct r300_vertex_program_compiler*)c; + int i; + + for(i = 0; i < 32; ++i) { + if ((compiler->RequiredOutputs & (1 << i)) && + !(compiler->Base.Program.OutputsWritten & (1 << i))) { + struct rc_instruction * inst = rc_insert_new_instruction(&compiler->Base, compiler->Base.Program.Instructions.Prev); + inst->U.I.Opcode = RC_OPCODE_MOV; + + inst->U.I.DstReg.File = RC_FILE_OUTPUT; + inst->U.I.DstReg.Index = i; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + + inst->U.I.SrcReg[0].File = RC_FILE_CONSTANT; + inst->U.I.SrcReg[0].Index = 0; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + + compiler->Base.Program.OutputsWritten |= 1 << i; + } + } +} + +static void dataflow_outputs_mark_used(void * userdata, void * data, + void (*callback)(void *, unsigned int, unsigned int)) +{ + struct r300_vertex_program_compiler * c = userdata; + int i; + + for(i = 0; i < 32; ++i) { + if (c->RequiredOutputs & (1 << i)) + callback(data, i, RC_MASK_XYZW); + } +} + +static int swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) +{ + (void) opcode; + (void) reg; + + return 1; +} + +static void transform_negative_addressing(struct r300_vertex_program_compiler *c, + struct rc_instruction *arl, + struct rc_instruction *end, + int min_offset) +{ + struct rc_instruction *inst, *add; + unsigned const_swizzle; + + /* Transform ARL */ + add = rc_insert_new_instruction(&c->Base, arl->Prev); + add->U.I.Opcode = RC_OPCODE_ADD; + add->U.I.DstReg.File = RC_FILE_TEMPORARY; + add->U.I.DstReg.Index = rc_find_free_temporary(&c->Base); + add->U.I.DstReg.WriteMask = RC_MASK_X; + add->U.I.SrcReg[0] = arl->U.I.SrcReg[0]; + add->U.I.SrcReg[1].File = RC_FILE_CONSTANT; + add->U.I.SrcReg[1].Index = rc_constants_add_immediate_scalar(&c->Base.Program.Constants, + min_offset, &const_swizzle); + add->U.I.SrcReg[1].Swizzle = const_swizzle; + + arl->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + arl->U.I.SrcReg[0].Index = add->U.I.DstReg.Index; + arl->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XXXX; + + /* Rewrite offsets up to and excluding inst. */ + for (inst = arl->Next; inst != end; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + for (unsigned i = 0; i < opcode->NumSrcRegs; i++) + if (inst->U.I.SrcReg[i].RelAddr) + inst->U.I.SrcReg[i].Index -= min_offset; + } +} + +static void rc_emulate_negative_addressing(struct radeon_compiler *compiler, void *user) +{ + struct r300_vertex_program_compiler * c = (struct r300_vertex_program_compiler*)compiler; + struct rc_instruction *inst, *lastARL = NULL; + int min_offset = 0; + + for (inst = c->Base.Program.Instructions.Next; inst != &c->Base.Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (inst->U.I.Opcode == RC_OPCODE_ARL) { + if (lastARL != NULL && min_offset < 0) + transform_negative_addressing(c, lastARL, inst, min_offset); + + lastARL = inst; + min_offset = 0; + continue; + } + + for (unsigned i = 0; i < opcode->NumSrcRegs; i++) { + if (inst->U.I.SrcReg[i].RelAddr && + inst->U.I.SrcReg[i].Index < 0) { + /* ARL must precede any indirect addressing. */ + if (lastARL == NULL) { + rc_error(&c->Base, "Vertex shader: Found relative addressing without ARL."); + return; + } + + if (inst->U.I.SrcReg[i].Index < min_offset) + min_offset = inst->U.I.SrcReg[i].Index; + } + } + } + + if (lastARL != NULL && min_offset < 0) + transform_negative_addressing(c, lastARL, inst, min_offset); +} + +static struct rc_swizzle_caps r300_vertprog_swizzle_caps = { + .IsNative = &swizzle_is_native, + .Split = 0 /* should never be called */ +}; + +void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c) +{ + int is_r500 = c->Base.is_r500; + int opt = !c->Base.disable_optimizations; + + /* Lists of instruction transformations. */ + struct radeon_program_transformation alu_rewrite_r500[] = { + { &r300_transform_vertex_alu, 0 }, + { &r300_transform_trig_scale_vertex, 0 }, + { 0, 0 } + }; + + struct radeon_program_transformation alu_rewrite_r300[] = { + { &r300_transform_vertex_alu, 0 }, + { &r300_transform_trig_simple, 0 }, + { 0, 0 } + }; + + /* Note: These passes have to be done seperately from ALU rewrite, + * otherwise non-native ALU instructions with source conflits + * or non-native modifiers will not be treated properly. + */ + struct radeon_program_transformation emulate_modifiers[] = { + { &transform_nonnative_modifiers, 0 }, + { 0, 0 } + }; + + struct radeon_program_transformation resolve_src_conflicts[] = { + { &transform_source_conflicts, 0 }, + { 0, 0 } + }; + + /* List of compiler passes. */ + struct radeon_compiler_pass vs_list[] = { + /* NAME DUMP PREDICATE FUNCTION PARAM */ + {"add artificial outputs", 0, 1, rc_vs_add_artificial_outputs, NULL}, + {"transform loops", 1, 1, rc_transform_loops, NULL}, + {"emulate branches", 1, !is_r500, rc_emulate_branches, NULL}, + {"emulate negative addressing", 1, 1, rc_emulate_negative_addressing, NULL}, + {"native rewrite", 1, is_r500, rc_local_transform, alu_rewrite_r500}, + {"native rewrite", 1, !is_r500, rc_local_transform, alu_rewrite_r300}, + {"emulate modifiers", 1, !is_r500, rc_local_transform, emulate_modifiers}, + {"deadcode", 1, opt, rc_dataflow_deadcode, dataflow_outputs_mark_used}, + {"dataflow optimize", 1, opt, rc_optimize, NULL}, + /* This pass must be done after optimizations. */ + {"source conflict resolve", 1, 1, rc_local_transform, resolve_src_conflicts}, + {"register allocation", 1, opt, allocate_temporary_registers, NULL}, + {"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table}, + {"final code validation", 0, 1, rc_validate_final_shader, NULL}, + {"machine code generation", 0, 1, translate_vertex_program, NULL}, + {"dump machine code", 0, c->Base.Debug & RC_DBG_LOG, r300_vertex_program_dump, NULL}, + {NULL, 0, 0, NULL, NULL} + }; + + c->Base.type = RC_VERTEX_PROGRAM; + c->Base.SwizzleCaps = &r300_vertprog_swizzle_caps; + + rc_run_compiler(&c->Base, vs_list); + + c->code->InputsRead = c->Base.Program.InputsRead; + c->code->OutputsWritten = c->Base.Program.OutputsWritten; + rc_constants_copy(&c->code->constants, &c->Base.Program.Constants); +} diff --git a/src/gallium/drivers/r300/compiler/r3xx_vertprog_dump.c b/src/gallium/drivers/r300/compiler/r3xx_vertprog_dump.c new file mode 100644 index 00000000000..2bc0a87eed8 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/r3xx_vertprog_dump.c @@ -0,0 +1,207 @@ +/* + * Copyright 2009 Nicolai Hähnle <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_compiler.h" +#include "radeon_code.h" +#include "../r300_reg.h" + +#include <stdio.h> + +static char* r300_vs_ve_ops[] = { + /* R300 vector ops */ + " VE_NO_OP", + " VE_DOT_PRODUCT", + " VE_MULTIPLY", + " VE_ADD", + " VE_MULTIPLY_ADD", + " VE_DISTANCE_FACTOR", + " VE_FRACTION", + " VE_MAXIMUM", + " VE_MINIMUM", + "VE_SET_GREATER_THAN_EQUAL", + " VE_SET_LESS_THAN", + " VE_MULTIPLYX2_ADD", + " VE_MULTIPLY_CLAMP", + " VE_FLT2FIX_DX", + " VE_FLT2FIX_DX_RND", + /* R500 vector ops */ + " VE_PRED_SET_EQ_PUSH", + " VE_PRED_SET_GT_PUSH", + " VE_PRED_SET_GTE_PUSH", + " VE_PRED_SET_NEQ_PUSH", + " VE_COND_WRITE_EQ", + " VE_COND_WRITE_GT", + " VE_COND_WRITE_GTE", + " VE_COND_WRITE_NEQ", + " VE_COND_MUX_EQ", + " VE_COND_MUX_GT", + " VE_COND_MUX_GTE", + " VE_SET_GREATER_THAN", + " VE_SET_EQUAL", + " VE_SET_NOT_EQUAL", + " (reserved)", + " (reserved)", + " (reserved)", +}; + +static char* r300_vs_me_ops[] = { + /* R300 math ops */ + " ME_NO_OP", + " ME_EXP_BASE2_DX", + " ME_LOG_BASE2_DX", + " ME_EXP_BASEE_FF", + " ME_LIGHT_COEFF_DX", + " ME_POWER_FUNC_FF", + " ME_RECIP_DX", + " ME_RECIP_FF", + " ME_RECIP_SQRT_DX", + " ME_RECIP_SQRT_FF", + " ME_MULTIPLY", + " ME_EXP_BASE2_FULL_DX", + " ME_LOG_BASE2_FULL_DX", + " ME_POWER_FUNC_FF_CLAMP_B", + "ME_POWER_FUNC_FF_CLAMP_B1", + "ME_POWER_FUNC_FF_CLAMP_01", + " ME_SIN", + " ME_COS", + /* R500 math ops */ + " ME_LOG_BASE2_IEEE", + " ME_RECIP_IEEE", + " ME_RECIP_SQRT_IEEE", + " ME_PRED_SET_EQ", + " ME_PRED_SET_GT", + " ME_PRED_SET_GTE", + " ME_PRED_SET_NEQ", + " ME_PRED_SET_CLR", + " ME_PRED_SET_INV", + " ME_PRED_SET_POP", + " ME_PRED_SET_RESTORE", + " (reserved)", + " (reserved)", + " (reserved)", +}; + +/* XXX refactor to avoid clashing symbols */ +static char* r300_vs_src_debug[] = { + "t", + "i", + "c", + "a", +}; + +static char* r300_vs_dst_debug[] = { + "t", + "a0", + "o", + "ox", + "a", + "i", + "u", + "u", +}; + +static char* r300_vs_swiz_debug[] = { + "X", + "Y", + "Z", + "W", + "0", + "1", + "U", + "U", +}; + + +static void r300_vs_op_dump(uint32_t op) +{ + fprintf(stderr, " dst: %d%s op: ", + (op >> 13) & 0x7f, r300_vs_dst_debug[(op >> 8) & 0x7]); + if ((op >> PVS_DST_PRED_ENABLE_SHIFT) & 0x1) { + fprintf(stderr, "PRED %u", + (op >> PVS_DST_PRED_SENSE_SHIFT) & 0x1); + } + if (op & 0x80) { + if (op & 0x1) { + fprintf(stderr, "PVS_MACRO_OP_2CLK_M2X_ADD\n"); + } else { + fprintf(stderr, " PVS_MACRO_OP_2CLK_MADD\n"); + } + } else if (op & 0x40) { + fprintf(stderr, "%s\n", r300_vs_me_ops[op & 0x1f]); + } else { + fprintf(stderr, "%s\n", r300_vs_ve_ops[op & 0x1f]); + } +} + +static void r300_vs_src_dump(uint32_t src) +{ + fprintf(stderr, " reg: %d%s swiz: %s%s/%s%s/%s%s/%s%s\n", + (src >> 5) & 0xff, r300_vs_src_debug[src & 0x3], + src & (1 << 25) ? "-" : " ", + r300_vs_swiz_debug[(src >> 13) & 0x7], + src & (1 << 26) ? "-" : " ", + r300_vs_swiz_debug[(src >> 16) & 0x7], + src & (1 << 27) ? "-" : " ", + r300_vs_swiz_debug[(src >> 19) & 0x7], + src & (1 << 28) ? "-" : " ", + r300_vs_swiz_debug[(src >> 22) & 0x7]); +} + +void r300_vertex_program_dump(struct radeon_compiler *compiler, void *user) +{ + struct r300_vertex_program_compiler *c = (struct r300_vertex_program_compiler*)compiler; + struct r300_vertex_program_code * vs = c->code; + unsigned instrcount = vs->length / 4; + unsigned i; + + fprintf(stderr, "Final vertex program code:\n"); + + for(i = 0; i < instrcount; i++) { + unsigned offset = i*4; + unsigned src; + + fprintf(stderr, "%d: op: 0x%08x", i, vs->body.d[offset]); + r300_vs_op_dump(vs->body.d[offset]); + + for(src = 0; src < 3; ++src) { + fprintf(stderr, " src%i: 0x%08x", src, vs->body.d[offset+1+src]); + r300_vs_src_dump(vs->body.d[offset+1+src]); + } + } + + fprintf(stderr, "Flow Control Ops: 0x%08x\n",vs->fc_ops); + for(i = 0; i < vs->num_fc_ops; i++) { + switch((vs->fc_ops >> (i * 2)) & 0x3 ) { + case 0: fprintf(stderr, "NOP"); break; + case 1: fprintf(stderr, "JUMP"); break; + case 2: fprintf(stderr, "LOOP"); break; + case 3: fprintf(stderr, "JSR"); break; + } + if (c->Base.is_r500) { + fprintf(stderr,": uw-> 0x%08x lw-> 0x%08x\n", + vs->fc_op_addrs.r500[i].uw, + vs->fc_op_addrs.r500[i].lw); + } else { + fprintf(stderr,": 0x%08x\n", vs->fc_op_addrs.r300[i]); + } + } +} diff --git a/src/gallium/drivers/r300/compiler/r500_fragprog.c b/src/gallium/drivers/r300/compiler/r500_fragprog.c new file mode 100644 index 00000000000..cf99f5e4538 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/r500_fragprog.c @@ -0,0 +1,539 @@ +/* + * Copyright 2008 Corbin Simpson <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "r500_fragprog.h" + +#include <stdio.h> + +#include "radeon_compiler_util.h" +#include "radeon_list.h" +#include "radeon_variable.h" +#include "../r300_reg.h" + +/** + * Rewrite IF instructions to use the ALU result special register. + */ +int r500_transform_IF( + struct radeon_compiler * c, + struct rc_instruction * inst_if, + void *data) +{ + struct rc_variable * writer; + struct rc_list * writer_list, * list_ptr; + struct rc_list * var_list = rc_get_variables(c); + unsigned int generic_if = 0; + unsigned int alu_chan; + + if (inst_if->U.I.Opcode != RC_OPCODE_IF) { + return 0; + } + + writer_list = rc_variable_list_get_writers( + var_list, inst_if->Type, &inst_if->U.I.SrcReg[0]); + if (!writer_list) { + generic_if = 1; + } else { + + /* Make sure it is safe for the writers to write to + * ALU Result */ + for (list_ptr = writer_list; list_ptr; + list_ptr = list_ptr->Next) { + struct rc_instruction * inst; + writer = list_ptr->Item; + /* We are going to modify the destination register + * of writer, so if it has a reader other than + * inst_if (aka ReaderCount > 1) we must fall back to + * our generic IF. + * If the writer has a lower IP than inst_if, this + * means that inst_if is above the writer in a loop. + * I'm not sure why this would ever happen, but + * if it does we want to make sure we fall back + * to our generic IF. */ + if (writer->ReaderCount > 1 || writer->Inst->IP < inst_if->IP) { + generic_if = 1; + break; + } + + /* The ALU Result is not preserved across IF + * instructions, so if there is another IF + * instruction between writer and inst_if, then + * we need to fall back to generic IF. */ + for (inst = writer->Inst; inst != inst_if; inst = inst->Next) { + const struct rc_opcode_info * info = + rc_get_opcode_info(inst->U.I.Opcode); + if (info->IsFlowControl) { + generic_if = 1; + break; + } + } + if (generic_if) { + break; + } + } + } + + if (GET_SWZ(inst_if->U.I.SrcReg[0].Swizzle, 0) == RC_SWIZZLE_X) { + alu_chan = RC_ALURESULT_X; + } else { + alu_chan = RC_ALURESULT_W; + } + if (generic_if) { + struct rc_instruction * inst_mov = + rc_insert_new_instruction(c, inst_if->Prev); + + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.WriteMask = 0; + inst_mov->U.I.DstReg.File = RC_FILE_NONE; + inst_mov->U.I.ALUResultCompare = RC_COMPARE_FUNC_NOTEQUAL; + inst_mov->U.I.WriteALUResult = alu_chan; + inst_mov->U.I.SrcReg[0] = inst_if->U.I.SrcReg[0]; + if (alu_chan == RC_ALURESULT_X) { + inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4( + inst_mov->U.I.SrcReg[0].Swizzle, + RC_SWIZZLE_X, RC_SWIZZLE_UNUSED, + RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED); + } else { + inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4( + inst_mov->U.I.SrcReg[0].Swizzle, + RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, + RC_SWIZZLE_UNUSED, RC_SWIZZLE_Z); + } + } else { + rc_compare_func compare_func = RC_COMPARE_FUNC_NEVER; + unsigned int reverse_srcs = 0; + unsigned int preserve_opcode = 0; + for (list_ptr = writer_list; list_ptr; + list_ptr = list_ptr->Next) { + writer = list_ptr->Item; + switch(writer->Inst->U.I.Opcode) { + case RC_OPCODE_SEQ: + compare_func = RC_COMPARE_FUNC_EQUAL; + break; + case RC_OPCODE_SNE: + compare_func = RC_COMPARE_FUNC_NOTEQUAL; + break; + case RC_OPCODE_SLE: + reverse_srcs = 1; + /* Fall through */ + case RC_OPCODE_SGE: + compare_func = RC_COMPARE_FUNC_GEQUAL; + break; + case RC_OPCODE_SGT: + reverse_srcs = 1; + /* Fall through */ + case RC_OPCODE_SLT: + compare_func = RC_COMPARE_FUNC_LESS; + break; + default: + compare_func = RC_COMPARE_FUNC_NOTEQUAL; + preserve_opcode = 1; + break; + } + if (!preserve_opcode) { + writer->Inst->U.I.Opcode = RC_OPCODE_SUB; + } + writer->Inst->U.I.DstReg.WriteMask = 0; + writer->Inst->U.I.DstReg.File = RC_FILE_NONE; + writer->Inst->U.I.WriteALUResult = alu_chan; + writer->Inst->U.I.ALUResultCompare = compare_func; + if (reverse_srcs) { + struct rc_src_register temp_src; + temp_src = writer->Inst->U.I.SrcReg[0]; + writer->Inst->U.I.SrcReg[0] = + writer->Inst->U.I.SrcReg[1]; + writer->Inst->U.I.SrcReg[1] = temp_src; + } + } + } + + inst_if->U.I.SrcReg[0].File = RC_FILE_SPECIAL; + inst_if->U.I.SrcReg[0].Index = RC_SPECIAL_ALU_RESULT; + inst_if->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE( + RC_SWIZZLE_X, RC_SWIZZLE_UNUSED, + RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED); + inst_if->U.I.SrcReg[0].Negate = 0; + + return 1; +} + +static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) +{ + unsigned int relevant; + int i; + + if (opcode == RC_OPCODE_TEX || + opcode == RC_OPCODE_TXB || + opcode == RC_OPCODE_TXP || + opcode == RC_OPCODE_TXD || + opcode == RC_OPCODE_TXL || + opcode == RC_OPCODE_KIL) { + if (reg.Abs) + return 0; + + if (opcode == RC_OPCODE_KIL && (reg.Swizzle != RC_SWIZZLE_XYZW || reg.Negate != RC_MASK_NONE)) + return 0; + + for(i = 0; i < 4; ++i) { + unsigned int swz = GET_SWZ(reg.Swizzle, i); + if (swz == RC_SWIZZLE_UNUSED) { + reg.Negate &= ~(1 << i); + continue; + } + if (swz >= 4) + return 0; + } + + if (reg.Negate) + return 0; + + return 1; + } else if (opcode == RC_OPCODE_DDX || opcode == RC_OPCODE_DDY) { + /* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles; + * if it doesn't fit perfectly into a .xyzw case... */ + if (reg.Swizzle == RC_SWIZZLE_XYZW && !reg.Abs && !reg.Negate) + return 1; + + return 0; + } else { + /* ALU instructions support almost everything */ + relevant = 0; + for(i = 0; i < 3; ++i) { + unsigned int swz = GET_SWZ(reg.Swizzle, i); + if (swz != RC_SWIZZLE_UNUSED && swz != RC_SWIZZLE_ZERO) + relevant |= 1 << i; + } + if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) + return 0; + + return 1; + } +} + +/** + * Split source register access. + * + * The only thing we *cannot* do in an ALU instruction is per-component + * negation. + */ +static void r500_swizzle_split(struct rc_src_register src, unsigned int usemask, + struct rc_swizzle_split * split) +{ + unsigned int negatebase[2] = { 0, 0 }; + int i; + + for(i = 0; i < 4; ++i) { + unsigned int swz = GET_SWZ(src.Swizzle, i); + if (swz == RC_SWIZZLE_UNUSED || !GET_BIT(usemask, i)) + continue; + negatebase[GET_BIT(src.Negate, i)] |= 1 << i; + } + + split->NumPhases = 0; + + for(i = 0; i <= 1; ++i) { + if (!negatebase[i]) + continue; + + split->Phase[split->NumPhases++] = negatebase[i]; + } +} + +struct rc_swizzle_caps r500_swizzle_caps = { + .IsNative = r500_swizzle_is_native, + .Split = r500_swizzle_split +}; + +static char *toswiz(int swiz_val) { + switch(swiz_val) { + case 0: return "R"; + case 1: return "G"; + case 2: return "B"; + case 3: return "A"; + case 4: return "0"; + case 5: return "H"; + case 6: return "1"; + case 7: return "U"; + } + return NULL; +} + +static char *toop(int op_val) +{ + char *str = NULL; + switch (op_val) { + case 0: str = "MAD"; break; + case 1: str = "DP3"; break; + case 2: str = "DP4"; break; + case 3: str = "D2A"; break; + case 4: str = "MIN"; break; + case 5: str = "MAX"; break; + case 6: str = "Reserved"; break; + case 7: str = "CND"; break; + case 8: str = "CMP"; break; + case 9: str = "FRC"; break; + case 10: str = "SOP"; break; + case 11: str = "MDH"; break; + case 12: str = "MDV"; break; + } + return str; +} + +static char *to_alpha_op(int op_val) +{ + char *str = NULL; + switch (op_val) { + case 0: str = "MAD"; break; + case 1: str = "DP"; break; + case 2: str = "MIN"; break; + case 3: str = "MAX"; break; + case 4: str = "Reserved"; break; + case 5: str = "CND"; break; + case 6: str = "CMP"; break; + case 7: str = "FRC"; break; + case 8: str = "EX2"; break; + case 9: str = "LN2"; break; + case 10: str = "RCP"; break; + case 11: str = "RSQ"; break; + case 12: str = "SIN"; break; + case 13: str = "COS"; break; + case 14: str = "MDH"; break; + case 15: str = "MDV"; break; + } + return str; +} + +static char *to_mask(int val) +{ + char *str = NULL; + switch(val) { + case 0: str = "NONE"; break; + case 1: str = "R"; break; + case 2: str = "G"; break; + case 3: str = "RG"; break; + case 4: str = "B"; break; + case 5: str = "RB"; break; + case 6: str = "GB"; break; + case 7: str = "RGB"; break; + case 8: str = "A"; break; + case 9: str = "AR"; break; + case 10: str = "AG"; break; + case 11: str = "ARG"; break; + case 12: str = "AB"; break; + case 13: str = "ARB"; break; + case 14: str = "AGB"; break; + case 15: str = "ARGB"; break; + } + return str; +} + +static char *to_texop(int val) +{ + switch(val) { + case 0: return "NOP"; + case 1: return "LD"; + case 2: return "TEXKILL"; + case 3: return "PROJ"; + case 4: return "LODBIAS"; + case 5: return "LOD"; + case 6: return "DXDY"; + } + return NULL; +} + +void r500FragmentProgramDump(struct radeon_compiler *c, void *user) +{ + struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c; + struct r500_fragment_program_code *code = &compiler->code->code.r500; + int n, i; + uint32_t inst; + uint32_t inst0; + char *str = NULL; + fprintf(stderr, "R500 Fragment Program:\n--------\n"); + + for (n = 0; n < code->inst_end+1; n++) { + inst0 = inst = code->inst[n].inst0; + fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst); + switch(inst & 0x3) { + case R500_INST_TYPE_ALU: str = "ALU"; break; + case R500_INST_TYPE_OUT: str = "OUT"; break; + case R500_INST_TYPE_FC: str = "FC"; break; + case R500_INST_TYPE_TEX: str = "TEX"; break; + }; + fprintf(stderr,"%s %s %s %s %s ", str, + inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "", + inst & R500_INST_LAST ? "LAST" : "", + inst & R500_INST_NOP ? "NOP" : "", + inst & R500_INST_ALU_WAIT ? "ALU WAIT" : ""); + fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf), + to_mask((inst >> 15) & 0xf)); + + switch(inst0 & 0x3) { + case R500_INST_TYPE_ALU: + case R500_INST_TYPE_OUT: + fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", code->inst[n].inst1); + inst = code->inst[n].inst1; + + fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n", + inst & 0xff, (inst & (1<<8)) ? 'c' : 't', + (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't', + (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't', + (inst >> 30)); + + fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", code->inst[n].inst2); + inst = code->inst[n].inst2; + fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n", + inst & 0xff, (inst & (1<<8)) ? 'c' : 't', + (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't', + (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't', + (inst >> 30)); + fprintf(stderr,"\t3 RGB_INST: 0x%08x:", code->inst[n].inst3); + inst = code->inst[n].inst3; + fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d targ: %d\n", + (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7), + (inst >> 11) & 0x3, + (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7), + (inst >> 24) & 0x3, (inst >> 29) & 0x3); + + + fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", code->inst[n].inst4); + inst = code->inst[n].inst4; + fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d targ %d w:%d\n", to_alpha_op(inst & 0xf), + (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"", + (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3, + (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3, + (inst >> 29) & 0x3, + (inst >> 31) & 0x1); + + fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", code->inst[n].inst5); + inst = code->inst[n].inst5; + fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf), + (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"", + (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7), + (inst >> 23) & 0x3, + (inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3); + break; + case R500_INST_TYPE_FC: + fprintf(stderr, "\t2:FC_INST 0x%08x:", code->inst[n].inst2); + inst = code->inst[n].inst2; + /* JUMP_FUNC JUMP_ANY*/ + fprintf(stderr, "0x%02x %1x ", inst >> 8 & 0xff, + (inst & R500_FC_JUMP_ANY) >> 5); + + /* OP */ + switch(inst & 0x7){ + case R500_FC_OP_JUMP: + fprintf(stderr, "JUMP"); + break; + case R500_FC_OP_LOOP: + fprintf(stderr, "LOOP"); + break; + case R500_FC_OP_ENDLOOP: + fprintf(stderr, "ENDLOOP"); + break; + case R500_FC_OP_REP: + fprintf(stderr, "REP"); + break; + case R500_FC_OP_ENDREP: + fprintf(stderr, "ENDREP"); + break; + case R500_FC_OP_BREAKLOOP: + fprintf(stderr, "BREAKLOOP"); + break; + case R500_FC_OP_BREAKREP: + fprintf(stderr, "BREAKREP"); + break; + case R500_FC_OP_CONTINUE: + fprintf(stderr, "CONTINUE"); + break; + } + fprintf(stderr," "); + /* A_OP */ + switch(inst & (0x3 << 6)){ + case R500_FC_A_OP_NONE: + fprintf(stderr, "NONE"); + break; + case R500_FC_A_OP_POP: + fprintf(stderr, "POP"); + break; + case R500_FC_A_OP_PUSH: + fprintf(stderr, "PUSH"); + break; + } + /* B_OP0 B_OP1 */ + for(i=0; i<2; i++){ + fprintf(stderr, " "); + switch(inst & (0x3 << (24 + (i * 2)))){ + /* R500_FC_B_OP0_NONE + * R500_FC_B_OP1_NONE */ + case 0: + fprintf(stderr, "NONE"); + break; + case R500_FC_B_OP0_DECR: + case R500_FC_B_OP1_DECR: + fprintf(stderr, "DECR"); + break; + case R500_FC_B_OP0_INCR: + case R500_FC_B_OP1_INCR: + fprintf(stderr, "INCR"); + break; + } + } + /*POP_CNT B_ELSE */ + fprintf(stderr, " %d %1x", (inst >> 16) & 0x1f, (inst & R500_FC_B_ELSE) >> 4); + inst = code->inst[n].inst3; + /* JUMP_ADDR */ + fprintf(stderr, " %d", inst >> 16); + + if(code->inst[n].inst2 & R500_FC_IGNORE_UNCOVERED){ + fprintf(stderr, " IGN_UNC"); + } + inst = code->inst[n].inst3; + fprintf(stderr, "\n\t3:FC_ADDR 0x%08x:", inst); + fprintf(stderr, "BOOL: 0x%02x, INT: 0x%02x, JUMP_ADDR: %d, JMP_GLBL: %1x\n", + inst & 0x1f, (inst >> 8) & 0x1f, (inst >> 16) & 0x1ff, inst >> 31); + break; + case R500_INST_TYPE_TEX: + inst = code->inst[n].inst1; + fprintf(stderr,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf, + to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "", + (inst & (1<<26)) ? "IGNUNC" : "", (inst & (1<<27)) ? "UNSCALED" : "SCALED"); + inst = code->inst[n].inst2; + fprintf(stderr,"\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst, + inst & 127, inst & (1<<7) ? "(rel)" : "", + toswiz((inst >> 8) & 0x3), toswiz((inst >> 10) & 0x3), + toswiz((inst >> 12) & 0x3), toswiz((inst >> 14) & 0x3), + (inst >> 16) & 127, inst & (1<<23) ? "(rel)" : "", + toswiz((inst >> 24) & 0x3), toswiz((inst >> 26) & 0x3), + toswiz((inst >> 28) & 0x3), toswiz((inst >> 30) & 0x3)); + + fprintf(stderr,"\t3:TEX_DXDY: 0x%08x\n", code->inst[n].inst3); + break; + } + fprintf(stderr,"\n"); + } + +} diff --git a/src/gallium/drivers/r300/compiler/r500_fragprog.h b/src/gallium/drivers/r300/compiler/r500_fragprog.h new file mode 100644 index 00000000000..6aa448cc6f7 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/r500_fragprog.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/* + * Authors: + * Ben Skeggs <[email protected]> + * Jerome Glisse <[email protected]> + */ +#ifndef __R500_FRAGPROG_H_ +#define __R500_FRAGPROG_H_ + +#include "radeon_compiler.h" +#include "radeon_swizzle.h" + +extern void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user); + +extern void r500FragmentProgramDump(struct radeon_compiler *c, void *user); + +extern struct rc_swizzle_caps r500_swizzle_caps; + +extern int r500_transform_IF( + struct radeon_compiler * c, + struct rc_instruction * inst_if, + void* data); + +#endif diff --git a/src/gallium/drivers/r300/compiler/r500_fragprog_emit.c b/src/gallium/drivers/r300/compiler/r500_fragprog_emit.c new file mode 100644 index 00000000000..c30cd753d15 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/r500_fragprog_emit.c @@ -0,0 +1,678 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * Copyright 2008 Corbin Simpson <[email protected]> + * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + * + * \author Ben Skeggs <[email protected]> + * + * \author Jerome Glisse <[email protected]> + * + * \author Corbin Simpson <[email protected]> + * + */ + +#include "r500_fragprog.h" + +#include "../r300_reg.h" + +#include "radeon_program_pair.h" + +#define PROG_CODE \ + struct r500_fragment_program_code *code = &c->code->code.r500 + +#define error(fmt, args...) do { \ + rc_error(&c->Base, "%s::%s(): " fmt "\n", \ + __FILE__, __FUNCTION__, ##args); \ + } while(0) + + +struct branch_info { + int If; + int Else; + int Endif; +}; + +struct r500_loop_info { + int BgnLoop; + + int BranchDepth; + int * Brks; + int BrkCount; + int BrkReserved; + + int * Conts; + int ContCount; + int ContReserved; +}; + +struct emit_state { + struct radeon_compiler * C; + struct r500_fragment_program_code * Code; + + struct branch_info * Branches; + unsigned int CurrentBranchDepth; + unsigned int BranchesReserved; + + struct r500_loop_info * Loops; + unsigned int CurrentLoopDepth; + unsigned int LoopsReserved; + + unsigned int MaxBranchDepth; + +}; + +static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode) +{ + switch(opcode) { + case RC_OPCODE_CMP: return R500_ALU_RGBA_OP_CMP; + case RC_OPCODE_CND: return R500_ALU_RGBA_OP_CND; + case RC_OPCODE_DDX: return R500_ALU_RGBA_OP_MDH; + case RC_OPCODE_DDY: return R500_ALU_RGBA_OP_MDV; + case RC_OPCODE_DP3: return R500_ALU_RGBA_OP_DP3; + case RC_OPCODE_DP4: return R500_ALU_RGBA_OP_DP4; + case RC_OPCODE_FRC: return R500_ALU_RGBA_OP_FRC; + default: + error("translate_rgb_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name); + /* fall through */ + case RC_OPCODE_NOP: + /* fall through */ + case RC_OPCODE_MAD: return R500_ALU_RGBA_OP_MAD; + case RC_OPCODE_MAX: return R500_ALU_RGBA_OP_MAX; + case RC_OPCODE_MIN: return R500_ALU_RGBA_OP_MIN; + case RC_OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP; + } +} + +static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode) +{ + switch(opcode) { + case RC_OPCODE_CMP: return R500_ALPHA_OP_CMP; + case RC_OPCODE_CND: return R500_ALPHA_OP_CND; + case RC_OPCODE_COS: return R500_ALPHA_OP_COS; + case RC_OPCODE_DDX: return R500_ALPHA_OP_MDH; + case RC_OPCODE_DDY: return R500_ALPHA_OP_MDV; + case RC_OPCODE_DP3: return R500_ALPHA_OP_DP; + case RC_OPCODE_DP4: return R500_ALPHA_OP_DP; + case RC_OPCODE_EX2: return R500_ALPHA_OP_EX2; + case RC_OPCODE_FRC: return R500_ALPHA_OP_FRC; + case RC_OPCODE_LG2: return R500_ALPHA_OP_LN2; + default: + error("translate_alpha_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name); + /* fall through */ + case RC_OPCODE_NOP: + /* fall through */ + case RC_OPCODE_MAD: return R500_ALPHA_OP_MAD; + case RC_OPCODE_MAX: return R500_ALPHA_OP_MAX; + case RC_OPCODE_MIN: return R500_ALPHA_OP_MIN; + case RC_OPCODE_RCP: return R500_ALPHA_OP_RCP; + case RC_OPCODE_RSQ: return R500_ALPHA_OP_RSQ; + case RC_OPCODE_SIN: return R500_ALPHA_OP_SIN; + } +} + +static unsigned int fix_hw_swizzle(unsigned int swz) +{ + switch (swz) { + case RC_SWIZZLE_ZERO: + case RC_SWIZZLE_UNUSED: + swz = 4; + break; + case RC_SWIZZLE_HALF: + swz = 5; + break; + case RC_SWIZZLE_ONE: + swz = 6; + break; + } + + return swz; +} + +static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg) +{ + unsigned int t = inst->RGB.Arg[arg].Source; + int comp; + t |= inst->RGB.Arg[arg].Negate << 11; + t |= inst->RGB.Arg[arg].Abs << 12; + + for(comp = 0; comp < 3; ++comp) + t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2); + + return t; +} + +static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i) +{ + unsigned int t = inst->Alpha.Arg[i].Source; + t |= fix_hw_swizzle(GET_SWZ(inst->Alpha.Arg[i].Swizzle, 0)) << 2; + t |= inst->Alpha.Arg[i].Negate << 5; + t |= inst->Alpha.Arg[i].Abs << 6; + return t; +} + +static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler * c, rc_compare_func func) +{ + switch(func) { + case RC_COMPARE_FUNC_EQUAL: return R500_INST_ALU_RESULT_OP_EQ; + case RC_COMPARE_FUNC_LESS: return R500_INST_ALU_RESULT_OP_LT; + case RC_COMPARE_FUNC_GEQUAL: return R500_INST_ALU_RESULT_OP_GE; + case RC_COMPARE_FUNC_NOTEQUAL: return R500_INST_ALU_RESULT_OP_NE; + default: + rc_error(&c->Base, "%s: unsupported compare func %i\n", __FUNCTION__, func); + return 0; + } +} + +static void use_temporary(struct r500_fragment_program_code* code, unsigned int index) +{ + if (index > code->max_temp_idx) + code->max_temp_idx = index; +} + +static unsigned int use_source(struct r500_fragment_program_code* code, struct rc_pair_instruction_source src) +{ + /* From docs: + * Note that inline constants set the MSB of ADDR0 and clear ADDR0_CONST. + * MSB = 1 << 7 */ + if (!src.Used) + return 1 << 7; + + if (src.File == RC_FILE_CONSTANT) { + return src.Index | R500_RGB_ADDR0_CONST; + } else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) { + use_temporary(code, src.Index); + return src.Index; + } + + return 0; +} + +/** + * NOP the specified instruction if it is not a texture lookup. + */ +static void alu_nop(struct r300_fragment_program_compiler *c, int ip) +{ + PROG_CODE; + + if ((code->inst[ip].inst0 & 0x3) != R500_INST_TYPE_TEX) { + code->inst[ip].inst0 |= R500_INST_NOP; + } +} + +/** + * Emit a paired ALU instruction. + */ +static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst) +{ + int ip; + PROG_CODE; + + if (code->inst_end >= c->Base.max_alu_insts-1) { + error("emit_alu: Too many instructions"); + return; + } + + ip = ++code->inst_end; + + /* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */ + if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX || + inst->RGB.Opcode == RC_OPCODE_DDY || inst->Alpha.Opcode == RC_OPCODE_DDY) { + if (ip > 0) { + alu_nop(c, ip - 1); + } + } + + code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode); + code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode); + + if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) { + code->inst[ip].inst0 = R500_INST_TYPE_OUT; + if (inst->WriteALUResult) { + error("Cannot write output and ALU result at the same time"); + return; + } + } else { + code->inst[ip].inst0 = R500_INST_TYPE_ALU; + } + code->inst[ip].inst0 |= R500_INST_TEX_SEM_WAIT; + + code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11); + code->inst[ip].inst0 |= inst->Alpha.WriteMask ? 1 << 14 : 0; + code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18); + if (inst->Nop) { + code->inst[ip].inst0 |= R500_INST_NOP; + } + if (inst->Alpha.DepthWriteMask) { + code->inst[ip].inst4 |= R500_ALPHA_W_OMASK; + c->code->writes_depth = 1; + } + + code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex); + code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex); + use_temporary(code, inst->Alpha.DestIndex); + use_temporary(code, inst->RGB.DestIndex); + + if (inst->RGB.Saturate) + code->inst[ip].inst0 |= R500_INST_RGB_CLAMP; + if (inst->Alpha.Saturate) + code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP; + + /* Set the presubtract operation. */ + switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) { + case RC_PRESUB_BIAS: + code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_2RGB0; + break; + case RC_PRESUB_SUB: + code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_MINUS_RGB0; + break; + case RC_PRESUB_ADD: + code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_PLUS_RGB0; + break; + case RC_PRESUB_INV: + code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_RGB0; + break; + default: + break; + } + switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) { + case RC_PRESUB_BIAS: + code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_2A0; + break; + case RC_PRESUB_SUB: + code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_MINUS_A0; + break; + case RC_PRESUB_ADD: + code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_PLUS_A0; + break; + case RC_PRESUB_INV: + code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_A0; + break; + default: + break; + } + + code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0])); + code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1])); + code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2])); + + code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0])); + code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1])); + code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2])); + + code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT; + code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT; + code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT; + + code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT; + code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT; + code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT; + + code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target); + code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target); + + if (inst->WriteALUResult) { + code->inst[ip].inst3 |= R500_ALU_RGB_WMASK; + + if (inst->WriteALUResult == RC_ALURESULT_X) + code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED; + else + code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA; + + code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare); + } +} + +static unsigned int translate_strq_swizzle(unsigned int swizzle) +{ + unsigned int swiz = 0; + int i; + for (i = 0; i < 4; i++) + swiz |= (GET_SWZ(swizzle, i) & 0x3) << i*2; + return swiz; +} + +/** + * Emit a single TEX instruction + */ +static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst) +{ + int ip; + PROG_CODE; + + if (code->inst_end >= c->Base.max_alu_insts-1) { + error("emit_tex: Too many instructions"); + return 0; + } + + ip = ++code->inst_end; + + code->inst[ip].inst0 = R500_INST_TYPE_TEX + | (inst->DstReg.WriteMask << 11) + | R500_INST_TEX_SEM_WAIT; + code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit) + | R500_TEX_SEM_ACQUIRE; + + if (inst->TexSrcTarget == RC_TEXTURE_RECT) + code->inst[ip].inst1 |= R500_TEX_UNSCALED; + + switch (inst->Opcode) { + case RC_OPCODE_KIL: + code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL; + break; + case RC_OPCODE_TEX: + code->inst[ip].inst1 |= R500_TEX_INST_LD; + break; + case RC_OPCODE_TXB: + code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS; + break; + case RC_OPCODE_TXP: + code->inst[ip].inst1 |= R500_TEX_INST_PROJ; + break; + case RC_OPCODE_TXD: + code->inst[ip].inst1 |= R500_TEX_INST_DXDY; + break; + case RC_OPCODE_TXL: + code->inst[ip].inst1 |= R500_TEX_INST_LOD; + break; + default: + error("emit_tex can't handle opcode %s\n", rc_get_opcode_info(inst->Opcode)->Name); + } + + use_temporary(code, inst->SrcReg[0].Index); + if (inst->Opcode != RC_OPCODE_KIL) + use_temporary(code, inst->DstReg.Index); + + code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index) + | (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8) + | R500_TEX_DST_ADDR(inst->DstReg.Index) + | (GET_SWZ(inst->TexSwizzle, 0) << 24) + | (GET_SWZ(inst->TexSwizzle, 1) << 26) + | (GET_SWZ(inst->TexSwizzle, 2) << 28) + | (GET_SWZ(inst->TexSwizzle, 3) << 30) + ; + + if (inst->Opcode == RC_OPCODE_TXD) { + use_temporary(code, inst->SrcReg[1].Index); + use_temporary(code, inst->SrcReg[2].Index); + + /* DX and DY parameters are specified in a separate register. */ + code->inst[ip].inst3 = + R500_DX_ADDR(inst->SrcReg[1].Index) | + (translate_strq_swizzle(inst->SrcReg[1].Swizzle) << 8) | + R500_DY_ADDR(inst->SrcReg[2].Index) | + (translate_strq_swizzle(inst->SrcReg[2].Swizzle) << 24); + } + + return 1; +} + +static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst) +{ + unsigned int newip; + + if (s->Code->inst_end >= s->C->max_alu_insts-1) { + rc_error(s->C, "emit_tex: Too many instructions"); + return; + } + + newip = ++s->Code->inst_end; + + /* Currently all loops use the same integer constant to intialize + * the loop variables. */ + if(!s->Code->int_constants[0]) { + s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff); + s->Code->int_constant_count = 1; + } + s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT; + + switch(inst->U.I.Opcode){ + struct branch_info * branch; + struct r500_loop_info * loop; + case RC_OPCODE_BGNLOOP: + memory_pool_array_reserve(&s->C->Pool, struct r500_loop_info, + s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1); + + loop = &s->Loops[s->CurrentLoopDepth++]; + memset(loop, 0, sizeof(struct r500_loop_info)); + loop->BranchDepth = s->CurrentBranchDepth; + loop->BgnLoop = newip; + + s->Code->inst[newip].inst2 = R500_FC_OP_LOOP + | R500_FC_JUMP_FUNC(0x00) + | R500_FC_IGNORE_UNCOVERED + ; + break; + case RC_OPCODE_BRK: + loop = &s->Loops[s->CurrentLoopDepth - 1]; + memory_pool_array_reserve(&s->C->Pool, int, loop->Brks, + loop->BrkCount, loop->BrkReserved, 1); + + loop->Brks[loop->BrkCount++] = newip; + s->Code->inst[newip].inst2 = R500_FC_OP_BREAKLOOP + | R500_FC_JUMP_FUNC(0xff) + | R500_FC_B_OP1_DECR + | R500_FC_B_POP_CNT( + s->CurrentBranchDepth - loop->BranchDepth) + | R500_FC_IGNORE_UNCOVERED + ; + break; + + case RC_OPCODE_CONT: + loop = &s->Loops[s->CurrentLoopDepth - 1]; + memory_pool_array_reserve(&s->C->Pool, int, loop->Conts, + loop->ContCount, loop->ContReserved, 1); + loop->Conts[loop->ContCount++] = newip; + s->Code->inst[newip].inst2 = R500_FC_OP_CONTINUE + | R500_FC_JUMP_FUNC(0xff) + | R500_FC_B_OP1_DECR + | R500_FC_B_POP_CNT( + s->CurrentBranchDepth - loop->BranchDepth) + | R500_FC_IGNORE_UNCOVERED + ; + break; + + case RC_OPCODE_ENDLOOP: + { + loop = &s->Loops[s->CurrentLoopDepth - 1]; + /* Emit ENDLOOP */ + s->Code->inst[newip].inst2 = R500_FC_OP_ENDLOOP + | R500_FC_JUMP_FUNC(0xff) + | R500_FC_JUMP_ANY + | R500_FC_IGNORE_UNCOVERED + ; + /* The constant integer at index 0 is used by all loops. */ + s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0) + | R500_FC_JUMP_ADDR(loop->BgnLoop + 1) + ; + + /* Set jump address and int constant for BGNLOOP */ + s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0) + | R500_FC_JUMP_ADDR(newip) + ; + + /* Set jump address for the BRK instructions. */ + while(loop->BrkCount--) { + s->Code->inst[loop->Brks[loop->BrkCount]].inst3 = + R500_FC_JUMP_ADDR(newip + 1); + } + + /* Set jump address for CONT instructions. */ + while(loop->ContCount--) { + s->Code->inst[loop->Conts[loop->ContCount]].inst3 = + R500_FC_JUMP_ADDR(newip); + } + s->CurrentLoopDepth--; + break; + } + case RC_OPCODE_IF: + if ( s->CurrentBranchDepth >= R500_PFS_MAX_BRANCH_DEPTH_FULL) { + rc_error(s->C, "Branch depth exceeds hardware limit"); + return; + } + memory_pool_array_reserve(&s->C->Pool, struct branch_info, + s->Branches, s->CurrentBranchDepth, s->BranchesReserved, 1); + + branch = &s->Branches[s->CurrentBranchDepth++]; + branch->If = newip; + branch->Else = -1; + branch->Endif = -1; + + if (s->CurrentBranchDepth > s->MaxBranchDepth) + s->MaxBranchDepth = s->CurrentBranchDepth; + + /* actual instruction is filled in at ENDIF time */ + break; + + case RC_OPCODE_ELSE: + if (!s->CurrentBranchDepth) { + rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__); + return; + } + + branch = &s->Branches[s->CurrentBranchDepth - 1]; + branch->Else = newip; + + /* actual instruction is filled in at ENDIF time */ + break; + + case RC_OPCODE_ENDIF: + if (!s->CurrentBranchDepth) { + rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__); + return; + } + + branch = &s->Branches[s->CurrentBranchDepth - 1]; + branch->Endif = newip; + + s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP + | R500_FC_A_OP_NONE /* no address stack */ + | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */ + | R500_FC_B_OP0_DECR /* decrement branch counter if stay */ + | R500_FC_B_OP1_NONE /* no branch counter if stay */ + | R500_FC_B_POP_CNT(1) + ; + s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); + s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP + | R500_FC_A_OP_NONE /* no address stack */ + | R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */ + | R500_FC_B_OP0_INCR /* increment branch counter if stay */ + | R500_FC_IGNORE_UNCOVERED + ; + + if (branch->Else >= 0) { + /* increment branch counter also if jump */ + s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR; + s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1); + + s->Code->inst[branch->Else].inst2 = R500_FC_OP_JUMP + | R500_FC_A_OP_NONE /* no address stack */ + | R500_FC_B_ELSE /* all active pixels want to jump */ + | R500_FC_B_OP0_NONE /* no counter op if stay */ + | R500_FC_B_OP1_DECR /* decrement branch counter if jump */ + | R500_FC_B_POP_CNT(1) + ; + s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); + } else { + /* don't touch branch counter on jump */ + s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE; + s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); + } + + + s->CurrentBranchDepth--; + break; + default: + rc_error(s->C, "%s: unknown opcode %s\n", __FUNCTION__, rc_get_opcode_info(inst->U.I.Opcode)->Name); + } +} + +void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user) +{ + struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c; + struct emit_state s; + struct r500_fragment_program_code *code = &compiler->code->code.r500; + + memset(&s, 0, sizeof(s)); + s.C = &compiler->Base; + s.Code = code; + + memset(code, 0, sizeof(*code)); + code->max_temp_idx = 1; + code->inst_end = -1; + + for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next; + inst != &compiler->Base.Program.Instructions && !compiler->Base.Error; + inst = inst->Next) { + if (inst->Type == RC_INSTRUCTION_NORMAL) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (opcode->IsFlowControl) { + emit_flowcontrol(&s, inst); + } else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) { + continue; + } else { + emit_tex(compiler, &inst->U.I); + } + } else { + emit_paired(compiler, &inst->U.P); + } + } + + if (code->max_temp_idx >= compiler->Base.max_temp_regs) + rc_error(&compiler->Base, "Too many hardware temporaries used"); + + if (compiler->Base.Error) + return; + + if (code->inst_end == -1 || + (code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) { + int ip; + + /* This may happen when dead-code elimination is disabled or + * when most of the fragment program logic is leading to a KIL */ + if (code->inst_end >= compiler->Base.max_alu_insts-1) { + rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions"); + return; + } + + ip = ++code->inst_end; + code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT; + } + + /* Enable full flow control mode if we are using loops or have if + * statements nested at least four deep. */ + if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) { + if (code->max_temp_idx < 1) + code->max_temp_idx = 1; + + code->us_fc_ctrl |= R500_FC_FULL_FC_EN; + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_code.c b/src/gallium/drivers/r300/compiler/radeon_code.c new file mode 100644 index 00000000000..6842fb873bc --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_code.c @@ -0,0 +1,187 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_code.h" + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include "radeon_program.h" + +void rc_constants_init(struct rc_constant_list * c) +{ + memset(c, 0, sizeof(*c)); +} + +/** + * Copy a constants structure, assuming that the destination structure + * is not initialized. + */ +void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src) +{ + dst->Constants = malloc(sizeof(struct rc_constant) * src->Count); + memcpy(dst->Constants, src->Constants, sizeof(struct rc_constant) * src->Count); + dst->Count = src->Count; + dst->_Reserved = src->Count; +} + +void rc_constants_destroy(struct rc_constant_list * c) +{ + free(c->Constants); + memset(c, 0, sizeof(*c)); +} + +unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant) +{ + unsigned index = c->Count; + + if (c->Count >= c->_Reserved) { + struct rc_constant * newlist; + + c->_Reserved = c->_Reserved * 2; + if (!c->_Reserved) + c->_Reserved = 16; + + newlist = malloc(sizeof(struct rc_constant) * c->_Reserved); + memcpy(newlist, c->Constants, sizeof(struct rc_constant) * c->Count); + + free(c->Constants); + c->Constants = newlist; + } + + c->Constants[index] = *constant; + c->Count++; + + return index; +} + + +/** + * Add a state vector to the constant list, while trying to avoid duplicates. + */ +unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state0, unsigned state1) +{ + unsigned index; + struct rc_constant constant; + + for(index = 0; index < c->Count; ++index) { + if (c->Constants[index].Type == RC_CONSTANT_STATE) { + if (c->Constants[index].u.State[0] == state0 && + c->Constants[index].u.State[1] == state1) + return index; + } + } + + memset(&constant, 0, sizeof(constant)); + constant.Type = RC_CONSTANT_STATE; + constant.Size = 4; + constant.u.State[0] = state0; + constant.u.State[1] = state1; + + return rc_constants_add(c, &constant); +} + + +/** + * Add an immediate vector to the constant list, while trying to avoid + * duplicates. + */ +unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data) +{ + unsigned index; + struct rc_constant constant; + + for(index = 0; index < c->Count; ++index) { + if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) { + if (!memcmp(c->Constants[index].u.Immediate, data, sizeof(float)*4)) + return index; + } + } + + memset(&constant, 0, sizeof(constant)); + constant.Type = RC_CONSTANT_IMMEDIATE; + constant.Size = 4; + memcpy(constant.u.Immediate, data, sizeof(float) * 4); + + return rc_constants_add(c, &constant); +} + + +/** + * Add an immediate scalar to the constant list, while trying to avoid + * duplicates. + */ +unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle) +{ + unsigned index; + int free_index = -1; + struct rc_constant constant; + + for(index = 0; index < c->Count; ++index) { + if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) { + unsigned comp; + for(comp = 0; comp < c->Constants[index].Size; ++comp) { + if (c->Constants[index].u.Immediate[comp] == data) { + *swizzle = RC_MAKE_SWIZZLE_SMEAR(comp); + return index; + } + } + + if (c->Constants[index].Size < 4) + free_index = index; + } + } + + if (free_index >= 0) { + unsigned comp = c->Constants[free_index].Size++; + c->Constants[free_index].u.Immediate[comp] = data; + *swizzle = RC_MAKE_SWIZZLE_SMEAR(comp); + return free_index; + } + + memset(&constant, 0, sizeof(constant)); + constant.Type = RC_CONSTANT_IMMEDIATE; + constant.Size = 1; + constant.u.Immediate[0] = data; + *swizzle = RC_SWIZZLE_XXXX; + + return rc_constants_add(c, &constant); +} + +void rc_constants_print(struct rc_constant_list * c) +{ + unsigned int i; + for(i = 0; i < c->Count; i++) { + if (c->Constants[i].Type == RC_CONSTANT_IMMEDIATE) { + float * values = c->Constants[i].u.Immediate; + fprintf(stderr, "CONST[%u] = " + "{ %10.4f %10.4f %10.4f %10.4f }\n", + i, values[0],values[1], values[2], values[3]); + } + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_code.h b/src/gallium/drivers/r300/compiler/radeon_code.h new file mode 100644 index 00000000000..67e6acf8b10 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_code.h @@ -0,0 +1,306 @@ +/* + * Copyright 2009 Nicolai Hähnle <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef RADEON_CODE_H +#define RADEON_CODE_H + +#include <stdint.h> + +#define R300_PFS_MAX_ALU_INST 64 +#define R300_PFS_MAX_TEX_INST 32 +#define R300_PFS_MAX_TEX_INDIRECT 4 +#define R300_PFS_NUM_TEMP_REGS 32 +#define R300_PFS_NUM_CONST_REGS 32 + +#define R400_PFS_MAX_ALU_INST 512 +#define R400_PFS_MAX_TEX_INST 512 + +#define R500_PFS_MAX_INST 512 +#define R500_PFS_NUM_TEMP_REGS 128 +#define R500_PFS_NUM_CONST_REGS 256 +#define R500_PFS_MAX_BRANCH_DEPTH_FULL 32 +#define R500_PFS_MAX_BRANCH_DEPTH_PARTIAL 4 + + +#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0) + +enum { + /** + * External constants are constants whose meaning is unknown to this + * compiler. For example, a Mesa gl_program's constants are turned + * into external constants. + */ + RC_CONSTANT_EXTERNAL = 0, + + RC_CONSTANT_IMMEDIATE, + + /** + * Constant referring to state that is known by this compiler, + * see RC_STATE_xxx, i.e. *not* arbitrary Mesa (or other) state. + */ + RC_CONSTANT_STATE +}; + +enum { + RC_STATE_SHADOW_AMBIENT = 0, + + RC_STATE_R300_WINDOW_DIMENSION, + RC_STATE_R300_TEXRECT_FACTOR, + RC_STATE_R300_TEXSCALE_FACTOR, + RC_STATE_R300_VIEWPORT_SCALE, + RC_STATE_R300_VIEWPORT_OFFSET +}; + +struct rc_constant { + unsigned Type:2; /**< RC_CONSTANT_xxx */ + unsigned Size:3; + + union { + unsigned External; + float Immediate[4]; + unsigned State[2]; + } u; +}; + +struct rc_constant_list { + struct rc_constant * Constants; + unsigned Count; + + unsigned _Reserved; +}; + +void rc_constants_init(struct rc_constant_list * c); +void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src); +void rc_constants_destroy(struct rc_constant_list * c); +unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant); +unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state1, unsigned state2); +unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data); +unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle); +void rc_constants_print(struct rc_constant_list * c); + +/** + * Compare functions. + * + * \note By design, RC_COMPARE_FUNC_xxx + GL_NEVER gives you + * the correct GL compare function. + */ +typedef enum { + RC_COMPARE_FUNC_NEVER = 0, + RC_COMPARE_FUNC_LESS, + RC_COMPARE_FUNC_EQUAL, + RC_COMPARE_FUNC_LEQUAL, + RC_COMPARE_FUNC_GREATER, + RC_COMPARE_FUNC_NOTEQUAL, + RC_COMPARE_FUNC_GEQUAL, + RC_COMPARE_FUNC_ALWAYS +} rc_compare_func; + +/** + * Coordinate wrapping modes. + * + * These are not quite the same as their GL counterparts yet. + */ +typedef enum { + RC_WRAP_NONE = 0, + RC_WRAP_REPEAT, + RC_WRAP_MIRRORED_REPEAT, + RC_WRAP_MIRRORED_CLAMP +} rc_wrap_mode; + +/** + * Stores state that influences the compilation of a fragment program. + */ +struct r300_fragment_program_external_state { + struct { + /** + * This field contains swizzle for some lowering passes + * (shadow comparison, unorm->snorm conversion) + */ + unsigned texture_swizzle:12; + + /** + * If the sampler is used as a shadow sampler, + * this field specifies the compare function. + * + * Otherwise, this field is \ref RC_COMPARE_FUNC_NEVER (aka 0). + * \sa rc_compare_func + */ + unsigned texture_compare_func : 3; + + /** + * No matter what the sampler type is, + * this field turns it into a shadow sampler. + */ + unsigned compare_mode_enabled : 1; + + /** + * If the sampler will receive non-normalized coords, + * this field is set. The scaling factor is given by + * RC_STATE_R300_TEXRECT_FACTOR. + */ + unsigned non_normalized_coords : 1; + + /** + * This field specifies wrapping modes for the sampler. + * + * If this field is \ref RC_WRAP_NONE (aka 0), no wrapping maths + * will be performed on the coordinates. + */ + unsigned wrap_mode : 3; + + /** + * The coords are scaled after applying the wrap mode emulation + * and right before texture fetch. The scaling factor is given by + * RC_STATE_R300_TEXSCALE_FACTOR. */ + unsigned clamp_and_scale_before_fetch : 1; + + /** + * Fetch RGTC1_SNORM or LATC1_SNORM as UNORM and convert UNORM -> SNORM + * in the shader. + */ + unsigned convert_unorm_to_snorm:1; + } unit[16]; + + unsigned frag_clamp:1; +}; + + + +struct r300_fragment_program_node { + int tex_offset; /**< first tex instruction */ + int tex_end; /**< last tex instruction, relative to tex_offset */ + int alu_offset; /**< first ALU instruction */ + int alu_end; /**< last ALU instruction, relative to alu_offset */ + int flags; +}; + +/** + * Stores an R300 fragment program in its compiled-to-hardware form. + */ +struct r300_fragment_program_code { + struct { + unsigned int length; /**< total # of texture instructions used */ + uint32_t inst[R400_PFS_MAX_TEX_INST]; + } tex; + + struct { + unsigned int length; /**< total # of ALU instructions used */ + struct { + uint32_t rgb_inst; + uint32_t rgb_addr; + uint32_t alpha_inst; + uint32_t alpha_addr; + uint32_t r400_ext_addr; + } inst[R400_PFS_MAX_ALU_INST]; + } alu; + + uint32_t config; /* US_CONFIG */ + uint32_t pixsize; /* US_PIXSIZE */ + uint32_t code_offset; /* US_CODE_OFFSET */ + uint32_t r400_code_offset_ext; /* US_CODE_EXT */ + uint32_t code_addr[4]; /* US_CODE_ADDR */ + /*US_CODE_BANK.R390_MODE: Enables 512 instructions and 64 temporaries + * for r400 cards */ + unsigned int r390_mode:1; +}; + + +struct r500_fragment_program_code { + struct { + uint32_t inst0; + uint32_t inst1; + uint32_t inst2; + uint32_t inst3; + uint32_t inst4; + uint32_t inst5; + } inst[R500_PFS_MAX_INST]; + + int inst_end; /* Number of instructions - 1; also, last instruction to be executed */ + + int max_temp_idx; + + uint32_t us_fc_ctrl; + + uint32_t int_constants[32]; + uint32_t int_constant_count; +}; + +struct rX00_fragment_program_code { + union { + struct r300_fragment_program_code r300; + struct r500_fragment_program_code r500; + } code; + + unsigned writes_depth:1; + + struct rc_constant_list constants; + unsigned *constants_remap_table; +}; + + +#define R300_VS_MAX_ALU 256 +#define R300_VS_MAX_ALU_DWORDS (R300_VS_MAX_ALU * 4) +#define R500_VS_MAX_ALU 1024 +#define R500_VS_MAX_ALU_DWORDS (R500_VS_MAX_ALU * 4) +#define R300_VS_MAX_TEMPS 32 +/* This is the max for all chipsets (r300-r500) */ +#define R300_VS_MAX_FC_OPS 16 +/* The r500 maximum depth is not just for loops, but any combination of loops + * and subroutine jumps. */ +#define R500_VS_MAX_FC_DEPTH 8 +#define R300_VS_MAX_LOOP_DEPTH 1 + +#define VSF_MAX_INPUTS 32 +#define VSF_MAX_OUTPUTS 32 + +struct r300_vertex_program_code { + int length; + union { + uint32_t d[R500_VS_MAX_ALU_DWORDS]; + float f[R500_VS_MAX_ALU_DWORDS]; + } body; + + int pos_end; + int num_temporaries; /* Number of temp vars used by program */ + int inputs[VSF_MAX_INPUTS]; + int outputs[VSF_MAX_OUTPUTS]; + + struct rc_constant_list constants; + unsigned *constants_remap_table; + + uint32_t InputsRead; + uint32_t OutputsWritten; + + unsigned int num_fc_ops; + uint32_t fc_ops; + union { + uint32_t r300[R300_VS_MAX_FC_OPS]; + struct { + uint32_t lw; + uint32_t uw; + } r500[R300_VS_MAX_FC_OPS]; + } fc_op_addrs; + int32_t fc_loop_index[R300_VS_MAX_FC_OPS]; +}; + +#endif /* RADEON_CODE_H */ + diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler.c b/src/gallium/drivers/r300/compiler/radeon_compiler.c new file mode 100644 index 00000000000..b7936725d85 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_compiler.c @@ -0,0 +1,489 @@ +/* + * Copyright 2009 Nicolai Hähnle <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_compiler.h" + +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> + +#include "radeon_dataflow.h" +#include "radeon_program.h" +#include "radeon_program_pair.h" +#include "radeon_compiler_util.h" + + +void rc_init(struct radeon_compiler * c) +{ + memset(c, 0, sizeof(*c)); + + memory_pool_init(&c->Pool); + c->Program.Instructions.Prev = &c->Program.Instructions; + c->Program.Instructions.Next = &c->Program.Instructions; + c->Program.Instructions.U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE; +} + +void rc_destroy(struct radeon_compiler * c) +{ + rc_constants_destroy(&c->Program.Constants); + memory_pool_destroy(&c->Pool); + free(c->ErrorMsg); +} + +void rc_debug(struct radeon_compiler * c, const char * fmt, ...) +{ + va_list ap; + + if (!(c->Debug & RC_DBG_LOG)) + return; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + +void rc_error(struct radeon_compiler * c, const char * fmt, ...) +{ + va_list ap; + + c->Error = 1; + + if (!c->ErrorMsg) { + /* Only remember the first error */ + char buf[1024]; + int written; + + va_start(ap, fmt); + written = vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + + if (written < sizeof(buf)) { + c->ErrorMsg = strdup(buf); + } else { + c->ErrorMsg = malloc(written + 1); + + va_start(ap, fmt); + vsnprintf(c->ErrorMsg, written + 1, fmt, ap); + va_end(ap); + } + } + + if (c->Debug & RC_DBG_LOG) { + fprintf(stderr, "r300compiler error: "); + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + } +} + +int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion) +{ + rc_error(c, "ICE at %s:%i: assertion failed: %s\n", file, line, assertion); + return 1; +} + +/** + * Recompute c->Program.InputsRead and c->Program.OutputsWritten + * based on which inputs and outputs are actually referenced + * in program instructions. + */ +void rc_calculate_inputs_outputs(struct radeon_compiler * c) +{ + struct rc_instruction *inst; + + c->Program.InputsRead = 0; + c->Program.OutputsWritten = 0; + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) + { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + int i; + + for (i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT) + c->Program.InputsRead |= 1 << inst->U.I.SrcReg[i].Index; + } + + if (opcode->HasDstReg) { + if (inst->U.I.DstReg.File == RC_FILE_OUTPUT) + c->Program.OutputsWritten |= 1 << inst->U.I.DstReg.Index; + } + } +} + +/** + * Rewrite the program such that everything that source the given input + * register will source new_input instead. + */ +void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input) +{ + struct rc_instruction * inst; + + c->Program.InputsRead &= ~(1 << input); + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned i; + + for(i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && inst->U.I.SrcReg[i].Index == input) { + inst->U.I.SrcReg[i].File = new_input.File; + inst->U.I.SrcReg[i].Index = new_input.Index; + inst->U.I.SrcReg[i].Swizzle = combine_swizzles(new_input.Swizzle, inst->U.I.SrcReg[i].Swizzle); + if (!inst->U.I.SrcReg[i].Abs) { + inst->U.I.SrcReg[i].Negate ^= new_input.Negate; + inst->U.I.SrcReg[i].Abs = new_input.Abs; + } + + c->Program.InputsRead |= 1 << new_input.Index; + } + } + } +} + + +/** + * Rewrite the program such that everything that writes into the given + * output register will instead write to new_output. The new_output + * writemask is honoured. + */ +void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask) +{ + struct rc_instruction * inst; + + c->Program.OutputsWritten &= ~(1 << output); + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (opcode->HasDstReg) { + if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) { + inst->U.I.DstReg.Index = new_output; + inst->U.I.DstReg.WriteMask &= writemask; + + c->Program.OutputsWritten |= 1 << new_output; + } + } + } +} + + +/** + * Rewrite the program such that a given output is duplicated. + */ +void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output) +{ + unsigned tempreg = rc_find_free_temporary(c); + struct rc_instruction * inst; + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (opcode->HasDstReg) { + if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) { + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = tempreg; + } + } + } + + inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev); + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.DstReg.File = RC_FILE_OUTPUT; + inst->U.I.DstReg.Index = output; + + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = tempreg; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + + inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev); + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.DstReg.File = RC_FILE_OUTPUT; + inst->U.I.DstReg.Index = dup_output; + + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = tempreg; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + + c->Program.OutputsWritten |= 1 << dup_output; +} + + +/** + * Introduce standard code fragment to deal with fragment.position. + */ +void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input, + int full_vtransform) +{ + unsigned tempregi = rc_find_free_temporary(c); + struct rc_instruction * inst_rcp; + struct rc_instruction * inst_mul; + struct rc_instruction * inst_mad; + struct rc_instruction * inst; + + c->Program.InputsRead &= ~(1 << wpos); + c->Program.InputsRead |= 1 << new_input; + + /* perspective divide */ + inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions); + inst_rcp->U.I.Opcode = RC_OPCODE_RCP; + + inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_rcp->U.I.DstReg.Index = tempregi; + inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; + + inst_rcp->U.I.SrcReg[0].File = RC_FILE_INPUT; + inst_rcp->U.I.SrcReg[0].Index = new_input; + inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; + + inst_mul = rc_insert_new_instruction(c, inst_rcp); + inst_mul->U.I.Opcode = RC_OPCODE_MUL; + + inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mul->U.I.DstReg.Index = tempregi; + inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ; + + inst_mul->U.I.SrcReg[0].File = RC_FILE_INPUT; + inst_mul->U.I.SrcReg[0].Index = new_input; + + inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_mul->U.I.SrcReg[1].Index = tempregi; + inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; + + /* viewport transformation */ + inst_mad = rc_insert_new_instruction(c, inst_mul); + inst_mad->U.I.Opcode = RC_OPCODE_MAD; + + inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mad->U.I.DstReg.Index = tempregi; + inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ; + + inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mad->U.I.SrcReg[0].Index = tempregi; + inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0; + + inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT; + inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0; + + inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT; + inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZ0; + + if (full_vtransform) { + inst_mad->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_SCALE, 0); + inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_OFFSET, 0); + } else { + inst_mad->U.I.SrcReg[1].Index = + inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0); + } + + for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned i; + + for(i = 0; i < opcode->NumSrcRegs; i++) { + if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && + inst->U.I.SrcReg[i].Index == wpos) { + inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[i].Index = tempregi; + } + } + } +} + + +/** + * The FACE input in hardware contains 1 if it's a back face, 0 otherwise. + * Gallium and OpenGL define it the other way around. + * + * So let's just negate FACE at the beginning of the shader and rewrite the rest + * of the shader to read from the newly allocated temporary. + */ +void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face) +{ + unsigned tempregi = rc_find_free_temporary(c); + struct rc_instruction *inst_add; + struct rc_instruction *inst; + + /* perspective divide */ + inst_add = rc_insert_new_instruction(c, &c->Program.Instructions); + inst_add->U.I.Opcode = RC_OPCODE_ADD; + + inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_add->U.I.DstReg.Index = tempregi; + inst_add->U.I.DstReg.WriteMask = RC_MASK_X; + + inst_add->U.I.SrcReg[0].File = RC_FILE_NONE; + inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; + + inst_add->U.I.SrcReg[1].File = RC_FILE_INPUT; + inst_add->U.I.SrcReg[1].Index = face; + inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX; + inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZW; + + for (inst = inst_add->Next; inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned i; + + for(i = 0; i < opcode->NumSrcRegs; i++) { + if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && + inst->U.I.SrcReg[i].Index == face) { + inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[i].Index = tempregi; + } + } + } +} + +static void reg_count_callback(void * userdata, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + int *max_reg = userdata; + if (file == RC_FILE_TEMPORARY) + (int)index > *max_reg ? *max_reg = index : 0; +} + +void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s) +{ + int max_reg = -1; + struct rc_instruction * tmp; + memset(s, 0, sizeof(*s)); + + for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions; + tmp = tmp->Next){ + const struct rc_opcode_info * info; + rc_for_all_reads_mask(tmp, reg_count_callback, &max_reg); + if (tmp->Type == RC_INSTRUCTION_NORMAL) { + info = rc_get_opcode_info(tmp->U.I.Opcode); + if (info->Opcode == RC_OPCODE_BEGIN_TEX) + continue; + if (tmp->U.I.PreSub.Opcode != RC_PRESUB_NONE) + s->num_presub_ops++; + } else { + if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) + s->num_presub_ops++; + if (tmp->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used) + s->num_presub_ops++; + /* Assuming alpha will never be a flow control or + * a tex instruction. */ + if (tmp->U.P.Alpha.Opcode != RC_OPCODE_NOP) + s->num_alpha_insts++; + if (tmp->U.P.RGB.Opcode != RC_OPCODE_NOP) + s->num_rgb_insts++; + info = rc_get_opcode_info(tmp->U.P.RGB.Opcode); + } + if (info->IsFlowControl) + s->num_fc_insts++; + if (info->HasTexture) + s->num_tex_insts++; + s->num_insts++; + } + s->num_temp_regs = max_reg + 1; +} + +static void print_stats(struct radeon_compiler * c) +{ + struct rc_program_stats s; + + if (c->initial_num_insts <= 5) + return; + + rc_get_stats(c, &s); + + switch (c->type) { + case RC_VERTEX_PROGRAM: + fprintf(stderr,"~~~~~~~~~ VERTEX PROGRAM ~~~~~~~~\n" + "~%4u Instructions\n" + "~%4u Flow Control Instructions\n" + "~%4u Temporary Registers\n" + "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n", + s.num_insts, s.num_fc_insts, s.num_temp_regs); + break; + + case RC_FRAGMENT_PROGRAM: + fprintf(stderr,"~~~~~~~~ FRAGMENT PROGRAM ~~~~~~~\n" + "~%4u Instructions\n" + "~%4u Vector Instructions (RGB)\n" + "~%4u Scalar Instructions (Alpha)\n" + "~%4u Flow Control Instructions\n" + "~%4u Texture Instructions\n" + "~%4u Presub Operations\n" + "~%4u Temporary Registers\n" + "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n", + s.num_insts, s.num_rgb_insts, s.num_alpha_insts, + s.num_fc_insts, s.num_tex_insts, s.num_presub_ops, + s.num_temp_regs); + break; + default: + assert(0); + } +} + +static const char *shader_name[RC_NUM_PROGRAM_TYPES] = { + "Vertex Program", + "Fragment Program" +}; + +void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list) +{ + for (unsigned i = 0; list[i].name; i++) { + if (list[i].predicate) { + list[i].run(c, list[i].user); + + if (c->Error) + return; + + if ((c->Debug & RC_DBG_LOG) && list[i].dump) { + fprintf(stderr, "%s: after '%s'\n", shader_name[c->type], list[i].name); + rc_print_program(&c->Program); + } + } + } +} + +/* Executes a list of compiler passes given in the parameter 'list'. */ +void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list) +{ + struct rc_program_stats s; + + rc_get_stats(c, &s); + c->initial_num_insts = s.num_insts; + + if (c->Debug & RC_DBG_LOG) { + fprintf(stderr, "%s: before compilation\n", shader_name[c->type]); + rc_print_program(&c->Program); + } + + rc_run_compiler_passes(c, list); + + if (c->Debug & RC_DBG_STATS) + print_stats(c); +} + +void rc_validate_final_shader(struct radeon_compiler *c, void *user) +{ + /* Check the number of constants. */ + if (c->Program.Constants.Count > c->max_constants) { + rc_error(c, "Too many constants. Max: %i, Got: %i\n", + c->max_constants, c->Program.Constants.Count); + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler.h b/src/gallium/drivers/r300/compiler/radeon_compiler.h new file mode 100644 index 00000000000..74594af23c2 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_compiler.h @@ -0,0 +1,171 @@ +/* + * Copyright 2009 Nicolai Hähnle <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef RADEON_COMPILER_H +#define RADEON_COMPILER_H + +#include "main/compiler.h" + +#include "memory_pool.h" +#include "radeon_code.h" +#include "radeon_program.h" +#include "radeon_emulate_loops.h" + +#define RC_DBG_LOG (1 << 0) +#define RC_DBG_STATS (1 << 1) + +struct rc_swizzle_caps; + +enum rc_program_type { + RC_VERTEX_PROGRAM, + RC_FRAGMENT_PROGRAM, + RC_NUM_PROGRAM_TYPES +}; + +struct radeon_compiler { + struct memory_pool Pool; + struct rc_program Program; + enum rc_program_type type; + unsigned Debug:2; + unsigned Error:1; + char * ErrorMsg; + + /* Hardware specification. */ + unsigned is_r400:1; + unsigned is_r500:1; + unsigned has_half_swizzles:1; + unsigned has_presub:1; + unsigned disable_optimizations:1; + unsigned max_temp_regs; + unsigned max_constants; + int max_alu_insts; + unsigned max_tex_insts; + + /* Whether to remove unused constants and empty holes in constant space. */ + unsigned remove_unused_constants:1; + + /** + * Variables used internally, not be touched by callers + * of the compiler + */ + /*@{*/ + struct rc_swizzle_caps * SwizzleCaps; + /*@}*/ + + struct emulate_loop_state loop_state; + + unsigned initial_num_insts; /* Number of instructions at start. */ +}; + +void rc_init(struct radeon_compiler * c); +void rc_destroy(struct radeon_compiler * c); + +void rc_debug(struct radeon_compiler * c, const char * fmt, ...); +void rc_error(struct radeon_compiler * c, const char * fmt, ...); + +int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion); + +/** + * This macro acts like an if-statement that can be used to implement + * non-aborting assertions in the compiler. + * + * It checks whether \p cond is true. If not, an internal compiler error is + * flagged and the if-clause is run. + * + * A typical use-case would be: + * + * if (rc_assert(c, condition-that-must-be-true)) + * return; + */ +#define rc_assert(c, cond) \ + (!(cond) && rc_if_fail_helper(c, __FILE__, __LINE__, #cond)) + +void rc_calculate_inputs_outputs(struct radeon_compiler * c); + +void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input); +void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask); +void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output); +void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input, + int full_vtransform); +void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face); + +struct r300_fragment_program_compiler { + struct radeon_compiler Base; + struct rX00_fragment_program_code *code; + /* Optional transformations and features. */ + struct r300_fragment_program_external_state state; + unsigned enable_shadow_ambient; + /* Register corresponding to the depthbuffer. */ + unsigned OutputDepth; + /* Registers corresponding to the four colorbuffers. */ + unsigned OutputColor[4]; + + void * UserData; + void (*AllocateHwInputs)( + struct r300_fragment_program_compiler * c, + void (*allocate)(void * data, unsigned input, unsigned hwreg), + void * mydata); +}; + +void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c); + +struct r300_vertex_program_compiler { + struct radeon_compiler Base; + struct r300_vertex_program_code *code; + uint32_t RequiredOutputs; + + void * UserData; + void (*SetHwInputOutput)(struct r300_vertex_program_compiler * c); + + int PredicateIndex; + unsigned int PredicateMask; +}; + +void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c); +void r300_vertex_program_dump(struct radeon_compiler *compiler, void *user); + +struct radeon_compiler_pass { + const char *name; /* Name of the pass. */ + int dump; /* Dump the program if Debug == 1? */ + int predicate; /* Run this pass? */ + void (*run)(struct radeon_compiler *c, void *user); /* The main entrypoint. */ + void *user; /* Optional parameter which is passed to the run function. */ +}; + +struct rc_program_stats { + unsigned num_insts; + unsigned num_fc_insts; + unsigned num_tex_insts; + unsigned num_rgb_insts; + unsigned num_alpha_insts; + unsigned num_presub_ops; + unsigned num_temp_regs; +}; + +void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s); + +/* Executes a list of compiler passes given in the parameter 'list'. */ +void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list); +void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list); +void rc_validate_final_shader(struct radeon_compiler *c, void *user); + +#endif /* RADEON_COMPILER_H */ diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler_util.c b/src/gallium/drivers/r300/compiler/radeon_compiler_util.c new file mode 100644 index 00000000000..2742721f800 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_compiler_util.c @@ -0,0 +1,701 @@ +/* + * Copyright 2010 Tom Stellard <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + */ + +#include "radeon_compiler_util.h" + +#include "radeon_compiler.h" +#include "radeon_dataflow.h" +/** + */ +unsigned int rc_swizzle_to_writemask(unsigned int swz) +{ + unsigned int mask = 0; + unsigned int i; + + for(i = 0; i < 4; i++) { + mask |= 1 << GET_SWZ(swz, i); + } + mask &= RC_MASK_XYZW; + + return mask; +} + +rc_swizzle get_swz(unsigned int swz, rc_swizzle idx) +{ + if (idx & 0x4) + return idx; + return GET_SWZ(swz, idx); +} + +/** + * The purpose of this function is to standardize the number channels used by + * swizzles. All swizzles regardless of what instruction they are a part of + * should have 4 channels initialized with values. + * @param channels The number of channels in initial_value that have a + * meaningful value. + * @return An initialized swizzle that has all of the unused channels set to + * RC_SWIZZLE_UNUSED. + */ +unsigned int rc_init_swizzle(unsigned int initial_value, unsigned int channels) +{ + unsigned int i; + for (i = channels; i < 4; i++) { + SET_SWZ(initial_value, i, RC_SWIZZLE_UNUSED); + } + return initial_value; +} + +unsigned int combine_swizzles4(unsigned int src, + rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w) +{ + unsigned int ret = 0; + + ret |= get_swz(src, swz_x); + ret |= get_swz(src, swz_y) << 3; + ret |= get_swz(src, swz_z) << 6; + ret |= get_swz(src, swz_w) << 9; + + return ret; +} + +unsigned int combine_swizzles(unsigned int src, unsigned int swz) +{ + unsigned int ret = 0; + + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_X)); + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Y)) << 3; + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Z)) << 6; + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_W)) << 9; + + return ret; +} + +/** + * @param mask Must be either RC_MASK_X, RC_MASK_Y, RC_MASK_Z, or RC_MASK_W + */ +rc_swizzle rc_mask_to_swizzle(unsigned int mask) +{ + switch (mask) { + case RC_MASK_X: return RC_SWIZZLE_X; + case RC_MASK_Y: return RC_SWIZZLE_Y; + case RC_MASK_Z: return RC_SWIZZLE_Z; + case RC_MASK_W: return RC_SWIZZLE_W; + } + return RC_SWIZZLE_UNUSED; +} + +/* Reorder mask bits according to swizzle. */ +unsigned swizzle_mask(unsigned swizzle, unsigned mask) +{ + unsigned ret = 0; + for (unsigned chan = 0; chan < 4; ++chan) { + unsigned swz = GET_SWZ(swizzle, chan); + if (swz < 4) + ret |= GET_BIT(mask, swz) << chan; + } + return ret; +} + +static unsigned int srcs_need_rewrite(const struct rc_opcode_info * info) +{ + if (info->HasTexture) { + return 0; + } + switch (info->Opcode) { + case RC_OPCODE_DP2: + case RC_OPCODE_DP3: + case RC_OPCODE_DP4: + case RC_OPCODE_DDX: + case RC_OPCODE_DDY: + return 0; + default: + return 1; + } +} + +/** + * @return A swizzle the results from converting old_swizzle using + * conversion_swizzle + */ +unsigned int rc_adjust_channels( + unsigned int old_swizzle, + unsigned int conversion_swizzle) +{ + unsigned int i; + unsigned int new_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0); + for (i = 0; i < 4; i++) { + unsigned int new_chan = get_swz(conversion_swizzle, i); + if (new_chan == RC_SWIZZLE_UNUSED) { + continue; + } + SET_SWZ(new_swizzle, new_chan, GET_SWZ(old_swizzle, i)); + } + return new_swizzle; +} + +static unsigned int rewrite_writemask( + unsigned int old_mask, + unsigned int conversion_swizzle) +{ + unsigned int new_mask = 0; + unsigned int i; + + for (i = 0; i < 4; i++) { + if (!GET_BIT(old_mask, i) + || GET_SWZ(conversion_swizzle, i) == RC_SWIZZLE_UNUSED) { + continue; + } + new_mask |= (1 << GET_SWZ(conversion_swizzle, i)); + } + + return new_mask; +} + +/** + * This function rewrites the writemask of sub and adjusts the swizzles + * of all its source registers based on the conversion_swizzle. + * conversion_swizzle represents a mapping of the old writemask to the + * new writemask. For a detailed description of how conversion swizzles + * work see rc_rewrite_swizzle(). + */ +void rc_pair_rewrite_writemask( + struct rc_pair_sub_instruction * sub, + unsigned int conversion_swizzle) +{ + const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); + unsigned int i; + + sub->WriteMask = rewrite_writemask(sub->WriteMask, conversion_swizzle); + + if (!srcs_need_rewrite(info)) { + return ; + } + + for (i = 0; i < info->NumSrcRegs; i++) { + sub->Arg[i].Swizzle = + rc_adjust_channels(sub->Arg[i].Swizzle, + conversion_swizzle); + } +} + +static void normal_rewrite_writemask_cb( + void * userdata, + struct rc_instruction * inst, + struct rc_src_register * src) +{ + unsigned int * new_mask = (unsigned int *)userdata; + src->Swizzle = rc_adjust_channels(src->Swizzle, *new_mask); +} + +/** + * This function is the same as rc_pair_rewrite_writemask() except it + * operates on normal instructions. + */ +void rc_normal_rewrite_writemask( + struct rc_instruction * inst, + unsigned int conversion_swizzle) +{ + unsigned int new_mask; + struct rc_sub_instruction * sub = &inst->U.I; + const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); + sub->DstReg.WriteMask = + rewrite_writemask(sub->DstReg.WriteMask, conversion_swizzle); + + if (info->HasTexture) { + unsigned int i; + assert(sub->TexSwizzle == RC_SWIZZLE_XYZW); + for (i = 0; i < 4; i++) { + unsigned int swz = GET_SWZ(conversion_swizzle, i); + if (swz > 3) + continue; + SET_SWZ(sub->TexSwizzle, swz, i); + } + } + + if (!srcs_need_rewrite(info)) { + return; + } + + new_mask = sub->DstReg.WriteMask; + rc_for_all_reads_src(inst, normal_rewrite_writemask_cb, &new_mask); +} + +/** + * This function replaces each value 'swz' in swizzle with the value of + * GET_SWZ(conversion_swizzle, swz). So, if you want to change all the X's + * in swizzle to Y, then conversion_swizzle should be Y___ (0xff9). If you want + * to change all the Y's in swizzle to X, then conversion_swizzle should be + * _X__ (0xfc7). If you want to change the Y's to X and the X's to Y, then + * conversion swizzle should be YX__ (0xfc1). + * @param swizzle The swizzle to change + * @param conversion_swizzle Describes the conversion to perform on the swizzle + * @return A converted swizzle + */ +unsigned int rc_rewrite_swizzle( + unsigned int swizzle, + unsigned int conversion_swizzle) +{ + unsigned int chan; + unsigned int out_swizzle = swizzle; + + for (chan = 0; chan < 4; chan++) { + unsigned int swz = GET_SWZ(swizzle, chan); + unsigned int new_swz; + if (swz > 3) { + SET_SWZ(out_swizzle, chan, swz); + } else { + new_swz = GET_SWZ(conversion_swizzle, swz); + if (new_swz != RC_SWIZZLE_UNUSED) { + SET_SWZ(out_swizzle, chan, new_swz); + } else { + SET_SWZ(out_swizzle, chan, swz); + } + } + } + return out_swizzle; +} + +/** + * Left multiplication of a register with a swizzle + */ +struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg) +{ + struct rc_src_register tmp = srcreg; + int i; + tmp.Swizzle = 0; + tmp.Negate = 0; + for(i = 0; i < 4; ++i) { + rc_swizzle swz = GET_SWZ(swizzle, i); + if (swz < 4) { + tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3); + tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i; + } else { + tmp.Swizzle |= swz << (i*3); + } + } + return tmp; +} + +void reset_srcreg(struct rc_src_register* reg) +{ + memset(reg, 0, sizeof(struct rc_src_register)); + reg->Swizzle = RC_SWIZZLE_XYZW; +} + +unsigned int rc_src_reads_dst_mask( + rc_register_file src_file, + unsigned int src_idx, + unsigned int src_swz, + rc_register_file dst_file, + unsigned int dst_idx, + unsigned int dst_mask) +{ + if (src_file != dst_file || src_idx != dst_idx) { + return RC_MASK_NONE; + } + return dst_mask & rc_swizzle_to_writemask(src_swz); +} + +/** + * @return A bit mask specifying whether this swizzle will select from an RGB + * source, an Alpha source, or both. + */ +unsigned int rc_source_type_swz(unsigned int swizzle) +{ + unsigned int chan; + unsigned int swz = RC_SWIZZLE_UNUSED; + unsigned int ret = RC_SOURCE_NONE; + + for(chan = 0; chan < 4; chan++) { + swz = GET_SWZ(swizzle, chan); + if (swz == RC_SWIZZLE_W) { + ret |= RC_SOURCE_ALPHA; + } else if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y + || swz == RC_SWIZZLE_Z) { + ret |= RC_SOURCE_RGB; + } + } + return ret; +} + +unsigned int rc_source_type_mask(unsigned int mask) +{ + unsigned int ret = RC_SOURCE_NONE; + + if (mask & RC_MASK_XYZ) + ret |= RC_SOURCE_RGB; + + if (mask & RC_MASK_W) + ret |= RC_SOURCE_ALPHA; + + return ret; +} + +struct src_select { + rc_register_file File; + int Index; + unsigned int SrcType; +}; + +struct can_use_presub_data { + struct src_select Selects[5]; + unsigned int SelectCount; + const struct rc_src_register * ReplaceReg; + unsigned int ReplaceRemoved; +}; + +static void can_use_presub_data_add_select( + struct can_use_presub_data * data, + rc_register_file file, + unsigned int index, + unsigned int src_type) +{ + struct src_select * select; + + select = &data->Selects[data->SelectCount++]; + select->File = file; + select->Index = index; + select->SrcType = src_type; +} + +/** + * This callback function counts the number of sources in inst that are + * different from the sources in can_use_presub_data->RemoveSrcs. + */ +static void can_use_presub_read_cb( + void * userdata, + struct rc_instruction * inst, + struct rc_src_register * src) +{ + struct can_use_presub_data * d = userdata; + + if (!d->ReplaceRemoved && src == d->ReplaceReg) { + d->ReplaceRemoved = 1; + return; + } + + if (src->File == RC_FILE_NONE) + return; + + can_use_presub_data_add_select(d, src->File, src->Index, + rc_source_type_swz(src->Swizzle)); +} + +unsigned int rc_inst_can_use_presub( + struct rc_instruction * inst, + rc_presubtract_op presub_op, + unsigned int presub_writemask, + const struct rc_src_register * replace_reg, + const struct rc_src_register * presub_src0, + const struct rc_src_register * presub_src1) +{ + struct can_use_presub_data d; + unsigned int num_presub_srcs; + unsigned int i; + const struct rc_opcode_info * info = + rc_get_opcode_info(inst->U.I.Opcode); + int rgb_count = 0, alpha_count = 0; + unsigned int src_type0, src_type1; + + if (presub_op == RC_PRESUB_NONE) { + return 1; + } + + if (info->HasTexture) { + return 0; + } + + /* We can't use more than one presubtract value in an + * instruction, unless the two prsubtract operations + * are the same and read from the same registers. + * XXX For now we will limit instructions to only one presubtract + * value.*/ + if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) { + return 0; + } + + memset(&d, 0, sizeof(d)); + d.ReplaceReg = replace_reg; + + rc_for_all_reads_src(inst, can_use_presub_read_cb, &d); + + num_presub_srcs = rc_presubtract_src_reg_count(presub_op); + + src_type0 = rc_source_type_swz(presub_src0->Swizzle); + can_use_presub_data_add_select(&d, + presub_src0->File, + presub_src0->Index, + src_type0); + + if (num_presub_srcs > 1) { + src_type1 = rc_source_type_swz(presub_src1->Swizzle); + can_use_presub_data_add_select(&d, + presub_src1->File, + presub_src1->Index, + src_type1); + + /* Even if both of the presub sources read from the same + * register, we still need to use 2 different source selects + * for them, so we need to increment the count to compensate. + */ + if (presub_src0->File == presub_src1->File + && presub_src0->Index == presub_src1->Index) { + if (src_type0 & src_type1 & RC_SOURCE_RGB) { + rgb_count++; + } + if (src_type0 & src_type1 & RC_SOURCE_ALPHA) { + alpha_count++; + } + } + } + + /* Count the number of source selects for Alpha and RGB. If we + * encounter two of the same source selects then we can ignore the + * first one. */ + for (i = 0; i < d.SelectCount; i++) { + unsigned int j; + unsigned int src_type = d.Selects[i].SrcType; + for (j = i + 1; j < d.SelectCount; j++) { + if (d.Selects[i].File == d.Selects[j].File + && d.Selects[i].Index == d.Selects[j].Index) { + src_type &= ~d.Selects[j].SrcType; + } + } + if (src_type & RC_SOURCE_RGB) { + rgb_count++; + } + + if (src_type & RC_SOURCE_ALPHA) { + alpha_count++; + } + } + + if (rgb_count > 3 || alpha_count > 3) { + return 0; + } + + return 1; +} + +struct max_data { + unsigned int Max; + unsigned int HasFileType; + rc_register_file File; +}; + +static void max_callback( + void * userdata, + struct rc_instruction * inst, + rc_register_file file, + unsigned int index, + unsigned int mask) +{ + struct max_data * d = (struct max_data*)userdata; + if (file == d->File && (!d->HasFileType || index > d->Max)) { + d->Max = index; + d->HasFileType = 1; + } +} + +/** + * @return The maximum index of the specified register file used by the + * program. + */ +int rc_get_max_index( + struct radeon_compiler * c, + rc_register_file file) +{ + struct max_data data; + struct rc_instruction * inst; + data.Max = 0; + data.HasFileType = 0; + data.File = file; + for (inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + rc_for_all_reads_mask(inst, max_callback, &data); + rc_for_all_writes_mask(inst, max_callback, &data); + } + if (!data.HasFileType) { + return -1; + } else { + return data.Max; + } +} + +static unsigned int get_source_readmask( + struct rc_pair_sub_instruction * sub, + unsigned int source, + unsigned int src_type) +{ + unsigned int i; + unsigned int readmask = 0; + const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); + + for (i = 0; i < info->NumSrcRegs; i++) { + if (sub->Arg[i].Source != source + || src_type != rc_source_type_swz(sub->Arg[i].Swizzle)) { + continue; + } + readmask |= rc_swizzle_to_writemask(sub->Arg[i].Swizzle); + } + return readmask; +} + +/** + * This function attempts to remove a source from a pair instructions. + * @param inst + * @param src_type RC_SOURCE_RGB, RC_SOURCE_ALPHA, or both bitwise or'd + * @param source The index of the source to remove + * @param new_readmask A mask representing the components that are read by + * the source that is intended to replace the one you are removing. If you + * want to remove a source only and not replace it, this parameter should be + * zero. + * @return 1 if the source was successfully removed, 0 if it was not + */ +unsigned int rc_pair_remove_src( + struct rc_instruction * inst, + unsigned int src_type, + unsigned int source, + unsigned int new_readmask) +{ + unsigned int readmask = 0; + + readmask |= get_source_readmask(&inst->U.P.RGB, source, src_type); + readmask |= get_source_readmask(&inst->U.P.Alpha, source, src_type); + + if ((new_readmask & readmask) != readmask) + return 0; + + if (src_type & RC_SOURCE_RGB) { + memset(&inst->U.P.RGB.Src[source], 0, + sizeof(struct rc_pair_instruction_source)); + } + + if (src_type & RC_SOURCE_ALPHA) { + memset(&inst->U.P.Alpha.Src[source], 0, + sizeof(struct rc_pair_instruction_source)); + } + + return 1; +} + +/** + * @return RC_OPCODE_NOOP if inst is not a flow control instruction. + * @return The opcode of inst if it is a flow control instruction. + */ +rc_opcode rc_get_flow_control_inst(struct rc_instruction * inst) +{ + const struct rc_opcode_info * info; + if (inst->Type == RC_INSTRUCTION_NORMAL) { + info = rc_get_opcode_info(inst->U.I.Opcode); + } else { + info = rc_get_opcode_info(inst->U.P.RGB.Opcode); + /*A flow control instruction shouldn't have an alpha + * instruction.*/ + assert(!info->IsFlowControl || + inst->U.P.Alpha.Opcode == RC_OPCODE_NOP); + } + + if (info->IsFlowControl) + return info->Opcode; + else + return RC_OPCODE_NOP; + +} + +/** + * @return The BGNLOOP instruction that starts the loop ended by endloop. + */ +struct rc_instruction * rc_match_endloop(struct rc_instruction * endloop) +{ + unsigned int endloop_count = 0; + struct rc_instruction * inst; + for (inst = endloop->Prev; inst != endloop; inst = inst->Prev) { + rc_opcode op = rc_get_flow_control_inst(inst); + if (op == RC_OPCODE_ENDLOOP) { + endloop_count++; + } else if (op == RC_OPCODE_BGNLOOP) { + if (endloop_count == 0) { + return inst; + } else { + endloop_count--; + } + } + } + return NULL; +} + +/** + * @return The ENDLOOP instruction that ends the loop started by bgnloop. + */ +struct rc_instruction * rc_match_bgnloop(struct rc_instruction * bgnloop) +{ + unsigned int bgnloop_count = 0; + struct rc_instruction * inst; + for (inst = bgnloop->Next; inst!=bgnloop; inst = inst->Next) { + rc_opcode op = rc_get_flow_control_inst(inst); + if (op == RC_OPCODE_BGNLOOP) { + bgnloop_count++; + } else if (op == RC_OPCODE_ENDLOOP) { + if (bgnloop_count == 0) { + return inst; + } else { + bgnloop_count--; + } + } + } + return NULL; +} + +/** + * @return A conversion swizzle for converting from old_mask->new_mask + */ +unsigned int rc_make_conversion_swizzle( + unsigned int old_mask, + unsigned int new_mask) +{ + unsigned int conversion_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0); + unsigned int old_idx; + unsigned int new_idx = 0; + for (old_idx = 0; old_idx < 4; old_idx++) { + if (!GET_BIT(old_mask, old_idx)) + continue; + for ( ; new_idx < 4; new_idx++) { + if (GET_BIT(new_mask, new_idx)) { + SET_SWZ(conversion_swizzle, old_idx, new_idx); + new_idx++; + break; + } + } + } + return conversion_swizzle; +} diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler_util.h b/src/gallium/drivers/r300/compiler/radeon_compiler_util.h new file mode 100644 index 00000000000..3730aa888c0 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_compiler_util.h @@ -0,0 +1,89 @@ +#include "radeon_program_constants.h" + +#ifndef RADEON_PROGRAM_UTIL_H +#define RADEON_PROGRAM_UTIL_H + +#include "radeon_opcodes.h" + +struct radeon_compiler; +struct rc_instruction; +struct rc_pair_instruction; +struct rc_pair_sub_instruction; +struct rc_src_register; + +unsigned int rc_swizzle_to_writemask(unsigned int swz); + +rc_swizzle get_swz(unsigned int swz, rc_swizzle idx); + +unsigned int rc_init_swizzle(unsigned int initial_value, unsigned int channels); + +unsigned int combine_swizzles4(unsigned int src, + rc_swizzle swz_x, rc_swizzle swz_y, + rc_swizzle swz_z, rc_swizzle swz_w); + +unsigned int combine_swizzles(unsigned int src, unsigned int swz); + +rc_swizzle rc_mask_to_swizzle(unsigned int mask); + +unsigned swizzle_mask(unsigned swizzle, unsigned mask); + +unsigned int rc_adjust_channels( + unsigned int old_swizzle, + unsigned int conversion_swizzle); + +void rc_pair_rewrite_writemask( + struct rc_pair_sub_instruction * sub, + unsigned int conversion_swizzle); + +void rc_normal_rewrite_writemask( + struct rc_instruction * inst, + unsigned int conversion_swizzle); + +unsigned int rc_rewrite_swizzle( + unsigned int swizzle, + unsigned int new_mask); + +struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg); + +void reset_srcreg(struct rc_src_register* reg); + +unsigned int rc_src_reads_dst_mask( + rc_register_file src_file, + unsigned int src_idx, + unsigned int src_swz, + rc_register_file dst_file, + unsigned int dst_idx, + unsigned int dst_mask); + +unsigned int rc_source_type_swz(unsigned int swizzle); + +unsigned int rc_source_type_mask(unsigned int mask); + +unsigned int rc_inst_can_use_presub( + struct rc_instruction * inst, + rc_presubtract_op presub_op, + unsigned int presub_writemask, + const struct rc_src_register * replace_reg, + const struct rc_src_register * presub_src0, + const struct rc_src_register * presub_src1); + +int rc_get_max_index( + struct radeon_compiler * c, + rc_register_file file); + +unsigned int rc_pair_remove_src( + struct rc_instruction * inst, + unsigned int src_type, + unsigned int source, + unsigned int new_readmask); + +rc_opcode rc_get_flow_control_inst(struct rc_instruction * inst); + +struct rc_instruction * rc_match_endloop(struct rc_instruction * endloop); +struct rc_instruction * rc_match_bgnloop(struct rc_instruction * bgnloop); + +unsigned int rc_make_conversion_swizzle( + unsigned int old_mask, + unsigned int new_mask); + +#endif /* RADEON_PROGRAM_UTIL_H */ diff --git a/src/gallium/drivers/r300/compiler/radeon_dataflow.c b/src/gallium/drivers/r300/compiler/radeon_dataflow.c new file mode 100644 index 00000000000..a8decacedaf --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_dataflow.c @@ -0,0 +1,892 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * Copyright 2010 Tom Stellard <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_dataflow.h" + +#include "radeon_compiler.h" +#include "radeon_compiler_util.h" +#include "radeon_program.h" + +struct read_write_mask_data { + void * UserData; + rc_read_write_mask_fn Cb; +}; + +static void reads_normal_callback( + void * userdata, + struct rc_instruction * fullinst, + struct rc_src_register * src) +{ + struct read_write_mask_data * cb_data = userdata; + unsigned int refmask = 0; + unsigned int chan; + for(chan = 0; chan < 4; chan++) { + refmask |= 1 << GET_SWZ(src->Swizzle, chan); + } + refmask &= RC_MASK_XYZW; + + if (refmask) { + cb_data->Cb(cb_data->UserData, fullinst, src->File, + src->Index, refmask); + } + + if (refmask && src->RelAddr) { + cb_data->Cb(cb_data->UserData, fullinst, RC_FILE_ADDRESS, 0, + RC_MASK_X); + } +} + +static void pair_get_src_refmasks(unsigned int * refmasks, + struct rc_pair_instruction * inst, + unsigned int swz, unsigned int src) +{ + if (swz >= 4) + return; + + if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y || swz == RC_SWIZZLE_Z) { + if(src == RC_PAIR_PRESUB_SRC) { + unsigned int i; + int srcp_regs = + rc_presubtract_src_reg_count( + inst->RGB.Src[src].Index); + for(i = 0; i < srcp_regs; i++) { + refmasks[i] |= 1 << swz; + } + } + else { + refmasks[src] |= 1 << swz; + } + } + + if (swz == RC_SWIZZLE_W) { + if (src == RC_PAIR_PRESUB_SRC) { + unsigned int i; + int srcp_regs = rc_presubtract_src_reg_count( + inst->Alpha.Src[src].Index); + for(i = 0; i < srcp_regs; i++) { + refmasks[i] |= 1 << swz; + } + } + else { + refmasks[src] |= 1 << swz; + } + } +} + +static void reads_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata) +{ + struct rc_pair_instruction * inst = &fullinst->U.P; + unsigned int refmasks[3] = { 0, 0, 0 }; + + unsigned int arg; + + for(arg = 0; arg < 3; ++arg) { + unsigned int chan; + for(chan = 0; chan < 3; ++chan) { + unsigned int swz_rgb = + GET_SWZ(inst->RGB.Arg[arg].Swizzle, chan); + unsigned int swz_alpha = + GET_SWZ(inst->Alpha.Arg[arg].Swizzle, chan); + pair_get_src_refmasks(refmasks, inst, swz_rgb, + inst->RGB.Arg[arg].Source); + pair_get_src_refmasks(refmasks, inst, swz_alpha, + inst->Alpha.Arg[arg].Source); + } + } + + for(unsigned int src = 0; src < 3; ++src) { + if (inst->RGB.Src[src].Used && (refmasks[src] & RC_MASK_XYZ)) + cb(userdata, fullinst, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, + refmasks[src] & RC_MASK_XYZ); + + if (inst->Alpha.Src[src].Used && (refmasks[src] & RC_MASK_W)) + cb(userdata, fullinst, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, RC_MASK_W); + } +} + +static void pair_sub_for_all_args( + struct rc_instruction * fullinst, + struct rc_pair_sub_instruction * sub, + rc_pair_read_arg_fn cb, + void * userdata) +{ + int i; + const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); + + for(i = 0; i < info->NumSrcRegs; i++) { + unsigned int src_type; + + src_type = rc_source_type_swz(sub->Arg[i].Swizzle); + + if (src_type == RC_SOURCE_NONE) + continue; + + if (sub->Arg[i].Source == RC_PAIR_PRESUB_SRC) { + unsigned int presub_type; + unsigned int presub_src_count; + struct rc_pair_instruction_source * src_array; + unsigned int j; + + if (src_type & RC_SOURCE_RGB) { + presub_type = fullinst-> + U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index; + src_array = fullinst->U.P.RGB.Src; + } else { + presub_type = fullinst-> + U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index; + src_array = fullinst->U.P.Alpha.Src; + } + presub_src_count + = rc_presubtract_src_reg_count(presub_type); + for(j = 0; j < presub_src_count; j++) { + cb(userdata, fullinst, &sub->Arg[i], + &src_array[j]); + } + } else { + struct rc_pair_instruction_source * src = + rc_pair_get_src(&fullinst->U.P, &sub->Arg[i]); + if (src) { + cb(userdata, fullinst, &sub->Arg[i], src); + } + } + } +} + +/* This function calls the callback function (cb) for each source used by + * the instruction. + * */ +void rc_for_all_reads_src( + struct rc_instruction * inst, + rc_read_src_fn cb, + void * userdata) +{ + const struct rc_opcode_info * opcode = + rc_get_opcode_info(inst->U.I.Opcode); + + /* This function only works with normal instructions. */ + if (inst->Type != RC_INSTRUCTION_NORMAL) { + assert(0); + return; + } + + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { + + if (inst->U.I.SrcReg[src].File == RC_FILE_NONE) + continue; + + if (inst->U.I.SrcReg[src].File == RC_FILE_PRESUB) { + unsigned int i; + unsigned int srcp_regs = rc_presubtract_src_reg_count( + inst->U.I.PreSub.Opcode); + for( i = 0; i < srcp_regs; i++) { + cb(userdata, inst, &inst->U.I.PreSub.SrcReg[i]); + } + } else { + cb(userdata, inst, &inst->U.I.SrcReg[src]); + } + } +} + +/** + * This function calls the callback function (cb) for each arg of the RGB and + * alpha components. + */ +void rc_pair_for_all_reads_arg( + struct rc_instruction * inst, + rc_pair_read_arg_fn cb, + void * userdata) +{ + /* This function only works with pair instructions. */ + if (inst->Type != RC_INSTRUCTION_PAIR) { + assert(0); + return; + } + + pair_sub_for_all_args(inst, &inst->U.P.RGB, cb, userdata); + pair_sub_for_all_args(inst, &inst->U.P.Alpha, cb, userdata); +} + +/** + * Calls a callback function for all register reads. + * + * This is conservative, i.e. if the same register is referenced multiple times, + * the callback may also be called multiple times. + * Also, the writemask of the instruction is not taken into account. + */ +void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata) +{ + if (inst->Type == RC_INSTRUCTION_NORMAL) { + struct read_write_mask_data cb_data; + cb_data.UserData = userdata; + cb_data.Cb = cb; + + rc_for_all_reads_src(inst, reads_normal_callback, &cb_data); + } else { + reads_pair(inst, cb, userdata); + } +} + + + +static void writes_normal(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata) +{ + struct rc_sub_instruction * inst = &fullinst->U.I; + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); + + if (opcode->HasDstReg && inst->DstReg.WriteMask) + cb(userdata, fullinst, inst->DstReg.File, inst->DstReg.Index, inst->DstReg.WriteMask); + + if (inst->WriteALUResult) + cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X); +} + +static void writes_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata) +{ + struct rc_pair_instruction * inst = &fullinst->U.P; + + if (inst->RGB.WriteMask) + cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->RGB.DestIndex, inst->RGB.WriteMask); + + if (inst->Alpha.WriteMask) + cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->Alpha.DestIndex, RC_MASK_W); + + if (inst->WriteALUResult) + cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X); +} + +/** + * Calls a callback function for all register writes in the instruction, + * reporting writemasks to the callback function. + * + * \warning Does not report output registers for paired instructions! + */ +void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata) +{ + if (inst->Type == RC_INSTRUCTION_NORMAL) { + writes_normal(inst, cb, userdata); + } else { + writes_pair(inst, cb, userdata); + } +} + + +struct mask_to_chan_data { + void * UserData; + rc_read_write_chan_fn Fn; +}; + +static void mask_to_chan_cb(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + struct mask_to_chan_data * d = data; + for(unsigned int chan = 0; chan < 4; ++chan) { + if (GET_BIT(mask, chan)) + d->Fn(d->UserData, inst, file, index, chan); + } +} + +/** + * Calls a callback function for all sourced register channels. + * + * This is conservative, i.e. channels may be called multiple times, + * and the writemask of the instruction is not taken into account. + */ +void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata) +{ + struct mask_to_chan_data d; + d.UserData = userdata; + d.Fn = cb; + rc_for_all_reads_mask(inst, &mask_to_chan_cb, &d); +} + +/** + * Calls a callback function for all written register channels. + * + * \warning Does not report output registers for paired instructions! + */ +void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata) +{ + struct mask_to_chan_data d; + d.UserData = userdata; + d.Fn = cb; + rc_for_all_writes_mask(inst, &mask_to_chan_cb, &d); +} + +static void remap_normal_instruction(struct rc_instruction * fullinst, + rc_remap_register_fn cb, void * userdata) +{ + struct rc_sub_instruction * inst = &fullinst->U.I; + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); + unsigned int remapped_presub = 0; + + if (opcode->HasDstReg) { + rc_register_file file = inst->DstReg.File; + unsigned int index = inst->DstReg.Index; + + cb(userdata, fullinst, &file, &index); + + inst->DstReg.File = file; + inst->DstReg.Index = index; + } + + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { + rc_register_file file = inst->SrcReg[src].File; + unsigned int index = inst->SrcReg[src].Index; + + if (file == RC_FILE_PRESUB) { + unsigned int i; + unsigned int srcp_srcs = rc_presubtract_src_reg_count( + inst->PreSub.Opcode); + /* Make sure we only remap presubtract sources once in + * case more than one source register reads the + * presubtract result. */ + if (remapped_presub) + continue; + + for(i = 0; i < srcp_srcs; i++) { + file = inst->PreSub.SrcReg[i].File; + index = inst->PreSub.SrcReg[i].Index; + cb(userdata, fullinst, &file, &index); + inst->PreSub.SrcReg[i].File = file; + inst->PreSub.SrcReg[i].Index = index; + } + remapped_presub = 1; + } + else { + cb(userdata, fullinst, &file, &index); + + inst->SrcReg[src].File = file; + inst->SrcReg[src].Index = index; + } + } +} + +static void remap_pair_instruction(struct rc_instruction * fullinst, + rc_remap_register_fn cb, void * userdata) +{ + struct rc_pair_instruction * inst = &fullinst->U.P; + + if (inst->RGB.WriteMask) { + rc_register_file file = RC_FILE_TEMPORARY; + unsigned int index = inst->RGB.DestIndex; + + cb(userdata, fullinst, &file, &index); + + inst->RGB.DestIndex = index; + } + + if (inst->Alpha.WriteMask) { + rc_register_file file = RC_FILE_TEMPORARY; + unsigned int index = inst->Alpha.DestIndex; + + cb(userdata, fullinst, &file, &index); + + inst->Alpha.DestIndex = index; + } + + for(unsigned int src = 0; src < 3; ++src) { + if (inst->RGB.Src[src].Used) { + rc_register_file file = inst->RGB.Src[src].File; + unsigned int index = inst->RGB.Src[src].Index; + + cb(userdata, fullinst, &file, &index); + + inst->RGB.Src[src].File = file; + inst->RGB.Src[src].Index = index; + } + + if (inst->Alpha.Src[src].Used) { + rc_register_file file = inst->Alpha.Src[src].File; + unsigned int index = inst->Alpha.Src[src].Index; + + cb(userdata, fullinst, &file, &index); + + inst->Alpha.Src[src].File = file; + inst->Alpha.Src[src].Index = index; + } + } +} + + +/** + * Remap all register accesses according to the given function. + * That is, call the function \p cb for each referenced register (both read and written) + * and update the given instruction \p inst accordingly + * if it modifies its \ref pfile and \ref pindex contents. + */ +void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, void * userdata) +{ + if (inst->Type == RC_INSTRUCTION_NORMAL) + remap_normal_instruction(inst, cb, userdata); + else + remap_pair_instruction(inst, cb, userdata); +} + +struct branch_write_mask { + unsigned int IfWriteMask:4; + unsigned int ElseWriteMask:4; + unsigned int HasElse:1; +}; + +union get_readers_read_cb { + rc_read_src_fn I; + rc_pair_read_arg_fn P; +}; + +struct get_readers_callback_data { + struct radeon_compiler * C; + struct rc_reader_data * ReaderData; + rc_read_src_fn ReadNormalCB; + rc_pair_read_arg_fn ReadPairCB; + rc_read_write_mask_fn WriteCB; + rc_register_file DstFile; + unsigned int DstIndex; + unsigned int DstMask; + unsigned int AliveWriteMask; + /* For convenience, this is indexed starting at 1 */ + struct branch_write_mask BranchMasks[R500_PFS_MAX_BRANCH_DEPTH_FULL + 1]; +}; + +static struct rc_reader * add_reader( + struct memory_pool * pool, + struct rc_reader_data * data, + struct rc_instruction * inst, + unsigned int mask) +{ + struct rc_reader * new; + memory_pool_array_reserve(pool, struct rc_reader, data->Readers, + data->ReaderCount, data->ReadersReserved, 1); + new = &data->Readers[data->ReaderCount++]; + new->Inst = inst; + new->WriteMask = mask; + return new; +} + +static void add_reader_normal( + struct memory_pool * pool, + struct rc_reader_data * data, + struct rc_instruction * inst, + unsigned int mask, + struct rc_src_register * src) +{ + struct rc_reader * new = add_reader(pool, data, inst, mask); + new->U.I.Src = src; +} + + +static void add_reader_pair( + struct memory_pool * pool, + struct rc_reader_data * data, + struct rc_instruction * inst, + unsigned int mask, + struct rc_pair_instruction_arg * arg, + struct rc_pair_instruction_source * src) +{ + struct rc_reader * new = add_reader(pool, data, inst, mask); + new->U.P.Src = src; + new->U.P.Arg = arg; +} + +static unsigned int get_readers_read_callback( + struct get_readers_callback_data * cb_data, + unsigned int has_rel_addr, + rc_register_file file, + unsigned int index, + unsigned int swizzle) +{ + unsigned int shared_mask, read_mask; + + if (has_rel_addr) { + cb_data->ReaderData->Abort = 1; + return RC_MASK_NONE; + } + + shared_mask = rc_src_reads_dst_mask(file, index, swizzle, + cb_data->DstFile, cb_data->DstIndex, cb_data->AliveWriteMask); + + if (shared_mask == RC_MASK_NONE) + return shared_mask; + + /* If we make it this far, it means that this source reads from the + * same register written to by d->ReaderData->Writer. */ + + read_mask = rc_swizzle_to_writemask(swizzle); + if (cb_data->ReaderData->AbortOnRead & read_mask) { + cb_data->ReaderData->Abort = 1; + return shared_mask; + } + + if (cb_data->ReaderData->LoopDepth > 0) { + cb_data->ReaderData->AbortOnWrite |= + (read_mask & cb_data->AliveWriteMask); + } + + /* XXX The behavior in this case should be configurable. */ + if ((read_mask & cb_data->AliveWriteMask) != read_mask) { + cb_data->ReaderData->Abort = 1; + return shared_mask; + } + + return shared_mask; +} + +static void get_readers_pair_read_callback( + void * userdata, + struct rc_instruction * inst, + struct rc_pair_instruction_arg * arg, + struct rc_pair_instruction_source * src) +{ + unsigned int shared_mask; + struct get_readers_callback_data * d = userdata; + + shared_mask = get_readers_read_callback(d, + 0 /*Pair Instructions don't use RelAddr*/, + src->File, src->Index, arg->Swizzle); + + if (shared_mask == RC_MASK_NONE) + return; + + if (d->ReadPairCB) + d->ReadPairCB(d->ReaderData, inst, arg, src); + + if (d->ReaderData->ExitOnAbort && d->ReaderData->Abort) + return; + + add_reader_pair(&d->C->Pool, d->ReaderData, inst, shared_mask, arg, src); +} + +/** + * This function is used by rc_get_readers_normal() to determine whether inst + * is a reader of userdata->ReaderData->Writer + */ +static void get_readers_normal_read_callback( + void * userdata, + struct rc_instruction * inst, + struct rc_src_register * src) +{ + struct get_readers_callback_data * d = userdata; + unsigned int shared_mask; + + shared_mask = get_readers_read_callback(d, + src->RelAddr, src->File, src->Index, src->Swizzle); + + if (shared_mask == RC_MASK_NONE) + return; + /* The callback function could potentially clear d->ReaderData->Abort, + * so we need to call it before we return. */ + if (d->ReadNormalCB) + d->ReadNormalCB(d->ReaderData, inst, src); + + if (d->ReaderData->ExitOnAbort && d->ReaderData->Abort) + return; + + add_reader_normal(&d->C->Pool, d->ReaderData, inst, shared_mask, src); +} + +/** + * This function is used by rc_get_readers_normal() to determine when + * userdata->ReaderData->Writer is dead (i. e. All compontents of its + * destination register have been overwritten by other instructions). + */ +static void get_readers_write_callback( + void *userdata, + struct rc_instruction * inst, + rc_register_file file, + unsigned int index, + unsigned int mask) +{ + struct get_readers_callback_data * d = userdata; + + if (index == d->DstIndex && file == d->DstFile) { + unsigned int shared_mask = mask & d->DstMask; + d->ReaderData->AbortOnRead &= ~shared_mask; + d->AliveWriteMask &= ~shared_mask; + if (d->ReaderData->AbortOnWrite & shared_mask) { + d->ReaderData->Abort = 1; + } + } + + if(d->WriteCB) + d->WriteCB(d->ReaderData, inst, file, index, mask); +} + +static void push_branch_mask( + struct get_readers_callback_data * d, + unsigned int * branch_depth) +{ + (*branch_depth)++; + if (*branch_depth > R500_PFS_MAX_BRANCH_DEPTH_FULL) { + d->ReaderData->Abort = 1; + return; + } + d->BranchMasks[*branch_depth].IfWriteMask = + d->AliveWriteMask; +} + +static void pop_branch_mask( + struct get_readers_callback_data * d, + unsigned int * branch_depth) +{ + struct branch_write_mask * masks = &d->BranchMasks[*branch_depth]; + + if (masks->HasElse) { + /* Abort on read for components that were written in the IF + * block. */ + d->ReaderData->AbortOnRead |= + masks->IfWriteMask & ~masks->ElseWriteMask; + /* Abort on read for components that were written in the ELSE + * block. */ + d->ReaderData->AbortOnRead |= + masks->ElseWriteMask & ~d->AliveWriteMask; + + d->AliveWriteMask = masks->IfWriteMask + ^ ((masks->IfWriteMask ^ masks->ElseWriteMask) + & (masks->IfWriteMask ^ d->AliveWriteMask)); + } else { + d->ReaderData->AbortOnRead |= + masks->IfWriteMask & ~d->AliveWriteMask; + d->AliveWriteMask = masks->IfWriteMask; + + } + memset(masks, 0, sizeof(struct branch_write_mask)); + (*branch_depth)--; +} + +static void get_readers_for_single_write( + void * userdata, + struct rc_instruction * writer, + rc_register_file dst_file, + unsigned int dst_index, + unsigned int dst_mask) +{ + struct rc_instruction * tmp; + unsigned int branch_depth = 0; + struct rc_instruction * endloop = NULL; + unsigned int abort_on_read_at_endloop = 0; + struct get_readers_callback_data * d = userdata; + + d->ReaderData->Writer = writer; + d->ReaderData->AbortOnRead = 0; + d->ReaderData->AbortOnWrite = 0; + d->ReaderData->LoopDepth = 0; + d->ReaderData->InElse = 0; + d->DstFile = dst_file; + d->DstIndex = dst_index; + d->DstMask = dst_mask; + d->AliveWriteMask = dst_mask; + memset(d->BranchMasks, 0, sizeof(d->BranchMasks)); + + if (!dst_mask) + return; + + for(tmp = writer->Next; tmp != &d->C->Program.Instructions; + tmp = tmp->Next){ + rc_opcode opcode = rc_get_flow_control_inst(tmp); + switch(opcode) { + case RC_OPCODE_BGNLOOP: + d->ReaderData->LoopDepth++; + push_branch_mask(d, &branch_depth); + break; + case RC_OPCODE_ENDLOOP: + if (d->ReaderData->LoopDepth > 0) { + d->ReaderData->LoopDepth--; + if (d->ReaderData->LoopDepth == 0) { + d->ReaderData->AbortOnWrite = 0; + } + pop_branch_mask(d, &branch_depth); + } else { + /* Here we have reached an ENDLOOP without + * seeing its BGNLOOP. These means that + * the writer was written inside of a loop, + * so it could have readers that are above it + * (i.e. they have a lower IP). To find these + * readers we jump to the BGNLOOP instruction + * and check each instruction until we get + * back to the writer. + */ + endloop = tmp; + tmp = rc_match_endloop(tmp); + if (!tmp) { + rc_error(d->C, "Failed to match endloop.\n"); + d->ReaderData->Abort = 1; + return; + } + abort_on_read_at_endloop = d->ReaderData->AbortOnRead; + d->ReaderData->AbortOnRead |= d->AliveWriteMask; + continue; + } + break; + case RC_OPCODE_IF: + push_branch_mask(d, &branch_depth); + break; + case RC_OPCODE_ELSE: + if (branch_depth == 0) { + d->ReaderData->InElse = 1; + } else { + unsigned int temp_mask = d->AliveWriteMask; + d->AliveWriteMask = + d->BranchMasks[branch_depth].IfWriteMask; + d->BranchMasks[branch_depth].ElseWriteMask = + temp_mask; + d->BranchMasks[branch_depth].HasElse = 1; + } + break; + case RC_OPCODE_ENDIF: + if (branch_depth == 0) { + d->ReaderData->AbortOnRead = d->AliveWriteMask; + d->ReaderData->InElse = 0; + } + else { + pop_branch_mask(d, &branch_depth); + } + break; + default: + break; + } + + if (d->ReaderData->InElse) + continue; + + if (tmp->Type == RC_INSTRUCTION_NORMAL) { + rc_for_all_reads_src(tmp, + get_readers_normal_read_callback, d); + } else { + rc_pair_for_all_reads_arg(tmp, + get_readers_pair_read_callback, d); + } + + /* This can happen when we jump from an ENDLOOP to BGNLOOP */ + if (tmp == writer) { + tmp = endloop; + endloop = NULL; + d->ReaderData->AbortOnRead = abort_on_read_at_endloop; + continue; + } + rc_for_all_writes_mask(tmp, get_readers_write_callback, d); + + if (d->ReaderData->ExitOnAbort && d->ReaderData->Abort) + return; + + if (branch_depth == 0 && !d->AliveWriteMask) + return; + } +} + +static void init_get_readers_callback_data( + struct get_readers_callback_data * d, + struct rc_reader_data * reader_data, + struct radeon_compiler * c, + rc_read_src_fn read_normal_cb, + rc_pair_read_arg_fn read_pair_cb, + rc_read_write_mask_fn write_cb) +{ + reader_data->Abort = 0; + reader_data->ReaderCount = 0; + reader_data->ReadersReserved = 0; + reader_data->Readers = NULL; + + d->C = c; + d->ReaderData = reader_data; + d->ReadNormalCB = read_normal_cb; + d->ReadPairCB = read_pair_cb; + d->WriteCB = write_cb; +} + +/** + * This function will create a list of readers via the rc_reader_data struct. + * This function will abort (set the flag data->Abort) and return if it + * encounters an instruction that reads from @param writer and also a different + * instruction. Here are some examples: + * + * writer = instruction 0; + * 0 MOV TEMP[0].xy, TEMP[1].xy + * 1 MOV TEMP[0].zw, TEMP[2].xy + * 2 MOV TEMP[3], TEMP[0] + * The Abort flag will be set on instruction 2, because it reads values written + * by instructions 0 and 1. + * + * writer = instruction 1; + * 0 IF TEMP[0].x + * 1 MOV TEMP[1], TEMP[2] + * 2 ELSE + * 3 MOV TEMP[1], TEMP[2] + * 4 ENDIF + * 5 MOV TEMP[3], TEMP[1] + * The Abort flag will be set on instruction 5, because it could read from the + * value written by either instruction 1 or 3, depending on the jump decision + * made at instruction 0. + * + * writer = instruction 0; + * 0 MOV TEMP[0], TEMP[1] + * 2 BGNLOOP + * 3 ADD TEMP[0], TEMP[0], none.1 + * 4 ENDLOOP + * The Abort flag will be set on instruction 3, because in the first iteration + * of the loop it reads the value written by instruction 0 and in all other + * iterations it reads the value written by instruction 3. + * + * @param read_cb This function will be called for for every instruction that + * has been determined to be a reader of writer. + * @param write_cb This function will be called for every instruction after + * writer. + */ +void rc_get_readers( + struct radeon_compiler * c, + struct rc_instruction * writer, + struct rc_reader_data * data, + rc_read_src_fn read_normal_cb, + rc_pair_read_arg_fn read_pair_cb, + rc_read_write_mask_fn write_cb) +{ + struct get_readers_callback_data d; + + init_get_readers_callback_data(&d, data, c, read_normal_cb, + read_pair_cb, write_cb); + + rc_for_all_writes_mask(writer, get_readers_for_single_write, &d); +} + +void rc_get_readers_sub( + struct radeon_compiler * c, + struct rc_instruction * writer, + struct rc_pair_sub_instruction * sub_writer, + struct rc_reader_data * data, + rc_read_src_fn read_normal_cb, + rc_pair_read_arg_fn read_pair_cb, + rc_read_write_mask_fn write_cb) +{ + struct get_readers_callback_data d; + + init_get_readers_callback_data(&d, data, c, read_normal_cb, + read_pair_cb, write_cb); + + if (sub_writer->WriteMask) { + get_readers_for_single_write(&d, writer, RC_FILE_TEMPORARY, + sub_writer->DestIndex, sub_writer->WriteMask); + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_dataflow.h b/src/gallium/drivers/r300/compiler/radeon_dataflow.h new file mode 100644 index 00000000000..d8a627258ea --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_dataflow.h @@ -0,0 +1,134 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * Copyright 2010 Tom Stellard <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_DATAFLOW_H +#define RADEON_DATAFLOW_H + +#include "radeon_program_constants.h" + +struct radeon_compiler; +struct rc_instruction; +struct rc_swizzle_caps; +struct rc_src_register; +struct rc_pair_instruction_arg; +struct rc_pair_instruction_source; +struct rc_pair_sub_instruction; +struct rc_compiler; + + +/** + * Help analyze and modify the register accesses of instructions. + */ +/*@{*/ +typedef void (*rc_read_write_chan_fn)(void * userdata, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int chan); +void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata); +void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata); + +typedef void (*rc_read_write_mask_fn)(void * userdata, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask); +void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata); +void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata); + +typedef void (*rc_read_src_fn)(void * userdata, struct rc_instruction * inst, + struct rc_src_register * src); +void rc_for_all_reads_src(struct rc_instruction * inst, rc_read_src_fn cb, + void * userdata); + +typedef void (*rc_pair_read_arg_fn)(void * userdata, + struct rc_instruction * inst, struct rc_pair_instruction_arg * arg, + struct rc_pair_instruction_source * src); +void rc_pair_for_all_reads_arg(struct rc_instruction * inst, + rc_pair_read_arg_fn cb, void * userdata); + +typedef void (*rc_remap_register_fn)(void * userdata, struct rc_instruction * inst, + rc_register_file * pfile, unsigned int * pindex); +void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, void * userdata); +/*@}*/ + +struct rc_reader { + struct rc_instruction * Inst; + unsigned int WriteMask; + union { + struct { + struct rc_src_register * Src; + } I; + struct { + struct rc_pair_instruction_arg * Arg; + struct rc_pair_instruction_source * Src; + } P; + } U; +}; + +struct rc_reader_data { + unsigned int Abort; + unsigned int AbortOnRead; + unsigned int AbortOnWrite; + unsigned int LoopDepth; + unsigned int InElse; + struct rc_instruction * Writer; + + unsigned int ReaderCount; + unsigned int ReadersReserved; + struct rc_reader * Readers; + + /* If this flag is enabled, rc_get_readers will exit as soon possbile + * after the Abort flag is set.*/ + unsigned int ExitOnAbort; + void * CbData; +}; + +void rc_get_readers( + struct radeon_compiler * c, + struct rc_instruction * writer, + struct rc_reader_data * data, + rc_read_src_fn read_normal_cb, + rc_pair_read_arg_fn read_pair_cb, + rc_read_write_mask_fn write_cb); + +void rc_get_readers_sub( + struct radeon_compiler * c, + struct rc_instruction * writer, + struct rc_pair_sub_instruction * sub_writer, + struct rc_reader_data * data, + rc_read_src_fn read_normal_cb, + rc_pair_read_arg_fn read_pair_cb, + rc_read_write_mask_fn write_cb); +/** + * Compiler passes based on dataflow analysis. + */ +/*@{*/ +typedef void (*rc_dataflow_mark_outputs_fn)(void * userdata, void * data, + void (*mark_fn)(void * data, unsigned int index, unsigned int mask)); +void rc_dataflow_deadcode(struct radeon_compiler * c, void *user); +void rc_dataflow_swizzles(struct radeon_compiler * c, void *user); +/*@}*/ + +void rc_optimize(struct radeon_compiler * c, void *user); + +#endif /* RADEON_DATAFLOW_H */ diff --git a/src/gallium/drivers/r300/compiler/radeon_dataflow_deadcode.c b/src/gallium/drivers/r300/compiler/radeon_dataflow_deadcode.c new file mode 100644 index 00000000000..678e1475883 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_dataflow_deadcode.c @@ -0,0 +1,359 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_dataflow.h" + +#include "radeon_compiler.h" + + +struct updatemask_state { + unsigned char Output[RC_REGISTER_MAX_INDEX]; + unsigned char Temporary[RC_REGISTER_MAX_INDEX]; + unsigned char Address; + unsigned char Special[RC_NUM_SPECIAL_REGISTERS]; +}; + +struct instruction_state { + unsigned char WriteMask:4; + unsigned char WriteALUResult:1; + unsigned char SrcReg[3]; +}; + +struct loopinfo { + struct updatemask_state * Breaks; + unsigned int BreakCount; + unsigned int BreaksReserved; +}; + +struct branchinfo { + unsigned int HaveElse:1; + + struct updatemask_state StoreEndif; + struct updatemask_state StoreElse; +}; + +struct deadcode_state { + struct radeon_compiler * C; + struct instruction_state * Instructions; + + struct updatemask_state R; + + struct branchinfo * BranchStack; + unsigned int BranchStackSize; + unsigned int BranchStackReserved; + + struct loopinfo * LoopStack; + unsigned int LoopStackSize; + unsigned int LoopStackReserved; +}; + + +static void or_updatemasks( + struct updatemask_state * dst, + struct updatemask_state * a, + struct updatemask_state * b) +{ + for(unsigned int i = 0; i < RC_REGISTER_MAX_INDEX; ++i) { + dst->Output[i] = a->Output[i] | b->Output[i]; + dst->Temporary[i] = a->Temporary[i] | b->Temporary[i]; + } + + for(unsigned int i = 0; i < RC_NUM_SPECIAL_REGISTERS; ++i) + dst->Special[i] = a->Special[i] | b->Special[i]; + + dst->Address = a->Address | b->Address; +} + +static void push_break(struct deadcode_state *s) +{ + struct loopinfo * loop = &s->LoopStack[s->LoopStackSize - 1]; + memory_pool_array_reserve(&s->C->Pool, struct updatemask_state, + loop->Breaks, loop->BreakCount, loop->BreaksReserved, 1); + + memcpy(&loop->Breaks[loop->BreakCount++], &s->R, sizeof(s->R)); +} + +static void push_loop(struct deadcode_state * s) +{ + memory_pool_array_reserve(&s->C->Pool, struct loopinfo, s->LoopStack, + s->LoopStackSize, s->LoopStackReserved, 1); + memset(&s->LoopStack[s->LoopStackSize++], 0, sizeof(struct loopinfo)); +} + +static void push_branch(struct deadcode_state * s) +{ + struct branchinfo * branch; + + memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack, + s->BranchStackSize, s->BranchStackReserved, 1); + + branch = &s->BranchStack[s->BranchStackSize++]; + branch->HaveElse = 0; + memcpy(&branch->StoreEndif, &s->R, sizeof(s->R)); +} + +static unsigned char * get_used_ptr(struct deadcode_state *s, rc_register_file file, unsigned int index) +{ + if (file == RC_FILE_OUTPUT || file == RC_FILE_TEMPORARY) { + if (index >= RC_REGISTER_MAX_INDEX) { + rc_error(s->C, "%s: index %i is out of bounds for file %i\n", __FUNCTION__, index, file); + return 0; + } + + if (file == RC_FILE_OUTPUT) + return &s->R.Output[index]; + else + return &s->R.Temporary[index]; + } else if (file == RC_FILE_ADDRESS) { + return &s->R.Address; + } else if (file == RC_FILE_SPECIAL) { + if (index >= RC_NUM_SPECIAL_REGISTERS) { + rc_error(s->C, "%s: special file index %i out of bounds\n", __FUNCTION__, index); + return 0; + } + + return &s->R.Special[index]; + } + + return 0; +} + +static void mark_used(struct deadcode_state * s, rc_register_file file, unsigned int index, unsigned int mask) +{ + unsigned char * pused = get_used_ptr(s, file, index); + if (pused) + *pused |= mask; +} + +static void update_instruction(struct deadcode_state * s, struct rc_instruction * inst) +{ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + struct instruction_state * insts = &s->Instructions[inst->IP]; + unsigned int usedmask = 0; + unsigned int srcmasks[3]; + + if (opcode->HasDstReg) { + unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index); + if (pused) { + usedmask = *pused & inst->U.I.DstReg.WriteMask; + *pused &= ~usedmask; + } + } + + insts->WriteMask |= usedmask; + + if (inst->U.I.WriteALUResult) { + unsigned char * pused = get_used_ptr(s, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT); + if (pused && *pused) { + if (inst->U.I.WriteALUResult == RC_ALURESULT_X) + usedmask |= RC_MASK_X; + else if (inst->U.I.WriteALUResult == RC_ALURESULT_W) + usedmask |= RC_MASK_W; + + *pused = 0; + insts->WriteALUResult = 1; + } + } + + rc_compute_sources_for_writemask(inst, usedmask, srcmasks); + + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { + unsigned int refmask = 0; + unsigned int newsrcmask = srcmasks[src] & ~insts->SrcReg[src]; + insts->SrcReg[src] |= newsrcmask; + + for(unsigned int chan = 0; chan < 4; ++chan) { + if (GET_BIT(newsrcmask, chan)) + refmask |= 1 << GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan); + } + + /* get rid of spurious bits from ZERO, ONE, etc. swizzles */ + refmask &= RC_MASK_XYZW; + + if (!refmask) + continue; + + mark_used(s, inst->U.I.SrcReg[src].File, inst->U.I.SrcReg[src].Index, refmask); + + if (inst->U.I.SrcReg[src].RelAddr) + mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X); + } +} + +static void mark_output_use(void * data, unsigned int index, unsigned int mask) +{ + struct deadcode_state * s = data; + + mark_used(s, RC_FILE_OUTPUT, index, mask); +} + +void rc_dataflow_deadcode(struct radeon_compiler * c, void *user) +{ + struct deadcode_state s; + unsigned int nr_instructions; + rc_dataflow_mark_outputs_fn dce = (rc_dataflow_mark_outputs_fn)user; + unsigned int ip; + + memset(&s, 0, sizeof(s)); + s.C = c; + + nr_instructions = rc_recompute_ips(c); + s.Instructions = memory_pool_malloc(&c->Pool, sizeof(struct instruction_state)*nr_instructions); + memset(s.Instructions, 0, sizeof(struct instruction_state)*nr_instructions); + + dce(c, &s, &mark_output_use); + + for(struct rc_instruction * inst = c->Program.Instructions.Prev; + inst != &c->Program.Instructions; + inst = inst->Prev) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + switch(opcode->Opcode){ + /* Mark all sources in the loop body as used before doing + * normal deadcode analysis. This is probably not optimal. + */ + case RC_OPCODE_ENDLOOP: + { + int endloops = 1; + struct rc_instruction *ptr; + for(ptr = inst->Prev; endloops > 0; ptr = ptr->Prev){ + opcode = rc_get_opcode_info(ptr->U.I.Opcode); + if(ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){ + endloops--; + continue; + } + if(ptr->U.I.Opcode == RC_OPCODE_ENDLOOP){ + endloops++; + continue; + } + if(opcode->HasDstReg){ + int src = 0; + unsigned int srcmasks[3]; + rc_compute_sources_for_writemask(ptr, + ptr->U.I.DstReg.WriteMask, srcmasks); + for(src=0; src < opcode->NumSrcRegs; src++){ + mark_used(&s, + ptr->U.I.SrcReg[src].File, + ptr->U.I.SrcReg[src].Index, + srcmasks[src]); + } + } + } + push_loop(&s); + break; + } + case RC_OPCODE_BRK: + push_break(&s); + break; + case RC_OPCODE_BGNLOOP: + { + unsigned int i; + struct loopinfo * loop = &s.LoopStack[s.LoopStackSize-1]; + for(i = 0; i < loop->BreakCount; i++) { + or_updatemasks(&s.R, &s.R, &loop->Breaks[i]); + } + break; + } + case RC_OPCODE_CONT: + break; + case RC_OPCODE_ENDIF: + push_branch(&s); + break; + default: + if (opcode->IsFlowControl && s.BranchStackSize) { + struct branchinfo * branch = &s.BranchStack[s.BranchStackSize-1]; + if (opcode->Opcode == RC_OPCODE_IF) { + or_updatemasks(&s.R, + &s.R, + branch->HaveElse ? &branch->StoreElse : &branch->StoreEndif); + + s.BranchStackSize--; + } else if (opcode->Opcode == RC_OPCODE_ELSE) { + if (branch->HaveElse) { + rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __FUNCTION__); + } else { + memcpy(&branch->StoreElse, &s.R, sizeof(s.R)); + memcpy(&s.R, &branch->StoreEndif, sizeof(s.R)); + branch->HaveElse = 1; + } + } else { + rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name); + } + } + } + + update_instruction(&s, inst); + } + + ip = 0; + for(struct rc_instruction * inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next, ++ip) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + int dead = 1; + unsigned int srcmasks[3]; + unsigned int usemask; + + if (!opcode->HasDstReg) { + dead = 0; + } else { + inst->U.I.DstReg.WriteMask = s.Instructions[ip].WriteMask; + if (s.Instructions[ip].WriteMask) + dead = 0; + + if (s.Instructions[ip].WriteALUResult) + dead = 0; + else + inst->U.I.WriteALUResult = RC_ALURESULT_NONE; + } + + if (dead) { + struct rc_instruction * todelete = inst; + inst = inst->Prev; + rc_remove_instruction(todelete); + continue; + } + + usemask = s.Instructions[ip].WriteMask; + + if (inst->U.I.WriteALUResult == RC_ALURESULT_X) + usemask |= RC_MASK_X; + else if (inst->U.I.WriteALUResult == RC_ALURESULT_W) + usemask |= RC_MASK_W; + + rc_compute_sources_for_writemask(inst, usemask, srcmasks); + + for(unsigned int src = 0; src < 3; ++src) { + for(unsigned int chan = 0; chan < 4; ++chan) { + if (!GET_BIT(srcmasks[src], chan)) + SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED); + } + } + } + + rc_calculate_inputs_outputs(c); +} diff --git a/src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c b/src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c new file mode 100644 index 00000000000..133a9f72ec7 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c @@ -0,0 +1,103 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_dataflow.h" + +#include "radeon_compiler.h" +#include "radeon_swizzle.h" + + +static void rewrite_source(struct radeon_compiler * c, + struct rc_instruction * inst, unsigned src) +{ + struct rc_swizzle_split split; + unsigned int tempreg = rc_find_free_temporary(c); + unsigned int usemask; + + usemask = 0; + for(unsigned int chan = 0; chan < 4; ++chan) { + if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) != RC_SWIZZLE_UNUSED) + usemask |= 1 << chan; + } + + c->SwizzleCaps->Split(inst->U.I.SrcReg[src], usemask, &split); + + for(unsigned int phase = 0; phase < split.NumPhases; ++phase) { + struct rc_instruction * mov = rc_insert_new_instruction(c, inst->Prev); + unsigned int phase_refmask; + unsigned int masked_negate; + + mov->U.I.Opcode = RC_OPCODE_MOV; + mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + mov->U.I.DstReg.Index = tempreg; + mov->U.I.DstReg.WriteMask = split.Phase[phase]; + mov->U.I.SrcReg[0] = inst->U.I.SrcReg[src]; + mov->U.I.PreSub = inst->U.I.PreSub; + + phase_refmask = 0; + for(unsigned int chan = 0; chan < 4; ++chan) { + if (!GET_BIT(split.Phase[phase], chan)) + SET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan, RC_SWIZZLE_UNUSED); + else + phase_refmask |= 1 << GET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan); + } + + phase_refmask &= RC_MASK_XYZW; + + masked_negate = split.Phase[phase] & mov->U.I.SrcReg[0].Negate; + if (masked_negate == 0) + mov->U.I.SrcReg[0].Negate = 0; + else if (masked_negate == split.Phase[phase]) + mov->U.I.SrcReg[0].Negate = RC_MASK_XYZW; + + } + + inst->U.I.SrcReg[src].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[src].Index = tempreg; + inst->U.I.SrcReg[src].Swizzle = 0; + inst->U.I.SrcReg[src].Negate = RC_MASK_NONE; + inst->U.I.SrcReg[src].Abs = 0; + for(unsigned int chan = 0; chan < 4; ++chan) { + SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, + GET_BIT(usemask, chan) ? chan : RC_SWIZZLE_UNUSED); + } +} + +void rc_dataflow_swizzles(struct radeon_compiler * c, void *user) +{ + struct rc_instruction * inst; + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned int src; + + for(src = 0; src < opcode->NumSrcRegs; ++src) { + if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src])) + rewrite_source(c, inst, src); + } + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_emulate_branches.c b/src/gallium/drivers/r300/compiler/radeon_emulate_branches.c new file mode 100644 index 00000000000..7bede344f30 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_emulate_branches.c @@ -0,0 +1,342 @@ +/* + * Copyright 2009 Nicolai Hähnle <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_emulate_branches.h" + +#include <stdio.h> + +#include "radeon_compiler.h" +#include "radeon_dataflow.h" + +#define VERBOSE 0 + +#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) + + +struct proxy_info { + unsigned int Proxied:1; + unsigned int Index:RC_REGISTER_INDEX_BITS; +}; + +struct register_proxies { + struct proxy_info Temporary[RC_REGISTER_MAX_INDEX]; +}; + +struct branch_info { + struct rc_instruction * If; + struct rc_instruction * Else; +}; + +struct emulate_branch_state { + struct radeon_compiler * C; + + struct branch_info * Branches; + unsigned int BranchCount; + unsigned int BranchReserved; +}; + + +static void handle_if(struct emulate_branch_state * s, struct rc_instruction * inst) +{ + struct branch_info * branch; + struct rc_instruction * inst_mov; + + memory_pool_array_reserve(&s->C->Pool, struct branch_info, + s->Branches, s->BranchCount, s->BranchReserved, 1); + + DBG("%s\n", __FUNCTION__); + + branch = &s->Branches[s->BranchCount++]; + memset(branch, 0, sizeof(struct branch_info)); + branch->If = inst; + + /* Make a safety copy of the decision register, because we will need + * it at ENDIF time and it might be overwritten in both branches. */ + inst_mov = rc_insert_new_instruction(s->C, inst->Prev); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = rc_find_free_temporary(s->C); + inst_mov->U.I.DstReg.WriteMask = RC_MASK_X; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index; + inst->U.I.SrcReg[0].Swizzle = 0; + inst->U.I.SrcReg[0].Abs = 0; + inst->U.I.SrcReg[0].Negate = 0; +} + +static void handle_else(struct emulate_branch_state * s, struct rc_instruction * inst) +{ + struct branch_info * branch; + + if (!s->BranchCount) { + rc_error(s->C, "Encountered ELSE outside of branches"); + return; + } + + DBG("%s\n", __FUNCTION__); + + branch = &s->Branches[s->BranchCount - 1]; + branch->Else = inst; +} + + +struct state_and_proxies { + struct emulate_branch_state * S; + struct register_proxies * Proxies; +}; + +static struct proxy_info * get_proxy_info(struct state_and_proxies * sap, + rc_register_file file, unsigned int index) +{ + if (file == RC_FILE_TEMPORARY) { + return &sap->Proxies->Temporary[index]; + } else { + return 0; + } +} + +static void scan_write(void * userdata, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int comp) +{ + struct state_and_proxies * sap = userdata; + struct proxy_info * proxy = get_proxy_info(sap, file, index); + + if (proxy && !proxy->Proxied) { + proxy->Proxied = 1; + proxy->Index = rc_find_free_temporary(sap->S->C); + } +} + +static void remap_proxy_function(void * userdata, struct rc_instruction * inst, + rc_register_file * pfile, unsigned int * pindex) +{ + struct state_and_proxies * sap = userdata; + struct proxy_info * proxy = get_proxy_info(sap, *pfile, *pindex); + + if (proxy && proxy->Proxied) { + *pfile = RC_FILE_TEMPORARY; + *pindex = proxy->Index; + } +} + +/** + * Redirect all writes in the instruction range [begin, end) to proxy + * temporary registers. + */ +static void allocate_and_insert_proxies(struct emulate_branch_state * s, + struct register_proxies * proxies, + struct rc_instruction * begin, + struct rc_instruction * end) +{ + struct state_and_proxies sap; + + sap.S = s; + sap.Proxies = proxies; + + for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) { + rc_for_all_writes_mask(inst, scan_write, &sap); + rc_remap_registers(inst, remap_proxy_function, &sap); + } + + for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) { + if (proxies->Temporary[index].Proxied) { + struct rc_instruction * inst_mov = rc_insert_new_instruction(s->C, begin->Prev); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = proxies->Temporary[index].Index; + inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZW; + inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mov->U.I.SrcReg[0].Index = index; + } + } +} + + +static void inject_cmp(struct emulate_branch_state * s, + struct rc_instruction * inst_if, + struct rc_instruction * inst_endif, + rc_register_file file, unsigned int index, + struct proxy_info ifproxy, + struct proxy_info elseproxy) +{ + struct rc_instruction * inst_cmp = rc_insert_new_instruction(s->C, inst_endif); + inst_cmp->U.I.Opcode = RC_OPCODE_CMP; + inst_cmp->U.I.DstReg.File = file; + inst_cmp->U.I.DstReg.Index = index; + inst_cmp->U.I.DstReg.WriteMask = RC_MASK_XYZW; + inst_cmp->U.I.SrcReg[0] = inst_if->U.I.SrcReg[0]; + inst_cmp->U.I.SrcReg[0].Abs = 1; + inst_cmp->U.I.SrcReg[0].Negate = RC_MASK_XYZW; + inst_cmp->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_cmp->U.I.SrcReg[1].Index = ifproxy.Proxied ? ifproxy.Index : index; + inst_cmp->U.I.SrcReg[2].File = RC_FILE_TEMPORARY; + inst_cmp->U.I.SrcReg[2].Index = elseproxy.Proxied ? elseproxy.Index : index; +} + +static void handle_endif(struct emulate_branch_state * s, struct rc_instruction * inst) +{ + struct branch_info * branch; + struct register_proxies IfProxies; + struct register_proxies ElseProxies; + + if (!s->BranchCount) { + rc_error(s->C, "Encountered ENDIF outside of branches"); + return; + } + + DBG("%s\n", __FUNCTION__); + + branch = &s->Branches[s->BranchCount - 1]; + + memset(&IfProxies, 0, sizeof(IfProxies)); + memset(&ElseProxies, 0, sizeof(ElseProxies)); + + allocate_and_insert_proxies(s, &IfProxies, branch->If->Next, branch->Else ? branch->Else : inst); + + if (branch->Else) + allocate_and_insert_proxies(s, &ElseProxies, branch->Else->Next, inst); + + /* Insert the CMP instructions at the end. */ + for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) { + if (IfProxies.Temporary[index].Proxied || ElseProxies.Temporary[index].Proxied) { + inject_cmp(s, branch->If, inst, RC_FILE_TEMPORARY, index, + IfProxies.Temporary[index], ElseProxies.Temporary[index]); + } + } + + /* Remove all traces of the branch instructions */ + rc_remove_instruction(branch->If); + if (branch->Else) + rc_remove_instruction(branch->Else); + rc_remove_instruction(inst); + + s->BranchCount--; + + if (VERBOSE) { + DBG("Program after ENDIF handling:\n"); + rc_print_program(&s->C->Program); + } +} + + +struct remap_output_data { + unsigned int Output:RC_REGISTER_INDEX_BITS; + unsigned int Temporary:RC_REGISTER_INDEX_BITS; +}; + +static void remap_output_function(void * userdata, struct rc_instruction * inst, + rc_register_file * pfile, unsigned int * pindex) +{ + struct remap_output_data * data = userdata; + + if (*pfile == RC_FILE_OUTPUT && *pindex == data->Output) { + *pfile = RC_FILE_TEMPORARY; + *pindex = data->Temporary; + } +} + + +/** + * Output registers cannot be read from and so cannot be dealt with like + * temporary registers. + * + * We do the simplest thing: If an output registers is written within + * a branch, then *all* writes to this register are proxied to a + * temporary register, and a final MOV is appended to the end of + * the program. + */ +static void fix_output_writes(struct emulate_branch_state * s, struct rc_instruction * inst) +{ + const struct rc_opcode_info * opcode; + + if (!s->BranchCount) + return; + + opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (!opcode->HasDstReg) + return; + + if (inst->U.I.DstReg.File == RC_FILE_OUTPUT) { + struct remap_output_data remap; + struct rc_instruction * inst_mov; + + remap.Output = inst->U.I.DstReg.Index; + remap.Temporary = rc_find_free_temporary(s->C); + + for(struct rc_instruction * inst = s->C->Program.Instructions.Next; + inst != &s->C->Program.Instructions; + inst = inst->Next) { + rc_remap_registers(inst, &remap_output_function, &remap); + } + + inst_mov = rc_insert_new_instruction(s->C, s->C->Program.Instructions.Prev); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_OUTPUT; + inst_mov->U.I.DstReg.Index = remap.Output; + inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZW; + inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mov->U.I.SrcReg[0].Index = remap.Temporary; + } +} + +/** + * Remove branch instructions; instead, execute both branches + * on different register sets and choose between their results + * using CMP instructions in place of the original ENDIF. + */ +void rc_emulate_branches(struct radeon_compiler *c, void *user) +{ + struct emulate_branch_state s; + struct rc_instruction * ptr; + + memset(&s, 0, sizeof(s)); + s.C = c; + + /* Untypical loop because we may remove the current instruction */ + ptr = c->Program.Instructions.Next; + while(ptr != &c->Program.Instructions) { + struct rc_instruction * inst = ptr; + ptr = ptr->Next; + + if (inst->Type == RC_INSTRUCTION_NORMAL) { + switch(inst->U.I.Opcode) { + case RC_OPCODE_IF: + handle_if(&s, inst); + break; + case RC_OPCODE_ELSE: + handle_else(&s, inst); + break; + case RC_OPCODE_ENDIF: + handle_endif(&s, inst); + break; + default: + fix_output_writes(&s, inst); + break; + } + } else { + rc_error(c, "%s: unhandled instruction type\n", __FUNCTION__); + } + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_emulate_branches.h b/src/gallium/drivers/r300/compiler/radeon_emulate_branches.h new file mode 100644 index 00000000000..818ab84d0cd --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_emulate_branches.h @@ -0,0 +1,30 @@ +/* + * Copyright 2009 Nicolai Hähnle <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef RADEON_EMULATE_BRANCHES_H +#define RADEON_EMULATE_BRANCHES_H + +struct radeon_compiler; + +void rc_emulate_branches(struct radeon_compiler *c, void *user); + +#endif /* RADEON_EMULATE_BRANCHES_H */ diff --git a/src/gallium/drivers/r300/compiler/radeon_emulate_loops.c b/src/gallium/drivers/r300/compiler/radeon_emulate_loops.c new file mode 100644 index 00000000000..205eecd1129 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_emulate_loops.c @@ -0,0 +1,522 @@ +/* + * Copyright 2010 Tom Stellard <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + */ + +#include "radeon_emulate_loops.h" + +#include "radeon_compiler.h" +#include "radeon_dataflow.h" + +#define VERBOSE 0 + +#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) + +struct const_value { + struct radeon_compiler * C; + struct rc_src_register * Src; + float Value; + int HasValue; +}; + +struct count_inst { + struct radeon_compiler * C; + int Index; + rc_swizzle Swz; + float Amount; + int Unknown; +}; + +static float get_constant_value(struct radeon_compiler * c, + struct rc_src_register * src, + int chan) +{ + float base = 1.0f; + int swz = GET_SWZ(src->Swizzle, chan); + if(swz >= 4 || src->Index >= c->Program.Constants.Count ){ + rc_error(c, "get_constant_value: Can't find a value.\n"); + return 0.0f; + } + if(GET_BIT(src->Negate, chan)){ + base = -1.0f; + } + return base * + c->Program.Constants.Constants[src->Index].u.Immediate[swz]; +} + +static int src_reg_is_immediate(struct rc_src_register * src, + struct radeon_compiler * c) +{ + return src->File == RC_FILE_CONSTANT && + c->Program.Constants.Constants[src->Index].Type==RC_CONSTANT_IMMEDIATE; +} + +static unsigned int loop_max_possible_iterations(struct radeon_compiler *c, + struct loop_info * loop) +{ + unsigned int total_i = rc_recompute_ips(c); + unsigned int loop_i = (loop->EndLoop->IP - loop->BeginLoop->IP) - 1; + /* +1 because the program already has one iteration of the loop. */ + return 1 + ((c->max_alu_insts - total_i) / loop_i); +} + +static void unroll_loop(struct radeon_compiler * c, struct loop_info * loop, + unsigned int iterations) +{ + unsigned int i; + struct rc_instruction * ptr; + struct rc_instruction * first = loop->BeginLoop->Next; + struct rc_instruction * last = loop->EndLoop->Prev; + struct rc_instruction * append_to = last; + rc_remove_instruction(loop->BeginLoop); + rc_remove_instruction(loop->EndLoop); + for( i = 1; i < iterations; i++){ + for(ptr = first; ptr != last->Next; ptr = ptr->Next){ + struct rc_instruction *new = rc_alloc_instruction(c); + memcpy(new, ptr, sizeof(struct rc_instruction)); + rc_insert_instruction(append_to, new); + append_to = new; + } + } +} + + +static void update_const_value(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + struct const_value * value = data; + if(value->Src->File != file || + value->Src->Index != index || + !(1 << GET_SWZ(value->Src->Swizzle, 0) & mask)){ + return; + } + switch(inst->U.I.Opcode){ + case RC_OPCODE_MOV: + if(!src_reg_is_immediate(&inst->U.I.SrcReg[0], value->C)){ + return; + } + value->HasValue = 1; + value->Value = + get_constant_value(value->C, &inst->U.I.SrcReg[0], 0); + break; + } +} + +static void get_incr_amount(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + struct count_inst * count_inst = data; + int amnt_src_index; + const struct rc_opcode_info * opcode; + float amount; + + if(file != RC_FILE_TEMPORARY || + count_inst->Index != index || + (1 << GET_SWZ(count_inst->Swz,0) != mask)){ + return; + } + /* Find the index of the counter register. */ + opcode = rc_get_opcode_info(inst->U.I.Opcode); + if(opcode->NumSrcRegs != 2){ + count_inst->Unknown = 1; + return; + } + if(inst->U.I.SrcReg[0].File == RC_FILE_TEMPORARY && + inst->U.I.SrcReg[0].Index == count_inst->Index && + inst->U.I.SrcReg[0].Swizzle == count_inst->Swz){ + amnt_src_index = 1; + } else if( inst->U.I.SrcReg[1].File == RC_FILE_TEMPORARY && + inst->U.I.SrcReg[1].Index == count_inst->Index && + inst->U.I.SrcReg[1].Swizzle == count_inst->Swz){ + amnt_src_index = 0; + } + else{ + count_inst->Unknown = 1; + return; + } + if(src_reg_is_immediate(&inst->U.I.SrcReg[amnt_src_index], + count_inst->C)){ + amount = get_constant_value(count_inst->C, + &inst->U.I.SrcReg[amnt_src_index], 0); + } + else{ + count_inst->Unknown = 1 ; + return; + } + switch(inst->U.I.Opcode){ + case RC_OPCODE_ADD: + count_inst->Amount += amount; + break; + case RC_OPCODE_SUB: + if(amnt_src_index == 0){ + count_inst->Unknown = 0; + return; + } + count_inst->Amount -= amount; + break; + default: + count_inst->Unknown = 1; + return; + } +} + +/** + * If c->max_alu_inst is -1, then all eligible loops will be unrolled regardless + * of how many iterations they have. + */ +static int try_unroll_loop(struct radeon_compiler * c, struct loop_info * loop) +{ + int end_loops; + int iterations; + struct count_inst count_inst; + float limit_value; + struct rc_src_register * counter; + struct rc_src_register * limit; + struct const_value counter_value; + struct rc_instruction * inst; + + /* Find the counter and the upper limit */ + + if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[0], c)){ + limit = &loop->Cond->U.I.SrcReg[0]; + counter = &loop->Cond->U.I.SrcReg[1]; + } + else if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[1], c)){ + limit = &loop->Cond->U.I.SrcReg[1]; + counter = &loop->Cond->U.I.SrcReg[0]; + } + else{ + DBG("No constant limit.\n"); + return 0; + } + + /* Find the initial value of the counter */ + counter_value.Src = counter; + counter_value.Value = 0.0f; + counter_value.HasValue = 0; + counter_value.C = c; + for(inst = c->Program.Instructions.Next; inst != loop->BeginLoop; + inst = inst->Next){ + rc_for_all_writes_mask(inst, update_const_value, &counter_value); + } + if(!counter_value.HasValue){ + DBG("Initial counter value cannot be determined.\n"); + return 0; + } + DBG("Initial counter value is %f\n", counter_value.Value); + /* Determine how the counter is modified each loop */ + count_inst.C = c; + count_inst.Index = counter->Index; + count_inst.Swz = counter->Swizzle; + count_inst.Amount = 0.0f; + count_inst.Unknown = 0; + end_loops = 1; + for(inst = loop->BeginLoop->Next; end_loops > 0; inst = inst->Next){ + switch(inst->U.I.Opcode){ + /* XXX In the future we might want to try to unroll nested + * loops here.*/ + case RC_OPCODE_BGNLOOP: + end_loops++; + break; + case RC_OPCODE_ENDLOOP: + loop->EndLoop = inst; + end_loops--; + break; + case RC_OPCODE_BRK: + /* Don't unroll loops if it has a BRK instruction + * other one used when testing the main conditional + * of the loop. */ + + /* Make sure we haven't entered a nested loops. */ + if(inst != loop->Brk && end_loops == 1) { + return 0; + } + break; + /* XXX Check if the counter is modified within an if statement. + */ + case RC_OPCODE_IF: + break; + default: + rc_for_all_writes_mask(inst, get_incr_amount, &count_inst); + if(count_inst.Unknown){ + return 0; + } + break; + } + } + /* Infinite loop */ + if(count_inst.Amount == 0.0f){ + return 0; + } + DBG("Counter is increased by %f each iteration.\n", count_inst.Amount); + /* Calculate the number of iterations of this loop. Keeping this + * simple, since we only support increment and decrement loops. + */ + limit_value = get_constant_value(c, limit, 0); + DBG("Limit is %f.\n", limit_value); + /* The iteration calculations are opposite of what you would expect. + * In a normal loop, if the condition is met, then loop continues, but + * with our loops, if the condition is met, the is exited. */ + switch(loop->Cond->U.I.Opcode){ + case RC_OPCODE_SGE: + case RC_OPCODE_SLE: + iterations = (int) ceilf((limit_value - counter_value.Value) / + count_inst.Amount); + break; + + case RC_OPCODE_SGT: + case RC_OPCODE_SLT: + iterations = (int) floorf((limit_value - counter_value.Value) / + count_inst.Amount) + 1; + break; + default: + return 0; + } + + if (c->max_alu_insts > 0 + && iterations > loop_max_possible_iterations(c, loop)) { + return 0; + } + + DBG("Loop will have %d iterations.\n", iterations); + + /* Prepare loop for unrolling */ + rc_remove_instruction(loop->Cond); + rc_remove_instruction(loop->If); + rc_remove_instruction(loop->Brk); + rc_remove_instruction(loop->EndIf); + + unroll_loop(c, loop, iterations); + loop->EndLoop = NULL; + return 1; +} + +/** + * @param c + * @param loop + * @param inst A pointer to a BGNLOOP instruction. + * @return 1 if all of the members of loop where set. + * @return 0 if there was an error and some members of loop are still NULL. + */ +static int build_loop_info(struct radeon_compiler * c, struct loop_info * loop, + struct rc_instruction * inst) +{ + struct rc_instruction * ptr; + + if(inst->U.I.Opcode != RC_OPCODE_BGNLOOP){ + rc_error(c, "%s: expected BGNLOOP", __FUNCTION__); + return 0; + } + + memset(loop, 0, sizeof(struct loop_info)); + + loop->BeginLoop = inst; + + for(ptr = loop->BeginLoop->Next; !loop->EndLoop; ptr = ptr->Next) { + + if (ptr == &c->Program.Instructions) { + rc_error(c, "%s: BGNLOOP without an ENDLOOOP.\n", + __FUNCTION__); + return 0; + } + + switch(ptr->U.I.Opcode){ + case RC_OPCODE_BGNLOOP: + { + /* Nested loop, skip ahead to the end. */ + unsigned int loop_depth = 1; + for(ptr = ptr->Next; ptr != &c->Program.Instructions; + ptr = ptr->Next){ + if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) { + loop_depth++; + } else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) { + if (!--loop_depth) { + break; + } + } + } + if (ptr == &c->Program.Instructions) { + rc_error(c, "%s: BGNLOOP without an ENDLOOOP\n", + __FUNCTION__); + return 0; + } + break; + } + case RC_OPCODE_BRK: + if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF + || ptr->Prev->U.I.Opcode != RC_OPCODE_IF + || loop->Brk){ + continue; + } + loop->Brk = ptr; + loop->If = ptr->Prev; + loop->EndIf = ptr->Next; + switch(loop->If->Prev->U.I.Opcode){ + case RC_OPCODE_SLT: + case RC_OPCODE_SGE: + case RC_OPCODE_SGT: + case RC_OPCODE_SLE: + case RC_OPCODE_SEQ: + case RC_OPCODE_SNE: + break; + default: + return 0; + } + loop->Cond = loop->If->Prev; + break; + + case RC_OPCODE_ENDLOOP: + loop->EndLoop = ptr; + break; + } + } + + if (loop->BeginLoop && loop->Brk && loop->If && loop->EndIf + && loop->Cond && loop->EndLoop) { + return 1; + } + return 0; +} + +/** + * This function prepares a loop to be unrolled by converting it into an if + * statement. Here is an outline of the conversion process: + * BGNLOOP; -> BGNLOOP; + * <Additional conditional code> -> <Additional conditional code> + * SGE/SLT temp[0], temp[1], temp[2]; -> SLT/SGE temp[0], temp[1], temp[2]; + * IF temp[0]; -> IF temp[0]; + * BRK; -> + * ENDIF; -> <Loop Body> + * <Loop Body> -> ENDIF; + * ENDLOOP; -> ENDLOOP + * + * @param inst A pointer to a BGNLOOP instruction. + * @return 1 for success, 0 for failure + */ +static int transform_loop(struct emulate_loop_state * s, + struct rc_instruction * inst) +{ + struct loop_info * loop; + + memory_pool_array_reserve(&s->C->Pool, struct loop_info, + s->Loops, s->LoopCount, s->LoopReserved, 1); + + loop = &s->Loops[s->LoopCount++]; + + if (!build_loop_info(s->C, loop, inst)) { + rc_error(s->C, "Failed to build loop info\n"); + return 0; + } + + if(try_unroll_loop(s->C, loop)){ + return 1; + } + + /* Reverse the conditional instruction */ + switch(loop->Cond->U.I.Opcode){ + case RC_OPCODE_SGE: + loop->Cond->U.I.Opcode = RC_OPCODE_SLT; + break; + case RC_OPCODE_SLT: + loop->Cond->U.I.Opcode = RC_OPCODE_SGE; + break; + case RC_OPCODE_SLE: + loop->Cond->U.I.Opcode = RC_OPCODE_SGT; + break; + case RC_OPCODE_SGT: + loop->Cond->U.I.Opcode = RC_OPCODE_SLE; + break; + case RC_OPCODE_SEQ: + loop->Cond->U.I.Opcode = RC_OPCODE_SNE; + break; + case RC_OPCODE_SNE: + loop->Cond->U.I.Opcode = RC_OPCODE_SEQ; + break; + default: + rc_error(s->C, "loop->Cond is not a conditional.\n"); + return 0; + } + + /* Prepare the loop to be emulated */ + rc_remove_instruction(loop->Brk); + rc_remove_instruction(loop->EndIf); + rc_insert_instruction(loop->EndLoop->Prev, loop->EndIf); + return 1; +} + +void rc_transform_loops(struct radeon_compiler *c, void *user) +{ + struct emulate_loop_state * s = &c->loop_state; + struct rc_instruction * ptr; + + memset(s, 0, sizeof(struct emulate_loop_state)); + s->C = c; + for(ptr = s->C->Program.Instructions.Next; + ptr != &s->C->Program.Instructions; ptr = ptr->Next) { + if(ptr->Type == RC_INSTRUCTION_NORMAL && + ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){ + if (!transform_loop(s, ptr)) + return; + } + } +} + +void rc_unroll_loops(struct radeon_compiler *c, void *user) +{ + struct rc_instruction * inst; + struct loop_info loop; + + for(inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; inst = inst->Next) { + + if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) { + if (build_loop_info(c, &loop, inst)) { + try_unroll_loop(c, &loop); + } + } + } +} + +void rc_emulate_loops(struct radeon_compiler *c, void *user) +{ + struct emulate_loop_state * s = &c->loop_state; + int i; + /* Iterate backwards of the list of loops so that loops that nested + * loops are unrolled first. + */ + for( i = s->LoopCount - 1; i >= 0; i-- ){ + unsigned int iterations; + + if(!s->Loops[i].EndLoop){ + continue; + } + iterations = loop_max_possible_iterations(s->C, &s->Loops[i]); + unroll_loop(s->C, &s->Loops[i], iterations); + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_emulate_loops.h b/src/gallium/drivers/r300/compiler/radeon_emulate_loops.h new file mode 100644 index 00000000000..cd800c059d9 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_emulate_loops.h @@ -0,0 +1,32 @@ + + +#ifndef RADEON_EMULATE_LOOPS_H +#define RADEON_EMULATE_LOOPS_H + +#define MAX_ITERATIONS 8 + +struct radeon_compiler; + +struct loop_info { + struct rc_instruction * BeginLoop; + struct rc_instruction * Cond; + struct rc_instruction * If; + struct rc_instruction * Brk; + struct rc_instruction * EndIf; + struct rc_instruction * EndLoop; +}; + +struct emulate_loop_state { + struct radeon_compiler * C; + struct loop_info * Loops; + unsigned int LoopCount; + unsigned int LoopReserved; +}; + +void rc_transform_loops(struct radeon_compiler *c, void *user); + +void rc_unroll_loops(struct radeon_compiler * c, void *user); + +void rc_emulate_loops(struct radeon_compiler * c, void *user); + +#endif /* RADEON_EMULATE_LOOPS_H */ diff --git a/src/gallium/drivers/r300/compiler/radeon_list.c b/src/gallium/drivers/r300/compiler/radeon_list.c new file mode 100644 index 00000000000..811c908a81a --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_list.c @@ -0,0 +1,90 @@ +/* + * Copyright 2011 Tom Stellard <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_list.h" + +#include <stdlib.h> +#include <stdio.h> + +#include "memory_pool.h" + +struct rc_list * rc_list(struct memory_pool * pool, void * item) +{ + struct rc_list * new = memory_pool_malloc(pool, sizeof(struct rc_list)); + new->Item = item; + new->Next = NULL; + new->Prev = NULL; + + return new; +} + +void rc_list_add(struct rc_list ** list, struct rc_list * new_value) +{ + struct rc_list * temp; + + if (*list == NULL) { + *list = new_value; + return; + } + + for (temp = *list; temp->Next; temp = temp->Next); + + temp->Next = new_value; + new_value->Prev = temp; +} + +void rc_list_remove(struct rc_list ** list, struct rc_list * rm_value) +{ + if (*list == rm_value) { + *list = rm_value->Next; + return; + } + + rm_value->Prev->Next = rm_value->Next; + if (rm_value->Next) { + rm_value->Next->Prev = rm_value->Prev; + } +} + +unsigned int rc_list_count(struct rc_list * list) +{ + unsigned int count = 0; + while (list) { + count++; + list = list->Next; + } + return count; +} + +void rc_list_print(struct rc_list * list) +{ + while(list) { + fprintf(stderr, "%p->", list->Item); + list = list->Next; + } + fprintf(stderr, "\n"); +} diff --git a/src/gallium/drivers/r300/compiler/radeon_list.h b/src/gallium/drivers/r300/compiler/radeon_list.h new file mode 100644 index 00000000000..b3c8f89cc68 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_list.h @@ -0,0 +1,46 @@ +/* + * Copyright 2011 Tom Stellard <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_LIST_H +#define RADEON_LIST_H + +struct memory_pool; + +struct rc_list { + void * Item; + struct rc_list * Prev; + struct rc_list * Next; +}; + +struct rc_list * rc_list(struct memory_pool * pool, void * item); +void rc_list_add(struct rc_list ** list, struct rc_list * new_value); +void rc_list_remove(struct rc_list ** list, struct rc_list * rm_value); +unsigned int rc_list_count(struct rc_list * list); +void rc_list_print(struct rc_list * list); + +#endif /* RADEON_LIST_H */ + diff --git a/src/gallium/drivers/r300/compiler/radeon_opcodes.c b/src/gallium/drivers/r300/compiler/radeon_opcodes.c new file mode 100644 index 00000000000..afd78ad79dd --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_opcodes.c @@ -0,0 +1,546 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_opcodes.h" +#include "radeon_program.h" + +#include "radeon_program_constants.h" + +struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { + { + .Opcode = RC_OPCODE_NOP, + .Name = "NOP" + }, + { + .Opcode = RC_OPCODE_ILLEGAL_OPCODE, + .Name = "ILLEGAL OPCODE" + }, + { + .Opcode = RC_OPCODE_ABS, + .Name = "ABS", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_ADD, + .Name = "ADD", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_ARL, + .Name = "ARL", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_CEIL, + .Name = "CEIL", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_CLAMP, + .Name = "CLAMP", + .NumSrcRegs = 3, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_CMP, + .Name = "CMP", + .NumSrcRegs = 3, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_CND, + .Name = "CND", + .NumSrcRegs = 3, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_COS, + .Name = "COS", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_DDX, + .Name = "DDX", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_DDY, + .Name = "DDY", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_DP2, + .Name = "DP2", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_DP3, + .Name = "DP3", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_DP4, + .Name = "DP4", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_DPH, + .Name = "DPH", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_DST, + .Name = "DST", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_EX2, + .Name = "EX2", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_EXP, + .Name = "EXP", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_FLR, + .Name = "FLR", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_FRC, + .Name = "FRC", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_KIL, + .Name = "KIL", + .NumSrcRegs = 1 + }, + { + .Opcode = RC_OPCODE_LG2, + .Name = "LG2", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_LIT, + .Name = "LIT", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_LOG, + .Name = "LOG", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_LRP, + .Name = "LRP", + .NumSrcRegs = 3, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_MAD, + .Name = "MAD", + .NumSrcRegs = 3, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_MAX, + .Name = "MAX", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_MIN, + .Name = "MIN", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_MOV, + .Name = "MOV", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_MUL, + .Name = "MUL", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_POW, + .Name = "POW", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_RCP, + .Name = "RCP", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_RSQ, + .Name = "RSQ", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_SCS, + .Name = "SCS", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_SEQ, + .Name = "SEQ", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SFL, + .Name = "SFL", + .NumSrcRegs = 0, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SGE, + .Name = "SGE", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SGT, + .Name = "SGT", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SIN, + .Name = "SIN", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_SLE, + .Name = "SLE", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SLT, + .Name = "SLT", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SNE, + .Name = "SNE", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SSG, + .Name = "SSG", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SUB, + .Name = "SUB", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SWZ, + .Name = "SWZ", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_XPD, + .Name = "XPD", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_TEX, + .Name = "TEX", + .HasTexture = 1, + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_TXB, + .Name = "TXB", + .HasTexture = 1, + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_TXD, + .Name = "TXD", + .HasTexture = 1, + .NumSrcRegs = 3, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_TXL, + .Name = "TXL", + .HasTexture = 1, + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_TXP, + .Name = "TXP", + .HasTexture = 1, + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_IF, + .Name = "IF", + .IsFlowControl = 1, + .NumSrcRegs = 1 + }, + { + .Opcode = RC_OPCODE_ELSE, + .Name = "ELSE", + .IsFlowControl = 1, + .NumSrcRegs = 0 + }, + { + .Opcode = RC_OPCODE_ENDIF, + .Name = "ENDIF", + .IsFlowControl = 1, + .NumSrcRegs = 0 + }, + { + .Opcode = RC_OPCODE_BGNLOOP, + .Name = "BGNLOOP", + .IsFlowControl = 1, + .NumSrcRegs = 0 + }, + { + .Opcode = RC_OPCODE_BRK, + .Name = "BRK", + .IsFlowControl = 1, + .NumSrcRegs = 0 + }, + { + .Opcode = RC_OPCODE_ENDLOOP, + .Name = "ENDLOOP", + .IsFlowControl = 1, + .NumSrcRegs = 0, + }, + { + .Opcode = RC_OPCODE_CONT, + .Name = "CONT", + .IsFlowControl = 1, + .NumSrcRegs = 0 + }, + { + .Opcode = RC_OPCODE_REPL_ALPHA, + .Name = "REPL_ALPHA", + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_BEGIN_TEX, + .Name = "BEGIN_TEX" + }, + { + .Opcode = RC_OPCODE_KILP, + .Name = "KILP", + } +}; + +void rc_compute_sources_for_writemask( + const struct rc_instruction *inst, + unsigned int writemask, + unsigned int *srcmasks) +{ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + srcmasks[0] = 0; + srcmasks[1] = 0; + srcmasks[2] = 0; + + if (opcode->Opcode == RC_OPCODE_KIL) + srcmasks[0] |= RC_MASK_XYZW; + else if (opcode->Opcode == RC_OPCODE_IF) + srcmasks[0] |= RC_MASK_X; + + if (!writemask) + return; + + if (opcode->IsComponentwise) { + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) + srcmasks[src] |= writemask; + } else if (opcode->IsStandardScalar) { + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) + srcmasks[src] |= RC_MASK_X; + } else { + switch(opcode->Opcode) { + case RC_OPCODE_ARL: + srcmasks[0] |= RC_MASK_X; + break; + case RC_OPCODE_DP2: + srcmasks[0] |= RC_MASK_XY; + srcmasks[1] |= RC_MASK_XY; + break; + case RC_OPCODE_DP3: + case RC_OPCODE_XPD: + srcmasks[0] |= RC_MASK_XYZ; + srcmasks[1] |= RC_MASK_XYZ; + break; + case RC_OPCODE_DP4: + srcmasks[0] |= RC_MASK_XYZW; + srcmasks[1] |= RC_MASK_XYZW; + break; + case RC_OPCODE_DPH: + srcmasks[0] |= RC_MASK_XYZ; + srcmasks[1] |= RC_MASK_XYZW; + break; + case RC_OPCODE_TXB: + case RC_OPCODE_TXP: + case RC_OPCODE_TXL: + srcmasks[0] |= RC_MASK_W; + /* Fall through */ + case RC_OPCODE_TEX: + switch (inst->U.I.TexSrcTarget) { + case RC_TEXTURE_1D: + srcmasks[0] |= RC_MASK_X; + break; + case RC_TEXTURE_2D: + case RC_TEXTURE_RECT: + case RC_TEXTURE_1D_ARRAY: + srcmasks[0] |= RC_MASK_XY; + break; + case RC_TEXTURE_3D: + case RC_TEXTURE_CUBE: + case RC_TEXTURE_2D_ARRAY: + srcmasks[0] |= RC_MASK_XYZ; + break; + } + break; + case RC_OPCODE_TXD: + switch (inst->U.I.TexSrcTarget) { + case RC_TEXTURE_1D_ARRAY: + srcmasks[0] |= RC_MASK_Y; + /* Fall through. */ + case RC_TEXTURE_1D: + srcmasks[0] |= RC_MASK_X; + srcmasks[1] |= RC_MASK_X; + srcmasks[2] |= RC_MASK_X; + break; + case RC_TEXTURE_2D_ARRAY: + srcmasks[0] |= RC_MASK_Z; + /* Fall through. */ + case RC_TEXTURE_2D: + case RC_TEXTURE_RECT: + srcmasks[0] |= RC_MASK_XY; + srcmasks[1] |= RC_MASK_XY; + srcmasks[2] |= RC_MASK_XY; + break; + case RC_TEXTURE_3D: + case RC_TEXTURE_CUBE: + srcmasks[0] |= RC_MASK_XYZ; + srcmasks[1] |= RC_MASK_XYZ; + srcmasks[2] |= RC_MASK_XYZ; + break; + } + break; + case RC_OPCODE_DST: + srcmasks[0] |= RC_MASK_Y | RC_MASK_Z; + srcmasks[1] |= RC_MASK_Y | RC_MASK_W; + break; + case RC_OPCODE_EXP: + case RC_OPCODE_LOG: + srcmasks[0] |= RC_MASK_XY; + break; + case RC_OPCODE_LIT: + srcmasks[0] |= RC_MASK_X | RC_MASK_Y | RC_MASK_W; + break; + default: + break; + } + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_opcodes.h b/src/gallium/drivers/r300/compiler/radeon_opcodes.h new file mode 100644 index 00000000000..b5868820611 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_opcodes.h @@ -0,0 +1,263 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_OPCODES_H +#define RADEON_OPCODES_H + +#include <assert.h> + +/** + * Opcodes understood by the Radeon compiler. + */ +typedef enum { + RC_OPCODE_NOP = 0, + RC_OPCODE_ILLEGAL_OPCODE, + + /** vec4 instruction: dst.c = abs(src0.c); */ + RC_OPCODE_ABS, + + /** vec4 instruction: dst.c = src0.c + src1.c; */ + RC_OPCODE_ADD, + + /** special instruction: load address register + * dst.x = floor(src.x), where dst must be an address register */ + RC_OPCODE_ARL, + + /** vec4 instruction: dst.c = ceil(src0.c) */ + RC_OPCODE_CEIL, + + /** vec4 instruction: dst.c = clamp(src0.c, src1.c, src2.c) */ + RC_OPCODE_CLAMP, + + /** vec4 instruction: dst.c = src0.c < 0.0 ? src1.c : src2.c */ + RC_OPCODE_CMP, + + /** vec4 instruction: dst.c = src2.c > 0.5 ? src0.c : src1.c */ + RC_OPCODE_CND, + + /** scalar instruction: dst = cos(src0.x) */ + RC_OPCODE_COS, + + /** special instruction: take vec4 partial derivative in X direction + * dst.c = d src0.c / dx */ + RC_OPCODE_DDX, + + /** special instruction: take vec4 partial derivative in Y direction + * dst.c = d src0.c / dy */ + RC_OPCODE_DDY, + + /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y */ + RC_OPCODE_DP2, + + /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z */ + RC_OPCODE_DP3, + + /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src0.w*src1.w */ + RC_OPCODE_DP4, + + /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src1.w */ + RC_OPCODE_DPH, + + /** special instruction, see ARB_fragment_program */ + RC_OPCODE_DST, + + /** scalar instruction: dst = 2**src0.x */ + RC_OPCODE_EX2, + + /** special instruction, see ARB_vertex_program */ + RC_OPCODE_EXP, + + /** vec4 instruction: dst.c = floor(src0.c) */ + RC_OPCODE_FLR, + + /** vec4 instruction: dst.c = src0.c - floor(src0.c) */ + RC_OPCODE_FRC, + + /** special instruction: stop execution if any component of src0 is negative */ + RC_OPCODE_KIL, + + /** scalar instruction: dst = log_2(src0.x) */ + RC_OPCODE_LG2, + + /** special instruction, see ARB_vertex_program */ + RC_OPCODE_LIT, + + /** special instruction, see ARB_vertex_program */ + RC_OPCODE_LOG, + + /** vec4 instruction: dst.c = src0.c*src1.c + (1 - src0.c)*src2.c */ + RC_OPCODE_LRP, + + /** vec4 instruction: dst.c = src0.c*src1.c + src2.c */ + RC_OPCODE_MAD, + + /** vec4 instruction: dst.c = max(src0.c, src1.c) */ + RC_OPCODE_MAX, + + /** vec4 instruction: dst.c = min(src0.c, src1.c) */ + RC_OPCODE_MIN, + + /** vec4 instruction: dst.c = src0.c */ + RC_OPCODE_MOV, + + /** vec4 instruction: dst.c = src0.c*src1.c */ + RC_OPCODE_MUL, + + /** scalar instruction: dst = src0.x ** src1.x */ + RC_OPCODE_POW, + + /** scalar instruction: dst = 1 / src0.x */ + RC_OPCODE_RCP, + + /** scalar instruction: dst = 1 / sqrt(src0.x) */ + RC_OPCODE_RSQ, + + /** special instruction, see ARB_fragment_program */ + RC_OPCODE_SCS, + + /** vec4 instruction: dst.c = (src0.c == src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SEQ, + + /** vec4 instruction: dst.c = 0.0 */ + RC_OPCODE_SFL, + + /** vec4 instruction: dst.c = (src0.c >= src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SGE, + + /** vec4 instruction: dst.c = (src0.c > src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SGT, + + /** scalar instruction: dst = sin(src0.x) */ + RC_OPCODE_SIN, + + /** vec4 instruction: dst.c = (src0.c <= src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SLE, + + /** vec4 instruction: dst.c = (src0.c < src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SLT, + + /** vec4 instruction: dst.c = (src0.c != src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SNE, + + /** vec4 instruction: dst.c = (src0.c < 0 ?) -1 : ((src0.c > 0) : 1 : 0) */ + RC_OPCODE_SSG, + + /** vec4 instruction: dst.c = src0.c - src1.c */ + RC_OPCODE_SUB, + + /** vec4 instruction: dst.c = src0.c */ + RC_OPCODE_SWZ, + + /** special instruction, see ARB_fragment_program */ + RC_OPCODE_XPD, + + RC_OPCODE_TEX, + RC_OPCODE_TXB, + RC_OPCODE_TXD, + RC_OPCODE_TXL, + RC_OPCODE_TXP, + + /** branch instruction: + * If src0.x != 0.0, continue with the next instruction; + * otherwise, jump to matching RC_OPCODE_ELSE or RC_OPCODE_ENDIF. + */ + RC_OPCODE_IF, + + /** branch instruction: jump to matching RC_OPCODE_ENDIF */ + RC_OPCODE_ELSE, + + /** branch instruction: has no effect */ + RC_OPCODE_ENDIF, + + RC_OPCODE_BGNLOOP, + + RC_OPCODE_BRK, + + RC_OPCODE_ENDLOOP, + + RC_OPCODE_CONT, + + /** special instruction, used in R300-R500 fragment program pair instructions + * indicates that the result of the alpha operation shall be replicated + * across all other channels */ + RC_OPCODE_REPL_ALPHA, + + /** special instruction, used in R300-R500 fragment programs + * to indicate the start of a block of texture instructions that + * can run simultaneously. */ + RC_OPCODE_BEGIN_TEX, + + /** Stop execution of the shader (GLSL discard) */ + RC_OPCODE_KILP, + + MAX_RC_OPCODE +} rc_opcode; + + +struct rc_opcode_info { + rc_opcode Opcode; + const char * Name; + + /** true if the instruction reads from a texture. + * + * \note This is false for the KIL instruction, even though KIL is + * a texture instruction from a hardware point of view. */ + unsigned int HasTexture:1; + + unsigned int NumSrcRegs:2; + unsigned int HasDstReg:1; + + /** true if this instruction affects control flow */ + unsigned int IsFlowControl:1; + + /** true if this is a vector instruction that operates on components in parallel + * without any cross-component interaction */ + unsigned int IsComponentwise:1; + + /** true if this instruction sources only its operands X components + * to compute one result which is smeared across all output channels */ + unsigned int IsStandardScalar:1; +}; + +extern struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE]; + +static inline const struct rc_opcode_info * rc_get_opcode_info(rc_opcode opcode) +{ + assert((unsigned int)opcode < MAX_RC_OPCODE); + assert(rc_opcodes[opcode].Opcode == opcode); + + return &rc_opcodes[opcode]; +} + +struct rc_instruction; + +void rc_compute_sources_for_writemask( + const struct rc_instruction *inst, + unsigned int writemask, + unsigned int *srcmasks); + +#endif /* RADEON_OPCODES_H */ diff --git a/src/gallium/drivers/r300/compiler/radeon_optimize.c b/src/gallium/drivers/r300/compiler/radeon_optimize.c new file mode 100644 index 00000000000..39dcb21d4f4 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_optimize.c @@ -0,0 +1,700 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * Copyright 2010 Tom Stellard <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_dataflow.h" + +#include "radeon_compiler.h" +#include "radeon_compiler_util.h" +#include "radeon_swizzle.h" + +struct src_clobbered_reads_cb_data { + rc_register_file File; + unsigned int Index; + unsigned int Mask; + struct rc_reader_data * ReaderData; +}; + +typedef void (*rc_presub_replace_fn)(struct rc_instruction *, + struct rc_instruction *, + unsigned int); + +static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner) +{ + struct rc_src_register combine; + combine.File = inner.File; + combine.Index = inner.Index; + combine.RelAddr = inner.RelAddr; + if (outer.Abs) { + combine.Abs = 1; + combine.Negate = outer.Negate; + } else { + combine.Abs = inner.Abs; + combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate); + combine.Negate ^= outer.Negate; + } + combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle); + return combine; +} + +static void copy_propagate_scan_read(void * data, struct rc_instruction * inst, + struct rc_src_register * src) +{ + rc_register_file file = src->File; + struct rc_reader_data * reader_data = data; + + if(!rc_inst_can_use_presub(inst, + reader_data->Writer->U.I.PreSub.Opcode, + rc_swizzle_to_writemask(src->Swizzle), + src, + &reader_data->Writer->U.I.PreSub.SrcReg[0], + &reader_data->Writer->U.I.PreSub.SrcReg[1])) { + reader_data->Abort = 1; + return; + } + + /* XXX This could probably be handled better. */ + if (file == RC_FILE_ADDRESS) { + reader_data->Abort = 1; + return; + } + + /* These instructions cannot read from the constants file. + * see radeonTransformTEX() + */ + if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && + reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT && + (inst->U.I.Opcode == RC_OPCODE_TEX || + inst->U.I.Opcode == RC_OPCODE_TXB || + inst->U.I.Opcode == RC_OPCODE_TXP || + inst->U.I.Opcode == RC_OPCODE_TXD || + inst->U.I.Opcode == RC_OPCODE_TXL || + inst->U.I.Opcode == RC_OPCODE_KIL)){ + reader_data->Abort = 1; + return; + } +} + +static void src_clobbered_reads_cb( + void * data, + struct rc_instruction * inst, + struct rc_src_register * src) +{ + struct src_clobbered_reads_cb_data * sc_data = data; + + if (src->File == sc_data->File + && src->Index == sc_data->Index + && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) { + + sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW; + } + + if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) { + sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW; + } +} + +static void is_src_clobbered_scan_write( + void * data, + struct rc_instruction * inst, + rc_register_file file, + unsigned int index, + unsigned int mask) +{ + struct src_clobbered_reads_cb_data sc_data; + struct rc_reader_data * reader_data = data; + sc_data.File = file; + sc_data.Index = index; + sc_data.Mask = mask; + sc_data.ReaderData = reader_data; + rc_for_all_reads_src(reader_data->Writer, + src_clobbered_reads_cb, &sc_data); +} + +static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov) +{ + struct rc_reader_data reader_data; + unsigned int i; + + if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY || + inst_mov->U.I.WriteALUResult || + inst_mov->U.I.SaturateMode) + return; + + /* Get a list of all the readers of this MOV instruction. */ + reader_data.ExitOnAbort = 1; + rc_get_readers(c, inst_mov, &reader_data, + copy_propagate_scan_read, NULL, + is_src_clobbered_scan_write); + + if (reader_data.Abort || reader_data.ReaderCount == 0) + return; + + /* Propagate the MOV instruction. */ + for (i = 0; i < reader_data.ReaderCount; i++) { + struct rc_instruction * inst = reader_data.Readers[i].Inst; + *reader_data.Readers[i].U.I.Src = chain_srcregs(*reader_data.Readers[i].U.I.Src, inst_mov->U.I.SrcReg[0]); + + if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB) + inst->U.I.PreSub = inst_mov->U.I.PreSub; + } + + /* Finally, remove the original MOV instruction */ + rc_remove_instruction(inst_mov); +} + +/** + * Check if a source register is actually always the same + * swizzle constant. + */ +static int is_src_uniform_constant(struct rc_src_register src, + rc_swizzle * pswz, unsigned int * pnegate) +{ + int have_used = 0; + + if (src.File != RC_FILE_NONE) { + *pswz = 0; + return 0; + } + + for(unsigned int chan = 0; chan < 4; ++chan) { + unsigned int swz = GET_SWZ(src.Swizzle, chan); + if (swz < 4) { + *pswz = 0; + return 0; + } + if (swz == RC_SWIZZLE_UNUSED) + continue; + + if (!have_used) { + *pswz = swz; + *pnegate = GET_BIT(src.Negate, chan); + have_used = 1; + } else { + if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) { + *pswz = 0; + return 0; + } + } + } + + return 1; +} + +static void constant_folding_mad(struct rc_instruction * inst) +{ + rc_swizzle swz = 0; + unsigned int negate= 0; + + if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) { + if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MUL; + return; + } + } + + if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { + if (swz == RC_SWIZZLE_ONE) { + inst->U.I.Opcode = RC_OPCODE_ADD; + if (negate) + inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; + inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2]; + return; + } else if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; + return; + } + } + + if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { + if (swz == RC_SWIZZLE_ONE) { + inst->U.I.Opcode = RC_OPCODE_ADD; + if (negate) + inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; + return; + } else if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; + return; + } + } +} + +static void constant_folding_mul(struct rc_instruction * inst) +{ + rc_swizzle swz = 0; + unsigned int negate = 0; + + if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { + if (swz == RC_SWIZZLE_ONE) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; + if (negate) + inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; + return; + } else if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; + return; + } + } + + if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { + if (swz == RC_SWIZZLE_ONE) { + inst->U.I.Opcode = RC_OPCODE_MOV; + if (negate) + inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; + return; + } else if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; + return; + } + } +} + +static void constant_folding_add(struct rc_instruction * inst) +{ + rc_swizzle swz = 0; + unsigned int negate = 0; + + if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { + if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; + return; + } + } + + if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { + if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + return; + } + } +} + +/** + * Replace 0.0, 1.0 and 0.5 immediate constants by their + * respective swizzles. Simplify instructions like ADD dst, src, 0; + */ +static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst) +{ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned int i; + + /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */ + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { + struct rc_constant * constant; + struct rc_src_register newsrc; + int have_real_reference; + unsigned int chan; + + /* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */ + for (chan = 0; chan < 4; ++chan) + if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3) + break; + if (chan == 4) { + inst->U.I.SrcReg[src].File = RC_FILE_NONE; + continue; + } + + /* Convert immediates to swizzles. */ + if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT || + inst->U.I.SrcReg[src].RelAddr || + inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count) + continue; + + constant = + &c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index]; + + if (constant->Type != RC_CONSTANT_IMMEDIATE) + continue; + + newsrc = inst->U.I.SrcReg[src]; + have_real_reference = 0; + for (chan = 0; chan < 4; ++chan) { + unsigned int swz = GET_SWZ(newsrc.Swizzle, chan); + unsigned int newswz; + float imm; + float baseimm; + + if (swz >= 4) + continue; + + imm = constant->u.Immediate[swz]; + baseimm = imm; + if (imm < 0.0) + baseimm = -baseimm; + + if (baseimm == 0.0) { + newswz = RC_SWIZZLE_ZERO; + } else if (baseimm == 1.0) { + newswz = RC_SWIZZLE_ONE; + } else if (baseimm == 0.5 && c->has_half_swizzles) { + newswz = RC_SWIZZLE_HALF; + } else { + have_real_reference = 1; + continue; + } + + SET_SWZ(newsrc.Swizzle, chan, newswz); + if (imm < 0.0 && !newsrc.Abs) + newsrc.Negate ^= 1 << chan; + } + + if (!have_real_reference) { + newsrc.File = RC_FILE_NONE; + newsrc.Index = 0; + } + + /* don't make the swizzle worse */ + if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) && + c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src])) + continue; + + inst->U.I.SrcReg[src] = newsrc; + } + + /* Simplify instructions based on constants */ + if (inst->U.I.Opcode == RC_OPCODE_MAD) + constant_folding_mad(inst); + + /* note: MAD can simplify to MUL or ADD */ + if (inst->U.I.Opcode == RC_OPCODE_MUL) + constant_folding_mul(inst); + else if (inst->U.I.Opcode == RC_OPCODE_ADD) + constant_folding_add(inst); + + /* In case this instruction has been converted, make sure all of the + * registers that are no longer used are empty. */ + opcode = rc_get_opcode_info(inst->U.I.Opcode); + for(i = opcode->NumSrcRegs; i < 3; i++) { + memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register)); + } +} + +/** + * If src and dst use the same register, this function returns a writemask that + * indicates wich components are read by src. Otherwise zero is returned. + */ +static unsigned int src_reads_dst_mask(struct rc_src_register src, + struct rc_dst_register dst) +{ + if (dst.File != src.File || dst.Index != src.Index) { + return 0; + } + return rc_swizzle_to_writemask(src.Swizzle); +} + +/* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0) + * in any of its channels. Return 0 otherwise. */ +static int src_has_const_swz(struct rc_src_register src) { + int chan; + for(chan = 0; chan < 4; chan++) { + unsigned int swz = GET_SWZ(src.Swizzle, chan); + if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF + || swz == RC_SWIZZLE_ONE) { + return 1; + } + } + return 0; +} + +static void presub_scan_read( + void * data, + struct rc_instruction * inst, + struct rc_src_register * src) +{ + struct rc_reader_data * reader_data = data; + rc_presubtract_op * presub_opcode = reader_data->CbData; + + if (!rc_inst_can_use_presub(inst, *presub_opcode, + reader_data->Writer->U.I.DstReg.WriteMask, + src, + &reader_data->Writer->U.I.SrcReg[0], + &reader_data->Writer->U.I.SrcReg[1])) { + reader_data->Abort = 1; + return; + } +} + +static int presub_helper( + struct radeon_compiler * c, + struct rc_instruction * inst_add, + rc_presubtract_op presub_opcode, + rc_presub_replace_fn presub_replace) +{ + struct rc_reader_data reader_data; + unsigned int i; + rc_presubtract_op cb_op = presub_opcode; + + reader_data.CbData = &cb_op; + reader_data.ExitOnAbort = 1; + rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL, + is_src_clobbered_scan_write); + + if (reader_data.Abort || reader_data.ReaderCount == 0) + return 0; + + for(i = 0; i < reader_data.ReaderCount; i++) { + unsigned int src_index; + struct rc_reader reader = reader_data.Readers[i]; + const struct rc_opcode_info * info = + rc_get_opcode_info(reader.Inst->U.I.Opcode); + + for (src_index = 0; src_index < info->NumSrcRegs; src_index++) { + if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.I.Src) + presub_replace(inst_add, reader.Inst, src_index); + } + } + return 1; +} + +/* This function assumes that inst_add->U.I.SrcReg[0] and + * inst_add->U.I.SrcReg[1] aren't both negative. */ +static void presub_replace_add( + struct rc_instruction * inst_add, + struct rc_instruction * inst_reader, + unsigned int src_index) +{ + rc_presubtract_op presub_opcode; + if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate) + presub_opcode = RC_PRESUB_SUB; + else + presub_opcode = RC_PRESUB_ADD; + + if (inst_add->U.I.SrcReg[1].Negate) { + inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1]; + inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0]; + } else { + inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0]; + inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1]; + } + inst_reader->U.I.PreSub.SrcReg[0].Negate = 0; + inst_reader->U.I.PreSub.SrcReg[1].Negate = 0; + inst_reader->U.I.PreSub.Opcode = presub_opcode; + inst_reader->U.I.SrcReg[src_index] = + chain_srcregs(inst_reader->U.I.SrcReg[src_index], + inst_reader->U.I.PreSub.SrcReg[0]); + inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; + inst_reader->U.I.SrcReg[src_index].Index = presub_opcode; +} + +static int is_presub_candidate( + struct radeon_compiler * c, + struct rc_instruction * inst) +{ + const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode); + unsigned int i; + unsigned int is_constant[2] = {0, 0}; + + assert(inst->U.I.Opcode == RC_OPCODE_ADD); + + if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE + || inst->U.I.SaturateMode + || inst->U.I.WriteALUResult) { + return 0; + } + + /* If both sources use a constant swizzle, then we can't convert it to + * a presubtract operation. In fact for the ADD and SUB presubtract + * operations neither source can contain a constant swizzle. This + * specific case is checked in peephole_add_presub_add() when + * we make sure the swizzles for both sources are equal, so we + * don't need to worry about it here. */ + for (i = 0; i < 2; i++) { + int chan; + for (chan = 0; chan < 4; chan++) { + rc_swizzle swz = + get_swz(inst->U.I.SrcReg[i].Swizzle, chan); + if (swz == RC_SWIZZLE_ONE + || swz == RC_SWIZZLE_ZERO + || swz == RC_SWIZZLE_HALF) { + is_constant[i] = 1; + } + } + } + if (is_constant[0] && is_constant[1]) + return 0; + + for(i = 0; i < info->NumSrcRegs; i++) { + struct rc_src_register src = inst->U.I.SrcReg[i]; + if (src_reads_dst_mask(src, inst->U.I.DstReg)) + return 0; + + src.File = RC_FILE_PRESUB; + if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src)) + return 0; + } + return 1; +} + +static int peephole_add_presub_add( + struct radeon_compiler * c, + struct rc_instruction * inst_add) +{ + unsigned dstmask = inst_add->U.I.DstReg.WriteMask; + unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask; + unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask; + + if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle) + return 0; + + /* src0 and src1 can't have absolute values */ + if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs) + return 0; + + /* presub_replace_add() assumes only one is negative */ + if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate) + return 0; + + /* if src0 is negative, at least all bits of dstmask have to be set */ + if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask) + return 0; + + /* if src1 is negative, at least all bits of dstmask have to be set */ + if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask) + return 0; + + if (!is_presub_candidate(c, inst_add)) + return 0; + + if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) { + rc_remove_instruction(inst_add); + return 1; + } + return 0; +} + +static void presub_replace_inv( + struct rc_instruction * inst_add, + struct rc_instruction * inst_reader, + unsigned int src_index) +{ + /* We must be careful not to modify inst_add, since it + * is possible it will remain part of the program.*/ + inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1]; + inst_reader->U.I.PreSub.SrcReg[0].Negate = 0; + inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV; + inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index], + inst_reader->U.I.PreSub.SrcReg[0]); + + inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; + inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV; +} + +/** + * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1] + * Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source + * of the add instruction must have the constatnt 1 swizzle. This function + * does not check const registers to see if their value is 1.0, so it should + * be called after the constant_folding optimization. + * @return + * 0 if the ADD instruction is still part of the program. + * 1 if the ADD instruction is no longer part of the program. + */ +static int peephole_add_presub_inv( + struct radeon_compiler * c, + struct rc_instruction * inst_add) +{ + unsigned int i, swz; + + if (!is_presub_candidate(c, inst_add)) + return 0; + + /* Check if src0 is 1. */ + /* XXX It would be nice to use is_src_uniform_constant here, but that + * function only works if the register's file is RC_FILE_NONE */ + for(i = 0; i < 4; i++ ) { + swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i); + if(((1 << i) & inst_add->U.I.DstReg.WriteMask) + && swz != RC_SWIZZLE_ONE) { + return 0; + } + } + + /* Check src1. */ + if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) != + inst_add->U.I.DstReg.WriteMask + || inst_add->U.I.SrcReg[1].Abs + || (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY + && inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT) + || src_has_const_swz(inst_add->U.I.SrcReg[1])) { + + return 0; + } + + if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) { + rc_remove_instruction(inst_add); + return 1; + } + return 0; +} + +/** + * @return + * 0 if inst is still part of the program. + * 1 if inst is no longer part of the program. + */ +static int peephole(struct radeon_compiler * c, struct rc_instruction * inst) +{ + switch(inst->U.I.Opcode){ + case RC_OPCODE_ADD: + if (c->has_presub) { + if(peephole_add_presub_inv(c, inst)) + return 1; + if(peephole_add_presub_add(c, inst)) + return 1; + } + break; + default: + break; + } + return 0; +} + +void rc_optimize(struct radeon_compiler * c, void *user) +{ + struct rc_instruction * inst = c->Program.Instructions.Next; + while(inst != &c->Program.Instructions) { + struct rc_instruction * cur = inst; + inst = inst->Next; + + constant_folding(c, cur); + + if(peephole(c, cur)) + continue; + + if (cur->U.I.Opcode == RC_OPCODE_MOV) { + copy_propagate(c, cur); + /* cur may no longer be part of the program */ + } + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_pair_dead_sources.c b/src/gallium/drivers/r300/compiler/radeon_pair_dead_sources.c new file mode 100644 index 00000000000..1e9a2c09d44 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_pair_dead_sources.c @@ -0,0 +1,62 @@ + +#include "radeon_compiler.h" +#include "radeon_compiler_util.h" +#include "radeon_opcodes.h" +#include "radeon_program_pair.h" + +static void mark_used_presub(struct rc_pair_sub_instruction * sub) +{ + if (sub->Src[RC_PAIR_PRESUB_SRC].Used) { + unsigned int presub_reg_count = rc_presubtract_src_reg_count( + sub->Src[RC_PAIR_PRESUB_SRC].Index); + unsigned int i; + for (i = 0; i < presub_reg_count; i++) { + sub->Src[i].Used = 1; + } + } +} + +static void mark_used( + struct rc_instruction * inst, + struct rc_pair_sub_instruction * sub) +{ + unsigned int i; + const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); + for (i = 0; i < info->NumSrcRegs; i++) { + unsigned int src_type = rc_source_type_swz(sub->Arg[i].Swizzle); + if (src_type & RC_SOURCE_RGB) { + inst->U.P.RGB.Src[sub->Arg[i].Source].Used = 1; + } + + if (src_type & RC_SOURCE_ALPHA) { + inst->U.P.Alpha.Src[sub->Arg[i].Source].Used = 1; + } + } +} + +/** + * This pass finds sources that are not used by their instruction and marks + * them as unused. + */ +void rc_pair_remove_dead_sources(struct radeon_compiler * c, void *user) +{ + struct rc_instruction * inst; + for (inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + unsigned int i; + if (inst->Type == RC_INSTRUCTION_NORMAL) + continue; + + /* Mark all sources as unused */ + for (i = 0; i < 4; i++) { + inst->U.P.RGB.Src[i].Used = 0; + inst->U.P.Alpha.Src[i].Used = 0; + } + mark_used(inst, &inst->U.P.RGB); + mark_used(inst, &inst->U.P.Alpha); + + mark_used_presub(&inst->U.P.RGB); + mark_used_presub(&inst->U.P.Alpha); + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c b/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c new file mode 100644 index 00000000000..49983d6ce75 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c @@ -0,0 +1,706 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * Copyright 2011 Tom Stellard <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program_pair.h" + +#include <stdio.h> + +#include "main/glheader.h" +#include "program/register_allocate.h" +#include "ralloc.h" + +#include "r300_fragprog_swizzle.h" +#include "radeon_compiler.h" +#include "radeon_compiler_util.h" +#include "radeon_dataflow.h" +#include "radeon_list.h" +#include "radeon_variable.h" + +#define VERBOSE 0 + +#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) + + + +struct register_info { + struct live_intervals Live[4]; + + unsigned int Used:1; + unsigned int Allocated:1; + unsigned int File:3; + unsigned int Index:RC_REGISTER_INDEX_BITS; + unsigned int Writemask; +}; + +struct regalloc_state { + struct radeon_compiler * C; + + struct register_info * Input; + unsigned int NumInputs; + + struct register_info * Temporary; + unsigned int NumTemporaries; + + unsigned int Simple; + int LoopEnd; +}; + +enum rc_reg_class { + RC_REG_CLASS_SINGLE, + RC_REG_CLASS_DOUBLE, + RC_REG_CLASS_TRIPLE, + RC_REG_CLASS_ALPHA, + RC_REG_CLASS_SINGLE_PLUS_ALPHA, + RC_REG_CLASS_DOUBLE_PLUS_ALPHA, + RC_REG_CLASS_TRIPLE_PLUS_ALPHA, + RC_REG_CLASS_X, + RC_REG_CLASS_Y, + RC_REG_CLASS_Z, + RC_REG_CLASS_XY, + RC_REG_CLASS_YZ, + RC_REG_CLASS_XZ, + RC_REG_CLASS_XW, + RC_REG_CLASS_YW, + RC_REG_CLASS_ZW, + RC_REG_CLASS_XYW, + RC_REG_CLASS_YZW, + RC_REG_CLASS_XZW, + RC_REG_CLASS_COUNT +}; + +struct rc_class { + enum rc_reg_class Class; + + unsigned int WritemaskCount; + + /** This is 1 if this class is being used by the register allocator + * and 0 otherwise */ + unsigned int Used; + + /** This is the ID number assigned to this class by ra. */ + unsigned int Id; + + /** List of writemasks that belong to this class */ + unsigned int Writemasks[3]; + + +}; + +static void print_live_intervals(struct live_intervals * src) +{ + if (!src || !src->Used) { + DBG("(null)"); + return; + } + + DBG("(%i,%i)", src->Start, src->End); +} + +static int overlap_live_intervals(struct live_intervals * a, struct live_intervals * b) +{ + if (VERBOSE) { + DBG("overlap_live_intervals: "); + print_live_intervals(a); + DBG(" to "); + print_live_intervals(b); + DBG("\n"); + } + + if (!a->Used || !b->Used) { + DBG(" unused interval\n"); + return 0; + } + + if (a->Start > b->Start) { + if (a->Start < b->End) { + DBG(" overlap\n"); + return 1; + } + } else if (b->Start > a->Start) { + if (b->Start < a->End) { + DBG(" overlap\n"); + return 1; + } + } else { /* a->Start == b->Start */ + if (a->Start != a->End && b->Start != b->End) { + DBG(" overlap\n"); + return 1; + } + } + + DBG(" no overlap\n"); + + return 0; +} + +static void scan_read_callback(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + struct regalloc_state * s = data; + struct register_info * reg; + unsigned int i; + + if (file != RC_FILE_INPUT) + return; + + s->Input[index].Used = 1; + reg = &s->Input[index]; + + for (i = 0; i < 4; i++) { + if (!((mask >> i) & 0x1)) { + continue; + } + reg->Live[i].Used = 1; + reg->Live[i].Start = 0; + reg->Live[i].End = + s->LoopEnd > inst->IP ? s->LoopEnd : inst->IP; + } +} + +static void remap_register(void * data, struct rc_instruction * inst, + rc_register_file * file, unsigned int * index) +{ + struct regalloc_state * s = data; + const struct register_info * reg; + + if (*file == RC_FILE_TEMPORARY && s->Simple) + reg = &s->Temporary[*index]; + else if (*file == RC_FILE_INPUT) + reg = &s->Input[*index]; + else + return; + + if (reg->Allocated) { + *index = reg->Index; + } +} + +static void alloc_input_simple(void * data, unsigned int input, + unsigned int hwreg) +{ + struct regalloc_state * s = data; + + if (input >= s->NumInputs) + return; + + s->Input[input].Allocated = 1; + s->Input[input].File = RC_FILE_TEMPORARY; + s->Input[input].Index = hwreg; +} + +/* This functions offsets the temporary register indices by the number + * of input registers, because input registers are actually temporaries and + * should not occupy the same space. + * + * This pass is supposed to be used to maintain correct allocation of inputs + * if the standard register allocation is disabled. */ +static void do_regalloc_inputs_only(struct regalloc_state * s) +{ + for (unsigned i = 0; i < s->NumTemporaries; i++) { + s->Temporary[i].Allocated = 1; + s->Temporary[i].File = RC_FILE_TEMPORARY; + s->Temporary[i].Index = i + s->NumInputs; + } +} + +static unsigned int is_derivative(rc_opcode op) +{ + return (op == RC_OPCODE_DDX || op == RC_OPCODE_DDY); +} + +static int find_class( + struct rc_class * classes, + unsigned int writemask, + unsigned int max_writemask_count) +{ + unsigned int i; + for (i = 0; i < RC_REG_CLASS_COUNT; i++) { + unsigned int j; + if (classes[i].WritemaskCount > max_writemask_count) { + continue; + } + for (j = 0; j < 3; j++) { + if (classes[i].Writemasks[j] == writemask) { + return i; + } + } + } + return -1; +} + +static enum rc_reg_class variable_get_class( + struct rc_variable * variable, + struct rc_class * classes) +{ + unsigned int i; + unsigned int can_change_writemask= 1; + unsigned int writemask = rc_variable_writemask_sum(variable); + struct rc_list * readers = rc_variable_readers_union(variable); + int class_index; + + if (!variable->C->is_r500) { + struct rc_class c; + /* The assumption here is that if an instruction has type + * RC_INSTRUCTION_NORMAL then it is a TEX instruction. + * r300 and r400 can't swizzle the result of a TEX lookup. */ + if (variable->Inst->Type == RC_INSTRUCTION_NORMAL) { + writemask = RC_MASK_XYZW; + } + + /* Check if it is possible to do swizzle packing for r300/r400 + * without creating non-native swizzles. */ + class_index = find_class(classes, writemask, 3); + if (class_index < 0) { + goto error; + } + c = classes[class_index]; + for (i = 0; i < c.WritemaskCount; i++) { + int j; + unsigned int conversion_swizzle = + rc_make_conversion_swizzle( + writemask, c.Writemasks[i]); + for (j = 0; j < variable->ReaderCount; j++) { + unsigned int old_swizzle; + unsigned int new_swizzle; + struct rc_reader r = variable->Readers[j]; + if (r.Inst->Type == RC_INSTRUCTION_PAIR ) { + old_swizzle = r.U.P.Arg->Swizzle; + } else { + old_swizzle = r.U.I.Src->Swizzle; + } + new_swizzle = rc_adjust_channels( + old_swizzle, conversion_swizzle); + if (!r300_swizzle_is_native_basic(new_swizzle)) { + can_change_writemask = 0; + break; + } + } + if (!can_change_writemask) { + break; + } + } + } + + if (variable->Inst->Type == RC_INSTRUCTION_PAIR) { + /* DDX/DDY seem to always fail when their writemasks are + * changed.*/ + if (is_derivative(variable->Inst->U.P.RGB.Opcode) + || is_derivative(variable->Inst->U.P.Alpha.Opcode)) { + can_change_writemask = 0; + } + } + for ( ; readers; readers = readers->Next) { + struct rc_reader * r = readers->Item; + if (r->Inst->Type == RC_INSTRUCTION_PAIR) { + if (r->U.P.Arg->Source == RC_PAIR_PRESUB_SRC) { + can_change_writemask = 0; + break; + } + /* DDX/DDY also fail when their swizzles are changed. */ + if (is_derivative(r->Inst->U.P.RGB.Opcode) + || is_derivative(r->Inst->U.P.Alpha.Opcode)) { + can_change_writemask = 0; + break; + } + } + } + + class_index = find_class(classes, writemask, + can_change_writemask ? 3 : 1); + if (class_index > -1) { + return classes[class_index].Class; + } else { +error: + rc_error(variable->C, + "Could not find class for index=%u mask=%u\n", + variable->Dst.Index, writemask); + return 0; + } +} + +static unsigned int overlap_live_intervals_array( + struct live_intervals * a, + struct live_intervals * b) +{ + unsigned int a_chan, b_chan; + for (a_chan = 0; a_chan < 4; a_chan++) { + for (b_chan = 0; b_chan < 4; b_chan++) { + if (overlap_live_intervals(&a[a_chan], &b[b_chan])) { + return 1; + } + } + } + return 0; +} + +static unsigned int reg_get_index(int reg) +{ + return reg / RC_MASK_XYZW; +} + +static unsigned int reg_get_writemask(int reg) +{ + return (reg % RC_MASK_XYZW) + 1; +} + +static int get_reg_id(unsigned int index, unsigned int writemask) +{ + assert(writemask); + if (writemask == 0) { + return 0; + } + return (index * RC_MASK_XYZW) + (writemask - 1); +} + +#if VERBOSE +static void print_reg(int reg) +{ + unsigned int index = reg_get_index(reg); + unsigned int mask = reg_get_writemask(reg); + fprintf(stderr, "Temp[%u].%c%c%c%c", index, + mask & RC_MASK_X ? 'x' : '_', + mask & RC_MASK_Y ? 'y' : '_', + mask & RC_MASK_Z ? 'z' : '_', + mask & RC_MASK_W ? 'w' : '_'); +} +#endif + +static void add_register_conflicts( + struct ra_regs * regs, + unsigned int max_temp_regs) +{ + unsigned int index, a_mask, b_mask; + for (index = 0; index < max_temp_regs; index++) { + for(a_mask = 1; a_mask <= RC_MASK_XYZW; a_mask++) { + for (b_mask = a_mask + 1; b_mask <= RC_MASK_XYZW; + b_mask++) { + if (a_mask & b_mask) { + ra_add_reg_conflict(regs, + get_reg_id(index, a_mask), + get_reg_id(index, b_mask)); + } + } + } + } +} + +static void do_advanced_regalloc(struct regalloc_state * s) +{ + struct rc_class rc_class_list [] = { + {RC_REG_CLASS_SINGLE, 3, 0, 0, + {RC_MASK_X, + RC_MASK_Y, + RC_MASK_Z}}, + {RC_REG_CLASS_DOUBLE, 3, 0, 0, + {RC_MASK_X | RC_MASK_Y, + RC_MASK_X | RC_MASK_Z, + RC_MASK_Y | RC_MASK_Z}}, + {RC_REG_CLASS_TRIPLE, 1, 0, 0, + {RC_MASK_X | RC_MASK_Y | RC_MASK_Z, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_ALPHA, 1, 0, 0, + {RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_SINGLE_PLUS_ALPHA, 3, 0, 0, + {RC_MASK_X | RC_MASK_W, + RC_MASK_Y | RC_MASK_W, + RC_MASK_Z | RC_MASK_W}}, + {RC_REG_CLASS_DOUBLE_PLUS_ALPHA, 3, 0, 0, + {RC_MASK_X | RC_MASK_Y | RC_MASK_W, + RC_MASK_X | RC_MASK_Z | RC_MASK_W, + RC_MASK_Y | RC_MASK_Z | RC_MASK_W}}, + {RC_REG_CLASS_TRIPLE_PLUS_ALPHA, 1, 0, 0, + {RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_X, 1, 0, 0, + {RC_MASK_X, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_Y, 1, 0, 0, + {RC_MASK_Y, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_Z, 1, 0, 0, + {RC_MASK_Z, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_XY, 1, 0, 0, + {RC_MASK_X | RC_MASK_Y, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_YZ, 1, 0, 0, + {RC_MASK_Y | RC_MASK_Z, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_XZ, 1, 0, 0, + {RC_MASK_X | RC_MASK_Z, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_XW, 1, 0, 0, + {RC_MASK_X | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_YW, 1, 0, 0, + {RC_MASK_Y | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_ZW, 1, 0, 0, + {RC_MASK_Z | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_XYW, 1, 0, 0, + {RC_MASK_X | RC_MASK_Y | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_YZW, 1, 0, 0, + {RC_MASK_Y | RC_MASK_Z | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_XZW, 1, 0, 0, + {RC_MASK_X | RC_MASK_Z | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}} + }; + + unsigned int i, j, index, input_node, node_count, node_index; + unsigned int * node_classes; + unsigned int * input_classes; + struct rc_instruction * inst; + struct rc_list * var_ptr; + struct rc_list * variables; + struct ra_regs * regs; + struct ra_graph * graph; + + /* Allocate the main ra data structure */ + regs = ra_alloc_reg_set(s->C->max_temp_regs * RC_MASK_XYZW); + + /* Get list of program variables */ + variables = rc_get_variables(s->C); + node_count = rc_list_count(variables); + node_classes = memory_pool_malloc(&s->C->Pool, + node_count * sizeof(unsigned int)); + input_classes = memory_pool_malloc(&s->C->Pool, + s->NumInputs * sizeof(unsigned int)); + + for (var_ptr = variables, node_index = 0; var_ptr; + var_ptr = var_ptr->Next, node_index++) { + unsigned int class_index; + /* Compute the live intervals */ + rc_variable_compute_live_intervals(var_ptr->Item); + + class_index = variable_get_class(var_ptr->Item, rc_class_list); + + /* If we haven't used this register class yet, mark it + * as used and allocate space for it. */ + if (!rc_class_list[class_index].Used) { + rc_class_list[class_index].Used = 1; + rc_class_list[class_index].Id = ra_alloc_reg_class(regs); + } + + node_classes[node_index] = rc_class_list[class_index].Id; + } + + + /* Assign registers to the classes */ + for (i = 0; i < RC_REG_CLASS_COUNT; i++) { + struct rc_class class = rc_class_list[i]; + if (!class.Used) { + continue; + } + + for (index = 0; index < s->C->max_temp_regs; index++) { + for (j = 0; j < class.WritemaskCount; j++) { + int reg_id = get_reg_id(index, + class.Writemasks[j]); + ra_class_add_reg(regs, class.Id, reg_id); + } + } + } + + /* Add register conflicts */ + add_register_conflicts(regs, s->C->max_temp_regs); + + /* Calculate live intervals for input registers */ + for (inst = s->C->Program.Instructions.Next; + inst != &s->C->Program.Instructions; + inst = inst->Next) { + rc_opcode op = rc_get_flow_control_inst(inst); + if (op == RC_OPCODE_BGNLOOP) { + struct rc_instruction * endloop = + rc_match_bgnloop(inst); + if (endloop->IP > s->LoopEnd) { + s->LoopEnd = endloop->IP; + } + } + rc_for_all_reads_mask(inst, scan_read_callback, s); + } + + /* Create classes for input registers */ + for (i = 0; i < s->NumInputs; i++) { + unsigned int chan, class_id, writemask = 0; + for (chan = 0; chan < 4; chan++) { + if (s->Input[i].Live[chan].Used) { + writemask |= (1 << chan); + } + } + s->Input[i].Writemask = writemask; + if (!writemask) { + continue; + } + + class_id = ra_alloc_reg_class(regs); + input_classes[i] = class_id; + ra_class_add_reg(regs, class_id, + get_reg_id(s->Input[i].Index, writemask)); + } + + ra_set_finalize(regs); + + graph = ra_alloc_interference_graph(regs, node_count + s->NumInputs); + + /* Build the interference graph */ + for (var_ptr = variables, node_index = 0; var_ptr; + var_ptr = var_ptr->Next,node_index++) { + struct rc_list * a, * b; + unsigned int b_index; + + ra_set_node_class(graph, node_index, node_classes[node_index]); + + for (a = var_ptr, b = var_ptr->Next, b_index = node_index + 1; + b; b = b->Next, b_index++) { + struct rc_variable * var_a = a->Item; + while (var_a) { + struct rc_variable * var_b = b->Item; + while (var_b) { + if (overlap_live_intervals_array(var_a->Live, var_b->Live)) { + ra_add_node_interference(graph, + node_index, b_index); + } + var_b = var_b->Friend; + } + var_a = var_a->Friend; + } + } + } + + /* Add input registers to the interference graph */ + for (i = 0, input_node = 0; i< s->NumInputs; i++) { + if (!s->Input[i].Writemask) { + continue; + } + ra_set_node_class(graph, node_count + input_node, + input_classes[i]); + for (var_ptr = variables, node_index = 0; + var_ptr; var_ptr = var_ptr->Next, node_index++) { + struct rc_variable * var = var_ptr->Item; + if (overlap_live_intervals_array(s->Input[i].Live, + var->Live)) { + ra_add_node_interference(graph, node_index, + node_count + input_node); + } + } + /* Manually allocate a register for this input */ + ra_set_node_reg(graph, node_count + input_node, get_reg_id( + s->Input[i].Index, s->Input[i].Writemask)); + input_node++; + } + + if (!ra_allocate_no_spills(graph)) { + rc_error(s->C, "Ran out of hardware temporaries\n"); + return; + } + + /* Rewrite the registers */ + for (var_ptr = variables, node_index = 0; var_ptr; + var_ptr = var_ptr->Next, node_index++) { + int reg = ra_get_node_reg(graph, node_index); + unsigned int writemask = reg_get_writemask(reg); + unsigned int index = reg_get_index(reg); + struct rc_variable * var = var_ptr->Item; + + if (!s->C->is_r500 && var->Inst->Type == RC_INSTRUCTION_NORMAL) { + writemask = rc_variable_writemask_sum(var); + } + + if (var->Dst.File == RC_FILE_INPUT) { + continue; + } + rc_variable_change_dst(var, index, writemask); + } + + ralloc_free(graph); + ralloc_free(regs); +} + +/** + * @param user This parameter should be a pointer to an integer value. If this + * integer value is zero, then a simple register allocator will be used that + * only allocates space for input registers (\sa do_regalloc_inputs_only). If + * user is non-zero, then the regular register allocator will be used + * (\sa do_regalloc). + */ +void rc_pair_regalloc(struct radeon_compiler *cc, void *user) +{ + struct r300_fragment_program_compiler *c = + (struct r300_fragment_program_compiler*)cc; + struct regalloc_state s; + int * do_full_regalloc = (int*)user; + + memset(&s, 0, sizeof(s)); + s.C = cc; + s.NumInputs = rc_get_max_index(cc, RC_FILE_INPUT) + 1; + s.Input = memory_pool_malloc(&cc->Pool, + s.NumInputs * sizeof(struct register_info)); + memset(s.Input, 0, s.NumInputs * sizeof(struct register_info)); + + s.NumTemporaries = rc_get_max_index(cc, RC_FILE_TEMPORARY) + 1; + s.Temporary = memory_pool_malloc(&cc->Pool, + s.NumTemporaries * sizeof(struct register_info)); + memset(s.Temporary, 0, s.NumTemporaries * sizeof(struct register_info)); + + rc_recompute_ips(s.C); + + c->AllocateHwInputs(c, &alloc_input_simple, &s); + if (*do_full_regalloc) { + do_advanced_regalloc(&s); + } else { + s.Simple = 1; + do_regalloc_inputs_only(&s); + } + + /* Rewrite inputs and if we are doing the simple allocation, rewrite + * temporaries too. */ + for (struct rc_instruction *inst = s.C->Program.Instructions.Next; + inst != &s.C->Program.Instructions; + inst = inst->Next) { + rc_remap_registers(inst, &remap_register, &s); + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c b/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c new file mode 100644 index 00000000000..25cd52c9cd4 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c @@ -0,0 +1,1010 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program_pair.h" + +#include <stdio.h> + +#include "radeon_compiler.h" +#include "radeon_compiler_util.h" +#include "radeon_dataflow.h" + + +#define VERBOSE 0 + +#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) + +struct schedule_instruction { + struct rc_instruction * Instruction; + + /** Next instruction in the linked list of ready instructions. */ + struct schedule_instruction *NextReady; + + /** Values that this instruction reads and writes */ + struct reg_value * WriteValues[4]; + struct reg_value * ReadValues[12]; + unsigned int NumWriteValues:3; + unsigned int NumReadValues:4; + + /** + * Number of (read and write) dependencies that must be resolved before + * this instruction can be scheduled. + */ + unsigned int NumDependencies:5; + + /** List of all readers (see rc_get_readers() for the definition of + * "all readers"), even those outside the basic block this instruction + * lives in. */ + struct rc_reader_data GlobalReaders; +}; + + +/** + * Used to keep track of which instructions read a value. + */ +struct reg_value_reader { + struct schedule_instruction *Reader; + struct reg_value_reader *Next; +}; + +/** + * Used to keep track which values are stored in each component of a + * RC_FILE_TEMPORARY. + */ +struct reg_value { + struct schedule_instruction * Writer; + + /** + * Unordered linked list of instructions that read from this value. + * When this value becomes available, we increase all readers' + * dependency count. + */ + struct reg_value_reader *Readers; + + /** + * Number of readers of this value. This is decremented each time + * a reader of the value is committed. + * When the reader cound reaches zero, the dependency count + * of the instruction writing \ref Next is decremented. + */ + unsigned int NumReaders; + + struct reg_value *Next; /**< Pointer to the next value to be written to the same register */ +}; + +struct register_state { + struct reg_value * Values[4]; +}; + +struct remap_reg { + struct rc_instruciont * Inst; + unsigned int OldIndex:(RC_REGISTER_INDEX_BITS+1); + unsigned int OldSwizzle:3; + unsigned int NewIndex:(RC_REGISTER_INDEX_BITS+1); + unsigned int NewSwizzle:3; + unsigned int OnlyTexReads:1; + struct remap_reg * Next; +}; + +struct schedule_state { + struct radeon_compiler * C; + struct schedule_instruction * Current; + + struct register_state Temporary[RC_REGISTER_MAX_INDEX]; + + /** + * Linked lists of instructions that can be scheduled right now, + * based on which ALU/TEX resources they require. + */ + /*@{*/ + struct schedule_instruction *ReadyFullALU; + struct schedule_instruction *ReadyRGB; + struct schedule_instruction *ReadyAlpha; + struct schedule_instruction *ReadyTEX; + /*@}*/ +}; + +static struct reg_value ** get_reg_valuep(struct schedule_state * s, + rc_register_file file, unsigned int index, unsigned int chan) +{ + if (file != RC_FILE_TEMPORARY) + return 0; + + if (index >= RC_REGISTER_MAX_INDEX) { + rc_error(s->C, "%s: index %i out of bounds\n", __FUNCTION__, index); + return 0; + } + + return &s->Temporary[index].Values[chan]; +} + +static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst) +{ + inst->NextReady = *list; + *list = inst; +} + +static void add_inst_to_list_end(struct schedule_instruction ** list, + struct schedule_instruction * inst) +{ + if(!*list){ + *list = inst; + }else{ + struct schedule_instruction * temp = *list; + while(temp->NextReady){ + temp = temp->NextReady; + } + temp->NextReady = inst; + } +} + +static void instruction_ready(struct schedule_state * s, struct schedule_instruction * sinst) +{ + DBG("%i is now ready\n", sinst->Instruction->IP); + + /* Adding Ready TEX instructions to the end of the "Ready List" helps + * us emit TEX instructions in blocks without losing our place. */ + if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) + add_inst_to_list_end(&s->ReadyTEX, sinst); + else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP) + add_inst_to_list(&s->ReadyRGB, sinst); + else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP) + add_inst_to_list(&s->ReadyAlpha, sinst); + else + add_inst_to_list(&s->ReadyFullALU, sinst); +} + +static void decrease_dependencies(struct schedule_state * s, struct schedule_instruction * sinst) +{ + assert(sinst->NumDependencies > 0); + sinst->NumDependencies--; + if (!sinst->NumDependencies) + instruction_ready(s, sinst); +} + +/** + * This function decreases the dependencies of the next instruction that + * wants to write to each of sinst's read values. + */ +static void commit_update_reads(struct schedule_state * s, + struct schedule_instruction * sinst){ + unsigned int i; + for(i = 0; i < sinst->NumReadValues; ++i) { + struct reg_value * v = sinst->ReadValues[i]; + assert(v->NumReaders > 0); + v->NumReaders--; + if (!v->NumReaders) { + if (v->Next) + decrease_dependencies(s, v->Next->Writer); + } + } +} + +static void commit_update_writes(struct schedule_state * s, + struct schedule_instruction * sinst){ + unsigned int i; + for(i = 0; i < sinst->NumWriteValues; ++i) { + struct reg_value * v = sinst->WriteValues[i]; + if (v->NumReaders) { + for(struct reg_value_reader * r = v->Readers; r; r = r->Next) { + decrease_dependencies(s, r->Reader); + } + } else { + /* This happens in instruction sequences of the type + * OP r.x, ...; + * OP r.x, r.x, ...; + * See also the subtlety in how instructions that both + * read and write the same register are scanned. + */ + if (v->Next) + decrease_dependencies(s, v->Next->Writer); + } + } +} + +static void commit_alu_instruction(struct schedule_state * s, struct schedule_instruction * sinst) +{ + DBG("%i: commit\n", sinst->Instruction->IP); + + commit_update_reads(s, sinst); + + commit_update_writes(s, sinst); +} + +/** + * Emit all ready texture instructions in a single block. + * + * Emit as a single block to (hopefully) sample many textures in parallel, + * and to avoid hardware indirections on R300. + */ +static void emit_all_tex(struct schedule_state * s, struct rc_instruction * before) +{ + struct schedule_instruction *readytex; + struct rc_instruction * inst_begin; + + assert(s->ReadyTEX); + + /* Node marker for R300 */ + inst_begin = rc_insert_new_instruction(s->C, before->Prev); + inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX; + + /* Link texture instructions back in */ + readytex = s->ReadyTEX; + while(readytex) { + rc_insert_instruction(before->Prev, readytex->Instruction); + DBG("%i: commit TEX reads\n", readytex->Instruction->IP); + + /* All of the TEX instructions in the same TEX block have + * their source registers read from before any of the + * instructions in that block write to their destination + * registers. This means that when we commit a TEX + * instruction, any other TEX instruction that wants to write + * to one of the committed instruction's source register can be + * marked as ready and should be emitted in the same TEX + * block. This prevents the following sequence from being + * emitted in two different TEX blocks: + * 0: TEX temp[0].xyz, temp[1].xy__, 2D[0]; + * 1: TEX temp[1].xyz, temp[2].xy__, 2D[0]; + */ + commit_update_reads(s, readytex); + readytex = readytex->NextReady; + } + readytex = s->ReadyTEX; + s->ReadyTEX = 0; + while(readytex){ + DBG("%i: commit TEX writes\n", readytex->Instruction->IP); + commit_update_writes(s, readytex); + readytex = readytex->NextReady; + } +} + +/* This is a helper function for destructive_merge_instructions(). It helps + * merge presubtract sources from two instructions and makes sure the + * presubtract sources end up in the correct spot. This function assumes that + * dst_full is an rgb instruction, meaning that it has a vector instruction(rgb) + * but no scalar instruction (alpha). + * @return 0 if merging the presubtract sources fails. + * @retrun 1 if merging the presubtract sources succeeds. + */ +static int merge_presub_sources( + struct rc_pair_instruction * dst_full, + struct rc_pair_sub_instruction src, + unsigned int type) +{ + unsigned int srcp_src, srcp_regs, is_rgb, is_alpha; + struct rc_pair_sub_instruction * dst_sub; + const struct rc_opcode_info * info; + + assert(dst_full->Alpha.Opcode == RC_OPCODE_NOP); + + switch(type) { + case RC_SOURCE_RGB: + is_rgb = 1; + is_alpha = 0; + dst_sub = &dst_full->RGB; + break; + case RC_SOURCE_ALPHA: + is_rgb = 0; + is_alpha = 1; + dst_sub = &dst_full->Alpha; + break; + default: + assert(0); + return 0; + } + + info = rc_get_opcode_info(dst_full->RGB.Opcode); + + if (dst_sub->Src[RC_PAIR_PRESUB_SRC].Used) + return 0; + + srcp_regs = rc_presubtract_src_reg_count( + src.Src[RC_PAIR_PRESUB_SRC].Index); + for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) { + unsigned int arg; + int free_source; + unsigned int one_way = 0; + struct rc_pair_instruction_source srcp = src.Src[srcp_src]; + struct rc_pair_instruction_source temp; + + free_source = rc_pair_alloc_source(dst_full, is_rgb, is_alpha, + srcp.File, srcp.Index); + + /* If free_source < 0 then there are no free source + * slots. */ + if (free_source < 0) + return 0; + + temp = dst_sub->Src[srcp_src]; + dst_sub->Src[srcp_src] = dst_sub->Src[free_source]; + + /* srcp needs src0 and src1 to be the same */ + if (free_source < srcp_src) { + if (!temp.Used) + continue; + free_source = rc_pair_alloc_source(dst_full, is_rgb, + is_alpha, temp.File, temp.Index); + if (free_source < 0) + return 0; + one_way = 1; + } else { + dst_sub->Src[free_source] = temp; + } + + /* If free_source == srcp_src, then the presubtract + * source is already in the correct place. */ + if (free_source == srcp_src) + continue; + + /* Shuffle the sources, so we can put the + * presubtract source in the correct place. */ + for(arg = 0; arg < info->NumSrcRegs; arg++) { + /*If this arg does not read from an rgb source, + * do nothing. */ + if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle) + & type)) { + continue; + } + + if (dst_full->RGB.Arg[arg].Source == srcp_src) + dst_full->RGB.Arg[arg].Source = free_source; + /* We need to do this just in case register + * is one of the sources already, but in the + * wrong spot. */ + else if(dst_full->RGB.Arg[arg].Source == free_source + && !one_way) { + dst_full->RGB.Arg[arg].Source = srcp_src; + } + } + } + return 1; +} + + +/* This function assumes that rgb.Alpha and alpha.RGB are unused */ +static int destructive_merge_instructions( + struct rc_pair_instruction * rgb, + struct rc_pair_instruction * alpha) +{ + const struct rc_opcode_info * opcode; + + assert(rgb->Alpha.Opcode == RC_OPCODE_NOP); + assert(alpha->RGB.Opcode == RC_OPCODE_NOP); + + /* Presubtract registers need to be merged first so that registers + * needed by the presubtract operation can be placed in src0 and/or + * src1. */ + + /* Merge the rgb presubtract registers. */ + if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) { + if (!merge_presub_sources(rgb, alpha->RGB, RC_SOURCE_RGB)) { + return 0; + } + } + /* Merge the alpha presubtract registers */ + if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) { + if(!merge_presub_sources(rgb, alpha->Alpha, RC_SOURCE_ALPHA)){ + return 0; + } + } + + /* Copy alpha args into rgb */ + opcode = rc_get_opcode_info(alpha->Alpha.Opcode); + + for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { + unsigned int srcrgb = 0; + unsigned int srcalpha = 0; + unsigned int oldsrc = alpha->Alpha.Arg[arg].Source; + rc_register_file file = 0; + unsigned int index = 0; + int source; + + if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 3) { + srcrgb = 1; + file = alpha->RGB.Src[oldsrc].File; + index = alpha->RGB.Src[oldsrc].Index; + } else if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 4) { + srcalpha = 1; + file = alpha->Alpha.Src[oldsrc].File; + index = alpha->Alpha.Src[oldsrc].Index; + } + + source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index); + if (source < 0) + return 0; + + rgb->Alpha.Arg[arg].Source = source; + rgb->Alpha.Arg[arg].Swizzle = alpha->Alpha.Arg[arg].Swizzle; + rgb->Alpha.Arg[arg].Abs = alpha->Alpha.Arg[arg].Abs; + rgb->Alpha.Arg[arg].Negate = alpha->Alpha.Arg[arg].Negate; + } + + /* Copy alpha opcode into rgb */ + rgb->Alpha.Opcode = alpha->Alpha.Opcode; + rgb->Alpha.DestIndex = alpha->Alpha.DestIndex; + rgb->Alpha.WriteMask = alpha->Alpha.WriteMask; + rgb->Alpha.OutputWriteMask = alpha->Alpha.OutputWriteMask; + rgb->Alpha.DepthWriteMask = alpha->Alpha.DepthWriteMask; + rgb->Alpha.Saturate = alpha->Alpha.Saturate; + + /* Merge ALU result writing */ + if (alpha->WriteALUResult) { + if (rgb->WriteALUResult) + return 0; + + rgb->WriteALUResult = alpha->WriteALUResult; + rgb->ALUResultCompare = alpha->ALUResultCompare; + } + + return 1; +} + +/** + * Try to merge the given instructions into the rgb instructions. + * + * Return true on success; on failure, return false, and keep + * the instructions untouched. + */ +static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_instruction * alpha) +{ + struct rc_pair_instruction backup; + + /*Instructions can't write output registers and ALU result at the + * same time. */ + if ((rgb->WriteALUResult && alpha->Alpha.OutputWriteMask) + || (rgb->RGB.OutputWriteMask && alpha->WriteALUResult)) { + return 0; + } + memcpy(&backup, rgb, sizeof(struct rc_pair_instruction)); + + if (destructive_merge_instructions(rgb, alpha)) + return 1; + + memcpy(rgb, &backup, sizeof(struct rc_pair_instruction)); + return 0; +} + +static void presub_nop(struct rc_instruction * emitted) { + int prev_rgb_index, prev_alpha_index, i, num_src; + + /* We don't need a nop if the previous instruction is a TEX. */ + if (emitted->Prev->Type != RC_INSTRUCTION_PAIR) { + return; + } + if (emitted->Prev->U.P.RGB.WriteMask) + prev_rgb_index = emitted->Prev->U.P.RGB.DestIndex; + else + prev_rgb_index = -1; + if (emitted->Prev->U.P.Alpha.WriteMask) + prev_alpha_index = emitted->Prev->U.P.Alpha.DestIndex; + else + prev_alpha_index = 1; + + /* Check the previous rgb instruction */ + if (emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) { + num_src = rc_presubtract_src_reg_count( + emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index); + for (i = 0; i < num_src; i++) { + unsigned int index = emitted->U.P.RGB.Src[i].Index; + if (emitted->U.P.RGB.Src[i].File == RC_FILE_TEMPORARY + && (index == prev_rgb_index + || index == prev_alpha_index)) { + emitted->Prev->U.P.Nop = 1; + return; + } + } + } + + /* Check the previous alpha instruction. */ + if (!emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used) + return; + + num_src = rc_presubtract_src_reg_count( + emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index); + for (i = 0; i < num_src; i++) { + unsigned int index = emitted->U.P.Alpha.Src[i].Index; + if(emitted->U.P.Alpha.Src[i].File == RC_FILE_TEMPORARY + && (index == prev_rgb_index || index == prev_alpha_index)) { + emitted->Prev->U.P.Nop = 1; + return; + } + } +} + +static void rgb_to_alpha_remap ( + struct rc_instruction * inst, + struct rc_pair_instruction_arg * arg, + rc_register_file old_file, + rc_swizzle old_swz, + unsigned int new_index) +{ + int new_src_index; + unsigned int i; + + for (i = 0; i < 3; i++) { + if (get_swz(arg->Swizzle, i) == old_swz) { + SET_SWZ(arg->Swizzle, i, RC_SWIZZLE_W); + } + } + new_src_index = rc_pair_alloc_source(&inst->U.P, 0, 1, + old_file, new_index); + /* This conversion is not possible, we must have made a mistake in + * is_rgb_to_alpha_possible. */ + if (new_src_index < 0) { + assert(0); + return; + } + + arg->Source = new_src_index; +} + +static int can_remap(unsigned int opcode) +{ + switch(opcode) { + case RC_OPCODE_DDX: + case RC_OPCODE_DDY: + return 0; + default: + return 1; + } +} + +static int can_convert_opcode_to_alpha(unsigned int opcode) +{ + switch(opcode) { + case RC_OPCODE_DDX: + case RC_OPCODE_DDY: + case RC_OPCODE_DP2: + case RC_OPCODE_DP3: + case RC_OPCODE_DP4: + case RC_OPCODE_DPH: + return 0; + default: + return 1; + } +} + +static void is_rgb_to_alpha_possible( + void * userdata, + struct rc_instruction * inst, + struct rc_pair_instruction_arg * arg, + struct rc_pair_instruction_source * src) +{ + unsigned int chan_count = 0; + unsigned int alpha_sources = 0; + unsigned int i; + struct rc_reader_data * reader_data = userdata; + + if (!can_remap(inst->U.P.RGB.Opcode) + || !can_remap(inst->U.P.Alpha.Opcode)) { + reader_data->Abort = 1; + return; + } + + if (!src) + return; + + /* XXX There are some cases where we can still do the conversion if + * a reader reads from a presubtract source, but for now we'll prevent + * it. */ + if (arg->Source == RC_PAIR_PRESUB_SRC) { + reader_data->Abort = 1; + return; + } + + /* Make sure the source only reads from one component. + * XXX We should allow the source to read from the same component twice. + * XXX If the index we will be converting to is the same as the + * current index, then it is OK to read from more than one component. + */ + for (i = 0; i < 3; i++) { + rc_swizzle swz = get_swz(arg->Swizzle, i); + switch(swz) { + case RC_SWIZZLE_X: + case RC_SWIZZLE_Y: + case RC_SWIZZLE_Z: + case RC_SWIZZLE_W: + chan_count++; + break; + default: + break; + } + } + if (chan_count > 1) { + reader_data->Abort = 1; + return; + } + + /* Make sure there are enough alpha sources. + * XXX If we know what register all the readers are going + * to be remapped to, then in some situations we can still do + * the subsitution, even if all 3 alpha sources are being used.*/ + for (i = 0; i < 3; i++) { + if (inst->U.P.Alpha.Src[i].Used) { + alpha_sources++; + } + } + if (alpha_sources > 2) { + reader_data->Abort = 1; + return; + } +} + +static int convert_rgb_to_alpha( + struct schedule_state * s, + struct schedule_instruction * sched_inst) +{ + struct rc_pair_instruction * pair_inst = &sched_inst->Instruction->U.P; + unsigned int old_mask = pair_inst->RGB.WriteMask; + unsigned int old_swz = rc_mask_to_swizzle(old_mask); + const struct rc_opcode_info * info = + rc_get_opcode_info(pair_inst->RGB.Opcode); + int new_index = -1; + unsigned int i; + + if (sched_inst->GlobalReaders.Abort) + return 0; + + if (!pair_inst->RGB.WriteMask) + return 0; + + if (!can_convert_opcode_to_alpha(pair_inst->RGB.Opcode) + || !can_convert_opcode_to_alpha(pair_inst->Alpha.Opcode)) { + return 0; + } + + assert(sched_inst->NumWriteValues == 1); + + if (!sched_inst->WriteValues[0]) { + assert(0); + return 0; + } + + /* We start at the old index, because if we can reuse the same + * register and just change the swizzle then it is more likely we + * will be able to convert all the readers. */ + for (i = pair_inst->RGB.DestIndex; i < RC_REGISTER_MAX_INDEX; i++) { + struct reg_value ** new_regvalp = get_reg_valuep( + s, RC_FILE_TEMPORARY, i, 3); + if (!*new_regvalp) { + struct reg_value ** old_regvalp = + get_reg_valuep(s, + RC_FILE_TEMPORARY, + pair_inst->RGB.DestIndex, + rc_mask_to_swizzle(old_mask)); + new_index = i; + *new_regvalp = *old_regvalp; + *old_regvalp = NULL; + new_regvalp = get_reg_valuep(s, RC_FILE_TEMPORARY, i, 3); + break; + } + } + if (new_index < 0) { + return 0; + } + + pair_inst->Alpha.Opcode = pair_inst->RGB.Opcode; + pair_inst->Alpha.DestIndex = new_index; + pair_inst->Alpha.WriteMask = RC_MASK_W; + pair_inst->Alpha.Target = pair_inst->RGB.Target; + pair_inst->Alpha.OutputWriteMask = pair_inst->RGB.OutputWriteMask; + pair_inst->Alpha.DepthWriteMask = pair_inst->RGB.DepthWriteMask; + pair_inst->Alpha.Saturate = pair_inst->RGB.Saturate; + memcpy(pair_inst->Alpha.Arg, pair_inst->RGB.Arg, + sizeof(pair_inst->Alpha.Arg)); + /* Move the swizzles into the first chan */ + for (i = 0; i < info->NumSrcRegs; i++) { + unsigned int j; + for (j = 0; j < 3; j++) { + unsigned int swz = get_swz(pair_inst->Alpha.Arg[i].Swizzle, j); + if (swz != RC_SWIZZLE_UNUSED) { + pair_inst->Alpha.Arg[i].Swizzle = + rc_init_swizzle(swz, 1); + break; + } + } + } + pair_inst->RGB.Opcode = RC_OPCODE_NOP; + pair_inst->RGB.DestIndex = 0; + pair_inst->RGB.WriteMask = 0; + pair_inst->RGB.Target = 0; + pair_inst->RGB.OutputWriteMask = 0; + pair_inst->RGB.DepthWriteMask = 0; + pair_inst->RGB.Saturate = 0; + memset(pair_inst->RGB.Arg, 0, sizeof(pair_inst->RGB.Arg)); + + for(i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) { + struct rc_reader reader = sched_inst->GlobalReaders.Readers[i]; + rgb_to_alpha_remap(reader.Inst, reader.U.P.Arg, + RC_FILE_TEMPORARY, old_swz, new_index); + } + return 1; +} + +/** + * Find a good ALU instruction or pair of ALU instruction and emit it. + * + * Prefer emitting full ALU instructions, so that when we reach a point + * where no full ALU instruction can be emitted, we have more candidates + * for RGB/Alpha pairing. + */ +static void emit_one_alu(struct schedule_state *s, struct rc_instruction * before) +{ + struct schedule_instruction * sinst; + + if (s->ReadyFullALU) { + sinst = s->ReadyFullALU; + s->ReadyFullALU = s->ReadyFullALU->NextReady; + rc_insert_instruction(before->Prev, sinst->Instruction); + commit_alu_instruction(s, sinst); + } else { + struct schedule_instruction **prgb; + struct schedule_instruction **palpha; + struct schedule_instruction *prev; +pair: + /* Some pairings might fail because they require too + * many source slots; try all possible pairings if necessary */ + for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) { + for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) { + struct schedule_instruction * psirgb = *prgb; + struct schedule_instruction * psialpha = *palpha; + + if (!merge_instructions(&psirgb->Instruction->U.P, &psialpha->Instruction->U.P)) + continue; + + *prgb = (*prgb)->NextReady; + *palpha = (*palpha)->NextReady; + rc_insert_instruction(before->Prev, psirgb->Instruction); + commit_alu_instruction(s, psirgb); + commit_alu_instruction(s, psialpha); + goto success; + } + } + prev = NULL; + /* No success in pairing, now try to convert one of the RGB + * instructions to an Alpha so we can pair it with another RGB. + */ + if (s->ReadyRGB && s->ReadyRGB->NextReady) { + for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) { + if ((*prgb)->NumWriteValues == 1) { + struct schedule_instruction * prgb_next; + if (!convert_rgb_to_alpha(s, *prgb)) + goto cont_loop; + prgb_next = (*prgb)->NextReady; + /* Add instruction to the Alpha ready list. */ + (*prgb)->NextReady = s->ReadyAlpha; + s->ReadyAlpha = *prgb; + /* Remove instruction from the RGB ready list.*/ + if (prev) + prev->NextReady = prgb_next; + else + s->ReadyRGB = prgb_next; + goto pair; + } +cont_loop: + prev = *prgb; + } + } + /* Still no success in pairing, just take the first RGB + * or alpha instruction. */ + if (s->ReadyRGB) { + sinst = s->ReadyRGB; + s->ReadyRGB = s->ReadyRGB->NextReady; + } else if (s->ReadyAlpha) { + sinst = s->ReadyAlpha; + s->ReadyAlpha = s->ReadyAlpha->NextReady; + } else { + /*XXX Something real bad has happened. */ + assert(0); + } + + rc_insert_instruction(before->Prev, sinst->Instruction); + commit_alu_instruction(s, sinst); + success: ; + } + /* If the instruction we just emitted uses a presubtract value, and + * the presubtract sources were written by the previous intstruction, + * the previous instruction needs a nop. */ + presub_nop(before->Prev); +} + +static void scan_read(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int chan) +{ + struct schedule_state * s = data; + struct reg_value ** v = get_reg_valuep(s, file, index, chan); + struct reg_value_reader * reader; + + if (!v) + return; + + if (*v && (*v)->Writer == s->Current) { + /* The instruction reads and writes to a register component. + * In this case, we only want to increment dependencies by one. + */ + return; + } + + DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan); + + reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader)); + reader->Reader = s->Current; + if (!*v) { + /* In this situation, the instruction reads from a register + * that hasn't been written to or read from in the current + * block. */ + *v = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value)); + memset(*v, 0, sizeof(struct reg_value)); + (*v)->Readers = reader; + } else { + reader->Next = (*v)->Readers; + (*v)->Readers = reader; + /* Only update the current instruction's dependencies if the + * register it reads from has been written to in this block. */ + if ((*v)->Writer) { + s->Current->NumDependencies++; + } + } + (*v)->NumReaders++; + + if (s->Current->NumReadValues >= 12) { + rc_error(s->C, "%s: NumReadValues overflow\n", __FUNCTION__); + } else { + s->Current->ReadValues[s->Current->NumReadValues++] = *v; + } +} + +static void scan_write(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int chan) +{ + struct schedule_state * s = data; + struct reg_value ** pv = get_reg_valuep(s, file, index, chan); + struct reg_value * newv; + + if (!pv) + return; + + DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan); + + newv = memory_pool_malloc(&s->C->Pool, sizeof(*newv)); + memset(newv, 0, sizeof(*newv)); + + newv->Writer = s->Current; + + if (*pv) { + (*pv)->Next = newv; + s->Current->NumDependencies++; + } + + *pv = newv; + + if (s->Current->NumWriteValues >= 4) { + rc_error(s->C, "%s: NumWriteValues overflow\n", __FUNCTION__); + } else { + s->Current->WriteValues[s->Current->NumWriteValues++] = newv; + } +} + +static void is_rgb_to_alpha_possible_normal( + void * userdata, + struct rc_instruction * inst, + struct rc_src_register * src) +{ + struct rc_reader_data * reader_data = userdata; + reader_data->Abort = 1; + +} + +static void schedule_block(struct r300_fragment_program_compiler * c, + struct rc_instruction * begin, struct rc_instruction * end) +{ + struct schedule_state s; + unsigned int ip; + + memset(&s, 0, sizeof(s)); + s.C = &c->Base; + + /* Scan instructions for data dependencies */ + ip = 0; + for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) { + s.Current = memory_pool_malloc(&c->Base.Pool, sizeof(*s.Current)); + memset(s.Current, 0, sizeof(struct schedule_instruction)); + + s.Current->Instruction = inst; + inst->IP = ip++; + + DBG("%i: Scanning\n", inst->IP); + + /* The order of things here is subtle and maybe slightly + * counter-intuitive, to account for the case where an + * instruction writes to the same register as it reads + * from. */ + rc_for_all_writes_chan(inst, &scan_write, &s); + rc_for_all_reads_chan(inst, &scan_read, &s); + + DBG("%i: Has %i dependencies\n", inst->IP, s.Current->NumDependencies); + + if (!s.Current->NumDependencies) + instruction_ready(&s, s.Current); + + /* Get global readers for possible RGB->Alpha conversion. */ + s.Current->GlobalReaders.ExitOnAbort = 1; + rc_get_readers(s.C, inst, &s.Current->GlobalReaders, + is_rgb_to_alpha_possible_normal, + is_rgb_to_alpha_possible, NULL); + } + + /* Temporarily unlink all instructions */ + begin->Prev->Next = end; + end->Prev = begin->Prev; + + /* Schedule instructions back */ + while(!s.C->Error && + (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) { + if (s.ReadyTEX) + emit_all_tex(&s, end); + + while(!s.C->Error && (s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha)) + emit_one_alu(&s, end); + } +} + +static int is_controlflow(struct rc_instruction * inst) +{ + if (inst->Type == RC_INSTRUCTION_NORMAL) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + return opcode->IsFlowControl; + } + return 0; +} + +void rc_pair_schedule(struct radeon_compiler *cc, void *user) +{ + struct schedule_state s; + + struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc; + struct rc_instruction * inst = c->Base.Program.Instructions.Next; + + memset(&s, 0, sizeof(s)); + s.C = &c->Base; + while(inst != &c->Base.Program.Instructions) { + struct rc_instruction * first; + + if (is_controlflow(inst)) { + inst = inst->Next; + continue; + } + + first = inst; + + while(inst != &c->Base.Program.Instructions && !is_controlflow(inst)) + inst = inst->Next; + + DBG("Schedule one block\n"); + schedule_block(c, first, inst); + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_pair_translate.c b/src/gallium/drivers/r300/compiler/radeon_pair_translate.c new file mode 100644 index 00000000000..2dae56a2428 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_pair_translate.c @@ -0,0 +1,359 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program_pair.h" + +#include "radeon_compiler.h" +#include "radeon_compiler_util.h" + + +/** + * Finally rewrite ADD, MOV, MUL as the appropriate native instruction + * and reverse the order of arguments for CMP. + */ +static void final_rewrite(struct rc_sub_instruction *inst) +{ + struct rc_src_register tmp; + + switch(inst->Opcode) { + case RC_OPCODE_ADD: + inst->SrcReg[2] = inst->SrcReg[1]; + inst->SrcReg[1].File = RC_FILE_NONE; + inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111; + inst->SrcReg[1].Negate = RC_MASK_NONE; + inst->Opcode = RC_OPCODE_MAD; + break; + case RC_OPCODE_CMP: + tmp = inst->SrcReg[2]; + inst->SrcReg[2] = inst->SrcReg[0]; + inst->SrcReg[0] = tmp; + break; + case RC_OPCODE_MOV: + /* AMD say we should use CMP. + * However, when we transform + * KIL -r0; + * into + * CMP tmp, -r0, -r0, 0; + * KIL tmp; + * we get incorrect behaviour on R500 when r0 == 0.0. + * It appears that the R500 KIL hardware treats -0.0 as less + * than zero. + */ + inst->SrcReg[1].File = RC_FILE_NONE; + inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111; + inst->SrcReg[2].File = RC_FILE_NONE; + inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000; + inst->Opcode = RC_OPCODE_MAD; + break; + case RC_OPCODE_MUL: + inst->SrcReg[2].File = RC_FILE_NONE; + inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000; + inst->Opcode = RC_OPCODE_MAD; + break; + default: + /* nothing to do */ + break; + } +} + + +/** + * Classify an instruction according to which ALUs etc. it needs + */ +static void classify_instruction(struct rc_sub_instruction * inst, + int * needrgb, int * needalpha, int * istranscendent) +{ + *needrgb = (inst->DstReg.WriteMask & RC_MASK_XYZ) ? 1 : 0; + *needalpha = (inst->DstReg.WriteMask & RC_MASK_W) ? 1 : 0; + *istranscendent = 0; + + if (inst->WriteALUResult == RC_ALURESULT_X) + *needrgb = 1; + else if (inst->WriteALUResult == RC_ALURESULT_W) + *needalpha = 1; + + switch(inst->Opcode) { + case RC_OPCODE_ADD: + case RC_OPCODE_CMP: + case RC_OPCODE_CND: + case RC_OPCODE_DDX: + case RC_OPCODE_DDY: + case RC_OPCODE_FRC: + case RC_OPCODE_MAD: + case RC_OPCODE_MAX: + case RC_OPCODE_MIN: + case RC_OPCODE_MOV: + case RC_OPCODE_MUL: + break; + case RC_OPCODE_COS: + case RC_OPCODE_EX2: + case RC_OPCODE_LG2: + case RC_OPCODE_RCP: + case RC_OPCODE_RSQ: + case RC_OPCODE_SIN: + *istranscendent = 1; + *needalpha = 1; + break; + case RC_OPCODE_DP4: + *needalpha = 1; + /* fall through */ + case RC_OPCODE_DP3: + *needrgb = 1; + break; + default: + break; + } +} + +static void src_uses(struct rc_src_register src, unsigned int * rgb, + unsigned int * alpha) +{ + int j; + for(j = 0; j < 4; ++j) { + unsigned int swz = GET_SWZ(src.Swizzle, j); + if (swz < 3) + *rgb = 1; + else if (swz < 4) + *alpha = 1; + } +} + +/** + * Fill the given ALU instruction's opcodes and source operands into the given pair, + * if possible. + */ +static void set_pair_instruction(struct r300_fragment_program_compiler *c, + struct rc_pair_instruction * pair, + struct rc_sub_instruction * inst) +{ + int needrgb, needalpha, istranscendent; + const struct rc_opcode_info * opcode; + int i; + + memset(pair, 0, sizeof(struct rc_pair_instruction)); + + classify_instruction(inst, &needrgb, &needalpha, &istranscendent); + + if (needrgb) { + if (istranscendent) + pair->RGB.Opcode = RC_OPCODE_REPL_ALPHA; + else + pair->RGB.Opcode = inst->Opcode; + if (inst->SaturateMode == RC_SATURATE_ZERO_ONE) + pair->RGB.Saturate = 1; + } + if (needalpha) { + pair->Alpha.Opcode = inst->Opcode; + if (inst->SaturateMode == RC_SATURATE_ZERO_ONE) + pair->Alpha.Saturate = 1; + } + + opcode = rc_get_opcode_info(inst->Opcode); + + /* Presubtract handling: + * We need to make sure that the values used by the presubtract + * operation end up in src0 or src1. */ + if(inst->PreSub.Opcode != RC_PRESUB_NONE) { + /* rc_pair_alloc_source() will fill in data for + * pair->{RGB,ALPHA}.Src[RC_PAIR_PRESUB_SRC] */ + int j; + for(j = 0; j < 3; j++) { + int src_regs; + if(inst->SrcReg[j].File != RC_FILE_PRESUB) + continue; + + src_regs = rc_presubtract_src_reg_count( + inst->PreSub.Opcode); + for(i = 0; i < src_regs; i++) { + unsigned int rgb = 0; + unsigned int alpha = 0; + src_uses(inst->SrcReg[j], &rgb, &alpha); + if(rgb) { + pair->RGB.Src[i].File = + inst->PreSub.SrcReg[i].File; + pair->RGB.Src[i].Index = + inst->PreSub.SrcReg[i].Index; + pair->RGB.Src[i].Used = 1; + } + if(alpha) { + pair->Alpha.Src[i].File = + inst->PreSub.SrcReg[i].File; + pair->Alpha.Src[i].Index = + inst->PreSub.SrcReg[i].Index; + pair->Alpha.Src[i].Used = 1; + } + } + } + } + + for(i = 0; i < opcode->NumSrcRegs; ++i) { + int source; + if (needrgb && !istranscendent) { + unsigned int srcrgb = 0; + unsigned int srcalpha = 0; + unsigned int srcmask = 0; + int j; + /* We don't care about the alpha channel here. We only + * want the part of the swizzle that writes to rgb, + * since we are creating an rgb instruction. */ + for(j = 0; j < 3; ++j) { + unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); + + if (swz < RC_SWIZZLE_W) + srcrgb = 1; + else if (swz == RC_SWIZZLE_W) + srcalpha = 1; + + if (swz < RC_SWIZZLE_UNUSED) + srcmask |= 1 << j; + } + source = rc_pair_alloc_source(pair, srcrgb, srcalpha, + inst->SrcReg[i].File, inst->SrcReg[i].Index); + if (source < 0) { + rc_error(&c->Base, "Failed to translate " + "rgb instruction.\n"); + return; + } + pair->RGB.Arg[i].Source = source; + pair->RGB.Arg[i].Swizzle = + rc_init_swizzle(inst->SrcReg[i].Swizzle, 3); + pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs; + pair->RGB.Arg[i].Negate = !!(srcmask & inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z)); + } + if (needalpha) { + unsigned int srcrgb = 0; + unsigned int srcalpha = 0; + unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, istranscendent ? 0 : 3); + if (swz < 3) + srcrgb = 1; + else if (swz < 4) + srcalpha = 1; + source = rc_pair_alloc_source(pair, srcrgb, srcalpha, + inst->SrcReg[i].File, inst->SrcReg[i].Index); + if (source < 0) { + rc_error(&c->Base, "Failed to translate " + "alpha instruction.\n"); + return; + } + pair->Alpha.Arg[i].Source = source; + pair->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1); + pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs; + pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & RC_MASK_W); + } + } + + /* Destination handling */ + if (inst->DstReg.File == RC_FILE_OUTPUT) { + if (inst->DstReg.Index == c->OutputDepth) { + pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); + } else { + for (i = 0; i < 4; i++) { + if (inst->DstReg.Index == c->OutputColor[i]) { + pair->RGB.Target = i; + pair->Alpha.Target = i; + pair->RGB.OutputWriteMask |= + inst->DstReg.WriteMask & RC_MASK_XYZ; + pair->Alpha.OutputWriteMask |= + GET_BIT(inst->DstReg.WriteMask, 3); + break; + } + } + } + } else { + if (needrgb) { + pair->RGB.DestIndex = inst->DstReg.Index; + pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ; + } + + if (needalpha) { + pair->Alpha.WriteMask |= (GET_BIT(inst->DstReg.WriteMask, 3) << 3); + if (pair->Alpha.WriteMask) { + pair->Alpha.DestIndex = inst->DstReg.Index; + } + } + } + + if (inst->WriteALUResult) { + pair->WriteALUResult = inst->WriteALUResult; + pair->ALUResultCompare = inst->ALUResultCompare; + } +} + + +static void check_opcode_support(struct r300_fragment_program_compiler *c, + struct rc_sub_instruction *inst) +{ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); + + if (opcode->HasDstReg) { + if (inst->SaturateMode == RC_SATURATE_MINUS_PLUS_ONE) { + rc_error(&c->Base, "Fragment program does not support signed Saturate.\n"); + return; + } + } + + for (unsigned i = 0; i < opcode->NumSrcRegs; i++) { + if (inst->SrcReg[i].RelAddr) { + rc_error(&c->Base, "Fragment program does not support relative addressing " + " of source operands.\n"); + return; + } + } +} + + +/** + * Translate all ALU instructions into corresponding pair instructions, + * performing no other changes. + */ +void rc_pair_translate(struct radeon_compiler *cc, void *user) +{ + struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc; + + for(struct rc_instruction * inst = c->Base.Program.Instructions.Next; + inst != &c->Base.Program.Instructions; + inst = inst->Next) { + const struct rc_opcode_info * opcode; + struct rc_sub_instruction copy; + + if (inst->Type != RC_INSTRUCTION_NORMAL) + continue; + + opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (opcode->HasTexture || opcode->IsFlowControl || opcode->Opcode == RC_OPCODE_KIL) + continue; + + copy = inst->U.I; + + check_opcode_support(c, ©); + + final_rewrite(©); + inst->Type = RC_INSTRUCTION_PAIR; + set_pair_instruction(c, &inst->U.P, ©); + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_program.c b/src/gallium/drivers/r300/compiler/radeon_program.c new file mode 100644 index 00000000000..fe5756ebc45 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_program.c @@ -0,0 +1,225 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program.h" + +#include <stdio.h> + +#include "radeon_compiler.h" +#include "radeon_dataflow.h" + + +/** + * Transform the given clause in the following way: + * 1. Replace it with an empty clause + * 2. For every instruction in the original clause, try the given + * transformations in order. + * 3. If one of the transformations returns GL_TRUE, assume that it + * has emitted the appropriate instruction(s) into the new clause; + * otherwise, copy the instruction verbatim. + * + * \note The transformation is currently not recursive; in other words, + * instructions emitted by transformations are not transformed. + * + * \note The transform is called 'local' because it can only look at + * one instruction at a time. + */ +void rc_local_transform( + struct radeon_compiler * c, + void *user) +{ + struct radeon_program_transformation *transformations = + (struct radeon_program_transformation*)user; + struct rc_instruction * inst = c->Program.Instructions.Next; + + while(inst != &c->Program.Instructions) { + struct rc_instruction * current = inst; + int i; + + inst = inst->Next; + + for(i = 0; transformations[i].function; ++i) { + struct radeon_program_transformation* t = transformations + i; + + if (t->function(c, current, t->userData)) + break; + } + } +} + +struct get_used_temporaries_data { + unsigned char * Used; + unsigned int UsedLength; +}; + +static void get_used_temporaries_cb( + void * userdata, + struct rc_instruction * inst, + rc_register_file file, + unsigned int index, + unsigned int mask) +{ + struct get_used_temporaries_data * d = userdata; + + if (file != RC_FILE_TEMPORARY) + return; + + if (index >= d->UsedLength) + return; + + d->Used[index] |= mask; +} + +/** + * This function fills in the parameter 'used' with a writemask that + * represent which components of each temporary register are used by the + * program. This is meant to be combined with rc_find_free_temporary_list as a + * more efficient version of rc_find_free_temporary. + * @param used The function does not initialize this parameter. + */ +void rc_get_used_temporaries( + struct radeon_compiler * c, + unsigned char * used, + unsigned int used_length) +{ + struct rc_instruction * inst; + struct get_used_temporaries_data d; + d.Used = used; + d.UsedLength = used_length; + + for(inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; inst = inst->Next) { + + rc_for_all_reads_mask(inst, get_used_temporaries_cb, &d); + rc_for_all_writes_mask(inst, get_used_temporaries_cb, &d); + } +} + +/* Search a list of used temporaries for a free one + * \sa rc_get_used_temporaries + * @note If this functions finds a free temporary, it will mark it as used + * in the used temporary list (param 'used') + * @param used list of used temporaries + * @param used_length number of items in param 'used' + * @param mask which components must be free in the temporary index that is + * returned. + * @return -1 If there are no more free temporaries, otherwise the index of + * a temporary register where the components specified in param 'mask' are + * not being used. + */ +int rc_find_free_temporary_list( + struct radeon_compiler * c, + unsigned char * used, + unsigned int used_length, + unsigned int mask) +{ + int i; + for(i = 0; i < used_length; i++) { + if ((~used[i] & mask) == mask) { + used[i] |= mask; + return i; + } + } + return -1; +} + +unsigned int rc_find_free_temporary(struct radeon_compiler * c) +{ + unsigned char used[RC_REGISTER_MAX_INDEX]; + int free; + + memset(used, 0, sizeof(used)); + + rc_get_used_temporaries(c, used, RC_REGISTER_MAX_INDEX); + + free = rc_find_free_temporary_list(c, used, RC_REGISTER_MAX_INDEX, + RC_MASK_XYZW); + if (free < 0) { + rc_error(c, "Ran out of temporary registers\n"); + return 0; + } + return free; +} + + +struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c) +{ + struct rc_instruction * inst = memory_pool_malloc(&c->Pool, sizeof(struct rc_instruction)); + + memset(inst, 0, sizeof(struct rc_instruction)); + + inst->U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZW; + inst->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZW; + + return inst; +} + +void rc_insert_instruction(struct rc_instruction * after, struct rc_instruction * inst) +{ + inst->Prev = after; + inst->Next = after->Next; + + inst->Prev->Next = inst; + inst->Next->Prev = inst; +} + +struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after) +{ + struct rc_instruction * inst = rc_alloc_instruction(c); + + rc_insert_instruction(after, inst); + + return inst; +} + +void rc_remove_instruction(struct rc_instruction * inst) +{ + inst->Prev->Next = inst->Next; + inst->Next->Prev = inst->Prev; +} + +/** + * Return the number of instructions in the program. + */ +unsigned int rc_recompute_ips(struct radeon_compiler * c) +{ + unsigned int ip = 0; + struct rc_instruction * inst; + + for(inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + inst->IP = ip++; + } + + c->Program.Instructions.IP = 0xcafedead; + + return ip; +} diff --git a/src/gallium/drivers/r300/compiler/radeon_program.h b/src/gallium/drivers/r300/compiler/radeon_program.h new file mode 100644 index 00000000000..b899eccbf53 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_program.h @@ -0,0 +1,206 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RADEON_PROGRAM_H_ +#define __RADEON_PROGRAM_H_ + +#include <stdint.h> +#include <string.h> + +#include "radeon_opcodes.h" +#include "radeon_code.h" +#include "radeon_program_constants.h" +#include "radeon_program_pair.h" + +struct radeon_compiler; + +struct rc_src_register { + unsigned int File:4; + + /** Negative values may be used for relative addressing. */ + signed int Index:(RC_REGISTER_INDEX_BITS+1); + unsigned int RelAddr:1; + + unsigned int Swizzle:12; + + /** Take the component-wise absolute value */ + unsigned int Abs:1; + + /** Post-Abs negation. */ + unsigned int Negate:4; +}; + +struct rc_dst_register { + unsigned int File:3; + unsigned int Index:RC_REGISTER_INDEX_BITS; + unsigned int WriteMask:4; +}; + +struct rc_presub_instruction { + rc_presubtract_op Opcode; + struct rc_src_register SrcReg[2]; +}; + +/** + * Instructions are maintained by the compiler in a doubly linked list + * of these structures. + * + * This instruction format is intended to be expanded for hardware-specific + * trickery. At different stages of compilation, a different set of + * instruction types may be valid. + */ +struct rc_sub_instruction { + struct rc_src_register SrcReg[3]; + struct rc_dst_register DstReg; + + /** + * Opcode of this instruction, according to \ref rc_opcode enums. + */ + unsigned int Opcode:8; + + /** + * Saturate each value of the result to the range [0,1] or [-1,1], + * according to \ref rc_saturate_mode enums. + */ + unsigned int SaturateMode:2; + + /** + * Writing to the special register RC_SPECIAL_ALU_RESULT + */ + /*@{*/ + unsigned int WriteALUResult:2; + unsigned int ALUResultCompare:3; + /*@}*/ + + /** + * \name Extra fields for TEX, TXB, TXD, TXL, TXP instructions. + */ + /*@{*/ + /** Source texture unit. */ + unsigned int TexSrcUnit:5; + + /** Source texture target, one of the \ref rc_texture_target enums */ + unsigned int TexSrcTarget:3; + + /** True if tex instruction should do shadow comparison */ + unsigned int TexShadow:1; + + /**R500 Only. How to swizzle the result of a TEX lookup*/ + unsigned int TexSwizzle:12; + /*@}*/ + + /** This holds information about the presubtract operation used by + * this instruction. */ + struct rc_presub_instruction PreSub; +}; + +typedef enum { + RC_INSTRUCTION_NORMAL = 0, + RC_INSTRUCTION_PAIR +} rc_instruction_type; + +struct rc_instruction { + struct rc_instruction * Prev; + struct rc_instruction * Next; + + rc_instruction_type Type; + union { + struct rc_sub_instruction I; + struct rc_pair_instruction P; + } U; + + /** + * Warning: IPs are not stable. If you want to use them, + * you need to recompute them at the beginning of each pass + * using \ref rc_recompute_ips + */ + unsigned int IP; +}; + +struct rc_program { + /** + * Instructions.Next points to the first instruction, + * Instructions.Prev points to the last instruction. + */ + struct rc_instruction Instructions; + + /* Long term, we should probably remove InputsRead & OutputsWritten, + * since updating dependent state can be fragile, and they aren't + * actually used very often. */ + uint32_t InputsRead; + uint32_t OutputsWritten; + uint32_t ShadowSamplers; /**< Texture units used for shadow sampling. */ + + struct rc_constant_list Constants; +}; + +/** + * A transformation that can be passed to \ref rc_local_transform. + * + * The function will be called once for each instruction. + * It has to either emit the appropriate transformed code for the instruction + * and return true, or return false if it doesn't understand the + * instruction. + * + * The function gets passed the userData as last parameter. + */ +struct radeon_program_transformation { + int (*function)( + struct radeon_compiler*, + struct rc_instruction*, + void*); + void *userData; +}; + +void rc_local_transform( + struct radeon_compiler *c, + void *user); + +void rc_get_used_temporaries( + struct radeon_compiler * c, + unsigned char * used, + unsigned int used_length); + +int rc_find_free_temporary_list( + struct radeon_compiler * c, + unsigned char * used, + unsigned int used_length, + unsigned int mask); + +unsigned int rc_find_free_temporary(struct radeon_compiler * c); + +struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c); +struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after); +void rc_insert_instruction(struct rc_instruction * after, struct rc_instruction * inst); +void rc_remove_instruction(struct rc_instruction * inst); + +unsigned int rc_recompute_ips(struct radeon_compiler * c); + +void rc_print_program(const struct rc_program *prog); + +rc_swizzle rc_mask_to_swizzle(unsigned int mask); +#endif diff --git a/src/gallium/drivers/r300/compiler/radeon_program_alu.c b/src/gallium/drivers/r300/compiler/radeon_program_alu.c new file mode 100644 index 00000000000..e273bc40c26 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_program_alu.c @@ -0,0 +1,1154 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * @file + * + * Shareable transformations that transform "special" ALU instructions + * into ALU instructions that are supported by hardware. + * + */ + +#include "radeon_program_alu.h" + +#include "radeon_compiler.h" +#include "radeon_compiler_util.h" + + +static struct rc_instruction *emit1( + struct radeon_compiler * c, struct rc_instruction * after, + rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg, + struct rc_src_register SrcReg) +{ + struct rc_instruction *fpi = rc_insert_new_instruction(c, after); + + fpi->U.I.Opcode = Opcode; + fpi->U.I.SaturateMode = Saturate; + fpi->U.I.DstReg = DstReg; + fpi->U.I.SrcReg[0] = SrcReg; + return fpi; +} + +static struct rc_instruction *emit2( + struct radeon_compiler * c, struct rc_instruction * after, + rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg, + struct rc_src_register SrcReg0, struct rc_src_register SrcReg1) +{ + struct rc_instruction *fpi = rc_insert_new_instruction(c, after); + + fpi->U.I.Opcode = Opcode; + fpi->U.I.SaturateMode = Saturate; + fpi->U.I.DstReg = DstReg; + fpi->U.I.SrcReg[0] = SrcReg0; + fpi->U.I.SrcReg[1] = SrcReg1; + return fpi; +} + +static struct rc_instruction *emit3( + struct radeon_compiler * c, struct rc_instruction * after, + rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg, + struct rc_src_register SrcReg0, struct rc_src_register SrcReg1, + struct rc_src_register SrcReg2) +{ + struct rc_instruction *fpi = rc_insert_new_instruction(c, after); + + fpi->U.I.Opcode = Opcode; + fpi->U.I.SaturateMode = Saturate; + fpi->U.I.DstReg = DstReg; + fpi->U.I.SrcReg[0] = SrcReg0; + fpi->U.I.SrcReg[1] = SrcReg1; + fpi->U.I.SrcReg[2] = SrcReg2; + return fpi; +} + +static struct rc_dst_register dstregtmpmask(int index, int mask) +{ + struct rc_dst_register dst = {0, 0, 0}; + dst.File = RC_FILE_TEMPORARY; + dst.Index = index; + dst.WriteMask = mask; + return dst; +} + +static const struct rc_src_register builtin_zero = { + .File = RC_FILE_NONE, + .Index = 0, + .Swizzle = RC_SWIZZLE_0000 +}; +static const struct rc_src_register builtin_one = { + .File = RC_FILE_NONE, + .Index = 0, + .Swizzle = RC_SWIZZLE_1111 +}; +static const struct rc_src_register srcreg_undefined = { + .File = RC_FILE_NONE, + .Index = 0, + .Swizzle = RC_SWIZZLE_XYZW +}; + +static struct rc_src_register srcreg(int file, int index) +{ + struct rc_src_register src = srcreg_undefined; + src.File = file; + src.Index = index; + return src; +} + +static struct rc_src_register srcregswz(int file, int index, int swz) +{ + struct rc_src_register src = srcreg_undefined; + src.File = file; + src.Index = index; + src.Swizzle = swz; + return src; +} + +static struct rc_src_register absolute(struct rc_src_register reg) +{ + struct rc_src_register newreg = reg; + newreg.Abs = 1; + newreg.Negate = RC_MASK_NONE; + return newreg; +} + +static struct rc_src_register negate(struct rc_src_register reg) +{ + struct rc_src_register newreg = reg; + newreg.Negate = newreg.Negate ^ RC_MASK_XYZW; + return newreg; +} + +static struct rc_src_register swizzle(struct rc_src_register reg, + rc_swizzle x, rc_swizzle y, rc_swizzle z, rc_swizzle w) +{ + struct rc_src_register swizzled = reg; + swizzled.Swizzle = combine_swizzles4(reg.Swizzle, x, y, z, w); + return swizzled; +} + +static struct rc_src_register swizzle_smear(struct rc_src_register reg, + rc_swizzle x) +{ + return swizzle(reg, x, x, x, x); +} + +static struct rc_src_register swizzle_xxxx(struct rc_src_register reg) +{ + return swizzle_smear(reg, RC_SWIZZLE_X); +} + +static struct rc_src_register swizzle_yyyy(struct rc_src_register reg) +{ + return swizzle_smear(reg, RC_SWIZZLE_Y); +} + +static struct rc_src_register swizzle_zzzz(struct rc_src_register reg) +{ + return swizzle_smear(reg, RC_SWIZZLE_Z); +} + +static struct rc_src_register swizzle_wwww(struct rc_src_register reg) +{ + return swizzle_smear(reg, RC_SWIZZLE_W); +} + +static int is_dst_safe_to_reuse(struct rc_instruction *inst) +{ + const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode); + unsigned i; + + assert(info->HasDstReg); + + if (inst->U.I.DstReg.File != RC_FILE_TEMPORARY) + return 0; + + for (i = 0; i < info->NumSrcRegs; i++) { + if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY && + inst->U.I.SrcReg[i].Index == inst->U.I.DstReg.Index) + return 0; + } + + return 1; +} + +static struct rc_dst_register try_to_reuse_dst(struct radeon_compiler *c, + struct rc_instruction *inst) +{ + unsigned tmp; + + if (is_dst_safe_to_reuse(inst)) + tmp = inst->U.I.DstReg.Index; + else + tmp = rc_find_free_temporary(c); + + return dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask); +} + +static void transform_ABS(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_src_register src = inst->U.I.SrcReg[0]; + src.Abs = 1; + src.Negate = RC_MASK_NONE; + emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, src); + rc_remove_instruction(inst); +} + +static void transform_CEIL(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* Assuming: + * ceil(x) = -floor(-x) + * + * After inlining floor: + * ceil(x) = -(-x-frac(-x)) + * + * After simplification: + * ceil(x) = x+frac(-x) + */ + + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, negate(inst->U.I.SrcReg[0])); + emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg, + inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index)); + rc_remove_instruction(inst); +} + +static void transform_CLAMP(struct radeon_compiler *c, + struct rc_instruction *inst) +{ + /* CLAMP dst, src, min, max + * into: + * MIN tmp, src, max + * MAX dst, tmp, min + */ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + emit2(c, inst->Prev, RC_OPCODE_MIN, 0, dst, + inst->U.I.SrcReg[0], inst->U.I.SrcReg[2]); + emit2(c, inst->Prev, RC_OPCODE_MAX, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1]); + rc_remove_instruction(inst); +} + +static void transform_DP2(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_src_register src0 = inst->U.I.SrcReg[0]; + struct rc_src_register src1 = inst->U.I.SrcReg[1]; + src0.Negate &= ~(RC_MASK_Z | RC_MASK_W); + src0.Swizzle &= ~(63 << (3 * 2)); + src0.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3)); + src1.Negate &= ~(RC_MASK_Z | RC_MASK_W); + src1.Swizzle &= ~(63 << (3 * 2)); + src1.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3)); + emit2(c, inst->Prev, RC_OPCODE_DP3, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1); + rc_remove_instruction(inst); +} + +static void transform_DPH(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_src_register src0 = inst->U.I.SrcReg[0]; + src0.Negate &= ~RC_MASK_W; + src0.Swizzle &= ~(7 << (3 * 3)); + src0.Swizzle |= RC_SWIZZLE_ONE << (3 * 3); + emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, inst->U.I.SrcReg[1]); + rc_remove_instruction(inst); +} + +/** + * [1, src0.y*src1.y, src0.z, src1.w] + * So basically MUL with lotsa swizzling. + */ +static void transform_DST(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + emit2(c, inst->Prev, RC_OPCODE_MUL, inst->U.I.SaturateMode, inst->U.I.DstReg, + swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ONE), + swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_ONE, RC_SWIZZLE_W)); + rc_remove_instruction(inst); +} + +static void transform_FLR(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, inst->U.I.SrcReg[0]); + emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg, + inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, dst.Index))); + rc_remove_instruction(inst); +} + +/** + * Definition of LIT (from ARB_fragment_program): + * + * tmp = VectorLoad(op0); + * if (tmp.x < 0) tmp.x = 0; + * if (tmp.y < 0) tmp.y = 0; + * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon); + * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon; + * result.x = 1.0; + * result.y = tmp.x; + * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0; + * result.w = 1.0; + * + * The longest path of computation is the one leading to result.z, + * consisting of 5 operations. This implementation of LIT takes + * 5 slots, if the subsequent optimization passes are clever enough + * to pair instructions correctly. + */ +static void transform_LIT(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + unsigned int constant; + unsigned int constant_swizzle; + unsigned int temp; + struct rc_src_register srctemp; + + constant = rc_constants_add_immediate_scalar(&c->Program.Constants, -127.999999, &constant_swizzle); + + if (inst->U.I.DstReg.WriteMask != RC_MASK_XYZW || inst->U.I.DstReg.File != RC_FILE_TEMPORARY) { + struct rc_instruction * inst_mov; + + inst_mov = emit1(c, inst, + RC_OPCODE_MOV, 0, inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, rc_find_free_temporary(c))); + + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + } + + temp = inst->U.I.DstReg.Index; + srctemp = srcreg(RC_FILE_TEMPORARY, temp); + + /* tmp.x = max(0.0, Src.x); */ + /* tmp.y = max(0.0, Src.y); */ + /* tmp.w = clamp(Src.z, -128+eps, 128-eps); */ + emit2(c, inst->Prev, RC_OPCODE_MAX, 0, + dstregtmpmask(temp, RC_MASK_XYW), + inst->U.I.SrcReg[0], + swizzle(srcreg(RC_FILE_CONSTANT, constant), + RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, constant_swizzle&3)); + emit2(c, inst->Prev, RC_OPCODE_MIN, 0, + dstregtmpmask(temp, RC_MASK_Z), + swizzle_wwww(srctemp), + negate(srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle))); + + /* tmp.w = Pow(tmp.y, tmp.w) */ + emit1(c, inst->Prev, RC_OPCODE_LG2, 0, + dstregtmpmask(temp, RC_MASK_W), + swizzle_yyyy(srctemp)); + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, + dstregtmpmask(temp, RC_MASK_W), + swizzle_wwww(srctemp), + swizzle_zzzz(srctemp)); + emit1(c, inst->Prev, RC_OPCODE_EX2, 0, + dstregtmpmask(temp, RC_MASK_W), + swizzle_wwww(srctemp)); + + /* tmp.z = (tmp.x > 0) ? tmp.w : 0.0 */ + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, + dstregtmpmask(temp, RC_MASK_Z), + negate(swizzle_xxxx(srctemp)), + swizzle_wwww(srctemp), + builtin_zero); + + /* tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 */ + emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, + dstregtmpmask(temp, RC_MASK_XYW), + swizzle(srctemp, RC_SWIZZLE_ONE, RC_SWIZZLE_X, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE)); + + rc_remove_instruction(inst); +} + +static void transform_LRP(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, + dst, + inst->U.I.SrcReg[1], negate(inst->U.I.SrcReg[2])); + emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, + inst->U.I.DstReg, + inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[2]); + + rc_remove_instruction(inst); +} + +static void transform_POW(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_dst_register tempdst = try_to_reuse_dst(c, inst); + struct rc_src_register tempsrc = srcreg(RC_FILE_TEMPORARY, tempdst.Index); + tempdst.WriteMask = RC_MASK_W; + tempsrc.Swizzle = RC_SWIZZLE_WWWW; + + emit1(c, inst->Prev, RC_OPCODE_LG2, 0, tempdst, swizzle_xxxx(inst->U.I.SrcReg[0])); + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, tempdst, tempsrc, swizzle_xxxx(inst->U.I.SrcReg[1])); + emit1(c, inst->Prev, RC_OPCODE_EX2, inst->U.I.SaturateMode, inst->U.I.DstReg, tempsrc); + + rc_remove_instruction(inst); +} + +static void transform_RSQ(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + inst->U.I.SrcReg[0] = absolute(inst->U.I.SrcReg[0]); +} + +static void transform_SEQ(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_zero, builtin_one); + + rc_remove_instruction(inst); +} + +static void transform_SFL(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, builtin_zero); + rc_remove_instruction(inst); +} + +static void transform_SGE(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one); + + rc_remove_instruction(inst); +} + +static void transform_SGT(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero); + + rc_remove_instruction(inst); +} + +static void transform_SLE(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one); + + rc_remove_instruction(inst); +} + +static void transform_SLT(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero); + + rc_remove_instruction(inst); +} + +static void transform_SNE(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_one, builtin_zero); + + rc_remove_instruction(inst); +} + +static void transform_SSG(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* result = sign(x) + * + * CMP tmp0, -x, 1, 0 + * CMP tmp1, x, 1, 0 + * ADD result, tmp0, -tmp1; + */ + struct rc_dst_register dst0; + unsigned tmp1; + + /* 0 < x */ + dst0 = try_to_reuse_dst(c, inst); + emit3(c, inst->Prev, RC_OPCODE_CMP, 0, + dst0, + negate(inst->U.I.SrcReg[0]), + builtin_one, + builtin_zero); + + /* x < 0 */ + tmp1 = rc_find_free_temporary(c); + emit3(c, inst->Prev, RC_OPCODE_CMP, 0, + dstregtmpmask(tmp1, inst->U.I.DstReg.WriteMask), + inst->U.I.SrcReg[0], + builtin_one, + builtin_zero); + + /* Either both are zero, or one of them is one and the other is zero. */ + /* result = tmp0 - tmp1 */ + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, + inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, dst0.Index), + negate(srcreg(RC_FILE_TEMPORARY, tmp1))); + + rc_remove_instruction(inst); +} + +static void transform_SUB(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + inst->U.I.Opcode = RC_OPCODE_ADD; + inst->U.I.SrcReg[1] = negate(inst->U.I.SrcReg[1]); +} + +static void transform_SWZ(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + inst->U.I.Opcode = RC_OPCODE_MOV; +} + +static void transform_XPD(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dst, + swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W), + swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W)); + emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, inst->U.I.DstReg, + swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W), + swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W), + negate(srcreg(RC_FILE_TEMPORARY, dst.Index))); + + rc_remove_instruction(inst); +} + + +/** + * Can be used as a transformation for @ref radeonClauseLocalTransform, + * no userData necessary. + * + * Eliminates the following ALU instructions: + * ABS, CEIL, DPH, DST, FLR, LIT, LRP, POW, SEQ, SFL, SGE, SGT, SLE, SLT, SNE, SUB, SWZ, XPD + * using: + * MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP + * + * Transforms RSQ to Radeon's native RSQ by explicitly setting + * absolute value. + * + * @note should be applicable to R300 and R500 fragment programs. + */ +int radeonTransformALU( + struct radeon_compiler * c, + struct rc_instruction* inst, + void* unused) +{ + switch(inst->U.I.Opcode) { + case RC_OPCODE_ABS: transform_ABS(c, inst); return 1; + case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; + case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); return 1; + case RC_OPCODE_DP2: transform_DP2(c, inst); return 1; + case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; + case RC_OPCODE_DST: transform_DST(c, inst); return 1; + case RC_OPCODE_FLR: transform_FLR(c, inst); return 1; + case RC_OPCODE_LIT: transform_LIT(c, inst); return 1; + case RC_OPCODE_LRP: transform_LRP(c, inst); return 1; + case RC_OPCODE_POW: transform_POW(c, inst); return 1; + case RC_OPCODE_RSQ: transform_RSQ(c, inst); return 1; + case RC_OPCODE_SEQ: transform_SEQ(c, inst); return 1; + case RC_OPCODE_SFL: transform_SFL(c, inst); return 1; + case RC_OPCODE_SGE: transform_SGE(c, inst); return 1; + case RC_OPCODE_SGT: transform_SGT(c, inst); return 1; + case RC_OPCODE_SLE: transform_SLE(c, inst); return 1; + case RC_OPCODE_SLT: transform_SLT(c, inst); return 1; + case RC_OPCODE_SNE: transform_SNE(c, inst); return 1; + case RC_OPCODE_SSG: transform_SSG(c, inst); return 1; + case RC_OPCODE_SUB: transform_SUB(c, inst); return 1; + case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1; + case RC_OPCODE_XPD: transform_XPD(c, inst); return 1; + default: + return 0; + } +} + + +static void transform_r300_vertex_ABS(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* Note: r500 can take absolute values, but r300 cannot. */ + inst->U.I.Opcode = RC_OPCODE_MAX; + inst->U.I.SrcReg[1] = inst->U.I.SrcReg[0]; + inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; +} + +static void transform_r300_vertex_CMP(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* There is no decent CMP available, so let's rig one up. + * CMP is defined as dst = src0 < 0.0 ? src1 : src2 + * The following sequence consumes zero to two temps and two extra slots + * (the second temp and the second slot is consumed by transform_LRP), + * but should be equivalent: + * + * SLT tmp0, src0, 0.0 + * LRP dst, tmp0, src1, src2 + * + * Yes, I know, I'm a mad scientist. ~ C. & M. */ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + + /* SLT tmp0, src0, 0.0 */ + emit2(c, inst->Prev, RC_OPCODE_SLT, 0, + dst, + inst->U.I.SrcReg[0], builtin_zero); + + /* LRP dst, tmp0, src1, src2 */ + transform_LRP(c, + emit3(c, inst->Prev, RC_OPCODE_LRP, 0, + inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1], inst->U.I.SrcReg[2])); + + rc_remove_instruction(inst); +} + +static void transform_r300_vertex_DP2(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_instruction *next_inst = inst->Next; + transform_DP2(c, inst); + next_inst->Prev->U.I.Opcode = RC_OPCODE_DP4; +} + +static void transform_r300_vertex_DP3(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_src_register src0 = inst->U.I.SrcReg[0]; + struct rc_src_register src1 = inst->U.I.SrcReg[1]; + src0.Negate &= ~RC_MASK_W; + src0.Swizzle &= ~(7 << (3 * 3)); + src0.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3); + src1.Negate &= ~RC_MASK_W; + src1.Swizzle &= ~(7 << (3 * 3)); + src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3); + emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1); + rc_remove_instruction(inst); +} + +static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + unsigned constant_swizzle; + int constant = rc_constants_add_immediate_scalar(&c->Program.Constants, + 0.0000000000000000001, + &constant_swizzle); + + /* MOV dst, src */ + dst.WriteMask = RC_MASK_XYZW; + emit1(c, inst->Prev, RC_OPCODE_MOV, 0, + dst, + inst->U.I.SrcReg[0]); + + /* MAX dst.y, src, 0.00...001 */ + emit2(c, inst->Prev, RC_OPCODE_MAX, 0, + dstregtmpmask(dst.Index, RC_MASK_Y), + srcreg(RC_FILE_TEMPORARY, dst.Index), + srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)); + + inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, dst.Index); +} + +static void transform_r300_vertex_SEQ(struct radeon_compiler *c, + struct rc_instruction *inst) +{ + /* x = y <==> x >= y && y >= x */ + int tmp = rc_find_free_temporary(c); + + /* x <= y */ + emit2(c, inst->Prev, RC_OPCODE_SGE, 0, + dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask), + inst->U.I.SrcReg[0], + inst->U.I.SrcReg[1]); + + /* y <= x */ + emit2(c, inst->Prev, RC_OPCODE_SGE, 0, + inst->U.I.DstReg, + inst->U.I.SrcReg[1], + inst->U.I.SrcReg[0]); + + /* x && y = x * y */ + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, + inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, tmp), + srcreg(inst->U.I.DstReg.File, inst->U.I.DstReg.Index)); + + rc_remove_instruction(inst); +} + +static void transform_r300_vertex_SNE(struct radeon_compiler *c, + struct rc_instruction *inst) +{ + /* x != y <==> x < y || y < x */ + int tmp = rc_find_free_temporary(c); + + /* x < y */ + emit2(c, inst->Prev, RC_OPCODE_SLT, 0, + dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask), + inst->U.I.SrcReg[0], + inst->U.I.SrcReg[1]); + + /* y < x */ + emit2(c, inst->Prev, RC_OPCODE_SLT, 0, + inst->U.I.DstReg, + inst->U.I.SrcReg[1], + inst->U.I.SrcReg[0]); + + /* x || y = max(x, y) */ + emit2(c, inst->Prev, RC_OPCODE_MAX, 0, + inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, tmp), + srcreg(inst->U.I.DstReg.File, inst->U.I.DstReg.Index)); + + rc_remove_instruction(inst); +} + +static void transform_r300_vertex_SGT(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* x > y <==> -x < -y */ + inst->U.I.Opcode = RC_OPCODE_SLT; + inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; + inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; +} + +static void transform_r300_vertex_SLE(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* x <= y <==> -x >= -y */ + inst->U.I.Opcode = RC_OPCODE_SGE; + inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; + inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; +} + +static void transform_r300_vertex_SSG(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* result = sign(x) + * + * SLT tmp0, 0, x; + * SLT tmp1, x, 0; + * ADD result, tmp0, -tmp1; + */ + struct rc_dst_register dst0 = try_to_reuse_dst(c, inst); + unsigned tmp1; + + /* 0 < x */ + dst0 = try_to_reuse_dst(c, inst); + emit2(c, inst->Prev, RC_OPCODE_SLT, 0, + dst0, + builtin_zero, + inst->U.I.SrcReg[0]); + + /* x < 0 */ + tmp1 = rc_find_free_temporary(c); + emit2(c, inst->Prev, RC_OPCODE_SLT, 0, + dstregtmpmask(tmp1, inst->U.I.DstReg.WriteMask), + inst->U.I.SrcReg[0], + builtin_zero); + + /* Either both are zero, or one of them is one and the other is zero. */ + /* result = tmp0 - tmp1 */ + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, + inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, dst0.Index), + negate(srcreg(RC_FILE_TEMPORARY, tmp1))); + + rc_remove_instruction(inst); +} + +/** + * For use with rc_local_transform, this transforms non-native ALU + * instructions of the r300 up to r500 vertex engine. + */ +int r300_transform_vertex_alu( + struct radeon_compiler * c, + struct rc_instruction* inst, + void* unused) +{ + switch(inst->U.I.Opcode) { + case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1; + case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; + case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); return 1; + case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1; + case RC_OPCODE_DP2: transform_r300_vertex_DP2(c, inst); return 1; + case RC_OPCODE_DP3: transform_r300_vertex_DP3(c, inst); return 1; + case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; + case RC_OPCODE_FLR: transform_FLR(c, inst); return 1; + case RC_OPCODE_LIT: transform_r300_vertex_fix_LIT(c, inst); return 1; + case RC_OPCODE_LRP: transform_LRP(c, inst); return 1; + case RC_OPCODE_SEQ: + if (!c->is_r500) { + transform_r300_vertex_SEQ(c, inst); + return 1; + } + return 0; + case RC_OPCODE_SFL: transform_SFL(c, inst); return 1; + case RC_OPCODE_SGT: transform_r300_vertex_SGT(c, inst); return 1; + case RC_OPCODE_SLE: transform_r300_vertex_SLE(c, inst); return 1; + case RC_OPCODE_SNE: + if (!c->is_r500) { + transform_r300_vertex_SNE(c, inst); + return 1; + } + return 0; + case RC_OPCODE_SSG: transform_r300_vertex_SSG(c, inst); return 1; + case RC_OPCODE_SUB: transform_SUB(c, inst); return 1; + case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1; + case RC_OPCODE_XPD: transform_XPD(c, inst); return 1; + default: + return 0; + } +} + +static void sincos_constants(struct radeon_compiler* c, unsigned int *constants) +{ + static const float SinCosConsts[2][4] = { + { + 1.273239545, /* 4/PI */ + -0.405284735, /* -4/(PI*PI) */ + 3.141592654, /* PI */ + 0.2225 /* weight */ + }, + { + 0.75, + 0.5, + 0.159154943, /* 1/(2*PI) */ + 6.283185307 /* 2*PI */ + } + }; + int i; + + for(i = 0; i < 2; ++i) + constants[i] = rc_constants_add_immediate_vec4(&c->Program.Constants, SinCosConsts[i]); +} + +/** + * Approximate sin(x), where x is clamped to (-pi/2, pi/2). + * + * MUL tmp.xy, src, { 4/PI, -4/(PI^2) } + * MAD tmp.x, tmp.y, |src|, tmp.x + * MAD tmp.y, tmp.x, |tmp.x|, -tmp.x + * MAD dest, tmp.y, weight, tmp.x + */ +static void sin_approx( + struct radeon_compiler* c, struct rc_instruction * inst, + struct rc_dst_register dst, struct rc_src_register src, const unsigned int* constants) +{ + unsigned int tempreg = rc_find_free_temporary(c); + + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(tempreg, RC_MASK_XY), + swizzle_xxxx(src), + srcreg(RC_FILE_CONSTANT, constants[0])); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_X), + swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)), + absolute(swizzle_xxxx(src)), + swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_Y), + swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)), + absolute(swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))), + negate(swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)))); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dst, + swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)), + swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[0])), + swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))); +} + +/** + * Translate the trigonometric functions COS, SIN, and SCS + * using only the basic instructions + * MOV, ADD, MUL, MAD, FRC + */ +int r300_transform_trig_simple(struct radeon_compiler* c, + struct rc_instruction* inst, + void* unused) +{ + unsigned int constants[2]; + unsigned int tempreg; + + if (inst->U.I.Opcode != RC_OPCODE_COS && + inst->U.I.Opcode != RC_OPCODE_SIN && + inst->U.I.Opcode != RC_OPCODE_SCS) + return 0; + + tempreg = rc_find_free_temporary(c); + + sincos_constants(c, constants); + + if (inst->U.I.Opcode == RC_OPCODE_COS) { + /* MAD tmp.x, src, 1/(2*PI), 0.75 */ + /* FRC tmp.x, tmp.x */ + /* MAD tmp.z, tmp.x, 2*PI, -PI */ + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle_xxxx(inst->U.I.SrcReg[0]), + swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])), + swizzle_xxxx(srcreg(RC_FILE_CONSTANT, constants[1]))); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg))); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)), + swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])), + negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0])))); + + sin_approx(c, inst, inst->U.I.DstReg, + swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)), + constants); + } else if (inst->U.I.Opcode == RC_OPCODE_SIN) { + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle_xxxx(inst->U.I.SrcReg[0]), + swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])), + swizzle_yyyy(srcreg(RC_FILE_CONSTANT, constants[1]))); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg))); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)), + swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])), + negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0])))); + + sin_approx(c, inst, inst->U.I.DstReg, + swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)), + constants); + } else { + struct rc_dst_register dst; + + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY), + swizzle_xxxx(inst->U.I.SrcReg[0]), + swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])), + swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W)); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_XY), + srcreg(RC_FILE_TEMPORARY, tempreg)); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY), + srcreg(RC_FILE_TEMPORARY, tempreg), + swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])), + negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0])))); + + dst = inst->U.I.DstReg; + + dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_X; + sin_approx(c, inst, dst, + swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)), + constants); + + dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_Y; + sin_approx(c, inst, dst, + swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)), + constants); + } + + rc_remove_instruction(inst); + + return 1; +} + +static void r300_transform_SIN_COS_SCS(struct radeon_compiler *c, + struct rc_instruction *inst, + unsigned srctmp) +{ + if (inst->U.I.Opcode == RC_OPCODE_COS) { + emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW)); + } else if (inst->U.I.Opcode == RC_OPCODE_SIN) { + emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, + inst->U.I.DstReg, srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW)); + } else if (inst->U.I.Opcode == RC_OPCODE_SCS) { + struct rc_dst_register moddst = inst->U.I.DstReg; + + if (inst->U.I.DstReg.WriteMask & RC_MASK_X) { + moddst.WriteMask = RC_MASK_X; + emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, moddst, + srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW)); + } + if (inst->U.I.DstReg.WriteMask & RC_MASK_Y) { + moddst.WriteMask = RC_MASK_Y; + emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, moddst, + srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW)); + } + } + + rc_remove_instruction(inst); +} + + +/** + * Transform the trigonometric functions COS, SIN, and SCS + * to include pre-scaling by 1/(2*PI) and taking the fractional + * part, so that the input to COS and SIN is always in the range [0,1). + * SCS is replaced by one COS and one SIN instruction. + * + * @warning This transformation implicitly changes the semantics of SIN and COS! + */ +int radeonTransformTrigScale(struct radeon_compiler* c, + struct rc_instruction* inst, + void* unused) +{ + static const float RCP_2PI = 0.15915494309189535; + unsigned int temp; + unsigned int constant; + unsigned int constant_swizzle; + + if (inst->U.I.Opcode != RC_OPCODE_COS && + inst->U.I.Opcode != RC_OPCODE_SIN && + inst->U.I.Opcode != RC_OPCODE_SCS) + return 0; + + temp = rc_find_free_temporary(c); + constant = rc_constants_add_immediate_scalar(&c->Program.Constants, RCP_2PI, &constant_swizzle); + + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(temp, RC_MASK_W), + swizzle_xxxx(inst->U.I.SrcReg[0]), + srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W), + srcreg(RC_FILE_TEMPORARY, temp)); + + r300_transform_SIN_COS_SCS(c, inst, temp); + return 1; +} + +/** + * Transform the trigonometric functions COS, SIN, and SCS + * so that the input to COS and SIN is always in the range [-PI, PI]. + * SCS is replaced by one COS and one SIN instruction. + */ +int r300_transform_trig_scale_vertex(struct radeon_compiler *c, + struct rc_instruction *inst, + void *unused) +{ + static const float cons[4] = {0.15915494309189535, 0.5, 6.28318530717959, -3.14159265358979}; + unsigned int temp; + unsigned int constant; + + if (inst->U.I.Opcode != RC_OPCODE_COS && + inst->U.I.Opcode != RC_OPCODE_SIN && + inst->U.I.Opcode != RC_OPCODE_SCS) + return 0; + + /* Repeat x in the range [-PI, PI]: + * + * repeat(x) = frac(x / 2PI + 0.5) * 2PI - PI + */ + + temp = rc_find_free_temporary(c); + constant = rc_constants_add_immediate_vec4(&c->Program.Constants, cons); + + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(temp, RC_MASK_W), + swizzle_xxxx(inst->U.I.SrcReg[0]), + srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_XXXX), + srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_YYYY)); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W), + srcreg(RC_FILE_TEMPORARY, temp)); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(temp, RC_MASK_W), + srcreg(RC_FILE_TEMPORARY, temp), + srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_ZZZZ), + srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_WWWW)); + + r300_transform_SIN_COS_SCS(c, inst, temp); + return 1; +} + +/** + * Rewrite DDX/DDY instructions to properly work with r5xx shaders. + * The r5xx MDH/MDV instruction provides per-quad partial derivatives. + * It takes the form A*B+C. A and C are set by setting src0. B should be -1. + * + * @warning This explicitly changes the form of DDX and DDY! + */ + +int radeonTransformDeriv(struct radeon_compiler* c, + struct rc_instruction* inst, + void* unused) +{ + if (inst->U.I.Opcode != RC_OPCODE_DDX && inst->U.I.Opcode != RC_OPCODE_DDY) + return 0; + + inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_1111; + inst->U.I.SrcReg[1].Negate = RC_MASK_XYZW; + + return 1; +} + +/** + * IF Temp[0].x -\ + * KILP - > KIL -abs(Temp[0].x) + * ENDIF -/ + * + * This needs to be done in its own pass, because it modifies the instructions + * before and after KILP. + */ +void rc_transform_KILP(struct radeon_compiler * c, void *user) +{ + struct rc_instruction * inst; + for (inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; inst = inst->Next) { + + if (inst->U.I.Opcode != RC_OPCODE_KILP) + continue; + + inst->U.I.Opcode = RC_OPCODE_KIL; + + if (inst->Prev->U.I.Opcode != RC_OPCODE_IF + || inst->Next->U.I.Opcode != RC_OPCODE_ENDIF) { + inst->U.I.SrcReg[0] = negate(builtin_one); + } else { + + inst->U.I.SrcReg[0] = + negate(absolute(inst->Prev->U.I.SrcReg[0])); + /* Remove IF */ + rc_remove_instruction(inst->Prev); + /* Remove ENDIF */ + rc_remove_instruction(inst->Next); + } + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_program_alu.h b/src/gallium/drivers/r300/compiler/radeon_program_alu.h new file mode 100644 index 00000000000..b5f361e624f --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_program_alu.h @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RADEON_PROGRAM_ALU_H_ +#define __RADEON_PROGRAM_ALU_H_ + +#include "radeon_program.h" + +int radeonTransformALU( + struct radeon_compiler * c, + struct rc_instruction * inst, + void*); + +int r300_transform_vertex_alu( + struct radeon_compiler * c, + struct rc_instruction * inst, + void*); + +int r300_transform_trig_simple( + struct radeon_compiler * c, + struct rc_instruction * inst, + void*); + +int radeonTransformTrigScale( + struct radeon_compiler * c, + struct rc_instruction * inst, + void*); + +int r300_transform_trig_scale_vertex( + struct radeon_compiler *c, + struct rc_instruction *inst, + void*); + +int radeonTransformDeriv( + struct radeon_compiler * c, + struct rc_instruction * inst, + void*); + +void rc_transform_KILP(struct radeon_compiler * c, + void *user); + +#endif /* __RADEON_PROGRAM_ALU_H_ */ diff --git a/src/gallium/drivers/r300/compiler/radeon_program_constants.h b/src/gallium/drivers/r300/compiler/radeon_program_constants.h new file mode 100644 index 00000000000..24577333450 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_program_constants.h @@ -0,0 +1,190 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_PROGRAM_CONSTANTS_H +#define RADEON_PROGRAM_CONSTANTS_H + +typedef enum { + RC_SATURATE_NONE = 0, + RC_SATURATE_ZERO_ONE, + RC_SATURATE_MINUS_PLUS_ONE +} rc_saturate_mode; + +typedef enum { + RC_TEXTURE_2D_ARRAY, + RC_TEXTURE_1D_ARRAY, + RC_TEXTURE_CUBE, + RC_TEXTURE_3D, + RC_TEXTURE_RECT, + RC_TEXTURE_2D, + RC_TEXTURE_1D +} rc_texture_target; + +typedef enum { + /** + * Used to indicate unused register descriptions and + * source register that use a constant swizzle. + */ + RC_FILE_NONE = 0, + RC_FILE_TEMPORARY, + + /** + * Input register. + * + * \note The compiler attaches no implicit semantics to input registers. + * Fragment/vertex program specific semantics must be defined explicitly + * using the appropriate compiler interfaces. + */ + RC_FILE_INPUT, + + /** + * Output register. + * + * \note The compiler attaches no implicit semantics to input registers. + * Fragment/vertex program specific semantics must be defined explicitly + * using the appropriate compiler interfaces. + */ + RC_FILE_OUTPUT, + RC_FILE_ADDRESS, + + /** + * Indicates a constant from the \ref rc_constant_list . + */ + RC_FILE_CONSTANT, + + /** + * Indicates a special register, see RC_SPECIAL_xxx. + */ + RC_FILE_SPECIAL, + + /** + * Indicates this register should use the result of the presubtract + * operation. + */ + RC_FILE_PRESUB +} rc_register_file; + +enum { + /** R500 fragment program ALU result "register" */ + RC_SPECIAL_ALU_RESULT = 0, + + /** Must be last */ + RC_NUM_SPECIAL_REGISTERS +}; + +#define RC_REGISTER_INDEX_BITS 10 +#define RC_REGISTER_MAX_INDEX (1 << RC_REGISTER_INDEX_BITS) + +typedef enum { + RC_SWIZZLE_X = 0, + RC_SWIZZLE_Y, + RC_SWIZZLE_Z, + RC_SWIZZLE_W, + RC_SWIZZLE_ZERO, + RC_SWIZZLE_ONE, + RC_SWIZZLE_HALF, + RC_SWIZZLE_UNUSED +} rc_swizzle; + +#define RC_MAKE_SWIZZLE(a,b,c,d) (((a)<<0) | ((b)<<3) | ((c)<<6) | ((d)<<9)) +#define RC_MAKE_SWIZZLE_SMEAR(a) RC_MAKE_SWIZZLE((a),(a),(a),(a)) +#define GET_SWZ(swz, idx) (((swz) >> ((idx)*3)) & 0x7) +#define GET_BIT(msk, idx) (((msk) >> (idx)) & 0x1) +#define SET_SWZ(swz, idx, newv) \ + do { \ + (swz) = ((swz) & ~(7 << ((idx)*3))) | ((newv) << ((idx)*3)); \ + } while(0) + +#define RC_SWIZZLE_XYZW RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W) +#define RC_SWIZZLE_XYZ0 RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO) +#define RC_SWIZZLE_XYZZ RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_Z) +#define RC_SWIZZLE_XXXX RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_X) +#define RC_SWIZZLE_YYYY RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Y) +#define RC_SWIZZLE_ZZZZ RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Z) +#define RC_SWIZZLE_WWWW RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_W) +#define RC_SWIZZLE_0000 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ZERO) +#define RC_SWIZZLE_1111 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ONE) +#define RC_SWIZZLE_HHHH RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_HALF) +#define RC_SWIZZLE_UUUU RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_UNUSED) + +/** + * \name Bitmasks for components of vectors. + * + * Used for write masks, negation masks, etc. + */ +/*@{*/ +#define RC_MASK_NONE 0 +#define RC_MASK_X 1 +#define RC_MASK_Y 2 +#define RC_MASK_Z 4 +#define RC_MASK_W 8 +#define RC_MASK_XY (RC_MASK_X|RC_MASK_Y) +#define RC_MASK_XYZ (RC_MASK_X|RC_MASK_Y|RC_MASK_Z) +#define RC_MASK_XYW (RC_MASK_X|RC_MASK_Y|RC_MASK_W) +#define RC_MASK_XYZW (RC_MASK_X|RC_MASK_Y|RC_MASK_Z|RC_MASK_W) +/*@}*/ + +typedef enum { + RC_ALURESULT_NONE = 0, + RC_ALURESULT_X, + RC_ALURESULT_W +} rc_write_aluresult; + +typedef enum { + RC_PRESUB_NONE = 0, + + /** 1 - 2 * src0 */ + RC_PRESUB_BIAS, + + /** src1 - src0 */ + RC_PRESUB_SUB, + + /** src1 + src0 */ + RC_PRESUB_ADD, + + /** 1 - src0 */ + RC_PRESUB_INV +} rc_presubtract_op; + +static inline int rc_presubtract_src_reg_count(rc_presubtract_op op){ + switch(op){ + case RC_PRESUB_BIAS: + case RC_PRESUB_INV: + return 1; + case RC_PRESUB_ADD: + case RC_PRESUB_SUB: + return 2; + default: + return 0; + } +} + +#define RC_SOURCE_NONE 0x0 +#define RC_SOURCE_RGB 0x1 +#define RC_SOURCE_ALPHA 0x2 + +#endif /* RADEON_PROGRAM_CONSTANTS_H */ diff --git a/src/gallium/drivers/r300/compiler/radeon_program_pair.c b/src/gallium/drivers/r300/compiler/radeon_program_pair.c new file mode 100644 index 00000000000..52315957520 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_program_pair.c @@ -0,0 +1,239 @@ +/* + * Copyright (C) 2008-2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program_pair.h" + +#include "radeon_compiler_util.h" + +#include <stdlib.h> + +/** + * Return the source slot where we installed the given register access, + * or -1 if no slot was free anymore. + */ +int rc_pair_alloc_source(struct rc_pair_instruction *pair, + unsigned int rgb, unsigned int alpha, + rc_register_file file, unsigned int index) +{ + int candidate = -1; + int candidate_quality = -1; + unsigned int alpha_used = 0; + unsigned int rgb_used = 0; + int i; + + if ((!rgb && !alpha) || file == RC_FILE_NONE) + return 0; + + /* Make sure only one presubtract operation is used per instruction. */ + if (file == RC_FILE_PRESUB) { + if (rgb && pair->RGB.Src[RC_PAIR_PRESUB_SRC].Used + && index != pair->RGB.Src[RC_PAIR_PRESUB_SRC].Index) { + return -1; + } + + if (alpha && pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Used + && index != pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) { + return -1; + } + } + + for(i = 0; i < 3; ++i) { + int q = 0; + if (rgb) { + if (pair->RGB.Src[i].Used) { + if (pair->RGB.Src[i].File != file || + pair->RGB.Src[i].Index != index) { + rgb_used++; + continue; + } + q++; + } + } + if (alpha) { + if (pair->Alpha.Src[i].Used) { + if (pair->Alpha.Src[i].File != file || + pair->Alpha.Src[i].Index != index) { + alpha_used++; + continue; + } + q++; + } + } + if (q > candidate_quality) { + candidate_quality = q; + candidate = i; + } + } + + if (file == RC_FILE_PRESUB) { + candidate = RC_PAIR_PRESUB_SRC; + } else if (candidate < 0 || (rgb && rgb_used > 2) + || (alpha && alpha_used > 2)) { + return -1; + } + + /* candidate >= 0 */ + + if (rgb) { + pair->RGB.Src[candidate].Used = 1; + pair->RGB.Src[candidate].File = file; + pair->RGB.Src[candidate].Index = index; + if (candidate == RC_PAIR_PRESUB_SRC) { + /* For registers with the RC_FILE_PRESUB file, + * the index stores the presubtract op. */ + int src_regs = rc_presubtract_src_reg_count(index); + for(i = 0; i < src_regs; i++) { + pair->RGB.Src[i].Used = 1; + } + } + } + if (alpha) { + pair->Alpha.Src[candidate].Used = 1; + pair->Alpha.Src[candidate].File = file; + pair->Alpha.Src[candidate].Index = index; + if (candidate == RC_PAIR_PRESUB_SRC) { + /* For registers with the RC_FILE_PRESUB file, + * the index stores the presubtract op. */ + int src_regs = rc_presubtract_src_reg_count(index); + for(i=0; i < src_regs; i++) { + pair->Alpha.Src[i].Used = 1; + } + } + } + + return candidate; +} + +static void pair_foreach_source_callback( + struct rc_pair_instruction * pair, + void * data, + rc_pair_foreach_src_fn cb, + unsigned int swz, + unsigned int src) +{ + /* swz > 3 means that the swizzle is either not used, or a constant + * swizzle (e.g. 0, 1, 0.5). */ + if(swz > 3) + return; + + if(swz == RC_SWIZZLE_W) { + if (src == RC_PAIR_PRESUB_SRC) { + unsigned int i; + unsigned int src_count = rc_presubtract_src_reg_count( + pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Index); + for(i = 0; i < src_count; i++) { + cb(data, &pair->Alpha.Src[i]); + } + } else { + cb(data, &pair->Alpha.Src[src]); + } + } else { + if (src == RC_PAIR_PRESUB_SRC) { + unsigned int i; + unsigned int src_count = rc_presubtract_src_reg_count( + pair->RGB.Src[RC_PAIR_PRESUB_SRC].Index); + for(i = 0; i < src_count; i++) { + cb(data, &pair->RGB.Src[i]); + } + } + else { + cb(data, &pair->RGB.Src[src]); + } + } +} + +void rc_pair_foreach_source_that_alpha_reads( + struct rc_pair_instruction * pair, + void * data, + rc_pair_foreach_src_fn cb) +{ + unsigned int i; + const struct rc_opcode_info * info = + rc_get_opcode_info(pair->Alpha.Opcode); + for(i = 0; i < info->NumSrcRegs; i++) { + pair_foreach_source_callback(pair, data, cb, + GET_SWZ(pair->Alpha.Arg[i].Swizzle, 0), + pair->Alpha.Arg[i].Source); + } +} + +void rc_pair_foreach_source_that_rgb_reads( + struct rc_pair_instruction * pair, + void * data, + rc_pair_foreach_src_fn cb) +{ + unsigned int i; + const struct rc_opcode_info * info = + rc_get_opcode_info(pair->RGB.Opcode); + for(i = 0; i < info->NumSrcRegs; i++) { + unsigned int chan; + unsigned int swz = RC_SWIZZLE_UNUSED; + /* Find a swizzle that is either X,Y,Z,or W. We assume here + * that if one channel swizzles X,Y, or Z, then none of the + * other channels swizzle W, and vice-versa. */ + for(chan = 0; chan < 4; chan++) { + swz = GET_SWZ(pair->RGB.Arg[i].Swizzle, chan); + if(swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y + || swz == RC_SWIZZLE_Z || swz == RC_SWIZZLE_W) + continue; + } + pair_foreach_source_callback(pair, data, cb, + swz, + pair->RGB.Arg[i].Source); + } +} + +struct rc_pair_instruction_source * rc_pair_get_src( + struct rc_pair_instruction * pair_inst, + struct rc_pair_instruction_arg * arg) +{ + unsigned int type; + + type = rc_source_type_swz(arg->Swizzle); + + if (type & RC_SOURCE_RGB) { + return &pair_inst->RGB.Src[arg->Source]; + } else if (type & RC_SOURCE_ALPHA) { + return &pair_inst->Alpha.Src[arg->Source]; + } else { + return NULL; + } +} + +int rc_pair_get_src_index( + struct rc_pair_instruction * pair_inst, + struct rc_pair_instruction_source * src) +{ + int i; + for (i = 0; i < 3; i++) { + if (&pair_inst->RGB.Src[i] == src + || &pair_inst->Alpha.Src[i] == src) { + return i; + } + } + return -1; +} diff --git a/src/gallium/drivers/r300/compiler/radeon_program_pair.h b/src/gallium/drivers/r300/compiler/radeon_program_pair.h new file mode 100644 index 00000000000..a957ea9f7a0 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_program_pair.h @@ -0,0 +1,137 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RADEON_PROGRAM_PAIR_H_ +#define __RADEON_PROGRAM_PAIR_H_ + +#include "radeon_code.h" +#include "radeon_opcodes.h" +#include "radeon_program_constants.h" + +struct radeon_compiler; + + +/** + * \file + * Represents a paired ALU instruction, as found in R300 and R500 + * fragment programs. + * + * Note that this representation is taking some liberties as far + * as register files are concerned, to allow separate register + * allocation. + * + * Also note that there are some subtleties in that the semantics + * of certain opcodes are implicitly changed in this representation; + * see \ref rc_pair_translate + */ + +/* For rgb and alpha instructions when arg[n].Source = RC_PAIR_PRESUB_SRC, then + * the presubtract value will be used, and + * {RGB,Alpha}.Src[RC_PAIR_PRESUB_SRC].File will be set to RC_FILE_PRESUB. + */ +#define RC_PAIR_PRESUB_SRC 3 + +struct rc_pair_instruction_source { + unsigned int Used:1; + unsigned int File:3; + unsigned int Index:RC_REGISTER_INDEX_BITS; +}; + +struct rc_pair_instruction_arg { + unsigned int Source:2; + unsigned int Swizzle:12; + unsigned int Abs:1; + unsigned int Negate:1; +}; + +struct rc_pair_sub_instruction { + unsigned int Opcode:8; + unsigned int DestIndex:RC_REGISTER_INDEX_BITS; + unsigned int WriteMask:4; + unsigned int Target:2; + unsigned int OutputWriteMask:3; + unsigned int DepthWriteMask:1; + unsigned int Saturate:1; + + struct rc_pair_instruction_source Src[4]; + struct rc_pair_instruction_arg Arg[3]; +}; + +struct rc_pair_instruction { + struct rc_pair_sub_instruction RGB; + struct rc_pair_sub_instruction Alpha; + + unsigned int WriteALUResult:2; + unsigned int ALUResultCompare:3; + unsigned int Nop:1; +}; + +typedef void (*rc_pair_foreach_src_fn) + (void *, struct rc_pair_instruction_source *); + +/** + * General helper functions for dealing with the paired instruction format. + */ +/*@{*/ +int rc_pair_alloc_source(struct rc_pair_instruction *pair, + unsigned int rgb, unsigned int alpha, + rc_register_file file, unsigned int index); + +void rc_pair_foreach_source_that_alpha_reads( + struct rc_pair_instruction * pair, + void * data, + rc_pair_foreach_src_fn cb); + +void rc_pair_foreach_source_that_rgb_reads( + struct rc_pair_instruction * pair, + void * data, + rc_pair_foreach_src_fn cb); + +struct rc_pair_instruction_source * rc_pair_get_src( + struct rc_pair_instruction * pair_inst, + struct rc_pair_instruction_arg * arg); + +int rc_pair_get_src_index( + struct rc_pair_instruction * pair_inst, + struct rc_pair_instruction_source * src); +/*@}*/ + + +/** + * Compiler passes that operate with the paired format. + */ +/*@{*/ +struct radeon_pair_handler; + +void rc_pair_translate(struct radeon_compiler *cc, void *user); +void rc_pair_schedule(struct radeon_compiler *cc, void *user); +void rc_pair_regalloc(struct radeon_compiler *cc, void *user); +void rc_pair_regalloc_inputs_only(struct radeon_compiler *cc, void *user); +void rc_pair_remove_dead_sources(struct radeon_compiler *c, void *user); +/*@}*/ + +#endif /* __RADEON_PROGRAM_PAIR_H_ */ diff --git a/src/gallium/drivers/r300/compiler/radeon_program_print.c b/src/gallium/drivers/r300/compiler/radeon_program_print.c new file mode 100644 index 00000000000..390d1319460 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_program_print.c @@ -0,0 +1,418 @@ +/* + * Copyright 2009 Nicolai Hähnle <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_program.h" + +#include <stdio.h> + +static const char * textarget_to_string(rc_texture_target target) +{ + switch(target) { + case RC_TEXTURE_2D_ARRAY: return "2D_ARRAY"; + case RC_TEXTURE_1D_ARRAY: return "1D_ARRAY"; + case RC_TEXTURE_CUBE: return "CUBE"; + case RC_TEXTURE_3D: return "3D"; + case RC_TEXTURE_RECT: return "RECT"; + case RC_TEXTURE_2D: return "2D"; + case RC_TEXTURE_1D: return "1D"; + default: return "BAD_TEXTURE_TARGET"; + } +} + +static const char * presubtract_op_to_string(rc_presubtract_op op) +{ + switch(op) { + case RC_PRESUB_NONE: + return "NONE"; + case RC_PRESUB_BIAS: + return "(1 - 2 * src0)"; + case RC_PRESUB_SUB: + return "(src1 - src0)"; + case RC_PRESUB_ADD: + return "(src1 + src0)"; + case RC_PRESUB_INV: + return "(1 - src0)"; + default: + return "BAD_PRESUBTRACT_OP"; + } +} + +static void rc_print_comparefunc(FILE * f, const char * lhs, rc_compare_func func, const char * rhs) +{ + if (func == RC_COMPARE_FUNC_NEVER) { + fprintf(f, "false"); + } else if (func == RC_COMPARE_FUNC_ALWAYS) { + fprintf(f, "true"); + } else { + const char * op; + switch(func) { + case RC_COMPARE_FUNC_LESS: op = "<"; break; + case RC_COMPARE_FUNC_EQUAL: op = "=="; break; + case RC_COMPARE_FUNC_LEQUAL: op = "<="; break; + case RC_COMPARE_FUNC_GREATER: op = ">"; break; + case RC_COMPARE_FUNC_NOTEQUAL: op = "!="; break; + case RC_COMPARE_FUNC_GEQUAL: op = ">="; break; + default: op = "???"; break; + } + fprintf(f, "%s %s %s", lhs, op, rhs); + } +} + +static void rc_print_register(FILE * f, rc_register_file file, int index, unsigned int reladdr) +{ + if (file == RC_FILE_NONE) { + fprintf(f, "none"); + } else if (file == RC_FILE_SPECIAL) { + switch(index) { + case RC_SPECIAL_ALU_RESULT: fprintf(f, "aluresult"); break; + default: fprintf(f, "special[%i]", index); break; + } + } else { + const char * filename; + switch(file) { + case RC_FILE_TEMPORARY: filename = "temp"; break; + case RC_FILE_INPUT: filename = "input"; break; + case RC_FILE_OUTPUT: filename = "output"; break; + case RC_FILE_ADDRESS: filename = "addr"; break; + case RC_FILE_CONSTANT: filename = "const"; break; + default: filename = "BAD FILE"; break; + } + fprintf(f, "%s[%i%s]", filename, index, reladdr ? " + addr[0]" : ""); + } +} + +static void rc_print_mask(FILE * f, unsigned int mask) +{ + if (mask & RC_MASK_X) fprintf(f, "x"); + if (mask & RC_MASK_Y) fprintf(f, "y"); + if (mask & RC_MASK_Z) fprintf(f, "z"); + if (mask & RC_MASK_W) fprintf(f, "w"); +} + +static void rc_print_dst_register(FILE * f, struct rc_dst_register dst) +{ + rc_print_register(f, dst.File, dst.Index, 0); + if (dst.WriteMask != RC_MASK_XYZW) { + fprintf(f, "."); + rc_print_mask(f, dst.WriteMask); + } +} + +static char rc_swizzle_char(unsigned int swz) +{ + switch(swz) { + case RC_SWIZZLE_X: return 'x'; + case RC_SWIZZLE_Y: return 'y'; + case RC_SWIZZLE_Z: return 'z'; + case RC_SWIZZLE_W: return 'w'; + case RC_SWIZZLE_ZERO: return '0'; + case RC_SWIZZLE_ONE: return '1'; + case RC_SWIZZLE_HALF: return 'H'; + case RC_SWIZZLE_UNUSED: return '_'; + } + fprintf(stderr, "bad swz: %u\n", swz); + return '?'; +} + +static void rc_print_swizzle(FILE * f, unsigned int swizzle, unsigned int negate) +{ + unsigned int comp; + for(comp = 0; comp < 4; ++comp) { + rc_swizzle swz = GET_SWZ(swizzle, comp); + if (GET_BIT(negate, comp)) + fprintf(f, "-"); + fprintf(f, "%c", rc_swizzle_char(swz)); + } +} + +static void rc_print_presub_instruction(FILE * f, + struct rc_presub_instruction inst) +{ + fprintf(f,"("); + switch(inst.Opcode){ + case RC_PRESUB_BIAS: + fprintf(f, "1 - 2 * "); + rc_print_register(f, inst.SrcReg[0].File, + inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr); + break; + case RC_PRESUB_SUB: + rc_print_register(f, inst.SrcReg[1].File, + inst.SrcReg[1].Index,inst.SrcReg[1].RelAddr); + fprintf(f, " - "); + rc_print_register(f, inst.SrcReg[0].File, + inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr); + break; + case RC_PRESUB_ADD: + rc_print_register(f, inst.SrcReg[1].File, + inst.SrcReg[1].Index,inst.SrcReg[1].RelAddr); + fprintf(f, " + "); + rc_print_register(f, inst.SrcReg[0].File, + inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr); + break; + case RC_PRESUB_INV: + fprintf(f, "1 - "); + rc_print_register(f, inst.SrcReg[0].File, + inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr); + break; + default: + break; + } + fprintf(f, ")"); +} + +static void rc_print_src_register(FILE * f, struct rc_instruction * inst, + struct rc_src_register src) +{ + int trivial_negate = (src.Negate == RC_MASK_NONE || src.Negate == RC_MASK_XYZW); + + if (src.Negate == RC_MASK_XYZW) + fprintf(f, "-"); + if (src.Abs) + fprintf(f, "|"); + + if(src.File == RC_FILE_PRESUB) + rc_print_presub_instruction(f, inst->U.I.PreSub); + else + rc_print_register(f, src.File, src.Index, src.RelAddr); + + if (src.Abs && !trivial_negate) + fprintf(f, "|"); + + if (src.Swizzle != RC_SWIZZLE_XYZW || !trivial_negate) { + fprintf(f, "."); + rc_print_swizzle(f, src.Swizzle, trivial_negate ? 0 : src.Negate); + } + + if (src.Abs && trivial_negate) + fprintf(f, "|"); +} + +static unsigned update_branch_depth(rc_opcode opcode, unsigned *branch_depth) +{ + switch (opcode) { + case RC_OPCODE_IF: + case RC_OPCODE_BGNLOOP: + return (*branch_depth)++ * 2; + + case RC_OPCODE_ENDIF: + case RC_OPCODE_ENDLOOP: + assert(*branch_depth > 0); + return --(*branch_depth) * 2; + + case RC_OPCODE_ELSE: + assert(*branch_depth > 0); + return (*branch_depth - 1) * 2; + + default: + return *branch_depth * 2; + } +} + +static void rc_print_normal_instruction(FILE * f, struct rc_instruction * inst, unsigned *branch_depth) +{ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned int reg; + unsigned spaces = update_branch_depth(inst->U.I.Opcode, branch_depth); + + for (unsigned i = 0; i < spaces; i++) + fprintf(f, " "); + + fprintf(f, "%s", opcode->Name); + + switch(inst->U.I.SaturateMode) { + case RC_SATURATE_NONE: break; + case RC_SATURATE_ZERO_ONE: fprintf(f, "_SAT"); break; + case RC_SATURATE_MINUS_PLUS_ONE: fprintf(f, "_SAT2"); break; + default: fprintf(f, "_BAD_SAT"); break; + } + + if (opcode->HasDstReg) { + fprintf(f, " "); + rc_print_dst_register(f, inst->U.I.DstReg); + if (opcode->NumSrcRegs) + fprintf(f, ","); + } + + for(reg = 0; reg < opcode->NumSrcRegs; ++reg) { + if (reg > 0) + fprintf(f, ","); + fprintf(f, " "); + rc_print_src_register(f, inst, inst->U.I.SrcReg[reg]); + } + + if (opcode->HasTexture) { + fprintf(f, ", %s%s[%u]", + textarget_to_string(inst->U.I.TexSrcTarget), + inst->U.I.TexShadow ? "SHADOW" : "", + inst->U.I.TexSrcUnit); + } + + fprintf(f, ";"); + + if (inst->U.I.WriteALUResult) { + fprintf(f, " [aluresult = ("); + rc_print_comparefunc(f, + (inst->U.I.WriteALUResult == RC_ALURESULT_X) ? "x" : "w", + inst->U.I.ALUResultCompare, "0"); + fprintf(f, ")]"); + } + + fprintf(f, "\n"); +} + +static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst, unsigned *branch_depth) +{ + struct rc_pair_instruction * inst = &fullinst->U.P; + int printedsrc = 0; + unsigned spaces = update_branch_depth(inst->RGB.Opcode != RC_OPCODE_NOP ? + inst->RGB.Opcode : inst->Alpha.Opcode, branch_depth); + + for (unsigned i = 0; i < spaces; i++) + fprintf(f, " "); + + for(unsigned int src = 0; src < 3; ++src) { + if (inst->RGB.Src[src].Used) { + if (printedsrc) + fprintf(f, ", "); + fprintf(f, "src%i.xyz = ", src); + rc_print_register(f, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, 0); + printedsrc = 1; + } + if (inst->Alpha.Src[src].Used) { + if (printedsrc) + fprintf(f, ", "); + fprintf(f, "src%i.w = ", src); + rc_print_register(f, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, 0); + printedsrc = 1; + } + } + if(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) { + fprintf(f, ", srcp.xyz = %s", + presubtract_op_to_string( + inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index)); + } + if(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) { + fprintf(f, ", srcp.w = %s", + presubtract_op_to_string( + inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index)); + } + fprintf(f, "\n"); + + if (inst->RGB.Opcode != RC_OPCODE_NOP) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->RGB.Opcode); + + for (unsigned i = 0; i < spaces; i++) + fprintf(f, " "); + + fprintf(f, " %s%s", opcode->Name, inst->RGB.Saturate ? "_SAT" : ""); + if (inst->RGB.WriteMask) + fprintf(f, " temp[%i].%s%s%s", inst->RGB.DestIndex, + (inst->RGB.WriteMask & 1) ? "x" : "", + (inst->RGB.WriteMask & 2) ? "y" : "", + (inst->RGB.WriteMask & 4) ? "z" : ""); + if (inst->RGB.OutputWriteMask) + fprintf(f, " color[%i].%s%s%s", inst->RGB.Target, + (inst->RGB.OutputWriteMask & 1) ? "x" : "", + (inst->RGB.OutputWriteMask & 2) ? "y" : "", + (inst->RGB.OutputWriteMask & 4) ? "z" : ""); + if (inst->WriteALUResult == RC_ALURESULT_X) + fprintf(f, " aluresult"); + + for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { + const char* abs = inst->RGB.Arg[arg].Abs ? "|" : ""; + const char* neg = inst->RGB.Arg[arg].Negate ? "-" : ""; + fprintf(f, ", %s%ssrc", neg, abs); + if(inst->RGB.Arg[arg].Source == RC_PAIR_PRESUB_SRC) + fprintf(f,"p"); + else + fprintf(f,"%d", inst->RGB.Arg[arg].Source); + fprintf(f,".%c%c%c%s", + rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 0)), + rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 1)), + rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 2)), + abs); + } + fprintf(f, "\n"); + } + + if (inst->Alpha.Opcode != RC_OPCODE_NOP) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Alpha.Opcode); + + for (unsigned i = 0; i < spaces; i++) + fprintf(f, " "); + + fprintf(f, " %s%s", opcode->Name, inst->Alpha.Saturate ? "_SAT" : ""); + if (inst->Alpha.WriteMask) + fprintf(f, " temp[%i].w", inst->Alpha.DestIndex); + if (inst->Alpha.OutputWriteMask) + fprintf(f, " color[%i].w", inst->Alpha.Target); + if (inst->Alpha.DepthWriteMask) + fprintf(f, " depth.w"); + if (inst->WriteALUResult == RC_ALURESULT_W) + fprintf(f, " aluresult"); + + for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { + const char* abs = inst->Alpha.Arg[arg].Abs ? "|" : ""; + const char* neg = inst->Alpha.Arg[arg].Negate ? "-" : ""; + fprintf(f, ", %s%ssrc", neg, abs); + if(inst->Alpha.Arg[arg].Source == RC_PAIR_PRESUB_SRC) + fprintf(f,"p"); + else + fprintf(f,"%d", inst->Alpha.Arg[arg].Source); + fprintf(f,".%c%s", + rc_swizzle_char(GET_SWZ(inst->Alpha.Arg[arg].Swizzle, 0)), abs); + } + fprintf(f, "\n"); + } + + if (inst->WriteALUResult) { + for (unsigned i = 0; i < spaces; i++) + fprintf(f, " "); + + fprintf(f, " [aluresult = ("); + rc_print_comparefunc(f, "result", inst->ALUResultCompare, "0"); + fprintf(f, ")]\n"); + } +} + +/** + * Print program to stderr, default options. + */ +void rc_print_program(const struct rc_program *prog) +{ + unsigned int linenum = 0; + unsigned branch_depth = 0; + struct rc_instruction *inst; + + fprintf(stderr, "# Radeon Compiler Program\n"); + + for(inst = prog->Instructions.Next; inst != &prog->Instructions; inst = inst->Next) { + fprintf(stderr, "%3d: ", linenum); + + if (inst->Type == RC_INSTRUCTION_PAIR) + rc_print_pair_instruction(stderr, inst, &branch_depth); + else + rc_print_normal_instruction(stderr, inst, &branch_depth); + + linenum++; + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_program_tex.c b/src/gallium/drivers/r300/compiler/radeon_program_tex.c new file mode 100644 index 00000000000..9d69ebd18fb --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_program_tex.c @@ -0,0 +1,528 @@ +/* + * Copyright (C) 2010 Corbin Simpson + * Copyright (C) 2010 Marek Olšák <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program_tex.h" + +#include "radeon_compiler_util.h" + +/* Series of transformations to be done on textures. */ + +static struct rc_src_register shadow_fail_value(struct r300_fragment_program_compiler *compiler, + int tmu) +{ + struct rc_src_register reg = { 0, 0, 0, 0, 0, 0 }; + + if (compiler->enable_shadow_ambient) { + reg.File = RC_FILE_CONSTANT; + reg.Index = rc_constants_add_state(&compiler->Base.Program.Constants, + RC_STATE_SHADOW_AMBIENT, tmu); + reg.Swizzle = RC_SWIZZLE_WWWW; + } else { + reg.File = RC_FILE_NONE; + reg.Swizzle = RC_SWIZZLE_0000; + } + + reg.Swizzle = combine_swizzles(reg.Swizzle, + compiler->state.unit[tmu].texture_swizzle); + return reg; +} + +static struct rc_src_register shadow_pass_value(struct r300_fragment_program_compiler *compiler, + int tmu) +{ + struct rc_src_register reg = { 0, 0, 0, 0, 0, 0 }; + + reg.File = RC_FILE_NONE; + reg.Swizzle = combine_swizzles(RC_SWIZZLE_1111, + compiler->state.unit[tmu].texture_swizzle); + return reg; +} + +static void scale_texcoords(struct r300_fragment_program_compiler *compiler, + struct rc_instruction *inst, + unsigned state_constant) +{ + struct rc_instruction *inst_mov; + + unsigned temp = rc_find_free_temporary(&compiler->Base); + + inst_mov = rc_insert_new_instruction(&compiler->Base, inst->Prev); + + inst_mov->U.I.Opcode = RC_OPCODE_MUL; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = temp; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mov->U.I.SrcReg[1].File = RC_FILE_CONSTANT; + inst_mov->U.I.SrcReg[1].Index = + rc_constants_add_state(&compiler->Base.Program.Constants, + state_constant, inst->U.I.TexSrcUnit); + + reset_srcreg(&inst->U.I.SrcReg[0]); + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = temp; +} + +static void projective_divide(struct r300_fragment_program_compiler *compiler, + struct rc_instruction *inst) +{ + struct rc_instruction *inst_mul, *inst_rcp; + + unsigned temp = rc_find_free_temporary(&compiler->Base); + + inst_rcp = rc_insert_new_instruction(&compiler->Base, inst->Prev); + inst_rcp->U.I.Opcode = RC_OPCODE_RCP; + inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_rcp->U.I.DstReg.Index = temp; + inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; + inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + /* Because the input can be arbitrarily swizzled, + * read the component mapped to W. */ + inst_rcp->U.I.SrcReg[0].Swizzle = + RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3)); + + inst_mul = rc_insert_new_instruction(&compiler->Base, inst->Prev); + inst_mul->U.I.Opcode = RC_OPCODE_MUL; + inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mul->U.I.DstReg.Index = temp; + inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_mul->U.I.SrcReg[1].Index = temp; + inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; + + reset_srcreg(&inst->U.I.SrcReg[0]); + inst->U.I.Opcode = RC_OPCODE_TEX; + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = temp; +} + +/** + * Transform TEX, TXP, TXB, and KIL instructions in the following ways: + * - implement texture compare (shadow extensions) + * - extract non-native source / destination operands + * - premultiply texture coordinates for RECT + * - extract operand swizzles + * - introduce a temporary register when write masks are needed + */ +int radeonTransformTEX( + struct radeon_compiler * c, + struct rc_instruction * inst, + void* data) +{ + struct r300_fragment_program_compiler *compiler = + (struct r300_fragment_program_compiler*)data; + rc_wrap_mode wrapmode = compiler->state.unit[inst->U.I.TexSrcUnit].wrap_mode; + int is_rect = inst->U.I.TexSrcTarget == RC_TEXTURE_RECT || + compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords; + + if (inst->U.I.Opcode != RC_OPCODE_TEX && + inst->U.I.Opcode != RC_OPCODE_TXB && + inst->U.I.Opcode != RC_OPCODE_TXP && + inst->U.I.Opcode != RC_OPCODE_TXD && + inst->U.I.Opcode != RC_OPCODE_TXL && + inst->U.I.Opcode != RC_OPCODE_KIL) + return 0; + + /* ARB_shadow & EXT_shadow_funcs */ + if (inst->U.I.Opcode != RC_OPCODE_KIL && + ((c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) || + (compiler->state.unit[inst->U.I.TexSrcUnit].compare_mode_enabled))) { + rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func; + + if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) { + inst->U.I.Opcode = RC_OPCODE_MOV; + + if (comparefunc == RC_COMPARE_FUNC_ALWAYS) { + inst->U.I.SrcReg[0] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit); + } else { + inst->U.I.SrcReg[0] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit); + } + + return 1; + } else { + struct rc_instruction * inst_rcp = NULL; + struct rc_instruction *inst_mul, *inst_add, *inst_cmp; + unsigned tmp_texsample; + unsigned tmp_sum; + int pass, fail; + + /* Save the output register. */ + struct rc_dst_register output_reg = inst->U.I.DstReg; + unsigned saturate_mode = inst->U.I.SaturateMode; + + /* Redirect TEX to a new temp. */ + tmp_texsample = rc_find_free_temporary(c); + inst->U.I.SaturateMode = 0; + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = tmp_texsample; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + + tmp_sum = rc_find_free_temporary(c); + + if (inst->U.I.Opcode == RC_OPCODE_TXP) { + /* Compute 1/W. */ + inst_rcp = rc_insert_new_instruction(c, inst); + inst_rcp->U.I.Opcode = RC_OPCODE_RCP; + inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_rcp->U.I.DstReg.Index = tmp_sum; + inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; + inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_rcp->U.I.SrcReg[0].Swizzle = + RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3)); + } + + /* Divide Z by W (if it's TXP) and saturate. */ + inst_mul = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst); + inst_mul->U.I.Opcode = inst->U.I.Opcode == RC_OPCODE_TXP ? RC_OPCODE_MUL : RC_OPCODE_MOV; + inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mul->U.I.DstReg.Index = tmp_sum; + inst_mul->U.I.DstReg.WriteMask = RC_MASK_W; + inst_mul->U.I.SaturateMode = RC_SATURATE_ZERO_ONE; + inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mul->U.I.SrcReg[0].Swizzle = + RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 2)); + if (inst->U.I.Opcode == RC_OPCODE_TXP) { + inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_mul->U.I.SrcReg[1].Index = tmp_sum; + inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; + } + + /* Add the depth texture value. */ + inst_add = rc_insert_new_instruction(c, inst_mul); + inst_add->U.I.Opcode = RC_OPCODE_ADD; + inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_add->U.I.DstReg.Index = tmp_sum; + inst_add->U.I.DstReg.WriteMask = RC_MASK_W; + inst_add->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_add->U.I.SrcReg[0].Index = tmp_sum; + inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; + inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_add->U.I.SrcReg[1].Index = tmp_texsample; + inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX; + + /* Note that SrcReg[0] is r, SrcReg[1] is tex and: + * LESS: r < tex <=> -tex+r < 0 + * GEQUAL: r >= tex <=> not (-tex+r < 0) + * GREATER: r > tex <=> tex-r < 0 + * LEQUAL: r <= tex <=> not ( tex-r < 0) + * EQUAL: GEQUAL + * NOTEQUAL:LESS + */ + + /* This negates either r or tex: */ + if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL || + comparefunc == RC_COMPARE_FUNC_EQUAL || comparefunc == RC_COMPARE_FUNC_NOTEQUAL) + inst_add->U.I.SrcReg[1].Negate = inst_add->U.I.SrcReg[1].Negate ^ RC_MASK_XYZW; + else + inst_add->U.I.SrcReg[0].Negate = inst_add->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW; + + /* This negates the whole expresion: */ + if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER || + comparefunc == RC_COMPARE_FUNC_NOTEQUAL) { + pass = 1; + fail = 2; + } else { + pass = 2; + fail = 1; + } + + inst_cmp = rc_insert_new_instruction(c, inst_add); + inst_cmp->U.I.Opcode = RC_OPCODE_CMP; + inst_cmp->U.I.SaturateMode = saturate_mode; + inst_cmp->U.I.DstReg = output_reg; + inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_cmp->U.I.SrcReg[0].Index = tmp_sum; + inst_cmp->U.I.SrcReg[0].Swizzle = + combine_swizzles(RC_SWIZZLE_WWWW, + compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle); + inst_cmp->U.I.SrcReg[pass] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit); + inst_cmp->U.I.SrcReg[fail] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit); + + assert(tmp_texsample != tmp_sum); + } + } + + /* R300 cannot sample from rectangles and the wrap mode fallback needs + * normalized coordinates anyway. */ + if (inst->U.I.Opcode != RC_OPCODE_KIL && + is_rect && (!c->is_r500 || wrapmode != RC_WRAP_NONE)) { + scale_texcoords(compiler, inst, RC_STATE_R300_TEXRECT_FACTOR); + inst->U.I.TexSrcTarget = RC_TEXTURE_2D; + } + + /* Divide by W if needed. */ + if (inst->U.I.Opcode == RC_OPCODE_TXP && + (wrapmode == RC_WRAP_REPEAT || wrapmode == RC_WRAP_MIRRORED_REPEAT || + compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch)) { + projective_divide(compiler, inst); + } + + /* Texture wrap modes don't work on NPOT textures. + * + * Non-wrapped/clamped texcoords with NPOT are free in HW. Repeat and + * mirroring are not. If we need to repeat, we do: + * + * MUL temp, texcoord, <scaling factor constant> + * FRC temp, temp ; Discard integer portion of coords + * + * This gives us coords in [0, 1]. + * + * Mirroring is trickier. We're going to start out like repeat: + * + * MUL temp, texcoord, <scaling factor constant> ; De-mirror across axes + * MUL temp, temp, 0.5 ; Pattern repeats in [0, 2] + * ; so scale to [0, 1] + * FRC temp, temp ; Make the pattern repeat + * MAD temp, temp, 2, -1 ; Move the pattern to [-1, 1] + * ADD temp, 1, -abs(temp) ; Now comes a neat trick: use abs to mirror the pattern. + * ; The pattern is backwards, so reverse it (1-x). + * + * This gives us coords in [0, 1]. + * + * ~ C & M. ;) + */ + if (inst->U.I.Opcode != RC_OPCODE_KIL && + wrapmode != RC_WRAP_NONE) { + struct rc_instruction *inst_mov; + unsigned temp = rc_find_free_temporary(c); + + if (wrapmode == RC_WRAP_REPEAT) { + /* Both instructions will be paired up. */ + struct rc_instruction *inst_frc = rc_insert_new_instruction(c, inst->Prev); + + inst_frc->U.I.Opcode = RC_OPCODE_FRC; + inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_frc->U.I.DstReg.Index = temp; + inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_frc->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + } else if (wrapmode == RC_WRAP_MIRRORED_REPEAT) { + /* + * Function: + * f(v) = 1 - abs(frac(v * 0.5) * 2 - 1) + * + * Code: + * MUL temp, src0, 0.5 + * FRC temp, temp + * MAD temp, temp, 2, -1 + * ADD temp, 1, -abs(temp) + */ + + struct rc_instruction *inst_mul, *inst_frc, *inst_mad, *inst_add; + unsigned two, two_swizzle; + + inst_mul = rc_insert_new_instruction(c, inst->Prev); + + inst_mul->U.I.Opcode = RC_OPCODE_MUL; + inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mul->U.I.DstReg.Index = temp; + inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_HHHH; + + inst_frc = rc_insert_new_instruction(c, inst->Prev); + + inst_frc->U.I.Opcode = RC_OPCODE_FRC; + inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_frc->U.I.DstReg.Index = temp; + inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_frc->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_frc->U.I.SrcReg[0].Index = temp; + inst_frc->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0; + + two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2, &two_swizzle); + inst_mad = rc_insert_new_instruction(c, inst->Prev); + + inst_mad->U.I.Opcode = RC_OPCODE_MAD; + inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mad->U.I.DstReg.Index = temp; + inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mad->U.I.SrcReg[0].Index = temp; + inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0; + inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT; + inst_mad->U.I.SrcReg[1].Index = two; + inst_mad->U.I.SrcReg[1].Swizzle = two_swizzle; + inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_1111; + inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZ; + + inst_add = rc_insert_new_instruction(c, inst->Prev); + + inst_add->U.I.Opcode = RC_OPCODE_ADD; + inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_add->U.I.DstReg.Index = temp; + inst_add->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; + inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_add->U.I.SrcReg[1].Index = temp; + inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0; + inst_add->U.I.SrcReg[1].Abs = 1; + inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZ; + } else if (wrapmode == RC_WRAP_MIRRORED_CLAMP) { + /* + * Mirrored clamp modes are bloody simple, we just use abs + * to mirror [0, 1] into [-1, 0]. This works for + * all modes i.e. CLAMP, CLAMP_TO_EDGE, and CLAMP_TO_BORDER. + */ + struct rc_instruction *inst_mov; + + inst_mov = rc_insert_new_instruction(c, inst->Prev); + + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = temp; + inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mov->U.I.SrcReg[0].Abs = 1; + } + + /* Preserve W for TXP/TXB. */ + inst_mov = rc_insert_new_instruction(c, inst->Prev); + + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = temp; + inst_mov->U.I.DstReg.WriteMask = RC_MASK_W; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + + reset_srcreg(&inst->U.I.SrcReg[0]); + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = temp; + } + + /* NPOT -> POT conversion for 3D textures. */ + if (inst->U.I.Opcode != RC_OPCODE_KIL && + compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch) { + struct rc_instruction *inst_mov; + unsigned temp = rc_find_free_temporary(c); + + /* Saturate XYZ. */ + inst_mov = rc_insert_new_instruction(c, inst->Prev); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.SaturateMode = RC_SATURATE_ZERO_ONE; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = temp; + inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + + /* Copy W. */ + inst_mov = rc_insert_new_instruction(c, inst->Prev); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = temp; + inst_mov->U.I.DstReg.WriteMask = RC_MASK_W; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + + reset_srcreg(&inst->U.I.SrcReg[0]); + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = temp; + + scale_texcoords(compiler, inst, RC_STATE_R300_TEXSCALE_FACTOR); + } + + /* Convert SNORM-encoded ATI1N sampled as UNORM to SNORM. + * Formula: dst = tex > 0.5 ? tex*2-2 : tex*2 + */ + if (inst->U.I.Opcode != RC_OPCODE_KIL && + compiler->state.unit[inst->U.I.TexSrcUnit].convert_unorm_to_snorm) { + unsigned two, two_swizzle; + struct rc_instruction *inst_mul, *inst_mad, *inst_cnd; + + two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2.35, &two_swizzle); + + inst_mul = rc_insert_new_instruction(c, inst); + inst_mul->U.I.Opcode = RC_OPCODE_MUL; + inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mul->U.I.DstReg.Index = rc_find_free_temporary(c); + inst_mul->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mul->U.I.SrcReg[0].Index = rc_find_free_temporary(c); /* redirected TEX output */ + inst_mul->U.I.SrcReg[1].File = RC_FILE_CONSTANT; /* 2 */ + inst_mul->U.I.SrcReg[1].Index = two; + inst_mul->U.I.SrcReg[1].Swizzle = two_swizzle; + + inst_mad = rc_insert_new_instruction(c, inst_mul); + inst_mad->U.I.Opcode = RC_OPCODE_MAD; + inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mad->U.I.DstReg.Index = rc_find_free_temporary(c); + inst_mad->U.I.SrcReg[0] = inst_mul->U.I.SrcReg[0]; /* redirected TEX output */ + inst_mad->U.I.SrcReg[1] = inst_mul->U.I.SrcReg[1]; /* 2 */ + inst_mad->U.I.SrcReg[2] = inst_mul->U.I.SrcReg[1]; /* 2 */ + inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZW; + + inst_cnd = rc_insert_new_instruction(c, inst_mad); + inst_cnd->U.I.Opcode = RC_OPCODE_CND; + inst_cnd->U.I.SaturateMode = inst->U.I.SaturateMode; + inst_cnd->U.I.DstReg = inst->U.I.DstReg; + inst_cnd->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_cnd->U.I.SrcReg[0].Index = inst_mad->U.I.DstReg.Index; + inst_cnd->U.I.SrcReg[0].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle; + inst_cnd->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_cnd->U.I.SrcReg[1].Index = inst_mul->U.I.DstReg.Index; + inst_cnd->U.I.SrcReg[1].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle; + inst_cnd->U.I.SrcReg[2] = inst_mul->U.I.SrcReg[0]; /* redirected TEX output */ + + inst->U.I.SaturateMode = 0; + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = inst_mul->U.I.SrcReg[0].Index; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + } + + /* Cannot write texture to output registers or with saturate (all chips), + * or with masks (non-r500). */ + if (inst->U.I.Opcode != RC_OPCODE_KIL && + (inst->U.I.DstReg.File != RC_FILE_TEMPORARY || + inst->U.I.SaturateMode || + (!c->is_r500 && inst->U.I.DstReg.WriteMask != RC_MASK_XYZW))) { + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst); + + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.SaturateMode = inst->U.I.SaturateMode; + inst_mov->U.I.DstReg = inst->U.I.DstReg; + inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c); + + inst->U.I.SaturateMode = 0; + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + } + + /* Cannot read texture coordinate from constants file */ + if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) { + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); + + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c); + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + + reset_srcreg(&inst->U.I.SrcReg[0]); + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index; + } + + return 1; +} diff --git a/src/gallium/drivers/r300/compiler/radeon_program_tex.h b/src/gallium/drivers/r300/compiler/radeon_program_tex.h new file mode 100644 index 00000000000..a0105051ac4 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_program_tex.h @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2010 Corbin Simpson + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RADEON_PROGRAM_TEX_H_ +#define __RADEON_PROGRAM_TEX_H_ + +#include "radeon_compiler.h" +#include "radeon_program.h" + +int radeonTransformTEX( + struct radeon_compiler * c, + struct rc_instruction * inst, + void* data); + +#endif /* __RADEON_PROGRAM_TEX_H_ */ diff --git a/src/gallium/drivers/r300/compiler/radeon_remove_constants.c b/src/gallium/drivers/r300/compiler/radeon_remove_constants.c new file mode 100644 index 00000000000..7d76585a593 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_remove_constants.c @@ -0,0 +1,150 @@ +/* + * Copyright (C) 2010 Marek Olšák <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_remove_constants.h" +#include "radeon_dataflow.h" + +struct mark_used_data { + unsigned char * const_used; + unsigned * has_rel_addr; +}; + +static void remap_regs(void * userdata, struct rc_instruction * inst, + rc_register_file * pfile, unsigned int * pindex) +{ + unsigned *inv_remap_table = userdata; + + if (*pfile == RC_FILE_CONSTANT) { + *pindex = inv_remap_table[*pindex]; + } +} + +static void mark_used(void * userdata, struct rc_instruction * inst, + struct rc_src_register * src) +{ + struct mark_used_data * d = userdata; + + if (src->File == RC_FILE_CONSTANT) { + if (src->RelAddr) { + *d->has_rel_addr = 1; + } else { + d->const_used[src->Index] = 1; + } + } +} + +void rc_remove_unused_constants(struct radeon_compiler *c, void *user) +{ + unsigned **out_remap_table = (unsigned**)user; + unsigned char *const_used; + unsigned *remap_table; + unsigned *inv_remap_table; + unsigned has_rel_addr = 0; + unsigned is_identity = 1; + unsigned are_externals_remapped = 0; + struct rc_constant *constants = c->Program.Constants.Constants; + struct mark_used_data d; + unsigned new_count; + + if (!c->Program.Constants.Count) { + *out_remap_table = NULL; + return; + } + + const_used = malloc(c->Program.Constants.Count); + memset(const_used, 0, c->Program.Constants.Count); + + d.const_used = const_used; + d.has_rel_addr = &has_rel_addr; + + /* Pass 1: Mark used constants. */ + for (struct rc_instruction *inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; inst = inst->Next) { + rc_for_all_reads_src(inst, mark_used, &d); + } + + /* Pass 2: If there is relative addressing or dead constant elimination + * is disabled, mark all externals as used. */ + if (has_rel_addr || !c->remove_unused_constants) { + for (unsigned i = 0; i < c->Program.Constants.Count; i++) + if (constants[i].Type == RC_CONSTANT_EXTERNAL) + const_used[i] = 1; + } + + /* Pass 3: Make the remapping table and remap constants. + * This pass removes unused constants simply by overwriting them by other constants. */ + remap_table = malloc(c->Program.Constants.Count * sizeof(unsigned)); + inv_remap_table = malloc(c->Program.Constants.Count * sizeof(unsigned)); + new_count = 0; + + for (unsigned i = 0; i < c->Program.Constants.Count; i++) { + if (const_used[i]) { + remap_table[new_count] = i; + inv_remap_table[i] = new_count; + + if (i != new_count) { + if (constants[i].Type == RC_CONSTANT_EXTERNAL) + are_externals_remapped = 1; + + constants[new_count] = constants[i]; + is_identity = 0; + } + new_count++; + } + } + + /* is_identity ==> new_count == old_count + * !is_identity ==> new_count < old_count */ + assert( is_identity || new_count < c->Program.Constants.Count); + assert(!((has_rel_addr || !c->remove_unused_constants) && are_externals_remapped)); + + /* Pass 4: Redirect reads of all constants to their new locations. */ + if (!is_identity) { + for (struct rc_instruction *inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; inst = inst->Next) { + rc_remap_registers(inst, remap_regs, inv_remap_table); + } + } + + /* Set the new constant count. Note that new_count may be less than + * Count even though the remapping function is identity. In that case, + * the constants have been removed at the end of the array. */ + c->Program.Constants.Count = new_count; + + if (are_externals_remapped) { + *out_remap_table = remap_table; + } else { + *out_remap_table = NULL; + free(remap_table); + } + + free(const_used); + free(inv_remap_table); + + if (c->Debug & RC_DBG_LOG) + rc_constants_print(&c->Program.Constants); +} diff --git a/src/gallium/drivers/r300/compiler/radeon_remove_constants.h b/src/gallium/drivers/r300/compiler/radeon_remove_constants.h new file mode 100644 index 00000000000..f29113b922b --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_remove_constants.h @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2010 Marek Olšák <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_REMOVE_CONSTANTS_H +#define RADEON_REMOVE_CONSTANTS_H + +#include "radeon_compiler.h" + +void rc_remove_unused_constants(struct radeon_compiler *c, void *user); + +#endif diff --git a/src/gallium/drivers/r300/compiler/radeon_rename_regs.c b/src/gallium/drivers/r300/compiler/radeon_rename_regs.c new file mode 100644 index 00000000000..cafa0579734 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_rename_regs.c @@ -0,0 +1,92 @@ +/* + * Copyright 2010 Tom Stellard <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + */ + +#include "radeon_rename_regs.h" + +#include "radeon_compiler.h" +#include "radeon_dataflow.h" +#include "radeon_program.h" + +/** + * This function renames registers in an attempt to get the code close to + * SSA form. After this function has completed, most of the register are only + * written to one time, with a few exceptions. + * + * This function assumes all the instructions are still of type + * RC_INSTRUCTION_NORMAL. + */ +void rc_rename_regs(struct radeon_compiler *c, void *user) +{ + unsigned int i, used_length; + int new_index; + struct rc_instruction * inst; + struct rc_reader_data reader_data; + unsigned char * used; + + /* XXX Remove this once the register allocation works with flow control. */ + for(inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) + return; + } + + used_length = 2 * rc_recompute_ips(c); + used = memory_pool_malloc(&c->Pool, sizeof(unsigned char) * used_length); + memset(used, 0, sizeof(unsigned char) * used_length); + + rc_get_used_temporaries(c, used, used_length); + for(inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + + if (inst->U.I.DstReg.File != RC_FILE_TEMPORARY) + continue; + + reader_data.ExitOnAbort = 1; + rc_get_readers(c, inst, &reader_data, NULL, NULL, NULL); + + if (reader_data.Abort || reader_data.ReaderCount == 0) + continue; + + new_index = rc_find_free_temporary_list(c, used, used_length, + RC_MASK_XYZW); + if (new_index < 0) { + rc_error(c, "Ran out of temporary registers\n"); + return; + } + + reader_data.Writer->U.I.DstReg.Index = new_index; + for(i = 0; i < reader_data.ReaderCount; i++) { + reader_data.Readers[i].U.I.Src->Index = new_index; + } + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_rename_regs.h b/src/gallium/drivers/r300/compiler/radeon_rename_regs.h new file mode 100644 index 00000000000..3baf29f6120 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_rename_regs.h @@ -0,0 +1,9 @@ + +#ifndef RADEON_RENAME_REGS_H +#define RADEON_RENAME_REGS_H + +struct radeon_compiler; + +void rc_rename_regs(struct radeon_compiler *c, void *user); + +#endif /* RADEON_RENAME_REGS_H */ diff --git a/src/gallium/drivers/r300/compiler/radeon_swizzle.h b/src/gallium/drivers/r300/compiler/radeon_swizzle.h new file mode 100644 index 00000000000..c81d5f7a5e9 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_swizzle.h @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_SWIZZLE_H +#define RADEON_SWIZZLE_H + +#include "radeon_program.h" + +struct rc_swizzle_split { + unsigned char NumPhases; + unsigned char Phase[4]; +}; + +/** + * Describe the swizzling capability of target hardware. + */ +struct rc_swizzle_caps { + /** + * Check whether the given swizzle, absolute and negate combination + * can be implemented natively by the hardware for this opcode. + * + * \return 1 if the swizzle is native for the given opcode + */ + int (*IsNative)(rc_opcode opcode, struct rc_src_register reg); + + /** + * Determine how to split access to the masked channels of the + * given source register to obtain ALU-native swizzles. + */ + void (*Split)(struct rc_src_register reg, unsigned int mask, struct rc_swizzle_split * split); +}; + +#endif /* RADEON_SWIZZLE_H */ diff --git a/src/gallium/drivers/r300/compiler/radeon_variable.c b/src/gallium/drivers/r300/compiler/radeon_variable.c new file mode 100644 index 00000000000..938fb8421f2 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_variable.c @@ -0,0 +1,517 @@ +/* + * Copyright 2011 Tom Stellard <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_variable.h" + +#include "memory_pool.h" +#include "radeon_compiler_util.h" +#include "radeon_dataflow.h" +#include "radeon_list.h" +#include "radeon_opcodes.h" +#include "radeon_program.h" + +/** + * Rewrite the index and writemask for the destination register of var + * and its friends to new_index and new_writemask. This function also takes + * care of rewriting the swizzles for the sources of var. + */ +void rc_variable_change_dst( + struct rc_variable * var, + unsigned int new_index, + unsigned int new_writemask) +{ + struct rc_variable * var_ptr; + struct rc_list * readers; + unsigned int old_mask = rc_variable_writemask_sum(var); + unsigned int conversion_swizzle = + rc_make_conversion_swizzle(old_mask, new_writemask); + + for (var_ptr = var; var_ptr; var_ptr = var_ptr->Friend) { + if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) { + rc_normal_rewrite_writemask(var_ptr->Inst, + conversion_swizzle); + var_ptr->Inst->U.I.DstReg.Index = new_index; + } else { + struct rc_pair_sub_instruction * sub; + if (var_ptr->Dst.WriteMask == RC_MASK_W) { + assert(new_writemask & RC_MASK_W); + sub = &var_ptr->Inst->U.P.Alpha; + } else { + sub = &var_ptr->Inst->U.P.RGB; + rc_pair_rewrite_writemask(sub, + conversion_swizzle); + } + sub->DestIndex = new_index; + } + } + + readers = rc_variable_readers_union(var); + + for ( ; readers; readers = readers->Next) { + struct rc_reader * reader = readers->Item; + if (reader->Inst->Type == RC_INSTRUCTION_NORMAL) { + reader->U.I.Src->Index = new_index; + reader->U.I.Src->Swizzle = rc_rewrite_swizzle( + reader->U.I.Src->Swizzle, conversion_swizzle); + } else { + struct rc_pair_instruction * pair_inst = + &reader->Inst->U.P; + unsigned int src_type = rc_source_type_swz( + reader->U.P.Arg->Swizzle); + + int src_index = reader->U.P.Arg->Source; + if (src_index == RC_PAIR_PRESUB_SRC) { + src_index = rc_pair_get_src_index( + pair_inst, reader->U.P.Src); + } + /* Try to delete the old src, it is OK if this fails, + * because rc_pair_alloc_source might be able to + * find a source the ca be reused. + */ + if (rc_pair_remove_src(reader->Inst, src_type, + src_index, old_mask)) { + /* Reuse the source index of the source that + * was just deleted and set its register + * index. We can't use rc_pair_alloc_source + * for this becuase it might return a source + * index that is already being used. */ + if (src_type & RC_SOURCE_RGB) { + pair_inst->RGB.Src[src_index] + .Used = 1; + pair_inst->RGB.Src[src_index] + .Index = new_index; + pair_inst->RGB.Src[src_index] + .File = RC_FILE_TEMPORARY; + } + if (src_type & RC_SOURCE_ALPHA) { + pair_inst->Alpha.Src[src_index] + .Used = 1; + pair_inst->Alpha.Src[src_index] + .Index = new_index; + pair_inst->Alpha.Src[src_index] + .File = RC_FILE_TEMPORARY; + } + } else { + src_index = rc_pair_alloc_source( + &reader->Inst->U.P, + src_type & RC_SOURCE_RGB, + src_type & RC_SOURCE_ALPHA, + RC_FILE_TEMPORARY, + new_index); + if (src_index < 0) { + rc_error(var->C, "Rewrite of inst %u failed " + "Can't allocate source for " + "Inst %u src_type=%x " + "new_index=%u new_mask=%u\n", + var->Inst->IP, reader->Inst->IP, src_type, new_index, new_writemask); + continue; + } + } + reader->U.P.Arg->Swizzle = rc_rewrite_swizzle( + reader->U.P.Arg->Swizzle, conversion_swizzle); + if (reader->U.P.Arg->Source != RC_PAIR_PRESUB_SRC) { + reader->U.P.Arg->Source = src_index; + } + } + } +} + +/** + * Compute the live intervals for var and its friends. + */ +void rc_variable_compute_live_intervals(struct rc_variable * var) +{ + while(var) { + unsigned int i; + unsigned int start = var->Inst->IP; + + for (i = 0; i < var->ReaderCount; i++) { + unsigned int chan; + unsigned int chan_start = start; + unsigned int chan_end = var->Readers[i].Inst->IP; + unsigned int mask = var->Readers[i].WriteMask; + struct rc_instruction * inst; + + /* Extend the live interval of T0 to the start of the + * loop for sequences like: + * BGNLOOP + * read T0 + * ... + * write T0 + * ENDLOOP + */ + if (var->Readers[i].Inst->IP < start) { + struct rc_instruction * bgnloop = + rc_match_endloop(var->Readers[i].Inst); + chan_start = bgnloop->IP; + } + + /* Extend the live interval of T0 to the start of the + * loop in case there is a BRK instruction in the loop + * (we don't actually check for a BRK instruction we + * assume there is one somewhere in the loop, which + * there usually is) for sequences like: + * BGNLOOP + * ... + * conditional BRK + * ... + * write T0 + * ENDLOOP + * read T0 + *************************************************** + * Extend the live interval of T0 to the end of the + * loop for sequences like: + * write T0 + * BGNLOOP + * ... + * read T0 + * ENDLOOP + */ + for (inst = var->Inst; inst != var->Readers[i].Inst; + inst = inst->Next) { + rc_opcode op = rc_get_flow_control_inst(inst); + if (op == RC_OPCODE_ENDLOOP) { + struct rc_instruction * bgnloop = + rc_match_endloop(inst); + if (bgnloop->IP < chan_start) { + chan_start = bgnloop->IP; + } + } else if (op == RC_OPCODE_BGNLOOP) { + struct rc_instruction * endloop = + rc_match_bgnloop(inst); + if (endloop->IP > chan_end) { + chan_end = endloop->IP; + } + } + } + + for (chan = 0; chan < 4; chan++) { + if ((mask >> chan) & 0x1) { + if (!var->Live[chan].Used + || chan_start < var->Live[chan].Start) { + var->Live[chan].Start = + chan_start; + } + if (!var->Live[chan].Used + || chan_end > var->Live[chan].End) { + var->Live[chan].End = chan_end; + } + var->Live[chan].Used = 1; + } + } + } + var = var->Friend; + } +} + +/** + * @return 1 if a and b share a reader + * @return 0 if they do not + */ +static unsigned int readers_intersect( + struct rc_variable * a, + struct rc_variable * b) +{ + unsigned int a_index, b_index; + for (a_index = 0; a_index < a->ReaderCount; a_index++) { + struct rc_reader reader_a = a->Readers[a_index]; + for (b_index = 0; b_index < b->ReaderCount; b_index++) { + struct rc_reader reader_b = b->Readers[b_index]; + if (reader_a.Inst->Type == RC_INSTRUCTION_NORMAL + && reader_b.Inst->Type == RC_INSTRUCTION_NORMAL + && reader_a.U.I.Src == reader_b.U.I.Src) { + + return 1; + } + if (reader_a.Inst->Type == RC_INSTRUCTION_PAIR + && reader_b.Inst->Type == RC_INSTRUCTION_PAIR + && reader_a.U.P.Src == reader_b.U.P.Src) { + + return 1; + } + } + } + return 0; +} + +void rc_variable_add_friend( + struct rc_variable * var, + struct rc_variable * friend) +{ + assert(var->Dst.Index == friend->Dst.Index); + while(var->Friend) { + var = var->Friend; + } + var->Friend = friend; +} + +struct rc_variable * rc_variable( + struct radeon_compiler * c, + unsigned int DstFile, + unsigned int DstIndex, + unsigned int DstWriteMask, + struct rc_reader_data * reader_data) +{ + struct rc_variable * new = + memory_pool_malloc(&c->Pool, sizeof(struct rc_variable)); + memset(new, 0, sizeof(struct rc_variable)); + new->C = c; + new->Dst.File = DstFile; + new->Dst.Index = DstIndex; + new->Dst.WriteMask = DstWriteMask; + if (reader_data) { + new->Inst = reader_data->Writer; + new->ReaderCount = reader_data->ReaderCount; + new->Readers = reader_data->Readers; + } + return new; +} + +static void get_variable_helper( + struct rc_list ** variable_list, + struct rc_variable * variable) +{ + struct rc_list * list_ptr; + for (list_ptr = *variable_list; list_ptr; list_ptr = list_ptr->Next) { + if (readers_intersect(variable, list_ptr->Item)) { + rc_variable_add_friend(list_ptr->Item, variable); + return; + } + } + rc_list_add(variable_list, rc_list(&variable->C->Pool, variable)); +} + +static void get_variable_pair_helper( + struct rc_list ** variable_list, + struct radeon_compiler * c, + struct rc_instruction * inst, + struct rc_pair_sub_instruction * sub_inst) +{ + struct rc_reader_data reader_data; + struct rc_variable * new_var; + rc_register_file file; + unsigned int writemask; + + if (sub_inst->Opcode == RC_OPCODE_NOP) { + return; + } + memset(&reader_data, 0, sizeof(struct rc_reader_data)); + rc_get_readers_sub(c, inst, sub_inst, &reader_data, NULL, NULL, NULL); + + if (reader_data.ReaderCount == 0) { + return; + } + + if (sub_inst->WriteMask) { + file = RC_FILE_TEMPORARY; + writemask = sub_inst->WriteMask; + } else if (sub_inst->OutputWriteMask) { + file = RC_FILE_OUTPUT; + writemask = sub_inst->OutputWriteMask; + } else { + writemask = 0; + file = RC_FILE_NONE; + } + new_var = rc_variable(c, file, sub_inst->DestIndex, writemask, + &reader_data); + get_variable_helper(variable_list, new_var); +} + +/** + * Generate a list of variables used by the shader program. Each instruction + * that writes to a register is considered a variable. The struct rc_variable + * data structure includes a list of readers and is essentially a + * definition-use chain. Any two variables that share a reader are considered + * "friends" and they are linked together via the Friend attribute. + */ +struct rc_list * rc_get_variables(struct radeon_compiler * c) +{ + struct rc_instruction * inst; + struct rc_list * variable_list = NULL; + + for (inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + struct rc_reader_data reader_data; + struct rc_variable * new_var; + memset(&reader_data, 0, sizeof(reader_data)); + + if (inst->Type == RC_INSTRUCTION_NORMAL) { + rc_get_readers(c, inst, &reader_data, NULL, NULL, NULL); + if (reader_data.ReaderCount == 0) { + continue; + } + new_var = rc_variable(c, inst->U.I.DstReg.File, + inst->U.I.DstReg.Index, + inst->U.I.DstReg.WriteMask, &reader_data); + get_variable_helper(&variable_list, new_var); + } else { + get_variable_pair_helper(&variable_list, c, inst, + &inst->U.P.RGB); + get_variable_pair_helper(&variable_list, c, inst, + &inst->U.P.Alpha); + } + } + + return variable_list; +} + +/** + * @return The bitwise or of the writemasks of a variable and all of its + * friends. + */ +unsigned int rc_variable_writemask_sum(struct rc_variable * var) +{ + unsigned int writemask = 0; + while(var) { + writemask |= var->Dst.WriteMask; + var = var->Friend; + } + return writemask; +} + +/* + * @return A list of readers for a variable and its friends. Readers + * that read from two different variable friends are only included once in + * this list. + */ +struct rc_list * rc_variable_readers_union(struct rc_variable * var) +{ + struct rc_list * list = NULL; + while (var) { + unsigned int i; + for (i = 0; i < var->ReaderCount; i++) { + struct rc_list * temp; + struct rc_reader * a = &var->Readers[i]; + unsigned int match = 0; + for (temp = list; temp; temp = temp->Next) { + struct rc_reader * b = temp->Item; + if (a->Inst->Type != b->Inst->Type) { + continue; + } + if (a->Inst->Type == RC_INSTRUCTION_NORMAL) { + if (a->U.I.Src == b->U.I.Src) { + match = 1; + break; + } + } + if (a->Inst->Type == RC_INSTRUCTION_PAIR) { + if (a->U.P.Arg == b->U.P.Arg + && a->U.P.Src == b->U.P.Src) { + match = 1; + break; + } + } + } + if (match) { + continue; + } + rc_list_add(&list, rc_list(&var->C->Pool, a)); + } + var = var->Friend; + } + return list; +} + +static unsigned int reader_equals_src( + struct rc_reader reader, + unsigned int src_type, + void * src) +{ + if (reader.Inst->Type != src_type) { + return 0; + } + if (src_type == RC_INSTRUCTION_NORMAL) { + return reader.U.I.Src == src; + } else { + return reader.U.P.Src == src; + } +} + +static unsigned int variable_writes_src( + struct rc_variable * var, + unsigned int src_type, + void * src) +{ + unsigned int i; + for (i = 0; i < var->ReaderCount; i++) { + if (reader_equals_src(var->Readers[i], src_type, src)) { + return 1; + } + } + return 0; +} + + +struct rc_list * rc_variable_list_get_writers( + struct rc_list * var_list, + unsigned int src_type, + void * src) +{ + struct rc_list * list_ptr; + struct rc_list * writer_list = NULL; + for (list_ptr = var_list; list_ptr; list_ptr = list_ptr->Next) { + struct rc_variable * var = list_ptr->Item; + if (variable_writes_src(var, src_type, src)) { + struct rc_variable * friend; + rc_list_add(&writer_list, rc_list(&var->C->Pool, var)); + for (friend = var->Friend; friend; + friend = friend->Friend) { + if (variable_writes_src(friend, src_type, src)) { + rc_list_add(&writer_list, + rc_list(&var->C->Pool, friend)); + } + } + /* Once we have indentifed the variable and its + * friends that write this source, we can stop + * stop searching, because we know know of the + * other variables in the list will write this source. + * If they did they would be friends of var. + */ + break; + } + } + return writer_list; +} + +void rc_variable_print(struct rc_variable * var) +{ + unsigned int i; + while (var) { + fprintf(stderr, "%u: TEMP[%u].%u: ", + var->Inst->IP, var->Dst.Index, var->Dst.WriteMask); + for (i = 0; i < 4; i++) { + fprintf(stderr, "chan %u: start=%u end=%u ", i, + var->Live[i].Start, var->Live[i].End); + } + fprintf(stderr, "%u readers\n", var->ReaderCount); + if (var->Friend) { + fprintf(stderr, "Friend: \n\t"); + } + var = var->Friend; + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_variable.h b/src/gallium/drivers/r300/compiler/radeon_variable.h new file mode 100644 index 00000000000..9427bee18a7 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_variable.h @@ -0,0 +1,89 @@ +/* + * Copyright 2011 Tom Stellard <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_VARIABLE_H +#define RADEON_VARIABLE_H + +#include "radeon_compiler.h" + +struct radeon_compiler; +struct rc_list; +struct rc_reader_data; +struct rc_readers; + +struct live_intervals { + int Start; + int End; + int Used; +}; + +struct rc_variable { + struct radeon_compiler * C; + struct rc_dst_register Dst; + + struct rc_instruction * Inst; + unsigned int ReaderCount; + struct rc_reader * Readers; + struct live_intervals Live[4]; + + /* A friend is a variable that shares a reader with another variable. + */ + struct rc_variable * Friend; +}; + +void rc_variable_change_dst( + struct rc_variable * var, + unsigned int new_index, + unsigned int new_writemask); + +void rc_variable_compute_live_intervals(struct rc_variable * var); + +void rc_variable_add_friend( + struct rc_variable * var, + struct rc_variable * friend); + +struct rc_variable * rc_variable( + struct radeon_compiler * c, + unsigned int DstFile, + unsigned int DstIndex, + unsigned int DstWriteMask, + struct rc_reader_data * reader_data); + +struct rc_list * rc_get_variables(struct radeon_compiler * c); + +unsigned int rc_variable_writemask_sum(struct rc_variable * var); + +struct rc_list * rc_variable_readers_union(struct rc_variable * var); + +struct rc_list * rc_variable_list_get_writers( + struct rc_list * var_list, + unsigned int src_type, + void * src); + +void rc_variable_print(struct rc_variable * var); + +#endif /* RADEON_VARIABLE_H */ diff --git a/src/gallium/drivers/r300/compiler/tests/.gitignore b/src/gallium/drivers/r300/compiler/tests/.gitignore new file mode 100644 index 00000000000..85672fed777 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/tests/.gitignore @@ -0,0 +1 @@ +radeon_compiler_util_tests diff --git a/src/gallium/drivers/r300/compiler/tests/Makefile b/src/gallium/drivers/r300/compiler/tests/Makefile new file mode 100644 index 00000000000..6eda34a2c00 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/tests/Makefile @@ -0,0 +1,53 @@ +TOP = ../../../../../.. +include $(TOP)/configs/current + +CFLAGS += -Wall -Werror + +### Basic defines ### +TESTS = radeon_compiler_util_tests + +TEST_SOURCES := $(TESTS:=.c) + +SHARED_SOURCES = \ + rc_test_helpers.c \ + unit_test.c + +C_SOURCES = $(SHARED_SOURCES) $(TEST_SOURCES) + +INCLUDES = \ + -I. \ + -I.. + +COMPILER_LIB = ../../libr300.a + +##### TARGETS ##### + +default: depend run_tests + +depend: $(C_SOURCES) + rm -f depend + touch depend + $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $^ 2> /dev/null + +# Remove .o and backup files +clean: + rm -f $(TESTS) depend depend.bak + +$(TESTS): $(TESTS:=.o) $(SHARED_SOURCES:.c=.o) $(COMPILER_LIB) + $(APP_CC) -o $@ $^ + +run_tests: $(TESTS) + @echo "RUNNING TESTS:" + @echo "" + $(foreach test, $^, @./$(test)) + +.PHONY: $(COMPILER_LIB) +$(COMPILER_LIB): + $(MAKE) -C ../.. + +##### RULES ##### +.c.o: + $(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@ + + +sinclude depend diff --git a/src/gallium/drivers/r300/compiler/tests/radeon_compiler_util_tests.c b/src/gallium/drivers/r300/compiler/tests/radeon_compiler_util_tests.c new file mode 100644 index 00000000000..a2e3f2ab2e5 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/tests/radeon_compiler_util_tests.c @@ -0,0 +1,76 @@ +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> + +#include "radeon_compiler_util.h" +#include "radeon_program.h" + +#include "rc_test_helpers.h" +#include "unit_test.h" + +static void test_rc_inst_can_use_presub( + struct test_result * result, + int expected, + const char * add_str, + const char * replace_str) +{ + struct rc_instruction add_inst, replace_inst; + int ret; + + test_begin(result); + init_rc_normal_instruction(&add_inst, add_str); + init_rc_normal_instruction(&replace_inst, replace_str); + + ret = rc_inst_can_use_presub(&replace_inst, RC_PRESUB_ADD, 0, + &replace_inst.U.I.SrcReg[0], + &add_inst.U.I.SrcReg[0], &add_inst.U.I.SrcReg[1]); + + test_check(result, ret == expected); +} + +static void test_runner_rc_inst_can_use_presub(struct test_result * result) +{ + + /* This tests the case where the source being replace has the same + * register file and register index as another source register in the + * CMP instruction. A previous version of this function was ignoring + * all registers that shared the same file and index as the replacement + * register when counting the number of source selects. + * + * https://bugs.freedesktop.org/show_bug.cgi?id=36527 + */ + test_rc_inst_can_use_presub(result, 0, + "ADD temp[0].z, temp[6].__x_, const[1].__x_;", + "CMP temp[0].y, temp[0]._z__, const[0]._z__, temp[0]._y__;"); + + + /* Testing a random case that should fail + * + * https://bugs.freedesktop.org/show_bug.cgi?id=36527 + */ + test_rc_inst_can_use_presub(result, 0, + "ADD temp[3], temp[1], temp[2];", + "MAD temp[1], temp[0], const[0].xxxx, -temp[3];"); + + /* This tests the case where the arguments of the ADD + * instruction share the same register file and index. Normally, we + * would need only one source select for these two arguments, but since + * they will be part of a presubtract operation we need to use the two + * source selects that the presubtract instruction expects + * (src0 and src1). + * + * https://bugs.freedesktop.org/show_bug.cgi?id=36527 + */ + test_rc_inst_can_use_presub(result, 0, + "ADD temp[3].x, temp[0].x___, temp[0].x___;", + "MAD temp[0].xyz, temp[2].xyz_, -temp[3].xxx_, input[5].xyz_;"); +} + +int main(int argc, char ** argv) +{ + struct test tests[] = { + {"rc_inst_can_use_presub()", test_runner_rc_inst_can_use_presub}, + {NULL, NULL} + }; + run_tests(tests); +} diff --git a/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.c b/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.c new file mode 100644 index 00000000000..ca4738af54d --- /dev/null +++ b/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.c @@ -0,0 +1,380 @@ +#include <errno.h> +#include <regex.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <sys/types.h> + +#include "../radeon_compiler_util.h" +#include "../radeon_opcodes.h" +#include "../radeon_program.h" + +#include "rc_test_helpers.h" + +/* This file contains some helper functions for filling out the rc_instruction + * data structures. These functions take a string as input based on the format + * output by rc_program_print(). + */ + +#define VERBOSE 0 + +#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) + +#define REGEX_ERR_BUF_SIZE 50 + +struct match_info { + const char * String; + int Length; +}; + +static int match_length(regmatch_t * matches, int index) +{ + return matches[index].rm_eo - matches[index].rm_so; +} + +static int regex_helper( + const char * regex_str, + const char * search_str, + regmatch_t * matches, + int num_matches) +{ + char err_buf[REGEX_ERR_BUF_SIZE]; + regex_t regex; + int err_code; + unsigned int i; + + err_code = regcomp(®ex, regex_str, REG_EXTENDED); + if (err_code) { + regerror(err_code, ®ex, err_buf, REGEX_ERR_BUF_SIZE); + fprintf(stderr, "Failed to compile regex: %s\n", err_buf); + return 0; + } + + err_code = regexec(®ex, search_str, num_matches, matches, 0); + DBG("Search string: '%s'\n", search_str); + for (i = 0; i < num_matches; i++) { + DBG("Match %u start = %d end = %d\n", i, + matches[i].rm_so, matches[i].rm_eo); + } + if (err_code) { + regerror(err_code, ®ex, err_buf, REGEX_ERR_BUF_SIZE); + fprintf(stderr, "Failed to match regex: %s\n", err_buf); + return 0; + } + return 1; +} + +#define REGEX_SRC_MATCHES 6 + +struct src_tokens { + struct match_info Negate; + struct match_info Abs; + struct match_info File; + struct match_info Index; + struct match_info Swizzle; +}; + +/** + * Initialize the source register at index src_index for the instruction based + * on src_str. + * + * NOTE: Warning in init_rc_normal_instruction() applies to this function as + * well. + * + * @param src_str A string that represents the source register. The format for + * this string is the same that is output by rc_program_print. + * @return 1 On success, 0 on failure + */ +int init_rc_normal_src( + struct rc_instruction * inst, + unsigned int src_index, + const char * src_str) +{ + const char * regex_str = "(-*)(\\|*)([[:lower:]]*)\\[([[:digit:]])\\](\\.*[[:lower:]-]*)"; + regmatch_t matches[REGEX_SRC_MATCHES]; + struct src_tokens tokens; + struct rc_src_register * src_reg = &inst->U.I.SrcReg[src_index]; + unsigned int i; + + /* Execute the regex */ + if (!regex_helper(regex_str, src_str, matches, REGEX_SRC_MATCHES)) { + fprintf(stderr, "Failed to execute regex for src register.\n"); + return 0; + } + + /* Create Tokens */ + tokens.Negate.String = src_str + matches[1].rm_so; + tokens.Negate.Length = match_length(matches, 1); + tokens.Abs.String = src_str + matches[2].rm_so; + tokens.Abs.Length = match_length(matches, 2); + tokens.File.String = src_str + matches[3].rm_so; + tokens.File.Length = match_length(matches, 3); + tokens.Index.String = src_str + matches[4].rm_so; + tokens.Index.Length = match_length(matches, 4); + tokens.Swizzle.String = src_str + matches[5].rm_so; + tokens.Swizzle.Length = match_length(matches, 5); + + /* Negate */ + if (tokens.Negate.Length > 0) { + src_reg->Negate = RC_MASK_XYZW; + } + + /* Abs */ + if (tokens.Abs.Length > 0) { + src_reg->Abs = 1; + } + + /* File */ + if (!strncmp(tokens.File.String, "temp", tokens.File.Length)) { + src_reg->File = RC_FILE_TEMPORARY; + } else if (!strncmp(tokens.File.String, "input", tokens.File.Length)) { + src_reg->File = RC_FILE_INPUT; + } else if (!strncmp(tokens.File.String, "const", tokens.File.Length)) { + src_reg->File = RC_FILE_CONSTANT; + } else if (!strncmp(tokens.File.String, "none", tokens.File.Length)) { + src_reg->File = RC_FILE_NONE; + } + + /* Index */ + errno = 0; + src_reg->Index = strtol(tokens.Index.String, NULL, 10); + if (errno > 0) { + fprintf(stderr, "Could not convert src register index.\n"); + return 0; + } + + /* Swizzle */ + if (tokens.Swizzle.Length == 0) { + src_reg->Swizzle = RC_SWIZZLE_XYZW; + } else { + int str_index = 1; + src_reg->Swizzle = RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_UNUSED); + if (tokens.Swizzle.String[0] != '.') { + fprintf(stderr, "First char of swizzle is not valid.\n"); + return 0; + } + for (i = 0; i < 4; i++, str_index++) { + if (tokens.Swizzle.String[str_index] == '-') { + src_reg->Negate |= (1 << i); + str_index++; + } + switch(tokens.Swizzle.String[str_index]) { + case 'x': + SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_X); + break; + case 'y': + SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_Y); + break; + case 'z': + SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_Z); + break; + case 'w': + SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_W); + break; + case '1': + SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_ONE); + break; + case '0': + SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_ZERO); + break; + case 'H': + SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_HALF); + break; + case '_': + SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_UNUSED); + break; + default: + fprintf(stderr, "Unknown src register swizzle.\n"); + return 0; + } + } + } + DBG("File=%u index=%u swizzle=%x negate=%u abs=%u\n", + src_reg->File, src_reg->Index, src_reg->Swizzle, + src_reg->Negate, src_reg->Abs); + return 1; +} + +#define REGEX_DST_MATCHES 4 + +struct dst_tokens { + struct match_info File; + struct match_info Index; + struct match_info WriteMask; +}; + +/** + * Initialize the destination for the instruction based on dst_str. + * + * NOTE: Warning in init_rc_normal_instruction() applies to this function as + * well. + * + * @param dst_str A string that represents the destination register. The format + * for this string is the same that is output by rc_program_print. + * @return 1 On success, 0 on failure + */ +int init_rc_normal_dst( + struct rc_instruction * inst, + const char * dst_str) +{ + const char * regex_str = "([[:lower:]]*)\\[([[:digit:]]*)\\](\\.*[[:lower:]]*)"; + regmatch_t matches[REGEX_DST_MATCHES]; + struct dst_tokens tokens; + unsigned int i; + + /* Execute the regex */ + if (!regex_helper(regex_str, dst_str, matches, REGEX_DST_MATCHES)) { + fprintf(stderr, "Failed to execute regex for dst register.\n"); + return 0; + } + + /* Create Tokens */ + tokens.File.String = dst_str + matches[1].rm_so; + tokens.File.Length = match_length(matches, 1); + tokens.Index.String = dst_str + matches[2].rm_so; + tokens.Index.Length = match_length(matches, 2); + tokens.WriteMask.String = dst_str + matches[3].rm_so; + tokens.WriteMask.Length = match_length(matches, 3); + + /* File Type */ + if (!strncmp(tokens.File.String, "temp", tokens.File.Length)) { + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + } else if (!strncmp(tokens.File.String, "output", tokens.File.Length)) { + inst->U.I.DstReg.File = RC_FILE_OUTPUT; + } else { + fprintf(stderr, "Unknown dst register file type.\n"); + return 0; + } + + /* File Index */ + errno = 0; + inst->U.I.DstReg.Index = strtol(tokens.Index.String, NULL, 10); + + if (errno > 0) { + fprintf(stderr, "Could not convert dst register index\n"); + return 0; + } + + /* WriteMask */ + if (tokens.WriteMask.Length == 0) { + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + } else { + /* The first character should be '.' */ + if (tokens.WriteMask.String[0] != '.') { + fprintf(stderr, "1st char of writemask is not valid.\n"); + return 0; + } + for (i = 1; i < tokens.WriteMask.Length; i++) { + switch(tokens.WriteMask.String[i]) { + case 'x': + inst->U.I.DstReg.WriteMask |= RC_MASK_X; + break; + case 'y': + inst->U.I.DstReg.WriteMask |= RC_MASK_Y; + break; + case 'z': + inst->U.I.DstReg.WriteMask |= RC_MASK_Z; + break; + case 'w': + inst->U.I.DstReg.WriteMask |= RC_MASK_W; + break; + default: + fprintf(stderr, "Unknown swizzle in writemask.\n"); + return 0; + } + } + } + DBG("Dst Reg File=%u Index=%d Writemask=%d\n", + inst->U.I.DstReg.File, + inst->U.I.DstReg.Index, + inst->U.I.DstReg.WriteMask); + return 1; +} + +#define REGEX_INST_MATCHES 7 + +struct inst_tokens { + struct match_info Opcode; + struct match_info Sat; + struct match_info Dst; + struct match_info Srcs[3]; +}; + +/** + * Initialize a normal instruction based on inst_str. + * + * WARNING: This function might not be able to handle every kind of format that + * rc_program_print() can output. If you are having problems with a + * particular string, you may need to add support for it to this functions. + * + * @param inst_str A string that represents the source register. The format for + * this string is the same that is output by rc_program_print. + * @return 1 On success, 0 on failure + */ +int init_rc_normal_instruction( + struct rc_instruction * inst, + const char * inst_str) +{ + const char * regex_str = "([[:upper:]]+)(_SAT)* ([^,]*)[, ]*([^,]*)[, ]*([^,]*)[, ]*([^;]*)"; + int i; + regmatch_t matches[REGEX_INST_MATCHES]; + struct inst_tokens tokens; + + /* Initialize inst */ + memset(inst, 0, sizeof(struct rc_instruction)); + inst->Type = RC_INSTRUCTION_NORMAL; + + /* Execute the regex */ + if (!regex_helper(regex_str, inst_str, matches, REGEX_INST_MATCHES)) { + return 0; + } + memset(&tokens, 0, sizeof(tokens)); + + /* Create Tokens */ + tokens.Opcode.String = inst_str + matches[1].rm_so; + tokens.Opcode.Length = match_length(matches, 1); + if (matches[2].rm_so > -1) { + tokens.Sat.String = inst_str + matches[2].rm_so; + tokens.Sat.Length = match_length(matches, 2); + } + + + /* Fill out the rest of the instruction. */ + for (i = 0; i < MAX_RC_OPCODE; i++) { + const struct rc_opcode_info * info = rc_get_opcode_info(i); + unsigned int first_src = 3; + unsigned int j; + if (strncmp(tokens.Opcode.String, info->Name, tokens.Opcode.Length)) { + continue; + } + inst->U.I.Opcode = info->Opcode; + if (info->HasDstReg) { + char * dst_str; + tokens.Dst.String = inst_str + matches[3].rm_so; + tokens.Dst.Length = match_length(matches, 3); + first_src++; + + dst_str = malloc(sizeof(char) * (tokens.Dst.Length + 1)); + strncpy(dst_str, tokens.Dst.String, tokens.Dst.Length); + dst_str[tokens.Dst.Length] = '\0'; + init_rc_normal_dst(inst, dst_str); + free(dst_str); + } + for (j = 0; j < info->NumSrcRegs; j++) { + char * src_str; + tokens.Srcs[j].String = + inst_str + matches[first_src + j].rm_so; + tokens.Srcs[j].Length = + match_length(matches, first_src + j); + + src_str = malloc(sizeof(char) * + (tokens.Srcs[j].Length + 1)); + strncpy(src_str, tokens.Srcs[j].String, + tokens.Srcs[j].Length); + src_str[tokens.Srcs[j].Length] = '\0'; + init_rc_normal_src(inst, j, src_str); + } + break; + } + return 1; +} diff --git a/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.h b/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.h new file mode 100644 index 00000000000..1a6bf9699ba --- /dev/null +++ b/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.h @@ -0,0 +1,13 @@ + +int init_rc_normal_src( + struct rc_instruction * inst, + unsigned int src_index, + const char * src_str); + +int init_rc_normal_dst( + struct rc_instruction * inst, + const char * dst_str); + +int init_rc_normal_instruction( + struct rc_instruction * inst, + const char * inst_str); diff --git a/src/gallium/drivers/r300/compiler/tests/unit_test.c b/src/gallium/drivers/r300/compiler/tests/unit_test.c new file mode 100644 index 00000000000..266f3365c58 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/tests/unit_test.c @@ -0,0 +1,35 @@ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include "unit_test.h" + +void run_tests(struct test tests[]) +{ + int i; + for (i = 0; tests[i].name; i++) { + printf("Test %s\n", tests[i].name); + memset(&tests[i].result, 0, sizeof(tests[i].result)); + tests[i].test_func(&tests[i].result); + printf("Test %s (%d/%d) pass\n", tests[i].name, + tests[i].result.pass, tests[i].result.test_count); + } +} + +void test_begin(struct test_result * result) +{ + result->test_count++; +} + +void test_check(struct test_result * result, int cond) +{ + printf("Subtest %u -> ", result->test_count); + if (cond) { + result->pass++; + printf("Pass"); + } else { + result->fail++; + printf("Fail"); + } + printf("\n"); +} diff --git a/src/gallium/drivers/r300/compiler/tests/unit_test.h b/src/gallium/drivers/r300/compiler/tests/unit_test.h new file mode 100644 index 00000000000..441e8b655a5 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/tests/unit_test.h @@ -0,0 +1,17 @@ + +struct test_result { + unsigned int test_count; + unsigned int pass; + unsigned int fail; +}; + +struct test { + const char * name; + void (*test_func)(struct test_result * result); + struct test_result result; +}; + +void run_tests(struct test tests[]); + +void test_begin(struct test_result * result); +void test_check(struct test_result * result, int cond); diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 388ebcdbf32..ddf5448a34b 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -247,7 +247,7 @@ static void r300_clear(struct pipe_context* pipe, if (!r300->hyperz_enabled) { r300->hyperz_enabled = r300->rws->cs_request_feature(r300->cs, - RADEON_FID_HYPERZ_RAM_ACCESS, + RADEON_FID_R300_HYPERZ_ACCESS, TRUE); if (r300->hyperz_enabled) { /* Need to emit HyperZ buffer regs for the first time. */ @@ -409,10 +409,11 @@ void r300_decompress_zmask(struct r300_context *r300) void r300_decompress_zmask_locked_unsafe(struct r300_context *r300) { - struct pipe_framebuffer_state fb = {0}; + struct pipe_framebuffer_state fb; + + memset(&fb, 0, sizeof(fb)); fb.width = r300->locked_zbuffer->width; fb.height = r300->locked_zbuffer->height; - fb.nr_cbufs = 0; fb.zsbuf = r300->locked_zbuffer; r300->context.set_framebuffer_state(&r300->context, &fb); @@ -421,8 +422,9 @@ void r300_decompress_zmask_locked_unsafe(struct r300_context *r300) void r300_decompress_zmask_locked(struct r300_context *r300) { - struct pipe_framebuffer_state saved_fb = {0}; + struct pipe_framebuffer_state saved_fb; + memset(&saved_fb, 0, sizeof(saved_fb)); util_copy_framebuffer_state(&saved_fb, r300->fb_state.state); r300_decompress_zmask_locked_unsafe(r300); r300->context.set_framebuffer_state(&r300->context, &saved_fb); @@ -443,8 +445,8 @@ static void r300_hw_copy_region(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); r300_blitter_begin(r300, R300_COPY); - util_blitter_copy_region(r300->blitter, dst, dst_level, dstx, dsty, dstz, - src, src_level, src_box, TRUE); + util_blitter_copy_texture(r300->blitter, dst, dst_level, dstx, dsty, dstz, + src, src_level, src_box, TRUE); r300_blitter_end(r300); } diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index 571986c3011..80148b80afb 100644 --- a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -31,9 +31,9 @@ * Radeons. */ /* Parse a PCI ID and fill an r300_capabilities struct with information. */ -void r300_parse_chipset(struct r300_capabilities* caps) +void r300_parse_chipset(uint32_t pci_id, struct r300_capabilities* caps) { - switch (caps->pci_id) { + switch (pci_id) { #define CHIPSET(pci_id, name, chipfamily) \ case pci_id: \ caps->family = CHIP_FAMILY_##chipfamily; \ @@ -43,7 +43,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) default: fprintf(stderr, "r300: Warning: Unknown chipset 0x%x\nAborting...", - caps->pci_id); + pci_id); abort(); } diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h index 4df6b5b6292..f96cdaf2580 100644 --- a/src/gallium/drivers/r300/r300_chipset.h +++ b/src/gallium/drivers/r300/r300_chipset.h @@ -43,16 +43,10 @@ enum r300_zmask_compression { /* Structure containing all the possible information about a specific Radeon * in the R3xx, R4xx, and R5xx families. */ struct r300_capabilities { - /* PCI ID */ - uint32_t pci_id; /* Chipset family */ int family; /* The number of vertex floating-point units */ unsigned num_vert_fpus; - /* The number of fragment pipes */ - unsigned num_frag_pipes; - /* The number of z pipes */ - unsigned num_z_pipes; /* The number of texture units. */ unsigned num_tex_units; /* Whether or not TCL is physically present */ @@ -121,6 +115,6 @@ enum { CHIP_FAMILY_RV570 }; -void r300_parse_chipset(struct r300_capabilities* caps); +void r300_parse_chipset(uint32_t pci_id, struct r300_capabilities* caps); #endif /* R300_CHIPSET_H */ diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index d94ac74f0e5..b304999d424 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -99,7 +99,7 @@ static void r300_destroy_context(struct pipe_context* context) struct r300_context* r300 = r300_context(context); if (r300->cs && r300->hyperz_enabled) { - r300->rws->cs_request_feature(r300->cs, RADEON_FID_HYPERZ_RAM_ACCESS, FALSE); + r300->rws->cs_request_feature(r300->cs, RADEON_FID_R300_HYPERZ_ACCESS, FALSE); } if (r300->blitter) @@ -173,7 +173,7 @@ static boolean r300_setup_atoms(struct r300_context* r300) boolean is_rv350 = r300->screen->caps.is_rv350; boolean is_r500 = r300->screen->caps.is_r500; boolean has_tcl = r300->screen->caps.has_tcl; - boolean drm_2_6_0 = r300->rws->get_value(r300->rws, RADEON_VID_DRM_2_6_0); + boolean drm_2_6_0 = r300->screen->info.drm_minor >= 6; /* Create the actual atom list. * @@ -380,7 +380,7 @@ static void r300_init_states(struct pipe_context *pipe) if (r300->screen->caps.is_r500 || (r300->screen->caps.is_rv350 && - r300->rws->get_value(r300->rws, RADEON_VID_DRM_2_6_0))) { + r300->screen->info.drm_minor >= 6)) { OUT_CB_REG(R300_GB_Z_PEQ_CONFIG, 0); } END_CB; @@ -457,7 +457,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300_init_render_functions(r300); r300_init_states(&r300->context); - rws->cs_set_flush(r300->cs, r300_flush_callback, r300); + rws->cs_set_flush_callback(r300->cs, r300_flush_callback, r300); /* The KIL opcode needs the first texture unit to be enabled * on r3xx-r4xx. In order to calm down the CS checker, we bind this @@ -520,15 +520,15 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, "r300: DRM version: %d.%d.%d, Name: %s, ID: 0x%04x, GB: %d, Z: %d\n" "r300: GART size: %d MB, VRAM size: %d MB\n" "r300: AA compression RAM: %s, Z compression RAM: %s, HiZ RAM: %s\n", - rws->get_value(rws, RADEON_VID_DRM_MAJOR), - rws->get_value(rws, RADEON_VID_DRM_MINOR), - rws->get_value(rws, RADEON_VID_DRM_PATCHLEVEL), + r300->screen->info.drm_major, + r300->screen->info.drm_minor, + r300->screen->info.drm_patchlevel, screen->get_name(screen), - rws->get_value(rws, RADEON_VID_PCI_ID), - rws->get_value(rws, RADEON_VID_R300_GB_PIPES), - rws->get_value(rws, RADEON_VID_R300_Z_PIPES), - rws->get_value(rws, RADEON_VID_GART_SIZE) >> 20, - rws->get_value(rws, RADEON_VID_VRAM_SIZE) >> 20, + r300->screen->info.pci_id, + r300->screen->info.r300_num_gb_pipes, + r300->screen->info.r300_num_z_pipes, + r300->screen->info.gart_size >> 20, + r300->screen->info.vram_size >> 20, "YES", /* XXX really? */ r300->screen->caps.zmask_ram ? "YES" : "NO", r300->screen->caps.hiz_ram ? "YES" : "NO"); diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index d214af4cd5b..b953bd10f43 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -574,11 +574,12 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300, struct r300_query *query) { struct r300_capabilities* caps = &r300->screen->caps; + uint32_t gb_pipes = r300->screen->info.r300_num_gb_pipes; CS_LOCALS(r300); - assert(caps->num_frag_pipes); + assert(gb_pipes); - BEGIN_CS(6 * caps->num_frag_pipes + 2); + BEGIN_CS(6 * gb_pipes + 2); /* I'm not so sure I like this switch, but it's hard to be elegant * when there's so many special cases... * @@ -587,7 +588,7 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300, * 4-byte offset for each pipe. RV380 and older are special; they have * only two pipes, and the second pipe's enable is on bit 3, not bit 1, * so there's a chipset cap for that. */ - switch (caps->num_frag_pipes) { + switch (gb_pipes) { case 4: /* pipe 3 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 3); @@ -613,7 +614,7 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300, break; default: fprintf(stderr, "r300: Implementation error: Chipset reports %d" - " pixel pipes!\n", caps->num_frag_pipes); + " pixel pipes!\n", gb_pipes); abort(); } @@ -663,7 +664,7 @@ void r300_emit_query_end(struct r300_context* r300) return; if (caps->family == CHIP_FAMILY_RV530) { - if (caps->num_z_pipes == 2) + if (r300->screen->info.r300_num_z_pipes == 2) rv530_emit_query_end_double_z(r300, query); else rv530_emit_query_end_single_z(r300, query); @@ -1237,13 +1238,12 @@ validate: r300->rws->cs_add_reloc(r300->cs, r300_resource(index_buffer)->cs_buf, r300_resource(index_buffer)->domain, 0); - /* Now do the validation. */ + /* Now do the validation (flush is called inside cs_validate on failure). */ if (!r300->rws->cs_validate(r300->cs)) { /* Ooops, an infinite loop, give up. */ if (flushed) return FALSE; - r300_flush(&r300->context, RADEON_FLUSH_ASYNC, NULL); flushed = TRUE; goto validate; } diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 6c1c9d2fb13..234e043b071 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -24,7 +24,6 @@ #define R300_EMIT_H #include "r300_context.h" -#include "radeon_code.h" struct rX00_fragment_program_code; struct r300_vertex_program_code; diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index 34f5419a864..dc596c4122a 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -76,7 +76,6 @@ void r300_flush(struct pipe_context *pipe, /* Create a fence, which is a dummy BO. */ *rfence = r300->rws->buffer_create(r300->rws, 1, 1, PIPE_BIND_VERTEX_BUFFER, - PIPE_USAGE_STATIC, RADEON_DOMAIN_GTT); /* Add the fence as a dummy relocation. */ r300->rws->cs_add_reloc(r300->cs, @@ -121,7 +120,7 @@ void r300_flush(struct pipe_context *pipe, } /* Release HyperZ. */ - r300->rws->cs_request_feature(r300->cs, RADEON_FID_HYPERZ_RAM_ACCESS, + r300->rws->cs_request_feature(r300->cs, RADEON_FID_R300_HYPERZ_ACCESS, FALSE); } r300->num_z_clears = 0; diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index e3a1bc4a0f4..6f21125f70a 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -38,8 +38,7 @@ #include "r300_texture.h" #include "r300_tgsi_to_rc.h" -#include "radeon_code.h" -#include "radeon_compiler.h" +#include "compiler/radeon_compiler.h" /* Convert info about FS input semantics to r300_shader_semantics. */ void r300_shader_read_fs_inputs(struct tgsi_shader_info* info, @@ -181,9 +180,10 @@ static void get_external_state( v->base.format == PIPE_FORMAT_LATC1_SNORM) { unsigned char swizzle[4]; - util_format_combine_swizzles(swizzle, + util_format_compose_swizzles( util_format_description(v->base.format)->swizzle, - v->swizzle); + v->swizzle, + swizzle); state->unit[i].texture_swizzle = RC_MAKE_SWIZZLE(swizzle[0], swizzle[1], diff --git a/src/gallium/drivers/r300/r300_fs.h b/src/gallium/drivers/r300/r300_fs.h index c86a90b85ae..45c9e8801c3 100644 --- a/src/gallium/drivers/r300/r300_fs.h +++ b/src/gallium/drivers/r300/r300_fs.h @@ -27,7 +27,7 @@ #include "pipe/p_state.h" #include "tgsi/tgsi_scan.h" -#include "radeon_code.h" +#include "compiler/radeon_code.h" #include "r300_shader_semantics.h" struct r300_fragment_shader_code { diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c index 782f041e926..c0357f9d035 100644 --- a/src/gallium/drivers/r300/r300_query.c +++ b/src/gallium/drivers/r300/r300_query.c @@ -49,16 +49,15 @@ static struct pipe_query *r300_create_query(struct pipe_context *pipe, q->buffer_size = 4096; if (r300screen->caps.family == CHIP_FAMILY_RV530) - q->num_pipes = r300screen->caps.num_z_pipes; + q->num_pipes = r300screen->info.r300_num_z_pipes; else - q->num_pipes = r300screen->caps.num_frag_pipes; + q->num_pipes = r300screen->info.r300_num_gb_pipes; insert_at_tail(&r300->query_list, q); /* Open up the occlusion query buffer. */ q->buf = r300->rws->buffer_create(r300->rws, q->buffer_size, 4096, - PIPE_BIND_CUSTOM, PIPE_USAGE_STREAM, - q->domain); + PIPE_BIND_CUSTOM, q->domain); q->cs_buf = r300->rws->buffer_get_cs_handle(q->buf); return (struct pipe_query*)q; diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index bb30b1ab0be..5edbb22a743 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -2078,7 +2078,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_ALU_OUTC_D2A (3 << 23) # define R300_ALU_OUTC_MIN (4 << 23) # define R300_ALU_OUTC_MAX (5 << 23) -# define R300_ALU_OUTC_CMPH (7 << 23) +# define R300_ALU_OUTC_CND (7 << 23) # define R300_ALU_OUTC_CMP (8 << 23) # define R300_ALU_OUTC_FRC (9 << 23) # define R300_ALU_OUTC_REPL_ALPHA (10 << 23) @@ -2944,6 +2944,23 @@ enum { /*\}*/ +#define PVS_OP_DST_OPERAND(opcode, math_inst, macro_inst, reg_index, reg_writemask, reg_class) \ + (((opcode & PVS_DST_OPCODE_MASK) << PVS_DST_OPCODE_SHIFT) \ + | ((math_inst & PVS_DST_MATH_INST_MASK) << PVS_DST_MATH_INST_SHIFT) \ + | ((macro_inst & PVS_DST_MACRO_INST_MASK) << PVS_DST_MACRO_INST_SHIFT) \ + | ((reg_index & PVS_DST_OFFSET_MASK) << PVS_DST_OFFSET_SHIFT) \ + | ((reg_writemask & 0xf) << PVS_DST_WE_X_SHIFT) /* X Y Z W */ \ + | ((reg_class & PVS_DST_REG_TYPE_MASK) << PVS_DST_REG_TYPE_SHIFT)) + +#define PVS_SRC_OPERAND(in_reg_index, comp_x, comp_y, comp_z, comp_w, reg_class, negate) \ + (((in_reg_index & PVS_SRC_OFFSET_MASK) << PVS_SRC_OFFSET_SHIFT) \ + | ((comp_x & PVS_SRC_SWIZZLE_X_MASK) << PVS_SRC_SWIZZLE_X_SHIFT) \ + | ((comp_y & PVS_SRC_SWIZZLE_Y_MASK) << PVS_SRC_SWIZZLE_Y_SHIFT) \ + | ((comp_z & PVS_SRC_SWIZZLE_Z_MASK) << PVS_SRC_SWIZZLE_Z_SHIFT) \ + | ((comp_w & PVS_SRC_SWIZZLE_W_MASK) << PVS_SRC_SWIZZLE_W_SHIFT) \ + | ((negate & 0xf) << PVS_SRC_MODIFIER_X_SHIFT) /* X Y Z W */ \ + | ((reg_class & PVS_SRC_REG_TYPE_MASK) << PVS_SRC_REG_TYPE_SHIFT)) + /* BEGIN: Packet 3 commands */ /* A primitive emission dword. */ @@ -3249,6 +3266,8 @@ enum { # define R500_INST_RGB_CLAMP (1 << 19) # define R500_INST_ALPHA_CLAMP (1 << 20) # define R500_INST_ALU_RESULT_SEL (1 << 21) +# define R500_INST_ALU_RESULT_SEL_RED (0 << 21) +# define R500_INST_ALU_RESULT_SEL_ALPHA (1 << 21) # define R500_INST_ALPHA_PRED_INV (1 << 22) # define R500_INST_ALU_RESULT_OP_EQ (0 << 23) # define R500_INST_ALU_RESULT_OP_LT (1 << 23) diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index b31141a518e..d69b4cf4275 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -1267,33 +1267,31 @@ done: r300->sprite_coord_enable = last_sprite_coord_enable; } -static void r300_resource_resolve(struct pipe_context* pipe, - struct pipe_resource* dest, - unsigned dst_layer, - struct pipe_resource* src, - unsigned src_layer) +static void r300_resource_resolve(struct pipe_context *pipe, + const struct pipe_resolve_info *info) { - struct r300_context* r300 = r300_context(pipe); - struct pipe_surface* srcsurf, surf_tmpl; + struct r300_context *r300 = r300_context(pipe); + struct pipe_surface *srcsurf, *dstsurf, surf_tmpl; struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state; float color[] = {0, 0, 0, 0}; memset(&surf_tmpl, 0, sizeof(surf_tmpl)); - surf_tmpl.format = src->format; - surf_tmpl.usage = 0; /* not really a surface hence no bind flags */ - surf_tmpl.u.tex.level = 0; /* msaa resources cannot have mipmaps */ - surf_tmpl.u.tex.first_layer = src_layer; - surf_tmpl.u.tex.last_layer = src_layer; - srcsurf = pipe->create_surface(pipe, src, &surf_tmpl); - surf_tmpl.format = dest->format; - surf_tmpl.u.tex.first_layer = dst_layer; - surf_tmpl.u.tex.last_layer = dst_layer; + surf_tmpl.format = info->src.res->format; + surf_tmpl.u.tex.first_layer = + surf_tmpl.u.tex.last_layer = info->src.layer; + srcsurf = pipe->create_surface(pipe, info->src.res, &surf_tmpl); + /* XXX Offset both surfaces by x0,y1. */ + + surf_tmpl.format = info->dst.res->format; + surf_tmpl.u.tex.level = info->dst.level; + surf_tmpl.u.tex.first_layer = + surf_tmpl.u.tex.last_layer = info->dst.layer; + dstsurf = pipe->create_surface(pipe, info->dst.res, &surf_tmpl); DBG(r300, DBG_DRAW, "r300: Resolving resource...\n"); /* Enable AA resolve. */ - aa->dest = r300_surface(pipe->create_surface(pipe, dest, &surf_tmpl)); - + aa->dest = r300_surface(dstsurf); aa->aaresolve_ctl = R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_RESOLVE | R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE; @@ -1301,16 +1299,19 @@ static void r300_resource_resolve(struct pipe_context* pipe, r300_mark_atom_dirty(r300, &r300->aa_state); /* Resolve the surface. */ + /* XXX: y1 < 0 ==> Y flip */ r300->context.clear_render_target(pipe, - srcsurf, color, 0, 0, src->width0, src->height0); + srcsurf, color, 0, 0, + info->dst.x1 - info->dst.x0, + info->dst.y1 - info->dst.y0); /* Disable AA resolve. */ aa->aaresolve_ctl = 0; r300->aa_state.size = 4; r300_mark_atom_dirty(r300, &r300->aa_state); - pipe_surface_reference((struct pipe_surface**)&srcsurf, NULL); - pipe_surface_reference((struct pipe_surface**)&aa->dest, NULL); + pipe_surface_reference(&srcsurf, NULL); + pipe_surface_reference(&dstsurf, NULL); } void r300_init_render_functions(struct r300_context *r300) diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index c8df45fb3e7..8c0500c7dfd 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -142,6 +142,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: case PIPE_CAP_SEAMLESS_CUBE_MAP: case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: + case PIPE_CAP_SCALED_RESOLVE: return 0; /* SWTCL-only features. */ @@ -211,13 +212,12 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e case PIPE_SHADER_CAP_MAX_PREDS: return is_r500 ? 1 : 0; case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: - return 0; case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: - return 0; case PIPE_SHADER_CAP_SUBROUTINES: + case PIPE_SHADER_CAP_INTEGERS: return 0; } break; @@ -248,18 +248,15 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e return 1; /* XXX guessed */ case PIPE_SHADER_CAP_MAX_PREDS: return is_r500 ? 4 : 0; /* XXX guessed. */ + case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: + return 1; case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: - return 0; case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: - return 0; - case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: - return 1; case PIPE_SHADER_CAP_SUBROUTINES: + case PIPE_SHADER_CAP_INTEGERS: return 0; - default: - break; } break; default: @@ -316,6 +313,8 @@ static int r300_get_video_param(struct pipe_screen *screen, case PIPE_VIDEO_CAP_MAX_WIDTH: case PIPE_VIDEO_CAP_MAX_HEIGHT: return vl_video_buffer_max_size(screen); + case PIPE_VIDEO_CAP_NUM_BUFFERS_DESIRED: + return vl_num_buffers_desired(screen, profile); default: return 0; } @@ -327,9 +326,8 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, unsigned sample_count, unsigned usage) { - struct radeon_winsys *rws = r300_screen(screen)->rws; uint32_t retval = 0; - boolean drm_2_8_0 = rws->get_value(rws, RADEON_VID_DRM_2_8_0); + boolean drm_2_8_0 = r300_screen(screen)->info.drm_minor >= 8; boolean is_r500 = r300_screen(screen)->caps.is_r500; boolean is_r400 = r300_screen(screen)->caps.is_r400; boolean is_color2101010 = format == PIPE_FORMAT_R10G10B10A2_UNORM || @@ -458,7 +456,7 @@ static boolean r300_fence_signalled(struct pipe_screen *screen, struct radeon_winsys *rws = r300_screen(screen)->rws; struct pb_buffer *rfence = (struct pb_buffer*)fence; - return !rws->buffer_is_busy(rfence); + return !rws->buffer_is_busy(rfence, RADEON_USAGE_READWRITE); } static boolean r300_fence_finish(struct pipe_screen *screen, @@ -475,7 +473,7 @@ static boolean r300_fence_finish(struct pipe_screen *screen, timeout /= 1000; /* Wait in a loop. */ - while (rws->buffer_is_busy(rfence)) { + while (rws->buffer_is_busy(rfence, RADEON_USAGE_READWRITE)) { if (os_time_get() - start_time >= timeout) { return FALSE; } @@ -484,7 +482,7 @@ static boolean r300_fence_finish(struct pipe_screen *screen, return TRUE; } - rws->buffer_wait(rfence); + rws->buffer_wait(rfence, RADEON_USAGE_READWRITE); return TRUE; } @@ -497,19 +495,17 @@ struct pipe_screen* r300_screen_create(struct radeon_winsys *rws) return NULL; } - r300screen->caps.pci_id = rws->get_value(rws, RADEON_VID_PCI_ID); - r300screen->caps.num_frag_pipes = rws->get_value(rws, RADEON_VID_R300_GB_PIPES); - r300screen->caps.num_z_pipes = rws->get_value(rws, RADEON_VID_R300_Z_PIPES); + rws->query_info(rws, &r300screen->info); r300_init_debug(r300screen); - r300_parse_chipset(&r300screen->caps); + r300_parse_chipset(r300screen->info.pci_id, &r300screen->caps); if (SCREEN_DBG_ON(r300screen, DBG_NO_ZMASK)) r300screen->caps.zmask_ram = 0; if (SCREEN_DBG_ON(r300screen, DBG_NO_HIZ)) r300screen->caps.hiz_ram = 0; - if (!rws->get_value(rws, RADEON_VID_DRM_2_8_0)) + if (r300screen->info.drm_minor < 8) r300screen->caps.has_us_format = FALSE; pipe_mutex_init(r300screen->num_contexts_mutex); diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index e5c53bf3500..82b2068e7a0 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -24,23 +24,20 @@ #ifndef R300_SCREEN_H #define R300_SCREEN_H -#include "pipe/p_screen.h" - #include "r300_chipset.h" - +#include "../../winsys/radeon/drm/radeon_winsys.h" +#include "pipe/p_screen.h" #include "util/u_slab.h" - #include <stdio.h> -struct radeon_winsys; - struct r300_screen { /* Parent class */ struct pipe_screen screen; struct radeon_winsys *rws; - /* Chipset capabilities */ + /* Chipset info and capabilities. */ + struct radeon_info info; struct r300_capabilities caps; /* Memory pools. */ diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c index 4154c81512e..c751a943b96 100644 --- a/src/gallium/drivers/r300/r300_screen_buffer.c +++ b/src/gallium/drivers/r300/r300_screen_buffer.c @@ -201,8 +201,7 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen, rbuf->buf = r300screen->rws->buffer_create(r300screen->rws, rbuf->b.b.b.width0, alignment, - rbuf->b.b.b.bind, rbuf->b.b.b.usage, - rbuf->domain); + rbuf->b.b.b.bind, rbuf->domain); if (!rbuf->buf) { util_slab_free(&r300screen->pool_buffers, rbuf); return NULL; diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index f63114e7eb7..45c11fce1fe 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -605,7 +605,6 @@ static uint32_t r300_get_border_color(enum pipe_format format, { const struct util_format_description *desc; float border_swizzled[4] = {0}; - unsigned i; union util_color uc = {0}; desc = util_format_description(format); @@ -629,22 +628,7 @@ static uint32_t r300_get_border_color(enum pipe_format format, } /* Apply inverse swizzle of the format. */ - for (i = 0; i < 4; i++) { - switch (desc->swizzle[i]) { - case UTIL_FORMAT_SWIZZLE_X: - border_swizzled[0] = border[i]; - break; - case UTIL_FORMAT_SWIZZLE_Y: - border_swizzled[1] = border[i]; - break; - case UTIL_FORMAT_SWIZZLE_Z: - border_swizzled[2] = border[i]; - break; - case UTIL_FORMAT_SWIZZLE_W: - border_swizzled[3] = border[i]; - break; - } - } + util_format_unswizzle_4f(border_swizzled, border, desc->swizzle); /* Compressed formats. */ if (util_format_is_compressed(format)) { diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 62c2f1fff6c..fc84004fb97 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -38,18 +38,6 @@ #include "pipe/p_screen.h" -void util_format_combine_swizzles(unsigned char *dst, - const unsigned char *swz1, - const unsigned char *swz2) -{ - unsigned i; - - for (i = 0; i < 4; i++) { - dst[i] = swz2[i] <= UTIL_FORMAT_SWIZZLE_W ? - swz1[swz2[i]] : swz2[i]; - } -} - unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, const unsigned char *swizzle_view, boolean dxtc_swizzle) @@ -72,7 +60,7 @@ unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, if (swizzle_view) { /* Combine two sets of swizzles. */ - util_format_combine_swizzles(swizzle, swizzle_format, swizzle_view); + util_format_compose_swizzles(swizzle_format, swizzle_view, swizzle); } else { memcpy(swizzle, swizzle_format, 4); } @@ -926,7 +914,7 @@ r300_texture_create_object(struct r300_screen *rscreen, if (!buffer) { tex->buf_size = tex->tex.size_in_bytes; tex->buf = rws->buffer_create(rws, tex->tex.size_in_bytes, 2048, - base->bind, base->usage, tex->domain); + base->bind, tex->domain); if (!tex->buf) { FREE(tex); diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index 4586bb2e4dc..158a387478f 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -35,10 +35,6 @@ struct r300_texture_desc; struct r300_resource; struct r300_screen; -void util_format_combine_swizzles(unsigned char *dst, - const unsigned char *swz1, - const unsigned char *swz2); - unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, const unsigned char *swizzle_view, boolean dxtc_swizzle); diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c index da5778be65e..fe4f8dd5679 100644 --- a/src/gallium/drivers/r300/r300_texture_desc.c +++ b/src/gallium/drivers/r300/r300_texture_desc.c @@ -360,9 +360,9 @@ static void r300_setup_hyperz_properties(struct r300_screen *screen, unsigned i, pipes; if (screen->caps.family == CHIP_FAMILY_RV530) { - pipes = screen->caps.num_z_pipes; + pipes = screen->info.r300_num_z_pipes; } else { - pipes = screen->caps.num_frag_pipes; + pipes = screen->info.r300_num_gb_pipes; } for (i = 0; i <= tex->b.b.b.last_level; i++) { diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 0561ab9bfa4..07a3f3caee7 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -22,8 +22,7 @@ #include "r300_tgsi_to_rc.h" -#include "radeon_compiler.h" -#include "radeon_program.h" +#include "compiler/radeon_compiler.h" #include "tgsi/tgsi_info.h" #include "tgsi/tgsi_parse.h" diff --git a/src/gallium/drivers/r300/r300_transfer.c b/src/gallium/drivers/r300/r300_transfer.c index e2ea4cbf6c5..65964020adc 100644 --- a/src/gallium/drivers/r300/r300_transfer.c +++ b/src/gallium/drivers/r300/r300_transfer.c @@ -97,7 +97,7 @@ r300_texture_get_transfer(struct pipe_context *ctx, referenced_hw = TRUE; } else { referenced_hw = - r300->rws->buffer_is_busy(tex->buf); + r300->rws->buffer_is_busy(tex->buf, RADEON_USAGE_READWRITE); } blittable = desc->layout == UTIL_FORMAT_LAYOUT_PLAIN || diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index b319890157f..a5e8fd680ff 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -32,7 +32,7 @@ #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_ureg.h" -#include "radeon_compiler.h" +#include "compiler/radeon_compiler.h" /* Convert info about VS output semantics into r300_shader_semantics. */ static void r300_shader_read_vs_outputs( diff --git a/src/gallium/drivers/r300/r300_vs.h b/src/gallium/drivers/r300/r300_vs.h index 170de6c79db..a482ddce9c9 100644 --- a/src/gallium/drivers/r300/r300_vs.h +++ b/src/gallium/drivers/r300/r300_vs.h @@ -26,7 +26,7 @@ #include "pipe/p_state.h" #include "tgsi/tgsi_scan.h" -#include "radeon_code.h" +#include "compiler/radeon_code.h" #include "r300_context.h" #include "r300_shader_semantics.h" diff --git a/src/gallium/drivers/r600/Android.mk b/src/gallium/drivers/r600/Android.mk new file mode 100644 index 00000000000..994ae07789c --- /dev/null +++ b/src/gallium/drivers/r600/Android.mk @@ -0,0 +1,42 @@ +# Mesa 3-D graphics library +# +# Copyright (C) 2010-2011 Chia-I Wu <[email protected]> +# Copyright (C) 2010-2011 LunarG Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +LOCAL_PATH := $(call my-dir) + +# get C_SOURCES +include $(LOCAL_PATH)/Makefile.sources + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(C_SOURCES) + +LOCAL_CFLAGS := -std=c99 + +LOCAL_C_INCLUDES := \ + $(DRM_TOP) \ + $(DRM_TOP)/include/drm + +LOCAL_MODULE := libmesa_pipe_r600 + +include $(GALLIUM_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/src/gallium/drivers/r600/Makefile b/src/gallium/drivers/r600/Makefile index 7e21e3e32b1..0e68fe99345 100644 --- a/src/gallium/drivers/r600/Makefile +++ b/src/gallium/drivers/r600/Makefile @@ -6,20 +6,7 @@ LIBNAME = r600 LIBRARY_INCLUDES = \ $(shell pkg-config libdrm --cflags-only-I) -C_SOURCES = \ - r600_asm.c \ - r600_blit.c \ - r600_buffer.c \ - r600_pipe.c \ - r600_query.c \ - r600_resource.c \ - r600_shader.c \ - r600_state.c \ - r600_texture.c \ - r700_asm.c \ - evergreen_state.c \ - eg_asm.c \ - r600_translate.c \ - r600_state_common.c +# get C_SOURCES +include Makefile.sources include ../../Makefile.template diff --git a/src/gallium/drivers/r600/Makefile.sources b/src/gallium/drivers/r600/Makefile.sources new file mode 100644 index 00000000000..0366394e5a2 --- /dev/null +++ b/src/gallium/drivers/r600/Makefile.sources @@ -0,0 +1,15 @@ +C_SOURCES := \ + r600_asm.c \ + r600_blit.c \ + r600_buffer.c \ + r600_pipe.c \ + r600_query.c \ + r600_resource.c \ + r600_shader.c \ + r600_state.c \ + r600_texture.c \ + r700_asm.c \ + evergreen_state.c \ + eg_asm.c \ + r600_translate.c \ + r600_state_common.c diff --git a/src/gallium/drivers/r600/SConscript b/src/gallium/drivers/r600/SConscript index 19f07b2bef8..be12255e4d0 100644 --- a/src/gallium/drivers/r600/SConscript +++ b/src/gallium/drivers/r600/SConscript @@ -11,22 +11,8 @@ env.Append(CPPPATH = [ r600 = env.ConvenienceLibrary( target = 'r600', - source = [ - 'r600_asm.c', - 'r600_buffer.c', - 'r600_blit.c', - 'r600_pipe.c', - 'r600_query.c', - 'r600_resource.c', - 'r600_shader.c', - 'r600_state.c', - 'r600_state_common.c', - 'r600_texture.c', - 'r600_translate.c', - 'r700_asm.c', - 'evergreen_state.c', - 'eg_asm.c', - ]) + source = env.ParseSourceList('Makefile.sources', 'C_SOURCES') + ) env.Alias('r600', r600) diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c index c95872b0809..ca25b341ffd 100644 --- a/src/gallium/drivers/r600/eg_asm.c +++ b/src/gallium/drivers/r600/eg_asm.c @@ -29,7 +29,7 @@ #include "r600_opcodes.h" #include "evergreend.h" -int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) +int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf) { unsigned id = cf->id; diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 4605c833dea..2135b8ac580 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -262,22 +262,16 @@ static uint32_t r600_translate_dbformat(enum pipe_format format) case PIPE_FORMAT_Z16_UNORM: return V_028040_Z_16; case PIPE_FORMAT_Z24X8_UNORM: - return V_028040_Z_24; case PIPE_FORMAT_Z24_UNORM_S8_USCALED: return V_028040_Z_24; + case PIPE_FORMAT_Z32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + return V_028040_Z_32_FLOAT; default: return ~0U; } } -static uint32_t r600_translate_stencilformat(enum pipe_format format) -{ - if (format == PIPE_FORMAT_Z24_UNORM_S8_USCALED) - return 1; - else - return 0; -} - static uint32_t r600_translate_colorswap(enum pipe_format format) { switch (format) { @@ -360,6 +354,7 @@ static uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_R11G11B10_FLOAT: case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT: case PIPE_FORMAT_R16G16_FLOAT: case PIPE_FORMAT_R16G16_UNORM: return V_028C70_SWAP_STD; @@ -369,6 +364,7 @@ static uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: case PIPE_FORMAT_R16G16B16A16_FLOAT: + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: /* 128-bit buffers. */ case PIPE_FORMAT_R32G32B32A32_FLOAT: @@ -453,7 +449,11 @@ static uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_S8_USCALED_Z24_UNORM: return V_028C70_COLOR_24_8; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + return V_028C70_COLOR_X24_8_32_FLOAT; + case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT: return V_028C70_COLOR_32_FLOAT; case PIPE_FORMAT_R16G16_FLOAT: @@ -541,6 +541,7 @@ static uint32_t r600_colorformat_endian_swap(uint32_t colorformat) case V_028C70_COLOR_32_32_FLOAT: case V_028C70_COLOR_32_32: + case V_028C70_COLOR_X24_8_32_FLOAT: return ENDIAN_8IN32; /* 96-bit buffers. */ @@ -638,10 +639,10 @@ static void evergreen_set_blend_color(struct pipe_context *ctx, return; rstate->id = R600_PIPE_STATE_BLEND_COLOR; - r600_pipe_state_add_reg(rstate, R_028414_CB_BLEND_RED, fui(state->color[0]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028418_CB_BLEND_GREEN, fui(state->color[1]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02841C_CB_BLEND_BLUE, fui(state->color[2]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028420_CB_BLEND_ALPHA, fui(state->color[3]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028414_CB_BLEND_RED, fui(state->color[0]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028418_CB_BLEND_GREEN, fui(state->color[1]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02841C_CB_BLEND_BLUE, fui(state->color[2]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028420_CB_BLEND_ALPHA, fui(state->color[3]), 0xFFFFFFFF, NULL, 0); free(rctx->states[R600_PIPE_STATE_BLEND_COLOR]); rctx->states[R600_PIPE_STATE_BLEND_COLOR] = rstate; @@ -686,13 +687,13 @@ static void *evergreen_create_blend_state(struct pipe_context *ctx, blend->cb_target_mask = target_mask; r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL, - color_control, 0xFFFFFFFD, NULL); + color_control, 0xFFFFFFFD, NULL, 0); if (rctx->chip_class != CAYMAN) - r600_pipe_state_add_reg(rstate, R_028C3C_PA_SC_AA_MASK, 0xFFFFFFFF, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C3C_PA_SC_AA_MASK, 0xFFFFFFFF, 0xFFFFFFFF, NULL, 0); else { - r600_pipe_state_add_reg(rstate, CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 0xFFFFFFFF, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, CM_R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1, 0xFFFFFFFF, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 0xFFFFFFFF, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, CM_R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1, 0xFFFFFFFF, 0xFFFFFFFF, NULL, 0); } for (int i = 0; i < 8; i++) { @@ -723,7 +724,7 @@ static void *evergreen_create_blend_state(struct pipe_context *ctx, } } for (int i = 0; i < 8; i++) { - r600_pipe_state_add_reg(rstate, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl[i], 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl[i], 0xFFFFFFFF, NULL, 0); } return rstate; @@ -791,27 +792,27 @@ static void *evergreen_create_dsa_state(struct pipe_context *ctx, S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) | S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE); /* TODO db_render_override depends on query */ - r600_pipe_state_add_reg(rstate, R_028028_DB_STENCIL_CLEAR, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02802C_DB_DEPTH_CLEAR, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028410_SX_ALPHA_TEST_CONTROL, alpha_test_control, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028028_DB_STENCIL_CLEAR, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02802C_DB_DEPTH_CLEAR, 0x3F800000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028410_SX_ALPHA_TEST_CONTROL, alpha_test_control, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028430_DB_STENCILREFMASK, stencil_ref_mask, - 0xFFFFFFFF & C_028430_STENCILREF, NULL); + 0xFFFFFFFF & C_028430_STENCILREF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028434_DB_STENCILREFMASK_BF, stencil_ref_mask_bf, - 0xFFFFFFFF & C_028434_STENCILREF_BF, NULL); - r600_pipe_state_add_reg(rstate, R_0286DC_SPI_FOG_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control, 0xFFFFFFFF, NULL); + 0xFFFFFFFF & C_028434_STENCILREF_BF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0286DC_SPI_FOG_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control, 0xFFFFFFFF, NULL, 0); /* The DB_SHADER_CONTROL mask is 0xFFFFFFBC since Z_EXPORT_ENABLE, * STENCIL_EXPORT_ENABLE and KILL_ENABLE are controlled by * evergreen_pipe_shader_ps().*/ - r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBC, NULL); - r600_pipe_state_add_reg(rstate, R_028000_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02800C_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028AC8_DB_PRELOAD_CONTROL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028B70_DB_ALPHA_TO_MASK, 0x0000AA00, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBC, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028000_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02800C_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028AC8_DB_PRELOAD_CONTROL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028B70_DB_ALPHA_TO_MASK, 0x0000AA00, 0xFFFFFFFF, NULL, 0); return rstate; } @@ -856,7 +857,7 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx, tmp |= S_0286D4_PNT_SPRITE_TOP_1(1); } } - r600_pipe_state_add_reg(rstate, R_0286D4_SPI_INTERP_CONTROL_0, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286D4_SPI_INTERP_CONTROL_0, tmp, 0xFFFFFFFF, NULL, 0); polygon_dual_mode = (state->fill_front != PIPE_POLYGON_MODE_FILL || state->fill_back != PIPE_POLYGON_MODE_FILL); @@ -870,44 +871,44 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx, S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri) | S_028814_POLY_MODE(polygon_dual_mode) | S_028814_POLYMODE_FRONT_PTYPE(r600_translate_fill(state->fill_front)) | - S_028814_POLYMODE_BACK_PTYPE(r600_translate_fill(state->fill_back)), 0xFFFFFFFF, NULL); + S_028814_POLYMODE_BACK_PTYPE(r600_translate_fill(state->fill_back)), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_02881C_PA_CL_VS_OUT_CNTL, S_02881C_USE_VTX_POINT_SIZE(state->point_size_per_vertex) | - S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028820_PA_CL_NANINF_CNTL, 0x00000000, 0xFFFFFFFF, NULL); + S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028820_PA_CL_NANINF_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0); /* point size 12.4 fixed point */ tmp = (unsigned)(state->point_size * 8.0); - r600_pipe_state_add_reg(rstate, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A04_PA_SU_POINT_MINMAX, 0x80000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A04_PA_SU_POINT_MINMAX, 0x80000000, 0xFFFFFFFF, NULL, 0); tmp = (unsigned)state->line_width * 8; - r600_pipe_state_add_reg(rstate, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp), 0xFFFFFFFF, NULL, 0); if (rctx->chip_class == CAYMAN) { - r600_pipe_state_add_reg(rstate, CM_R_028BDC_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, CM_R_028BDC_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, CM_R_028BE4_PA_SU_VTX_CNTL, S_028C08_PIX_CENTER_HALF(state->gl_rasterization_rules), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, CM_R_028BEC_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, CM_R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, CM_R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, CM_R_028BEC_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, CM_R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, CM_R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0); } else { - r600_pipe_state_add_reg(rstate, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL, 0); - r600_pipe_state_add_reg(rstate, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028C08_PA_SU_VTX_CNTL, S_028C08_PIX_CENTER_HALF(state->gl_rasterization_rules), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); } - r600_pipe_state_add_reg(rstate, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02820C_PA_SC_CLIPRECT_RULE, clip_rule, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02820C_PA_SC_CLIPRECT_RULE, clip_rule, 0xFFFFFFFF, NULL, 0); return rstate; } @@ -933,22 +934,22 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx, S_03C000_MIP_FILTER(r600_tex_mipfilter(state->min_mip_filter)) | S_03C000_MAX_ANISO(r600_tex_aniso_filter(state->max_anisotropy)) | S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)) | - S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0), 0xFFFFFFFF, NULL); + S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg_noblock(rstate, R_03C004_SQ_TEX_SAMPLER_WORD1_0, S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) | S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg_noblock(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0, S_03C008_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) | (state->seamless_cube_map ? 0 : S_03C008_DISABLE_CUBE_WRAP(1)) | S_03C008_TYPE(1), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); if (uc.ui) { - r600_pipe_state_add_reg_noblock(rstate, R_00A404_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg_noblock(rstate, R_00A408_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg_noblock(rstate, R_00A40C_TD_PS_SAMPLER0_BORDER_BLUE, fui(state->border_color[2]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg_noblock(rstate, R_00A410_TD_PS_SAMPLER0_BORDER_ALPHA, fui(state->border_color[3]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg_noblock(rstate, R_00A404_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg_noblock(rstate, R_00A408_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg_noblock(rstate, R_00A40C_TD_PS_SAMPLER0_BORDER_BLUE, fui(state->border_color[2]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg_noblock(rstate, R_00A410_TD_PS_SAMPLER0_BORDER_ALPHA, fui(state->border_color[3]), 0xFFFFFFFF, NULL, 0); } return rstate; } @@ -957,43 +958,39 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte struct pipe_resource *texture, const struct pipe_sampler_view *state) { - struct r600_pipe_sampler_view *resource = CALLOC_STRUCT(r600_pipe_sampler_view); + struct r600_pipe_sampler_view *view = CALLOC_STRUCT(r600_pipe_sampler_view); struct r600_pipe_resource_state *rstate; - const struct util_format_description *desc; - struct r600_resource_texture *tmp; + struct r600_resource_texture *tmp = (struct r600_resource_texture*)texture; struct r600_resource *rbuffer; unsigned format, endian; uint32_t word4 = 0, yuv_format = 0, pitch = 0; unsigned char swizzle[4], array_mode = 0, tile_type = 0; struct r600_bo *bo[2]; - if (resource == NULL) + if (view == NULL) return NULL; - rstate = &resource->state; + rstate = &view->state; /* initialize base object */ - resource->base = *state; - resource->base.texture = NULL; + view->base = *state; + view->base.texture = NULL; pipe_reference(NULL, &texture->reference); - resource->base.texture = texture; - resource->base.reference.count = 1; - resource->base.context = ctx; + view->base.texture = texture; + view->base.reference.count = 1; + view->base.context = ctx; swizzle[0] = state->swizzle_r; swizzle[1] = state->swizzle_g; swizzle[2] = state->swizzle_b; swizzle[3] = state->swizzle_a; + format = r600_translate_texformat(ctx->screen, state->format, swizzle, &word4, &yuv_format); if (format == ~0) { format = 0; } - desc = util_format_description(state->format); - if (desc == NULL) { - R600_ERR("unknow format %d\n", state->format); - } - tmp = (struct r600_resource_texture *)texture; + if (tmp->depth && !tmp->is_flushing_texture) { r600_texture_depth_flush(ctx, texture, TRUE); tmp = tmp->flushed_depth_texture; @@ -1016,6 +1013,8 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte rstate->bo[0] = bo[0]; rstate->bo[1] = bo[1]; + rstate->bo_usage[0] = RADEON_USAGE_READ; + rstate->bo_usage[1] = RADEON_USAGE_READ; rstate->val[0] = (S_030000_DIM(r600_tex_dim(texture->target)) | S_030000_PITCH((pitch / 8) - 1) | S_030000_NON_DISP_TILING_ORDER(tile_type) | @@ -1023,8 +1022,8 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte rstate->val[1] = (S_030004_TEX_HEIGHT(texture->height0 - 1) | S_030004_TEX_DEPTH(texture->depth0 - 1) | S_030004_ARRAY_MODE(array_mode)); - rstate->val[2] = (tmp->offset[0] + r600_bo_offset(bo[0])) >> 8; - rstate->val[3] = (tmp->offset[1] + r600_bo_offset(bo[1])) >> 8; + rstate->val[2] = tmp->offset[0] >> 8; + rstate->val[3] = tmp->offset[1] >> 8; rstate->val[4] = (word4 | S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) | S_030010_ENDIAN_SWAP(endian) | @@ -1036,7 +1035,7 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte rstate->val[7] = (S_03001C_DATA_FORMAT(format) | S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_TEXTURE)); - return &resource->base; + return &view->base; } static void evergreen_set_vs_sampler_view(struct pipe_context *ctx, unsigned count, @@ -1131,21 +1130,21 @@ static void evergreen_set_clip_state(struct pipe_context *ctx, for (int i = 0; i < state->nr; i++) { r600_pipe_state_add_reg(rstate, R_0285BC_PA_CL_UCP0_X + i * 16, - fui(state->ucp[i][0]), 0xFFFFFFFF, NULL); + fui(state->ucp[i][0]), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_0285C0_PA_CL_UCP0_Y + i * 16, - fui(state->ucp[i][1]) , 0xFFFFFFFF, NULL); + fui(state->ucp[i][1]) , 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_0285C4_PA_CL_UCP0_Z + i * 16, - fui(state->ucp[i][2]), 0xFFFFFFFF, NULL); + fui(state->ucp[i][2]), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_0285C8_PA_CL_UCP0_W + i * 16, - fui(state->ucp[i][3]), 0xFFFFFFFF, NULL); + fui(state->ucp[i][3]), 0xFFFFFFFF, NULL, 0); } r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, S_028810_PS_UCP_MODE(3) | ((1 << state->nr) - 1) | S_028810_ZCLIP_NEAR_DISABLE(state->depth_clamp) | - S_028810_ZCLIP_FAR_DISABLE(state->depth_clamp), 0xFFFFFFFF, NULL); + S_028810_ZCLIP_FAR_DISABLE(state->depth_clamp), 0xFFFFFFFF, NULL, 0); free(rctx->states[R600_PIPE_STATE_CLIP]); rctx->states[R600_PIPE_STATE_CLIP] = rstate; @@ -1176,28 +1175,28 @@ static void evergreen_set_scissor_state(struct pipe_context *ctx, br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy); r600_pipe_state_add_reg(rstate, R_028210_PA_SC_CLIPRECT_0_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028214_PA_SC_CLIPRECT_0_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028218_PA_SC_CLIPRECT_1_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_02821C_PA_SC_CLIPRECT_1_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028220_PA_SC_CLIPRECT_2_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028224_PA_SC_CLIPRECT_2_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028228_PA_SC_CLIPRECT_3_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_02822C_PA_SC_CLIPRECT_3_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); free(rctx->states[R600_PIPE_STATE_SCISSOR]); rctx->states[R600_PIPE_STATE_SCISSOR] = rstate; @@ -1219,11 +1218,11 @@ static void evergreen_set_stencil_ref(struct pipe_context *ctx, tmp = S_028430_STENCILREF(state->ref_value[0]); r600_pipe_state_add_reg(rstate, R_028430_DB_STENCILREFMASK, tmp, - ~C_028430_STENCILREF, NULL); + ~C_028430_STENCILREF, NULL, 0); tmp = S_028434_STENCILREF_BF(state->ref_value[1]); r600_pipe_state_add_reg(rstate, R_028434_DB_STENCILREFMASK_BF, tmp, - ~C_028434_STENCILREF_BF, NULL); + ~C_028434_STENCILREF_BF, NULL, 0); free(rctx->states[R600_PIPE_STATE_STENCIL_REF]); rctx->states[R600_PIPE_STATE_STENCIL_REF] = rstate; @@ -1241,15 +1240,15 @@ static void evergreen_set_viewport_state(struct pipe_context *ctx, rctx->viewport = *state; rstate->id = R600_PIPE_STATE_VIEWPORT; - r600_pipe_state_add_reg(rstate, R_0282D0_PA_SC_VPORT_ZMIN_0, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0282D4_PA_SC_VPORT_ZMAX_0, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028818_PA_CL_VTE_CNTL, 0x0000043F, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0282D0_PA_SC_VPORT_ZMIN_0, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0282D4_PA_SC_VPORT_ZMAX_0, 0x3F800000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028818_PA_CL_VTE_CNTL, 0x0000043F, 0xFFFFFFFF, NULL, 0); free(rctx->states[R600_PIPE_STATE_VIEWPORT]); rctx->states[R600_PIPE_STATE_VIEWPORT] = rstate; @@ -1354,86 +1353,82 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state /* FIXME handle enabling of CB beyond BASE8 which has different offset */ r600_pipe_state_add_reg(rstate, R_028C60_CB_COLOR0_BASE + cb * 0x3C, - (offset + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]); + offset >> 8, 0xFFFFFFFF, bo[0], RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_028C78_CB_COLOR0_DIM + cb * 0x3C, - 0x0, 0xFFFFFFFF, NULL); + 0x0, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028C70_CB_COLOR0_INFO + cb * 0x3C, - color_info, 0xFFFFFFFF, bo[0]); + color_info, 0xFFFFFFFF, bo[0], RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_028C64_CB_COLOR0_PITCH + cb * 0x3C, S_028C64_PITCH_TILE_MAX(pitch), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028C68_CB_COLOR0_SLICE + cb * 0x3C, S_028C68_SLICE_TILE_MAX(slice), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028C6C_CB_COLOR0_VIEW + cb * 0x3C, - 0x00000000, 0xFFFFFFFF, NULL); + 0x00000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028C74_CB_COLOR0_ATTRIB + cb * 0x3C, S_028C74_NON_DISP_TILING_ORDER(tile_type), - 0xFFFFFFFF, bo[0]); + 0xFFFFFFFF, bo[0], RADEON_USAGE_READWRITE); } static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rstate, - const struct pipe_framebuffer_state *state) + const struct pipe_framebuffer_state *state) { struct r600_resource_texture *rtex; - struct r600_resource *rbuffer; struct r600_surface *surf; - unsigned level; - unsigned pitch, slice, format, stencil_format; + unsigned level, first_layer; + unsigned pitch, slice, format; unsigned offset; if (state->zsbuf == NULL) return; - level = state->zsbuf->u.tex.level; - surf = (struct r600_surface *)state->zsbuf; - rtex = (struct r600_resource_texture*)state->zsbuf->texture; - - rbuffer = &rtex->resource; + rtex = (struct r600_resource_texture*)surf->base.texture; - /* XXX quite sure for dx10+ hw don't need any offset hacks */ - offset = r600_texture_get_offset((struct r600_resource_texture *)state->zsbuf->texture, - level, state->zsbuf->u.tex.first_layer); + level = surf->base.u.tex.level; + first_layer = surf->base.u.tex.first_layer; + offset = r600_texture_get_offset(rtex, level, first_layer); pitch = rtex->pitch_in_blocks[level] / 8 - 1; slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1; - format = r600_translate_dbformat(state->zsbuf->texture->format); - stencil_format = r600_translate_stencilformat(state->zsbuf->texture->format); + format = r600_translate_dbformat(rtex->real_format); r600_pipe_state_add_reg(rstate, R_028048_DB_Z_READ_BASE, - (offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + offset >> 8, 0xFFFFFFFF, rtex->resource.bo, RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_028050_DB_Z_WRITE_BASE, - (offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + offset >> 8, 0xFFFFFFFF, rtex->resource.bo, RADEON_USAGE_READWRITE); + r600_pipe_state_add_reg(rstate, R_028008_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL, 0); - if (stencil_format) { - uint32_t stencil_offset; + if (rtex->stencil) { + uint32_t stencil_offset = + r600_texture_get_offset(rtex->stencil, level, first_layer); - stencil_offset = ((surf->aligned_height * rtex->pitch_in_bytes[level]) + 255) & ~255; r600_pipe_state_add_reg(rstate, R_02804C_DB_STENCIL_READ_BASE, - (offset + stencil_offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + stencil_offset >> 8, 0xFFFFFFFF, rtex->stencil->resource.bo, RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_028054_DB_STENCIL_WRITE_BASE, - (offset + stencil_offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + stencil_offset >> 8, 0xFFFFFFFF, rtex->stencil->resource.bo, RADEON_USAGE_READWRITE); + r600_pipe_state_add_reg(rstate, R_028044_DB_STENCIL_INFO, + 1, 0xFFFFFFFF, rtex->stencil->resource.bo, RADEON_USAGE_READWRITE); + } else { + r600_pipe_state_add_reg(rstate, R_028044_DB_STENCIL_INFO, + 0, 0xFFFFFFFF, NULL, RADEON_USAGE_READWRITE); } - r600_pipe_state_add_reg(rstate, R_028008_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028044_DB_STENCIL_INFO, - S_028044_FORMAT(stencil_format), 0xFFFFFFFF, rbuffer->bo); - r600_pipe_state_add_reg(rstate, R_028040_DB_Z_INFO, S_028040_ARRAY_MODE(rtex->array_mode[level]) | S_028040_FORMAT(format), - 0xFFFFFFFF, rbuffer->bo); + 0xFFFFFFFF, rtex->resource.bo, RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_028058_DB_DEPTH_SIZE, S_028058_PITCH_TILE_MAX(pitch), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_02805C_DB_DEPTH_SLICE, S_02805C_SLICE_TILE_MAX(slice), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); } static void evergreen_set_framebuffer_state(struct pipe_context *ctx, @@ -1492,49 +1487,49 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, r600_pipe_state_add_reg(rstate, R_028240_PA_SC_GENERIC_SCISSOR_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028244_PA_SC_GENERIC_SCISSOR_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028250_PA_SC_VPORT_SCISSOR_0_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028254_PA_SC_VPORT_SCISSOR_0_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028030_PA_SC_SCREEN_SCISSOR_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028034_PA_SC_SCREEN_SCISSOR_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028204_PA_SC_WINDOW_SCISSOR_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028208_PA_SC_WINDOW_SCISSOR_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028200_PA_SC_WINDOW_OFFSET, 0x00000000, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028238_CB_TARGET_MASK, - 0x00000000, target_mask, NULL); + 0x00000000, target_mask, NULL, 0); r600_pipe_state_add_reg(rstate, R_02823C_CB_SHADER_MASK, - shader_mask, 0xFFFFFFFF, NULL); + shader_mask, 0xFFFFFFFF, NULL, 0); if (rctx->chip_class == CAYMAN) { r600_pipe_state_add_reg(rstate, CM_R_028BE0_PA_SC_AA_CONFIG, - 0x00000000, 0xFFFFFFFF, NULL); + 0x00000000, 0xFFFFFFFF, NULL, 0); } else { r600_pipe_state_add_reg(rstate, R_028C04_PA_SC_AA_CONFIG, - 0x00000000, 0xFFFFFFFF, NULL); + 0x00000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, - 0x00000000, 0xFFFFFFFF, NULL); + 0x00000000, 0xFFFFFFFF, NULL, 0); } free(rctx->states[R600_PIPE_STATE_FRAMEBUFFER]); @@ -1609,78 +1604,78 @@ static void cayman_init_config(struct r600_pipe_context *rctx) tmp = 0x00000000; tmp |= S_008C00_EXPORT_SRC_C(1); - r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL, 0); /* always set the temp clauses */ - r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, S_008C04_NUM_CLAUSE_TEMP_GPRS(4), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2, 0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8), 0xFFFFFFFF, NULL); - - r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MODE_CNTL_0, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL_1, 0x0, 0xFFFFFFFF, NULL); - - r600_pipe_state_add_reg(rstate, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A14_VGT_HOS_CNTL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A20_VGT_HOS_REUSE_DEPTH, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A24_VGT_GROUP_PRIM_TYPE, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A28_VGT_GROUP_FIRST_DECR, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A2C_VGT_GROUP_DECR, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A40_VGT_GS_MODE, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028B94_VGT_STRMOUT_CONFIG, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028AB4_VGT_REUSE_OFF, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028AB8_VGT_VTX_CNT_EN, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1, 0xFFFFFFFF, NULL); - - r600_pipe_state_add_reg(rstate, R_028380_SQ_VTX_SEMANTIC_0, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028384_SQ_VTX_SEMANTIC_1, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028388_SQ_VTX_SEMANTIC_2, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02838C_SQ_VTX_SEMANTIC_3, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028390_SQ_VTX_SEMANTIC_4, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028394_SQ_VTX_SEMANTIC_5, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028398_SQ_VTX_SEMANTIC_6, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02839C_SQ_VTX_SEMANTIC_7, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283A0_SQ_VTX_SEMANTIC_8, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283A4_SQ_VTX_SEMANTIC_9, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283A8_SQ_VTX_SEMANTIC_10, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283AC_SQ_VTX_SEMANTIC_11, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283B0_SQ_VTX_SEMANTIC_12, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283B4_SQ_VTX_SEMANTIC_13, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283B8_SQ_VTX_SEMANTIC_14, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283BC_SQ_VTX_SEMANTIC_15, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283C0_SQ_VTX_SEMANTIC_16, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283C4_SQ_VTX_SEMANTIC_17, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283C8_SQ_VTX_SEMANTIC_18, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283CC_SQ_VTX_SEMANTIC_19, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283D0_SQ_VTX_SEMANTIC_20, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283D4_SQ_VTX_SEMANTIC_21, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283D8_SQ_VTX_SEMANTIC_22, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283DC_SQ_VTX_SEMANTIC_23, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283E0_SQ_VTX_SEMANTIC_24, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283E4_SQ_VTX_SEMANTIC_25, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283E8_SQ_VTX_SEMANTIC_26, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283EC_SQ_VTX_SEMANTIC_27, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283F0_SQ_VTX_SEMANTIC_28, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283F4_SQ_VTX_SEMANTIC_29, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283F8_SQ_VTX_SEMANTIC_30, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283FC_SQ_VTX_SEMANTIC_31, 0x0, 0xFFFFFFFF, NULL); - - r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, 0x0, 0xFFFFFFFF, NULL); - - r600_pipe_state_add_reg(rstate, CM_R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210, 0xffffffff, 0); - r600_pipe_state_add_reg(rstate, CM_R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98, 0xffffffff, 0); - - r600_pipe_state_add_reg(rstate, CM_R_0288E8_SQ_LDS_ALLOC, 0, 0xffffffff, NULL); - r600_pipe_state_add_reg(rstate, R_0288EC_SQ_LDS_ALLOC_PS, 0, 0xffffffff, NULL); - - r600_pipe_state_add_reg(rstate, CM_R_028804_DB_EQAA, 0x110000, 0xffffffff, NULL); + r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, S_008C04_NUM_CLAUSE_TEMP_GPRS(4), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2, 0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8), 0xFFFFFFFF, NULL, 0); + + r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MODE_CNTL_0, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL_1, 0x0, 0xFFFFFFFF, NULL, 0); + + r600_pipe_state_add_reg(rstate, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A14_VGT_HOS_CNTL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A20_VGT_HOS_REUSE_DEPTH, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A24_VGT_GROUP_PRIM_TYPE, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A28_VGT_GROUP_FIRST_DECR, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A2C_VGT_GROUP_DECR, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A40_VGT_GS_MODE, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028B94_VGT_STRMOUT_CONFIG, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028AB4_VGT_REUSE_OFF, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028AB8_VGT_VTX_CNT_EN, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1, 0xFFFFFFFF, NULL, 0); + + r600_pipe_state_add_reg(rstate, R_028380_SQ_VTX_SEMANTIC_0, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028384_SQ_VTX_SEMANTIC_1, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028388_SQ_VTX_SEMANTIC_2, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02838C_SQ_VTX_SEMANTIC_3, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028390_SQ_VTX_SEMANTIC_4, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028394_SQ_VTX_SEMANTIC_5, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028398_SQ_VTX_SEMANTIC_6, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02839C_SQ_VTX_SEMANTIC_7, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283A0_SQ_VTX_SEMANTIC_8, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283A4_SQ_VTX_SEMANTIC_9, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283A8_SQ_VTX_SEMANTIC_10, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283AC_SQ_VTX_SEMANTIC_11, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283B0_SQ_VTX_SEMANTIC_12, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283B4_SQ_VTX_SEMANTIC_13, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283B8_SQ_VTX_SEMANTIC_14, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283BC_SQ_VTX_SEMANTIC_15, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283C0_SQ_VTX_SEMANTIC_16, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283C4_SQ_VTX_SEMANTIC_17, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283C8_SQ_VTX_SEMANTIC_18, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283CC_SQ_VTX_SEMANTIC_19, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283D0_SQ_VTX_SEMANTIC_20, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283D4_SQ_VTX_SEMANTIC_21, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283D8_SQ_VTX_SEMANTIC_22, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283DC_SQ_VTX_SEMANTIC_23, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283E0_SQ_VTX_SEMANTIC_24, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283E4_SQ_VTX_SEMANTIC_25, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283E8_SQ_VTX_SEMANTIC_26, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283EC_SQ_VTX_SEMANTIC_27, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283F0_SQ_VTX_SEMANTIC_28, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283F4_SQ_VTX_SEMANTIC_29, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283F8_SQ_VTX_SEMANTIC_30, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283FC_SQ_VTX_SEMANTIC_31, 0x0, 0xFFFFFFFF, NULL, 0); + + r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, 0x0, 0xFFFFFFFF, NULL, 0); + + r600_pipe_state_add_reg(rstate, CM_R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210, 0xffffffff, NULL, 0); + r600_pipe_state_add_reg(rstate, CM_R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98, 0xffffffff, NULL, 0); + + r600_pipe_state_add_reg(rstate, CM_R_0288E8_SQ_LDS_ALLOC, 0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0288EC_SQ_LDS_ALLOC_PS, 0, 0xFFFFFFFF, NULL, 0); + + r600_pipe_state_add_reg(rstate, CM_R_028804_DB_EQAA, 0x110000, 0xFFFFFFFF, NULL, 0); r600_context_pipe_state_set(&rctx->ctx, rstate); } @@ -1964,39 +1959,39 @@ void evergreen_init_config(struct r600_pipe_context *rctx) tmp |= S_008C00_VS_PRIO(vs_prio); tmp |= S_008C00_GS_PRIO(gs_prio); tmp |= S_008C00_ES_PRIO(es_prio); - r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL, 0); /* enable dynamic GPR resource management */ if (r600_get_minor_version(rctx->radeon) >= 7) { /* always set temp clauses */ r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, - S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2, 0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8), 0xFFFFFFFF, NULL); + S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2, 0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1, S_028838_PS_GPRS(0x1e) | S_028838_VS_GPRS(0x1e) | S_028838_GS_GPRS(0x1e) | S_028838_ES_GPRS(0x1e) | S_028838_HS_GPRS(0x1e) | - S_028838_LS_GPRS(0x1e), 0xFFFFFFFF, NULL); /* workaround for hw issues with dyn gpr - must set all limits to 240 instead of 0, 0x1e == 240 / 8*/ + S_028838_LS_GPRS(0x1e), 0xFFFFFFFF, NULL, 0); /* workaround for hw issues with dyn gpr - must set all limits to 240 instead of 0, 0x1e == 240 / 8*/ } else { tmp = 0; tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs); tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs); tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs); - r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL, 0); tmp = 0; tmp |= S_008C08_NUM_GS_GPRS(num_gs_gprs); tmp |= S_008C08_NUM_ES_GPRS(num_es_gprs); - r600_pipe_state_add_reg(rstate, R_008C08_SQ_GPR_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C08_SQ_GPR_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL, 0); tmp = 0; tmp |= S_008C0C_NUM_HS_GPRS(num_hs_gprs); tmp |= S_008C0C_NUM_HS_GPRS(num_ls_gprs); - r600_pipe_state_add_reg(rstate, R_008C0C_SQ_GPR_RESOURCE_MGMT_3, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C0C_SQ_GPR_RESOURCE_MGMT_3, tmp, 0xFFFFFFFF, NULL, 0); } tmp = 0; @@ -2004,109 +1999,109 @@ void evergreen_init_config(struct r600_pipe_context *rctx) tmp |= S_008C18_NUM_VS_THREADS(num_vs_threads); tmp |= S_008C18_NUM_GS_THREADS(num_gs_threads); tmp |= S_008C18_NUM_ES_THREADS(num_es_threads); - r600_pipe_state_add_reg(rstate, R_008C18_SQ_THREAD_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C18_SQ_THREAD_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL, 0); tmp = 0; tmp |= S_008C1C_NUM_HS_THREADS(num_hs_threads); tmp |= S_008C1C_NUM_LS_THREADS(num_ls_threads); - r600_pipe_state_add_reg(rstate, R_008C1C_SQ_THREAD_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C1C_SQ_THREAD_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL, 0); tmp = 0; tmp |= S_008C20_NUM_PS_STACK_ENTRIES(num_ps_stack_entries); tmp |= S_008C20_NUM_VS_STACK_ENTRIES(num_vs_stack_entries); - r600_pipe_state_add_reg(rstate, R_008C20_SQ_STACK_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C20_SQ_STACK_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL, 0); tmp = 0; tmp |= S_008C24_NUM_GS_STACK_ENTRIES(num_gs_stack_entries); tmp |= S_008C24_NUM_ES_STACK_ENTRIES(num_es_stack_entries); - r600_pipe_state_add_reg(rstate, R_008C24_SQ_STACK_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C24_SQ_STACK_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL, 0); tmp = 0; tmp |= S_008C28_NUM_HS_STACK_ENTRIES(num_hs_stack_entries); tmp |= S_008C28_NUM_LS_STACK_ENTRIES(num_ls_stack_entries); - r600_pipe_state_add_reg(rstate, R_008C28_SQ_STACK_RESOURCE_MGMT_3, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C28_SQ_STACK_RESOURCE_MGMT_3, tmp, 0xFFFFFFFF, NULL, 0); tmp = 0; tmp |= S_008E2C_NUM_PS_LDS(0x1000); tmp |= S_008E2C_NUM_LS_LDS(0x1000); - r600_pipe_state_add_reg(rstate, R_008E2C_SQ_LDS_RESOURCE_MGMT, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008E2C_SQ_LDS_RESOURCE_MGMT, tmp, 0xFFFFFFFF, NULL, 0); - r600_pipe_state_add_reg(rstate, R_009100_SPI_CONFIG_CNTL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_00913C_SPI_CONFIG_CNTL_1, S_00913C_VTX_DONE_DELAY(4), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_009100_SPI_CONFIG_CNTL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_00913C_SPI_CONFIG_CNTL_1, S_00913C_VTX_DONE_DELAY(4), 0xFFFFFFFF, NULL, 0); #if 0 - r600_pipe_state_add_reg(rstate, R_028350_SX_MISC, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028350_SX_MISC, 0x0, 0xFFFFFFFF, NULL, 0); - r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x0, 0xFFFFFFFF, NULL, 0); #endif - r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MODE_CNTL_0, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL_1, 0x0, 0xFFFFFFFF, NULL); - - r600_pipe_state_add_reg(rstate, R_028900_SQ_ESGS_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028904_SQ_GSVS_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028908_SQ_ESTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02890C_SQ_GSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028910_SQ_VSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028914_SQ_PSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); - - r600_pipe_state_add_reg(rstate, R_02891C_SQ_GS_VERT_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028920_SQ_GS_VERT_ITEMSIZE_1, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028924_SQ_GS_VERT_ITEMSIZE_2, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028928_SQ_GS_VERT_ITEMSIZE_3, 0x0, 0xFFFFFFFF, NULL); - - r600_pipe_state_add_reg(rstate, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A14_VGT_HOS_CNTL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A20_VGT_HOS_REUSE_DEPTH, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A24_VGT_GROUP_PRIM_TYPE, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A28_VGT_GROUP_FIRST_DECR, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A2C_VGT_GROUP_DECR, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A40_VGT_GS_MODE, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028B94_VGT_STRMOUT_CONFIG, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028AB4_VGT_REUSE_OFF, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028AB8_VGT_VTX_CNT_EN, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1, 0xFFFFFFFF, NULL); - - r600_pipe_state_add_reg(rstate, R_028380_SQ_VTX_SEMANTIC_0, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028384_SQ_VTX_SEMANTIC_1, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028388_SQ_VTX_SEMANTIC_2, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02838C_SQ_VTX_SEMANTIC_3, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028390_SQ_VTX_SEMANTIC_4, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028394_SQ_VTX_SEMANTIC_5, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028398_SQ_VTX_SEMANTIC_6, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02839C_SQ_VTX_SEMANTIC_7, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283A0_SQ_VTX_SEMANTIC_8, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283A4_SQ_VTX_SEMANTIC_9, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283A8_SQ_VTX_SEMANTIC_10, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283AC_SQ_VTX_SEMANTIC_11, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283B0_SQ_VTX_SEMANTIC_12, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283B4_SQ_VTX_SEMANTIC_13, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283B8_SQ_VTX_SEMANTIC_14, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283BC_SQ_VTX_SEMANTIC_15, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283C0_SQ_VTX_SEMANTIC_16, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283C4_SQ_VTX_SEMANTIC_17, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283C8_SQ_VTX_SEMANTIC_18, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283CC_SQ_VTX_SEMANTIC_19, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283D0_SQ_VTX_SEMANTIC_20, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283D4_SQ_VTX_SEMANTIC_21, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283D8_SQ_VTX_SEMANTIC_22, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283DC_SQ_VTX_SEMANTIC_23, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283E0_SQ_VTX_SEMANTIC_24, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283E4_SQ_VTX_SEMANTIC_25, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283E8_SQ_VTX_SEMANTIC_26, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283EC_SQ_VTX_SEMANTIC_27, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283F0_SQ_VTX_SEMANTIC_28, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283F4_SQ_VTX_SEMANTIC_29, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283F8_SQ_VTX_SEMANTIC_30, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283FC_SQ_VTX_SEMANTIC_31, 0x0, 0xFFFFFFFF, NULL); - - r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MODE_CNTL_0, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL_1, 0x0, 0xFFFFFFFF, NULL, 0); + + r600_pipe_state_add_reg(rstate, R_028900_SQ_ESGS_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028904_SQ_GSVS_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028908_SQ_ESTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02890C_SQ_GSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028910_SQ_VSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028914_SQ_PSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL, 0); + + r600_pipe_state_add_reg(rstate, R_02891C_SQ_GS_VERT_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028920_SQ_GS_VERT_ITEMSIZE_1, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028924_SQ_GS_VERT_ITEMSIZE_2, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028928_SQ_GS_VERT_ITEMSIZE_3, 0x0, 0xFFFFFFFF, NULL, 0); + + r600_pipe_state_add_reg(rstate, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A14_VGT_HOS_CNTL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A20_VGT_HOS_REUSE_DEPTH, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A24_VGT_GROUP_PRIM_TYPE, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A28_VGT_GROUP_FIRST_DECR, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A2C_VGT_GROUP_DECR, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A40_VGT_GS_MODE, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028B94_VGT_STRMOUT_CONFIG, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028AB4_VGT_REUSE_OFF, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028AB8_VGT_VTX_CNT_EN, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1, 0xFFFFFFFF, NULL, 0); + + r600_pipe_state_add_reg(rstate, R_028380_SQ_VTX_SEMANTIC_0, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028384_SQ_VTX_SEMANTIC_1, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028388_SQ_VTX_SEMANTIC_2, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02838C_SQ_VTX_SEMANTIC_3, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028390_SQ_VTX_SEMANTIC_4, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028394_SQ_VTX_SEMANTIC_5, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028398_SQ_VTX_SEMANTIC_6, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02839C_SQ_VTX_SEMANTIC_7, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283A0_SQ_VTX_SEMANTIC_8, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283A4_SQ_VTX_SEMANTIC_9, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283A8_SQ_VTX_SEMANTIC_10, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283AC_SQ_VTX_SEMANTIC_11, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283B0_SQ_VTX_SEMANTIC_12, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283B4_SQ_VTX_SEMANTIC_13, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283B8_SQ_VTX_SEMANTIC_14, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283BC_SQ_VTX_SEMANTIC_15, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283C0_SQ_VTX_SEMANTIC_16, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283C4_SQ_VTX_SEMANTIC_17, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283C8_SQ_VTX_SEMANTIC_18, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283CC_SQ_VTX_SEMANTIC_19, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283D0_SQ_VTX_SEMANTIC_20, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283D4_SQ_VTX_SEMANTIC_21, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283D8_SQ_VTX_SEMANTIC_22, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283DC_SQ_VTX_SEMANTIC_23, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283E0_SQ_VTX_SEMANTIC_24, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283E4_SQ_VTX_SEMANTIC_25, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283E8_SQ_VTX_SEMANTIC_26, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283EC_SQ_VTX_SEMANTIC_27, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283F0_SQ_VTX_SEMANTIC_28, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283F4_SQ_VTX_SEMANTIC_29, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283F8_SQ_VTX_SEMANTIC_30, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283FC_SQ_VTX_SEMANTIC_31, 0x0, 0xFFFFFFFF, NULL, 0); + + r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, 0x0, 0xFFFFFFFF, NULL, 0); r600_context_pipe_state_set(&rctx->ctx, rstate); } @@ -2128,6 +2123,7 @@ void evergreen_polygon_offset_update(struct r600_pipe_context *rctx) offset_units *= 2.0f; break; case PIPE_FORMAT_Z32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: depth = -23; offset_units *= 1.0f; offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1); @@ -2143,19 +2139,19 @@ void evergreen_polygon_offset_update(struct r600_pipe_context *rctx) offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(depth); r600_pipe_state_add_reg(&state, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, - fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL); + fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(&state, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, - fui(offset_units), 0xFFFFFFFF, NULL); + fui(offset_units), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(&state, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE, - fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL); + fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(&state, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, - fui(offset_units), 0xFFFFFFFF, NULL); + fui(offset_units), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(&state, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, - offset_db_fmt_cntl, 0xFFFFFFFF, NULL); + offset_db_fmt_cntl, 0xFFFFFFFF, NULL, 0); r600_context_pipe_state_set(&rctx->ctx, &state); } } @@ -2252,32 +2248,32 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader S_0286E0_LINEAR_CENTROID_ENA(have_centroid); r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, - spi_ps_in_control_0, 0xFFFFFFFF, NULL); + spi_ps_in_control_0, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, - spi_ps_in_control_1, 0xFFFFFFFF, NULL); + spi_ps_in_control_1, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_0286E4_SPI_PS_IN_CONTROL_2, - 0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL); + 0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_0286E0_SPI_BARYC_CNTL, spi_baryc_cntl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028840_SQ_PGM_START_PS, - (r600_bo_offset(shader->bo)) >> 8, 0xFFFFFFFF, shader->bo); + 0, 0xFFFFFFFF, shader->bo, RADEON_USAGE_READ); r600_pipe_state_add_reg(rstate, R_028844_SQ_PGM_RESOURCES_PS, S_028844_NUM_GPRS(rshader->bc.ngpr) | S_028844_PRIME_CACHE_ON_DRAW(1) | S_028844_STACK_SIZE(rshader->bc.nstack), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028848_SQ_PGM_RESOURCES_2_PS, - 0x0, 0xFFFFFFFF, NULL); + 0x0, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_02884C_SQ_PGM_EXPORTS_PS, - exports_ps, 0xFFFFFFFF, NULL); + exports_ps, 0xFFFFFFFF, NULL, 0); /* FIXME: Evergreen doesn't seem to support MULTIWRITE_ENABLE. */ /* only set some bits here, the other bits are set in the dsa state */ r600_pipe_state_add_reg(rstate, @@ -2286,10 +2282,10 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader S_02880C_Z_EXPORT_ENABLE(1) | S_02880C_STENCIL_EXPORT_ENABLE(1) | S_02880C_KILL_ENABLE(1), - NULL); + NULL, 0); r600_pipe_state_add_reg(rstate, R_03A200_SQ_LOOP_CONST_0, 0x01000FFF, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); } void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader) @@ -2298,7 +2294,7 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader struct r600_pipe_state *rstate = &shader->rstate; struct r600_shader *rshader = &shader->shader; unsigned spi_vs_out_id[10]; - unsigned i, tmp; + unsigned i, tmp, nparams; /* clear previous register */ rstate->nregs = 0; @@ -2314,28 +2310,36 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader for (i = 0; i < 10; i++) { r600_pipe_state_add_reg(rstate, R_02861C_SPI_VS_OUT_ID_0 + i * 4, - spi_vs_out_id[i], 0xFFFFFFFF, NULL); + spi_vs_out_id[i], 0xFFFFFFFF, NULL, 0); } + /* Certain attributes (position, psize, etc.) don't count as params. + * VS is required to export at least one param and r600_shader_from_tgsi() + * takes care of adding a dummy export. + */ + nparams = rshader->noutput - rshader->npos; + if (nparams < 1) + nparams = 1; + r600_pipe_state_add_reg(rstate, R_0286C4_SPI_VS_OUT_CONFIG, - S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2), - 0xFFFFFFFF, NULL); + S_0286C4_VS_EXPORT_COUNT(nparams - 1), + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028860_SQ_PGM_RESOURCES_VS, S_028860_NUM_GPRS(rshader->bc.ngpr) | S_028860_STACK_SIZE(rshader->bc.nstack), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028864_SQ_PGM_RESOURCES_2_VS, - 0x0, 0xFFFFFFFF, NULL); + 0x0, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_02885C_SQ_PGM_START_VS, - (r600_bo_offset(shader->bo)) >> 8, 0xFFFFFFFF, shader->bo); + 0, 0xFFFFFFFF, shader->bo, RADEON_USAGE_READ); r600_pipe_state_add_reg(rstate, R_03A200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); } void evergreen_fetch_shader(struct pipe_context *ctx, @@ -2346,10 +2350,10 @@ void evergreen_fetch_shader(struct pipe_context *ctx, rstate->id = R600_PIPE_STATE_FETCH_SHADER; rstate->nregs = 0; r600_pipe_state_add_reg(rstate, R_0288A8_SQ_PGM_RESOURCES_FS, - 0x00000000, 0xFFFFFFFF, NULL); + 0x00000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_START_FS, - (r600_bo_offset(ve->fetch_shader)) >> 8, - 0xFFFFFFFF, ve->fetch_shader); + 0, + 0xFFFFFFFF, ve->fetch_shader, RADEON_USAGE_READ); } void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx) @@ -2363,7 +2367,7 @@ void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx) r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, 0x0, - S_02880C_DUAL_EXPORT_ENABLE(1), NULL); + S_02880C_DUAL_EXPORT_ENABLE(1), NULL, 0); r600_pipe_state_add_reg(rstate, R_028000_DB_RENDER_CONTROL, S_028000_DEPTH_COPY_ENABLE(1) | @@ -2371,7 +2375,7 @@ void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx) S_028000_COPY_CENTROID(1), S_028000_DEPTH_COPY_ENABLE(1) | S_028000_STENCIL_COPY_ENABLE(1) | - S_028000_COPY_CENTROID(1), NULL); + S_028000_COPY_CENTROID(1), NULL, 0); return rstate; } @@ -2397,9 +2401,11 @@ void evergreen_pipe_init_buffer_resource(struct r600_pipe_context *rctx, void evergreen_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate, struct r600_resource *rbuffer, - unsigned offset, unsigned stride) + unsigned offset, unsigned stride, + enum radeon_bo_usage usage) { rstate->bo[0] = rbuffer->bo; + rstate->bo_usage[0] = usage; rstate->val[0] = offset; rstate->val[1] = rbuffer->bo_size - offset - 1; rstate->val[2] = S_030008_ENDIAN_SWAP(r600_endian_swap(32)) | diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index 96dbd4da91b..9a8c353e4ee 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -1697,6 +1697,10 @@ #define R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL 0x00028A3C #define R_028A48_PA_SC_MODE_CNTL_0 0x00028A48 #define R_028A4C_PA_SC_MODE_CNTL_1 0x00028A4C +#define R_028A94_VGT_MULTI_PRIM_IB_RESET_EN 0x00028A94 +#define S_028A94_RESET_EN(x) (((x) & 0x1) << 0) +#define G_028A94_RESET_EN(x) (((x) >> 0) & 0x1) +#define C_028A94_RESET_EN 0xFFFFFFFE #define R_028AB4_VGT_REUSE_OFF 0x00028AB4 #define R_028AB8_VGT_VTX_CNT_EN 0x00028AB8 #define R_028ABC_DB_HTILE_SURFACE 0x00028ABC diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 61adc7ed988..f24146edcf1 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -26,14 +26,8 @@ #ifndef R600_H #define R600_H -#include <assert.h> -#include <stdint.h> -#include <stdio.h> -#include <util/u_double_list.h> -#include <util/u_inlines.h> -#include <pipe/p_compiler.h> - -#define RADEON_CTX_MAX_PM4 (64 * 1024 / 4) +#include "../../winsys/radeon/drm/radeon_winsys.h" +#include "util/u_double_list.h" #define R600_ERR(fmt, args...) \ fprintf(stderr, "EE %s:%d %s - "fmt, __FILE__, __LINE__, __func__, ##args) @@ -94,31 +88,32 @@ struct r600_tiling_info *r600_get_tiling_info(struct radeon *radeon); unsigned r600_get_clock_crystal_freq(struct radeon *radeon); unsigned r600_get_minor_version(struct radeon *radeon); unsigned r600_get_num_backends(struct radeon *radeon); +unsigned r600_get_num_tile_pipes(struct radeon *radeon); +unsigned r600_get_backend_map(struct radeon *radeon); /* r600_bo.c */ struct r600_bo; +struct radeon_winsys_cs; + struct r600_bo *r600_bo(struct radeon *radeon, unsigned size, unsigned alignment, unsigned binding, unsigned usage); -struct r600_bo *r600_bo_handle(struct radeon *radeon, - unsigned handle, unsigned *array_mode); -void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, unsigned usage, void *ctx); +struct r600_bo *r600_bo_handle(struct radeon *radeon, struct winsys_handle *whandle, + unsigned *stride, unsigned *array_mode); +void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, struct radeon_winsys_cs *cs, unsigned usage); void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo); boolean r600_bo_get_winsys_handle(struct radeon *radeon, struct r600_bo *pb_bo, - unsigned stride, struct winsys_handle *whandle); -static INLINE unsigned r600_bo_offset(struct r600_bo *bo) -{ - return 0; -} -void r600_bo_destroy(struct radeon *radeon, struct r600_bo *bo); + unsigned stride, struct winsys_handle *whandle); + +void r600_bo_destroy(struct r600_bo *bo); /* this relies on the pipe_reference being the first member of r600_bo */ -static INLINE void r600_bo_reference(struct radeon *radeon, struct r600_bo **dst, struct r600_bo *src) +static INLINE void r600_bo_reference(struct r600_bo **dst, struct r600_bo *src) { struct r600_bo *old = *dst; if (pipe_reference((struct pipe_reference *)(*dst), (struct pipe_reference *)src)) { - r600_bo_destroy(radeon, old); + r600_bo_destroy(old); } *dst = src; } @@ -145,6 +140,7 @@ struct r600_pipe_reg { u32 mask; struct r600_block *block; struct r600_bo *bo; + enum radeon_bo_usage bo_usage; u32 id; }; @@ -157,7 +153,8 @@ struct r600_pipe_state { struct r600_pipe_resource_state { unsigned id; u32 val[8]; - struct r600_bo *bo[2]; + struct r600_bo *bo[2]; + enum radeon_bo_usage bo_usage[2]; /* XXX set these */ }; #define R600_BLOCK_STATUS_ENABLED (1 << 0) @@ -168,6 +165,7 @@ struct r600_pipe_resource_state { struct r600_block_reloc { struct r600_bo *bo; + enum radeon_bo_usage bo_usage; unsigned flush_flags; unsigned flush_mask; unsigned bo_pm4_index; @@ -195,18 +193,6 @@ struct r600_range { }; /* - * relocation - */ -#pragma pack(1) -struct r600_reloc { - uint32_t handle; - uint32_t read_domain; - uint32_t write_domain; - uint32_t flags; -}; -#pragma pack() - -/* * query */ struct r600_query { @@ -243,6 +229,8 @@ struct r600_query { struct r600_context { struct radeon *radeon; + struct radeon_winsys_cs *cs; + struct r600_range *range; unsigned nblocks; struct r600_block **blocks; @@ -250,18 +238,19 @@ struct r600_context { struct list_head resource_dirty; struct list_head enable_list; unsigned pm4_ndwords; - unsigned pm4_cdwords; unsigned pm4_dirty_cdwords; unsigned ctx_pm4_ndwords; unsigned init_dwords; - unsigned nreloc; + unsigned creloc; - struct r600_reloc *reloc; - struct radeon_bo **bo; + struct r600_bo **bo; + u32 *pm4; + unsigned pm4_cdwords; + struct list_head query_list; unsigned num_query_running; - struct list_head fenced_bo; + unsigned backend_mask; unsigned max_db; /* for OQ */ unsigned num_dest_buffers; unsigned flags; @@ -282,6 +271,7 @@ struct r600_draw { struct r600_bo *indices; }; +void r600_get_backend_mask(struct r600_context *ctx); int r600_context_init(struct r600_context *ctx, struct radeon *radeon); void r600_context_fini(struct r600_context *ctx); void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_state *state); @@ -290,8 +280,7 @@ void r600_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r6 void r600_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid); void r600_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id); void r600_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id); -void r600_context_flush(struct r600_context *ctx); -void r600_context_dump_bof(struct r600_context *ctx, const char *file); +void r600_context_flush(struct r600_context *ctx, unsigned flags); void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw); struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned query_type); @@ -319,18 +308,21 @@ void evergreen_context_pipe_state_set_fs_resource(struct r600_context *ctx, stru void evergreen_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id); void evergreen_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id); -struct radeon *radeon_decref(struct radeon *radeon); +struct radeon *radeon_destroy(struct radeon *radeon); void _r600_pipe_state_add_reg(struct r600_context *ctx, struct r600_pipe_state *state, u32 offset, u32 value, u32 mask, u32 range_id, u32 block_id, - struct r600_bo *bo); + struct r600_bo *bo, + enum radeon_bo_usage usage); void r600_pipe_state_add_reg_noblock(struct r600_pipe_state *state, u32 offset, u32 value, u32 mask, - struct r600_bo *bo); -#define r600_pipe_state_add_reg(state, offset, value, mask, bo) _r600_pipe_state_add_reg(&rctx->ctx, state, offset, value, mask, CTX_RANGE_ID(offset), CTX_BLOCK_ID(offset), bo) + struct r600_bo *bo, + enum radeon_bo_usage usage); + +#define r600_pipe_state_add_reg(state, offset, value, mask, bo, usage) _r600_pipe_state_add_reg(&rctx->ctx, state, offset, value, mask, CTX_RANGE_ID(offset), CTX_BLOCK_ID(offset), bo, usage) static inline void r600_pipe_state_mod_reg(struct r600_pipe_state *state, u32 value) @@ -340,10 +332,12 @@ static inline void r600_pipe_state_mod_reg(struct r600_pipe_state *state, } static inline void r600_pipe_state_mod_reg_bo(struct r600_pipe_state *state, - u32 value, struct r600_bo *bo) + u32 value, struct r600_bo *bo, + enum radeon_bo_usage usage) { state->regs[state->nregs].value = value; state->regs[state->nregs].bo = bo; + state->regs[state->nregs].bo_usage = usage; state->nregs++; } diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 5fae2b00c8b..27febdf9d03 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -36,7 +36,7 @@ #define NUM_OF_CYCLES 3 #define NUM_OF_COMPONENTS 4 -static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r600_bc_alu *alu) +static inline unsigned int r600_bytecode_get_num_operands(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) { if(alu->is_op3) return 3; @@ -88,6 +88,7 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE: return 1; default: R600_ERR( "Need instruction operand number for 0x%x.\n", alu->inst); @@ -140,6 +141,7 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE: return 1; default: R600_ERR( "Need instruction operand number for 0x%x.\n", alu->inst); @@ -150,11 +152,11 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r return 3; } -int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id); +int r700_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id); -static struct r600_bc_cf *r600_bc_cf(void) +static struct r600_bytecode_cf *r600_bytecode_cf(void) { - struct r600_bc_cf *cf = CALLOC_STRUCT(r600_bc_cf); + struct r600_bytecode_cf *cf = CALLOC_STRUCT(r600_bytecode_cf); if (cf == NULL) return NULL; @@ -165,9 +167,9 @@ static struct r600_bc_cf *r600_bc_cf(void) return cf; } -static struct r600_bc_alu *r600_bc_alu(void) +static struct r600_bytecode_alu *r600_bytecode_alu(void) { - struct r600_bc_alu *alu = CALLOC_STRUCT(r600_bc_alu); + struct r600_bytecode_alu *alu = CALLOC_STRUCT(r600_bytecode_alu); if (alu == NULL) return NULL; @@ -175,9 +177,9 @@ static struct r600_bc_alu *r600_bc_alu(void) return alu; } -static struct r600_bc_vtx *r600_bc_vtx(void) +static struct r600_bytecode_vtx *r600_bytecode_vtx(void) { - struct r600_bc_vtx *vtx = CALLOC_STRUCT(r600_bc_vtx); + struct r600_bytecode_vtx *vtx = CALLOC_STRUCT(r600_bytecode_vtx); if (vtx == NULL) return NULL; @@ -185,9 +187,9 @@ static struct r600_bc_vtx *r600_bc_vtx(void) return vtx; } -static struct r600_bc_tex *r600_bc_tex(void) +static struct r600_bytecode_tex *r600_bytecode_tex(void) { - struct r600_bc_tex *tex = CALLOC_STRUCT(r600_bc_tex); + struct r600_bytecode_tex *tex = CALLOC_STRUCT(r600_bytecode_tex); if (tex == NULL) return NULL; @@ -195,15 +197,15 @@ static struct r600_bc_tex *r600_bc_tex(void) return tex; } -void r600_bc_init(struct r600_bc *bc, enum chip_class chip_class) +void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class) { LIST_INITHEAD(&bc->cf); bc->chip_class = chip_class; } -static int r600_bc_add_cf(struct r600_bc *bc) +static int r600_bytecode_add_cf(struct r600_bytecode *bc) { - struct r600_bc_cf *cf = r600_bc_cf(); + struct r600_bytecode_cf *cf = r600_bytecode_cf(); if (cf == NULL) return -ENOMEM; @@ -217,7 +219,7 @@ static int r600_bc_add_cf(struct r600_bc *bc) return 0; } -int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output) +int r600_bytecode_add_output(struct r600_bytecode *bc, const struct r600_bytecode_output *output) { int r; @@ -252,16 +254,16 @@ int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output) } } - r = r600_bc_add_cf(bc); + r = r600_bytecode_add_cf(bc); if (r) return r; bc->cf_last->inst = output->inst; - memcpy(&bc->cf_last->output, output, sizeof(struct r600_bc_output)); + memcpy(&bc->cf_last->output, output, sizeof(struct r600_bytecode_output)); return 0; } /* alu instructions that can ony exits once per group */ -static int is_alu_once_inst(struct r600_bc *bc, struct r600_bc_alu *alu) +static int is_alu_once_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) { switch (bc->chip_class) { case R600: @@ -342,7 +344,7 @@ static int is_alu_once_inst(struct r600_bc *bc, struct r600_bc_alu *alu) } } -static int is_alu_reduction_inst(struct r600_bc *bc, struct r600_bc_alu *alu) +static int is_alu_reduction_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) { switch (bc->chip_class) { case R600: @@ -363,7 +365,7 @@ static int is_alu_reduction_inst(struct r600_bc *bc, struct r600_bc_alu *alu) } } -static int is_alu_cube_inst(struct r600_bc *bc, struct r600_bc_alu *alu) +static int is_alu_cube_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) { switch (bc->chip_class) { case R600: @@ -378,7 +380,7 @@ static int is_alu_cube_inst(struct r600_bc *bc, struct r600_bc_alu *alu) } } -static int is_alu_mova_inst(struct r600_bc *bc, struct r600_bc_alu *alu) +static int is_alu_mova_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) { switch (bc->chip_class) { case R600: @@ -396,7 +398,7 @@ static int is_alu_mova_inst(struct r600_bc *bc, struct r600_bc_alu *alu) } /* alu instructions that can only execute on the vector unit */ -static int is_alu_vec_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu) +static int is_alu_vec_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) { return is_alu_reduction_inst(bc, alu) || is_alu_mova_inst(bc, alu) || @@ -405,7 +407,7 @@ static int is_alu_vec_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu) } /* alu instructions that can only execute on the trans unit */ -static int is_alu_trans_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu) +static int is_alu_trans_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) { switch (bc->chip_class) { case R600: @@ -476,23 +478,23 @@ static int is_alu_trans_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu) } /* alu instructions that can execute on any unit */ -static int is_alu_any_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu) +static int is_alu_any_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) { return !is_alu_vec_unit_inst(bc, alu) && !is_alu_trans_unit_inst(bc, alu); } -static int assign_alu_units(struct r600_bc *bc, struct r600_bc_alu *alu_first, - struct r600_bc_alu *assignment[5]) +static int assign_alu_units(struct r600_bytecode *bc, struct r600_bytecode_alu *alu_first, + struct r600_bytecode_alu *assignment[5]) { - struct r600_bc_alu *alu; + struct r600_bytecode_alu *alu; unsigned i, chan, trans; int max_slots = bc->chip_class == CAYMAN ? 4 : 5; for (i = 0; i < max_slots; i++) assignment[i] = NULL; - for (alu = alu_first; alu; alu = LIST_ENTRY(struct r600_bc_alu, alu->list.next, list)) { + for (alu = alu_first; alu; alu = LIST_ENTRY(struct r600_bytecode_alu, alu->list.next, list)) { chan = alu->dst.chan; if (max_slots == 4) trans = 0; @@ -571,7 +573,7 @@ static int reserve_gpr(struct alu_bank_swizzle *bs, unsigned sel, unsigned chan, return 0; } -static int reserve_cfile(struct r600_bc *bc, struct alu_bank_swizzle *bs, unsigned sel, unsigned chan) +static int reserve_cfile(struct r600_bytecode *bc, struct alu_bank_swizzle *bs, unsigned sel, unsigned chan) { int res, num_res = 4; if (bc->chip_class >= R700) { @@ -613,12 +615,12 @@ static int is_const(int sel) sel <= V_SQ_ALU_SRC_LITERAL); } -static int check_vector(struct r600_bc *bc, struct r600_bc_alu *alu, +static int check_vector(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, struct alu_bank_swizzle *bs, int bank_swizzle) { int r, src, num_src, sel, elem, cycle; - num_src = r600_bc_get_num_operands(bc, alu); + num_src = r600_bytecode_get_num_operands(bc, alu); for (src = 0; src < num_src; src++) { sel = alu->src[src].sel; elem = alu->src[src].chan; @@ -643,12 +645,12 @@ static int check_vector(struct r600_bc *bc, struct r600_bc_alu *alu, return 0; } -static int check_scalar(struct r600_bc *bc, struct r600_bc_alu *alu, +static int check_scalar(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, struct alu_bank_swizzle *bs, int bank_swizzle) { int r, src, num_src, const_count, sel, elem, cycle; - num_src = r600_bc_get_num_operands(bc, alu); + num_src = r600_bytecode_get_num_operands(bc, alu); for (const_count = 0, src = 0; src < num_src; ++src) { sel = alu->src[src].sel; elem = alu->src[src].chan; @@ -689,20 +691,24 @@ static int check_scalar(struct r600_bc *bc, struct r600_bc_alu *alu, return 0; } -static int check_and_set_bank_swizzle(struct r600_bc *bc, - struct r600_bc_alu *slots[5]) +static int check_and_set_bank_swizzle(struct r600_bytecode *bc, + struct r600_bytecode_alu *slots[5]) { struct alu_bank_swizzle bs; int bank_swizzle[5]; - int i, r = 0, forced = 0; + int i, r = 0, forced = 1; boolean scalar_only = bc->chip_class == CAYMAN ? false : true; int max_slots = bc->chip_class == CAYMAN ? 4 : 5; for (i = 0; i < max_slots; i++) { - if (slots[i] && slots[i]->bank_swizzle_force) { - slots[i]->bank_swizzle = slots[i]->bank_swizzle_force; - forced = 1; + if (slots[i]) { + if (slots[i]->bank_swizzle_force) { + slots[i]->bank_swizzle = slots[i]->bank_swizzle_force; + } else { + forced = 0; + } } + if (i < 4 && slots[i]) scalar_only = false; } @@ -712,7 +718,11 @@ static int check_and_set_bank_swizzle(struct r600_bc *bc, /* Just check every possible combination of bank swizzle. * Not very efficent, but works on the first try in most of the cases. */ for (i = 0; i < 4; i++) - bank_swizzle[i] = SQ_ALU_VEC_012; + if (!slots[i] || !slots[i]->bank_swizzle_force) + bank_swizzle[i] = SQ_ALU_VEC_012; + else + bank_swizzle[i] = slots[i]->bank_swizzle; + bank_swizzle[4] = SQ_ALU_SCL_210; while(bank_swizzle[4] <= SQ_ALU_SCL_221) { @@ -749,11 +759,13 @@ static int check_and_set_bank_swizzle(struct r600_bc *bc, bank_swizzle[4]++; } else { for (i = 0; i < max_slots; i++) { - bank_swizzle[i]++; - if (bank_swizzle[i] <= SQ_ALU_VEC_210) - break; - else - bank_swizzle[i] = SQ_ALU_VEC_012; + if (!slots[i] || !slots[i]->bank_swizzle_force) { + bank_swizzle[i]++; + if (bank_swizzle[i] <= SQ_ALU_VEC_210) + break; + else + bank_swizzle[i] = SQ_ALU_VEC_012; + } } } } @@ -762,10 +774,10 @@ static int check_and_set_bank_swizzle(struct r600_bc *bc, return -1; } -static int replace_gpr_with_pv_ps(struct r600_bc *bc, - struct r600_bc_alu *slots[5], struct r600_bc_alu *alu_prev) +static int replace_gpr_with_pv_ps(struct r600_bytecode *bc, + struct r600_bytecode_alu *slots[5], struct r600_bytecode_alu *alu_prev) { - struct r600_bc_alu *prev[5]; + struct r600_bytecode_alu *prev[5]; int gpr[5], chan[5]; int i, j, r, src, num_src; int max_slots = bc->chip_class == CAYMAN ? 4 : 5; @@ -775,7 +787,7 @@ static int replace_gpr_with_pv_ps(struct r600_bc *bc, return r; for (i = 0; i < max_slots; ++i) { - if(prev[i] && prev[i]->dst.write && !prev[i]->dst.rel) { + if (prev[i] && (prev[i]->dst.write || prev[i]->is_op3) && !prev[i]->dst.rel) { gpr[i] = prev[i]->dst.sel; /* cube writes more than PV.X */ if (!is_alu_cube_inst(bc, prev[i]) && is_alu_reduction_inst(bc, prev[i])) @@ -787,11 +799,11 @@ static int replace_gpr_with_pv_ps(struct r600_bc *bc, } for (i = 0; i < max_slots; ++i) { - struct r600_bc_alu *alu = slots[i]; + struct r600_bytecode_alu *alu = slots[i]; if(!alu) continue; - num_src = r600_bc_get_num_operands(bc, alu); + num_src = r600_bytecode_get_num_operands(bc, alu); for (src = 0; src < num_src; ++src) { if (!is_gpr(alu->src[src].sel) || alu->src[src].rel) continue; @@ -819,7 +831,7 @@ static int replace_gpr_with_pv_ps(struct r600_bc *bc, return 0; } -void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg) +void r600_bytecode_special_constants(u32 value, unsigned *sel, unsigned *neg) { switch(value) { case 0: @@ -852,10 +864,10 @@ void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg) } /* compute how many literal are needed */ -static int r600_bc_alu_nliterals(struct r600_bc *bc, struct r600_bc_alu *alu, +static int r600_bytecode_alu_nliterals(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, uint32_t literal[4], unsigned *nliteral) { - unsigned num_src = r600_bc_get_num_operands(bc, alu); + unsigned num_src = r600_bytecode_get_num_operands(bc, alu); unsigned i, j; for (i = 0; i < num_src; ++i) { @@ -878,11 +890,11 @@ static int r600_bc_alu_nliterals(struct r600_bc *bc, struct r600_bc_alu *alu, return 0; } -static void r600_bc_alu_adjust_literals(struct r600_bc *bc, - struct r600_bc_alu *alu, +static void r600_bytecode_alu_adjust_literals(struct r600_bytecode *bc, + struct r600_bytecode_alu *alu, uint32_t literal[4], unsigned nliteral) { - unsigned num_src = r600_bc_get_num_operands(bc, alu); + unsigned num_src = r600_bytecode_get_num_operands(bc, alu); unsigned i, j; for (i = 0; i < num_src; ++i) { @@ -898,11 +910,11 @@ static void r600_bc_alu_adjust_literals(struct r600_bc *bc, } } -static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], - struct r600_bc_alu *alu_prev) +static int merge_inst_groups(struct r600_bytecode *bc, struct r600_bytecode_alu *slots[5], + struct r600_bytecode_alu *alu_prev) { - struct r600_bc_alu *prev[5]; - struct r600_bc_alu *result[5] = { NULL }; + struct r600_bytecode_alu *prev[5]; + struct r600_bytecode_alu *result[5] = { NULL }; uint32_t literal[4], prev_literal[4]; unsigned nliteral = 0, prev_nliteral = 0; @@ -917,13 +929,13 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], return r; for (i = 0; i < max_slots; ++i) { - struct r600_bc_alu *alu; + struct r600_bytecode_alu *alu; /* check number of literals */ if (prev[i]) { - if (r600_bc_alu_nliterals(bc, prev[i], literal, &nliteral)) + if (r600_bytecode_alu_nliterals(bc, prev[i], literal, &nliteral)) return 0; - if (r600_bc_alu_nliterals(bc, prev[i], prev_literal, &prev_nliteral)) + if (r600_bytecode_alu_nliterals(bc, prev[i], prev_literal, &prev_nliteral)) return 0; if (is_alu_mova_inst(bc, prev[i])) { if (have_rel) @@ -932,7 +944,7 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], } num_once_inst += is_alu_once_inst(bc, prev[i]); } - if (slots[i] && r600_bc_alu_nliterals(bc, slots[i], literal, &nliteral)) + if (slots[i] && r600_bytecode_alu_nliterals(bc, slots[i], literal, &nliteral)) return 0; /* Let's check used slots. */ @@ -968,7 +980,7 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], } /* Let's check source gprs */ - num_src = r600_bc_get_num_operands(bc, alu); + num_src = r600_bytecode_get_num_operands(bc, alu); for (src = 0; src < num_src; ++src) { if (alu->src[src].rel) { if (have_mova) @@ -1018,7 +1030,7 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], } /* determine new last instruction */ - LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list)->last = 1; + LIST_ENTRY(struct r600_bytecode_alu, bc->cf_last->alu.prev, list)->last = 1; /* determine new first instruction */ for (i = 0; i < max_slots; ++i) { @@ -1038,9 +1050,9 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], * probably do slightly better by recognizing that we actually have two * consecutive lines of 16 constants, but the resulting code would also be * somewhat more complicated. */ -static int r600_bc_alloc_kcache_lines(struct r600_bc *bc, struct r600_bc_alu *alu, int type) +static int r600_bytecode_alloc_kcache_lines(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, int type) { - struct r600_bc_kcache *kcache = bc->cf_last->kcache; + struct r600_bytecode_kcache *kcache = bc->cf_last->kcache; unsigned int required_lines; unsigned int free_lines = 0; unsigned int cache_line[3]; @@ -1093,7 +1105,7 @@ static int r600_bc_alloc_kcache_lines(struct r600_bc *bc, struct r600_bc_alu *al /* Start a new ALU clause if needed. */ if (required_lines > free_lines) { - if ((r = r600_bc_add_cf(bc))) { + if ((r = r600_bytecode_add_cf(bc))) { return r; } bc->cf_last->inst = (type << 3); @@ -1148,15 +1160,15 @@ static int r600_bc_alloc_kcache_lines(struct r600_bc *bc, struct r600_bc_alu *al return 0; } -int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type) +int r600_bytecode_add_alu_type(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu, int type) { - struct r600_bc_alu *nalu = r600_bc_alu(); - struct r600_bc_alu *lalu; + struct r600_bytecode_alu *nalu = r600_bytecode_alu(); + struct r600_bytecode_alu *lalu; int i, r; if (nalu == NULL) return -ENOMEM; - memcpy(nalu, alu, sizeof(struct r600_bc_alu)); + memcpy(nalu, alu, sizeof(struct r600_bytecode_alu)); if (bc->cf_last != NULL && bc->cf_last->inst != (type << 3)) { /* check if we could add it anyway */ @@ -1174,7 +1186,7 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int /* cf can contains only alu or only vtx or only tex */ if (bc->cf_last == NULL || bc->force_add_cf) { - r = r600_bc_add_cf(bc); + r = r600_bytecode_add_cf(bc); if (r) { free(nalu); return r; @@ -1184,7 +1196,7 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int /* Setup the kcache for this ALU instruction. This will start a new * ALU clause if needed. */ - if ((r = r600_bc_alloc_kcache_lines(bc, nalu, type))) { + if ((r = r600_bytecode_alloc_kcache_lines(bc, nalu, type))) { free(nalu); return r; } @@ -1198,7 +1210,7 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int bc->ngpr = nalu->src[i].sel + 1; } if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL) - r600_bc_special_constants(nalu->src[i].value, + r600_bytecode_special_constants(nalu->src[i].value, &nalu->src[i].sel, &nalu->src[i].neg); } if (nalu->dst.sel >= bc->ngpr) { @@ -1213,7 +1225,7 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int if (nalu->last) { uint32_t literal[4]; unsigned nliteral; - struct r600_bc_alu *slots[5]; + struct r600_bytecode_alu *slots[5]; int max_slots = bc->chip_class == CAYMAN ? 4 : 5; r = assign_alu_units(bc, bc->cf_last->curr_bs_head, slots); if (r) @@ -1237,7 +1249,7 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int for (i = 0, nliteral = 0; i < max_slots; i++) { if (slots[i]) { - r = r600_bc_alu_nliterals(bc, slots[i], literal, &nliteral); + r = r600_bytecode_alu_nliterals(bc, slots[i], literal, &nliteral); if (r) return r; } @@ -1257,12 +1269,12 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int return 0; } -int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu) +int r600_bytecode_add_alu(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu) { - return r600_bc_add_alu_type(bc, alu, BC_INST(bc, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); + return r600_bytecode_add_alu_type(bc, alu, BC_INST(bc, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); } -static unsigned r600_bc_num_tex_and_vtx_instructions(const struct r600_bc *bc) +static unsigned r600_bytecode_num_tex_and_vtx_instructions(const struct r600_bytecode *bc) { switch (bc->chip_class) { case R600: @@ -1281,7 +1293,7 @@ static unsigned r600_bc_num_tex_and_vtx_instructions(const struct r600_bc *bc) } } -static inline boolean last_inst_was_vtx_fetch(struct r600_bc *bc) +static inline boolean last_inst_was_vtx_fetch(struct r600_bytecode *bc) { if (bc->chip_class == CAYMAN) { if (bc->cf_last->inst != CM_V_SQ_CF_WORD1_SQ_CF_INST_TC) @@ -1294,20 +1306,20 @@ static inline boolean last_inst_was_vtx_fetch(struct r600_bc *bc) return FALSE; } -int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx) +int r600_bytecode_add_vtx(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx) { - struct r600_bc_vtx *nvtx = r600_bc_vtx(); + struct r600_bytecode_vtx *nvtx = r600_bytecode_vtx(); int r; if (nvtx == NULL) return -ENOMEM; - memcpy(nvtx, vtx, sizeof(struct r600_bc_vtx)); + memcpy(nvtx, vtx, sizeof(struct r600_bytecode_vtx)); /* cf can contains only alu or only vtx or only tex */ if (bc->cf_last == NULL || last_inst_was_vtx_fetch(bc) || bc->force_add_cf) { - r = r600_bc_add_cf(bc); + r = r600_bytecode_add_cf(bc); if (r) { free(nvtx); return r; @@ -1321,24 +1333,24 @@ int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx) /* each fetch use 4 dwords */ bc->cf_last->ndw += 4; bc->ndw += 4; - if ((bc->cf_last->ndw / 4) >= r600_bc_num_tex_and_vtx_instructions(bc)) + if ((bc->cf_last->ndw / 4) >= r600_bytecode_num_tex_and_vtx_instructions(bc)) bc->force_add_cf = 1; return 0; } -int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex) +int r600_bytecode_add_tex(struct r600_bytecode *bc, const struct r600_bytecode_tex *tex) { - struct r600_bc_tex *ntex = r600_bc_tex(); + struct r600_bytecode_tex *ntex = r600_bytecode_tex(); int r; if (ntex == NULL) return -ENOMEM; - memcpy(ntex, tex, sizeof(struct r600_bc_tex)); + memcpy(ntex, tex, sizeof(struct r600_bytecode_tex)); /* we can't fetch data und use it as texture lookup address in the same TEX clause */ if (bc->cf_last != NULL && bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_TEX) { - struct r600_bc_tex *ttex; + struct r600_bytecode_tex *ttex; LIST_FOR_EACH_ENTRY(ttex, &bc->cf_last->tex, list) { if (ttex->dst_gpr == ntex->src_gpr) { bc->force_add_cf = 1; @@ -1354,7 +1366,7 @@ int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex) if (bc->cf_last == NULL || bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_TEX || bc->force_add_cf) { - r = r600_bc_add_cf(bc); + r = r600_bytecode_add_cf(bc); if (r) { free(ntex); return r; @@ -1371,15 +1383,15 @@ int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex) /* each texture fetch use 4 dwords */ bc->cf_last->ndw += 4; bc->ndw += 4; - if ((bc->cf_last->ndw / 4) >= r600_bc_num_tex_and_vtx_instructions(bc)) + if ((bc->cf_last->ndw / 4) >= r600_bytecode_num_tex_and_vtx_instructions(bc)) bc->force_add_cf = 1; return 0; } -int r600_bc_add_cfinst(struct r600_bc *bc, int inst) +int r600_bytecode_add_cfinst(struct r600_bytecode *bc, int inst) { int r; - r = r600_bc_add_cf(bc); + r = r600_bytecode_add_cf(bc); if (r) return r; @@ -1388,13 +1400,13 @@ int r600_bc_add_cfinst(struct r600_bc *bc, int inst) return 0; } -int cm_bc_add_cf_end(struct r600_bc *bc) +int cm_bytecode_add_cf_end(struct r600_bytecode *bc) { - return r600_bc_add_cfinst(bc, CM_V_SQ_CF_WORD1_SQ_CF_INST_END); + return r600_bytecode_add_cfinst(bc, CM_V_SQ_CF_WORD1_SQ_CF_INST_END); } /* common to all 3 families */ -static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsigned id) +static int r600_bytecode_vtx_build(struct r600_bytecode *bc, struct r600_bytecode_vtx *vtx, unsigned id) { bc->bytecode[id] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) | S_SQ_VTX_WORD0_FETCH_TYPE(vtx->fetch_type) | @@ -1423,7 +1435,7 @@ static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsign } /* common to all 3 families */ -static int r600_bc_tex_build(struct r600_bc *bc, struct r600_bc_tex *tex, unsigned id) +static int r600_bytecode_tex_build(struct r600_bytecode *bc, struct r600_bytecode_tex *tex, unsigned id) { bc->bytecode[id++] = S_SQ_TEX_WORD0_TEX_INST(tex->inst) | S_SQ_TEX_WORD0_RESOURCE_ID(tex->resource_id) | @@ -1453,7 +1465,7 @@ static int r600_bc_tex_build(struct r600_bc *bc, struct r600_bc_tex *tex, unsign } /* r600 only, r700/eg bits in r700_asm.c */ -static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) +static int r600_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id) { /* don't replace gpr by pv or ps for destination register */ bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | @@ -1494,7 +1506,7 @@ static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsign return 0; } -static void r600_bc_cf_vtx_build(uint32_t *bytecode, const struct r600_bc_cf *cf) +static void r600_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_bytecode_cf *cf) { *bytecode++ = S_SQ_CF_WORD0_ADDR(cf->addr >> 1); *bytecode++ = S_SQ_CF_WORD1_CF_INST(cf->inst) | @@ -1503,7 +1515,7 @@ static void r600_bc_cf_vtx_build(uint32_t *bytecode, const struct r600_bc_cf *cf } /* common for r600/r700 - eg in eg_asm.c */ -static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) +static int r600_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf) { unsigned id = cf->id; @@ -1529,9 +1541,9 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) case V_SQ_CF_WORD1_SQ_CF_INST_VTX: case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: if (bc->chip_class == R700) - r700_bc_cf_vtx_build(&bc->bytecode[id], cf); + r700_bytecode_cf_vtx_build(&bc->bytecode[id], cf); else - r600_bc_cf_vtx_build(&bc->bytecode[id], cf); + r600_bytecode_cf_vtx_build(&bc->bytecode[id], cf); break; case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: @@ -1571,12 +1583,12 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) return 0; } -int r600_bc_build(struct r600_bc *bc) +int r600_bytecode_build(struct r600_bytecode *bc) { - struct r600_bc_cf *cf; - struct r600_bc_alu *alu; - struct r600_bc_vtx *vtx; - struct r600_bc_tex *tex; + struct r600_bytecode_cf *cf; + struct r600_bytecode_alu *alu; + struct r600_bytecode_vtx *vtx; + struct r600_bytecode_tex *tex; uint32_t literal[4]; unsigned nliteral; unsigned addr; @@ -1636,9 +1648,9 @@ int r600_bc_build(struct r600_bc *bc) LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { addr = cf->addr; if (bc->chip_class >= EVERGREEN) - r = eg_bc_cf_build(bc, cf); + r = eg_bytecode_cf_build(bc, cf); else - r = r600_bc_cf_build(bc, cf); + r = r600_bytecode_cf_build(bc, cf); if (r) return r; switch (cf->inst) { @@ -1649,18 +1661,18 @@ int r600_bc_build(struct r600_bc *bc) nliteral = 0; memset(literal, 0, sizeof(literal)); LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) { - r = r600_bc_alu_nliterals(bc, alu, literal, &nliteral); + r = r600_bytecode_alu_nliterals(bc, alu, literal, &nliteral); if (r) return r; - r600_bc_alu_adjust_literals(bc, alu, literal, nliteral); + r600_bytecode_alu_adjust_literals(bc, alu, literal, nliteral); switch(bc->chip_class) { case R600: - r = r600_bc_alu_build(bc, alu, addr); + r = r600_bytecode_alu_build(bc, alu, addr); break; case R700: case EVERGREEN: /* eg alu is same encoding as r700 */ case CAYMAN: /* eg alu is same encoding as r700 */ - r = r700_bc_alu_build(bc, alu, addr); + r = r700_bytecode_alu_build(bc, alu, addr); break; default: R600_ERR("unknown chip class %d.\n", bc->chip_class); @@ -1681,7 +1693,7 @@ int r600_bc_build(struct r600_bc *bc) case V_SQ_CF_WORD1_SQ_CF_INST_VTX: case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { - r = r600_bc_vtx_build(bc, vtx, addr); + r = r600_bytecode_vtx_build(bc, vtx, addr); if (r) return r; addr += 4; @@ -1690,14 +1702,14 @@ int r600_bc_build(struct r600_bc *bc) case V_SQ_CF_WORD1_SQ_CF_INST_TEX: if (bc->chip_class == CAYMAN) { LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { - r = r600_bc_vtx_build(bc, vtx, addr); + r = r600_bytecode_vtx_build(bc, vtx, addr); if (r) return r; addr += 4; } } LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) { - r = r600_bc_tex_build(bc, tex, addr); + r = r600_bytecode_tex_build(bc, tex, addr); if (r) return r; addr += 4; @@ -1726,17 +1738,17 @@ int r600_bc_build(struct r600_bc *bc) return 0; } -void r600_bc_clear(struct r600_bc *bc) +void r600_bytecode_clear(struct r600_bytecode *bc) { - struct r600_bc_cf *cf = NULL, *next_cf; + struct r600_bytecode_cf *cf = NULL, *next_cf; free(bc->bytecode); bc->bytecode = NULL; LIST_FOR_EACH_ENTRY_SAFE(cf, next_cf, &bc->cf, list) { - struct r600_bc_alu *alu = NULL, *next_alu; - struct r600_bc_tex *tex = NULL, *next_tex; - struct r600_bc_tex *vtx = NULL, *next_vtx; + struct r600_bytecode_alu *alu = NULL, *next_alu; + struct r600_bytecode_tex *tex = NULL, *next_tex; + struct r600_bytecode_tex *vtx = NULL, *next_vtx; LIST_FOR_EACH_ENTRY_SAFE(alu, next_alu, &cf->alu, list) { free(alu); @@ -1762,12 +1774,12 @@ void r600_bc_clear(struct r600_bc *bc) LIST_INITHEAD(&cf->list); } -void r600_bc_dump(struct r600_bc *bc) +void r600_bytecode_dump(struct r600_bytecode *bc) { - struct r600_bc_cf *cf = NULL; - struct r600_bc_alu *alu = NULL; - struct r600_bc_vtx *vtx = NULL; - struct r600_bc_tex *tex = NULL; + struct r600_bytecode_cf *cf = NULL; + struct r600_bytecode_alu *alu = NULL; + struct r600_bytecode_vtx *vtx = NULL; + struct r600_bytecode_tex *tex = NULL; unsigned i, id; uint32_t literal[4]; @@ -1866,7 +1878,7 @@ void r600_bc_dump(struct r600_bc *bc) id = cf->addr; nliteral = 0; LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) { - r600_bc_alu_nliterals(bc, alu, literal, &nliteral); + r600_bytecode_alu_nliterals(bc, alu, literal, &nliteral); fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); fprintf(stderr, "SRC0(SEL:%d ", alu->src[0].sel); @@ -2120,8 +2132,8 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru { static int dump_shaders = -1; - struct r600_bc bc; - struct r600_bc_vtx vtx; + struct r600_bytecode bc; + struct r600_bytecode_vtx vtx; struct pipe_vertex_element *elements = ve->elements; const struct util_format_description *desc; unsigned fetch_resource_start = rctx->chip_class >= EVERGREEN ? 0 : 160; @@ -2142,11 +2154,11 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru } memset(&bc, 0, sizeof(bc)); - r600_bc_init(&bc, rctx->chip_class); + r600_bytecode_init(&bc, rctx->chip_class); for (i = 0; i < ve->count; i++) { if (elements[i].instance_divisor > 1) { - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; memset(&alu, 0, sizeof(alu)); alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT); @@ -2161,8 +2173,8 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru alu.dst.write = 1; alu.last = 1; - if ((r = r600_bc_add_alu(&bc, &alu))) { - r600_bc_clear(&bc); + if ((r = r600_bytecode_add_alu(&bc, &alu))) { + r600_bytecode_clear(&bc); return r; } } @@ -2173,7 +2185,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru r600_vertex_data_type(ve->elements[i].src_format, &format, &num_format, &format_comp, &endian); desc = util_format_description(ve->elements[i].src_format); if (desc == NULL) { - r600_bc_clear(&bc); + r600_bytecode_clear(&bc); R600_ERR("unknown format %d\n", ve->elements[i].src_format); return -EINVAL; } @@ -2198,16 +2210,16 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru vtx.offset = elements[i].src_offset; vtx.endian = endian; - if ((r = r600_bc_add_vtx(&bc, &vtx))) { - r600_bc_clear(&bc); + if ((r = r600_bytecode_add_vtx(&bc, &vtx))) { + r600_bytecode_clear(&bc); return r; } } - r600_bc_add_cfinst(&bc, BC_INST(&bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN)); + r600_bytecode_add_cfinst(&bc, BC_INST(&bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN)); - if ((r = r600_bc_build(&bc))) { - r600_bc_clear(&bc); + if ((r = r600_bytecode_build(&bc))) { + r600_bytecode_clear(&bc); return r; } @@ -2216,7 +2228,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru if (dump_shaders) { fprintf(stderr, "--------------------------------------------------------------\n"); - r600_bc_dump(&bc); + r600_bytecode_dump(&bc); fprintf(stderr, "______________________________________________________________\n"); } @@ -2225,14 +2237,14 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru /* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */ ve->fetch_shader = r600_bo(rctx->radeon, ve->fs_size, 256, PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_IMMUTABLE); if (ve->fetch_shader == NULL) { - r600_bc_clear(&bc); + r600_bytecode_clear(&bc); return -ENOMEM; } - bytecode = r600_bo_map(rctx->radeon, ve->fetch_shader, 0, NULL); + bytecode = r600_bo_map(rctx->radeon, ve->fetch_shader, rctx->ctx.cs, PIPE_TRANSFER_WRITE); if (bytecode == NULL) { - r600_bc_clear(&bc); - r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL); + r600_bytecode_clear(&bc); + r600_bo_reference(&ve->fetch_shader, NULL); return -ENOMEM; } @@ -2245,7 +2257,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru } r600_bo_unmap(rctx->radeon, ve->fetch_shader); - r600_bc_clear(&bc); + r600_bytecode_clear(&bc); if (rctx->chip_class >= EVERGREEN) evergreen_fetch_shader(&rctx->context, ve); diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index cbdaacf7178..61caa4b915e 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -23,12 +23,10 @@ #ifndef R600_ASM_H #define R600_ASM_H -#include "util/u_double_list.h" - struct r600_vertex_element; struct r600_pipe_context; -struct r600_bc_alu_src { +struct r600_bytecode_alu_src { unsigned sel; unsigned chan; unsigned neg; @@ -37,7 +35,7 @@ struct r600_bc_alu_src { uint32_t value; }; -struct r600_bc_alu_dst { +struct r600_bytecode_alu_dst { unsigned sel; unsigned chan; unsigned clamp; @@ -45,10 +43,10 @@ struct r600_bc_alu_dst { unsigned rel; }; -struct r600_bc_alu { +struct r600_bytecode_alu { struct list_head list; - struct r600_bc_alu_src src[3]; - struct r600_bc_alu_dst dst; + struct r600_bytecode_alu_src src[3]; + struct r600_bytecode_alu_dst dst; unsigned inst; unsigned last; unsigned is_op3; @@ -58,7 +56,7 @@ struct r600_bc_alu { unsigned omod; }; -struct r600_bc_tex { +struct r600_bytecode_tex { struct list_head list; unsigned inst; unsigned resource_id; @@ -85,7 +83,7 @@ struct r600_bc_tex { unsigned src_sel_w; }; -struct r600_bc_vtx { +struct r600_bytecode_vtx { struct list_head list; unsigned inst; unsigned fetch_type; @@ -107,7 +105,7 @@ struct r600_bc_vtx { unsigned endian; }; -struct r600_bc_output { +struct r600_bytecode_output { unsigned array_base; unsigned type; unsigned end_of_program; @@ -122,13 +120,13 @@ struct r600_bc_output { unsigned barrier; }; -struct r600_bc_kcache { +struct r600_bytecode_kcache { unsigned bank; unsigned mode; unsigned addr; }; -struct r600_bc_cf { +struct r600_bytecode_cf { struct list_head list; unsigned inst; unsigned addr; @@ -137,15 +135,15 @@ struct r600_bc_cf { unsigned cond; unsigned pop_count; unsigned cf_addr; /* control flow addr */ - struct r600_bc_kcache kcache[2]; + struct r600_bytecode_kcache kcache[2]; unsigned r6xx_uses_waterfall; struct list_head alu; struct list_head tex; struct list_head vtx; - struct r600_bc_output output; - struct r600_bc_alu *curr_bs_head; - struct r600_bc_alu *prev_bs_head; - struct r600_bc_alu *prev2_bs_head; + struct r600_bytecode_output output; + struct r600_bytecode_alu *curr_bs_head; + struct r600_bytecode_alu *prev_bs_head; + struct r600_bytecode_alu *prev2_bs_head; }; #define FC_NONE 0 @@ -157,8 +155,8 @@ struct r600_bc_cf { struct r600_cf_stack_entry { int type; - struct r600_bc_cf *start; - struct r600_bc_cf **mid; /* used to store the else point */ + struct r600_bytecode_cf *start; + struct r600_bytecode_cf **mid; /* used to store the else point */ int num_mid; }; @@ -170,11 +168,11 @@ struct r600_cf_callstack { int max; }; -struct r600_bc { +struct r600_bytecode { enum chip_class chip_class; int type; struct list_head cf; - struct r600_bc_cf *cf_last; + struct r600_bytecode_cf *cf_last; unsigned ndw; unsigned ncf; unsigned ngpr; @@ -189,27 +187,27 @@ struct r600_bc { }; /* eg_asm.c */ -int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf); +int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf); /* r600_asm.c */ -void r600_bc_init(struct r600_bc *bc, enum chip_class chip_class); -void r600_bc_clear(struct r600_bc *bc); -int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu); -int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx); -int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex); -int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output); -int r600_bc_build(struct r600_bc *bc); -int r600_bc_add_cfinst(struct r600_bc *bc, int inst); -int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type); -void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg); -void r600_bc_dump(struct r600_bc *bc); - -int cm_bc_add_cf_end(struct r600_bc *bc); +void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class); +void r600_bytecode_clear(struct r600_bytecode *bc); +int r600_bytecode_add_alu(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu); +int r600_bytecode_add_vtx(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx); +int r600_bytecode_add_tex(struct r600_bytecode *bc, const struct r600_bytecode_tex *tex); +int r600_bytecode_add_output(struct r600_bytecode *bc, const struct r600_bytecode_output *output); +int r600_bytecode_build(struct r600_bytecode *bc); +int r600_bytecode_add_cfinst(struct r600_bytecode *bc, int inst); +int r600_bytecode_add_alu_type(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu, int type); +void r600_bytecode_special_constants(u32 value, unsigned *sel, unsigned *neg); +void r600_bytecode_dump(struct r600_bytecode *bc); + +int cm_bytecode_add_cf_end(struct r600_bytecode *bc); int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, struct r600_vertex_element *ve); /* r700_asm.c */ -void r700_bc_cf_vtx_build(uint32_t *bytecode, const struct r600_bc_cf *cf); -int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id); +void r700_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_bytecode_cf *cf); +int r700_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id); #endif diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 35e68b6e222..2f7e871448a 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -111,7 +111,7 @@ void r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_t if (!texture->dirty_db) return; - surf_tmpl.format = texture->resource.b.b.b.format; + surf_tmpl.format = texture->real_format; surf_tmpl.u.tex.level = level; surf_tmpl.u.tex.first_layer = 0; surf_tmpl.u.tex.last_layer = 0; @@ -119,7 +119,7 @@ void r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_t zsurf = ctx->create_surface(ctx, &texture->resource.b.b.b, &surf_tmpl); - surf_tmpl.format = ((struct pipe_resource*)texture->flushed_depth_texture)->format; + surf_tmpl.format = texture->flushed_depth_texture->real_format; surf_tmpl.usage = PIPE_BIND_RENDER_TARGET; cbsurf = ctx->create_surface(ctx, (struct pipe_resource*)texture->flushed_depth_texture, &surf_tmpl); @@ -233,8 +233,8 @@ static void r600_hw_copy_region(struct pipe_context *ctx, struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; r600_blitter_begin(ctx, R600_COPY); - util_blitter_copy_region(rctx->blitter, dst, dst_level, dstx, dsty, dstz, - src, src_level, src_box, TRUE); + util_blitter_copy_texture(rctx->blitter, dst, dst_level, dstx, dsty, dstz, + src, src_level, src_box, TRUE); r600_blitter_end(ctx); } @@ -249,7 +249,7 @@ static void r600_compressed_to_blittable(struct pipe_resource *tex, struct texture_orig_info *orig) { struct r600_resource_texture *rtex = (struct r600_resource_texture*)tex; - unsigned pixsize = util_format_get_blocksize(tex->format); + unsigned pixsize = util_format_get_blocksize(rtex->real_format); int new_format; int new_height, new_width; @@ -269,7 +269,6 @@ static void r600_compressed_to_blittable(struct pipe_resource *tex, tex->width0 = new_width; tex->height0 = new_height; tex->format = new_format; - } static void r600_reset_blittable_to_compressed(struct pipe_resource *tex, diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index 72f352df3c3..ca2415adb28 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -33,11 +33,6 @@ #include <util/u_memory.h> #include "util/u_upload_mgr.h" -#include "state_tracker/drm_driver.h" - -#include <xf86drm.h> -#include "radeon_drm.h" - #include "r600.h" #include "r600_pipe.h" @@ -48,7 +43,7 @@ static void r600_buffer_destroy(struct pipe_screen *screen, struct r600_resource_buffer *rbuffer = r600_buffer(buf); if (rbuffer->r.bo) { - r600_bo_reference((struct radeon*)screen->winsys, &rbuffer->r.bo, NULL); + r600_bo_reference(&rbuffer->r.bo, NULL); } rbuffer->r.bo = NULL; util_slab_free(&rscreen->pool_buffers, rbuffer); @@ -81,12 +76,13 @@ static void *r600_buffer_transfer_map(struct pipe_context *pipe, struct pipe_transfer *transfer) { struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource); + struct r600_pipe_context *rctx = (struct r600_pipe_context*)pipe; uint8_t *data; if (rbuffer->r.b.user_ptr) return (uint8_t*)rbuffer->r.b.user_ptr + transfer->box.x; - data = r600_bo_map((struct radeon*)pipe->winsys, rbuffer->r.bo, transfer->usage, pipe); + data = r600_bo_map(rctx->screen->radeon, rbuffer->r.bo, rctx->ctx.cs, transfer->usage); if (!data) return NULL; @@ -97,12 +93,13 @@ static void r600_buffer_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *transfer) { struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource); + struct r600_pipe_context *rctx = (struct r600_pipe_context*)pipe; if (rbuffer->r.b.user_ptr) return; if (rbuffer->r.bo) - r600_bo_unmap((struct radeon*)pipe->winsys, rbuffer->r.bo); + r600_bo_unmap(rctx->screen->radeon, rbuffer->r.bo); } static void r600_buffer_transfer_flush_region(struct pipe_context *pipe, @@ -127,20 +124,20 @@ static void r600_buffer_transfer_inline_write(struct pipe_context *pipe, unsigned stride, unsigned layer_stride) { - struct radeon *ws = (struct radeon*)pipe->winsys; + struct r600_pipe_context *rctx = (struct r600_pipe_context*)pipe; + struct radeon *radeon = rctx->screen->radeon; struct r600_resource_buffer *rbuffer = r600_buffer(resource); uint8_t *map = NULL; assert(rbuffer->r.b.user_ptr == NULL); - map = r600_bo_map(ws, rbuffer->r.bo, - PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD | usage, - pipe); + map = r600_bo_map(radeon, rbuffer->r.bo, rctx->ctx.cs, + PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD | usage); memcpy(map + box->x, data, box->width); if (rbuffer->r.bo) - r600_bo_unmap(ws, rbuffer->r.bo); + r600_bo_unmap(radeon, rbuffer->r.bo); } static const struct u_resource_vtbl r600_buffer_vtbl = @@ -175,7 +172,7 @@ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, rbuffer->r.size = rbuffer->r.b.b.b.width0; rbuffer->r.bo_size = rbuffer->r.size; - bo = r600_bo((struct radeon*)screen->winsys, + bo = r600_bo(rscreen->radeon, rbuffer->r.b.b.b.width0, alignment, rbuffer->r.b.b.b.bind, rbuffer->r.b.b.b.usage); @@ -219,18 +216,18 @@ struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen, struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen, struct winsys_handle *whandle) { - struct radeon *rw = (struct radeon*)screen->winsys; + struct radeon *rw = ((struct r600_screen*)screen)->radeon; struct r600_resource *rbuffer; struct r600_bo *bo = NULL; - bo = r600_bo_handle(rw, whandle->handle, NULL); + bo = r600_bo_handle(rw, whandle, NULL, NULL); if (bo == NULL) { return NULL; } rbuffer = CALLOC_STRUCT(r600_resource); if (rbuffer == NULL) { - r600_bo_reference(rw, &bo, NULL); + r600_bo_reference(&bo, NULL); return NULL; } diff --git a/src/gallium/drivers/r600/r600_formats.h b/src/gallium/drivers/r600/r600_formats.h index 1c1089d89d2..b822cba9293 100644 --- a/src/gallium/drivers/r600/r600_formats.h +++ b/src/gallium/drivers/r600/r600_formats.h @@ -99,7 +99,6 @@ static INLINE bool r600_is_vertex_format_supported(enum pipe_format format) /* No fixed, no double. */ if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN || - desc->channel[i].type == UTIL_FORMAT_TYPE_FIXED || (desc->channel[i].size == 64 && desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT)) return false; diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 615f0688eb3..ceaebbb4431 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -47,12 +47,14 @@ #include "r600_resource.h" #include "r600_shader.h" #include "r600_pipe.h" +#include "../../winsys/r600/drm/r600_drm_public.h" /* * pipe_context */ static struct r600_fence *r600_create_fence(struct r600_pipe_context *ctx) { + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_fence *fence = NULL; if (!ctx->fences.bo) { @@ -62,7 +64,8 @@ static struct r600_fence *r600_create_fence(struct r600_pipe_context *ctx) R600_ERR("r600: failed to create bo for fence objects\n"); return NULL; } - ctx->fences.data = r600_bo_map(ctx->radeon, ctx->fences.bo, PIPE_TRANSFER_UNSYNCHRONIZED, NULL); + ctx->fences.data = r600_bo_map(ctx->radeon, ctx->fences.bo, rctx->ctx.cs, + PIPE_TRANSFER_UNSYNCHRONIZED | PIPE_TRANSFER_WRITE); } if (!LIST_IS_EMPTY(&ctx->fences.pool)) { @@ -113,29 +116,28 @@ static struct r600_fence *r600_create_fence(struct r600_pipe_context *ctx) return fence; } -static void r600_flush(struct pipe_context *ctx, - struct pipe_fence_handle **fence) + +void r600_flush(struct pipe_context *ctx, struct pipe_fence_handle **fence, + unsigned flags) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_fence **rfence = (struct r600_fence**)fence; -#if 0 - static int dc = 0; - char dname[256]; -#endif - if (rfence) *rfence = r600_create_fence(rctx); -#if 0 - sprintf(dname, "gallium-%08d.bof", dc); - if (dc < 20) { - r600_context_dump_bof(&rctx->ctx, dname); - R600_ERR("dumped %s\n", dname); - } - dc++; -#endif - r600_context_flush(&rctx->ctx); + r600_context_flush(&rctx->ctx, flags); +} + +static void r600_flush_from_st(struct pipe_context *ctx, + struct pipe_fence_handle **fence) +{ + r600_flush(ctx, fence, 0); +} + +static void r600_flush_from_winsys(void *ctx, unsigned flags) +{ + r600_flush((struct pipe_context*)ctx, NULL, flags); } static void r600_update_num_contexts(struct r600_screen *rscreen, int diff) @@ -184,7 +186,7 @@ static void r600_destroy_context(struct pipe_context *context) } r600_bo_unmap(rctx->radeon, rctx->fences.bo); - r600_bo_reference(rctx->radeon, &rctx->fences.bo, NULL); + r600_bo_reference(&rctx->fences.bo, NULL); } r600_update_num_contexts(rctx->screen, -1); @@ -206,7 +208,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void rctx->context.screen = screen; rctx->context.priv = priv; rctx->context.destroy = r600_destroy_context; - rctx->context.flush = r600_flush; + rctx->context.flush = r600_flush_from_st; /* Easy accessing of screen/winsys. */ rctx->screen = rscreen; @@ -256,6 +258,8 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void return NULL; } + rctx->screen->ws->cs_set_flush_callback(rctx->ctx.cs, r600_flush_from_winsys, rctx); + util_slab_create(&rctx->pool_transfers, sizeof(struct pipe_transfer), 64, UTIL_SLAB_SINGLETHREADED); @@ -269,6 +273,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void r600_destroy_context(&rctx->context); return NULL; } + rctx->vbuf_mgr->caps.format_fixed32 = 0; rctx->blitter = util_blitter_create(&rctx->context); if (rctx->blitter == NULL) { @@ -355,6 +360,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_SM3: case PIPE_CAP_SEAMLESS_CUBE_MAP: case PIPE_CAP_FRAGMENT_COLOR_CLAMP_CONTROL: + case PIPE_CAP_PRIMITIVE_RESTART: return 1; /* Supported except the original R600. */ @@ -369,7 +375,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) /* Unsupported features. */ case PIPE_CAP_STREAM_OUTPUT: - case PIPE_CAP_PRIMITIVE_RESTART: case PIPE_CAP_TGSI_INSTANCEID: case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: @@ -481,6 +486,8 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e return 1; case PIPE_SHADER_CAP_SUBROUTINES: return 0; + case PIPE_SHADER_CAP_INTEGERS: + return 0; default: return 0; } @@ -498,6 +505,8 @@ static int r600_get_video_param(struct pipe_screen *screen, case PIPE_VIDEO_CAP_MAX_WIDTH: case PIPE_VIDEO_CAP_MAX_HEIGHT: return vl_video_buffer_max_size(screen); + case PIPE_VIDEO_CAP_NUM_BUFFERS_DESIRED: + return vl_num_buffers_desired(screen, profile); default: return 0; } @@ -510,7 +519,8 @@ static void r600_destroy_screen(struct pipe_screen* pscreen) if (rscreen == NULL) return; - radeon_decref(rscreen->radeon); + radeon_destroy(rscreen->radeon); + rscreen->ws->destroy(rscreen->ws); util_slab_destroy(&rscreen->pool_buffers); pipe_mutex_destroy(rscreen->mutex_num_contexts); @@ -574,17 +584,19 @@ static boolean r600_fence_finish(struct pipe_screen *pscreen, return TRUE; } -struct pipe_screen *r600_screen_create(struct radeon *radeon) +struct pipe_screen *r600_screen_create(struct radeon_winsys *ws) { struct r600_screen *rscreen; + struct radeon *radeon = radeon_create(ws); rscreen = CALLOC_STRUCT(r600_screen); if (rscreen == NULL) { return NULL; } + rscreen->ws = ws; rscreen->radeon = radeon; - rscreen->screen.winsys = (struct pipe_winsys*)radeon; + rscreen->screen.winsys = (struct pipe_winsys*)ws; rscreen->screen.destroy = r600_destroy_screen; rscreen->screen.get_name = r600_get_name; rscreen->screen.get_vendor = r600_get_vendor; diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 6f399ed43b0..2747f54079c 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -26,6 +26,8 @@ #ifndef R600_PIPE_H #define R600_PIPE_H +#include "../../winsys/radeon/drm/radeon_winsys.h" + #include <pipe/p_state.h> #include <pipe/p_screen.h> #include <pipe/p_context.h> @@ -72,6 +74,7 @@ enum r600_pipe_state_id { struct r600_screen { struct pipe_screen screen; + struct radeon_winsys *ws; struct radeon *radeon; struct r600_tiling_info *tiling_info; struct util_slab_mempool pool_buffers; @@ -183,7 +186,7 @@ struct r600_pipe_context { struct r600_pipe_state *states[R600_PIPE_NSTATES]; struct r600_context ctx; struct r600_vertex_element *vertex_elements; - struct r600_pipe_resource_state fs_resource[PIPE_MAX_ATTRIBS]; + struct r600_pipe_resource_state fs_resource[PIPE_MAX_ATTRIBS]; struct pipe_framebuffer_state framebuffer; struct pipe_index_buffer index_buffer; unsigned cb_target_mask; @@ -247,7 +250,8 @@ void evergreen_pipe_init_buffer_resource(struct r600_pipe_context *rctx, struct r600_pipe_resource_state *rstate); void evergreen_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate, struct r600_resource *rbuffer, - unsigned offset, unsigned stride); + unsigned offset, unsigned stride, + enum radeon_bo_usage usage); boolean evergreen_is_format_supported(struct pipe_screen *screen, enum pipe_format format, enum pipe_texture_target target, @@ -270,6 +274,11 @@ struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen, struct winsys_handle *whandle); void r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw); + +/* r600_pipe.c */ +void r600_flush(struct pipe_context *ctx, struct pipe_fence_handle **fence, + unsigned flags); + /* r600_query.c */ void r600_init_query_functions(struct r600_pipe_context *rctx); @@ -294,7 +303,8 @@ void r600_pipe_init_buffer_resource(struct r600_pipe_context *rctx, struct r600_pipe_resource_state *rstate); void r600_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate, struct r600_resource *rbuffer, - unsigned offset, unsigned stride); + unsigned offset, unsigned stride, + enum radeon_bo_usage usage); void r600_adjust_gprs(struct r600_pipe_context *rctx); boolean r600_is_format_supported(struct pipe_screen *screen, enum pipe_format format, diff --git a/src/gallium/drivers/r600/r600_public.h b/src/gallium/drivers/r600/r600_public.h index f1970201e89..e4fe23a87b7 100644 --- a/src/gallium/drivers/r600/r600_public.h +++ b/src/gallium/drivers/r600/r600_public.h @@ -23,6 +23,8 @@ #ifndef R600_PUBLIC_H #define R600_PUBLIC_H -struct pipe_screen *r600_screen_create(struct radeon *radeon); +struct radeon_winsys; + +struct pipe_screen *r600_screen_create(struct radeon_winsys *ws); #endif diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h index 836e7491f1f..d9d29db7968 100644 --- a/src/gallium/drivers/r600/r600_resource.h +++ b/src/gallium/drivers/r600/r600_resource.h @@ -52,6 +52,12 @@ struct r600_resource { struct r600_resource_texture { struct r600_resource resource; + + /* If this resource is a depth-stencil buffer on evergreen, this contains + * the depth part of the format. There is a separate stencil resource + * for the stencil buffer below. */ + enum pipe_format real_format; + unsigned offset[PIPE_MAX_TEXTURE_LEVELS]; unsigned pitch_in_bytes[PIPE_MAX_TEXTURE_LEVELS]; /* transfer */ unsigned pitch_in_blocks[PIPE_MAX_TEXTURE_LEVELS]; /* texture resource */ @@ -62,6 +68,7 @@ struct r600_resource_texture { unsigned tile_type; unsigned depth; unsigned dirty_db; + struct r600_resource_texture *stencil; /* Stencil is in a separate buffer on Evergreen. */ struct r600_resource_texture *flushed_depth_texture; boolean is_flushing_texture; diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 3e21ad1fdc6..c37bb729ce3 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -86,7 +86,7 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *s if (shader->bo == NULL) { return -ENOMEM; } - ptr = (uint32_t*)r600_bo_map(rctx->radeon, shader->bo, 0, NULL); + ptr = (uint32_t*)r600_bo_map(rctx->radeon, shader->bo, rctx->ctx.cs, PIPE_TRANSFER_WRITE); if (R600_BIG_ENDIAN) { for (i = 0; i < rshader->bc.ndw; ++i) { ptr[i] = bswap_32(rshader->bc.bytecode[i]); @@ -140,13 +140,13 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *s R600_ERR("translation from TGSI failed !\n"); return r; } - r = r600_bc_build(&shader->shader.bc); + r = r600_bytecode_build(&shader->shader.bc); if (r) { R600_ERR("building bytecode failed !\n"); return r; } if (dump_shaders) { - r600_bc_dump(&shader->shader.bc); + r600_bytecode_dump(&shader->shader.bc); fprintf(stderr, "______________________________________________________________\n"); } return r600_pipe_shader(ctx, shader); @@ -154,10 +154,8 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *s void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader) { - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - - r600_bo_reference(rctx->radeon, &shader->bo, NULL); - r600_bc_clear(&shader->shader.bc); + r600_bo_reference(&shader->bo, NULL); + r600_bytecode_clear(&shader->shader.bc); memset(&shader->shader,0,sizeof(struct r600_shader)); } @@ -185,7 +183,7 @@ struct r600_shader_ctx { unsigned temp_reg; unsigned ar_reg; struct r600_shader_tgsi_instruction *inst_info; - struct r600_bc *bc; + struct r600_bytecode *bc; struct r600_shader *shader; struct r600_shader_src src[4]; u32 *literals; @@ -246,7 +244,7 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx) static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) { int i, r; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int gpr = 0, base_chan = 0; int ij_index = 0; @@ -272,7 +270,7 @@ static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) base_chan = (2 * (ij_index % 2)) + 1; for (i = 0; i < 8; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); if (i < 4) alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW; @@ -294,7 +292,7 @@ static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) alu.bank_swizzle_force = SQ_ALU_VEC_210; if ((i % 4) == 3) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -332,6 +330,12 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) ctx->shader->output[i].sid = d->Semantic.Index; ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i; ctx->shader->output[i].interpolate = d->Declaration.Interpolate; + if (ctx->type == TGSI_PROCESSOR_VERTEX) { + /* these don't count as vertex param exports */ + if ((ctx->shader->output[i].name == TGSI_SEMANTIC_POSITION) || + (ctx->shader->output[i].name == TGSI_SEMANTIC_PSIZE)) + ctx->shader->npos++; + } break; case TGSI_FILE_CONSTANT: case TGSI_FILE_TEMPORARY: @@ -341,8 +345,8 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) case TGSI_FILE_SYSTEM_VALUE: if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) { - struct r600_bc_alu alu; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + struct r600_bytecode_alu alu; + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT); alu.src[0].sel = 0; @@ -353,7 +357,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) alu.dst.write = 1; alu.last = 1; - if ((r = r600_bc_add_alu(ctx->bc, &alu))) + if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) return r; break; } @@ -436,7 +440,7 @@ static void tgsi_src(struct r600_shader_ctx *ctx, (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) { index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX; - r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg); + r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg); if (r600_src->sel != V_SQ_ALU_SRC_LITERAL) return; } @@ -460,12 +464,12 @@ static void tgsi_src(struct r600_shader_ctx *ctx, static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg) { - struct r600_bc_vtx vtx; + struct r600_bytecode_vtx vtx; unsigned int ar_reg; int r; if (offset) { - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; memset(&alu, 0, sizeof(alu)); @@ -479,7 +483,7 @@ static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset alu.dst.write = 1; alu.last = 1; - if ((r = r600_bc_add_alu(ctx->bc, &alu))) + if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) return r; ar_reg = dst_reg; @@ -502,7 +506,7 @@ static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */ vtx.endian = r600_endian_swap(32); - if ((r = r600_bc_add_vtx(ctx->bc, &vtx))) + if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx))) return r; return 0; @@ -511,7 +515,7 @@ static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset static int tgsi_split_constant(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int i, j, k, nconst, r; for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { @@ -536,7 +540,7 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx) } else if (j > 0) { int treg = r600_get_temp(ctx); for (k = 0; k < 4; k++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = ctx->src[i].sel; alu.src[0].chan = k; @@ -546,7 +550,7 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx) alu.dst.write = 1; if (k == 3) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -562,7 +566,7 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx) static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int i, j, k, nliteral, r; for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { @@ -574,7 +578,7 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx) if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { int treg = r600_get_temp(ctx); for (k = 0; k < 4; k++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = ctx->src[i].sel; alu.src[0].chan = k; @@ -584,7 +588,7 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx) alu.dst.write = 1; if (k == 3) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -602,14 +606,14 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi struct tgsi_full_immediate *immediate; struct tgsi_full_property *property; struct r600_shader_ctx ctx; - struct r600_bc_output output[32]; + struct r600_bytecode_output output[32]; unsigned output_done, noutput; unsigned opcode; int i, j, r = 0, pos0; ctx.bc = &shader->bc; ctx.shader = shader; - r600_bc_init(ctx.bc, rctx->chip_class); + r600_bytecode_init(ctx.bc, rctx->chip_class); ctx.tokens = tokens; tgsi_scan_shader(tokens, &ctx.info); tgsi_parse_init(&ctx.parse, tokens); @@ -649,18 +653,18 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi if (ctx.type == TGSI_PROCESSOR_VERTEX) { ctx.file_offset[TGSI_FILE_INPUT] = 1; if (ctx.bc->chip_class >= EVERGREEN) { - r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); + r600_bytecode_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); } else { - r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); + r600_bytecode_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); } } if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) { ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); } ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + - ctx.info.file_count[TGSI_FILE_INPUT]; + ctx.info.file_max[TGSI_FILE_INPUT] + 1; ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + - ctx.info.file_count[TGSI_FILE_OUTPUT]; + ctx.info.file_max[TGSI_FILE_OUTPUT] + 1; /* Outside the GPR range. This will be translated to one of the * kcache banks later. */ @@ -668,7 +672,7 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL; ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + - ctx.info.file_count[TGSI_FILE_TEMPORARY]; + ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1; ctx.temp_reg = ctx.ar_reg + 1; ctx.nliterals = 0; @@ -742,8 +746,8 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi int j; for (j = 0; j < 4; j++) { - struct r600_bc_alu alu; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + struct r600_bytecode_alu alu; + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); /* MOV_SAT R, R */ alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); @@ -757,7 +761,7 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi if (j == 3) { alu.last = 1; } - r = r600_bc_add_alu(ctx.bc, &alu); + r = r600_bytecode_add_alu(ctx.bc, &alu); if (r) return r; } @@ -768,7 +772,7 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi /* export output */ j = 0; for (i = 0, pos0 = 0; i < noutput; i++) { - memset(&output[i], 0, sizeof(struct r600_bc_output)); + memset(&output[i], 0, sizeof(struct r600_bytecode_output)); output[i + j].gpr = shader->output[i].gpr; output[i + j].elem_size = 3; output[i + j].swizzle_x = 0; @@ -801,7 +805,7 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; if (shader->fs_write_all && (rctx->chip_class >= EVERGREEN)) { for (j = 1; j < shader->nr_cbufs; j++) { - memset(&output[i + j], 0, sizeof(struct r600_bc_output)); + memset(&output[i + j], 0, sizeof(struct r600_bytecode_output)); output[i + j].gpr = shader->output[i].gpr; output[i + j].elem_size = 3; output[i + j].swizzle_x = 0; @@ -850,7 +854,7 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi } } if (!pos0) { - memset(&output[i], 0, sizeof(struct r600_bc_output)); + memset(&output[i], 0, sizeof(struct r600_bytecode_output)); output[i].gpr = 0; output[i].elem_size = 3; output[i].swizzle_x = 0; @@ -867,7 +871,7 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi } /* add fake pixel export */ if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) { - memset(&output[0], 0, sizeof(struct r600_bc_output)); + memset(&output[0], 0, sizeof(struct r600_bytecode_output)); output[0].gpr = 0; output[0].elem_size = 3; output[0].swizzle_x = 7; @@ -895,13 +899,13 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi } /* add output to bytecode */ for (i = 0; i < noutput; i++) { - r = r600_bc_add_output(ctx.bc, &output[i]); + r = r600_bytecode_add_output(ctx.bc, &output[i]); if (r) goto out_err; } /* add program end */ if (ctx.bc->chip_class == CAYMAN) - cm_bc_add_cf_end(ctx.bc); + cm_bytecode_add_cf_end(ctx.bc); free(ctx.literals); tgsi_parse_free(&ctx.parse); @@ -924,7 +928,7 @@ static int tgsi_end(struct r600_shader_ctx *ctx) return 0; } -static void r600_bc_src(struct r600_bc_alu_src *bc_src, +static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src, const struct r600_shader_src *shader_src, unsigned chan) { @@ -936,13 +940,13 @@ static void r600_bc_src(struct r600_bc_alu_src *bc_src, bc_src->value = shader_src->value[bc_src->chan]; } -static void r600_bc_src_set_abs(struct r600_bc_alu_src *bc_src) +static void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src) { bc_src->abs = 1; bc_src->neg = 0; } -static void r600_bc_src_toggle_neg(struct r600_bc_alu_src *bc_src) +static void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src *bc_src) { bc_src->neg = !bc_src->neg; } @@ -950,7 +954,7 @@ static void r600_bc_src_toggle_neg(struct r600_bc_alu_src *bc_src) static void tgsi_dst(struct r600_shader_ctx *ctx, const struct tgsi_full_dst_register *tgsi_dst, unsigned swizzle, - struct r600_bc_alu_dst *r600_dst) + struct r600_bytecode_alu_dst *r600_dst) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; @@ -980,7 +984,7 @@ static int tgsi_last_instruction(unsigned writemask) static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int i, j, r; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); @@ -988,25 +992,25 @@ static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.inst = ctx->inst_info->r600_opcode; if (!swap) { for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - r600_bc_src(&alu.src[j], &ctx->src[j], i); + r600_bytecode_src(&alu.src[j], &ctx->src[j], i); } } else { - r600_bc_src(&alu.src[0], &ctx->src[1], i); - r600_bc_src(&alu.src[1], &ctx->src[0], i); + r600_bytecode_src(&alu.src[0], &ctx->src[1], i); + r600_bytecode_src(&alu.src[1], &ctx->src[0], i); } /* handle some special cases */ switch (ctx->inst_info->tgsi_opcode) { case TGSI_OPCODE_SUB: - r600_bc_src_toggle_neg(&alu.src[1]); + r600_bytecode_src_toggle_neg(&alu.src[1]); break; case TGSI_OPCODE_ABS: - r600_bc_src_set_abs(&alu.src[0]); + r600_bytecode_src_set_abs(&alu.src[0]); break; default: break; @@ -1014,7 +1018,7 @@ static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) if (i == lasti) { alu.last = 1; } - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1035,21 +1039,21 @@ static int cayman_emit_float_instr(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; int i, j, r; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; for (i = 0 ; i < last_slot; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = ctx->inst_info->r600_opcode; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - r600_bc_src(&alu.src[j], &ctx->src[j], 0); + r600_bytecode_src(&alu.src[j], &ctx->src[j], 0); } tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; if (i == last_slot - 1) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1068,9 +1072,9 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx) static float neg_pi = -3.1415926535; int r; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); alu.is_op3 = 1; @@ -1078,7 +1082,7 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx) alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; - r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; alu.src[1].chan = 0; @@ -1086,11 +1090,11 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx) alu.src[2].sel = V_SQ_ALU_SRC_0_5; alu.src[2].chan = 0; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); alu.dst.chan = 0; @@ -1100,11 +1104,11 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx) alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); alu.is_op3 = 1; @@ -1130,7 +1134,7 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx) } alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; return 0; @@ -1139,7 +1143,7 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx) static int cayman_trig(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; int i, r; @@ -1149,7 +1153,7 @@ static int cayman_trig(struct r600_shader_ctx *ctx) for (i = 0; i < last_slot; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = ctx->inst_info->r600_opcode; alu.dst.chan = i; @@ -1160,7 +1164,7 @@ static int cayman_trig(struct r600_shader_ctx *ctx) alu.src[0].chan = 0; if (i == last_slot - 1) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1170,7 +1174,7 @@ static int cayman_trig(struct r600_shader_ctx *ctx) static int tgsi_trig(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int i, r; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); @@ -1178,7 +1182,7 @@ static int tgsi_trig(struct r600_shader_ctx *ctx) if (r) return r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = ctx->inst_info->r600_opcode; alu.dst.chan = 0; alu.dst.sel = ctx->temp_reg; @@ -1187,7 +1191,7 @@ static int tgsi_trig(struct r600_shader_ctx *ctx) alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; @@ -1196,14 +1200,14 @@ static int tgsi_trig(struct r600_shader_ctx *ctx) if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = ctx->temp_reg; tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); if (i == lasti) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1213,7 +1217,7 @@ static int tgsi_trig(struct r600_shader_ctx *ctx) static int tgsi_scs(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int i, r; /* We'll only need the trig stuff if we are going to write to the @@ -1229,7 +1233,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { if (ctx->bc->chip_class == CAYMAN) { for (i = 0 ; i < 3; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); @@ -1241,19 +1245,19 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) alu.src[0].chan = 0; if (i == 2) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } } else { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1263,7 +1267,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { if (ctx->bc->chip_class == CAYMAN) { for (i = 0 ; i < 3; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); if (i == 1) @@ -1274,19 +1278,19 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) alu.src[0].chan = 0; if (i == 2) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } } else { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1294,7 +1298,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) /* dst.z = 0.0; */ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); @@ -1305,14 +1309,14 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } /* dst.w = 1.0; */ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); @@ -1323,7 +1327,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1333,11 +1337,11 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) static int tgsi_kill(struct r600_shader_ctx *ctx) { - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int i, r; for (i = 0; i < 4; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = ctx->inst_info->r600_opcode; alu.dst.chan = i; @@ -1348,12 +1352,12 @@ static int tgsi_kill(struct r600_shader_ctx *ctx) alu.src[1].sel = V_SQ_ALU_SRC_1; alu.src[1].neg = 1; } else { - r600_bc_src(&alu.src[1], &ctx->src[0], i); + r600_bytecode_src(&alu.src[1], &ctx->src[0], i); } if (i == 3) { alu.last = 1; } - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1367,13 +1371,13 @@ static int tgsi_kill(struct r600_shader_ctx *ctx) static int tgsi_lit(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int r; /* tmp.x = max(src.y, 0.0) */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); - r600_bc_src(&alu.src[0], &ctx->src[0], 1); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 1); alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ alu.src[1].chan = 1; @@ -1382,7 +1386,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; @@ -1395,7 +1399,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { /* tmp.z = log(tmp.x) */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; @@ -1407,13 +1411,13 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) } else alu.dst.write = 0; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } } else { /* tmp.z = log(tmp.x) */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; @@ -1421,7 +1425,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) alu.dst.chan = 2; alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1430,25 +1434,25 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) sel = alu.dst.sel; /* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT); alu.src[0].sel = sel; alu.src[0].chan = chan; - r600_bc_src(&alu.src[1], &ctx->src[0], 3); - r600_bc_src(&alu.src[2], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[1], &ctx->src[0], 3); + r600_bytecode_src(&alu.src[2], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; alu.dst.write = 1; alu.is_op3 = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { /* dst.z = exp(tmp.x) */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; @@ -1458,56 +1462,56 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) alu.last = 1; } else alu.dst.write = 0; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } } else { /* dst.z = exp(tmp.x) */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } } /* dst.x, <- 1.0 */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ alu.src[0].chan = 0; tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; /* dst.y = max(src.x, 0.0) */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ alu.src[1].chan = 0; tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; /* dst.w, <- 1.0 */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = V_SQ_ALU_SRC_1; alu.src[0].chan = 0; tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; @@ -1517,10 +1521,10 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) static int tgsi_rsq(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int i, r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); /* FIXME: * For state trackers other than OpenGL, we'll want to use @@ -1529,13 +1533,13 @@ static int tgsi_rsq(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED); for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - r600_bc_src(&alu.src[i], &ctx->src[i], 0); - r600_bc_src_set_abs(&alu.src[i]); + r600_bytecode_src(&alu.src[i], &ctx->src[i], 0); + r600_bytecode_src_set_abs(&alu.src[i]); } alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; /* replicate result */ @@ -1545,11 +1549,11 @@ static int tgsi_rsq(struct r600_shader_ctx *ctx) static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int i, r; for (i = 0; i < 4; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.src[0].sel = ctx->temp_reg; alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.dst.chan = i; @@ -1557,7 +1561,7 @@ static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; if (i == 3) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1567,18 +1571,18 @@ static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int i, r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = ctx->inst_info->r600_opcode; for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - r600_bc_src(&alu.src[i], &ctx->src[i], 0); + r600_bytecode_src(&alu.src[i], &ctx->src[i], 0); } alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; /* replicate result */ @@ -1589,38 +1593,38 @@ static int cayman_pow(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; int i, r; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; for (i = 0; i < 3; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; alu.dst.write = 1; if (i == 2) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } /* b * LOG2(a) */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); - r600_bc_src(&alu.src[0], &ctx->src[1], 0); + r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); alu.src[1].sel = ctx->temp_reg; alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; for (i = 0; i < last_slot; i++) { /* POW(a,b) = EXP2(b * LOG2(a))*/ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); alu.src[0].sel = ctx->temp_reg; @@ -1628,7 +1632,7 @@ static int cayman_pow(struct r600_shader_ctx *ctx) alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; if (i == last_slot - 1) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1637,38 +1641,38 @@ static int cayman_pow(struct r600_shader_ctx *ctx) static int tgsi_pow(struct r600_shader_ctx *ctx) { - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int r; /* LOG2(a) */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; /* b * LOG2(a) */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); - r600_bc_src(&alu.src[0], &ctx->src[1], 0); + r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); alu.src[1].sel = ctx->temp_reg; alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; /* POW(a,b) = EXP2(b * LOG2(a))*/ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); alu.src[0].sel = ctx->temp_reg; alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; return tgsi_helper_tempx_replicate(ctx); @@ -1677,32 +1681,32 @@ static int tgsi_pow(struct r600_shader_ctx *ctx) static int tgsi_ssg(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int i, r; /* tmp = (src > 0 ? 1 : src) */ for (i = 0; i < 4; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); alu.is_op3 = 1; alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; - r600_bc_src(&alu.src[0], &ctx->src[0], i); + r600_bytecode_src(&alu.src[0], &ctx->src[0], i); alu.src[1].sel = V_SQ_ALU_SRC_1; - r600_bc_src(&alu.src[2], &ctx->src[0], i); + r600_bytecode_src(&alu.src[2], &ctx->src[0], i); if (i == 3) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } /* dst = (-tmp > 0 ? -1 : tmp) */ for (i = 0; i < 4; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); alu.is_op3 = 1; tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); @@ -1719,7 +1723,7 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx) if (i == 3) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1728,11 +1732,11 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx) static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst) { - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int i, r; for (i = 0; i < 4; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP); alu.dst.chan = i; @@ -1745,7 +1749,7 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru if (i == 3) { alu.last = 1; } - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1755,7 +1759,7 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru static int tgsi_op3(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int i, j, r; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); @@ -1763,10 +1767,10 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = ctx->inst_info->r600_opcode; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - r600_bc_src(&alu.src[j], &ctx->src[j], i); + r600_bytecode_src(&alu.src[j], &ctx->src[j], i); } tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); @@ -1776,7 +1780,7 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) if (i == lasti) { alu.last = 1; } - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1786,14 +1790,14 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) static int tgsi_dp(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int i, j, r; for (i = 0; i < 4; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = ctx->inst_info->r600_opcode; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - r600_bc_src(&alu.src[j], &ctx->src[j], i); + r600_bytecode_src(&alu.src[j], &ctx->src[j], i); } tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); @@ -1826,7 +1830,7 @@ static int tgsi_dp(struct r600_shader_ctx *ctx) if (i == 3) { alu.last = 1; } - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1853,8 +1857,8 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) { static float one_point_five = 1.5f; struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_tex tex; - struct r600_bc_alu alu; + struct r600_bytecode_tex tex; + struct r600_bytecode_alu alu; unsigned src_gpr; int r, i, j; int opcode; @@ -1872,7 +1876,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) for (i = 1; i < 3; i++) { /* set gradients h/v */ - memset(&tex, 0, sizeof(struct r600_bc_tex)); + memset(&tex, 0, sizeof(struct r600_bytecode_tex)); tex.inst = (i == 1) ? SQ_TEX_INST_SET_GRADIENTS_H : SQ_TEX_INST_SET_GRADIENTS_V; tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); @@ -1886,15 +1890,15 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) tex.src_sel_w = 3; for (j = 0; j < 4; j++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - r600_bc_src(&alu.src[0], &ctx->src[i], j); + r600_bytecode_src(&alu.src[0], &ctx->src[i], j); alu.dst.sel = tex.src_gpr; alu.dst.chan = j; if (j == 3) alu.last = 1; alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1915,7 +1919,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) tex.coord_type_z = 1; tex.coord_type_w = 1; } - r = r600_bc_add_tex(ctx->bc, &tex); + r = r600_bytecode_add_tex(ctx->bc, &tex); if (r) return r; } @@ -1925,9 +1929,9 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) if (ctx->bc->chip_class == CAYMAN) { out_chan = 2; for (i = 0; i < 3; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 3); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; @@ -1935,40 +1939,40 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.last = 1; if (out_chan == i) alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } } else { out_chan = 3; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 3); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); alu.dst.sel = ctx->temp_reg; alu.dst.chan = out_chan; alu.last = 1; alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } for (i = 0; i < 3; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = out_chan; - r600_bc_src(&alu.src[1], &ctx->src[0], i); + r600_bytecode_src(&alu.src[1], &ctx->src[0], i); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = V_SQ_ALU_SRC_1; alu.src[0].chan = 0; @@ -1976,7 +1980,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.dst.chan = 3; alu.last = 1; alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; src_loaded = TRUE; @@ -1989,16 +1993,16 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ for (i = 0; i < 4; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE); - r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); - r600_bc_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]); + r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); + r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; if (i == 3) alu.last = 1; alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2006,7 +2010,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) /* tmp1.z = RCP_e(|tmp1.z|) */ if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 2; @@ -2017,12 +2021,12 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.dst.write = 1; if (i == 2) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } } else { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 2; @@ -2031,7 +2035,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.dst.chan = 2; alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2040,7 +2044,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x * muladd has no writemask, have to use another temp */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); alu.is_op3 = 1; @@ -2057,11 +2061,11 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.dst.chan = 0; alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); alu.is_op3 = 1; @@ -2079,7 +2083,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; @@ -2089,15 +2093,15 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) if (src_requires_loading && !src_loaded) { for (i = 0; i < 4; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - r600_bc_src(&alu.src[0], &ctx->src[0], i); + r600_bytecode_src(&alu.src[0], &ctx->src[0], i); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; if (i == 3) alu.last = 1; alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2120,7 +2124,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) } } - memset(&tex, 0, sizeof(struct r600_bc_tex)); + memset(&tex, 0, sizeof(struct r600_bytecode_tex)); tex.inst = opcode; tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); @@ -2167,7 +2171,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D) tex.src_sel_w = tex.src_sel_z; - r = r600_bc_add_tex(ctx->bc, &tex); + r = r600_bytecode_add_tex(ctx->bc, &tex); if (r) return r; @@ -2178,7 +2182,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) static int tgsi_lrp(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); unsigned i; int r; @@ -2189,17 +2193,17 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); - r600_bc_src(&alu.src[0], &ctx->src[1], i); - r600_bc_src(&alu.src[1], &ctx->src[2], i); + r600_bytecode_src(&alu.src[0], &ctx->src[1], i); + r600_bytecode_src(&alu.src[1], &ctx->src[2], i); alu.omod = 3; tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.chan = i; if (i == lasti) { alu.last = 1; } - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2211,19 +2215,19 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); alu.src[0].sel = V_SQ_ALU_SRC_1; alu.src[0].chan = 0; - r600_bc_src(&alu.src[1], &ctx->src[0], i); - r600_bc_src_toggle_neg(&alu.src[1]); + r600_bytecode_src(&alu.src[1], &ctx->src[0], i); + r600_bytecode_src_toggle_neg(&alu.src[1]); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; if (i == lasti) { alu.last = 1; } alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2233,18 +2237,18 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = i; - r600_bc_src(&alu.src[1], &ctx->src[2], i); + r600_bytecode_src(&alu.src[1], &ctx->src[2], i); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; if (i == lasti) { alu.last = 1; } alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2254,11 +2258,11 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); alu.is_op3 = 1; - r600_bc_src(&alu.src[0], &ctx->src[0], i); - r600_bc_src(&alu.src[1], &ctx->src[1], i); + r600_bytecode_src(&alu.src[0], &ctx->src[0], i); + r600_bytecode_src(&alu.src[1], &ctx->src[1], i); alu.src[2].sel = ctx->temp_reg; alu.src[2].chan = i; @@ -2267,7 +2271,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) if (i == lasti) { alu.last = 1; } - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2277,7 +2281,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) static int tgsi_cmp(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int i, r; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); @@ -2285,18 +2289,18 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx) if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE); - r600_bc_src(&alu.src[0], &ctx->src[0], i); - r600_bc_src(&alu.src[1], &ctx->src[2], i); - r600_bc_src(&alu.src[2], &ctx->src[1], i); + r600_bytecode_src(&alu.src[0], &ctx->src[0], i); + r600_bytecode_src(&alu.src[1], &ctx->src[2], i); + r600_bytecode_src(&alu.src[2], &ctx->src[1], i); tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.chan = i; alu.dst.write = 1; alu.is_op3 = 1; if (i == lasti) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2308,7 +2312,7 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; static const unsigned int src0_swizzle[] = {2, 0, 1}; static const unsigned int src1_swizzle[] = {1, 2, 0}; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; uint32_t use_temp = 0; int i, r; @@ -2316,11 +2320,11 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) use_temp = 1; for (i = 0; i < 4; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); if (i < 3) { - r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); - r600_bc_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]); + r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); + r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]); } else { alu.src[0].sel = V_SQ_ALU_SRC_0; alu.src[0].chan = i; @@ -2334,18 +2338,18 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) if (i == 3) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } for (i = 0; i < 4; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); if (i < 3) { - r600_bc_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]); - r600_bc_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]); + r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]); + r600_bytecode_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]); } else { alu.src[0].sel = V_SQ_ALU_SRC_0; alu.src[0].chan = i; @@ -2366,7 +2370,7 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) alu.is_op3 = 1; if (i == 3) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2378,22 +2382,22 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) static int tgsi_exp(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int r; int i; /* result.x = 2^floor(src); */ if (inst->Dst[0].Register.WriteMask & 1) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; @@ -2409,7 +2413,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) alu.dst.write = 1; if (i == 2) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2422,7 +2426,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) alu.dst.chan = 0; alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2430,10 +2434,10 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) /* result.y = tmp - floor(tmp); */ if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; #if 0 @@ -2446,7 +2450,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2455,9 +2459,9 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; @@ -2466,14 +2470,14 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) alu.last = 1; } - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } } else { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; @@ -2481,7 +2485,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2489,7 +2493,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) /* result.w = 1.0;*/ if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = V_SQ_ALU_SRC_1; @@ -2499,7 +2503,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) alu.dst.chan = 3; alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2509,7 +2513,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) static int tgsi_log(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int r; int i; @@ -2517,11 +2521,11 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (inst->Dst[0].Register.WriteMask & 1) { if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); - r600_bc_src_set_abs(&alu.src[0]); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src_set_abs(&alu.src[0]); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; @@ -2529,23 +2533,23 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.dst.write = 1; if (i == 2) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } } else { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); - r600_bc_src_set_abs(&alu.src[0]); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src_set_abs(&alu.src[0]); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2559,7 +2563,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2569,11 +2573,11 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); - r600_bc_src_set_abs(&alu.src[0]); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src_set_abs(&alu.src[0]); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; @@ -2582,28 +2586,28 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (i == 2) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } } else { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); - r600_bc_src_set_abs(&alu.src[0]); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src_set_abs(&alu.src[0]); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 1; alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); alu.src[0].sel = ctx->temp_reg; @@ -2614,13 +2618,13 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 1; @@ -2632,12 +2636,12 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (i == 2) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } } else { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 1; @@ -2647,14 +2651,14 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 1; @@ -2666,12 +2670,12 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (i == 2) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } } else { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 1; @@ -2681,17 +2685,17 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); - r600_bc_src_set_abs(&alu.src[0]); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src_set_abs(&alu.src[0]); alu.src[1].sel = ctx->temp_reg; alu.src[1].chan = 1; @@ -2701,7 +2705,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2710,11 +2714,11 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if ((inst->Dst[0].Register.WriteMask >> 2) & 1) { if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); - r600_bc_src_set_abs(&alu.src[0]); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src_set_abs(&alu.src[0]); alu.dst.sel = ctx->temp_reg; if (i == 2) @@ -2723,23 +2727,23 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (i == 2) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } } else { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); - r600_bc_src_set_abs(&alu.src[0]); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src_set_abs(&alu.src[0]); alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; alu.dst.chan = 2; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2747,7 +2751,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) /* result.w = 1.0; */ if ((inst->Dst[0].Register.WriteMask >> 3) & 1) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = V_SQ_ALU_SRC_1; @@ -2758,7 +2762,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2769,10 +2773,10 @@ static int tgsi_log(struct r600_shader_ctx *ctx) static int tgsi_eg_arl(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ARL: @@ -2786,11 +2790,11 @@ static int tgsi_eg_arl(struct r600_shader_ctx *ctx) return -1; } - r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); alu.last = 1; alu.dst.sel = ctx->ar_reg; alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; @@ -2800,12 +2804,12 @@ static int tgsi_eg_arl(struct r600_shader_ctx *ctx) * between ARL and AR usage. The easy way to do that is to remove * the MOVA here, and load it for the first AR access after ar_reg * has been modified in each clause. */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; alu.src[0].sel = ctx->ar_reg; alu.src[0].chan = 0; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; return 0; @@ -2814,19 +2818,19 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx) { /* TODO from r600c, ar values don't persist between clauses */ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int r; switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ARL: memset(&alu, 0, sizeof(alu)); alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR; - r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->ar_reg; alu.dst.write = 1; alu.last = 1; - if ((r = r600_bc_add_alu(ctx->bc, &alu))) + if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) return r; memset(&alu, 0, sizeof(alu)); @@ -2836,18 +2840,18 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx) alu.dst.write = 1; alu.last = 1; - if ((r = r600_bc_add_alu(ctx->bc, &alu))) + if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) return r; break; case TGSI_OPCODE_ARR: memset(&alu, 0, sizeof(alu)); alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; - r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->ar_reg; alu.dst.write = 1; alu.last = 1; - if ((r = r600_bc_add_alu(ctx->bc, &alu))) + if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) return r; break; default: @@ -2860,7 +2864,7 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx) alu.src[0].sel = ctx->ar_reg; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; ctx->bc->cf_last->r6xx_uses_waterfall = 1; @@ -2870,11 +2874,11 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx) static int tgsi_opdst(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int i, r = 0; for (i = 0; i < 4; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); @@ -2882,17 +2886,17 @@ static int tgsi_opdst(struct r600_shader_ctx *ctx) if (i == 0 || i == 3) { alu.src[0].sel = V_SQ_ALU_SRC_1; } else { - r600_bc_src(&alu.src[0], &ctx->src[0], i); + r600_bytecode_src(&alu.src[0], &ctx->src[0], i); } if (i == 0 || i == 2) { alu.src[1].sel = V_SQ_ALU_SRC_1; } else { - r600_bc_src(&alu.src[1], &ctx->src[1], i); + r600_bytecode_src(&alu.src[1], &ctx->src[1], i); } if (i == 3) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2901,10 +2905,10 @@ static int tgsi_opdst(struct r600_shader_ctx *ctx) static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) { - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = opcode; alu.predicate = 1; @@ -2912,13 +2916,13 @@ static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) alu.dst.write = 1; alu.dst.chan = 0; - r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); alu.src[1].sel = V_SQ_ALU_SRC_0; alu.src[1].chan = 0; alu.last = 1; - r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE)); + r = r600_bytecode_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE)); if (r) return r; return 0; @@ -2926,25 +2930,34 @@ static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) static int pops(struct r600_shader_ctx *ctx, int pops) { - int alu_pop = 3; - if (ctx->bc->cf_last) { - if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3) - alu_pop = 0; - else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3) - alu_pop = 1; - } - alu_pop += pops; - if (alu_pop == 1) { - ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3; - ctx->bc->force_add_cf = 1; - } else if (alu_pop == 2) { - ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3; - ctx->bc->force_add_cf = 1; - } else { - r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP)); + unsigned force_pop = ctx->bc->force_add_cf; + + if (!force_pop) { + int alu_pop = 3; + if (ctx->bc->cf_last) { + if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3) + alu_pop = 0; + else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3) + alu_pop = 1; + } + alu_pop += pops; + if (alu_pop == 1) { + ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3; + ctx->bc->force_add_cf = 1; + } else if (alu_pop == 2) { + ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3; + ctx->bc->force_add_cf = 1; + } else { + force_pop = 1; + } + } + + if (force_pop) { + r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP)); ctx->bc->cf_last->pop_count = pops; ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2; } + return 0; } @@ -3011,8 +3024,8 @@ static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp) { struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp]; - sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid, - sizeof(struct r600_bc_cf *) * (sp->num_mid + 1)); + sp->mid = (struct r600_bytecode_cf **)realloc((void *)sp->mid, + sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1)); sp->mid[sp->num_mid] = ctx->bc->cf_last; sp->num_mid++; } @@ -3040,14 +3053,14 @@ static void fc_poplevel(struct r600_shader_ctx *ctx) #if 0 static int emit_return(struct r600_shader_ctx *ctx) { - r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN); + r600_bytecode_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN); return 0; } static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset) { - r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP); + r600_bytecode_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP); ctx->bc->cf_last->pop_count = pops; /* TODO work out offset */ return 0; @@ -3076,7 +3089,7 @@ static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp) { emit_testflag(ctx); - r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); + r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); ctx->bc->cf_last->pop_count = 1; fc_set_mid(ctx, fc_sp); @@ -3089,7 +3102,7 @@ static int tgsi_if(struct r600_shader_ctx *ctx) { emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE)); - r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP)); + r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP)); fc_pushlevel(ctx, FC_IF); @@ -3099,7 +3112,7 @@ static int tgsi_if(struct r600_shader_ctx *ctx) static int tgsi_else(struct r600_shader_ctx *ctx) { - r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE)); + r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE)); ctx->bc->cf_last->pop_count = 1; fc_set_mid(ctx, ctx->bc->fc_sp); @@ -3129,7 +3142,7 @@ static int tgsi_endif(struct r600_shader_ctx *ctx) static int tgsi_bgnloop(struct r600_shader_ctx *ctx) { - r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL)); + r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL)); fc_pushlevel(ctx, FC_LOOP); @@ -3142,7 +3155,7 @@ static int tgsi_endloop(struct r600_shader_ctx *ctx) { int i; - r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END)); + r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END)); if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) { R600_ERR("loop/endloop in shader code are not paired.\n"); @@ -3182,7 +3195,7 @@ static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) return -EINVAL; } - r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); + r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); ctx->bc->cf_last->pop_count = 1; fc_set_mid(ctx, fscp); @@ -3228,7 +3241,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, - {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2}, {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, @@ -3353,6 +3366,18 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_LOAD, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_LOAD_MS, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_RESINFO, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, }; @@ -3386,7 +3411,7 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, - {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2}, {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, @@ -3511,6 +3536,18 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_LOAD, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_LOAD_MS, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_RESINFO, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, }; @@ -3544,7 +3581,7 @@ static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = { {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, - {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2}, {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, cayman_emit_float_instr}, {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, cayman_emit_float_instr}, {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, cayman_pow}, @@ -3669,5 +3706,17 @@ static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = { {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_LOAD, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_LOAD_MS, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_RESINFO, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, }; diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index 3ba84bd8907..ada369ade68 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -37,9 +37,10 @@ struct r600_shader_io { struct r600_shader { unsigned processor_type; - struct r600_bc bc; + struct r600_bytecode bc; unsigned ninput; unsigned noutput; + unsigned npos; unsigned nlds; struct r600_shader_io input[32]; struct r600_shader_io output[32]; diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 01406f2bad6..fba2af8a6ac 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -662,19 +662,19 @@ void r600_polygon_offset_update(struct r600_pipe_context *rctx) offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(depth); r600_pipe_state_add_reg(&state, R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE, - fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL); + fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(&state, R_028E04_PA_SU_POLY_OFFSET_FRONT_OFFSET, - fui(offset_units), 0xFFFFFFFF, NULL); + fui(offset_units), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(&state, R_028E08_PA_SU_POLY_OFFSET_BACK_SCALE, - fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL); + fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(&state, R_028E0C_PA_SU_POLY_OFFSET_BACK_OFFSET, - fui(offset_units), 0xFFFFFFFF, NULL); + fui(offset_units), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(&state, R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL, - offset_db_fmt_cntl, 0xFFFFFFFF, NULL); + offset_db_fmt_cntl, 0xFFFFFFFF, NULL, 0); r600_context_pipe_state_set(&rctx->ctx, &state); } } @@ -689,10 +689,10 @@ static void r600_set_blend_color(struct pipe_context *ctx, return; rstate->id = R600_PIPE_STATE_BLEND_COLOR; - r600_pipe_state_add_reg(rstate, R_028414_CB_BLEND_RED, fui(state->color[0]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028418_CB_BLEND_GREEN, fui(state->color[1]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02841C_CB_BLEND_BLUE, fui(state->color[2]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028420_CB_BLEND_ALPHA, fui(state->color[3]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028414_CB_BLEND_RED, fui(state->color[0]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028418_CB_BLEND_GREEN, fui(state->color[1]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02841C_CB_BLEND_BLUE, fui(state->color[2]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028420_CB_BLEND_ALPHA, fui(state->color[3]), 0xFFFFFFFF, NULL, 0); free(rctx->states[R600_PIPE_STATE_BLEND_COLOR]); rctx->states[R600_PIPE_STATE_BLEND_COLOR] = rstate; r600_context_pipe_state_set(&rctx->ctx, rstate); @@ -742,7 +742,7 @@ static void *r600_create_blend_state(struct pipe_context *ctx, blend->cb_target_mask = target_mask; /* MULTIWRITE_ENABLE is controlled by r600_pipe_shader_ps(). */ r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL, - color_control, 0xFFFFFFFD, NULL); + color_control, 0xFFFFFFFD, NULL, 0); for (int i = 0; i < 8; i++) { /* state->rt entries > 0 only written if independent blending */ @@ -773,9 +773,9 @@ static void *r600_create_blend_state(struct pipe_context *ctx, /* R600 does not support per-MRT blends */ if (rctx->family > CHIP_R600) - r600_pipe_state_add_reg(rstate, R_028780_CB_BLEND0_CONTROL + i * 4, bc, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028780_CB_BLEND0_CONTROL + i * 4, bc, 0xFFFFFFFF, NULL, 0); if (i == 0) - r600_pipe_state_add_reg(rstate, R_028804_CB_BLEND_CONTROL, bc, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028804_CB_BLEND_CONTROL, bc, 0xFFFFFFFF, NULL, 0); } return rstate; } @@ -842,28 +842,28 @@ static void *r600_create_dsa_state(struct pipe_context *ctx, S_028D10_FORCE_HIS_ENABLE0(V_028D10_FORCE_DISABLE) | S_028D10_FORCE_HIS_ENABLE1(V_028D10_FORCE_DISABLE); /* TODO db_render_override depends on query */ - r600_pipe_state_add_reg(rstate, R_028028_DB_STENCIL_CLEAR, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02802C_DB_DEPTH_CLEAR, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028410_SX_ALPHA_TEST_CONTROL, alpha_test_control, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028028_DB_STENCIL_CLEAR, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02802C_DB_DEPTH_CLEAR, 0x3F800000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028410_SX_ALPHA_TEST_CONTROL, alpha_test_control, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028430_DB_STENCILREFMASK, stencil_ref_mask, - 0xFFFFFFFF & C_028430_STENCILREF, NULL); + 0xFFFFFFFF & C_028430_STENCILREF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028434_DB_STENCILREFMASK_BF, stencil_ref_mask_bf, - 0xFFFFFFFF & C_028434_STENCILREF_BF, NULL); - r600_pipe_state_add_reg(rstate, R_0286E0_SPI_FOG_FUNC_SCALE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0286E4_SPI_FOG_FUNC_BIAS, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0286DC_SPI_FOG_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control, 0xFFFFFFFF, NULL); + 0xFFFFFFFF & C_028434_STENCILREF_BF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0286E0_SPI_FOG_FUNC_SCALE, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0286E4_SPI_FOG_FUNC_BIAS, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0286DC_SPI_FOG_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control, 0xFFFFFFFF, NULL, 0); /* The DB_SHADER_CONTROL mask is 0xFFFFFFBC since Z_EXPORT_ENABLE, * STENCIL_EXPORT_ENABLE and KILL_ENABLE are controlled by * r600_pipe_shader_ps().*/ - r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBC, NULL); - r600_pipe_state_add_reg(rstate, R_028D0C_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028D10_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028D2C_DB_SRESULTS_COMPARE_STATE1, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028D30_DB_PRELOAD_CONTROL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028D44_DB_ALPHA_TO_MASK, 0x0000AA00, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBC, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028D0C_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028D10_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028D2C_DB_SRESULTS_COMPARE_STATE1, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028D30_DB_PRELOAD_CONTROL, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028D44_DB_ALPHA_TO_MASK, 0x0000AA00, 0xFFFFFFFF, NULL, 0); return rstate; } @@ -907,7 +907,7 @@ static void *r600_create_rs_state(struct pipe_context *ctx, tmp |= S_0286D4_PNT_SPRITE_TOP_1(1); } } - r600_pipe_state_add_reg(rstate, R_0286D4_SPI_INTERP_CONTROL_0, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286D4_SPI_INTERP_CONTROL_0, tmp, 0xFFFFFFFF, NULL, 0); polygon_dual_mode = (state->fill_front != PIPE_POLYGON_MODE_FILL || state->fill_back != PIPE_POLYGON_MODE_FILL); @@ -921,33 +921,33 @@ static void *r600_create_rs_state(struct pipe_context *ctx, S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri) | S_028814_POLY_MODE(polygon_dual_mode) | S_028814_POLYMODE_FRONT_PTYPE(r600_translate_fill(state->fill_front)) | - S_028814_POLYMODE_BACK_PTYPE(r600_translate_fill(state->fill_back)), 0xFFFFFFFF, NULL); + S_028814_POLYMODE_BACK_PTYPE(r600_translate_fill(state->fill_back)), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_02881C_PA_CL_VS_OUT_CNTL, S_02881C_USE_VTX_POINT_SIZE(state->point_size_per_vertex) | - S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028820_PA_CL_NANINF_CNTL, 0x00000000, 0xFFFFFFFF, NULL); + S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028820_PA_CL_NANINF_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0); /* point size 12.4 fixed point */ tmp = (unsigned)(state->point_size * 8.0); - r600_pipe_state_add_reg(rstate, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A04_PA_SU_POINT_MINMAX, 0x80000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A04_PA_SU_POINT_MINMAX, 0x80000000, 0xFFFFFFFF, NULL, 0); tmp = (unsigned)state->line_width * 8; - r600_pipe_state_add_reg(rstate, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp), 0xFFFFFFFF, NULL, 0); - r600_pipe_state_add_reg(rstate, R_028A0C_PA_SC_LINE_STIPPLE, 0x00000005, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MPASS_PS_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A0C_PA_SC_LINE_STIPPLE, 0x00000005, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MPASS_PS_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028C08_PA_SU_VTX_CNTL, S_028C08_PIX_CENTER_HALF(state->gl_rasterization_rules), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); - r600_pipe_state_add_reg(rstate, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028DFC_PA_SU_POLY_OFFSET_CLAMP, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02820C_PA_SC_CLIPRECT_RULE, clip_rule, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028DFC_PA_SU_POLY_OFFSET_CLAMP, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02820C_PA_SC_CLIPRECT_RULE, clip_rule, 0xFFFFFFFF, NULL, 0); return rstate; } @@ -977,17 +977,17 @@ static void *r600_create_sampler_state(struct pipe_context *ctx, S_03C000_MIP_FILTER(r600_tex_mipfilter(state->min_mip_filter)) | S_03C000_MAX_ANISO(r600_tex_aniso_filter(state->max_anisotropy)) | S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)) | - S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0), 0xFFFFFFFF, NULL); + S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg_noblock(rstate, R_03C004_SQ_TEX_SAMPLER_WORD1_0, S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 6)) | S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)) | - S_03C004_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg_noblock(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0, S_03C008_TYPE(1), 0xFFFFFFFF, NULL); + S_03C004_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg_noblock(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0, S_03C008_TYPE(1), 0xFFFFFFFF, NULL, 0); if (uc.ui) { - r600_pipe_state_add_reg_noblock(rstate, R_00A400_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg_noblock(rstate, R_00A404_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg_noblock(rstate, R_00A408_TD_PS_SAMPLER0_BORDER_BLUE, fui(state->border_color[2]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg_noblock(rstate, R_00A40C_TD_PS_SAMPLER0_BORDER_ALPHA, fui(state->border_color[3]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg_noblock(rstate, R_00A400_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg_noblock(rstate, R_00A404_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg_noblock(rstate, R_00A408_TD_PS_SAMPLER0_BORDER_BLUE, fui(state->border_color[2]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg_noblock(rstate, R_00A40C_TD_PS_SAMPLER0_BORDER_ALPHA, fui(state->border_color[3]), 0xFFFFFFFF, NULL, 0); } return rstate; } @@ -996,10 +996,9 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c struct pipe_resource *texture, const struct pipe_sampler_view *state) { - struct r600_pipe_sampler_view *resource = CALLOC_STRUCT(r600_pipe_sampler_view); + struct r600_pipe_sampler_view *view = CALLOC_STRUCT(r600_pipe_sampler_view); struct r600_pipe_resource_state *rstate; - const struct util_format_description *desc; - struct r600_resource_texture *tmp; + struct r600_resource_texture *tmp = (struct r600_resource_texture*)texture; struct r600_resource *rbuffer; unsigned format, endian; uint32_t word4 = 0, yuv_format = 0, pitch = 0; @@ -1007,43 +1006,42 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c struct r600_bo *bo[2]; unsigned width, height, depth, offset_level, last_level; - if (resource == NULL) + if (view == NULL) return NULL; - rstate = &resource->state; + rstate = &view->state; /* initialize base object */ - resource->base = *state; - resource->base.texture = NULL; + view->base = *state; + view->base.texture = NULL; pipe_reference(NULL, &texture->reference); - resource->base.texture = texture; - resource->base.reference.count = 1; - resource->base.context = ctx; + view->base.texture = texture; + view->base.reference.count = 1; + view->base.context = ctx; swizzle[0] = state->swizzle_r; swizzle[1] = state->swizzle_g; swizzle[2] = state->swizzle_b; swizzle[3] = state->swizzle_a; + format = r600_translate_texformat(ctx->screen, state->format, swizzle, &word4, &yuv_format); if (format == ~0) { format = 0; } - desc = util_format_description(state->format); - if (desc == NULL) { - R600_ERR("unknown format %d\n", state->format); - } - tmp = (struct r600_resource_texture *)texture; + if (tmp->depth && !tmp->is_flushing_texture) { r600_texture_depth_flush(ctx, texture, TRUE); tmp = tmp->flushed_depth_texture; } + endian = r600_colorformat_endian_swap(format); if (tmp->force_int_type) { word4 &= C_038010_NUM_FORMAT_ALL; word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT); } + rbuffer = &tmp->resource; bo[0] = rbuffer->bo; bo[1] = rbuffer->bo; @@ -1068,6 +1066,8 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c rstate->bo[0] = bo[0]; rstate->bo[1] = bo[1]; + rstate->bo_usage[0] = RADEON_USAGE_READ; + rstate->bo_usage[1] = RADEON_USAGE_READ; rstate->val[0] = (S_038000_DIM(r600_tex_dim(texture->target)) | S_038000_TILE_MODE(array_mode) | @@ -1077,8 +1077,8 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c rstate->val[1] = (S_038004_TEX_HEIGHT(height - 1) | S_038004_TEX_DEPTH(depth - 1) | S_038004_DATA_FORMAT(format)); - rstate->val[2] = (tmp->offset[offset_level] + r600_bo_offset(bo[0])) >> 8; - rstate->val[3] = (tmp->offset[offset_level+1] + r600_bo_offset(bo[1])) >> 8; + rstate->val[2] = tmp->offset[offset_level] >> 8; + rstate->val[3] = tmp->offset[offset_level+1] >> 8; rstate->val[4] = (word4 | S_038010_SRF_MODE_ALL(V_038010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) | S_038010_REQUEST_SIZE(1) | @@ -1090,7 +1090,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c rstate->val[6] = (S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE) | S_038018_MAX_ANISO(4 /* max 16 samples */)); - return &resource->base; + return &view->base; } static void r600_set_vs_sampler_view(struct pipe_context *ctx, unsigned count, @@ -1157,7 +1157,7 @@ static void r600_set_seamless_cubemap(struct r600_pipe_context *rctx, boolean en rstate->id = R600_PIPE_STATE_SEAMLESS_CUBEMAP; r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX, (enable ? 0 : S_009508_DISABLE_CUBE_WRAP(1)), - 1, NULL); + 1, NULL, 0); free(rctx->states[R600_PIPE_STATE_SEAMLESS_CUBEMAP]); rctx->states[R600_PIPE_STATE_SEAMLESS_CUBEMAP] = rstate; @@ -1215,21 +1215,21 @@ static void r600_set_clip_state(struct pipe_context *ctx, for (int i = 0; i < state->nr; i++) { r600_pipe_state_add_reg(rstate, R_028E20_PA_CL_UCP0_X + i * 16, - fui(state->ucp[i][0]), 0xFFFFFFFF, NULL); + fui(state->ucp[i][0]), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028E24_PA_CL_UCP0_Y + i * 16, - fui(state->ucp[i][1]) , 0xFFFFFFFF, NULL); + fui(state->ucp[i][1]) , 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028E28_PA_CL_UCP0_Z + i * 16, - fui(state->ucp[i][2]), 0xFFFFFFFF, NULL); + fui(state->ucp[i][2]), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028E2C_PA_CL_UCP0_W + i * 16, - fui(state->ucp[i][3]), 0xFFFFFFFF, NULL); + fui(state->ucp[i][3]), 0xFFFFFFFF, NULL, 0); } r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, S_028810_PS_UCP_MODE(3) | ((1 << state->nr) - 1) | S_028810_ZCLIP_NEAR_DISABLE(state->depth_clamp) | - S_028810_ZCLIP_FAR_DISABLE(state->depth_clamp), 0xFFFFFFFF, NULL); + S_028810_ZCLIP_FAR_DISABLE(state->depth_clamp), 0xFFFFFFFF, NULL, 0); free(rctx->states[R600_PIPE_STATE_CLIP]); rctx->states[R600_PIPE_STATE_CLIP] = rstate; @@ -1260,28 +1260,28 @@ static void r600_set_scissor_state(struct pipe_context *ctx, br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy); r600_pipe_state_add_reg(rstate, R_028210_PA_SC_CLIPRECT_0_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028214_PA_SC_CLIPRECT_0_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028218_PA_SC_CLIPRECT_1_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_02821C_PA_SC_CLIPRECT_1_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028220_PA_SC_CLIPRECT_2_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028224_PA_SC_CLIPRECT_2_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028228_PA_SC_CLIPRECT_3_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_02822C_PA_SC_CLIPRECT_3_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); free(rctx->states[R600_PIPE_STATE_SCISSOR]); rctx->states[R600_PIPE_STATE_SCISSOR] = rstate; @@ -1303,11 +1303,11 @@ static void r600_set_stencil_ref(struct pipe_context *ctx, tmp = S_028430_STENCILREF(state->ref_value[0]); r600_pipe_state_add_reg(rstate, R_028430_DB_STENCILREFMASK, tmp, - ~C_028430_STENCILREF, NULL); + ~C_028430_STENCILREF, NULL, 0); tmp = S_028434_STENCILREF_BF(state->ref_value[1]); r600_pipe_state_add_reg(rstate, R_028434_DB_STENCILREFMASK_BF, tmp, - ~C_028434_STENCILREF_BF, NULL); + ~C_028434_STENCILREF_BF, NULL, 0); free(rctx->states[R600_PIPE_STATE_STENCIL_REF]); rctx->states[R600_PIPE_STATE_STENCIL_REF] = rstate; @@ -1325,15 +1325,15 @@ static void r600_set_viewport_state(struct pipe_context *ctx, rctx->viewport = *state; rstate->id = R600_PIPE_STATE_VIEWPORT; - r600_pipe_state_add_reg(rstate, R_0282D0_PA_SC_VPORT_ZMIN_0, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0282D4_PA_SC_VPORT_ZMAX_0, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028818_PA_CL_VTE_CNTL, 0x0000043F, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0282D0_PA_SC_VPORT_ZMIN_0, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0282D4_PA_SC_VPORT_ZMAX_0, 0x3F800000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028818_PA_CL_VTE_CNTL, 0x0000043F, 0xFFFFFFFF, NULL, 0); free(rctx->states[R600_PIPE_STATE_VIEWPORT]); rctx->states[R600_PIPE_STATE_VIEWPORT] = rstate; @@ -1441,27 +1441,27 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta r600_pipe_state_add_reg(rstate, R_028040_CB_COLOR0_BASE + cb * 4, - (offset + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]); + offset >> 8, 0xFFFFFFFF, bo[0], RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_0280A0_CB_COLOR0_INFO + cb * 4, - color_info, 0xFFFFFFFF, bo[0]); + color_info, 0xFFFFFFFF, bo[0], RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_028060_CB_COLOR0_SIZE + cb * 4, S_028060_PITCH_TILE_MAX(pitch) | S_028060_SLICE_TILE_MAX(slice), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028080_CB_COLOR0_VIEW + cb * 4, - 0x00000000, 0xFFFFFFFF, NULL); + 0x00000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_0280E0_CB_COLOR0_FRAG + cb * 4, - r600_bo_offset(bo[1]) >> 8, 0xFFFFFFFF, bo[1]); + 0, 0xFFFFFFFF, bo[1], RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_0280C0_CB_COLOR0_TILE + cb * 4, - r600_bo_offset(bo[2]) >> 8, 0xFFFFFFFF, bo[2]); + 0, 0xFFFFFFFF, bo[2], RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_028100_CB_COLOR0_MASK + cb * 4, - 0x00000000, 0xFFFFFFFF, NULL); + 0x00000000, 0xFFFFFFFF, NULL, 0); } static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rstate, @@ -1492,16 +1492,16 @@ static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta format = r600_translate_dbformat(state->zsbuf->texture->format); r600_pipe_state_add_reg(rstate, R_02800C_DB_DEPTH_BASE, - (offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + offset >> 8, 0xFFFFFFFF, rbuffer->bo, RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_028000_DB_DEPTH_SIZE, S_028000_PITCH_TILE_MAX(pitch) | S_028000_SLICE_TILE_MAX(slice), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028004_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028004_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028010_DB_DEPTH_INFO, S_028010_ARRAY_MODE(rtex->array_mode[level]) | S_028010_FORMAT(format), - 0xFFFFFFFF, rbuffer->bo); + 0xFFFFFFFF, rbuffer->bo, RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_028D34_DB_PREFETCH_LIMIT, - (surf->aligned_height / 8) - 1, 0xFFFFFFFF, NULL); + (surf->aligned_height / 8) - 1, 0xFFFFFFFF, NULL, 0); } static void r600_set_framebuffer_state(struct pipe_context *ctx, @@ -1546,59 +1546,59 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, r600_pipe_state_add_reg(rstate, R_028030_PA_SC_SCREEN_SCISSOR_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028034_PA_SC_SCREEN_SCISSOR_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028204_PA_SC_WINDOW_SCISSOR_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028208_PA_SC_WINDOW_SCISSOR_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028240_PA_SC_GENERIC_SCISSOR_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028244_PA_SC_GENERIC_SCISSOR_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028250_PA_SC_VPORT_SCISSOR_0_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028254_PA_SC_VPORT_SCISSOR_0_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028200_PA_SC_WINDOW_OFFSET, 0x00000000, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); if (rctx->chip_class >= R700) { r600_pipe_state_add_reg(rstate, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); } r600_pipe_state_add_reg(rstate, R_0287A0_CB_SHADER_CONTROL, - shader_control, 0xFFFFFFFF, NULL); + shader_control, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028238_CB_TARGET_MASK, - 0x00000000, target_mask, NULL); + 0x00000000, target_mask, NULL, 0); r600_pipe_state_add_reg(rstate, R_02823C_CB_SHADER_MASK, - shader_mask, 0xFFFFFFFF, NULL); + shader_mask, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028C04_PA_SC_AA_CONFIG, - 0x00000000, 0xFFFFFFFF, NULL); + 0x00000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, - 0x00000000, 0xFFFFFFFF, NULL); + 0x00000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028C20_PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX, - 0x00000000, 0xFFFFFFFF, NULL); + 0x00000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028C30_CB_CLRCMP_CONTROL, - 0x01000000, 0xFFFFFFFF, NULL); + 0x01000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028C34_CB_CLRCMP_SRC, - 0x00000000, 0xFFFFFFFF, NULL); + 0x00000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028C38_CB_CLRCMP_DST, - 0x000000FF, 0xFFFFFFFF, NULL); + 0x000000FF, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028C3C_CB_CLRCMP_MSK, - 0xFFFFFFFF, 0xFFFFFFFF, NULL); + 0xFFFFFFFF, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028C48_PA_SC_AA_MASK, - 0xFFFFFFFF, 0xFFFFFFFF, NULL); + 0xFFFFFFFF, 0xFFFFFFFF, NULL, 0); free(rctx->states[R600_PIPE_STATE_FRAMEBUFFER]); rctx->states[R600_PIPE_STATE_FRAMEBUFFER] = rstate; @@ -1674,7 +1674,7 @@ void r600_adjust_gprs(struct r600_pipe_context *rctx) if (rctx->chip_class >= EVERGREEN) return; - if (!rctx->ps_shader && !rctx->vs_shader) + if (!rctx->ps_shader || !rctx->vs_shader) return; if (rctx->ps_shader->shader.bc.ngpr > rctx->default_ps_gprs) @@ -1695,7 +1695,7 @@ void r600_adjust_gprs(struct r600_pipe_context *rctx) tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs); tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs); rstate.nregs = 0; - r600_pipe_state_add_reg(&rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0x0FFFFFFF, NULL); + r600_pipe_state_add_reg(&rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0x0FFFFFFF, NULL, 0); r600_context_pipe_state_set(&rctx->ctx, &rstate); } @@ -1866,20 +1866,20 @@ void r600_init_config(struct r600_pipe_context *rctx) tmp |= S_008C00_VS_PRIO(vs_prio); tmp |= S_008C00_GS_PRIO(gs_prio); tmp |= S_008C00_ES_PRIO(es_prio); - r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL, 0); /* SQ_GPR_RESOURCE_MGMT_1 */ tmp = 0; tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs); tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs); tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs); - r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL, 0); /* SQ_GPR_RESOURCE_MGMT_2 */ tmp = 0; tmp |= S_008C08_NUM_GS_GPRS(num_gs_gprs); tmp |= S_008C08_NUM_ES_GPRS(num_es_gprs); - r600_pipe_state_add_reg(rstate, R_008C08_SQ_GPR_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C08_SQ_GPR_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL, 0); /* SQ_THREAD_RESOURCE_MGMT */ tmp = 0; @@ -1887,78 +1887,78 @@ void r600_init_config(struct r600_pipe_context *rctx) tmp |= S_008C0C_NUM_VS_THREADS(num_vs_threads); tmp |= S_008C0C_NUM_GS_THREADS(num_gs_threads); tmp |= S_008C0C_NUM_ES_THREADS(num_es_threads); - r600_pipe_state_add_reg(rstate, R_008C0C_SQ_THREAD_RESOURCE_MGMT, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C0C_SQ_THREAD_RESOURCE_MGMT, tmp, 0xFFFFFFFF, NULL, 0); /* SQ_STACK_RESOURCE_MGMT_1 */ tmp = 0; tmp |= S_008C10_NUM_PS_STACK_ENTRIES(num_ps_stack_entries); tmp |= S_008C10_NUM_VS_STACK_ENTRIES(num_vs_stack_entries); - r600_pipe_state_add_reg(rstate, R_008C10_SQ_STACK_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C10_SQ_STACK_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL, 0); /* SQ_STACK_RESOURCE_MGMT_2 */ tmp = 0; tmp |= S_008C14_NUM_GS_STACK_ENTRIES(num_gs_stack_entries); tmp |= S_008C14_NUM_ES_STACK_ENTRIES(num_es_stack_entries); - r600_pipe_state_add_reg(rstate, R_008C14_SQ_STACK_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C14_SQ_STACK_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL, 0); - r600_pipe_state_add_reg(rstate, R_009714_VC_ENHANCE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028350_SX_MISC, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_009714_VC_ENHANCE, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028350_SX_MISC, 0x00000000, 0xFFFFFFFF, NULL, 0); if (rctx->chip_class >= R700) { - r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00004000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00004000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX, S_009508_DISABLE_CUBE_ANISO(1) | S_009508_SYNC_GRADIENT(1) | S_009508_SYNC_WALKER(1) | - S_009508_SYNC_ALIGNER(1), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_009830_DB_DEBUG, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_009838_DB_WATERMARKS, 0x00420204, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0286C8_SPI_THREAD_GROUPING, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL, 0x00514002, 0xFFFFFFFF, NULL); + S_009508_SYNC_ALIGNER(1), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_009830_DB_DEBUG, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_009838_DB_WATERMARKS, 0x00420204, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0286C8_SPI_THREAD_GROUPING, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL, 0x00514002, 0xFFFFFFFF, NULL, 0); } else { - r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX, S_009508_DISABLE_CUBE_ANISO(1) | S_009508_SYNC_GRADIENT(1) | S_009508_SYNC_WALKER(1) | - S_009508_SYNC_ALIGNER(1), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_009830_DB_DEBUG, 0x82000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_009838_DB_WATERMARKS, 0x01020204, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0286C8_SPI_THREAD_GROUPING, 0x00000001, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL, 0x00004012, 0xFFFFFFFF, NULL); - } - r600_pipe_state_add_reg(rstate, R_0288A8_SQ_ESGS_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0288AC_SQ_GSVS_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0288B0_SQ_ESTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0288B4_SQ_GSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0288B8_SQ_VSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0288BC_SQ_PSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0288C0_SQ_FBUF_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0288C4_SQ_REDUC_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0288C8_SQ_GS_VERT_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A14_VGT_HOS_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A20_VGT_HOS_REUSE_DEPTH, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A24_VGT_GROUP_PRIM_TYPE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A28_VGT_GROUP_FIRST_DECR, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A2C_VGT_GROUP_DECR, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A40_VGT_GS_MODE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028AB0_VGT_STRMOUT_EN, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028AB4_VGT_REUSE_OFF, 0x00000001, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028AB8_VGT_VTX_CNT_EN, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028B20_VGT_STRMOUT_BUFFER_EN, 0x00000000, 0xFFFFFFFF, NULL); - - r600_pipe_state_add_reg(rstate, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A84_VGT_PRIMITIVEID_EN, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028AA4_VGT_INSTANCE_STEP_RATE_1, 0x00000000, 0xFFFFFFFF, NULL); + S_009508_SYNC_ALIGNER(1), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_009830_DB_DEBUG, 0x82000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_009838_DB_WATERMARKS, 0x01020204, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0286C8_SPI_THREAD_GROUPING, 0x00000001, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL, 0x00004012, 0xFFFFFFFF, NULL, 0); + } + r600_pipe_state_add_reg(rstate, R_0288A8_SQ_ESGS_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0288AC_SQ_GSVS_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0288B0_SQ_ESTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0288B4_SQ_GSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0288B8_SQ_VSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0288BC_SQ_PSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0288C0_SQ_FBUF_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0288C4_SQ_REDUC_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0288C8_SQ_GS_VERT_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A14_VGT_HOS_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A20_VGT_HOS_REUSE_DEPTH, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A24_VGT_GROUP_PRIM_TYPE, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A28_VGT_GROUP_FIRST_DECR, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A2C_VGT_GROUP_DECR, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A40_VGT_GS_MODE, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028AB0_VGT_STRMOUT_EN, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028AB4_VGT_REUSE_OFF, 0x00000001, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028AB8_VGT_VTX_CNT_EN, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028B20_VGT_STRMOUT_BUFFER_EN, 0x00000000, 0xFFFFFFFF, NULL, 0); + + r600_pipe_state_add_reg(rstate, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A84_VGT_PRIMITIVEID_EN, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028AA4_VGT_INSTANCE_STEP_RATE_1, 0x00000000, 0xFFFFFFFF, NULL, 0); r600_context_pipe_state_set(&rctx->ctx, rstate); } @@ -2022,38 +2022,38 @@ void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shad S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr); } - r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028840_SQ_PGM_START_PS, - r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); + 0, 0xFFFFFFFF, shader->bo, RADEON_USAGE_READ); r600_pipe_state_add_reg(rstate, R_028850_SQ_PGM_RESOURCES_PS, S_028868_NUM_GPRS(rshader->bc.ngpr) | S_028868_STACK_SIZE(rshader->bc.nstack), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028854_SQ_PGM_EXPORTS_PS, - exports_ps, 0xFFFFFFFF, NULL); + exports_ps, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_0288CC_SQ_PGM_CF_OFFSET_PS, - 0x00000000, 0xFFFFFFFF, NULL); + 0x00000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL, S_028808_MULTIWRITE_ENABLE(!!rshader->fs_write_all), S_028808_MULTIWRITE_ENABLE(1), - NULL); + NULL, 0); /* only set some bits here, the other bits are set in the dsa state */ r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, S_02880C_Z_EXPORT_ENABLE(1) | S_02880C_STENCIL_REF_EXPORT_ENABLE(1) | S_02880C_KILL_ENABLE(1), - NULL); + NULL, 0); r600_pipe_state_add_reg(rstate, R_03E200_SQ_LOOP_CONST_0, 0x01000FFF, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); } void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader) @@ -2062,7 +2062,7 @@ void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shad struct r600_pipe_state *rstate = &shader->rstate; struct r600_shader *rshader = &shader->shader; unsigned spi_vs_out_id[10]; - unsigned i, tmp; + unsigned i, tmp, nparams; /* clear previous register */ rstate->nregs = 0; @@ -2081,28 +2081,36 @@ void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shad for (i = 0; i < 10; i++) { r600_pipe_state_add_reg(rstate, R_028614_SPI_VS_OUT_ID_0 + i * 4, - spi_vs_out_id[i], 0xFFFFFFFF, NULL); + spi_vs_out_id[i], 0xFFFFFFFF, NULL, 0); } + /* Certain attributes (position, psize, etc.) don't count as params. + * VS is required to export at least one param and r600_shader_from_tgsi() + * takes care of adding a dummy export. + */ + nparams = rshader->noutput - rshader->npos; + if (nparams < 1) + nparams = 1; + r600_pipe_state_add_reg(rstate, R_0286C4_SPI_VS_OUT_CONFIG, - S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2), - 0xFFFFFFFF, NULL); + S_0286C4_VS_EXPORT_COUNT(nparams - 1), + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028868_SQ_PGM_RESOURCES_VS, S_028868_NUM_GPRS(rshader->bc.ngpr) | S_028868_STACK_SIZE(rshader->bc.nstack), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_0288D0_SQ_PGM_CF_OFFSET_VS, - 0x00000000, 0xFFFFFFFF, NULL); + 0x00000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028858_SQ_PGM_START_VS, - r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); + 0, 0xFFFFFFFF, shader->bo, RADEON_USAGE_READ); r600_pipe_state_add_reg(rstate, R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); } void r600_fetch_shader(struct pipe_context *ctx, @@ -2115,12 +2123,12 @@ void r600_fetch_shader(struct pipe_context *ctx, rstate->id = R600_PIPE_STATE_FETCH_SHADER; rstate->nregs = 0; r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_RESOURCES_FS, - 0x00000000, 0xFFFFFFFF, NULL); + 0x00000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_0288DC_SQ_PGM_CF_OFFSET_FS, - 0x00000000, 0xFFFFFFFF, NULL); + 0x00000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028894_SQ_PGM_START_FS, - r600_bo_offset(ve->fetch_shader) >> 8, - 0xFFFFFFFF, ve->fetch_shader); + 0, + 0xFFFFFFFF, ve->fetch_shader, RADEON_USAGE_READ); } void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx) @@ -2149,7 +2157,7 @@ void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx) r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, 0x0, - S_02880C_DUAL_EXPORT_ENABLE(1), NULL); + S_02880C_DUAL_EXPORT_ENABLE(1), NULL, 0); r600_pipe_state_add_reg(rstate, R_028D0C_DB_RENDER_CONTROL, S_028D0C_DEPTH_COPY_ENABLE(1) | @@ -2157,7 +2165,7 @@ void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx) S_028D0C_COPY_CENTROID(1), S_028D0C_DEPTH_COPY_ENABLE(1) | S_028D0C_STENCIL_COPY_ENABLE(1) | - S_028D0C_COPY_CENTROID(1), NULL); + S_028D0C_COPY_CENTROID(1), NULL, 0); return rstate; } @@ -2178,10 +2186,12 @@ void r600_pipe_init_buffer_resource(struct r600_pipe_context *rctx, void r600_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate, struct r600_resource *rbuffer, - unsigned offset, unsigned stride) + unsigned offset, unsigned stride, + enum radeon_bo_usage usage) { rstate->val[0] = offset; rstate->bo[0] = rbuffer->bo; + rstate->bo_usage[0] = usage; rstate->val[1] = rbuffer->bo_size - offset - 1; rstate->val[2] = S_038008_ENDIAN_SWAP(r600_endian_swap(32)) | S_038008_STRIDE(stride); diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 408eaed491b..853458f0156 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -150,7 +150,7 @@ void r600_delete_state(struct pipe_context *ctx, void *state) rctx->states[rstate->id] = NULL; } for (int i = 0; i < rstate->nregs; i++) { - r600_bo_reference(rctx->radeon, &rstate->regs[i].bo, NULL); + r600_bo_reference(&rstate->regs[i].bo, NULL); } free(rstate); } @@ -181,7 +181,7 @@ void r600_delete_vertex_element(struct pipe_context *ctx, void *state) if (rctx->vertex_elements == state) rctx->vertex_elements = NULL; - r600_bo_reference(rctx->radeon, &v->fetch_shader, NULL); + r600_bo_reference(&v->fetch_shader, NULL); u_vbuf_mgr_destroy_vertex_elements(rctx->vbuf_mgr, v->vmgr_elements); FREE(state); } @@ -336,7 +336,7 @@ static void r600_update_alpha_ref(struct r600_pipe_context *rctx) rstate.nregs = 0; if (rctx->export_16bpc) alpha_ref &= ~0x1FFF; - r600_pipe_state_add_reg(&rstate, R_028438_SX_ALPHA_REF, alpha_ref, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&rstate, R_028438_SX_ALPHA_REF, alpha_ref, 0xFFFFFFFF, NULL, 0); r600_context_pipe_state_set(&rctx->ctx, &rstate); rctx->alpha_ref_dirty = false; @@ -349,7 +349,7 @@ static void r600_spi_block_init(struct r600_pipe_context *rctx, struct r600_pipe rstate->nregs = 0; rstate->id = R600_PIPE_STATE_SPI; for (i = 0; i < 32; i++) { - r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, 0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, 0, 0xFFFFFFFF, NULL, 0); } } @@ -418,7 +418,6 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, } r600_upload_const_buffer(rctx, &rbuffer, &offset); - offset += r600_bo_offset(rbuffer->r.bo); switch (shader) { case PIPE_SHADER_VERTEX: @@ -426,10 +425,10 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, r600_pipe_state_add_reg(&rctx->vs_const_buffer, R_028180_ALU_CONST_BUFFER_SIZE_VS_0, ALIGN_DIVUP(buffer->width0 >> 4, 16), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(&rctx->vs_const_buffer, R_028980_ALU_CONST_CACHE_VS_0, - offset >> 8, 0xFFFFFFFF, rbuffer->r.bo); + offset >> 8, 0xFFFFFFFF, rbuffer->r.bo, RADEON_USAGE_READ); r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer); rstate = &rctx->vs_const_buffer_resource[index]; @@ -442,10 +441,10 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, } if (rctx->chip_class >= EVERGREEN) { - evergreen_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16); + evergreen_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16, RADEON_USAGE_READ); evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index); } else { - r600_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16); + r600_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16, RADEON_USAGE_READ); r600_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index); } break; @@ -454,10 +453,10 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, r600_pipe_state_add_reg(&rctx->ps_const_buffer, R_028140_ALU_CONST_BUFFER_SIZE_PS_0, ALIGN_DIVUP(buffer->width0 >> 4, 16), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(&rctx->ps_const_buffer, R_028940_ALU_CONST_CACHE_PS_0, - offset >> 8, 0xFFFFFFFF, rbuffer->r.bo); + offset >> 8, 0xFFFFFFFF, rbuffer->r.bo, RADEON_USAGE_READ); r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer); rstate = &rctx->ps_const_buffer_resource[index]; @@ -469,10 +468,10 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, } } if (rctx->chip_class >= EVERGREEN) { - evergreen_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16); + evergreen_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16, RADEON_USAGE_READ); evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index); } else { - r600_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16); + r600_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16, RADEON_USAGE_READ); r600_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index); } break; @@ -518,7 +517,7 @@ static void r600_vertex_buffer_update(struct r600_pipe_context *rctx) } if (vertex_buffer == NULL || rbuffer == NULL) continue; - offset += vertex_buffer->buffer_offset + r600_bo_offset(rbuffer->bo); + offset += vertex_buffer->buffer_offset; if (!rstate->id) { if (rctx->chip_class >= EVERGREEN) { @@ -529,10 +528,10 @@ static void r600_vertex_buffer_update(struct r600_pipe_context *rctx) } if (rctx->chip_class >= EVERGREEN) { - evergreen_pipe_mod_buffer_resource(rstate, rbuffer, offset, vertex_buffer->stride); + evergreen_pipe_mod_buffer_resource(rstate, rbuffer, offset, vertex_buffer->stride, RADEON_USAGE_READ); evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i); } else { - r600_pipe_mod_buffer_resource(rstate, rbuffer, offset, vertex_buffer->stride); + r600_pipe_mod_buffer_resource(rstate, rbuffer, offset, vertex_buffer->stride, RADEON_USAGE_READ); r600_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i); } } @@ -615,16 +614,18 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) if (rctx->vgt.id != R600_PIPE_STATE_VGT) { rctx->vgt.id = R600_PIPE_STATE_VGT; rctx->vgt.nregs = 0; - r600_pipe_state_add_reg(&rctx->vgt, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&rctx->vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&rctx->vgt, R_028400_VGT_MAX_VTX_INDX, draw.info.max_index, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&rctx->vgt, R_028404_VGT_MIN_VTX_INDX, draw.info.min_index, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&rctx->vgt, R_028408_VGT_INDX_OFFSET, draw.info.index_bias, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&rctx->vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&rctx->vgt, R_03CFF4_SQ_VTX_START_INST_LOC, draw.info.start_instance, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&rctx->vgt, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(&rctx->vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(&rctx->vgt, R_028400_VGT_MAX_VTX_INDX, draw.info.max_index, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(&rctx->vgt, R_028404_VGT_MIN_VTX_INDX, draw.info.min_index, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(&rctx->vgt, R_028408_VGT_INDX_OFFSET, draw.info.index_bias, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(&rctx->vgt, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, draw.info.restart_index, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(&rctx->vgt, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, draw.info.primitive_restart, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(&rctx->vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(&rctx->vgt, R_03CFF4_SQ_VTX_START_INST_LOC, draw.info.start_instance, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(&rctx->vgt, R_028814_PA_SU_SC_MODE_CNTL, 0, - S_028814_PROVOKING_VTX_LAST(1), NULL); + S_028814_PROVOKING_VTX_LAST(1), NULL, 0); } @@ -634,6 +635,8 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) r600_pipe_state_mod_reg(&rctx->vgt, draw.info.max_index); r600_pipe_state_mod_reg(&rctx->vgt, draw.info.min_index); r600_pipe_state_mod_reg(&rctx->vgt, draw.info.index_bias); + r600_pipe_state_mod_reg(&rctx->vgt, draw.info.restart_index); + r600_pipe_state_mod_reg(&rctx->vgt, draw.info.primitive_restart); r600_pipe_state_mod_reg(&rctx->vgt, 0); r600_pipe_state_mod_reg(&rctx->vgt, draw.info.start_instance); if (draw.info.mode == PIPE_PRIM_QUADS || draw.info.mode == PIPE_PRIM_QUAD_STRIP || draw.info.mode == PIPE_PRIM_POLYGON) { @@ -676,11 +679,14 @@ void _r600_pipe_state_add_reg(struct r600_context *ctx, struct r600_pipe_state *state, u32 offset, u32 value, u32 mask, u32 range_id, u32 block_id, - struct r600_bo *bo) + struct r600_bo *bo, + enum radeon_bo_usage usage) { struct r600_range *range; struct r600_block *block; + if (bo) assert(usage); + range = &ctx->range[range_id]; block = range->blocks[block_id]; state->regs[state->nregs].block = block; @@ -689,6 +695,7 @@ void _r600_pipe_state_add_reg(struct r600_context *ctx, state->regs[state->nregs].value = value; state->regs[state->nregs].mask = mask; state->regs[state->nregs].bo = bo; + state->regs[state->nregs].bo_usage = usage; state->nregs++; assert(state->nregs < R600_BLOCK_MAX_REG); @@ -696,13 +703,17 @@ void _r600_pipe_state_add_reg(struct r600_context *ctx, void r600_pipe_state_add_reg_noblock(struct r600_pipe_state *state, u32 offset, u32 value, u32 mask, - struct r600_bo *bo) + struct r600_bo *bo, + enum radeon_bo_usage usage) { + if (bo) assert(usage); + state->regs[state->nregs].id = offset; state->regs[state->nregs].block = NULL; state->regs[state->nregs].value = value; state->regs[state->nregs].mask = mask; state->regs[state->nregs].bo = bo; + state->regs[state->nregs].bo_usage = usage; state->nregs++; assert(state->nregs < R600_BLOCK_MAX_REG); diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index e9e8b277243..7c1bd9d8ec6 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -31,7 +31,6 @@ #include <util/u_math.h> #include <util/u_inlines.h> #include <util/u_memory.h> -#include "state_tracker/drm_driver.h" #include "pipebuffer/pb_buffer.h" #include "r600_pipe.h" #include "r600_resource.h" @@ -67,7 +66,7 @@ static void r600_copy_from_staging_texture(struct pipe_context *ctx, struct r600 rtransfer->staging_texture, 0, &sbox); - ctx->flush(ctx, NULL); + r600_flush(ctx, NULL, RADEON_FLUSH_ASYNC); } unsigned r600_texture_get_offset(struct r600_resource_texture *rtex, @@ -174,15 +173,15 @@ static unsigned r600_texture_get_nblocksx(struct pipe_screen *screen, { struct pipe_resource *ptex = &rtex->resource.b.b.b; unsigned nblocksx, block_align, width; - unsigned blocksize = util_format_get_blocksize(ptex->format); + unsigned blocksize = util_format_get_blocksize(rtex->real_format); if (rtex->pitch_override) return rtex->pitch_override / blocksize; width = mip_minify(ptex->width0, level); - nblocksx = util_format_get_nblocksx(ptex->format, width); + nblocksx = util_format_get_nblocksx(rtex->real_format, width); - block_align = r600_get_block_alignment(screen, ptex->format, + block_align = r600_get_block_alignment(screen, rtex->real_format, rtex->array_mode[level]); nblocksx = align(nblocksx, block_align); return nblocksx; @@ -196,9 +195,19 @@ static unsigned r600_texture_get_nblocksy(struct pipe_screen *screen, unsigned height, tile_height; height = mip_minify(ptex->height0, level); - height = util_format_get_nblocksy(ptex->format, height); + height = util_format_get_nblocksy(rtex->real_format, height); tile_height = r600_get_height_alignment(screen, rtex->array_mode[level]); + + /* XXX Hack around an alignment issue. Less tests fail with this. + * + * The thing is depth-stencil buffers should be tiled, i.e. + * the alignment should be >=8. If I make them tiled, stencil starts + * working because it no longer overlaps with the depth buffer + * in memory, but texturing like drawpix-stencil breaks. */ + if (util_format_is_depth_or_stencil(rtex->real_format) && tile_height < 8) + tile_height = 8; + height = align(height, tile_height); return height; } @@ -221,7 +230,7 @@ static void r600_texture_set_array_mode(struct pipe_screen *screen, unsigned w, h, tile_height, tile_width; tile_height = r600_get_height_alignment(screen, array_mode); - tile_width = r600_get_block_alignment(screen, ptex->format, array_mode); + tile_width = r600_get_block_alignment(screen, rtex->real_format, array_mode); w = mip_minify(ptex->width0, level); h = mip_minify(ptex->height0, level); @@ -239,14 +248,14 @@ static void r600_setup_miptree(struct pipe_screen *screen, unsigned array_mode) { struct pipe_resource *ptex = &rtex->resource.b.b.b; - struct radeon *radeon = (struct radeon *)screen->winsys; + struct radeon *radeon = ((struct r600_screen*)screen)->radeon; enum chip_class chipc = r600_get_family_class(radeon); unsigned size, layer_size, i, offset; - unsigned nblocksx, nblocksy, extra_size = 0; + unsigned nblocksx, nblocksy; for (i = 0, offset = 0; i <= ptex->last_level; i++) { - unsigned blocksize = util_format_get_blocksize(ptex->format); - unsigned base_align = r600_get_base_alignment(screen, ptex->format, array_mode); + unsigned blocksize = util_format_get_blocksize(rtex->real_format); + unsigned base_align = r600_get_base_alignment(screen, rtex->real_format, array_mode); r600_texture_set_array_mode(screen, rtex, i, array_mode); @@ -265,10 +274,6 @@ static void r600_setup_miptree(struct pipe_screen *screen, else size = layer_size * ptex->array_size; - /* evergreen stores depth and stencil separately */ - if ((chipc >= EVERGREEN) && util_format_is_depth_or_stencil(ptex->format)) - extra_size = align(extra_size + (nblocksx * nblocksy * 1), base_align); - /* align base image and start of miptree */ if ((i == 0) || (i == 1)) offset = align(offset, base_align); @@ -279,7 +284,7 @@ static void r600_setup_miptree(struct pipe_screen *screen, offset += size; } - rtex->size = offset + extra_size; + rtex->size = offset; } /* Figure out whether u_blitter will fallback to a transfer operation. @@ -329,7 +334,7 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen, { struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex; struct r600_resource *resource = &rtex->resource; - struct radeon *radeon = (struct radeon *)screen->winsys; + struct radeon *radeon = ((struct r600_screen*)screen)->radeon; return r600_bo_get_winsys_handle(radeon, resource->bo, rtex->pitch_in_bytes[0], whandle); @@ -340,13 +345,12 @@ static void r600_texture_destroy(struct pipe_screen *screen, { struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex; struct r600_resource *resource = &rtex->resource; - struct radeon *radeon = (struct radeon *)screen->winsys; if (rtex->flushed_depth_texture) pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL); if (resource->bo) { - r600_bo_reference(radeon, &resource->bo, NULL); + r600_bo_reference(&resource->bo, NULL); } FREE(rtex); } @@ -369,11 +373,12 @@ r600_texture_create_object(struct pipe_screen *screen, unsigned array_mode, unsigned pitch_in_bytes_override, unsigned max_buffer_size, - struct r600_bo *bo) + struct r600_bo *bo, + boolean alloc_bo) { struct r600_resource_texture *rtex; struct r600_resource *resource; - struct radeon *radeon = (struct radeon *)screen->winsys; + struct radeon *radeon = ((struct r600_screen*)screen)->radeon; rtex = CALLOC_STRUCT(r600_resource_texture); if (rtex == NULL) @@ -386,59 +391,112 @@ r600_texture_create_object(struct pipe_screen *screen, resource->b.b.b.screen = screen; resource->bo = bo; rtex->pitch_override = pitch_in_bytes_override; + rtex->real_format = base->format; + + /* We must split depth and stencil into two separate buffers on Evergreen. */ + if (!(base->flags & R600_RESOURCE_FLAG_TRANSFER) && + r600_get_family_class(((struct r600_screen*)screen)->radeon) >= EVERGREEN && + util_format_is_depth_and_stencil(base->format)) { + struct pipe_resource stencil; + unsigned stencil_pitch_override = 0; + + switch (base->format) { + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + rtex->real_format = PIPE_FORMAT_Z24X8_UNORM; + break; + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + rtex->real_format = PIPE_FORMAT_X8Z24_UNORM; + break; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + rtex->real_format = PIPE_FORMAT_Z32_FLOAT; + break; + default: + assert(0); + FREE(rtex); + return NULL; + } + + /* Divide the pitch in bytes by 4 for stencil, because it has a smaller pixel size. */ + if (pitch_in_bytes_override) { + assert(base->format == PIPE_FORMAT_Z24_UNORM_S8_USCALED || + base->format == PIPE_FORMAT_S8_USCALED_Z24_UNORM); + stencil_pitch_override = pitch_in_bytes_override / 4; + } + + /* Allocate the stencil buffer. */ + stencil = *base; + stencil.format = PIPE_FORMAT_S8_USCALED; + rtex->stencil = r600_texture_create_object(screen, &stencil, array_mode, + stencil_pitch_override, + max_buffer_size, NULL, FALSE); + if (!rtex->stencil) { + FREE(rtex); + return NULL; + } + /* Proceed in creating the depth buffer. */ + } + /* only mark depth textures the HW can hit as depth textures */ - if (util_format_is_depth_or_stencil(base->format) && permit_hardware_blit(screen, base)) + if (util_format_is_depth_or_stencil(rtex->real_format) && permit_hardware_blit(screen, base)) rtex->depth = 1; r600_setup_miptree(screen, rtex, array_mode); + /* If we initialized separate stencil for Evergreen. place it after depth. */ + if (rtex->stencil) { + unsigned stencil_align, stencil_offset; + + stencil_align = r600_get_base_alignment(screen, rtex->stencil->real_format, array_mode); + stencil_offset = align(rtex->size, stencil_align); + + for (unsigned i = 0; i <= rtex->stencil->resource.b.b.b.last_level; i++) + rtex->stencil->offset[i] += stencil_offset; + + rtex->size = stencil_offset + rtex->stencil->size; + } + resource->size = rtex->size; - if (!resource->bo) { + /* Now create the backing buffer. */ + if (!resource->bo && alloc_bo) { struct pipe_resource *ptex = &rtex->resource.b.b.b; - int base_align = r600_get_base_alignment(screen, ptex->format, array_mode); + unsigned base_align = r600_get_base_alignment(screen, ptex->format, array_mode); resource->bo = r600_bo(radeon, rtex->size, base_align, base->bind, base->usage); if (!resource->bo) { + pipe_resource_reference((struct pipe_resource**)&rtex->stencil, NULL); FREE(rtex); return NULL; } } + + if (rtex->stencil) + rtex->stencil->resource.bo = rtex->resource.bo; return rtex; } +DEBUG_GET_ONCE_BOOL_OPTION(tiling_enabled, "R600_TILING", FALSE); + struct pipe_resource *r600_texture_create(struct pipe_screen *screen, const struct pipe_resource *templ) { + struct radeon *radeon = ((struct r600_screen*)screen)->radeon; unsigned array_mode = 0; - static int force_tiling = -1; - /* Would like some magic "get_bool_option_once" routine. - */ - if (force_tiling == -1) { -#if 0 - /* reenable when 2D tiling is fixed better */ - struct r600_screen *rscreen = (struct r600_screen *)screen; - if (r600_get_minor_version(rscreen->radeon) >= 9) - force_tiling = debug_get_bool_option("R600_TILING", TRUE); -#endif - force_tiling = debug_get_bool_option("R600_TILING", FALSE); - } - - if (force_tiling && permit_hardware_blit(screen, templ)) { - if (!(templ->flags & R600_RESOURCE_FLAG_TRANSFER) && - !(templ->bind & PIPE_BIND_SCANOUT)) { + if (!(templ->flags & R600_RESOURCE_FLAG_TRANSFER) && + !(templ->bind & PIPE_BIND_SCANOUT)) { + if (util_format_is_compressed(templ->format)) { + array_mode = V_038000_ARRAY_1D_TILED_THIN1; + } + else if (debug_get_option_tiling_enabled() && + r600_get_minor_version(radeon) >= 9 && + permit_hardware_blit(screen, templ)) { array_mode = V_038000_ARRAY_2D_TILED_THIN1; } } - if (!(templ->flags & R600_RESOURCE_FLAG_TRANSFER) && - util_format_is_compressed(templ->format)) - array_mode = V_038000_ARRAY_1D_TILED_THIN1; - return (struct pipe_resource *)r600_texture_create_object(screen, templ, array_mode, - 0, 0, NULL); - + 0, 0, NULL, TRUE); } static struct pipe_surface *r600_create_surface(struct pipe_context *pipe, @@ -483,8 +541,9 @@ struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen, const struct pipe_resource *templ, struct winsys_handle *whandle) { - struct radeon *rw = (struct radeon*)screen->winsys; + struct radeon *rw = ((struct r600_screen*)screen)->radeon; struct r600_bo *bo = NULL; + unsigned stride = 0; unsigned array_mode = 0; /* Support only 2D textures without mipmaps */ @@ -492,15 +551,13 @@ struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen, templ->depth0 != 1 || templ->last_level != 0) return NULL; - bo = r600_bo_handle(rw, whandle->handle, &array_mode); + bo = r600_bo_handle(rw, whandle, &stride, &array_mode); if (bo == NULL) { return NULL; } return (struct pipe_resource *)r600_texture_create_object(screen, templ, array_mode, - whandle->stride, - 0, - bo); + stride, 0, bo, FALSE); } int r600_texture_depth_flush(struct pipe_context *ctx, @@ -590,6 +647,9 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, (texture->flags & R600_RESOURCE_FLAG_TRANSFER)) use_staging_texture = FALSE; + if (use_staging_texture && (usage & PIPE_TRANSFER_MAP_DIRECTLY)) + return NULL; + trans = CALLOC_STRUCT(r600_transfer); if (trans == NULL) return NULL; @@ -648,7 +708,7 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, if (usage & PIPE_TRANSFER_READ) { r600_copy_to_staging_texture(ctx, trans); /* Always referenced in the blit. */ - ctx->flush(ctx, NULL); + r600_flush(ctx, NULL, 0); } return &trans->transfer; } @@ -684,10 +744,11 @@ void r600_texture_transfer_destroy(struct pipe_context *ctx, void* r600_texture_transfer_map(struct pipe_context *ctx, struct pipe_transfer* transfer) { + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; struct r600_bo *bo; enum pipe_format format = transfer->resource->format; - struct radeon *radeon = (struct radeon *)ctx->screen->winsys; + struct radeon *radeon = rctx->screen->radeon; unsigned offset = 0; char *map; @@ -706,7 +767,7 @@ void* r600_texture_transfer_map(struct pipe_context *ctx, transfer->box.x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); } - if (!(map = r600_bo_map(radeon, bo, transfer->usage, ctx))) { + if (!(map = r600_bo_map(radeon, bo, rctx->ctx.cs, transfer->usage))) { return NULL; } @@ -717,7 +778,7 @@ void r600_texture_transfer_unmap(struct pipe_context *ctx, struct pipe_transfer* transfer) { struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; - struct radeon *radeon = (struct radeon *)ctx->screen->winsys; + struct radeon *radeon = ((struct r600_screen*)ctx->screen)->radeon; struct r600_bo *bo; if (rtransfer->staging_texture) { @@ -754,11 +815,7 @@ static unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format, }; if (swizzle_view) { - /* Combine two sets of swizzles. */ - for (i = 0; i < 4; i++) { - swizzle[i] = swizzle_view[i] <= UTIL_FORMAT_SWIZZLE_W ? - swizzle_format[swizzle_view[i]] : swizzle_view[i]; - } + util_format_compose_swizzles(swizzle_format, swizzle_view, swizzle); } else { memcpy(swizzle, swizzle_format, 4); } diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index f6eec24cc05..de458cf398a 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -28,6 +28,32 @@ #define R600_TEXEL_PITCH_ALIGNMENT_MASK 0x7 +/* evergreen values */ +#define EG_RESOURCE_OFFSET 0x00030000 +#define EG_RESOURCE_END 0x00034000 +#define EG_LOOP_CONST_OFFSET 0x0003A200 +#define EG_LOOP_CONST_END 0x0003A26C +#define EG_BOOL_CONST_OFFSET 0x0003A500 +#define EG_BOOL_CONST_END 0x0003A506 + +#define R600_CONFIG_REG_OFFSET 0X00008000 +#define R600_CONFIG_REG_END 0X0000AC00 +#define R600_CONTEXT_REG_OFFSET 0X00028000 +#define R600_CONTEXT_REG_END 0X00029000 +#define R600_ALU_CONST_OFFSET 0X00030000 +#define R600_ALU_CONST_END 0X00032000 +#define R600_RESOURCE_OFFSET 0X00038000 +#define R600_RESOURCE_END 0X0003C000 +#define R600_SAMPLER_OFFSET 0X0003C000 +#define R600_SAMPLER_END 0X0003CFF0 +#define R600_CTL_CONST_OFFSET 0X0003CFF0 +#define R600_CTL_CONST_END 0X0003E200 +#define R600_LOOP_CONST_OFFSET 0X0003E200 +#define R600_LOOP_CONST_END 0X0003E380 +#define R600_BOOL_CONST_OFFSET 0X0003E380 +#define R600_BOOL_CONST_END 0X00040000 + + #define PKT3_NOP 0x10 #define PKT3_INDIRECT_BUFFER_END 0x17 #define PKT3_SET_PREDICATION 0x20 @@ -66,11 +92,38 @@ #define PKT3_SET_SAMPLER 0x6E #define PKT3_SET_CTL_CONST 0x6F #define PKT3_SURFACE_BASE_UPDATE 0x73 +#define SURFACE_BASE_UPDATE_DEPTH (1 << 0) +#define SURFACE_BASE_UPDATE_COLOR(x) (2 << (x)) +#define SURFACE_BASE_UPDATE_STRMOUT(x) (0x200 << (x)) + +#define EVENT_TYPE_PS_PARTIAL_FLUSH 0x10 +#define EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT 0x14 +#define EVENT_TYPE_ZPASS_DONE 0x15 +#define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT 0x16 +#define EVENT_TYPE(x) ((x) << 0) +#define EVENT_INDEX(x) ((x) << 8) + /* 0 - any non-TS event + * 1 - ZPASS_DONE + * 2 - SAMPLE_PIPELINESTAT + * 3 - SAMPLE_STREAMOUTSTAT* + * 4 - *S_PARTIAL_FLUSH + * 5 - TS events + */ #define PREDICATION_OP_CLEAR 0x0 #define PREDICATION_OP_ZPASS 0x1 #define PREDICATION_OP_PRIMCOUNT 0x2 +#define PRED_OP(x) ((x) << 16) + +#define PREDICATION_CONTINUE (1 << 31) + +#define PREDICATION_HINT_WAIT (0 << 12) +#define PREDICATION_HINT_NOWAIT_DRAW (1 << 12) + +#define PREDICATION_DRAW_NOT_VISIBLE (0 << 8) +#define PREDICATION_DRAW_VISIBLE (1 << 8) + #define PKT_TYPE_S(x) (((x) & 0x3) << 30) #define PKT_TYPE_G(x) (((x) >> 30) & 0x3) #define PKT_TYPE_C 0x3FFFFFFF @@ -83,8 +136,9 @@ #define PKT3_IT_OPCODE_S(x) (((x) & 0xFF) << 8) #define PKT3_IT_OPCODE_G(x) (((x) >> 8) & 0xFF) #define PKT3_IT_OPCODE_C 0xFFFF00FF +#define PKT3_PRED_S(x) (((x) >> 0) & 0x1) #define PKT0(index, count) (PKT_TYPE_S(0) | PKT0_BASE_INDEX_S(index) | PKT_COUNT_S(count)) -#define PKT3(op, count) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count)) +#define PKT3(op, count, predicate) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count) | PKT3_PRED_S(predicate)) /* Registers */ #define R_008C00_SQ_CONFIG 0x00008C00 diff --git a/src/gallium/drivers/r600/r700_asm.c b/src/gallium/drivers/r600/r700_asm.c index b3c7d1494fc..74efe226530 100644 --- a/src/gallium/drivers/r600/r700_asm.c +++ b/src/gallium/drivers/r600/r700_asm.c @@ -26,7 +26,7 @@ #include "r600_asm.h" #include "r700_sq.h" -void r700_bc_cf_vtx_build(uint32_t *bytecode, const struct r600_bc_cf *cf) +void r700_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_bytecode_cf *cf) { unsigned count = (cf->ndw / 4) - 1; *bytecode++ = S_SQ_CF_WORD0_ADDR(cf->addr >> 1); @@ -36,7 +36,7 @@ void r700_bc_cf_vtx_build(uint32_t *bytecode, const struct r600_bc_cf *cf) S_SQ_CF_WORD1_COUNT_3(count >> 3); } -int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) +int r700_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id) { bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) | diff --git a/src/gallium/drivers/softpipe/Android.mk b/src/gallium/drivers/softpipe/Android.mk new file mode 100644 index 00000000000..d198fa5d0f2 --- /dev/null +++ b/src/gallium/drivers/softpipe/Android.mk @@ -0,0 +1,67 @@ +# Mesa 3-D graphics library +# +# Copyright (C) 2010-2011 Chia-I Wu <[email protected]> +# Copyright (C) 2010-2011 LunarG Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +LOCAL_PATH := $(call my-dir) + +# from Makefile +C_SOURCES = \ + sp_fs_exec.c \ + sp_fs_sse.c \ + sp_clear.c \ + sp_fence.c \ + sp_flush.c \ + sp_query.c \ + sp_context.c \ + sp_draw_arrays.c \ + sp_prim_vbuf.c \ + sp_quad_pipe.c \ + sp_quad_stipple.c \ + sp_quad_depth_test.c \ + sp_quad_fs.c \ + sp_quad_blend.c \ + sp_screen.c \ + sp_setup.c \ + sp_state_blend.c \ + sp_state_clip.c \ + sp_state_derived.c \ + sp_state_sampler.c \ + sp_state_shader.c \ + sp_state_so.c \ + sp_state_rasterizer.c \ + sp_state_surface.c \ + sp_state_vertex.c \ + sp_texture.c \ + sp_tex_sample.c \ + sp_tex_tile_cache.c \ + sp_tile_cache.c \ + sp_surface.c + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := \ + $(C_SOURCES) + +LOCAL_MODULE := libmesa_pipe_softpipe + +include $(GALLIUM_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/src/gallium/drivers/softpipe/sp_clear.c b/src/gallium/drivers/softpipe/sp_clear.c index ae3f00f3387..22e8a2e5817 100644 --- a/src/gallium/drivers/softpipe/sp_clear.c +++ b/src/gallium/drivers/softpipe/sp_clear.c @@ -60,7 +60,7 @@ softpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, return; #if 0 - softpipe_update_derived(softpipe); /* not needed?? */ + softpipe_update_derived(softpipe, PIPE_PRIM_TRIANGLES); /* not needed?? */ #endif if (buffers & PIPE_CLEAR_COLOR) { diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 2c43602ea1c..c97b0333035 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -35,6 +35,7 @@ #include "pipe/p_defines.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_pstipple.h" #include "util/u_inlines.h" #include "tgsi/tgsi_exec.h" #include "vl/vl_decoder.h" @@ -90,6 +91,14 @@ softpipe_destroy( struct pipe_context *pipe ) struct softpipe_context *softpipe = softpipe_context( pipe ); uint i; +#if DO_PSTIPPLE_IN_HELPER_MODULE + if (softpipe->pstipple.sampler) + pipe->delete_sampler_state(pipe, softpipe->pstipple.sampler); + + pipe_resource_reference(&softpipe->pstipple.texture, NULL); + pipe_sampler_view_reference(&softpipe->pstipple.sampler_view, NULL); +#endif + if (softpipe->draw) draw_destroy( softpipe->draw ); @@ -346,6 +355,11 @@ softpipe_create_context( struct pipe_screen *screen, sp_init_surface_functions(softpipe); +#if DO_PSTIPPLE_IN_HELPER_MODULE + /* create the polgon stipple sampler */ + softpipe->pstipple.sampler = util_pstipple_create_sampler(&softpipe->pipe); +#endif + return &softpipe->pipe; fail: diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index a572ee8cf00..410b0a65792 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -38,8 +38,11 @@ #include "sp_quad_pipe.h" -/** Do polygon stipple in the driver here, or in the draw module? */ -#define DO_PSTIPPLE_IN_DRAW_MODULE 1 +/** Do polygon stipple in the draw module? */ +#define DO_PSTIPPLE_IN_DRAW_MODULE 0 + +/** Do polygon stipple with the util module? */ +#define DO_PSTIPPLE_IN_HELPER_MODULE 1 struct softpipe_vbuf_render; @@ -64,6 +67,7 @@ struct softpipe_context { struct pipe_depth_stencil_alpha_state *depth_stencil; struct pipe_rasterizer_state *rasterizer; struct sp_fragment_shader *fs; + struct sp_fragment_shader_variant *fs_variant; struct sp_vertex_shader *vs; struct sp_geometry_shader *gs; struct sp_velems_state *velems; @@ -143,6 +147,13 @@ struct softpipe_context { struct pipe_query *render_cond_query; uint render_cond_mode; + /** Polygon stipple items */ + struct { + struct pipe_resource *texture; + struct pipe_sampler_state *sampler; + struct pipe_sampler_view *sampler_view; + } pstipple; + /** Software quad rendering pipeline */ struct { struct quad_stage *shade; diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 01b4ca985d0..69b5b96b4fd 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -64,7 +64,7 @@ softpipe_draw_stream_output(struct pipe_context *pipe, unsigned mode) sp->reduced_api_prim = u_reduced_prim(mode); if (sp->dirty) { - softpipe_update_derived(sp); + softpipe_update_derived(sp, sp->reduced_api_prim); } softpipe_map_transfers(sp); @@ -122,7 +122,7 @@ softpipe_draw_vbo(struct pipe_context *pipe, sp->reduced_api_prim = u_reduced_prim(info->mode); if (sp->dirty) { - softpipe_update_derived(sp); + softpipe_update_derived(sp, sp->reduced_api_prim); } softpipe_map_transfers(sp); diff --git a/src/gallium/drivers/softpipe/sp_fs.h b/src/gallium/drivers/softpipe/sp_fs.h index 4792ace3a33..d46d7d5a657 100644 --- a/src/gallium/drivers/softpipe/sp_fs.h +++ b/src/gallium/drivers/softpipe/sp_fs.h @@ -31,17 +31,15 @@ #ifndef SP_FS_H #define SP_FS_H -struct sp_fragment_shader * -softpipe_create_fs_exec(struct softpipe_context *softpipe, - const struct pipe_shader_state *templ); -struct sp_fragment_shader * -softpipe_create_fs_sse(struct softpipe_context *softpipe, - const struct pipe_shader_state *templ); +struct sp_fragment_shader_variant * +softpipe_create_fs_variant_exec(struct softpipe_context *softpipe, + const struct pipe_shader_state *templ); + +struct sp_fragment_shader_variant * +softpipe_create_fs_variant_sse(struct softpipe_context *softpipe, + const struct pipe_shader_state *templ); -struct sp_fragment_shader * -softpipe_create_fs_llvm(struct softpipe_context *softpipe, - const struct pipe_shader_state *templ); struct tgsi_interp_coef; struct tgsi_exec_vector; diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c index 346e1b402ba..779b8c4995c 100644 --- a/src/gallium/drivers/softpipe/sp_fs_exec.c +++ b/src/gallium/drivers/softpipe/sp_fs_exec.c @@ -42,25 +42,25 @@ /** - * Subclass of sp_fragment_shader + * Subclass of sp_fragment_shader_variant */ struct sp_exec_fragment_shader { - struct sp_fragment_shader base; + struct sp_fragment_shader_variant base; /* No other members for now */ }; /** cast wrapper */ static INLINE struct sp_exec_fragment_shader * -sp_exec_fragment_shader(const struct sp_fragment_shader *base) +sp_exec_fragment_shader(const struct sp_fragment_shader_variant *var) { - return (struct sp_exec_fragment_shader *) base; + return (struct sp_exec_fragment_shader *) var; } static void -exec_prepare( const struct sp_fragment_shader *base, +exec_prepare( const struct sp_fragment_shader_variant *var, struct tgsi_exec_machine *machine, struct tgsi_sampler **samplers ) { @@ -68,9 +68,9 @@ exec_prepare( const struct sp_fragment_shader *base, * Bind tokens/shader to the interpreter's machine state. * Avoid redundant binding. */ - if (machine->Tokens != base->shader.tokens) { + if (machine->Tokens != var->tokens) { tgsi_exec_machine_bind_shader( machine, - base->shader.tokens, + var->tokens, PIPE_MAX_SAMPLERS, samplers ); } @@ -118,7 +118,7 @@ setup_pos_vector(const struct tgsi_interp_coef *coef, * interface: */ static unsigned -exec_run( const struct sp_fragment_shader *base, +exec_run( const struct sp_fragment_shader_variant *var, struct tgsi_exec_machine *machine, struct quad_header *quad ) { @@ -136,9 +136,9 @@ exec_run( const struct sp_fragment_shader *base, /* store outputs */ { - const ubyte *sem_name = base->info.output_semantic_name; - const ubyte *sem_index = base->info.output_semantic_index; - const uint n = base->info.num_outputs; + const ubyte *sem_name = var->info.output_semantic_name; + const ubyte *sem_index = var->info.output_semantic_index; + const uint n = var->info.num_outputs; uint i; for (i = 0; i < n; i++) { switch (sem_name[i]) { @@ -180,29 +180,23 @@ exec_run( const struct sp_fragment_shader *base, static void -exec_delete( struct sp_fragment_shader *base ) +exec_delete( struct sp_fragment_shader_variant *var ) { - FREE((void *) base->shader.tokens); - FREE(base); + FREE( (void *) var->tokens ); + FREE(var); } -struct sp_fragment_shader * -softpipe_create_fs_exec(struct softpipe_context *softpipe, - const struct pipe_shader_state *templ) +struct sp_fragment_shader_variant * +softpipe_create_fs_variant_exec(struct softpipe_context *softpipe, + const struct pipe_shader_state *templ) { struct sp_exec_fragment_shader *shader; - /* Decide whether we'll be codegenerating this shader and if so do - * that now. - */ - shader = CALLOC_STRUCT(sp_exec_fragment_shader); if (!shader) return NULL; - /* we need to keep a local copy of the tokens */ - shader->base.shader.tokens = tgsi_dup_tokens(templ->tokens); shader->base.prepare = exec_prepare; shader->base.run = exec_run; shader->base.delete = exec_delete; diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index 5b18cd035e3..c873af125bd 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -48,11 +48,11 @@ /** - * Subclass of sp_fragment_shader + * Subclass of sp_fragment_shader_variant */ struct sp_sse_fragment_shader { - struct sp_fragment_shader base; + struct sp_fragment_shader_variant base; struct x86_function sse2_program; tgsi_sse2_fs_function func; float immediates[TGSI_EXEC_NUM_IMMEDIATES][4]; @@ -61,14 +61,14 @@ struct sp_sse_fragment_shader /** cast wrapper */ static INLINE struct sp_sse_fragment_shader * -sp_sse_fragment_shader(const struct sp_fragment_shader *base) +sp_sse_fragment_shader(const struct sp_fragment_shader_variant *base) { return (struct sp_sse_fragment_shader *) base; } static void -fs_sse_prepare( const struct sp_fragment_shader *base, +fs_sse_prepare( const struct sp_fragment_shader_variant *base, struct tgsi_exec_machine *machine, struct tgsi_sampler **samplers ) { @@ -119,7 +119,7 @@ setup_pos_vector(const struct tgsi_interp_coef *coef, * TODO: process >1 quad at a time */ static unsigned -fs_sse_run( const struct sp_fragment_shader *base, +fs_sse_run( const struct sp_fragment_shader_variant *base, struct tgsi_exec_machine *machine, struct quad_header *quad ) { @@ -189,7 +189,7 @@ fs_sse_run( const struct sp_fragment_shader *base, static void -fs_sse_delete( struct sp_fragment_shader *base ) +fs_sse_delete( struct sp_fragment_shader_variant *base ) { struct sp_sse_fragment_shader *shader = sp_sse_fragment_shader(base); @@ -198,9 +198,9 @@ fs_sse_delete( struct sp_fragment_shader *base ) } -struct sp_fragment_shader * -softpipe_create_fs_sse(struct softpipe_context *softpipe, - const struct pipe_shader_state *templ) +struct sp_fragment_shader_variant * +softpipe_create_fs_variant_sse(struct softpipe_context *softpipe, + const struct pipe_shader_state *templ) { struct sp_sse_fragment_shader *shader; @@ -226,7 +226,6 @@ softpipe_create_fs_sse(struct softpipe_context *softpipe, return NULL; } - shader->base.shader.tokens = NULL; /* don't hold reference to templ->tokens */ shader->base.prepare = fs_sse_prepare; shader->base.run = fs_sse_run; shader->base.delete = fs_sse_delete; @@ -239,9 +238,9 @@ softpipe_create_fs_sse(struct softpipe_context *softpipe, /* Maybe put this variant in the header file. */ -struct sp_fragment_shader * -softpipe_create_fs_sse(struct softpipe_context *softpipe, - const struct pipe_shader_state *templ) +struct sp_fragment_shader_variant * +softpipe_create_fs_variant_sse(struct softpipe_context *softpipe, + const struct pipe_shader_state *templ) { return NULL; } diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index 76cfc0bf51c..c881194768a 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -240,6 +240,7 @@ blend_quad(struct quad_stage *qs, static const float one[4] = { 1, 1, 1, 1 }; struct softpipe_context *softpipe = qs->softpipe; float source[4][QUAD_SIZE] = { { 0 } }; + float blend_dest[4][QUAD_SIZE]; /* * Compute src/first term RGB @@ -480,79 +481,85 @@ blend_quad(struct quad_stage *qs, assert(0 && "invalid alpha src factor"); } + /* Save the original dest for use in masking */ + VEC4_COPY(blend_dest[0], dest[0]); + VEC4_COPY(blend_dest[1], dest[1]); + VEC4_COPY(blend_dest[2], dest[2]); + VEC4_COPY(blend_dest[3], dest[3]); + /* - * Compute dest/second term RGB + * Compute blend_dest/second term RGB */ switch (softpipe->blend->rt[blend_index].rgb_dst_factor) { case PIPE_BLENDFACTOR_ONE: - /* dest = dest * 1 NO-OP, leave dest as-is */ + /* blend_dest = blend_dest * 1 NO-OP, leave blend_dest as-is */ break; case PIPE_BLENDFACTOR_SRC_COLOR: - VEC4_MUL(dest[0], dest[0], quadColor[0]); /* R */ - VEC4_MUL(dest[1], dest[1], quadColor[1]); /* G */ - VEC4_MUL(dest[2], dest[2], quadColor[2]); /* B */ + VEC4_MUL(blend_dest[0], blend_dest[0], quadColor[0]); /* R */ + VEC4_MUL(blend_dest[1], blend_dest[1], quadColor[1]); /* G */ + VEC4_MUL(blend_dest[2], blend_dest[2], quadColor[2]); /* B */ break; case PIPE_BLENDFACTOR_SRC_ALPHA: - VEC4_MUL(dest[0], dest[0], quadColor[3]); /* R * A */ - VEC4_MUL(dest[1], dest[1], quadColor[3]); /* G * A */ - VEC4_MUL(dest[2], dest[2], quadColor[3]); /* B * A */ + VEC4_MUL(blend_dest[0], blend_dest[0], quadColor[3]); /* R * A */ + VEC4_MUL(blend_dest[1], blend_dest[1], quadColor[3]); /* G * A */ + VEC4_MUL(blend_dest[2], blend_dest[2], quadColor[3]); /* B * A */ break; case PIPE_BLENDFACTOR_DST_ALPHA: if (has_dst_alpha) { - VEC4_MUL(dest[0], dest[0], dest[3]); /* R * A */ - VEC4_MUL(dest[1], dest[1], dest[3]); /* G * A */ - VEC4_MUL(dest[2], dest[2], dest[3]); /* B * A */ + VEC4_MUL(blend_dest[0], blend_dest[0], blend_dest[3]); /* R * A */ + VEC4_MUL(blend_dest[1], blend_dest[1], blend_dest[3]); /* G * A */ + VEC4_MUL(blend_dest[2], blend_dest[2], blend_dest[3]); /* B * A */ } else { - /* dest = dest * 1 NO-OP, leave dest as-is */ + /* blend_dest = blend_dest * 1 NO-OP, leave blend_dest as-is */ } break; case PIPE_BLENDFACTOR_DST_COLOR: - VEC4_MUL(dest[0], dest[0], dest[0]); /* R */ - VEC4_MUL(dest[1], dest[1], dest[1]); /* G */ - VEC4_MUL(dest[2], dest[2], dest[2]); /* B */ + VEC4_MUL(blend_dest[0], blend_dest[0], blend_dest[0]); /* R */ + VEC4_MUL(blend_dest[1], blend_dest[1], blend_dest[1]); /* G */ + VEC4_MUL(blend_dest[2], blend_dest[2], blend_dest[2]); /* B */ break; case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: if (has_dst_alpha) { const float *alpha = quadColor[3]; float diff[4], temp[4]; - VEC4_SUB(diff, one, dest[3]); + VEC4_SUB(diff, one, blend_dest[3]); VEC4_MIN(temp, alpha, diff); - VEC4_MUL(dest[0], quadColor[0], temp); /* R */ - VEC4_MUL(dest[1], quadColor[1], temp); /* G */ - VEC4_MUL(dest[2], quadColor[2], temp); /* B */ + VEC4_MUL(blend_dest[0], quadColor[0], temp); /* R */ + VEC4_MUL(blend_dest[1], quadColor[1], temp); /* G */ + VEC4_MUL(blend_dest[2], quadColor[2], temp); /* B */ } else { - VEC4_COPY(dest[0], zero); /* R */ - VEC4_COPY(dest[1], zero); /* G */ - VEC4_COPY(dest[2], zero); /* B */ + VEC4_COPY(blend_dest[0], zero); /* R */ + VEC4_COPY(blend_dest[1], zero); /* G */ + VEC4_COPY(blend_dest[2], zero); /* B */ } break; case PIPE_BLENDFACTOR_CONST_COLOR: { float comp[4]; VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */ - VEC4_MUL(dest[0], dest[0], comp); /* R */ + VEC4_MUL(blend_dest[0], blend_dest[0], comp); /* R */ VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */ - VEC4_MUL(dest[1], dest[1], comp); /* G */ + VEC4_MUL(blend_dest[1], blend_dest[1], comp); /* G */ VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */ - VEC4_MUL(dest[2], dest[2], comp); /* B */ + VEC4_MUL(blend_dest[2], blend_dest[2], comp); /* B */ } break; case PIPE_BLENDFACTOR_CONST_ALPHA: { float comp[4]; VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ - VEC4_MUL(dest[0], dest[0], comp); /* R */ - VEC4_MUL(dest[1], dest[1], comp); /* G */ - VEC4_MUL(dest[2], dest[2], comp); /* B */ + VEC4_MUL(blend_dest[0], blend_dest[0], comp); /* R */ + VEC4_MUL(blend_dest[1], blend_dest[1], comp); /* G */ + VEC4_MUL(blend_dest[2], blend_dest[2], comp); /* B */ } break; case PIPE_BLENDFACTOR_ZERO: - VEC4_COPY(dest[0], zero); /* R */ - VEC4_COPY(dest[1], zero); /* G */ - VEC4_COPY(dest[2], zero); /* B */ + VEC4_COPY(blend_dest[0], zero); /* R */ + VEC4_COPY(blend_dest[1], zero); /* G */ + VEC4_COPY(blend_dest[2], zero); /* B */ break; case PIPE_BLENDFACTOR_SRC1_COLOR: case PIPE_BLENDFACTOR_SRC1_ALPHA: @@ -563,45 +570,45 @@ blend_quad(struct quad_stage *qs, { float inv_comp[4]; VEC4_SUB(inv_comp, one, quadColor[0]); /* R */ - VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */ + VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */ VEC4_SUB(inv_comp, one, quadColor[1]); /* G */ - VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */ + VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */ VEC4_SUB(inv_comp, one, quadColor[2]); /* B */ - VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */ + VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */ } break; case PIPE_BLENDFACTOR_INV_SRC_ALPHA: { float one_minus_alpha[QUAD_SIZE]; VEC4_SUB(one_minus_alpha, one, quadColor[3]); - VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */ - VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */ - VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */ + VEC4_MUL(blend_dest[0], blend_dest[0], one_minus_alpha); /* R */ + VEC4_MUL(blend_dest[1], blend_dest[1], one_minus_alpha); /* G */ + VEC4_MUL(blend_dest[2], blend_dest[2], one_minus_alpha); /* B */ } break; case PIPE_BLENDFACTOR_INV_DST_ALPHA: if (has_dst_alpha) { float inv_comp[4]; - VEC4_SUB(inv_comp, one, dest[3]); /* A */ - VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */ - VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */ - VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */ + VEC4_SUB(inv_comp, one, blend_dest[3]); /* A */ + VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */ + VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */ + VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */ } else { - VEC4_COPY(dest[0], zero); /* R */ - VEC4_COPY(dest[1], zero); /* G */ - VEC4_COPY(dest[2], zero); /* B */ + VEC4_COPY(blend_dest[0], zero); /* R */ + VEC4_COPY(blend_dest[1], zero); /* G */ + VEC4_COPY(blend_dest[2], zero); /* B */ } break; case PIPE_BLENDFACTOR_INV_DST_COLOR: { float inv_comp[4]; - VEC4_SUB(inv_comp, one, dest[0]); /* R */ - VEC4_MUL(dest[0], dest[0], inv_comp); /* R */ - VEC4_SUB(inv_comp, one, dest[1]); /* G */ - VEC4_MUL(dest[1], dest[1], inv_comp); /* G */ - VEC4_SUB(inv_comp, one, dest[2]); /* B */ - VEC4_MUL(dest[2], dest[2], inv_comp); /* B */ + VEC4_SUB(inv_comp, one, blend_dest[0]); /* R */ + VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp); /* R */ + VEC4_SUB(inv_comp, one, blend_dest[1]); /* G */ + VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp); /* G */ + VEC4_SUB(inv_comp, one, blend_dest[2]); /* B */ + VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp); /* B */ } break; case PIPE_BLENDFACTOR_INV_CONST_COLOR: @@ -609,22 +616,22 @@ blend_quad(struct quad_stage *qs, float inv_comp[4]; /* R */ VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]); - VEC4_MUL(dest[0], dest[0], inv_comp); + VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp); /* G */ VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]); - VEC4_MUL(dest[1], dest[1], inv_comp); + VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp); /* B */ VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]); - VEC4_MUL(dest[2], dest[2], inv_comp); + VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp); } break; case PIPE_BLENDFACTOR_INV_CONST_ALPHA: { float inv_comp[4]; VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); - VEC4_MUL(dest[0], dest[0], inv_comp); - VEC4_MUL(dest[1], dest[1], inv_comp); - VEC4_MUL(dest[2], dest[2], inv_comp); + VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp); + VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp); + VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp); } break; case PIPE_BLENDFACTOR_INV_SRC1_COLOR: @@ -637,29 +644,29 @@ blend_quad(struct quad_stage *qs, } /* - * Compute dest/second term A + * Compute blend_dest/second term A */ switch (softpipe->blend->rt[blend_index].alpha_dst_factor) { case PIPE_BLENDFACTOR_ONE: - /* dest = dest * 1 NO-OP, leave dest as-is */ + /* blend_dest = blend_dest * 1 NO-OP, leave blend_dest as-is */ break; case PIPE_BLENDFACTOR_SRC_COLOR: /* fall-through */ case PIPE_BLENDFACTOR_SRC_ALPHA: - VEC4_MUL(dest[3], dest[3], quadColor[3]); /* A * A */ + VEC4_MUL(blend_dest[3], blend_dest[3], quadColor[3]); /* A * A */ break; case PIPE_BLENDFACTOR_DST_COLOR: /* fall-through */ case PIPE_BLENDFACTOR_DST_ALPHA: if (has_dst_alpha) { - VEC4_MUL(dest[3], dest[3], dest[3]); /* A */ + VEC4_MUL(blend_dest[3], blend_dest[3], blend_dest[3]); /* A */ } else { - /* dest = dest * 1 NO-OP, leave dest as-is */ + /* blend_dest = blend_dest * 1 NO-OP, leave blend_dest as-is */ } break; case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - /* dest = dest * 1 NO-OP, leave dest as-is */ + /* blend_dest = blend_dest * 1 NO-OP, leave blend_dest as-is */ break; case PIPE_BLENDFACTOR_CONST_COLOR: /* fall-through */ @@ -667,11 +674,11 @@ blend_quad(struct quad_stage *qs, { float comp[4]; VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ - VEC4_MUL(dest[3], dest[3], comp); /* A */ + VEC4_MUL(blend_dest[3], blend_dest[3], comp); /* A */ } break; case PIPE_BLENDFACTOR_ZERO: - VEC4_COPY(dest[3], zero); /* A */ + VEC4_COPY(blend_dest[3], zero); /* A */ break; case PIPE_BLENDFACTOR_INV_SRC_COLOR: /* fall-through */ @@ -679,7 +686,7 @@ blend_quad(struct quad_stage *qs, { float one_minus_alpha[QUAD_SIZE]; VEC4_SUB(one_minus_alpha, one, quadColor[3]); - VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */ + VEC4_MUL(blend_dest[3], blend_dest[3], one_minus_alpha); /* A */ } break; case PIPE_BLENDFACTOR_INV_DST_COLOR: @@ -687,11 +694,11 @@ blend_quad(struct quad_stage *qs, case PIPE_BLENDFACTOR_INV_DST_ALPHA: if (has_dst_alpha) { float inv_comp[4]; - VEC4_SUB(inv_comp, one, dest[3]); /* A */ - VEC4_MUL(dest[3], inv_comp, dest[3]); /* A */ + VEC4_SUB(inv_comp, one, blend_dest[3]); /* A */ + VEC4_MUL(blend_dest[3], inv_comp, blend_dest[3]); /* A */ } else { - VEC4_COPY(dest[3], zero); /* A */ + VEC4_COPY(blend_dest[3], zero); /* A */ } break; case PIPE_BLENDFACTOR_INV_CONST_COLOR: @@ -700,7 +707,7 @@ blend_quad(struct quad_stage *qs, { float inv_comp[4]; VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); - VEC4_MUL(dest[3], dest[3], inv_comp); + VEC4_MUL(blend_dest[3], blend_dest[3], inv_comp); } break; default: @@ -712,29 +719,29 @@ blend_quad(struct quad_stage *qs, */ switch (softpipe->blend->rt[blend_index].rgb_func) { case PIPE_BLEND_ADD: - VEC4_ADD_SAT(quadColor[0], source[0], dest[0]); /* R */ - VEC4_ADD_SAT(quadColor[1], source[1], dest[1]); /* G */ - VEC4_ADD_SAT(quadColor[2], source[2], dest[2]); /* B */ + VEC4_ADD_SAT(quadColor[0], source[0], blend_dest[0]); /* R */ + VEC4_ADD_SAT(quadColor[1], source[1], blend_dest[1]); /* G */ + VEC4_ADD_SAT(quadColor[2], source[2], blend_dest[2]); /* B */ break; case PIPE_BLEND_SUBTRACT: - VEC4_SUB_SAT(quadColor[0], source[0], dest[0]); /* R */ - VEC4_SUB_SAT(quadColor[1], source[1], dest[1]); /* G */ - VEC4_SUB_SAT(quadColor[2], source[2], dest[2]); /* B */ + VEC4_SUB_SAT(quadColor[0], source[0], blend_dest[0]); /* R */ + VEC4_SUB_SAT(quadColor[1], source[1], blend_dest[1]); /* G */ + VEC4_SUB_SAT(quadColor[2], source[2], blend_dest[2]); /* B */ break; case PIPE_BLEND_REVERSE_SUBTRACT: - VEC4_SUB_SAT(quadColor[0], dest[0], source[0]); /* R */ - VEC4_SUB_SAT(quadColor[1], dest[1], source[1]); /* G */ - VEC4_SUB_SAT(quadColor[2], dest[2], source[2]); /* B */ + VEC4_SUB_SAT(quadColor[0], blend_dest[0], source[0]); /* R */ + VEC4_SUB_SAT(quadColor[1], blend_dest[1], source[1]); /* G */ + VEC4_SUB_SAT(quadColor[2], blend_dest[2], source[2]); /* B */ break; case PIPE_BLEND_MIN: - VEC4_MIN(quadColor[0], source[0], dest[0]); /* R */ - VEC4_MIN(quadColor[1], source[1], dest[1]); /* G */ - VEC4_MIN(quadColor[2], source[2], dest[2]); /* B */ + VEC4_MIN(quadColor[0], source[0], blend_dest[0]); /* R */ + VEC4_MIN(quadColor[1], source[1], blend_dest[1]); /* G */ + VEC4_MIN(quadColor[2], source[2], blend_dest[2]); /* B */ break; case PIPE_BLEND_MAX: - VEC4_MAX(quadColor[0], source[0], dest[0]); /* R */ - VEC4_MAX(quadColor[1], source[1], dest[1]); /* G */ - VEC4_MAX(quadColor[2], source[2], dest[2]); /* B */ + VEC4_MAX(quadColor[0], source[0], blend_dest[0]); /* R */ + VEC4_MAX(quadColor[1], source[1], blend_dest[1]); /* G */ + VEC4_MAX(quadColor[2], source[2], blend_dest[2]); /* B */ break; default: assert(0 && "invalid rgb blend func"); @@ -745,19 +752,19 @@ blend_quad(struct quad_stage *qs, */ switch (softpipe->blend->rt[blend_index].alpha_func) { case PIPE_BLEND_ADD: - VEC4_ADD_SAT(quadColor[3], source[3], dest[3]); /* A */ + VEC4_ADD_SAT(quadColor[3], source[3], blend_dest[3]); /* A */ break; case PIPE_BLEND_SUBTRACT: - VEC4_SUB_SAT(quadColor[3], source[3], dest[3]); /* A */ + VEC4_SUB_SAT(quadColor[3], source[3], blend_dest[3]); /* A */ break; case PIPE_BLEND_REVERSE_SUBTRACT: - VEC4_SUB_SAT(quadColor[3], dest[3], source[3]); /* A */ + VEC4_SUB_SAT(quadColor[3], blend_dest[3], source[3]); /* A */ break; case PIPE_BLEND_MIN: - VEC4_MIN(quadColor[3], source[3], dest[3]); /* A */ + VEC4_MIN(quadColor[3], source[3], blend_dest[3]); /* A */ break; case PIPE_BLEND_MAX: - VEC4_MAX(quadColor[3], source[3], dest[3]); /* A */ + VEC4_MAX(quadColor[3], source[3], blend_dest[3]); /* A */ break; default: assert(0 && "invalid alpha blend func"); @@ -797,7 +804,7 @@ blend_fallback(struct quad_stage *qs, unsigned cbuf; boolean write_all; - write_all = softpipe->fs->color0_writes_all_cbufs; + write_all = softpipe->fs_variant->info.color0_writes_all_cbufs; for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { @@ -810,17 +817,25 @@ blend_fallback(struct quad_stage *qs, quads[0]->input.y0); boolean has_dst_alpha = util_format_has_alpha(softpipe->framebuffer.cbufs[cbuf]->format); - uint q, i, j, qbuf; - - qbuf = write_all ? 0 : cbuf; + uint q, i, j; for (q = 0; q < nr; q++) { struct quad_header *quad = quads[q]; float (*quadColor)[4]; + float temp_quad_color[QUAD_SIZE][4]; const int itx = (quad->input.x0 & (TILE_SIZE-1)); const int ity = (quad->input.y0 & (TILE_SIZE-1)); - quadColor = quad->output.color[qbuf]; + if (write_all) { + for (j = 0; j < QUAD_SIZE; j++) { + for (i = 0; i < 4; i++) { + temp_quad_color[i][j] = quad->output.color[0][i][j]; + } + } + quadColor = temp_quad_color; + } else { + quadColor = quad->output.color[cbuf]; + } /* get/swizzle dest colors */ diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index 15f3a8fd813..a349f0d1f3c 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -727,9 +727,9 @@ depth_test_quads_fallback(struct quad_stage *qs, unsigned nr) { unsigned i, pass = 0; - const struct sp_fragment_shader *fs = qs->softpipe->fs; - boolean interp_depth = !fs->info.writes_z; - boolean shader_stencil_ref = fs->info.writes_stencil; + const struct tgsi_shader_info *fsInfo = &qs->softpipe->fs_variant->info; + boolean interp_depth = !fsInfo->writes_z; + boolean shader_stencil_ref = fsInfo->writes_stencil; struct depth_data data; data.use_shader_stencil_refs = FALSE; @@ -838,7 +838,9 @@ choose_depth_test(struct quad_stage *qs, struct quad_header *quads[], unsigned nr) { - boolean interp_depth = !qs->softpipe->fs->info.writes_z; + const struct tgsi_shader_info *fsInfo = &qs->softpipe->fs_variant->info; + + boolean interp_depth = !fsInfo->writes_z; boolean alpha = qs->softpipe->depth_stencil->alpha.enabled; diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index 90f4787d599..d74d6d4914e 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -74,7 +74,7 @@ shade_quad(struct quad_stage *qs, struct quad_header *quad) struct tgsi_exec_machine *machine = softpipe->fs_machine; /* run shader */ - return softpipe->fs->run( softpipe->fs, machine, quad ); + return softpipe->fs_variant->run( softpipe->fs_variant, machine, quad ); } @@ -140,10 +140,10 @@ shade_begin(struct quad_stage *qs) { struct softpipe_context *softpipe = qs->softpipe; - softpipe->fs->prepare( softpipe->fs, - softpipe->fs_machine, - (struct tgsi_sampler **) - softpipe->tgsi.frag_samplers_list ); + softpipe->fs_variant->prepare( softpipe->fs_variant, + softpipe->fs_machine, + (struct tgsi_sampler **) + softpipe->tgsi.frag_samplers_list ); qs->next->begin(qs->next); } diff --git a/src/gallium/drivers/softpipe/sp_quad_pipe.c b/src/gallium/drivers/softpipe/sp_quad_pipe.c index 2cfd02a22c6..0c4506ae8f4 100644 --- a/src/gallium/drivers/softpipe/sp_quad_pipe.c +++ b/src/gallium/drivers/softpipe/sp_quad_pipe.c @@ -30,9 +30,9 @@ #include "sp_state.h" #include "pipe/p_shader_tokens.h" + static void -sp_push_quad_first( struct softpipe_context *sp, - struct quad_stage *quad ) +insert_stage_at_head(struct softpipe_context *sp, struct quad_stage *quad) { quad->next = sp->quad.first; sp->quad.first = quad; @@ -46,24 +46,24 @@ sp_build_quad_pipeline(struct softpipe_context *sp) sp->depth_stencil->depth.enabled && sp->framebuffer.zsbuf && !sp->depth_stencil->alpha.enabled && - !sp->fs->info.uses_kill && - !sp->fs->info.writes_z && - !sp->fs->info.writes_stencil; + !sp->fs_variant->info.uses_kill && + !sp->fs_variant->info.writes_z && + !sp->fs_variant->info.writes_stencil; sp->quad.first = sp->quad.blend; if (early_depth_test) { - sp_push_quad_first( sp, sp->quad.shade ); - sp_push_quad_first( sp, sp->quad.depth_test ); + insert_stage_at_head( sp, sp->quad.shade ); + insert_stage_at_head( sp, sp->quad.depth_test ); } else { - sp_push_quad_first( sp, sp->quad.depth_test ); - sp_push_quad_first( sp, sp->quad.shade ); + insert_stage_at_head( sp, sp->quad.depth_test ); + insert_stage_at_head( sp, sp->quad.shade ); } -#if !DO_PSTIPPLE_IN_DRAW_MODULE +#if !DO_PSTIPPLE_IN_DRAW_MODULE && !DO_PSTIPPLE_IN_HELPER_MODULE if (sp->rasterizer->poly_stipple_enable) - sp_push_quad_first( sp, sp->quad.pstipple ); + insert_stage_at_head( sp, sp->quad.pstipple ); #endif } diff --git a/src/gallium/drivers/softpipe/sp_query.c b/src/gallium/drivers/softpipe/sp_query.c index 4ae69c1c2bd..88f42572f19 100644 --- a/src/gallium/drivers/softpipe/sp_query.c +++ b/src/gallium/drivers/softpipe/sp_query.c @@ -157,7 +157,7 @@ softpipe_get_query_result(struct pipe_context *pipe, /*os_get_time is in microseconds*/ td.frequency = 1000000; td.disjoint = FALSE; - memcpy(vresult, &sq->so, + memcpy(vresult, &td, sizeof(struct pipe_query_data_timestamp_disjoint)); } break; diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 1e58d27be88..960ab8c96ae 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -185,6 +185,8 @@ softpipe_get_video_param(struct pipe_screen *screen, case PIPE_VIDEO_CAP_MAX_WIDTH: case PIPE_VIDEO_CAP_MAX_HEIGHT: return vl_video_buffer_max_size(screen); + case PIPE_VIDEO_CAP_NUM_BUFFERS_DESIRED: + return vl_num_buffers_desired(screen, profile); default: return 0; } diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index 0ce28f4c6ee..656d001809f 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -568,17 +568,18 @@ tri_persp_coeff(struct setup_context *setup, static void setup_fragcoord_coeff(struct setup_context *setup, uint slot) { - struct sp_fragment_shader* spfs = setup->softpipe->fs; + const struct tgsi_shader_info *fsInfo = &setup->softpipe->fs_variant->info; + /*X*/ - setup->coef[slot].a0[0] = spfs->pixel_center_integer ? 0.0 : 0.5; + setup->coef[slot].a0[0] = fsInfo->pixel_center_integer ? 0.0 : 0.5; setup->coef[slot].dadx[0] = 1.0; setup->coef[slot].dady[0] = 0.0; /*Y*/ setup->coef[slot].a0[1] = - (spfs->origin_lower_left ? setup->softpipe->framebuffer.height-1 : 0) - + (spfs->pixel_center_integer ? 0.0 : 0.5); + (fsInfo->origin_lower_left ? setup->softpipe->framebuffer.height-1 : 0) + + (fsInfo->pixel_center_integer ? 0.0 : 0.5); setup->coef[slot].dadx[1] = 0.0; - setup->coef[slot].dady[1] = spfs->origin_lower_left ? -1.0 : 1.0; + setup->coef[slot].dady[1] = fsInfo->origin_lower_left ? -1.0 : 1.0; /*Z*/ setup->coef[slot].a0[2] = setup->posCoef.a0[2]; setup->coef[slot].dadx[2] = setup->posCoef.dadx[2]; @@ -599,7 +600,7 @@ static void setup_tri_coefficients(struct setup_context *setup) { struct softpipe_context *softpipe = setup->softpipe; - const struct sp_fragment_shader *spfs = softpipe->fs; + const struct tgsi_shader_info *fsInfo = &setup->softpipe->fs_variant->info; const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); uint fragSlot; float v[3]; @@ -618,7 +619,7 @@ setup_tri_coefficients(struct setup_context *setup) /* setup interpolation for all the remaining attributes: */ - for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { + for (fragSlot = 0; fragSlot < fsInfo->num_inputs; fragSlot++) { const uint vertSlot = vinfo->attrib[fragSlot].src_index; uint j; @@ -632,7 +633,7 @@ setup_tri_coefficients(struct setup_context *setup) tri_apply_cylindrical_wrap(setup->vmin[vertSlot][j], setup->vmid[vertSlot][j], setup->vmax[vertSlot][j], - spfs->info.input_cylindrical_wrap[fragSlot] & (1 << j), + fsInfo->input_cylindrical_wrap[fragSlot] & (1 << j), v); tri_linear_coeff(setup, &setup->coef[fragSlot], j, v); } @@ -642,7 +643,7 @@ setup_tri_coefficients(struct setup_context *setup) tri_apply_cylindrical_wrap(setup->vmin[vertSlot][j], setup->vmid[vertSlot][j], setup->vmax[vertSlot][j], - spfs->info.input_cylindrical_wrap[fragSlot] & (1 << j), + fsInfo->input_cylindrical_wrap[fragSlot] & (1 << j), v); tri_persp_coeff(setup, &setup->coef[fragSlot], j, v); } @@ -654,7 +655,7 @@ setup_tri_coefficients(struct setup_context *setup) assert(0); } - if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { + if (fsInfo->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { /* convert 0 to 1.0 and 1 to -1.0 */ setup->coef[fragSlot].a0[0] = setup->facing * -2.0f + 1.0f; setup->coef[fragSlot].dadx[0] = 0.0; @@ -939,7 +940,7 @@ setup_line_coefficients(struct setup_context *setup, const float (*v1)[4]) { struct softpipe_context *softpipe = setup->softpipe; - const struct sp_fragment_shader *spfs = softpipe->fs; + const struct tgsi_shader_info *fsInfo = &setup->softpipe->fs_variant->info; const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); uint fragSlot; float area; @@ -974,7 +975,7 @@ setup_line_coefficients(struct setup_context *setup, /* setup interpolation for all the remaining attributes: */ - for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { + for (fragSlot = 0; fragSlot < fsInfo->num_inputs; fragSlot++) { const uint vertSlot = vinfo->attrib[fragSlot].src_index; uint j; @@ -987,7 +988,7 @@ setup_line_coefficients(struct setup_context *setup, for (j = 0; j < NUM_CHANNELS; j++) { line_apply_cylindrical_wrap(setup->vmin[vertSlot][j], setup->vmax[vertSlot][j], - spfs->info.input_cylindrical_wrap[fragSlot] & (1 << j), + fsInfo->input_cylindrical_wrap[fragSlot] & (1 << j), v); line_linear_coeff(setup, &setup->coef[fragSlot], j, v); } @@ -996,7 +997,7 @@ setup_line_coefficients(struct setup_context *setup, for (j = 0; j < NUM_CHANNELS; j++) { line_apply_cylindrical_wrap(setup->vmin[vertSlot][j], setup->vmax[vertSlot][j], - spfs->info.input_cylindrical_wrap[fragSlot] & (1 << j), + fsInfo->input_cylindrical_wrap[fragSlot] & (1 << j), v); line_persp_coeff(setup, &setup->coef[fragSlot], j, v); } @@ -1008,7 +1009,7 @@ setup_line_coefficients(struct setup_context *setup, assert(0); } - if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { + if (fsInfo->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { /* convert 0 to 1.0 and 1 to -1.0 */ setup->coef[fragSlot].a0[0] = setup->facing * -2.0f + 1.0f; setup->coef[fragSlot].dadx[0] = 0.0; @@ -1188,7 +1189,7 @@ sp_setup_point(struct setup_context *setup, const float (*v0)[4]) { struct softpipe_context *softpipe = setup->softpipe; - const struct sp_fragment_shader *spfs = softpipe->fs; + const struct tgsi_shader_info *fsInfo = &setup->softpipe->fs_variant->info; const int sizeAttr = setup->softpipe->psize_slot; const float size = sizeAttr > 0 ? v0[sizeAttr][0] @@ -1232,7 +1233,7 @@ sp_setup_point(struct setup_context *setup, const_coeff(setup, &setup->posCoef, 0, 2); const_coeff(setup, &setup->posCoef, 0, 3); - for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { + for (fragSlot = 0; fragSlot < fsInfo->num_inputs; fragSlot++) { const uint vertSlot = vinfo->attrib[fragSlot].src_index; uint j; @@ -1255,7 +1256,7 @@ sp_setup_point(struct setup_context *setup, assert(0); } - if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { + if (fsInfo->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { /* convert 0 to 1.0 and 1 to -1.0 */ setup->coef[fragSlot].a0[0] = setup->facing * -2.0f + 1.0f; setup->coef[fragSlot].dadx[0] = 0.0; @@ -1396,7 +1397,7 @@ sp_setup_prepare(struct setup_context *setup) struct softpipe_context *sp = setup->softpipe; if (sp->dirty) { - softpipe_update_derived(sp); + softpipe_update_derived(sp, sp->reduced_api_prim); } /* Note: nr_attrs is only used for debugging (vertex printing) */ diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index bb19f8cff20..ec4c8cf5e4d 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -60,34 +60,45 @@ struct tgsi_exec_machine; struct vertex_info; -/** - * Subclass of pipe_shader_state (though it doesn't really need to be). - * - * This is starting to look an awful lot like a quad pipeline stage... - */ -struct sp_fragment_shader { - struct pipe_shader_state shader; +struct sp_fragment_shader_variant_key +{ + boolean polygon_stipple; +}; + +struct sp_fragment_shader_variant +{ + const struct tgsi_token *tokens; + struct sp_fragment_shader_variant_key key; struct tgsi_shader_info info; + unsigned stipple_sampler_unit; + + /* See comments about this elsewhere */ +#if 0 struct draw_fragment_shader *draw_shader; +#endif - boolean origin_lower_left; /**< fragment shader uses lower left position origin? */ - boolean pixel_center_integer; /**< fragment shader uses integer pixel center? */ - boolean color0_writes_all_cbufs; /**< fragment shader writes color0 to all bound cbufs */ - void (*prepare)( const struct sp_fragment_shader *shader, - struct tgsi_exec_machine *machine, - struct tgsi_sampler **samplers); + void (*prepare)(const struct sp_fragment_shader_variant *shader, + struct tgsi_exec_machine *machine, + struct tgsi_sampler **samplers); - /* Run the shader - this interface will get cleaned up in the - * future: - */ - unsigned (*run)( const struct sp_fragment_shader *shader, - struct tgsi_exec_machine *machine, - struct quad_header *quad ); + unsigned (*run)(const struct sp_fragment_shader_variant *shader, + struct tgsi_exec_machine *machine, + struct quad_header *quad); + /* Deletes this instance of the object */ + void (*delete)(struct sp_fragment_shader_variant *shader); - void (*delete)( struct sp_fragment_shader * ); + struct sp_fragment_shader_variant *next; +}; + + +/** Subclass of pipe_shader_state */ +struct sp_fragment_shader { + struct pipe_shader_state shader; + struct sp_fragment_shader_variant *variants; + struct draw_fragment_shader *draw_shader; }; @@ -141,7 +152,7 @@ softpipe_set_framebuffer_state(struct pipe_context *, const struct pipe_framebuffer_state *); void -softpipe_update_derived( struct softpipe_context *softpipe ); +softpipe_update_derived(struct softpipe_context *softpipe, unsigned prim); void softpipe_draw_vbo(struct pipe_context *pipe, @@ -170,4 +181,16 @@ struct vertex_info * softpipe_get_vbuf_vertex_info(struct softpipe_context *softpipe); +struct sp_fragment_shader_variant * +softpipe_find_fs_variant(struct softpipe_context *softpipe, + struct sp_fragment_shader *fs, + const struct sp_fragment_shader_variant_key *key); + + +struct sp_fragment_shader_variant * +softpipe_find_fs_variant(struct softpipe_context *softpipe, + struct sp_fragment_shader *fs, + const struct sp_fragment_shader_variant_key *key); + + #endif diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index f9590eb0b24..fd688089a3e 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -25,8 +25,10 @@ * **************************************************************************/ +#include "util/u_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_pstipple.h" #include "pipe/p_shader_tokens.h" #include "draw/draw_context.h" #include "draw/draw_vertex.h" @@ -64,7 +66,7 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) if (vinfo->num_attribs == 0) { /* compute vertex layout now */ - const struct sp_fragment_shader *spfs = softpipe->fs; + const struct tgsi_shader_info *fsInfo = &softpipe->fs_variant->info; struct vertex_info *vinfo_vbuf = &softpipe->vertex_info_vbuf; const uint num = draw_num_shader_outputs(softpipe->draw); uint i; @@ -84,11 +86,11 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) * from the vertex shader. */ vinfo->num_attribs = 0; - for (i = 0; i < spfs->info.num_inputs; i++) { + for (i = 0; i < fsInfo->num_inputs; i++) { int src; enum interp_mode interp; - switch (spfs->info.input_interpolate[i]) { + switch (fsInfo->input_interpolate[i]) { case TGSI_INTERPOLATE_CONSTANT: interp = INTERP_CONSTANT; break; @@ -103,7 +105,7 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) interp = INTERP_LINEAR; } - switch (spfs->info.input_semantic_name[i]) { + switch (fsInfo->input_semantic_name[i]) { case TGSI_SEMANTIC_POSITION: interp = INTERP_POS; break; @@ -117,8 +119,8 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) /* this includes texcoords and varying vars */ src = draw_find_shader_output(softpipe->draw, - spfs->info.input_semantic_name[i], - spfs->info.input_semantic_index[i]); + fsInfo->input_semantic_name[i], + fsInfo->input_semantic_index[i]); draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); } @@ -241,10 +243,101 @@ update_tgsi_samplers( struct softpipe_context *softpipe ) } +static void +update_fragment_shader(struct softpipe_context *softpipe, unsigned prim) +{ + struct sp_fragment_shader_variant_key key; + + memset(&key, 0, sizeof(key)); + + if (prim == PIPE_PRIM_TRIANGLES) + key.polygon_stipple = softpipe->rasterizer->poly_stipple_enable; + + if (softpipe->fs) { + softpipe->fs_variant = softpipe_find_fs_variant(softpipe, + softpipe->fs, &key); + } + else { + softpipe->fs_variant = NULL; + } + + /* This would be the logical place to pass the fragment shader + * to the draw module. However, doing this here, during state + * validation, causes problems with the 'draw' module helpers for + * wide/AA/stippled lines. + * In principle, the draw's fragment shader should be per-variant + * but that doesn't work. So we use a single draw fragment shader + * per fragment shader, not per variant. + */ +#if 0 + if (softpipe->fs_variant) { + draw_bind_fragment_shader(softpipe->draw, + softpipe->fs_variant->draw_shader); + } + else { + draw_bind_fragment_shader(softpipe->draw, NULL); + } +#endif +} + + +/** + * This should be called when the polygon stipple pattern changes. + * We create a new texture from the stipple pattern and create a new + * sampler view. + */ +static void +update_polygon_stipple_pattern(struct softpipe_context *softpipe) +{ + struct pipe_resource *tex; + struct pipe_sampler_view *view; + + tex = util_pstipple_create_stipple_texture(&softpipe->pipe, + softpipe->poly_stipple.stipple); + pipe_resource_reference(&softpipe->pstipple.texture, tex); + + view = util_pstipple_create_sampler_view(&softpipe->pipe, tex); + pipe_sampler_view_reference(&softpipe->pstipple.sampler_view, view); +} + + +/** + * Should be called when polygon stipple is enabled/disabled or when + * the fragment shader changes. + * We add/update the fragment sampler and sampler views to sample from + * the polygon stipple texture. The texture unit that we use depends on + * the fragment shader (we need to use a unit not otherwise used by the + * shader). + */ +static void +update_polygon_stipple_enable(struct softpipe_context *softpipe, unsigned prim) +{ + if (prim == PIPE_PRIM_TRIANGLES && + softpipe->fs_variant->key.polygon_stipple) { + const unsigned unit = softpipe->fs_variant->stipple_sampler_unit; + + assert(unit >= softpipe->num_fragment_samplers); + + /* sampler state */ + softpipe->fragment_samplers[unit] = softpipe->pstipple.sampler; + + /* sampler view */ + pipe_sampler_view_reference(&softpipe->fragment_sampler_views[unit], + softpipe->pstipple.sampler_view); + + sp_tex_tile_cache_set_sampler_view(softpipe->fragment_tex_cache[unit], + softpipe->pstipple.sampler_view); + + softpipe->dirty |= SP_NEW_SAMPLER; + } +} + + /* Hopefully this will remain quite simple, otherwise need to pull in * something like the state tracker mechanism. */ -void softpipe_update_derived( struct softpipe_context *softpipe ) +void +softpipe_update_derived(struct softpipe_context *softpipe, unsigned prim) { struct softpipe_screen *sp_screen = softpipe_screen(softpipe->pipe.screen); @@ -254,7 +347,24 @@ void softpipe_update_derived( struct softpipe_context *softpipe ) softpipe->tex_timestamp = sp_screen->timestamp; softpipe->dirty |= SP_NEW_TEXTURE; } - + +#if DO_PSTIPPLE_IN_HELPER_MODULE + if (softpipe->dirty & SP_NEW_STIPPLE) + /* before updating samplers! */ + update_polygon_stipple_pattern(softpipe); +#endif + + if (softpipe->dirty & (SP_NEW_RASTERIZER | + SP_NEW_FS)) + update_fragment_shader(softpipe, prim); + +#if DO_PSTIPPLE_IN_HELPER_MODULE + if (softpipe->dirty & (SP_NEW_RASTERIZER | + SP_NEW_STIPPLE | + SP_NEW_FS)) + update_polygon_stipple_enable(softpipe, prim); +#endif + if (softpipe->dirty & (SP_NEW_SAMPLER | SP_NEW_TEXTURE | SP_NEW_FS | diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c index 60331bc4976..16023c990a7 100644 --- a/src/gallium/drivers/softpipe/sp_state_sampler.c +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -373,8 +373,9 @@ softpipe_reset_sampler_variants(struct softpipe_context *softpipe) } } - for (i = 0; i <= softpipe->fs->info.file_max[TGSI_FILE_SAMPLER]; i++) { + for (i = 0; i <= softpipe->fs_variant->info.file_max[TGSI_FILE_SAMPLER]; i++) { if (softpipe->fragment_samplers[i]) { + assert(softpipe->fragment_sampler_views[i]->texture); softpipe->tgsi.frag_samplers_list[i] = get_sampler_variant( i, sp_sampler(softpipe->fragment_samplers[i]), diff --git a/src/gallium/drivers/softpipe/sp_state_shader.c b/src/gallium/drivers/softpipe/sp_state_shader.c index 3dec5de3cc4..da895270aa9 100644 --- a/src/gallium/drivers/softpipe/sp_state_shader.c +++ b/src/gallium/drivers/softpipe/sp_state_shader.c @@ -33,6 +33,7 @@ #include "pipe/p_defines.h" #include "util/u_memory.h" #include "util/u_inlines.h" +#include "util/u_pstipple.h" #include "draw/draw_context.h" #include "draw/draw_vs.h" #include "draw/draw_gs.h" @@ -42,46 +43,105 @@ #include "tgsi/tgsi_parse.h" +/** + * Create a new fragment shader variant. + */ +static struct sp_fragment_shader_variant * +create_fs_variant(struct softpipe_context *softpipe, + struct sp_fragment_shader *fs, + const struct sp_fragment_shader_variant_key *key) +{ + struct sp_fragment_shader_variant *var; + struct pipe_shader_state *stipple_fs = NULL, *curfs = &fs->shader; + unsigned unit = 0; + + if (key->polygon_stipple) { + /* get new shader that implements polygon stippling */ + stipple_fs = util_pstipple_create_fragment_shader(&softpipe->pipe, + curfs, &unit); + curfs = stipple_fs; + } + + /* codegen, create variant object */ + var = softpipe_create_fs_variant_sse(softpipe, curfs); + if (!var) { + var = softpipe_create_fs_variant_exec(softpipe, curfs); + } + + if (var) { + var->key = *key; + var->tokens = tgsi_dup_tokens(curfs->tokens); + var->stipple_sampler_unit = unit; + + tgsi_scan_shader(var->tokens, &var->info); + + /* See comments elsewhere about draw fragment shaders */ +#if 0 + /* draw's fs state */ + var->draw_shader = draw_create_fragment_shader(softpipe->draw, + &fs->shader); + if (!var->draw_shader) { + var->delete(var); + FREE((void *) var->tokens); + return NULL; + } +#endif + + /* insert variant into linked list */ + var->next = fs->variants; + fs->variants = var; + } + + if (stipple_fs) { + free((void *) stipple_fs->tokens); + free(stipple_fs); + } + + return var; +} + + +struct sp_fragment_shader_variant * +softpipe_find_fs_variant(struct softpipe_context *sp, + struct sp_fragment_shader *fs, + const struct sp_fragment_shader_variant_key *key) +{ + struct sp_fragment_shader_variant *var; + + for (var = fs->variants; var; var = var->next) { + if (memcmp(&var->key, key, sizeof(*key)) == 0) { + /* found it */ + return var; + } + } + + return create_fs_variant(sp, fs, key); +} + + static void * softpipe_create_fs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) { struct softpipe_context *softpipe = softpipe_context(pipe); - struct sp_fragment_shader *state; - unsigned i; + struct sp_fragment_shader *state = CALLOC_STRUCT(sp_fragment_shader); /* debug */ if (softpipe->dump_fs) tgsi_dump(templ->tokens, 0); - /* codegen */ - state = softpipe_create_fs_sse( softpipe, templ ); - if (!state) { - state = softpipe_create_fs_exec( softpipe, templ ); - } - - if (!state) - return NULL; + /* we need to keep a local copy of the tokens */ + state->shader.tokens = tgsi_dup_tokens(templ->tokens); /* draw's fs state */ - state->draw_shader = draw_create_fragment_shader(softpipe->draw, templ); + state->draw_shader = draw_create_fragment_shader(softpipe->draw, + &state->shader); if (!state->draw_shader) { - state->delete( state ); + FREE((void *) state->shader.tokens); + FREE(state); return NULL; } - /* get/save the summary info for this shader */ - tgsi_scan_shader(templ->tokens, &state->info); - - for (i = 0; i < state->info.num_properties; ++i) { - if (state->info.properties[i].name == TGSI_PROPERTY_FS_COORD_ORIGIN) - state->origin_lower_left = state->info.properties[i].data[0]; - else if (state->info.properties[i].name == TGSI_PROPERTY_FS_COORD_PIXEL_CENTER) - state->pixel_center_integer = state->info.properties[i].data[0]; - else if (state->info.properties[i].name == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) - state->color0_writes_all_cbufs = state->info.properties[i].data[0]; - } - return state; } @@ -90,6 +150,7 @@ static void softpipe_bind_fs_state(struct pipe_context *pipe, void *fs) { struct softpipe_context *softpipe = softpipe_context(pipe); + struct sp_fragment_shader *state = (struct sp_fragment_shader *) fs; if (softpipe->fs == fs) return; @@ -98,8 +159,14 @@ softpipe_bind_fs_state(struct pipe_context *pipe, void *fs) softpipe->fs = fs; - draw_bind_fragment_shader(softpipe->draw, - (softpipe->fs ? softpipe->fs->draw_shader : NULL)); + if (fs == NULL) + softpipe->fs_variant = NULL; + + if (state) + draw_bind_fragment_shader(softpipe->draw, + state->draw_shader); + else + draw_bind_fragment_shader(softpipe->draw, NULL); softpipe->dirty |= SP_NEW_FS; } @@ -110,8 +177,9 @@ softpipe_delete_fs_state(struct pipe_context *pipe, void *fs) { struct softpipe_context *softpipe = softpipe_context(pipe); struct sp_fragment_shader *state = fs; + struct sp_fragment_shader_variant *var, *next_var; - assert(fs != softpipe_context(pipe)->fs); + assert(fs != softpipe->fs); if (softpipe->fs_machine->Tokens == state->shader.tokens) { /* unbind the shader from the tgsi executor if we're @@ -120,9 +188,23 @@ softpipe_delete_fs_state(struct pipe_context *pipe, void *fs) tgsi_exec_machine_bind_shader(softpipe->fs_machine, NULL, 0, NULL); } + /* delete variants */ + for (var = state->variants; var; var = next_var) { + next_var = var->next; + + assert(var != softpipe->fs_variant); + + /* See comments elsewhere about draw fragment shaders */ +#if 0 + draw_delete_fragment_shader(softpipe->draw, var->draw_shader); +#endif + + var->delete(var); + } + draw_delete_fragment_shader(softpipe->draw, state->draw_shader); - state->delete( state ); + FREE((void *) state->shader.tokens); } diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index f7309480bb9..89c6536b1f4 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -2566,7 +2566,114 @@ sp_sampler_variant_destroy( struct sp_sampler_variant *samp ) FREE(samp); } +static void +sample_get_dims(struct tgsi_sampler *tgsi_sampler, int level, + int dims[4]) +{ + struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); + const struct pipe_sampler_view *view = samp->view; + const struct pipe_resource *texture = view->texture; + + /* undefined according to EXT_gpu_program */ + level += view->u.tex.first_level; + if (level > view->u.tex.last_level) + return; + + dims[0] = u_minify(texture->width0, level); + + switch(texture->target) { + case PIPE_TEXTURE_1D_ARRAY: + dims[1] = texture->array_size; + /* fallthrough */ + case PIPE_TEXTURE_1D: + case PIPE_BUFFER: + return; + case PIPE_TEXTURE_2D_ARRAY: + dims[2] = texture->array_size; + /* fallthrough */ + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_RECT: + dims[1] = u_minify(texture->height0, level); + return; + case PIPE_TEXTURE_3D: + dims[1] = u_minify(texture->height0, level); + dims[2] = u_minify(texture->depth0, level); + return; + default: + assert(!"unexpected texture target in sample_get_dims()"); + return; + } +} + +/* this function is only used for unfiltered texel gets + via the TGSI TXF opcode. */ +static void +sample_get_texels(struct tgsi_sampler *tgsi_sampler, + const int v_i[QUAD_SIZE], + const int v_j[QUAD_SIZE], + const int v_k[QUAD_SIZE], + const int lod[QUAD_SIZE], + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); + union tex_tile_address addr; + const struct pipe_resource *texture = samp->view->texture; + int j, c; + const float *tx; + + addr.value = 0; + /* TODO write a better test for LOD */ + addr.bits.level = lod[0]; + switch(texture->target) { + case PIPE_TEXTURE_1D: + for (j = 0; j < QUAD_SIZE; j++) { + tx = get_texel_2d(samp, addr, v_i[j], 0); + for (c = 0; c < 4; c++) { + rgba[c][j] = tx[c]; + } + } + break; + case PIPE_TEXTURE_1D_ARRAY: + for (j = 0; j < QUAD_SIZE; j++) { + tx = get_texel_1d_array(samp, addr, v_i[j], v_j[j]); + for (c = 0; c < 4; c++) { + rgba[c][j] = tx[c]; + } + } + break; + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: + for (j = 0; j < QUAD_SIZE; j++) { + tx = get_texel_2d(samp, addr, v_i[j], v_j[j]); + for (c = 0; c < 4; c++) { + rgba[c][j] = tx[c]; + } + } + break; + case PIPE_TEXTURE_2D_ARRAY: + for (j = 0; j < QUAD_SIZE; j++) { + tx = get_texel_2d_array(samp, addr, v_i[j], v_j[j], v_k[j]); + for (c = 0; c < 4; c++) { + rgba[c][j] = tx[c]; + } + } + break; + case PIPE_TEXTURE_3D: + for (j = 0; j < QUAD_SIZE; j++) { + tx = get_texel_3d(samp, addr, v_i[j], v_j[j], v_k[j]); + for (c = 0; c < 4; c++) { + rgba[c][j] = tx[c]; + } + } + break; + case PIPE_TEXTURE_CUBE: /* TXF can't work on CUBE according to spec */ + default: + assert(!"Unknown or CUBE texture type in TXF processing\n"); + break; + } +} /** * Create a sampler variant for a given set of non-orthogonal state. */ @@ -2692,5 +2799,7 @@ sp_create_sampler_variant( const struct pipe_sampler_state *sampler, samp->base.get_samples = samp->sample_target; } + samp->base.get_dims = sample_get_dims; + samp->base.get_texel = sample_get_texels; return samp; } diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index b847cf331b3..4a60f635825 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -71,13 +71,22 @@ svga_get_vendor( struct pipe_screen *pscreen ) static const char * svga_get_name( struct pipe_screen *pscreen ) { + const char *build = "", *llvm = "", *mutex = ""; + static char name[100]; #ifdef DEBUG /* Only return internal details in the DEBUG version: */ - return "SVGA3D; build: DEBUG; mutex: " PIPE_ATOMIC; + build = "build: DEBUG;"; + mutex = "mutex: " PIPE_ATOMIC ";"; +#ifdef HAVE_LLVM + llvm = "LLVM;"; +#endif #else - return "SVGA3D; build: RELEASE; "; + build = "build: RELEASE;"; #endif + + util_snprintf(name, sizeof(name), "SVGA3D; %s %s %s", build, mutex, llvm); + return name; } @@ -245,6 +254,8 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en return 0; case PIPE_SHADER_CAP_SUBROUTINES: return 0; + case PIPE_SHADER_CAP_INTEGERS: + return 0; } break; case PIPE_SHADER_VERTEX: @@ -286,6 +297,8 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en return 1; case PIPE_SHADER_CAP_SUBROUTINES: return 0; + case PIPE_SHADER_CAP_INTEGERS: + return 0; default: break; } diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h index 8c788f4bb0e..05de9ff7cd0 100644 --- a/src/gallium/include/pipe/p_compiler.h +++ b/src/gallium/include/pipe/p_compiler.h @@ -67,7 +67,9 @@ extern "C" { #if !defined(__HAIKU__) && !defined(__USE_MISC) +#if !defined(PIPE_OS_ANDROID) typedef unsigned int uint; +#endif typedef unsigned short ushort; #endif typedef unsigned char ubyte; diff --git a/src/gallium/include/pipe/p_config.h b/src/gallium/include/pipe/p_config.h index eea3d79e64b..b3a7b337bc6 100644 --- a/src/gallium/include/pipe/p_config.h +++ b/src/gallium/include/pipe/p_config.h @@ -99,9 +99,9 @@ #endif #endif -#if defined(__PPC__) +#if defined(__ppc__) || defined(__ppc64__) || defined(__PPC__) #define PIPE_ARCH_PPC -#if defined(__PPC64__) +#if defined(__ppc64__) || defined(__PPC64__) #define PIPE_ARCH_PPC_64 #endif #endif @@ -120,6 +120,15 @@ # define PIPE_ARCH_BIG_ENDIAN #endif +#elif defined(__APPLE__) +#include <machine/endian.h> + +#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN +# define PIPE_ARCH_LITTLE_ENDIAN +#elif __DARWIN_BYTE_ORDER == __DARWIN_BIG_ENDIAN +# define PIPE_ARCH_BIG_ENDIAN +#endif + #else #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) @@ -145,6 +154,14 @@ #define PIPE_OS_UNIX #endif +/* + * Android defines __linux__ so PIPE_OS_LINUX and PIPE_OS_UNIX will also be + * defined. + */ +#if defined(ANDROID) +#define PIPE_OS_ANDROID +#endif + #if defined(__FreeBSD__) #define PIPE_OS_FREEBSD #define PIPE_OS_BSD diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index 3f6d90d1bf4..da3ee87515f 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -49,6 +49,7 @@ struct pipe_index_buffer; struct pipe_query; struct pipe_poly_stipple; struct pipe_rasterizer_state; +struct pipe_resolve_info; struct pipe_resource; struct pipe_sampler_state; struct pipe_sampler_view; @@ -268,13 +269,10 @@ struct pipe_context { /** * Resolve a multisampled resource into a non-multisampled one. - * Source and destination must have the same size and same format. + * Source and destination must be of the same format. */ void (*resource_resolve)(struct pipe_context *pipe, - struct pipe_resource *dst, - unsigned dst_layer, - struct pipe_resource *src, - unsigned src_layer); + const struct pipe_resolve_info *info); /*@}*/ diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index 79b89699566..795de1fbf62 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -99,6 +99,9 @@ enum pipe_error { #define PIPE_MASK_B 0x4 #define PIPE_MASK_A 0x8 #define PIPE_MASK_RGBA 0xf +#define PIPE_MASK_Z 0x10 +#define PIPE_MASK_S 0x20 +#define PIPE_MASK_ZS 0x30 /** @@ -468,6 +471,7 @@ enum pipe_cap { PIPE_CAP_MIXED_COLORBUFFER_FORMATS = 46, PIPE_CAP_SEAMLESS_CUBE_MAP = 47, PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE = 48, + PIPE_CAP_SCALED_RESOLVE = 49 }; /* Shader caps not specific to any single stage */ @@ -491,6 +495,7 @@ enum pipe_shader_cap PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR = 14, PIPE_SHADER_CAP_INDIRECT_CONST_ADDR = 15, PIPE_SHADER_CAP_SUBROUTINES = 16, /* BGNSUB, ENDSUB, CAL, RET */ + PIPE_SHADER_CAP_INTEGERS = 17 }; diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index d442c15c02a..840b3ee0e37 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -483,6 +483,34 @@ struct pipe_draw_info }; +/** + * Information to describe a resource_resolve call. + */ +struct pipe_resolve_info +{ + struct { + struct pipe_resource *res; + unsigned level; + unsigned layer; + int x0; /**< always left */ + int y0; /**< always top */ + int x1; /**< determines scale if PIPE_CAP_SCALED_RESOLVE is supported */ + int y1; /**< determines scale if PIPE_CAP_SCALED_RESOLVE is supported */ + } dst; + + struct { + struct pipe_resource *res; + unsigned layer; + int x0; + int y0; + int x1; /**< may be < x0 only if PIPE_CAP_SCALED_RESOLVE is supported */ + int y1; /**< may be < y1 even if PIPE_CAP_SCALED_RESOLVE not supported */ + } src; + + unsigned mask; /**< PIPE_MASK_RGBA, Z, S or ZS */ +}; + + #ifdef __cplusplus } #endif diff --git a/src/gallium/include/pipe/p_video_decoder.h b/src/gallium/include/pipe/p_video_decoder.h index f063d8f3a1b..2aa4001c179 100644 --- a/src/gallium/include/pipe/p_video_decoder.h +++ b/src/gallium/include/pipe/p_video_decoder.h @@ -59,75 +59,74 @@ struct pipe_video_decoder void (*destroy)(struct pipe_video_decoder *decoder); /** - * Creates a buffer as decoding input + * Creates a decoder buffer */ - struct pipe_video_decode_buffer *(*create_buffer)(struct pipe_video_decoder *decoder); + void *(*create_buffer)(struct pipe_video_decoder *decoder); /** - * flush decoder buffer to video hardware + * Destroys a decoder buffer */ - void (*flush_buffer)(struct pipe_video_decode_buffer *decbuf, - unsigned num_ycbcr_blocks[3], - struct pipe_video_buffer *ref_frames[2], - struct pipe_video_buffer *dst); -}; - -/** - * input buffer for a decoder - */ -struct pipe_video_decode_buffer -{ - struct pipe_video_decoder *decoder; + void (*destroy_buffer)(struct pipe_video_decoder *decoder, void *buffer); /** - * destroy this decode buffer + * set the current decoder buffer */ - void (*destroy)(struct pipe_video_decode_buffer *decbuf); + void (*set_decode_buffer)(struct pipe_video_decoder *decoder, void *buffer); /** - * map the input buffer into memory before starting decoding + * set the picture parameters for the next frame + * only used for bitstream decoding */ - void (*begin_frame)(struct pipe_video_decode_buffer *decbuf); + void (*set_picture_parameters)(struct pipe_video_decoder *decoder, + struct pipe_picture_desc *picture); /** * set the quantification matrixes */ - void (*set_quant_matrix)(struct pipe_video_decode_buffer *decbuf, - const uint8_t intra_matrix[64], - const uint8_t non_intra_matrix[64]); + void (*set_quant_matrix)(struct pipe_video_decoder *decoder, + const struct pipe_quant_matrix *matrix); /** - * get the pointer where to put the ycbcr blocks of a component + * set target where video data is decoded to */ - struct pipe_ycbcr_block *(*get_ycbcr_stream)(struct pipe_video_decode_buffer *, int component); + void (*set_decode_target)(struct pipe_video_decoder *decoder, + struct pipe_video_buffer *target); /** - * get the pointer where to put the ycbcr dct block data of a component + * set reference frames for motion compensation */ - short *(*get_ycbcr_buffer)(struct pipe_video_decode_buffer *, int component); + void (*set_reference_frames)(struct pipe_video_decoder *decoder, + struct pipe_video_buffer **ref_frames, + unsigned num_ref_frames); /** - * get the stride of the mv buffer + * start decoding of a new frame */ - unsigned (*get_mv_stream_stride)(struct pipe_video_decode_buffer *decbuf); + void (*begin_frame)(struct pipe_video_decoder *decoder); /** - * get the pointer where to put the motion vectors of a ref frame + * decode a macroblock */ - struct pipe_motionvector *(*get_mv_stream)(struct pipe_video_decode_buffer *decbuf, int ref_frame); + void (*decode_macroblock)(struct pipe_video_decoder *decoder, + const struct pipe_macroblock *macroblocks, + unsigned num_macroblocks); /** * decode a bitstream */ - void (*decode_bitstream)(struct pipe_video_decode_buffer *decbuf, - unsigned num_bytes, const void *data, - struct pipe_picture_desc *picture, - unsigned num_ycbcr_blocks[3]); + void (*decode_bitstream)(struct pipe_video_decoder *decoder, + unsigned num_bytes, const void *data); + + /** + * end decoding of the current frame + */ + void (*end_frame)(struct pipe_video_decoder *decoder); /** - * unmap decoder buffer before flushing + * flush any outstanding command buffers to the hardware + * should be called before a video_buffer is acessed by the state tracker again */ - void (*end_frame)(struct pipe_video_decode_buffer *decbuf); + void (*flush)(struct pipe_video_decoder *decoder); }; /** diff --git a/src/gallium/include/pipe/p_video_enums.h b/src/gallium/include/pipe/p_video_enums.h index 492ab84e33f..ea25a25883d 100644 --- a/src/gallium/include/pipe/p_video_enums.h +++ b/src/gallium/include/pipe/p_video_enums.h @@ -51,6 +51,7 @@ enum pipe_video_cap PIPE_VIDEO_CAP_NPOT_TEXTURES = 1, PIPE_VIDEO_CAP_MAX_WIDTH = 2, PIPE_VIDEO_CAP_MAX_HEIGHT = 3, + PIPE_VIDEO_CAP_NUM_BUFFERS_DESIRED = 4 }; enum pipe_video_codec diff --git a/src/gallium/include/pipe/p_video_state.h b/src/gallium/include/pipe/p_video_state.h index 2a64ffb5601..f655ed411f4 100644 --- a/src/gallium/include/pipe/p_video_state.h +++ b/src/gallium/include/pipe/p_video_state.h @@ -43,65 +43,82 @@ struct pipe_video_rect unsigned x, y, w, h; }; -enum pipe_mpeg12_picture_type +/* + * see table 6-12 in the spec + */ +enum pipe_mpeg12_picture_coding_type { - PIPE_MPEG12_PICTURE_TYPE_FIELD_TOP, - PIPE_MPEG12_PICTURE_TYPE_FIELD_BOTTOM, - PIPE_MPEG12_PICTURE_TYPE_FRAME + PIPE_MPEG12_PICTURE_CODING_TYPE_I = 0x01, + PIPE_MPEG12_PICTURE_CODING_TYPE_P = 0x02, + PIPE_MPEG12_PICTURE_CODING_TYPE_B = 0x03, + PIPE_MPEG12_PICTURE_CODING_TYPE_D = 0x04 }; -enum pipe_mpeg12_dct_intra +/* + * see table 6-14 in the spec + */ +enum pipe_mpeg12_picture_structure { - PIPE_MPEG12_DCT_DELTA = 0, - PIPE_MPEG12_DCT_INTRA = 1 + PIPE_MPEG12_PICTURE_STRUCTURE_RESERVED = 0x00, + PIPE_MPEG12_PICTURE_STRUCTURE_FIELD_TOP = 0x01, + PIPE_MPEG12_PICTURE_STRUCTURE_FIELD_BOTTOM = 0x02, + PIPE_MPEG12_PICTURE_STRUCTURE_FRAME = 0x03 }; -enum pipe_mpeg12_dct_type +/* + * flags for macroblock_type, see section 6.3.17.1 in the spec + */ +enum pipe_mpeg12_macroblock_type { - PIPE_MPEG12_DCT_TYPE_FRAME = 0, - PIPE_MPEG12_DCT_TYPE_FIELD = 1 + PIPE_MPEG12_MB_TYPE_QUANT = 0x01, + PIPE_MPEG12_MB_TYPE_MOTION_FORWARD = 0x02, + PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD = 0x04, + PIPE_MPEG12_MB_TYPE_PATTERN = 0x08, + PIPE_MPEG12_MB_TYPE_INTRA = 0x10 }; -enum pipe_video_field_select +/* + * flags for motion_type, see table 6-17 and 6-18 in the spec + */ +enum pipe_mpeg12_motion_type { - PIPE_VIDEO_FRAME = 0, - PIPE_VIDEO_TOP_FIELD = 1, - PIPE_VIDEO_BOTTOM_FIELD = 3, - - /* TODO - PIPE_VIDEO_DUALPRIME - PIPE_VIDEO_16x8 - */ + PIPE_MPEG12_MO_TYPE_RESERVED = 0x00, + PIPE_MPEG12_MO_TYPE_FIELD = 0x01, + PIPE_MPEG12_MO_TYPE_FRAME = 0x02, + PIPE_MPEG12_MO_TYPE_16x8 = 0x02, + PIPE_MPEG12_MO_TYPE_DUAL_PRIME = 0x03 }; -enum pipe_video_mv_weight +/* + * see section 6.3.17.1 and table 6-19 in the spec + */ +enum pipe_mpeg12_dct_type { - PIPE_VIDEO_MV_WEIGHT_MIN = 0, - PIPE_VIDEO_MV_WEIGHT_HALF = 128, - PIPE_VIDEO_MV_WEIGHT_MAX = 256 + PIPE_MPEG12_DCT_TYPE_FRAME = 0, + PIPE_MPEG12_DCT_TYPE_FIELD = 1 }; -/* bitfields because this is used as a vertex buffer element */ -struct pipe_motionvector +enum pipe_mpeg12_field_select { - struct { - short x, y; - ushort field_select; /**< enum pipe_video_field_select */ - ushort weight; /**< enum pipe_video_mv_weight */ - } top, bottom; + PIPE_MPEG12_FS_FIRST_FORWARD = 0x01, + PIPE_MPEG12_FS_FIRST_BACKWARD = 0x02, + PIPE_MPEG12_FS_SECOND_FORWARD = 0x04, + PIPE_MPEG12_FS_SECOND_BACKWARD = 0x08 }; -/* bitfields because this is used as a vertex buffer element */ -struct pipe_ycbcr_block +struct pipe_picture_desc { - ubyte x, y; - ubyte intra; /**< enum pipe_mpeg12_dct_intra */ - ubyte coding; /**< enum pipe_mpeg12_dct_type */ + enum pipe_video_profile profile; }; -struct pipe_picture_desc +struct pipe_quant_matrix { - enum pipe_video_profile profile; + enum pipe_video_codec codec; +}; + +struct pipe_macroblock +{ + enum pipe_video_codec codec; }; struct pipe_mpeg12_picture_desc @@ -115,9 +132,58 @@ struct pipe_mpeg12_picture_desc unsigned alternate_scan; unsigned intra_vlc_format; unsigned concealment_motion_vectors; + unsigned intra_dc_precision; unsigned f_code[2][2]; }; +struct pipe_mpeg12_quant_matrix +{ + struct pipe_quant_matrix base; + + const uint8_t *intra_matrix; + const uint8_t *non_intra_matrix; +}; + +struct pipe_mpeg12_macroblock +{ + struct pipe_macroblock base; + + /* see section 6.3.17 in the spec */ + unsigned short x, y; + + /* see section 6.3.17.1 in the spec */ + unsigned char macroblock_type; + + union { + struct { + /* see table 6-17 in the spec */ + unsigned int frame_motion_type:2; + + /* see table 6-18 in the spec */ + unsigned int field_motion_type:2; + + /* see table 6-19 in the spec */ + unsigned int dct_type:1; + } bits; + unsigned int value; + } macroblock_modes; + + /* see section 6.3.17.2 in the spec */ + unsigned char motion_vertical_field_select; + + /* see Table 7-7 in the spec */ + short PMV[2][2][2]; + + /* see figure 6.10-12 in the spec */ + unsigned short coded_block_pattern; + + /* see figure 6.10-12 in the spec */ + short *blocks; + + /* Number of skipped macroblocks after this macroblock */ + unsigned short num_skipped_macroblocks; +}; + #ifdef __cplusplus } #endif diff --git a/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h index 12f2aaddc91..0a31cf10a34 100644 --- a/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h +++ b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h @@ -352,9 +352,9 @@ struct GalliumD3D10Device : public GalliumD3D10ScreenImpl<threadsafe> { for(unsigned i = 0; i < count; ++i) { - if(constbufs[i] != constant_buffers[s][i].p) + if(constbufs[i] != constant_buffers[s][start + i].p) { - constant_buffers[s][i] = constbufs[i]; + constant_buffers[s][start + i] = constbufs[i]; if(s < caps.stages && start + i < caps.constant_buffers[s]) pipe->set_constant_buffer(pipe, s, start + i, constbufs[i] ? constbufs[i]->resource : NULL); } @@ -391,11 +391,12 @@ struct GalliumD3D10Device : public GalliumD3D10ScreenImpl<threadsafe> { samplers[s][start + i] = samps[i]; sampler_csos[s].v[start + i] = samps[i] ? samps[i]->object : default_sampler; + last_different = i; } if(last_different >= 0) { num_samplers[s] = std::max(num_samplers[s], start + last_different + 1); - update_flags |= (UPDATE_SAMPLERS_SHIFT + s); + update_flags |= 1 << (UPDATE_SAMPLERS_SHIFT + s); } } } @@ -1726,9 +1727,26 @@ changed: SYNCHRONIZED; GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)dst_resource; GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)src_resource; - unsigned dst_layer = d3d11_subresource_to_face(dst->resource, dst_subresource); - unsigned src_layer = d3d11_subresource_to_face(src->resource, src_subresource); - pipe->resource_resolve(pipe, dst->resource, dst_layer, src->resource, src_layer); + struct pipe_resolve_info info; + + info.dst.res = dst->resource; + info.src.res = src->resource; + info.dst.level = 0; + info.dst.layer = d3d11_subresource_to_face(dst->resource, dst_subresource); + info.src.layer = d3d11_subresource_to_face(src->resource, src_subresource); + + info.src.x0 = 0; + info.src.x1 = info.src.res->width0; + info.src.y0 = 0; + info.src.y1 = info.src.res->height0; + info.dst.x0 = 0; + info.dst.x1 = info.dst.res->width0; + info.dst.y0 = 0; + info.dst.y1 = info.dst.res->height0; + + info.mask = PIPE_MASK_RGBA | PIPE_MASK_ZS; + + pipe->resource_resolve(pipe, &info); } #if API >= 11 diff --git a/src/gallium/state_trackers/dri/common/dri_context.c b/src/gallium/state_trackers/dri/common/dri_context.c index e6612b1911d..bc8dacba1b7 100644 --- a/src/gallium/state_trackers/dri/common/dri_context.c +++ b/src/gallium/state_trackers/dri/common/dri_context.c @@ -48,6 +48,16 @@ dri_init_extensions(struct dri_context *ctx) driInitExtensions(st->ctx, NULL, GL_FALSE); } +static void +dri_pp_query(struct dri_context *ctx) +{ + unsigned int i; + + for (i = 0; i < PP_FILTERS; i++) { + ctx->pp_enabled[i] = driQueryOptioni(&ctx->optionCache, pp_filters[i].name); + } +} + GLboolean dri_create_context(gl_api api, const struct gl_config * visual, __DRIcontext * cPriv, void *sharedContextPrivate) @@ -105,6 +115,11 @@ dri_create_context(gl_api api, const struct gl_config * visual, if (api == API_OPENGL) dri_init_extensions(ctx); + // Context successfully created. See if post-processing is requested. + dri_pp_query(ctx); + + ctx->pp = pp_init(screen->base.screen, ctx->pp_enabled); + return GL_TRUE; fail: @@ -134,6 +149,8 @@ dri_destroy_context(__DRIcontext * cPriv) ctx->st->flush(ctx->st, 0, NULL); ctx->st->destroy(ctx->st); + if (ctx->pp) pp_free(ctx->pp); + FREE(ctx); } @@ -187,6 +204,13 @@ dri_make_current(__DRIcontext * cPriv, ctx->stapi->make_current(ctx->stapi, ctx->st, &draw->base, &read->base); + // This is ok to call here. If they are already init, it's a no-op. + if (draw->textures[ST_ATTACHMENT_BACK_LEFT] && draw->textures[ST_ATTACHMENT_DEPTH_STENCIL] + && ctx->pp) + pp_init_fbos(ctx->pp, draw->textures[ST_ATTACHMENT_BACK_LEFT]->width0, + draw->textures[ST_ATTACHMENT_BACK_LEFT]->height0, + draw->textures[ST_ATTACHMENT_DEPTH_STENCIL]); + return GL_TRUE; } diff --git a/src/gallium/state_trackers/dri/common/dri_context.h b/src/gallium/state_trackers/dri/common/dri_context.h index 35105e861f9..cfc8e3345e5 100644 --- a/src/gallium/state_trackers/dri/common/dri_context.h +++ b/src/gallium/state_trackers/dri/common/dri_context.h @@ -34,6 +34,7 @@ #include "pipe/p_compiler.h" #include "dri_wrapper.h" +#include "postprocess/filters.h" struct pipe_context; struct pipe_fence; @@ -61,6 +62,8 @@ struct dri_context /* gallium */ struct st_api *stapi; struct st_context_iface *st; + struct pp_queue_t *pp; + unsigned int pp_enabled[PP_FILTERS]; }; static INLINE struct dri_context * diff --git a/src/gallium/state_trackers/dri/common/dri_screen.c b/src/gallium/state_trackers/dri/common/dri_screen.c index 5931df993b0..dcb6fdf8f3c 100644 --- a/src/gallium/state_trackers/dri/common/dri_screen.c +++ b/src/gallium/state_trackers/dri/common/dri_screen.c @@ -42,15 +42,25 @@ #include "util/u_debug.h" PUBLIC const char __driConfigOptions[] = - DRI_CONF_BEGIN DRI_CONF_SECTION_PERFORMANCE - DRI_CONF_FTHROTTLE_MODE(DRI_CONF_FTHROTTLE_IRQS) - DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0) - DRI_CONF_SECTION_END DRI_CONF_SECTION_QUALITY -/* DRI_CONF_FORCE_S3TC_ENABLE(false) */ - DRI_CONF_ALLOW_LARGE_TEXTURES(1) - DRI_CONF_SECTION_END DRI_CONF_END; - -static const uint __driNConfigOptions = 3; + DRI_CONF_BEGIN + DRI_CONF_SECTION_PERFORMANCE + DRI_CONF_FTHROTTLE_MODE(DRI_CONF_FTHROTTLE_IRQS) + DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0) + DRI_CONF_SECTION_END + + DRI_CONF_SECTION_QUALITY +/* DRI_CONF_FORCE_S3TC_ENABLE(false) */ + DRI_CONF_ALLOW_LARGE_TEXTURES(1) + DRI_CONF_PP_CELSHADE(0) + DRI_CONF_PP_NORED(0) + DRI_CONF_PP_NOGREEN(0) + DRI_CONF_PP_NOBLUE(0) + DRI_CONF_PP_JIMENEZMLAA(0, 0, 32) + DRI_CONF_PP_JIMENEZMLAA_COLOR(0, 0, 32) + DRI_CONF_SECTION_END + DRI_CONF_END; + +static const uint __driNConfigOptions = 9; static const __DRIconfig ** dri_fill_in_modes(struct dri_screen *screen, diff --git a/src/gallium/state_trackers/dri/drm/dri2.c b/src/gallium/state_trackers/dri/drm/dri2.c index fe4ddb312be..6cf237577ec 100644 --- a/src/gallium/state_trackers/dri/drm/dri2.c +++ b/src/gallium/state_trackers/dri/drm/dri2.c @@ -44,8 +44,19 @@ * DRI2 flush extension. */ static void -dri2_flush_drawable(__DRIdrawable *draw) +dri2_flush_drawable(__DRIdrawable *dPriv) { + struct dri_context *ctx = dri_get_current(dPriv->driScreenPriv); + struct dri_drawable *drawable = dri_drawable(dPriv); + + struct pipe_resource *ptex = drawable->textures[ST_ATTACHMENT_BACK_LEFT]; + + if (ctx) { + if (ptex && ctx->pp && drawable->textures[ST_ATTACHMENT_DEPTH_STENCIL]) + pp_run(ctx->pp, ptex, ptex, drawable->textures[ST_ATTACHMENT_DEPTH_STENCIL]); + + ctx->st->flush(ctx->st, 0, NULL); + } } static void @@ -266,7 +277,6 @@ dri2_allocate_buffer(__DRIscreen *sPriv, struct dri_screen *screen = dri_screen(sPriv); struct dri2_buffer *buffer; struct pipe_resource templ; - enum st_attachment_type statt; enum pipe_format pf; unsigned bind = 0; struct winsys_handle whandle; @@ -274,22 +284,16 @@ dri2_allocate_buffer(__DRIscreen *sPriv, switch (attachment) { case __DRI_BUFFER_FRONT_LEFT: case __DRI_BUFFER_FAKE_FRONT_LEFT: - statt = ST_ATTACHMENT_FRONT_LEFT; bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW; break; case __DRI_BUFFER_BACK_LEFT: - statt = ST_ATTACHMENT_BACK_LEFT; bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW; break; case __DRI_BUFFER_DEPTH: case __DRI_BUFFER_DEPTH_STENCIL: case __DRI_BUFFER_STENCIL: - statt = ST_ATTACHMENT_DEPTH_STENCIL; bind = PIPE_BIND_DEPTH_STENCIL; /* XXX sampler? */ break; - default: - statt = ST_ATTACHMENT_INVALID; - break; } switch (format) { @@ -662,20 +666,6 @@ fail: } static boolean -dri2_create_context(gl_api api, const struct gl_config * visual, - __DRIcontext * cPriv, void *sharedContextPrivate) -{ - struct dri_context *ctx = NULL; - - if (!dri_create_context(api, visual, cPriv, sharedContextPrivate)) - return FALSE; - - ctx = cPriv->driverPrivate; - - return TRUE; -} - -static boolean dri2_create_buffer(__DRIscreen * sPriv, __DRIdrawable * dPriv, const struct gl_config * visual, boolean isPixmap) @@ -702,7 +692,7 @@ const struct __DriverAPIRec driDriverAPI = { .InitScreen = NULL, .InitScreen2 = dri2_init_screen, .DestroyScreen = dri_destroy_screen, - .CreateContext = dri2_create_context, + .CreateContext = dri_create_context, .DestroyContext = dri_destroy_context, .CreateBuffer = dri2_create_buffer, .DestroyBuffer = dri_destroy_buffer, diff --git a/src/gallium/state_trackers/dri/sw/drisw.c b/src/gallium/state_trackers/dri/sw/drisw.c index a1879a8f46a..082df55e8ea 100644 --- a/src/gallium/state_trackers/dri/sw/drisw.c +++ b/src/gallium/state_trackers/dri/sw/drisw.c @@ -136,6 +136,9 @@ drisw_swap_buffers(__DRIdrawable *dPriv) ptex = drawable->textures[ST_ATTACHMENT_BACK_LEFT]; if (ptex) { + if (ctx->pp && drawable->textures[ST_ATTACHMENT_DEPTH_STENCIL]) + pp_run(ctx->pp, ptex, ptex, drawable->textures[ST_ATTACHMENT_DEPTH_STENCIL]); + ctx->st->flush(ctx->st, ST_FLUSH_FRONT, NULL); drisw_copy_to_front(dPriv, ptex); diff --git a/src/gallium/state_trackers/egl/Android.mk b/src/gallium/state_trackers/egl/Android.mk new file mode 100644 index 00000000000..e459bd4655e --- /dev/null +++ b/src/gallium/state_trackers/egl/Android.mk @@ -0,0 +1,54 @@ +# Mesa 3-D graphics library +# +# Copyright (C) 2010-2011 Chia-I Wu <[email protected]> +# Copyright (C) 2010-2011 LunarG Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +LOCAL_PATH := $(call my-dir) + +common_SOURCES := \ + common/egl_g3d.c \ + common/egl_g3d_api.c \ + common/egl_g3d_image.c \ + common/egl_g3d_st.c \ + common/egl_g3d_sync.c \ + common/native_helper.c + +android_SOURCES := \ + android/native_android.cpp + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := \ + $(common_SOURCES) \ + $(android_SOURCES) + +LOCAL_CFLAGS := -DHAVE_ANDROID_BACKEND + +LOCAL_C_INCLUDES := \ + $(GALLIUM_TOP)/state_trackers/egl \ + $(GALLIUM_TOP)/winsys/sw \ + $(MESA_TOP)/src/egl/main \ + $(DRM_GRALLOC_TOP) + +LOCAL_MODULE := libmesa_st_egl + +include $(GALLIUM_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/src/gallium/state_trackers/egl/android/native_android.cpp b/src/gallium/state_trackers/egl/android/native_android.cpp new file mode 100644 index 00000000000..211d6a2aeef --- /dev/null +++ b/src/gallium/state_trackers/egl/android/native_android.cpp @@ -0,0 +1,835 @@ +/* + * Mesa 3-D graphics library + * Version: 7.12 + * + * Copyright (C) 2010-2011 Chia-I Wu <[email protected]> + * Copyright (C) 2010-2011 LunarG Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#define LOG_TAG "EGL-GALLIUM" +#include <cutils/log.h> +#include <cutils/properties.h> +#include <hardware/gralloc.h> +#include <utils/Errors.h> +#include <ui/android_native_buffer.h> + +extern "C" { +#include "egllog.h" +} + +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "util/u_box.h" +#include "common/native.h" +#include "common/native_helper.h" +#include "android/android_sw_winsys.h" +#include "state_tracker/drm_driver.h" + +struct android_config; + +struct android_display { + struct native_display base; + + boolean use_drm; + const struct native_event_handler *event_handler; + struct android_config *configs; + int num_configs; +}; + +struct android_surface { + struct native_surface base; + + struct android_display *adpy; + android_native_window_t *win; + + /* staging color buffer for when buffer preserving is enabled */ + struct pipe_resource *color_res; + + uint stamp; + android_native_buffer_t *buf; + struct pipe_resource *buf_res; + + /* cache the current back buffers */ + struct { + int width; + int height; + int format; + } cache_key; + void *cache_handles[2]; + struct pipe_resource *cache_resources[2]; +}; + +struct android_config { + struct native_config base; +}; + +static INLINE struct android_display * +android_display(const struct native_display *ndpy) +{ + return (struct android_display *) ndpy; +} + +static INLINE struct android_surface * +android_surface(const struct native_surface *nsurf) +{ + return (struct android_surface *) nsurf; +} + +static INLINE struct android_config * +android_config(const struct native_config *nconf) +{ + return (struct android_config *) nconf; +} + +namespace android { + +static enum pipe_format +get_pipe_format(int native) +{ + enum pipe_format fmt; + + switch (native) { + case HAL_PIXEL_FORMAT_RGBA_8888: + fmt = PIPE_FORMAT_R8G8B8A8_UNORM; + break; + case HAL_PIXEL_FORMAT_RGBX_8888: + fmt = PIPE_FORMAT_R8G8B8X8_UNORM; + break; + case HAL_PIXEL_FORMAT_RGB_888: + fmt = PIPE_FORMAT_R8G8B8_UNORM; + break; + case HAL_PIXEL_FORMAT_RGB_565: + fmt = PIPE_FORMAT_B5G6R5_UNORM; + break; + case HAL_PIXEL_FORMAT_BGRA_8888: + fmt = PIPE_FORMAT_B8G8R8A8_UNORM; + break; + case HAL_PIXEL_FORMAT_RGBA_5551: + /* fmt = PIPE_FORMAT_A1B5G5R5_UNORM; */ + case HAL_PIXEL_FORMAT_RGBA_4444: + /* fmt = PIPE_FORMAT_A4B4G4R4_UNORM; */ + default: + LOGE("unsupported native format 0x%x", native); + fmt = PIPE_FORMAT_NONE; + break; + } + + return fmt; +} + +#include <gralloc_drm_handle.h> +static int +get_handle_name(buffer_handle_t handle) +{ + struct gralloc_drm_handle_t *dh; + + /* check that the buffer is allocated by drm_gralloc and cast */ + dh = gralloc_drm_handle(handle); + + return (dh) ? dh->name : 0; +} + +/** + * Import an android_native_buffer_t allocated by the server. + */ +static struct pipe_resource * +import_buffer(struct android_display *adpy, const struct pipe_resource *templ, + struct android_native_buffer_t *abuf) +{ + struct pipe_screen *screen = adpy->base.screen; + struct pipe_resource *res; + + if (templ->bind & PIPE_BIND_RENDER_TARGET) { + if (!screen->is_format_supported(screen, templ->format, + templ->target, 0, PIPE_BIND_RENDER_TARGET)) + LOGW("importing unsupported buffer as render target"); + } + if (templ->bind & PIPE_BIND_SAMPLER_VIEW) { + if (!screen->is_format_supported(screen, templ->format, + templ->target, 0, PIPE_BIND_SAMPLER_VIEW)) + LOGW("importing unsupported buffer as sampler view"); + } + + if (adpy->use_drm) { + struct winsys_handle handle; + + memset(&handle, 0, sizeof(handle)); + handle.type = DRM_API_HANDLE_TYPE_SHARED; + /* for DRM, we need the GEM name */ + handle.handle = get_handle_name(abuf->handle); + if (!handle.handle) { + LOGE("unable to import invalid buffer %p", abuf); + return NULL; + } + + handle.stride = + abuf->stride * util_format_get_blocksize(templ->format); + + res = screen->resource_from_handle(screen, templ, &handle); + } + else { + struct android_winsys_handle handle; + + memset(&handle, 0, sizeof(handle)); + handle.handle = abuf->handle; + handle.stride = + abuf->stride * util_format_get_blocksize(templ->format); + + res = screen->resource_from_handle(screen, + templ, (struct winsys_handle *) &handle); + } + + if (!res) + LOGE("failed to import buffer %p", abuf); + + return res; +} + +static void +android_surface_clear_cache(struct native_surface *nsurf) +{ + struct android_surface *asurf = android_surface(nsurf); + int i; + + for (i = 0; i < Elements(asurf->cache_handles); i++) { + asurf->cache_handles[i] = NULL; + pipe_resource_reference(&asurf->cache_resources[i], NULL); + } + + memset(&asurf->cache_key, 0, sizeof(asurf->cache_key)); +} + +static struct pipe_resource * +android_surface_add_cache(struct native_surface *nsurf, + struct android_native_buffer_t *abuf) +{ + struct android_surface *asurf = android_surface(nsurf); + void *handle; + int idx; + + /* how about abuf->usage? */ + if (asurf->cache_key.width != abuf->width || + asurf->cache_key.height != abuf->height || + asurf->cache_key.format != abuf->format) + android_surface_clear_cache(&asurf->base); + + if (asurf->adpy->use_drm) + handle = (void *) get_handle_name(abuf->handle); + else + handle = (void *) abuf->handle; + /* NULL is invalid */ + if (!handle) { + LOGE("invalid buffer native buffer %p", abuf); + return NULL; + } + + /* find the slot to use */ + for (idx = 0; idx < Elements(asurf->cache_handles); idx++) { + if (asurf->cache_handles[idx] == handle || !asurf->cache_handles[idx]) + break; + } + if (idx == Elements(asurf->cache_handles)) { + LOGW("cache full: buf %p, width %d, height %d, format %d, usage 0x%x", + abuf, abuf->width, abuf->height, abuf->format, abuf->usage); + android_surface_clear_cache(&asurf->base); + idx = 0; + } + + if (idx == 0) { + asurf->cache_key.width = abuf->width; + asurf->cache_key.height = abuf->height; + asurf->cache_key.format = abuf->format; + } + + if (!asurf->cache_handles[idx]) { + struct pipe_resource templ; + + assert(!asurf->cache_resources[idx]); + + memset(&templ, 0, sizeof(templ)); + templ.target = PIPE_TEXTURE_2D; + templ.format = get_pipe_format(asurf->buf->format); + templ.bind = PIPE_BIND_RENDER_TARGET; + if (!asurf->adpy->use_drm) { + templ.bind |= PIPE_BIND_TRANSFER_WRITE | + PIPE_BIND_TRANSFER_READ; + } + + templ.width0 = asurf->buf->width; + templ.height0 = asurf->buf->height; + templ.depth0 = 1; + templ.array_size = 1; + + if (templ.format != PIPE_FORMAT_NONE) { + asurf->cache_resources[idx] = + import_buffer(asurf->adpy, &templ, asurf->buf); + } + else { + asurf->cache_resources[idx] = NULL; + } + + asurf->cache_handles[idx] = handle; + } + + return asurf->cache_resources[idx]; +} + +/** + * Dequeue the next back buffer for rendering. + */ +static boolean +android_surface_dequeue_buffer(struct native_surface *nsurf) +{ + struct android_surface *asurf = android_surface(nsurf); + struct pipe_resource *res; + + if (asurf->win->dequeueBuffer(asurf->win, &asurf->buf) != NO_ERROR) { + LOGE("failed to dequeue window %p", asurf->win); + return FALSE; + } + + asurf->buf->common.incRef(&asurf->buf->common); + asurf->win->lockBuffer(asurf->win, asurf->buf); + + res = android_surface_add_cache(&asurf->base, asurf->buf); + if (!res) + return FALSE; + + pipe_resource_reference(&asurf->buf_res, res); + + return TRUE; +} + +/** + * Enqueue the back buffer. This will make it the next front buffer. + */ +static boolean +android_surface_enqueue_buffer(struct native_surface *nsurf) +{ + struct android_surface *asurf = android_surface(nsurf); + + pipe_resource_reference(&asurf->buf_res, NULL); + + asurf->win->queueBuffer(asurf->win, asurf->buf); + + asurf->buf->common.decRef(&asurf->buf->common); + asurf->buf = NULL; + + return TRUE; +} + +static boolean +android_surface_swap_buffers(struct native_surface *nsurf) +{ + struct android_surface *asurf = android_surface(nsurf); + struct android_display *adpy = asurf->adpy; + + if (!asurf->buf) + return TRUE; + + android_surface_enqueue_buffer(&asurf->base); + + asurf->stamp++; + adpy->event_handler->invalid_surface(&adpy->base, + &asurf->base, asurf->stamp); + + return TRUE; +} + +static void +copy_resources(struct native_display *ndpy, + struct pipe_resource *src, + struct pipe_resource *dst) +{ + struct pipe_context *pipe; + struct pipe_box box; + + pipe = ndpy_get_copy_context(ndpy); + if (!pipe) + return; + + u_box_origin_2d(src->width0, src->height0, &box); + pipe->resource_copy_region(pipe, dst, 0, 0, 0, 0, src, 0, &box); + pipe->flush(pipe, NULL); +} + +static boolean +android_surface_present(struct native_surface *nsurf, + enum native_attachment natt, + boolean preserve, + uint swap_interval) +{ + struct android_surface *asurf = android_surface(nsurf); + struct android_display *adpy = asurf->adpy; + boolean ret; + + if (swap_interval || natt != NATIVE_ATTACHMENT_BACK_LEFT) + return FALSE; + + /* we always render to color_res first when it exists */ + if (asurf->color_res) { + copy_resources(&adpy->base, asurf->color_res, asurf->buf_res); + if (!preserve) + pipe_resource_reference(&asurf->color_res, NULL); + } + else if (preserve) { + struct pipe_resource templ; + + memset(&templ, 0, sizeof(templ)); + templ.target = asurf->buf_res->target; + templ.format = asurf->buf_res->format; + templ.bind = PIPE_BIND_RENDER_TARGET; + templ.width0 = asurf->buf_res->width0; + templ.height0 = asurf->buf_res->height0; + templ.depth0 = asurf->buf_res->depth0; + templ.array_size = asurf->buf_res->array_size; + + asurf->color_res = + adpy->base.screen->resource_create(adpy->base.screen, &templ); + if (!asurf->color_res) + return FALSE; + + /* preserve the contents */ + copy_resources(&adpy->base, asurf->buf_res, asurf->color_res); + } + + return android_surface_swap_buffers(nsurf); +} + +static boolean +android_surface_validate(struct native_surface *nsurf, uint attachment_mask, + unsigned int *seq_num, struct pipe_resource **textures, + int *width, int *height) +{ + struct android_surface *asurf = android_surface(nsurf); + struct winsys_handle handle; + + if (!asurf->buf) { + if (!android_surface_dequeue_buffer(&asurf->base)) + return FALSE; + + /* color_res must be compatible with buf_res */ + if (asurf->color_res && + (asurf->color_res->format != asurf->buf_res->format || + asurf->color_res->width0 != asurf->buf_res->width0 || + asurf->color_res->height0 != asurf->buf_res->height0)) + pipe_resource_reference(&asurf->color_res, NULL); + } + + if (textures) { + /* we have access to only the back buffer */ + const enum native_attachment att = NATIVE_ATTACHMENT_BACK_LEFT; + + if (native_attachment_mask_test(attachment_mask, att)) { + textures[att] = NULL; + pipe_resource_reference(&textures[att], + (asurf->color_res) ? asurf->color_res : asurf->buf_res); + } + } + + if (seq_num) + *seq_num = asurf->stamp; + if (width) + *width = asurf->buf->width; + if (height) + *height = asurf->buf->height; + + return TRUE; +} + +static void +android_surface_wait(struct native_surface *nsurf) +{ +} + +static void +android_surface_destroy(struct native_surface *nsurf) +{ + struct android_surface *asurf = android_surface(nsurf); + int i; + + pipe_resource_reference(&asurf->color_res, NULL); + + if (asurf->buf) + android_surface_enqueue_buffer(&asurf->base); + + android_surface_clear_cache(&asurf->base); + + asurf->win->common.decRef(&asurf->win->common); + + FREE(asurf); +} + +static struct native_surface * +android_display_create_window_surface(struct native_display *ndpy, + EGLNativeWindowType win, + const struct native_config *nconf) +{ + struct android_display *adpy = android_display(ndpy); + struct android_config *aconf = android_config(nconf); + struct android_surface *asurf; + enum pipe_format format; + int val; + + if (win->common.magic != ANDROID_NATIVE_WINDOW_MAGIC) { + LOGE("invalid native window with magic 0x%x", win->common.magic); + return NULL; + } + if (win->query(win, NATIVE_WINDOW_FORMAT, &val)) { + LOGE("failed to query native window format"); + return NULL; + } + format = get_pipe_format(val); + if (format != nconf->color_format) { + LOGW("native window format 0x%x != config format 0x%x", + format, nconf->color_format); + if (!adpy->base.screen->is_format_supported(adpy->base.screen, + format, PIPE_TEXTURE_2D, 0, PIPE_BIND_RENDER_TARGET)) { + LOGE("and the native window cannot be used as a render target"); + return NULL; + } + } + + asurf = CALLOC_STRUCT(android_surface); + if (!asurf) + return NULL; + + asurf->adpy = adpy; + asurf->win = win; + asurf->win->common.incRef(&asurf->win->common); + + /* request buffers that are for CPU access */ + if (!adpy->use_drm) { + native_window_set_usage(asurf->win, + GRALLOC_USAGE_SW_READ_OFTEN | GRALLOC_USAGE_SW_WRITE_OFTEN); + } + + asurf->base.destroy = android_surface_destroy; + asurf->base.present = android_surface_present; + asurf->base.validate = android_surface_validate; + asurf->base.wait = android_surface_wait; + + return &asurf->base; +} + +static boolean +android_display_init_configs(struct native_display *ndpy) +{ + struct android_display *adpy = android_display(ndpy); + const int native_formats[] = { + HAL_PIXEL_FORMAT_RGBA_8888, + HAL_PIXEL_FORMAT_RGBX_8888, + HAL_PIXEL_FORMAT_RGB_888, + HAL_PIXEL_FORMAT_RGB_565, + HAL_PIXEL_FORMAT_BGRA_8888, + }; + int i; + + adpy->configs = (struct android_config *) + CALLOC(Elements(native_formats), sizeof(*adpy->configs)); + if (!adpy->configs) + return FALSE; + + for (i = 0; i < Elements(native_formats); i++) { + enum pipe_format color_format; + struct android_config *aconf; + + color_format = get_pipe_format(native_formats[i]); + if (color_format == PIPE_FORMAT_NONE || + !adpy->base.screen->is_format_supported(adpy->base.screen, + color_format, PIPE_TEXTURE_2D, 0, PIPE_BIND_RENDER_TARGET)) { + LOGI("skip unsupported native format 0x%x", native_formats[i]); + continue; + } + + aconf = &adpy->configs[adpy->num_configs++]; + /* only the back buffer */ + aconf->base.buffer_mask = 1 << NATIVE_ATTACHMENT_BACK_LEFT; + aconf->base.color_format = color_format; + aconf->base.window_bit = TRUE; + + aconf->base.native_visual_id = native_formats[i]; + aconf->base.native_visual_type = native_formats[i]; + } + + return TRUE; +} + +static boolean +android_display_init_drm(struct native_display *ndpy) +{ + struct android_display *adpy = android_display(ndpy); + const hw_module_t *mod; + int fd, err; + + /* get the authorized fd from gralloc */ + err = hw_get_module(GRALLOC_HARDWARE_MODULE_ID, &mod); + if (!err) { + const gralloc_module_t *gr = (gralloc_module_t *) mod; + + err = -EINVAL; + if (gr->perform) + err = gr->perform(gr, GRALLOC_MODULE_PERFORM_GET_DRM_FD, &fd); + } + if (!err && fd >= 0) { + adpy->base.screen = + adpy->event_handler->new_drm_screen(&adpy->base, NULL, fd); + } + + if (adpy->base.screen) { + LOGI("using DRM screen"); + return TRUE; + } + else { + LOGW("failed to create DRM screen"); + LOGW("will fall back to other EGL drivers if any"); + return FALSE; + } +} + +static boolean +android_display_init_sw(struct native_display *ndpy) +{ + struct android_display *adpy = android_display(ndpy); + struct sw_winsys *ws; + + ws = android_create_sw_winsys(); + if (ws) { + adpy->base.screen = + adpy->event_handler->new_sw_screen(&adpy->base, ws); + } + + if (adpy->base.screen) { + LOGI("using SW screen"); + return TRUE; + } + else { + LOGE("failed to create SW screen"); + return FALSE; + } +} + +static boolean +android_display_init_screen(struct native_display *ndpy) +{ + struct android_display *adpy = android_display(ndpy); + + if (adpy->use_drm) + android_display_init_drm(&adpy->base); + else + android_display_init_sw(&adpy->base); + + if (!adpy->base.screen) + return FALSE; + + if (!android_display_init_configs(&adpy->base)) { + adpy->base.screen->destroy(adpy->base.screen); + adpy->base.screen = NULL; + return FALSE; + } + + return TRUE; +} + +static void +android_display_destroy(struct native_display *ndpy) +{ + struct android_display *adpy = android_display(ndpy); + + FREE(adpy->configs); + if (adpy->base.screen) + adpy->base.screen->destroy(adpy->base.screen); + FREE(adpy); +} + +static const struct native_config ** +android_display_get_configs(struct native_display *ndpy, int *num_configs) +{ + struct android_display *adpy = android_display(ndpy); + const struct native_config **configs; + int i; + + configs = (const struct native_config **) + MALLOC(adpy->num_configs * sizeof(*configs)); + if (configs) { + for (i = 0; i < adpy->num_configs; i++) + configs[i] = (const struct native_config *) &adpy->configs[i]; + if (num_configs) + *num_configs = adpy->num_configs; + } + + return configs; +} + +static int +android_display_get_param(struct native_display *ndpy, + enum native_param_type param) +{ + int val; + + switch (param) { + case NATIVE_PARAM_PRESERVE_BUFFER: + val = 1; + break; + default: + val = 0; + break; + } + + return val; +} + +static struct pipe_resource * +android_display_import_buffer(struct native_display *ndpy, + struct native_buffer *nbuf) +{ + struct android_display *adpy = android_display(ndpy); + struct android_native_buffer_t *abuf; + enum pipe_format format; + struct pipe_resource templ; + + if (nbuf->type != NATIVE_BUFFER_ANDROID) + return NULL; + + abuf = nbuf->u.android; + + if (!abuf || abuf->common.magic != ANDROID_NATIVE_BUFFER_MAGIC || + abuf->common.version != sizeof(*abuf)) { + LOGE("invalid android native buffer"); + return NULL; + } + + format = get_pipe_format(abuf->format); + if (format == PIPE_FORMAT_NONE) + return NULL; + + memset(&templ, 0, sizeof(templ)); + templ.target = PIPE_TEXTURE_2D; + templ.format = format; + /* assume for texturing only */ + templ.bind = PIPE_BIND_SAMPLER_VIEW; + templ.width0 = abuf->width; + templ.height0 = abuf->height; + templ.depth0 = 1; + templ.array_size = 1; + + return import_buffer(adpy, &templ, abuf); +} + +static boolean +android_display_export_buffer(struct native_display *ndpy, + struct pipe_resource *res, + struct native_buffer *nbuf) +{ + return FALSE; +} + +static struct native_display_buffer android_display_buffer = { + android_display_import_buffer, + android_display_export_buffer +}; + +static struct android_display * +android_display_create(const struct native_event_handler *event_handler, + boolean use_sw) +{ + struct android_display *adpy; + char value[PROPERTY_VALUE_MAX]; + boolean force_sw; + + /* check if SW renderer is forced */ + if (property_get("debug.mesa.software", value, NULL)) + force_sw = (atoi(value) != 0); + else + force_sw = debug_get_bool_option("EGL_SOFTWARE", FALSE); + if (force_sw) + use_sw = TRUE; + + adpy = CALLOC_STRUCT(android_display); + if (!adpy) + return NULL; + + adpy->event_handler = event_handler; + adpy->use_drm = !use_sw; + + adpy->base.init_screen = android_display_init_screen; + adpy->base.destroy = android_display_destroy; + adpy->base.get_param = android_display_get_param; + adpy->base.get_configs = android_display_get_configs; + adpy->base.create_window_surface = android_display_create_window_surface; + + adpy->base.buffer = &android_display_buffer; + + return adpy; +} + +static const struct native_event_handler *android_event_handler; + +static struct native_display * +native_create_display(void *dpy, boolean use_sw) +{ + struct android_display *adpy; + + adpy = android_display_create(android_event_handler, use_sw); + + return (adpy) ? &adpy->base : NULL; +} + +static const struct native_platform android_platform = { + "Android", /* name */ + native_create_display +}; + +}; /* namespace android */ + +using namespace android; + +static void +android_log(EGLint level, const char *msg) +{ + switch (level) { + case _EGL_DEBUG: + LOGD("%s", msg); + break; + case _EGL_INFO: + LOGI("%s", msg); + break; + case _EGL_WARNING: + LOGW("%s", msg); + break; + case _EGL_FATAL: + LOG_FATAL("%s", msg); + break; + default: + break; + } +} + +const struct native_platform * +native_get_android_platform(const struct native_event_handler *event_handler) +{ + android_event_handler = event_handler; + /* use Android logger */ + _eglSetLogProc(android_log); + + return &android_platform; +} diff --git a/src/gallium/state_trackers/egl/common/egl_g3d.c b/src/gallium/state_trackers/egl/common/egl_g3d.c index 6649f02b244..b5e3d99b811 100644 --- a/src/gallium/state_trackers/egl/common/egl_g3d.c +++ b/src/gallium/state_trackers/egl/common/egl_g3d.c @@ -132,6 +132,12 @@ egl_g3d_get_platform(_EGLDriver *drv, _EGLPlatformType plat) nplat = native_get_fbdev_platform(&egl_g3d_native_event_handler); #endif break; + case _EGL_PLATFORM_ANDROID: + plat_name = "Android"; +#ifdef HAVE_ANDROID_BACKEND + nplat = native_get_android_platform(&egl_g3d_native_event_handler); +#endif + break; default: break; } @@ -572,6 +578,11 @@ egl_g3d_initialize(_EGLDriver *drv, _EGLDisplay *dpy) if (dpy->Platform == _EGL_PLATFORM_WAYLAND && gdpy->native->buffer) dpy->Extensions.MESA_drm_image = EGL_TRUE; +#ifdef EGL_ANDROID_image_native_buffer + if (dpy->Platform == _EGL_PLATFORM_ANDROID && gdpy->native->buffer) + dpy->Extensions.ANDROID_image_native_buffer = EGL_TRUE; +#endif + #ifdef EGL_WL_bind_wayland_display if (gdpy->native->wayland_bufmgr) dpy->Extensions.WL_bind_wayland_display = EGL_TRUE; diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_image.c b/src/gallium/state_trackers/egl/common/egl_g3d_image.c index 7e9a29b0284..4d90c400319 100644 --- a/src/gallium/state_trackers/egl/common/egl_g3d_image.c +++ b/src/gallium/state_trackers/egl/common/egl_g3d_image.c @@ -202,6 +202,24 @@ egl_g3d_reference_wl_buffer(_EGLDisplay *dpy, struct wl_buffer *buffer, #endif /* EGL_WL_bind_wayland_display */ +#ifdef EGL_ANDROID_image_native_buffer + +static struct pipe_resource * +egl_g3d_reference_android_native_buffer(_EGLDisplay *dpy, + struct android_native_buffer_t *buf) +{ + struct egl_g3d_display *gdpy = egl_g3d_display(dpy); + struct native_buffer nbuf; + + memset(&nbuf, 0, sizeof(nbuf)); + nbuf.type = NATIVE_BUFFER_ANDROID; + nbuf.u.android = buf; + + return gdpy->native->buffer->import_buffer(gdpy->native, &nbuf); +} + +#endif /* EGL_ANDROID_image_native_buffer */ + _EGLImage * egl_g3d_create_image(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *ctx, EGLenum target, EGLClientBuffer buffer, @@ -239,6 +257,12 @@ egl_g3d_create_image(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *ctx, (struct wl_buffer *) buffer, &gimg->base, attribs); break; #endif +#ifdef EGL_ANDROID_image_native_buffer + case EGL_NATIVE_BUFFER_ANDROID: + ptex = egl_g3d_reference_android_native_buffer(dpy, + (struct android_native_buffer_t *) buffer); + break; +#endif default: ptex = NULL; break; diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_st.c b/src/gallium/state_trackers/egl/common/egl_g3d_st.c index 60c3e332ac9..b839f848d7b 100644 --- a/src/gallium/state_trackers/egl/common/egl_g3d_st.c +++ b/src/gallium/state_trackers/egl/common/egl_g3d_st.c @@ -126,7 +126,7 @@ pbuffer_reference_openvg_image(struct egl_g3d_surface *gsurf) } static void -pbuffer_allocate_render_texture(struct egl_g3d_surface *gsurf) +pbuffer_allocate_pbuffer_texture(struct egl_g3d_surface *gsurf) { struct egl_g3d_display *gdpy = egl_g3d_display(gsurf->base.Resource.Display); @@ -141,7 +141,8 @@ pbuffer_allocate_render_texture(struct egl_g3d_surface *gsurf) templ.depth0 = 1; templ.array_size = 1; templ.format = gsurf->stvis.color_format; - templ.bind = PIPE_BIND_RENDER_TARGET; + /* for rendering and binding to texture */ + templ.bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW; ptex = screen->resource_create(screen, &templ); gsurf->render_texture = ptex; @@ -166,7 +167,7 @@ egl_g3d_st_framebuffer_validate_pbuffer(struct st_framebuffer_iface *stfbi, if (!gsurf->render_texture) { switch (gsurf->client_buffer_type) { case EGL_NONE: - pbuffer_allocate_render_texture(gsurf); + pbuffer_allocate_pbuffer_texture(gsurf); break; case EGL_OPENVG_IMAGE: pbuffer_reference_openvg_image(gsurf); diff --git a/src/gallium/state_trackers/egl/common/native.h b/src/gallium/state_trackers/egl/common/native.h index fc50ee485fe..58593a489cd 100644 --- a/src/gallium/state_trackers/egl/common/native.h +++ b/src/gallium/state_trackers/egl/common/native.h @@ -293,6 +293,9 @@ native_get_drm_platform(const struct native_event_handler *event_handler); const struct native_platform * native_get_fbdev_platform(const struct native_event_handler *event_handler); +const struct native_platform * +native_get_android_platform(const struct native_event_handler *event_handler); + #ifdef __cplusplus } #endif diff --git a/src/gallium/state_trackers/egl/common/native_buffer.h b/src/gallium/state_trackers/egl/common/native_buffer.h index b8a66d17e12..503ed580b05 100644 --- a/src/gallium/state_trackers/egl/common/native_buffer.h +++ b/src/gallium/state_trackers/egl/common/native_buffer.h @@ -33,9 +33,11 @@ #include "pipe/p_state.h" struct native_display; +struct android_native_buffer_t; enum native_buffer_type { NATIVE_BUFFER_DRM, + NATIVE_BUFFER_ANDROID, NUM_NATIVE_BUFFERS }; @@ -50,6 +52,8 @@ struct native_buffer { unsigned handle; /**< the handle of the GEM object */ unsigned stride; } drm; + + struct android_native_buffer_t *android; /**< opaque native buffer */ } u; }; diff --git a/src/gallium/state_trackers/egl/drm/native_drm.c b/src/gallium/state_trackers/egl/drm/native_drm.c index 47910de8d3c..c013769e57d 100644 --- a/src/gallium/state_trackers/egl/drm/native_drm.c +++ b/src/gallium/state_trackers/egl/drm/native_drm.c @@ -134,8 +134,11 @@ drm_display_destroy(struct native_display *ndpy) if (drmdpy->device_name) FREE(drmdpy->device_name); - if (drmdpy->fd >= 0) - close(drmdpy->fd); + if (drmdpy->own_gbm) { + gbm_device_destroy(&drmdpy->gbmdrm->base.base); + if (drmdpy->fd >= 0) + close(drmdpy->fd); + } FREE(drmdpy); } @@ -258,7 +261,7 @@ drm_display_init_screen(struct native_display *ndpy) } static struct native_display * -drm_create_display(struct gbm_gallium_drm_device *gbmdrm, +drm_create_display(struct gbm_gallium_drm_device *gbmdrm, int own_gbm, const struct native_event_handler *event_handler) { struct drm_display *drmdpy; @@ -267,6 +270,8 @@ drm_create_display(struct gbm_gallium_drm_device *gbmdrm, if (!drmdpy) return NULL; + drmdpy->gbmdrm = gbmdrm; + drmdpy->own_gbm = own_gbm; drmdpy->fd = gbmdrm->base.base.fd; drmdpy->device_name = drm_get_device_name(drmdpy->fd); @@ -302,22 +307,30 @@ native_create_display(void *dpy, boolean use_sw) { struct gbm_gallium_drm_device *gbm; int fd; + int own_gbm = 0; gbm = dpy; if (gbm == NULL) { fd = open("/dev/dri/card0", O_RDWR); + /* FIXME: Use an internal constructor to create a gbm + * device with gallium backend directly, without setenv */ + setenv("GBM_BACKEND", "gbm_gallium_drm.so", 1); gbm = gbm_gallium_drm_device(gbm_create_device(fd)); + own_gbm = 1; } if (gbm == NULL) return NULL; if (strcmp(gbm_device_get_backend_name(&gbm->base.base), "drm") != 0 || - gbm->base.type != GBM_DRM_DRIVER_TYPE_GALLIUM) + gbm->base.type != GBM_DRM_DRIVER_TYPE_GALLIUM) { + if (own_gbm) + gbm_device_destroy(&gbm->base.base); return NULL; + } - return drm_create_display(gbm, drm_event_handler); + return drm_create_display(gbm, own_gbm, drm_event_handler); } static const struct native_platform drm_platform = { diff --git a/src/gallium/state_trackers/egl/drm/native_drm.h b/src/gallium/state_trackers/egl/drm/native_drm.h index 675a58a1922..18cebf4e276 100644 --- a/src/gallium/state_trackers/egl/drm/native_drm.h +++ b/src/gallium/state_trackers/egl/drm/native_drm.h @@ -41,6 +41,8 @@ #include "common/native_wayland_drm_bufmgr_helper.h" #endif +#include "gbm_gallium_drmint.h" + struct drm_config; struct drm_crtc; struct drm_connector; @@ -52,6 +54,8 @@ struct drm_display { const struct native_event_handler *event_handler; + struct gbm_gallium_drm_device *gbmdrm; + int own_gbm; int fd; char *device_name; struct drm_config *config; diff --git a/src/gallium/state_trackers/vdpau/decode.c b/src/gallium/state_trackers/vdpau/decode.c index 269c7a4baf8..50d63ea3f73 100644 --- a/src/gallium/state_trackers/vdpau/decode.c +++ b/src/gallium/state_trackers/vdpau/decode.c @@ -82,13 +82,22 @@ vlVdpDecoderCreate(VdpDevice device, goto error_decoder; } + vldecoder->num_buffers = pipe->screen->get_video_param + ( + pipe->screen, p_profile, + PIPE_VIDEO_CAP_NUM_BUFFERS_DESIRED + ); vldecoder->cur_buffer = 0; - for (i = 0; i < VL_NUM_DECODE_BUFFERS; ++i) { - vldecoder->buffer[i] = vldecoder->decoder->create_buffer(vldecoder->decoder); - if (!vldecoder->buffer[i]) { + vldecoder->buffers = CALLOC(vldecoder->num_buffers, sizeof(void*)); + if (!vldecoder->buffers) + goto error_alloc_buffers; + + for (i = 0; i < vldecoder->num_buffers; ++i) { + vldecoder->buffers[i] = vldecoder->decoder->create_buffer(vldecoder->decoder); + if (!vldecoder->buffers[i]) { ret = VDP_STATUS_ERROR; - goto error_buffer; + goto error_create_buffers; } } @@ -103,11 +112,15 @@ vlVdpDecoderCreate(VdpDevice device, return VDP_STATUS_OK; error_handle: -error_buffer: +error_create_buffers: + + for (i = 0; i < vldecoder->num_buffers; ++i) + if (vldecoder->buffers[i]) + vldecoder->decoder->destroy_buffer(vldecoder->decoder, vldecoder->buffers[i]); - for (i = 0; i < VL_NUM_DECODE_BUFFERS; ++i) - if (vldecoder->buffer[i]) - vldecoder->buffer[i]->destroy(vldecoder->buffer[i]); + FREE(vldecoder->buffers); + +error_alloc_buffers: vldecoder->decoder->destroy(vldecoder->decoder); @@ -128,9 +141,11 @@ vlVdpDecoderDestroy(VdpDecoder decoder) if (!vldecoder) return VDP_STATUS_INVALID_HANDLE; - for (i = 0; i < VL_NUM_DECODE_BUFFERS; ++i) - if (vldecoder->buffer[i]) - vldecoder->buffer[i]->destroy(vldecoder->buffer[i]); + for (i = 0; i < vldecoder->num_buffers; ++i) + if (vldecoder->buffers[i]) + vldecoder->decoder->destroy_buffer(vldecoder->decoder, vldecoder->buffers[i]); + + FREE(vldecoder->buffers); vldecoder->decoder->destroy(vldecoder->decoder); @@ -161,38 +176,37 @@ vlVdpDecoderGetParameters(VdpDecoder decoder, } static VdpStatus -vlVdpDecoderRenderMpeg2(struct pipe_video_decoder *decoder, - struct pipe_video_decode_buffer *buffer, - struct pipe_video_buffer *target, - VdpPictureInfoMPEG1Or2 *picture_info, - uint32_t bitstream_buffer_count, - VdpBitstreamBuffer const *bitstream_buffers) +vlVdpDecoderRenderMpeg12(struct pipe_video_decoder *decoder, + VdpPictureInfoMPEG1Or2 *picture_info, + uint32_t bitstream_buffer_count, + VdpBitstreamBuffer const *bitstream_buffers) { struct pipe_mpeg12_picture_desc picture; + struct pipe_mpeg12_quant_matrix quant; struct pipe_video_buffer *ref_frames[2]; - uint8_t intra_quantizer_matrix[64]; - unsigned num_ycbcr_blocks[3] = { 0, 0, 0 }; unsigned i; VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Decoding MPEG2\n"); + i = 0; + /* if surfaces equals VDP_STATUS_INVALID_HANDLE, they are not used */ - if (picture_info->forward_reference == VDP_INVALID_HANDLE) - ref_frames[0] = NULL; - else { - ref_frames[0] = ((vlVdpSurface *)vlGetDataHTAB(picture_info->forward_reference))->video_buffer; - if (!ref_frames[0]) + if (picture_info->forward_reference != VDP_INVALID_HANDLE) { + ref_frames[i] = ((vlVdpSurface *)vlGetDataHTAB(picture_info->forward_reference))->video_buffer; + if (!ref_frames[i]) return VDP_STATUS_INVALID_HANDLE; + ++i; } - if (picture_info->backward_reference == VDP_INVALID_HANDLE) - ref_frames[1] = NULL; - else { - ref_frames[1] = ((vlVdpSurface *)vlGetDataHTAB(picture_info->backward_reference))->video_buffer; - if (!ref_frames[1]) + if (picture_info->backward_reference != VDP_INVALID_HANDLE) { + ref_frames[i] = ((vlVdpSurface *)vlGetDataHTAB(picture_info->backward_reference))->video_buffer; + if (!ref_frames[i]) return VDP_STATUS_INVALID_HANDLE; + ++i; } + decoder->set_reference_frames(decoder, ref_frames, i); + memset(&picture, 0, sizeof(picture)); picture.base.profile = decoder->profile; picture.picture_coding_type = picture_info->picture_coding_type; @@ -202,24 +216,28 @@ vlVdpDecoderRenderMpeg2(struct pipe_video_decoder *decoder, picture.alternate_scan = picture_info->alternate_scan; picture.intra_vlc_format = picture_info->intra_vlc_format; picture.concealment_motion_vectors = picture_info->concealment_motion_vectors; + picture.intra_dc_precision = picture_info->intra_dc_precision; picture.f_code[0][0] = picture_info->f_code[0][0] - 1; picture.f_code[0][1] = picture_info->f_code[0][1] - 1; picture.f_code[1][0] = picture_info->f_code[1][0] - 1; picture.f_code[1][1] = picture_info->f_code[1][1] - 1; - buffer->begin_frame(buffer); + decoder->set_picture_parameters(decoder, &picture.base); - memcpy(intra_quantizer_matrix, picture_info->intra_quantizer_matrix, sizeof(intra_quantizer_matrix)); - intra_quantizer_matrix[0] = 1 << (7 - picture_info->intra_dc_precision); - buffer->set_quant_matrix(buffer, intra_quantizer_matrix, picture_info->non_intra_quantizer_matrix); + memset(&quant, 0, sizeof(quant)); + quant.base.codec = PIPE_VIDEO_CODEC_MPEG12; + quant.intra_matrix = picture_info->intra_quantizer_matrix; + quant.non_intra_matrix = picture_info->non_intra_quantizer_matrix; - for (i = 0; i < bitstream_buffer_count; ++i) - buffer->decode_bitstream(buffer, bitstream_buffers[i].bitstream_bytes, - bitstream_buffers[i].bitstream, &picture.base, num_ycbcr_blocks); + decoder->set_quant_matrix(decoder, &quant.base); + + decoder->begin_frame(decoder); - buffer->end_frame(buffer); + for (i = 0; i < bitstream_buffer_count; ++i) + decoder->decode_bitstream(decoder, bitstream_buffers[i].bitstream_bytes, + bitstream_buffers[i].bitstream); - decoder->flush_buffer(buffer, num_ycbcr_blocks, ref_frames, target); + decoder->end_frame(decoder); return VDP_STATUS_OK; } @@ -254,17 +272,19 @@ vlVdpDecoderRender(VdpDecoder decoder, // TODO: Recreate decoder with correct chroma return VDP_STATUS_INVALID_CHROMA_TYPE; - // TODO: Right now only mpeg2 is supported. + // TODO: Right now only mpeg 1 & 2 is supported. switch (vldecoder->decoder->profile) { + case PIPE_VIDEO_PROFILE_MPEG1: case PIPE_VIDEO_PROFILE_MPEG2_SIMPLE: case PIPE_VIDEO_PROFILE_MPEG2_MAIN: ++vldecoder->cur_buffer; - vldecoder->cur_buffer %= VL_NUM_DECODE_BUFFERS; - return vlVdpDecoderRenderMpeg2(vldecoder->decoder, - vldecoder->buffer[vldecoder->cur_buffer], - vlsurf->video_buffer, - (VdpPictureInfoMPEG1Or2 *)picture_info, - bitstream_buffer_count,bitstream_buffers); + vldecoder->cur_buffer %= vldecoder->num_buffers; + + vldecoder->decoder->set_decode_buffer(vldecoder->decoder, vldecoder->buffers[vldecoder->cur_buffer]); + vldecoder->decoder->set_decode_target(vldecoder->decoder, vlsurf->video_buffer); + + return vlVdpDecoderRenderMpeg12(vldecoder->decoder, (VdpPictureInfoMPEG1Or2 *)picture_info, + bitstream_buffer_count, bitstream_buffers); break; default: diff --git a/src/gallium/state_trackers/vdpau/mixer.c b/src/gallium/state_trackers/vdpau/mixer.c index d5187006bfc..fbd24a29414 100644 --- a/src/gallium/state_trackers/vdpau/mixer.c +++ b/src/gallium/state_trackers/vdpau/mixer.c @@ -157,8 +157,7 @@ VdpStatus vlVdpVideoMixerRender(VdpVideoMixer mixer, vl_compositor_clear_layers(&vmixer->compositor); vl_compositor_set_buffer_layer(&vmixer->compositor, 0, surf->video_buffer, NULL, NULL); - vl_compositor_render(&vmixer->compositor, PIPE_MPEG12_PICTURE_TYPE_FRAME, - dst->surface, NULL, NULL); + vl_compositor_render(&vmixer->compositor, dst->surface, NULL, NULL); return VDP_STATUS_OK; } diff --git a/src/gallium/state_trackers/vdpau/presentation.c b/src/gallium/state_trackers/vdpau/presentation.c index 1176c7a30b7..7e324db5589 100644 --- a/src/gallium/state_trackers/vdpau/presentation.c +++ b/src/gallium/state_trackers/vdpau/presentation.c @@ -169,8 +169,7 @@ vlVdpPresentationQueueDisplay(VdpPresentationQueue presentation_queue, vl_compositor_clear_layers(&pq->compositor); vl_compositor_set_rgba_layer(&pq->compositor, 0, surf->sampler_view, NULL, NULL); - vl_compositor_render(&pq->compositor, PIPE_MPEG12_PICTURE_TYPE_FRAME, - drawable_surface, NULL, NULL); + vl_compositor_render(&pq->compositor, drawable_surface, NULL, NULL); pq->device->context->pipe->screen->flush_frontbuffer ( diff --git a/src/gallium/state_trackers/vdpau/vdpau_private.h b/src/gallium/state_trackers/vdpau/vdpau_private.h index e5d945629fb..5482eff0630 100644 --- a/src/gallium/state_trackers/vdpau/vdpau_private.h +++ b/src/gallium/state_trackers/vdpau/vdpau_private.h @@ -46,7 +46,6 @@ #define TOSTRING(x) QUOTEME(x) #define INFORMATION_STRING TOSTRING(INFORMATION) #define VL_HANDLES -#define VL_NUM_DECODE_BUFFERS 4 static inline enum pipe_video_chroma_format ChromaToPipe(VdpChromaType vdpau_type) @@ -256,7 +255,8 @@ typedef struct { vlVdpDevice *device; struct pipe_video_decoder *decoder; - struct pipe_video_decode_buffer *buffer[VL_NUM_DECODE_BUFFERS]; + unsigned num_buffers; + void **buffers; unsigned cur_buffer; } vlVdpDecoder; diff --git a/src/gallium/state_trackers/xorg/SConscript b/src/gallium/state_trackers/xorg/SConscript index 4ea4ec4ee8b..1768f701e48 100644 --- a/src/gallium/state_trackers/xorg/SConscript +++ b/src/gallium/state_trackers/xorg/SConscript @@ -32,6 +32,7 @@ sources = [ 'xorg_output.c', 'xorg_renderer.c', 'xorg_xv.c', + 'xorg_xvmc.c', ] st_xorg = env.ConvenienceLibrary( diff --git a/src/gallium/state_trackers/xorg/xorg_composite.c b/src/gallium/state_trackers/xorg/xorg_composite.c index f696b72e1e3..61ba6bdddf7 100644 --- a/src/gallium/state_trackers/xorg/xorg_composite.c +++ b/src/gallium/state_trackers/xorg/xorg_composite.c @@ -4,6 +4,7 @@ #include "xorg_exa_tgsi.h" #include "cso_cache/cso_context.h" +#include "util/u_format.h" #include "util/u_sampler.h" @@ -52,18 +53,17 @@ static const struct xorg_composite_blend xorg_blends[] = { static INLINE void -pixel_to_float4(Pixel pixel, float *color) +pixel_to_float4(Pixel pixel, float *color, enum pipe_format format) { - CARD32 r, g, b, a; - - a = (pixel >> 24) & 0xff; - r = (pixel >> 16) & 0xff; - g = (pixel >> 8) & 0xff; - b = (pixel >> 0) & 0xff; - color[0] = ((float)r) / 255.; - color[1] = ((float)g) / 255.; - color[2] = ((float)b) / 255.; - color[3] = ((float)a) / 255.; + const struct util_format_description *format_desc; + uint8_t packed[4]; + + format_desc = util_format_description(format); + packed[0] = pixel; + packed[1] = pixel >> 8; + packed[2] = pixel >> 16; + packed[3] = pixel >> 24; + format_desc->unpack_rgba_float(color, 0, packed, 0, 1, 1); } static boolean @@ -311,7 +311,7 @@ bind_shaders(struct exa_context *exa, int op, vs_traits |= VS_SOLID_FILL; debug_assert(pSrcPicture->format == PICT_a8r8g8b8); pixel_to_float4(pSrcPicture->pSourcePict->solidFill.color, - exa->solid_color); + exa->solid_color, PIPE_FORMAT_B8G8R8A8_UNORM); exa->has_solid_color = TRUE; } else { debug_assert("!gradients not supported"); @@ -533,7 +533,7 @@ boolean xorg_solid_bind_state(struct exa_context *exa, unsigned vs_traits, fs_traits; struct xorg_shader shader; - pixel_to_float4(fg, exa->solid_color); + pixel_to_float4(fg, exa->solid_color, pixmap->tex->format); exa->has_solid_color = TRUE; #if 0 diff --git a/src/gallium/state_trackers/xorg/xorg_dri2.c b/src/gallium/state_trackers/xorg/xorg_dri2.c index 6f2c52eabb6..3350ac736cf 100644 --- a/src/gallium/state_trackers/xorg/xorg_dri2.c +++ b/src/gallium/state_trackers/xorg/xorg_dri2.c @@ -372,13 +372,15 @@ dri2_copy_region(DrawablePtr pDraw, RegionPtr pRegion, save_accel = ms->exa->accel; ms->exa->accel = TRUE; - /* In case it won't be though, make sure the GPU copy contents of the - * source pixmap will be used for the software fallback - presumably the - * client modified them before calling in here. - */ - exaMoveInPixmap(src_priv->pPixmap); - DamageRegionAppend(src_draw, pRegion); - DamageRegionProcessPending(src_draw); + if (pSrcBuffer->attachment != DRI2BufferFrontLeft) { + /* In case it won't be though, make sure the GPU copy contents of the + * source pixmap will be used for the software fallback - presumably the + * client modified them before calling in here. + */ + exaMoveInPixmap(src_priv->pPixmap); + DamageRegionAppend(src_draw, pRegion); + DamageRegionProcessPending(src_draw); + } if (cust && cust->winsys_context_throttle) cust->winsys_context_throttle(cust, ms->ctx, THROTTLE_SWAP); diff --git a/src/gallium/state_trackers/xorg/xorg_driver.c b/src/gallium/state_trackers/xorg/xorg_driver.c index 063ae92f6be..0ade319cdc3 100644 --- a/src/gallium/state_trackers/xorg/xorg_driver.c +++ b/src/gallium/state_trackers/xorg/xorg_driver.c @@ -817,7 +817,7 @@ drv_screen_init(int scrnIndex, ScreenPtr pScreen, int argc, char **argv) &ms->swapThrottling) ? X_CONFIG : X_DEFAULT; - ms->dirtyThrottling = cust ? cust->dirty_throttling : TRUE; + ms->dirtyThrottling = cust ? cust->dirty_throttling : FALSE; from_dt = xf86GetOptValBool(ms->Options, OPTION_THROTTLE_DIRTY, &ms->dirtyThrottling) ? X_CONFIG : X_DEFAULT; diff --git a/src/gallium/state_trackers/xorg/xorg_tracker.h b/src/gallium/state_trackers/xorg/xorg_tracker.h index 664e8c75730..84a3a2fa4e2 100644 --- a/src/gallium/state_trackers/xorg/xorg_tracker.h +++ b/src/gallium/state_trackers/xorg/xorg_tracker.h @@ -222,4 +222,11 @@ void xorg_xv_init(ScreenPtr pScreen); +/*********************************************************************** + * xorg_xvmc.c + */ +void +xorg_xvmc_init(ScreenPtr pScreen, char *name); + + #endif /* _XORG_TRACKER_H_ */ diff --git a/src/gallium/state_trackers/xorg/xorg_xv.c b/src/gallium/state_trackers/xorg/xorg_xv.c index af4992fc2ed..67fd6dfb501 100644 --- a/src/gallium/state_trackers/xorg/xorg_xv.c +++ b/src/gallium/state_trackers/xorg/xorg_xv.c @@ -750,6 +750,8 @@ xorg_xv_init(ScreenPtr pScreen) if (num_adaptors) { xf86XVScreenInit(pScreen, adaptors, num_adaptors); + if (textured_adapter) + xorg_xvmc_init(pScreen, textured_adapter->name); } else { xf86DrvMsg(pScrn->scrnIndex, X_WARNING, "Disabling Xv because no adaptors could be initialized.\n"); diff --git a/src/gallium/state_trackers/xorg/xorg_xvmc.c b/src/gallium/state_trackers/xorg/xorg_xvmc.c new file mode 100644 index 00000000000..0f3f3f00907 --- /dev/null +++ b/src/gallium/state_trackers/xorg/xorg_xvmc.c @@ -0,0 +1,119 @@ +#include "xorg_tracker.h" + +#include <xf86.h> +#include <xf86xv.h> +#include <xf86xvmc.h> +#include <X11/extensions/Xv.h> +#include <X11/extensions/XvMC.h> +#include <fourcc.h> + +#define FOURCC_RGB 0x0000003 +#define XVIMAGE_RGB \ +{ \ + FOURCC_RGB, \ + XvRGB, \ + LSBFirst, \ + { \ + 'R', 'G', 'B', 0x00, \ + 0x00,0x00,0x00,0x10,0x80,0x00,0x00,0xAA,0x00,0x38,0x9B,0x71 \ + }, \ + 32, \ + XvPacked, \ + 1, \ + 24, 0x00FF0000, 0x0000FF00, 0x000000FF, \ + 0, 0, 0, \ + 0, 0, 0, \ + 0, 0, 0, \ + { \ + 'B','G','R','X', \ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 \ + }, \ + XvTopToBottom \ +} + +static int subpicture_index_list[] = { + FOURCC_RGB, + FOURCC_IA44, + FOURCC_AI44 +}; + +static XF86MCImageIDList subpicture_list = +{ + sizeof(subpicture_index_list)/sizeof(*subpicture_index_list), + subpicture_index_list +}; + +static XF86MCSurfaceInfoRec yv12_mpeg2_surface = +{ + FOURCC_I420, + XVMC_CHROMA_FORMAT_420, + 0, + 2048, 2048, 2048, 2048, + XVMC_IDCT | XVMC_MPEG_2, + XVMC_SUBPICTURE_INDEPENDENT_SCALING | XVMC_BACKEND_SUBPICTURE, + &subpicture_list +}; + +static const XF86MCSurfaceInfoRec uyvy_mpeg2_surface = +{ + FOURCC_UYVY, + XVMC_CHROMA_FORMAT_422, + 0, + 2048, 2048, 2048, 2048, + XVMC_IDCT | XVMC_MPEG_2, + XVMC_SUBPICTURE_INDEPENDENT_SCALING | XVMC_BACKEND_SUBPICTURE, + &subpicture_list +}; + +static XF86MCSurfaceInfoPtr surfaces[] = +{ + (XF86MCSurfaceInfoPtr)&yv12_mpeg2_surface, + (XF86MCSurfaceInfoPtr)&uyvy_mpeg2_surface +}; + +static const XF86ImageRec rgb_subpicture = XVIMAGE_RGB; +static const XF86ImageRec ia44_subpicture = XVIMAGE_IA44; +static const XF86ImageRec ai44_subpicture = XVIMAGE_AI44; + +static XF86ImagePtr subpictures[] = +{ + (XF86ImagePtr)&rgb_subpicture, + (XF86ImagePtr)&ia44_subpicture, + (XF86ImagePtr)&ai44_subpicture +}; + +static const XF86MCAdaptorRec adaptor_template = +{ + "", + sizeof(surfaces)/sizeof(*surfaces), + surfaces, + sizeof(subpictures)/sizeof(*subpictures), + subpictures, + (xf86XvMCCreateContextProcPtr)NULL, + (xf86XvMCDestroyContextProcPtr)NULL, + (xf86XvMCCreateSurfaceProcPtr)NULL, + (xf86XvMCDestroySurfaceProcPtr)NULL, + (xf86XvMCCreateSubpictureProcPtr)NULL, + (xf86XvMCDestroySubpictureProcPtr)NULL +}; + +void +xorg_xvmc_init(ScreenPtr pScreen, char *name) +{ + ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; + XF86MCAdaptorPtr adaptorXvMC = xf86XvMCCreateAdaptorRec(); + if (!adaptorXvMC) + return; + + *adaptorXvMC = adaptor_template; + adaptorXvMC->name = name; + xf86DrvMsg(pScrn->scrnIndex, X_INFO, + "[XvMC] Associated with %s.\n", name); + if (!xf86XvMCScreenInit(pScreen, 1, &adaptorXvMC)) + xf86DrvMsg(pScrn->scrnIndex, X_ERROR, + "[XvMC] Failed to initialize extension.\n"); + else + xf86DrvMsg(pScrn->scrnIndex, X_INFO, + "[XvMC] Extension initialized.\n"); + xf86XvMCDestroyAdaptorRec(adaptorXvMC); +} diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c index 0c53b730287..79bd9c618ce 100644 --- a/src/gallium/state_trackers/xorg/xvmc/surface.c +++ b/src/gallium/state_trackers/xorg/xvmc/surface.c @@ -42,266 +42,125 @@ #include "xvmc_private.h" -static const unsigned const_empty_block_mask_420[3][2][2] = { - { { 0x20, 0x10 }, { 0x08, 0x04 } }, - { { 0x02, 0x02 }, { 0x02, 0x02 } }, - { { 0x01, 0x01 }, { 0x01, 0x01 } } -}; - -static enum pipe_mpeg12_picture_type PictureToPipe(int xvmc_pic) -{ - switch (xvmc_pic) { - case XVMC_TOP_FIELD: - return PIPE_MPEG12_PICTURE_TYPE_FIELD_TOP; - case XVMC_BOTTOM_FIELD: - return PIPE_MPEG12_PICTURE_TYPE_FIELD_BOTTOM; - case XVMC_FRAME_PICTURE: - return PIPE_MPEG12_PICTURE_TYPE_FRAME; - default: - assert(0); - } - - XVMC_MSG(XVMC_ERR, "[XvMC] Unrecognized picture type 0x%08X.\n", xvmc_pic); - - return -1; -} - -static inline void -MacroBlockTypeToPipeWeights(const XvMCMacroBlock *xvmc_mb, unsigned weights[2]) -{ - assert(xvmc_mb); - - switch (xvmc_mb->macroblock_type & (XVMC_MB_TYPE_MOTION_FORWARD | XVMC_MB_TYPE_MOTION_BACKWARD)) { - case XVMC_MB_TYPE_MOTION_FORWARD: - weights[0] = PIPE_VIDEO_MV_WEIGHT_MAX; - weights[1] = PIPE_VIDEO_MV_WEIGHT_MIN; - break; - - case (XVMC_MB_TYPE_MOTION_FORWARD | XVMC_MB_TYPE_MOTION_BACKWARD): - weights[0] = PIPE_VIDEO_MV_WEIGHT_HALF; - weights[1] = PIPE_VIDEO_MV_WEIGHT_HALF; - break; - - case XVMC_MB_TYPE_MOTION_BACKWARD: - weights[0] = PIPE_VIDEO_MV_WEIGHT_MIN; - weights[1] = PIPE_VIDEO_MV_WEIGHT_MAX; - break; - - default: - /* workaround for xines xxmc video out plugin */ - if (!(xvmc_mb->macroblock_type & ~XVMC_MB_TYPE_PATTERN)) { - weights[0] = PIPE_VIDEO_MV_WEIGHT_MAX; - weights[1] = PIPE_VIDEO_MV_WEIGHT_MIN; - } else { - weights[0] = PIPE_VIDEO_MV_WEIGHT_MIN; - weights[1] = PIPE_VIDEO_MV_WEIGHT_MIN; - } - break; - } -} - -static inline struct pipe_motionvector -MotionVectorToPipe(const XvMCMacroBlock *xvmc_mb, unsigned vector, - unsigned field_select_mask, unsigned weight) -{ - struct pipe_motionvector mv; - - assert(xvmc_mb); - - switch (xvmc_mb->motion_type) { - case XVMC_PREDICTION_FRAME: - mv.top.x = xvmc_mb->PMV[0][vector][0]; - mv.top.y = xvmc_mb->PMV[0][vector][1]; - mv.top.field_select = PIPE_VIDEO_FRAME; - mv.top.weight = weight; - - mv.bottom.x = xvmc_mb->PMV[0][vector][0]; - mv.bottom.y = xvmc_mb->PMV[0][vector][1]; - mv.bottom.weight = weight; - mv.bottom.field_select = PIPE_VIDEO_FRAME; - break; - - case XVMC_PREDICTION_FIELD: - mv.top.x = xvmc_mb->PMV[0][vector][0]; - mv.top.y = xvmc_mb->PMV[0][vector][1]; - mv.top.field_select = (xvmc_mb->motion_vertical_field_select & field_select_mask) ? - PIPE_VIDEO_BOTTOM_FIELD : PIPE_VIDEO_TOP_FIELD; - mv.top.weight = weight; - - mv.bottom.x = xvmc_mb->PMV[1][vector][0]; - mv.bottom.y = xvmc_mb->PMV[1][vector][1]; - mv.bottom.field_select = (xvmc_mb->motion_vertical_field_select & (field_select_mask << 2)) ? - PIPE_VIDEO_BOTTOM_FIELD : PIPE_VIDEO_TOP_FIELD; - mv.bottom.weight = weight; - break; - - default: // TODO: Support DUALPRIME and 16x8 - break; - } - - return mv; -} - -static inline void -UploadYcbcrBlocks(XvMCSurfacePrivate *surface, +static void +MacroBlocksToPipe(XvMCContextPrivate *context, + XvMCSurfacePrivate *surface, + unsigned int xvmc_picture_structure, const XvMCMacroBlock *xvmc_mb, - const XvMCBlockArray *xvmc_blocks) + const XvMCBlockArray *xvmc_blocks, + struct pipe_mpeg12_macroblock *mb, + unsigned int num_macroblocks) { - enum pipe_mpeg12_dct_intra intra; - enum pipe_mpeg12_dct_type coding; + unsigned int i, j, k; - unsigned tb, x, y, luma_blocks; - short *blocks; - - assert(surface); assert(xvmc_mb); + assert(xvmc_blocks); + assert(num_macroblocks); - if (!xvmc_mb->coded_block_pattern) - return; - - intra = xvmc_mb->macroblock_type & XVMC_MB_TYPE_INTRA ? - PIPE_MPEG12_DCT_INTRA : PIPE_MPEG12_DCT_DELTA; - - coding = xvmc_mb->dct_type == XVMC_DCT_TYPE_FIELD ? - PIPE_MPEG12_DCT_TYPE_FIELD : PIPE_MPEG12_DCT_TYPE_FRAME; - - blocks = xvmc_blocks->blocks + xvmc_mb->index * BLOCK_SIZE_SAMPLES; - - for (y = 0, luma_blocks = 0; y < 2; ++y) { - for (x = 0; x < 2; ++x, ++tb) { - if (xvmc_mb->coded_block_pattern & const_empty_block_mask_420[0][y][x]) { + for (; num_macroblocks > 0; --num_macroblocks) { + mb->base.codec = PIPE_VIDEO_CODEC_MPEG12; + mb->x = xvmc_mb->x; + mb->y = xvmc_mb->y; + mb->macroblock_type = xvmc_mb->macroblock_type; - struct pipe_ycbcr_block *stream = surface->ycbcr[0].stream; - stream->x = xvmc_mb->x * 2 + x; - stream->y = xvmc_mb->y * 2 + y; - stream->intra = intra; - stream->coding = coding; + switch (xvmc_picture_structure) { + case XVMC_FRAME_PICTURE: + mb->macroblock_modes.bits.frame_motion_type = xvmc_mb->motion_type; + mb->macroblock_modes.bits.field_motion_type = 0; + break; - surface->ycbcr[0].num_blocks_added++; - surface->ycbcr[0].stream++; + case XVMC_TOP_FIELD: + case XVMC_BOTTOM_FIELD: + mb->macroblock_modes.bits.frame_motion_type = 0; + mb->macroblock_modes.bits.field_motion_type = xvmc_mb->motion_type; + break; - luma_blocks++; - } + default: + assert(0); } - } - - if (luma_blocks > 0) { - memcpy(surface->ycbcr[0].buffer, blocks, BLOCK_SIZE_BYTES * luma_blocks); - surface->ycbcr[0].buffer += BLOCK_SIZE_SAMPLES * luma_blocks; - blocks += BLOCK_SIZE_SAMPLES * luma_blocks; - } - - /* TODO: Implement 422, 444 */ - //assert(ctx->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420); - for (tb = 1; tb < 3; ++tb) { - if (xvmc_mb->coded_block_pattern & const_empty_block_mask_420[tb][0][0]) { + mb->macroblock_modes.bits.dct_type = xvmc_mb->dct_type; + mb->motion_vertical_field_select = xvmc_mb->motion_vertical_field_select; - struct pipe_ycbcr_block *stream = surface->ycbcr[tb].stream; - stream->x = xvmc_mb->x; - stream->y = xvmc_mb->y; - stream->intra = intra; - stream->coding = PIPE_MPEG12_DCT_TYPE_FRAME; + for (i = 0; i < 2; ++i) + for (j = 0; j < 2; ++j) + for (k = 0; k < 2; ++k) + mb->PMV[i][j][k] = xvmc_mb->PMV[i][j][k]; - memcpy(surface->ycbcr[tb].buffer, blocks, BLOCK_SIZE_BYTES); + mb->coded_block_pattern = xvmc_mb->coded_block_pattern; + mb->blocks = xvmc_blocks->blocks + xvmc_mb->index * BLOCK_SIZE_SAMPLES; + mb->num_skipped_macroblocks = 0; - surface->ycbcr[tb].num_blocks_added++; - surface->ycbcr[tb].stream++; - surface->ycbcr[tb].buffer += BLOCK_SIZE_SAMPLES; - blocks += BLOCK_SIZE_SAMPLES; - } + ++xvmc_mb; + ++mb; } - } static void -MacroBlocksToPipe(XvMCSurfacePrivate *surface, - unsigned int xvmc_picture_structure, - const XvMCMacroBlock *xvmc_mb, - const XvMCBlockArray *xvmc_blocks, - unsigned int num_macroblocks) +SetDecoderStatus(XvMCSurfacePrivate *surface) { - unsigned int i, j; + struct pipe_video_decoder *decoder; + struct pipe_video_buffer *ref_frames[2]; - assert(xvmc_mb); - assert(xvmc_blocks); - assert(num_macroblocks); + XvMCContextPrivate *context_priv; - for (i = 0; i < num_macroblocks; ++i) { - unsigned mv_pos = xvmc_mb->x + surface->mv_stride * xvmc_mb->y; - unsigned mv_weights[2]; + unsigned i, num_refs = 0; - if (xvmc_mb->macroblock_type & (XVMC_MB_TYPE_PATTERN | XVMC_MB_TYPE_INTRA)) - UploadYcbcrBlocks(surface, xvmc_mb, xvmc_blocks); + assert(surface); - MacroBlockTypeToPipeWeights(xvmc_mb, mv_weights); + context_priv = surface->context->privData; + decoder = context_priv->decoder; - for (j = 0; j < 2; ++j) { - if (!surface->ref[j].mv) continue; + decoder->set_decode_buffer(decoder, surface->decode_buffer); + decoder->set_decode_target(decoder, surface->video_buffer); - surface->ref[j].mv[mv_pos] = MotionVectorToPipe - ( - xvmc_mb, j, - j ? XVMC_SELECT_FIRST_BACKWARD : XVMC_SELECT_FIRST_FORWARD, - mv_weights[j] - ); - } + for (i = 0; i < 2; ++i) { + if (surface->ref[i]) { + XvMCSurfacePrivate *ref = surface->ref[i]->privData; - ++xvmc_mb; + if (ref) + ref_frames[num_refs++] = ref->video_buffer; + } } + decoder->set_reference_frames(decoder, ref_frames, num_refs); } static void -unmap_and_flush_surface(XvMCSurfacePrivate *surface) +RecursiveEndFrame(XvMCSurfacePrivate *surface) { - struct pipe_video_buffer *ref_frames[2]; XvMCContextPrivate *context_priv; - unsigned i, num_ycbcr_blocks[3]; + unsigned i; assert(surface); context_priv = surface->context->privData; for ( i = 0; i < 2; ++i ) { - if (surface->ref[i].surface) { - XvMCSurfacePrivate *ref = surface->ref[i].surface->privData; + if (surface->ref[i]) { + XvMCSurface *ref = surface->ref[i]; assert(ref); - unmap_and_flush_surface(ref); - surface->ref[i].surface = NULL; - ref_frames[i] = ref->video_buffer; - } else { - ref_frames[i] = NULL; + surface->ref[i] = NULL; + RecursiveEndFrame(ref->privData); + surface->ref[i] = ref; } } - if (surface->mapped) { - surface->decode_buffer->end_frame(surface->decode_buffer); - for (i = 0; i < 3; ++i) - num_ycbcr_blocks[i] = surface->ycbcr[i].num_blocks_added; - context_priv->decoder->flush_buffer(surface->decode_buffer, - num_ycbcr_blocks, - ref_frames, - surface->video_buffer); - surface->mapped = 0; + if (surface->frame_started) { + surface->frame_started = 0; + SetDecoderStatus(surface); + + for (i = 0; i < 2; ++i) + surface->ref[i] = NULL; + + context_priv->decoder->end_frame(context_priv->decoder); } } PUBLIC Status XvMCCreateSurface(Display *dpy, XvMCContext *context, XvMCSurface *surface) { - static const uint8_t dummy_quant[64] = { - 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, - 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, - 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, - 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, - 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, - 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, - 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, - 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10 - }; - XvMCContextPrivate *context_priv; struct pipe_context *pipe; XvMCSurfacePrivate *surface_priv; @@ -323,9 +182,6 @@ Status XvMCCreateSurface(Display *dpy, XvMCContext *context, XvMCSurface *surfac return BadAlloc; surface_priv->decode_buffer = context_priv->decoder->create_buffer(context_priv->decoder); - surface_priv->decode_buffer->set_quant_matrix(surface_priv->decode_buffer, dummy_quant, dummy_quant); - - surface_priv->mv_stride = surface_priv->decode_buffer->get_mv_stream_stride(surface_priv->decode_buffer); surface_priv->video_buffer = pipe->create_video_buffer ( pipe, PIPE_FORMAT_NV12, context_priv->decoder->chroma_format, @@ -355,15 +211,15 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int pictur XvMCMacroBlockArray *macroblocks, XvMCBlockArray *blocks ) { - struct pipe_video_decode_buffer *t_buffer; + struct pipe_mpeg12_macroblock mb[num_macroblocks]; + struct pipe_video_decoder *decoder; + XvMCContextPrivate *context_priv; XvMCSurfacePrivate *target_surface_priv; XvMCSurfacePrivate *past_surface_priv; XvMCSurfacePrivate *future_surface_priv; XvMCMacroBlock *xvmc_mb; - unsigned i; - XVMC_MSG(XVMC_TRACE, "[XvMC] Rendering to surface %p, with past %p and future %p\n", target_surface, past_surface, future_surface); @@ -394,6 +250,9 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int pictur assert(flags == 0 || flags == XVMC_SECOND_FIELD); + context_priv = context->privData; + decoder = context_priv->decoder; + target_surface_priv = target_surface->privData; past_surface_priv = past_surface ? past_surface->privData : NULL; future_surface_priv = future_surface ? future_surface->privData : NULL; @@ -402,50 +261,39 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int pictur assert(!past_surface || past_surface_priv->context == context); assert(!future_surface || future_surface_priv->context == context); - t_buffer = target_surface_priv->decode_buffer; - - // enshure that all reference frames are flushed - // not really nessasary, but speeds ups rendering + // call end frame on all referenced frames if (past_surface) - unmap_and_flush_surface(past_surface->privData); + RecursiveEndFrame(past_surface->privData); if (future_surface) - unmap_and_flush_surface(future_surface->privData); + RecursiveEndFrame(future_surface->privData); xvmc_mb = macroblocks->macro_blocks + first_macroblock; /* If the surface we're rendering hasn't changed the ref frames shouldn't change. */ - if (target_surface_priv->mapped && ( - target_surface_priv->ref[0].surface != past_surface || - target_surface_priv->ref[1].surface != future_surface || + if (target_surface_priv->frame_started && ( + target_surface_priv->ref[0] != past_surface || + target_surface_priv->ref[1] != future_surface || (xvmc_mb->x == 0 && xvmc_mb->y == 0))) { - // If they change anyway we need to clear our surface - unmap_and_flush_surface(target_surface_priv); + // If they change anyway we must assume that the current frame is ended + RecursiveEndFrame(target_surface_priv); } - if (!target_surface_priv->mapped) { - t_buffer->begin_frame(t_buffer); - - for (i = 0; i < 3; ++i) { - target_surface_priv->ycbcr[i].num_blocks_added = 0; - target_surface_priv->ycbcr[i].stream = t_buffer->get_ycbcr_stream(t_buffer, i); - target_surface_priv->ycbcr[i].buffer = t_buffer->get_ycbcr_buffer(t_buffer, i); - } + target_surface_priv->ref[0] = past_surface; + target_surface_priv->ref[1] = future_surface; - for (i = 0; i < 2; ++i) { - target_surface_priv->ref[i].surface = i == 0 ? past_surface : future_surface; - - if (target_surface_priv->ref[i].surface) - target_surface_priv->ref[i].mv = t_buffer->get_mv_stream(t_buffer, i); - else - target_surface_priv->ref[i].mv = NULL; - } + SetDecoderStatus(target_surface_priv); - target_surface_priv->mapped = 1; + if (!target_surface_priv->frame_started) { + target_surface_priv->frame_started = 1; + decoder->begin_frame(decoder); } - MacroBlocksToPipe(target_surface_priv, picture_structure, xvmc_mb, blocks, num_macroblocks); + MacroBlocksToPipe(context_priv, target_surface_priv, picture_structure, + xvmc_mb, blocks, mb, num_macroblocks); + + context_priv->decoder->decode_macroblock(context_priv->decoder, &mb[0].base, num_macroblocks); XVMC_MSG(XVMC_TRACE, "[XvMC] Submitted surface %p for rendering.\n", target_surface); @@ -543,7 +391,9 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable, assert(desty + desth - 1 < drawable_surface->height); */ - unmap_and_flush_surface(surface_priv); + RecursiveEndFrame(surface_priv); + + context_priv->decoder->flush(context_priv->decoder); vl_compositor_clear_layers(compositor); vl_compositor_set_buffer_layer(compositor, 0, surface_priv->video_buffer, &src_rect, NULL); @@ -567,7 +417,7 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable, // Workaround for r600g, there seems to be a bug in the fence refcounting code pipe->screen->fence_reference(pipe->screen, &surface_priv->fence, NULL); - vl_compositor_render(compositor, PictureToPipe(flags), context_priv->drawable_surface, &dst_rect, NULL); + vl_compositor_render(compositor, context_priv->drawable_surface, &dst_rect, NULL); pipe->flush(pipe, &surface_priv->fence); @@ -630,6 +480,7 @@ PUBLIC Status XvMCDestroySurface(Display *dpy, XvMCSurface *surface) { XvMCSurfacePrivate *surface_priv; + XvMCContextPrivate *context_priv; XVMC_MSG(XVMC_TRACE, "[XvMC] Destroying surface %p.\n", surface); @@ -639,10 +490,13 @@ Status XvMCDestroySurface(Display *dpy, XvMCSurface *surface) return XvMCBadSurface; surface_priv = surface->privData; + context_priv = surface_priv->context->privData; - if (surface_priv->mapped) - surface_priv->decode_buffer->end_frame(surface_priv->decode_buffer); - surface_priv->decode_buffer->destroy(surface_priv->decode_buffer); + if (surface_priv->frame_started) { + SetDecoderStatus(surface_priv); + context_priv->decoder->end_frame(context_priv->decoder); + } + context_priv->decoder->destroy_buffer(context_priv->decoder, surface_priv->decode_buffer); surface_priv->video_buffer->destroy(surface_priv->video_buffer); FREE(surface_priv); surface->privData = NULL; diff --git a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h index 5f8d9d13cb3..fd14ac916ee 100644 --- a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h +++ b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h @@ -45,7 +45,6 @@ struct vl_context; struct pipe_video_decoder; -struct pipe_video_decode_buffer; struct pipe_video_buffer; struct pipe_sampler_view; @@ -70,22 +69,13 @@ typedef struct typedef struct { - struct pipe_video_decode_buffer *decode_buffer; + void *decode_buffer; struct pipe_video_buffer *video_buffer; - bool mapped; // are we still mapped to memory? + // have we allready told the decoder to start a frame + bool frame_started; - struct { - unsigned num_blocks_added; - struct pipe_ycbcr_block *stream; - short *buffer; - } ycbcr[3]; - - unsigned mv_stride; - struct { - XvMCSurface *surface; - struct pipe_motionvector *mv; - } ref[2]; + XvMCSurface *ref[2]; struct pipe_fence_handle *fence; diff --git a/src/gallium/targets/dri-r300/target.c b/src/gallium/targets/dri-r300/target.c index b48bcad3710..9b6d816fb62 100644 --- a/src/gallium/targets/dri-r300/target.c +++ b/src/gallium/targets/dri-r300/target.c @@ -1,4 +1,3 @@ - #include "target-helpers/inline_debug_helper.h" #include "state_tracker/drm_driver.h" #include "radeon/drm/radeon_drm_public.h" diff --git a/src/gallium/targets/dri-r600/Makefile b/src/gallium/targets/dri-r600/Makefile index 0c4de203d35..2d7463008fe 100644 --- a/src/gallium/targets/dri-r600/Makefile +++ b/src/gallium/targets/dri-r600/Makefile @@ -7,6 +7,7 @@ PIPE_DRIVERS = \ $(TOP)/src/gallium/drivers/r600/libr600.a \ $(TOP)/src/gallium/state_trackers/dri/drm/libdridrm.a \ $(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \ + $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ $(TOP)/src/gallium/drivers/trace/libtrace.a \ $(TOP)/src/gallium/drivers/rbug/librbug.a \ $(TOP)/src/gallium/drivers/noop/libnoop.a diff --git a/src/gallium/targets/dri-r600/SConscript b/src/gallium/targets/dri-r600/SConscript index 1df11a8747b..c10d65b595a 100644 --- a/src/gallium/targets/dri-r600/SConscript +++ b/src/gallium/targets/dri-r600/SConscript @@ -6,6 +6,7 @@ env.Append(CPPDEFINES = ['GALLIUM_RBUG', 'GALLIUM_TRACE']) env.Prepend(LIBS = [ st_dri, + radeonwinsys, r600winsys, r600, trace, diff --git a/src/gallium/targets/dri-r600/target.c b/src/gallium/targets/dri-r600/target.c index 8753e2bab17..1b8b6816ec1 100644 --- a/src/gallium/targets/dri-r600/target.c +++ b/src/gallium/targets/dri-r600/target.c @@ -1,14 +1,14 @@ #include "state_tracker/drm_driver.h" #include "target-helpers/inline_debug_helper.h" -#include "r600/drm/r600_drm_public.h" +#include "radeon/drm/radeon_drm_public.h" #include "r600/r600_public.h" static struct pipe_screen *create_screen(int fd) { - struct radeon *radeon; + struct radeon_winsys *radeon; struct pipe_screen *screen; - radeon = r600_drm_winsys_create(fd); + radeon = radeon_drm_winsys_create(fd); if (!radeon) return NULL; diff --git a/src/gallium/targets/egl-static/Android.mk b/src/gallium/targets/egl-static/Android.mk new file mode 100644 index 00000000000..ebc89ead454 --- /dev/null +++ b/src/gallium/targets/egl-static/Android.mk @@ -0,0 +1,56 @@ +# Mesa 3-D graphics library +# +# Copyright (C) 2010-2011 Chia-I Wu <[email protected]> +# Copyright (C) 2010-2011 LunarG Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +LOCAL_PATH := $(call my-dir) + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := \ + egl.c \ + egl_pipe.c \ + egl_st.c + +LOCAL_CFLAGS := \ + -DFEATURE_ES1=1 \ + -DFEATURE_ES2=1 \ + -D_EGL_MAIN=_eglBuiltInDriverGALLIUM + +LOCAL_C_INCLUDES := \ + $(GALLIUM_TOP)/state_trackers/vega \ + $(GALLIUM_TOP)/state_trackers/egl \ + $(MESA_TOP)/src/egl/main \ + $(MESA_TOP)/src/mesa \ + $(DRM_TOP)/include/drm \ + $(DRM_TOP) + +# swrast +LOCAL_CFLAGS += -DGALLIUM_SOFTPIPE + +ifneq ($(filter r600g, $(MESA_GPU_DRIVERS)),) +LOCAL_CFLAGS += -D_EGL_PIPE_R600=1 +endif + +LOCAL_MODULE := libmesa_egl_gallium + +include $(GALLIUM_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/src/gallium/targets/egl-static/Makefile b/src/gallium/targets/egl-static/Makefile index 69e7eecdf0c..1583ab181ea 100644 --- a/src/gallium/targets/egl-static/Makefile +++ b/src/gallium/targets/egl-static/Makefile @@ -117,11 +117,12 @@ endif # r300 ifneq ($(findstring radeon/drm,$(GALLIUM_WINSYS_DIRS)),) +ifneq ($(findstring r300,$(GALLIUM_DRIVERS_DIRS)),) egl_CPPFLAGS += -D_EGL_PIPE_R300=1 egl_LIBS += \ $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ $(TOP)/src/gallium/drivers/r300/libr300.a -egl_SYS += -ldrm_radeon +endif endif # r600 @@ -129,8 +130,8 @@ ifneq ($(findstring r600/drm,$(GALLIUM_WINSYS_DIRS)),) egl_CPPFLAGS += -D_EGL_PIPE_R600=1 egl_LIBS += \ $(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \ + $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ $(TOP)/src/gallium/drivers/r600/libr600.a -egl_SYS += -ldrm_radeon endif # vmwgfx @@ -141,10 +142,18 @@ egl_LIBS += \ $(TOP)/src/gallium/drivers/svga/libsvga.a endif -# swrast +# softpipe +ifneq ($(findstring softpipe,$(GALLIUM_DRIVERS_DIRS)),) egl_CPPFLAGS += -DGALLIUM_SOFTPIPE -DGALLIUM_RBUG -DGALLIUM_TRACE egl_LIBS += $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a egl_SYS += -lm +endif + +# llvmpipe +ifneq ($(findstring llvmpipe,$(GALLIUM_DRIVERS_DIRS)),) +egl_CPPFLAGS += -DGALLIUM_LLVMPIPE +egl_LIBS += $(TOP)/src/gallium/drivers/llvmpipe/libllvmpipe.a +endif # sort to remove duplicates egl_CPPFLAGS := $(sort $(egl_CPPFLAGS)) @@ -158,8 +167,6 @@ st_GL_SYS := -lm -lpthread $(DLOPEN_LIBS) # LLVM ifeq ($(MESA_LLVM),1) -egl_CPPFLAGS += -DGALLIUM_LLVMPIPE -egl_LIBS += $(TOP)/src/gallium/drivers/llvmpipe/libllvmpipe.a egl_SYS += $(LLVM_LIBS) LDFLAGS += $(LLVM_LDFLAGS) diff --git a/src/gallium/targets/egl-static/egl.c b/src/gallium/targets/egl-static/egl.c index 568f5498dd4..a7aee27238b 100644 --- a/src/gallium/targets/egl-static/egl.c +++ b/src/gallium/targets/egl-static/egl.c @@ -109,6 +109,70 @@ out: return (*chip_id >= 0); } +#elif defined(PIPE_OS_ANDROID) + +#include <xf86drm.h> +/* for i915 */ +#include <i915_drm.h> +/* for radeon */ +#include <radeon_drm.h> +/* for util_strcmp */ +#include "util/u_string.h" + +static boolean +drm_fd_get_pci_id(int fd, int *vendor_id, int *chip_id) +{ + drmVersionPtr version; + + *chip_id = -1; + + version = drmGetVersion(fd); + if (!version) { + _eglLog(_EGL_WARNING, "invalid drm fd"); + return FALSE; + } + if (!version->name) { + _eglLog(_EGL_WARNING, "unable to determine the driver name"); + drmFreeVersion(version); + return FALSE; + } + + if (util_strcmp(version->name, "i915") == 0) { + struct drm_i915_getparam gp; + int ret; + + *vendor_id = 0x8086; + + memset(&gp, 0, sizeof(gp)); + gp.param = I915_PARAM_CHIPSET_ID; + gp.value = chip_id; + ret = drmCommandWriteRead(fd, DRM_I915_GETPARAM, &gp, sizeof(gp)); + if (ret) { + _eglLog(_EGL_WARNING, "failed to get param for i915"); + *chip_id = -1; + } + } + else if (util_strcmp(version->name, "radeon") == 0) { + struct drm_radeon_info info; + int ret; + + *vendor_id = 0x1002; + + memset(&info, 0, sizeof(info)); + info.request = RADEON_INFO_DEVICE_ID; + info.value = (unsigned long) chip_id; + ret = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)); + if (ret) { + _eglLog(_EGL_WARNING, "failed to get info for radeon"); + *chip_id = -1; + } + } + + drmFreeVersion(version); + + return (*chip_id >= 0); +} + #else static boolean @@ -157,13 +221,21 @@ drm_fd_get_screen_name(int fd) static struct pipe_screen * create_drm_screen(const char *name, int fd) { + struct pipe_screen *screen; + if (!name) { name = drm_fd_get_screen_name(fd); if (!name) return NULL; } - return egl_pipe_create_drm_screen(name, fd); + screen = egl_pipe_create_drm_screen(name, fd); + if (screen) + _eglLog(_EGL_INFO, "created a pipe screen for %s", name); + else + _eglLog(_EGL_WARNING, "failed to create a pipe screen for %s", name); + + return screen; } static struct pipe_screen * diff --git a/src/gallium/targets/egl-static/egl_pipe.c b/src/gallium/targets/egl-static/egl_pipe.c index 658c532b404..f2b50bd0eab 100644 --- a/src/gallium/targets/egl-static/egl_pipe.c +++ b/src/gallium/targets/egl-static/egl_pipe.c @@ -42,7 +42,6 @@ #include "radeon/drm/radeon_drm_public.h" #include "r300/r300_public.h" /* for r600 */ -#include "r600/drm/r600_drm_public.h" #include "r600/r600_public.h" /* for vmwgfx */ #include "svga/drm/svga_drm_public.h" @@ -141,10 +140,10 @@ static struct pipe_screen * pipe_r600_create_screen(int fd) { #if _EGL_PIPE_R600 - struct radeon *rw; + struct radeon_winsys *rw; struct pipe_screen *screen; - rw = r600_drm_winsys_create(fd); + rw = radeon_drm_winsys_create(fd); if (!rw) return NULL; diff --git a/src/gallium/targets/gbm/Makefile b/src/gallium/targets/gbm/Makefile index 53104253d4f..033a1acaaf9 100644 --- a/src/gallium/targets/gbm/Makefile +++ b/src/gallium/targets/gbm/Makefile @@ -15,7 +15,7 @@ GBM_INCLUDES = \ -I$(TOP)/src/gallium/auxiliary \ -I$(TOP)/src/gallium/include \ -GBM_LIBS = $(LIBUDEV_LIBS) $(LIBDRM_LIB) \ +GBM_LIBS = $(LIBUDEV_LIBS) $(LIBDRM_LIB) -lm \ $(TOP)/src/gallium/state_trackers/gbm/libgbm.a \ $(TOP)/src/gallium/drivers/identity/libidentity.a \ $(TOP)/src/gallium/drivers/galahad/libgalahad.a \ @@ -79,26 +79,30 @@ nouveau_SYS = -ldrm_nouveau r300_LIBS = \ $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ $(TOP)/src/gallium/drivers/r300/libr300.a -r300_SYS = -ldrm_radeon # r600 pipe driver r600_LIBS = \ $(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \ + $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ $(TOP)/src/gallium/drivers/r600/libr600.a -r600_SYS = -ldrm_radeon # vmwgfx pipe driver vmwgfx_LIBS = \ $(TOP)/src/gallium/winsys/svga/drm/libsvgadrm.a \ $(TOP)/src/gallium/drivers/svga/libsvga.a + + # LLVM ifeq ($(MESA_LLVM),1) -pipe_LIBS += $(TOP)/src/gallium/drivers/llvmpipe/libllvmpipe.a pipe_SYS += $(LLVM_LIBS) pipe_LDFLAGS += $(LLVM_LDFLAGS) endif +ifneq ($(findstring llvmpipe,$(GALLIUM_DRIVERS_DIRS)),) +pipe_LIBS += $(TOP)/src/gallium/drivers/llvmpipe/libllvmpipe.a +endif + # determine the targets/sources pipe_TARGETS = pipe_SOURCES = @@ -119,9 +123,11 @@ pipe_SOURCES += pipe_nouveau.c endif ifneq ($(findstring radeon/drm,$(GALLIUM_WINSYS_DIRS)),) +ifneq ($(findstring r300,$(GALLIUM_DRIVERS_DIRS)),) pipe_TARGETS += $(PIPE_PREFIX)r300.so pipe_SOURCES += pipe_r300.c endif +endif ifneq ($(findstring r600/drm,$(GALLIUM_WINSYS_DIRS)),) pipe_TARGETS += $(PIPE_PREFIX)r600.so @@ -148,7 +154,7 @@ $(GBM_EXTRA_TARGETS): $(TOP)/$(LIB_DIR)/gbm/%: % @$(INSTALL) -d $(dir $@) $(INSTALL) $< $(dir $@) -$(pipe_TARGETS): $(PIPE_PREFIX)%.so: pipe_%.o +$(pipe_TARGETS): $(PIPE_PREFIX)%.so: pipe_%.o $(pipe_LIBS) $($*_LIBS) $(MKLIB) -o $@ -noprefix -linker '$(CC)' \ -ldflags '-L$(TOP)/$(LIB_DIR) $(pipe_LDFLAGS) $(LDFLAGS)' \ $(MKLIB_OPTIONS) $< \ diff --git a/src/gallium/targets/gbm/pipe_r600.c b/src/gallium/targets/gbm/pipe_r600.c index 486a6592585..9f61a51404a 100644 --- a/src/gallium/targets/gbm/pipe_r600.c +++ b/src/gallium/targets/gbm/pipe_r600.c @@ -1,16 +1,15 @@ - #include "state_tracker/drm_driver.h" #include "target-helpers/inline_debug_helper.h" -#include "r600/drm/r600_drm_public.h" +#include "radeon/drm/radeon_drm_public.h" #include "r600/r600_public.h" static struct pipe_screen * create_screen(int fd) { - struct radeon *rw; + struct radeon_winsys *rw; struct pipe_screen *screen; - rw = r600_drm_winsys_create(fd); + rw = radeon_drm_winsys_create(fd); if (!rw) return NULL; diff --git a/src/gallium/targets/va-r600/Makefile b/src/gallium/targets/va-r600/Makefile index 28797ad528d..d09a3aa8ad2 100644 --- a/src/gallium/targets/va-r600/Makefile +++ b/src/gallium/targets/va-r600/Makefile @@ -10,6 +10,7 @@ PIPE_DRIVERS = \ $(TOP)/src/gallium/drivers/r600/libr600.a \ $(TOP)/src/gallium/winsys/g3dvl/dri/libvldri.a \ $(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \ + $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ $(TOP)/src/gallium/drivers/trace/libtrace.a \ $(TOP)/src/gallium/auxiliary/libgallium.a diff --git a/src/gallium/targets/va-r600/target.c b/src/gallium/targets/va-r600/target.c index 8753e2bab17..1b8b6816ec1 100644 --- a/src/gallium/targets/va-r600/target.c +++ b/src/gallium/targets/va-r600/target.c @@ -1,14 +1,14 @@ #include "state_tracker/drm_driver.h" #include "target-helpers/inline_debug_helper.h" -#include "r600/drm/r600_drm_public.h" +#include "radeon/drm/radeon_drm_public.h" #include "r600/r600_public.h" static struct pipe_screen *create_screen(int fd) { - struct radeon *radeon; + struct radeon_winsys *radeon; struct pipe_screen *screen; - radeon = r600_drm_winsys_create(fd); + radeon = radeon_drm_winsys_create(fd); if (!radeon) return NULL; diff --git a/src/gallium/targets/vdpau-r600/Makefile b/src/gallium/targets/vdpau-r600/Makefile index 0fd817b8e82..c2d95af295a 100644 --- a/src/gallium/targets/vdpau-r600/Makefile +++ b/src/gallium/targets/vdpau-r600/Makefile @@ -7,6 +7,7 @@ PIPE_DRIVERS = \ $(TOP)/src/gallium/drivers/r600/libr600.a \ $(TOP)/src/gallium/winsys/g3dvl/dri/libvldri.a \ $(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \ + $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ $(TOP)/src/gallium/drivers/trace/libtrace.a \ $(TOP)/src/gallium/auxiliary/libgallium.a diff --git a/src/gallium/targets/vdpau-r600/target.c b/src/gallium/targets/vdpau-r600/target.c index 8753e2bab17..1b8b6816ec1 100644 --- a/src/gallium/targets/vdpau-r600/target.c +++ b/src/gallium/targets/vdpau-r600/target.c @@ -1,14 +1,14 @@ #include "state_tracker/drm_driver.h" #include "target-helpers/inline_debug_helper.h" -#include "r600/drm/r600_drm_public.h" +#include "radeon/drm/radeon_drm_public.h" #include "r600/r600_public.h" static struct pipe_screen *create_screen(int fd) { - struct radeon *radeon; + struct radeon_winsys *radeon; struct pipe_screen *screen; - radeon = r600_drm_winsys_create(fd); + radeon = radeon_drm_winsys_create(fd); if (!radeon) return NULL; diff --git a/src/gallium/targets/xorg-r600/Makefile b/src/gallium/targets/xorg-r600/Makefile new file mode 100644 index 00000000000..4577ba605a5 --- /dev/null +++ b/src/gallium/targets/xorg-r600/Makefile @@ -0,0 +1,25 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = r600g_drv.so + +C_SOURCES = \ + target.c \ + xorg.c + +DRIVER_DEFINES = \ + -DHAVE_CONFIG_H -DGALLIUM_RBUG -DGALLIUM_TRACE -DGALLIUM_GALAHAD + +DRIVER_PIPES = \ + $(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a \ + $(TOP)/src/gallium/drivers/r600/libr600.a \ + $(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \ + $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ + $(TOP)/src/gallium/drivers/galahad/libgalahad.a \ + $(TOP)/src/gallium/drivers/trace/libtrace.a \ + $(TOP)/src/gallium/drivers/rbug/librbug.a + +DRIVER_LINKS = \ + $(shell pkg-config --libs libdrm) + +include ../Makefile.xorg diff --git a/src/gallium/targets/xorg-r600/target.c b/src/gallium/targets/xorg-r600/target.c new file mode 100644 index 00000000000..60424359a7b --- /dev/null +++ b/src/gallium/targets/xorg-r600/target.c @@ -0,0 +1,26 @@ + +#include "target-helpers/inline_debug_helper.h" +#include "state_tracker/drm_driver.h" +#include "radeon/drm/radeon_drm_public.h" +#include "r600/r600_public.h" + +static struct pipe_screen * +create_screen(int fd) +{ + struct radeon_winsys *sws; + struct pipe_screen *screen; + + sws = radeon_drm_winsys_create(fd); + if (!sws) + return NULL; + + screen = r600_screen_create(sws); + if (!screen) + return NULL; + + screen = debug_screen_wrap(screen); + + return screen; +} + +DRM_DRIVER_DESCRIPTOR("r600", "radeon", create_screen) diff --git a/src/gallium/targets/xorg-r600/xorg.c b/src/gallium/targets/xorg-r600/xorg.c new file mode 100644 index 00000000000..120cf6da6fd --- /dev/null +++ b/src/gallium/targets/xorg-r600/xorg.c @@ -0,0 +1,148 @@ +/* + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * + * Author: Alan Hourihane <[email protected]> + * Author: Jakob Bornecrantz <[email protected]> + * Author: Corbin Simpson <[email protected]> + * + */ + +#include "../../state_trackers/xorg/xorg_winsys.h" + +static void r600_xorg_identify(int flags); +static Bool r600_xorg_pci_probe(DriverPtr driver, + int entity_num, + struct pci_device *device, + intptr_t match_data); + +static const struct pci_id_match r600_xorg_device_match[] = { + {0x1002, PCI_MATCH_ANY, PCI_MATCH_ANY, PCI_MATCH_ANY, 0, 0, 0}, + {0, 0, 0}, +}; + +static SymTabRec r600_xorg_chipsets[] = { + {PCI_MATCH_ANY, "AMD R6xx Graphics Chipset"}, + {-1, NULL} +}; + +static PciChipsets r600_xorg_pci_devices[] = { + {PCI_MATCH_ANY, PCI_MATCH_ANY, NULL}, + {-1, -1, NULL} +}; + +static XF86ModuleVersionInfo r600_xorg_version = { + "r600g", + MODULEVENDORSTRING, + MODINFOSTRING1, + MODINFOSTRING2, + XORG_VERSION_CURRENT, + 0, 1, 0, /* major, minor, patch */ + ABI_CLASS_VIDEODRV, + ABI_VIDEODRV_VERSION, + MOD_CLASS_VIDEODRV, + {0, 0, 0, 0} +}; + +/* + * Xorg driver exported structures + */ + +_X_EXPORT DriverRec r600_driver = { + 1, + "r600g", + r600_xorg_identify, + NULL, + xorg_tracker_available_options, + NULL, + 0, + NULL, + r600_xorg_device_match, + r600_xorg_pci_probe +}; + +static MODULESETUPPROTO(r600_xorg_setup); + +_X_EXPORT XF86ModuleData r600gModuleData = { + &r600_xorg_version, + r600_xorg_setup, + NULL +}; + +/* + * Xorg driver functions + */ + +static pointer +r600_xorg_setup(pointer module, pointer opts, int *errmaj, int *errmin) +{ + static Bool setupDone = 0; + + /* This module should be loaded only once, but check to be sure. + */ + if (!setupDone) { + setupDone = 1; + xf86AddDriver(&r600_driver, module, HaveDriverFuncs); + + /* + * The return value must be non-NULL on success even though there + * is no TearDownProc. + */ + return (pointer) 1; + } else { + if (errmaj) + *errmaj = LDR_ONCEONLY; + return NULL; + } +} + +static void +r600_xorg_identify(int flags) +{ + xf86PrintChipsets("r600", "Driver for R6xx Gallium with KMS", + r600_xorg_chipsets); +} + +static Bool +r600_xorg_pci_probe(DriverPtr driver, + int entity_num, struct pci_device *device, intptr_t match_data) +{ + ScrnInfoPtr scrn = NULL; + EntityInfoPtr entity; + + scrn = xf86ConfigPciEntity(scrn, 0, entity_num, r600_xorg_pci_devices, + NULL, NULL, NULL, NULL, NULL); + if (scrn != NULL) { + scrn->driverVersion = 1; + scrn->driverName = "r600"; + scrn->name = "R600G"; + scrn->Probe = NULL; + + entity = xf86GetEntityInfo(entity_num); + + /* Use all the functions from the xorg tracker */ + xorg_tracker_set_functions(scrn); + } + return scrn != NULL; +} diff --git a/src/gallium/targets/xvmc-r600/Makefile b/src/gallium/targets/xvmc-r600/Makefile index 0bb72f1eff9..a10a42d18ed 100644 --- a/src/gallium/targets/xvmc-r600/Makefile +++ b/src/gallium/targets/xvmc-r600/Makefile @@ -7,6 +7,7 @@ PIPE_DRIVERS = \ $(TOP)/src/gallium/drivers/r600/libr600.a \ $(TOP)/src/gallium/winsys/g3dvl/dri/libvldri.a \ $(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \ + $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ $(TOP)/src/gallium/drivers/trace/libtrace.a \ $(TOP)/src/gallium/auxiliary/libgallium.a diff --git a/src/gallium/targets/xvmc-r600/target.c b/src/gallium/targets/xvmc-r600/target.c index 8753e2bab17..1b8b6816ec1 100644 --- a/src/gallium/targets/xvmc-r600/target.c +++ b/src/gallium/targets/xvmc-r600/target.c @@ -1,14 +1,14 @@ #include "state_tracker/drm_driver.h" #include "target-helpers/inline_debug_helper.h" -#include "r600/drm/r600_drm_public.h" +#include "radeon/drm/radeon_drm_public.h" #include "r600/r600_public.h" static struct pipe_screen *create_screen(int fd) { - struct radeon *radeon; + struct radeon_winsys *radeon; struct pipe_screen *screen; - radeon = r600_drm_winsys_create(fd); + radeon = radeon_drm_winsys_create(fd); if (!radeon) return NULL; diff --git a/src/gallium/winsys/r600/drm/Android.mk b/src/gallium/winsys/r600/drm/Android.mk new file mode 100644 index 00000000000..eb79caa19ca --- /dev/null +++ b/src/gallium/winsys/r600/drm/Android.mk @@ -0,0 +1,43 @@ +# Mesa 3-D graphics library +# +# Copyright (C) 2010-2011 Chia-I Wu <[email protected]> +# Copyright (C) 2010-2011 LunarG Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +LOCAL_PATH := $(call my-dir) + +# get C_SOURCES +include $(LOCAL_PATH)/Makefile.sources + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(C_SOURCES) + +LOCAL_CFLAGS := -std=c99 + +LOCAL_C_INCLUDES := \ + $(GALLIUM_TOP)/drivers/r600 \ + $(DRM_TOP) \ + $(DRM_TOP)/include/drm + +LOCAL_MODULE := libmesa_winsys_r600 + +include $(GALLIUM_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/src/gallium/winsys/r600/drm/Makefile b/src/gallium/winsys/r600/drm/Makefile index fb7b09b3a0d..c23286c8197 100644 --- a/src/gallium/winsys/r600/drm/Makefile +++ b/src/gallium/winsys/r600/drm/Makefile @@ -4,15 +4,8 @@ include $(TOP)/configs/current LIBNAME = r600winsys -C_SOURCES = \ - bof.c \ - evergreen_hw_context.c \ - radeon_bo.c \ - radeon_pciid.c \ - r600_bo.c \ - r600_drm.c \ - r600_hw_context.c \ - r600_bomgr.c +# get C_SOURCES +include Makefile.sources LIBRARY_INCLUDES = -I$(TOP)/src/gallium/drivers/r600 \ -I$(TOP)/include \ diff --git a/src/gallium/winsys/r600/drm/Makefile.sources b/src/gallium/winsys/r600/drm/Makefile.sources new file mode 100644 index 00000000000..97980170e6b --- /dev/null +++ b/src/gallium/winsys/r600/drm/Makefile.sources @@ -0,0 +1,7 @@ +C_SOURCES := \ + evergreen_hw_context.c \ + radeon_pciid.c \ + r600_bo.c \ + r600_drm.c \ + r600_hw_context.c + diff --git a/src/gallium/winsys/r600/drm/SConscript b/src/gallium/winsys/r600/drm/SConscript index f55bb265226..2d0d80e8bb9 100644 --- a/src/gallium/winsys/r600/drm/SConscript +++ b/src/gallium/winsys/r600/drm/SConscript @@ -2,16 +2,7 @@ Import('*') env = env.Clone() -r600_sources = [ - 'bof.c', - 'evergreen_hw_context.c', - 'radeon_bo.c', - 'radeon_pciid.c', - 'r600_bo.c', - 'r600_drm.c', - 'r600_hw_context.c', - 'r600_bomgr.c', -] +r600_sources = env.ParseSourceList('Makefile.sources', 'C_SOURCES') env.PkgUseModules('DRM_RADEON') diff --git a/src/gallium/winsys/r600/drm/bof.c b/src/gallium/winsys/r600/drm/bof.c deleted file mode 100644 index 5c923ad38d6..00000000000 --- a/src/gallium/winsys/r600/drm/bof.c +++ /dev/null @@ -1,477 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse - */ -#include <errno.h> -#include <stdlib.h> -#include <string.h> -#include "bof.h" - -/* - * helpers - */ -static int bof_entry_grow(bof_t *bof) -{ - bof_t **array; - - if (bof->array_size < bof->nentry) - return 0; - array = realloc(bof->array, (bof->nentry + 16) * sizeof(void*)); - if (array == NULL) - return -ENOMEM; - bof->array = array; - bof->nentry += 16; - return 0; -} - -/* - * object - */ -bof_t *bof_object(void) -{ - bof_t *object; - - object = calloc(1, sizeof(bof_t)); - if (object == NULL) - return NULL; - object->refcount = 1; - object->type = BOF_TYPE_OBJECT; - object->size = 12; - return object; -} - -bof_t *bof_object_get(bof_t *object, const char *keyname) -{ - unsigned i; - - for (i = 0; i < object->array_size; i += 2) { - if (!strcmp(object->array[i]->value, keyname)) { - return object->array[i + 1]; - } - } - return NULL; -} - -int bof_object_set(bof_t *object, const char *keyname, bof_t *value) -{ - bof_t *key; - int r; - - if (object->type != BOF_TYPE_OBJECT) - return -EINVAL; - r = bof_entry_grow(object); - if (r) - return r; - key = bof_string(keyname); - if (key == NULL) - return -ENOMEM; - object->array[object->array_size++] = key; - object->array[object->array_size++] = value; - object->size += value->size; - object->size += key->size; - bof_incref(value); - return 0; -} - -/* - * array - */ -bof_t *bof_array(void) -{ - bof_t *array = bof_object(); - - if (array == NULL) - return NULL; - array->type = BOF_TYPE_ARRAY; - array->size = 12; - return array; -} - -int bof_array_append(bof_t *array, bof_t *value) -{ - int r; - if (array->type != BOF_TYPE_ARRAY) - return -EINVAL; - r = bof_entry_grow(array); - if (r) - return r; - array->array[array->array_size++] = value; - array->size += value->size; - bof_incref(value); - return 0; -} - -bof_t *bof_array_get(bof_t *bof, unsigned i) -{ - if (!bof_is_array(bof) || i >= bof->array_size) - return NULL; - return bof->array[i]; -} - -unsigned bof_array_size(bof_t *bof) -{ - if (!bof_is_array(bof)) - return 0; - return bof->array_size; -} - -/* - * blob - */ -bof_t *bof_blob(unsigned size, void *value) -{ - bof_t *blob = bof_object(); - - if (blob == NULL) - return NULL; - blob->type = BOF_TYPE_BLOB; - blob->value = calloc(1, size); - if (blob->value == NULL) { - bof_decref(blob); - return NULL; - } - blob->size = size; - memcpy(blob->value, value, size); - blob->size += 12; - return blob; -} - -unsigned bof_blob_size(bof_t *bof) -{ - if (!bof_is_blob(bof)) - return 0; - return bof->size - 12; -} - -void *bof_blob_value(bof_t *bof) -{ - if (!bof_is_blob(bof)) - return NULL; - return bof->value; -} - -/* - * string - */ -bof_t *bof_string(const char *value) -{ - bof_t *string = bof_object(); - - if (string == NULL) - return NULL; - string->type = BOF_TYPE_STRING; - string->size = strlen(value) + 1; - string->value = calloc(1, string->size); - if (string->value == NULL) { - bof_decref(string); - return NULL; - } - strcpy(string->value, value); - string->size += 12; - return string; -} - -/* - * int32 - */ -bof_t *bof_int32(int32_t value) -{ - bof_t *int32 = bof_object(); - - if (int32 == NULL) - return NULL; - int32->type = BOF_TYPE_INT32; - int32->size = 4; - int32->value = calloc(1, int32->size); - if (int32->value == NULL) { - bof_decref(int32); - return NULL; - } - memcpy(int32->value, &value, 4); - int32->size += 12; - return int32; -} - -int32_t bof_int32_value(bof_t *bof) -{ - return *((uint32_t*)bof->value); -} - -/* - * common - */ -static void bof_indent(int level) -{ - int i; - - for (i = 0; i < level; i++) - fprintf(stderr, " "); -} - -static void bof_print_bof(bof_t *bof, int level, int entry) -{ - bof_indent(level); - if (bof == NULL) { - fprintf(stderr, "--NULL-- for entry %d\n", entry); - return; - } - switch (bof->type) { - case BOF_TYPE_STRING: - fprintf(stderr, "%p string [%s %d]\n", bof, (char*)bof->value, bof->size); - break; - case BOF_TYPE_INT32: - fprintf(stderr, "%p int32 [%d %d]\n", bof, *(int*)bof->value, bof->size); - break; - case BOF_TYPE_BLOB: - fprintf(stderr, "%p blob [%d]\n", bof, bof->size); - break; - case BOF_TYPE_NULL: - fprintf(stderr, "%p null [%d]\n", bof, bof->size); - break; - case BOF_TYPE_OBJECT: - fprintf(stderr, "%p object [%d %d]\n", bof, bof->array_size / 2, bof->size); - break; - case BOF_TYPE_ARRAY: - fprintf(stderr, "%p array [%d %d]\n", bof, bof->array_size, bof->size); - break; - default: - fprintf(stderr, "%p unknown [%d]\n", bof, bof->type); - return; - } -} - -static void bof_print_rec(bof_t *bof, int level, int entry) -{ - unsigned i; - - bof_print_bof(bof, level, entry); - for (i = 0; i < bof->array_size; i++) { - bof_print_rec(bof->array[i], level + 2, i); - } -} - -void bof_print(bof_t *bof) -{ - bof_print_rec(bof, 0, 0); -} - -static int bof_read(bof_t *root, FILE *file, long end, int level) -{ - bof_t *bof = NULL; - int r; - - if (ftell(file) >= end) { - return 0; - } - r = bof_entry_grow(root); - if (r) - return r; - bof = bof_object(); - if (bof == NULL) - return -ENOMEM; - bof->offset = ftell(file); - r = fread(&bof->type, 4, 1, file); - if (r != 1) - goto out_err; - r = fread(&bof->size, 4, 1, file); - if (r != 1) - goto out_err; - r = fread(&bof->array_size, 4, 1, file); - if (r != 1) - goto out_err; - switch (bof->type) { - case BOF_TYPE_STRING: - case BOF_TYPE_INT32: - case BOF_TYPE_BLOB: - bof->value = calloc(1, bof->size - 12); - if (bof->value == NULL) { - goto out_err; - } - r = fread(bof->value, bof->size - 12, 1, file); - if (r != 1) { - fprintf(stderr, "error reading %d\n", bof->size - 12); - goto out_err; - } - break; - case BOF_TYPE_NULL: - return 0; - case BOF_TYPE_OBJECT: - case BOF_TYPE_ARRAY: - r = bof_read(bof, file, bof->offset + bof->size, level + 2); - if (r) - goto out_err; - break; - default: - fprintf(stderr, "invalid type %d\n", bof->type); - goto out_err; - } - root->array[root->centry++] = bof; - return bof_read(root, file, end, level); -out_err: - bof_decref(bof); - return -EINVAL; -} - -bof_t *bof_load_file(const char *filename) -{ - bof_t *root = bof_object(); - int r; - - if (root == NULL) { - fprintf(stderr, "%s failed to create root object\n", __func__); - return NULL; - } - root->file = fopen(filename, "r"); - if (root->file == NULL) - goto out_err; - r = fseek(root->file, 0L, SEEK_SET); - if (r) { - fprintf(stderr, "%s failed to seek into file %s\n", __func__, filename); - goto out_err; - } - root->offset = ftell(root->file); - r = fread(&root->type, 4, 1, root->file); - if (r != 1) - goto out_err; - r = fread(&root->size, 4, 1, root->file); - if (r != 1) - goto out_err; - r = fread(&root->array_size, 4, 1, root->file); - if (r != 1) - goto out_err; - r = bof_read(root, root->file, root->offset + root->size, 2); - if (r) - goto out_err; - return root; -out_err: - bof_decref(root); - return NULL; -} - -void bof_incref(bof_t *bof) -{ - bof->refcount++; -} - -void bof_decref(bof_t *bof) -{ - unsigned i; - - if (bof == NULL) - return; - if (--bof->refcount > 0) - return; - for (i = 0; i < bof->array_size; i++) { - bof_decref(bof->array[i]); - bof->array[i] = NULL; - } - bof->array_size = 0; - if (bof->file) { - fclose(bof->file); - bof->file = NULL; - } - free(bof->array); - free(bof->value); - free(bof); -} - -static int bof_file_write(bof_t *bof, FILE *file) -{ - unsigned i; - int r; - - r = fwrite(&bof->type, 4, 1, file); - if (r != 1) - return -EINVAL; - r = fwrite(&bof->size, 4, 1, file); - if (r != 1) - return -EINVAL; - r = fwrite(&bof->array_size, 4, 1, file); - if (r != 1) - return -EINVAL; - switch (bof->type) { - case BOF_TYPE_NULL: - if (bof->size) - return -EINVAL; - break; - case BOF_TYPE_STRING: - case BOF_TYPE_INT32: - case BOF_TYPE_BLOB: - r = fwrite(bof->value, bof->size - 12, 1, file); - if (r != 1) - return -EINVAL; - break; - case BOF_TYPE_OBJECT: - case BOF_TYPE_ARRAY: - for (i = 0; i < bof->array_size; i++) { - r = bof_file_write(bof->array[i], file); - if (r) - return r; - } - break; - default: - return -EINVAL; - } - return 0; -} - -int bof_dump_file(bof_t *bof, const char *filename) -{ - unsigned i; - int r = 0; - - if (bof->file) { - fclose(bof->file); - bof->file = NULL; - } - bof->file = fopen(filename, "w"); - if (bof->file == NULL) { - fprintf(stderr, "%s failed to open file %s\n", __func__, filename); - r = -EINVAL; - goto out_err; - } - r = fseek(bof->file, 0L, SEEK_SET); - if (r) { - fprintf(stderr, "%s failed to seek into file %s\n", __func__, filename); - goto out_err; - } - r = fwrite(&bof->type, 4, 1, bof->file); - if (r != 1) - goto out_err; - r = fwrite(&bof->size, 4, 1, bof->file); - if (r != 1) - goto out_err; - r = fwrite(&bof->array_size, 4, 1, bof->file); - if (r != 1) - goto out_err; - for (i = 0; i < bof->array_size; i++) { - r = bof_file_write(bof->array[i], bof->file); - if (r) - return r; - } -out_err: - fclose(bof->file); - bof->file = NULL; - return r; -} diff --git a/src/gallium/winsys/r600/drm/bof.h b/src/gallium/winsys/r600/drm/bof.h deleted file mode 100644 index 014affb74f1..00000000000 --- a/src/gallium/winsys/r600/drm/bof.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse - */ -#ifndef BOF_H -#define BOF_H - -#include <stdio.h> -#include <stdint.h> - -#define BOF_TYPE_STRING 0 -#define BOF_TYPE_NULL 1 -#define BOF_TYPE_BLOB 2 -#define BOF_TYPE_OBJECT 3 -#define BOF_TYPE_ARRAY 4 -#define BOF_TYPE_INT32 5 - -struct bof; - -typedef struct bof { - struct bof **array; - unsigned centry; - unsigned nentry; - unsigned refcount; - FILE *file; - uint32_t type; - uint32_t size; - uint32_t array_size; - void *value; - long offset; -} bof_t; - -extern int bof_file_flush(bof_t *root); -extern bof_t *bof_file_new(const char *filename); -extern int bof_object_dump(bof_t *object, const char *filename); - -/* object */ -extern bof_t *bof_object(void); -extern bof_t *bof_object_get(bof_t *object, const char *keyname); -extern int bof_object_set(bof_t *object, const char *keyname, bof_t *value); -/* array */ -extern bof_t *bof_array(void); -extern int bof_array_append(bof_t *array, bof_t *value); -extern bof_t *bof_array_get(bof_t *bof, unsigned i); -extern unsigned bof_array_size(bof_t *bof); -/* blob */ -extern bof_t *bof_blob(unsigned size, void *value); -extern unsigned bof_blob_size(bof_t *bof); -extern void *bof_blob_value(bof_t *bof); -/* string */ -extern bof_t *bof_string(const char *value); -/* int32 */ -extern bof_t *bof_int32(int32_t value); -extern int32_t bof_int32_value(bof_t *bof); -/* common functions */ -extern void bof_decref(bof_t *bof); -extern void bof_incref(bof_t *bof); -extern bof_t *bof_load_file(const char *filename); -extern int bof_dump_file(bof_t *bof, const char *filename); -extern void bof_print(bof_t *bof); - -static inline int bof_is_object(bof_t *bof){return (bof->type == BOF_TYPE_OBJECT);} -static inline int bof_is_blob(bof_t *bof){return (bof->type == BOF_TYPE_BLOB);} -static inline int bof_is_null(bof_t *bof){return (bof->type == BOF_TYPE_NULL);} -static inline int bof_is_int32(bof_t *bof){return (bof->type == BOF_TYPE_INT32);} -static inline int bof_is_array(bof_t *bof){return (bof->type == BOF_TYPE_ARRAY);} -static inline int bof_is_string(bof_t *bof){return (bof->type == BOF_TYPE_STRING);} - -#endif diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c index 60d2e289396..3417eb39192 100644 --- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c +++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c @@ -23,20 +23,11 @@ * Authors: * Jerome Glisse */ -#include <errno.h> -#include <stdint.h> -#include <string.h> -#include <stdlib.h> -#include <assert.h> -#include "xf86drm.h" #include "r600.h" +#include "r600_priv.h" #include "evergreend.h" -#include "radeon_drm.h" -#include "bof.h" -#include "pipe/p_compiler.h" -#include "util/u_inlines.h" #include "util/u_memory.h" -#include "r600_priv.h" +#include <errno.h> #define GROUP_FORCE_NEW_BLOCK 0 @@ -168,6 +159,7 @@ static const struct r600_reg evergreen_context_reg_list[] = { {R_028404_VGT_MIN_VTX_INDX, 0, 0, 0}, {R_028408_VGT_INDX_OFFSET, 0, 0, 0}, {R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, 0, 0, 0}, + {R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, 0, 0, 0}, {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, {R_028410_SX_ALPHA_TEST_CONTROL, 0, 0, 0}, {R_028414_CB_BLEND_RED, 0, 0, 0}, @@ -532,6 +524,7 @@ static const struct r600_reg cayman_context_reg_list[] = { {R_028404_VGT_MIN_VTX_INDX, 0, 0, 0}, {R_028408_VGT_INDX_OFFSET, 0, 0, 0}, {R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, 0, 0, 0}, + {R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, 0, 0, 0}, {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, {R_028410_SX_ALPHA_TEST_CONTROL, 0, 0, 0}, {R_028414_CB_BLEND_RED, 0, 0, 0}, @@ -909,6 +902,7 @@ int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon) memset(ctx, 0, sizeof(struct r600_context)); ctx->radeon = radeon; + LIST_INITHEAD(&ctx->query_list); /* init dirty list */ @@ -992,33 +986,23 @@ int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon) if (r) goto out_err; + ctx->cs = radeon->ws->cs_create(radeon->ws); + /* allocate cs variables */ - ctx->nreloc = RADEON_CTX_MAX_PM4; - ctx->reloc = calloc(ctx->nreloc, sizeof(struct r600_reloc)); - if (ctx->reloc == NULL) { - r = -ENOMEM; - goto out_err; - } - ctx->bo = calloc(ctx->nreloc, sizeof(void *)); + ctx->bo = calloc(RADEON_MAX_CMDBUF_DWORDS, sizeof(void *)); if (ctx->bo == NULL) { r = -ENOMEM; goto out_err; } - ctx->pm4_ndwords = RADEON_CTX_MAX_PM4; - ctx->pm4 = calloc(ctx->pm4_ndwords, 4); - if (ctx->pm4 == NULL) { - r = -ENOMEM; - goto out_err; - } + ctx->pm4_ndwords = RADEON_MAX_CMDBUF_DWORDS; + ctx->pm4 = ctx->cs->buf; r600_init_cs(ctx); /* save 16dwords space for fence mecanism */ ctx->pm4_ndwords -= 16; - ctx->max_db = 8; - LIST_INITHEAD(&ctx->fenced_bo); - + r600_get_backend_mask(ctx); return 0; out_err: r600_context_fini(ctx); @@ -1154,10 +1138,6 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr if (draw->indices) { ndwords = 11; - /* make sure there is enough relocation space before scheduling draw */ - if (ctx->creloc >= (ctx->nreloc - 1)) { - r600_context_flush(ctx); - } } /* queries need some special values */ @@ -1174,11 +1154,11 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr /* update the max dword count to make sure we have enough space * reserved for flushing the destination caches */ - ctx->pm4_ndwords = RADEON_CTX_MAX_PM4 - ctx->num_dest_buffers * 7 - 16; + ctx->pm4_ndwords = RADEON_MAX_CMDBUF_DWORDS - ctx->num_dest_buffers * 7 - 16; if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) { /* need to flush */ - r600_context_flush(ctx); + r600_context_flush(ctx, RADEON_FLUSH_ASYNC); } /* at that point everythings is flushed and ctx->pm4_cdwords = 0 */ if ((ctx->pm4_dirty_cdwords + ndwords) > ctx->pm4_ndwords) { @@ -1203,13 +1183,12 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr pm4[3] = draw->vgt_num_instances; if (draw->indices) { pm4[4] = PKT3(PKT3_DRAW_INDEX, 3, ctx->predicate_drawing); - pm4[5] = draw->indices_bo_offset + r600_bo_offset(draw->indices); + pm4[5] = draw->indices_bo_offset; pm4[6] = 0; pm4[7] = draw->vgt_num_indices; pm4[8] = draw->vgt_draw_initiator; pm4[9] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing); - pm4[10] = 0; - r600_context_bo_reloc(ctx, &pm4[10], draw->indices); + pm4[10] = r600_context_bo_reloc(ctx, draw->indices, RADEON_USAGE_READ); } else { pm4[4] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, ctx->predicate_drawing); pm4[5] = draw->vgt_num_indices; @@ -1270,4 +1249,3 @@ void evergreen_context_flush_dest_caches(struct r600_context *ctx) ctx->flags &= ~R600_CONTEXT_DST_CACHES_DIRTY; } - diff --git a/src/gallium/winsys/r600/drm/r600_bo.c b/src/gallium/winsys/r600/drm/r600_bo.c index 0f5b063cf5a..4beedad233e 100644 --- a/src/gallium/winsys/r600/drm/r600_bo.c +++ b/src/gallium/winsys/r600/drm/r600_bo.c @@ -23,176 +23,109 @@ * Authors: * Dave Airlie */ -#include <pipe/p_compiler.h> -#include <pipe/p_screen.h> -#include <pipebuffer/pb_bufmgr.h> -#include "state_tracker/drm_driver.h" #include "r600_priv.h" #include "r600d.h" -#include "drm.h" -#include "radeon_drm.h" +#include "state_tracker/drm_driver.h" struct r600_bo *r600_bo(struct radeon *radeon, unsigned size, unsigned alignment, unsigned binding, unsigned usage) { struct r600_bo *bo; - struct radeon_bo *rbo; + struct pb_buffer *pb; uint32_t initial_domain, domains; /* Staging resources particpate in transfers and blits only * and are used for uploads and downloads from regular * resources. We generate them internally for some transfers. */ - if (usage == PIPE_USAGE_STAGING) - domains = RADEON_GEM_DOMAIN_CPU | RADEON_GEM_DOMAIN_GTT; - else - domains = (RADEON_GEM_DOMAIN_CPU | - RADEON_GEM_DOMAIN_GTT | - RADEON_GEM_DOMAIN_VRAM); - - if (binding & (PIPE_BIND_CONSTANT_BUFFER | PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) { - bo = r600_bomgr_bo_create(radeon->bomgr, size, alignment, *radeon->cfence); - if (bo) { - bo->domains = domains; - return bo; + if (usage == PIPE_USAGE_STAGING) { + domains = RADEON_DOMAIN_GTT; + initial_domain = RADEON_DOMAIN_GTT; + } else { + domains = RADEON_DOMAIN_GTT | RADEON_DOMAIN_VRAM; + + switch(usage) { + case PIPE_USAGE_DYNAMIC: + case PIPE_USAGE_STREAM: + case PIPE_USAGE_STAGING: + initial_domain = RADEON_DOMAIN_GTT; + break; + case PIPE_USAGE_DEFAULT: + case PIPE_USAGE_STATIC: + case PIPE_USAGE_IMMUTABLE: + default: + initial_domain = RADEON_DOMAIN_VRAM; + break; } } - switch(usage) { - case PIPE_USAGE_DYNAMIC: - case PIPE_USAGE_STREAM: - case PIPE_USAGE_STAGING: - initial_domain = RADEON_GEM_DOMAIN_GTT; - break; - case PIPE_USAGE_DEFAULT: - case PIPE_USAGE_STATIC: - case PIPE_USAGE_IMMUTABLE: - default: - initial_domain = RADEON_GEM_DOMAIN_VRAM; - break; - } - rbo = radeon_bo(radeon, 0, size, alignment, initial_domain); - if (rbo == NULL) { + pb = radeon->ws->buffer_create(radeon->ws, size, alignment, binding, initial_domain); + if (!pb) { return NULL; } bo = calloc(1, sizeof(struct r600_bo)); - bo->size = size; - bo->alignment = alignment; bo->domains = domains; - bo->bo = rbo; - if (binding & (PIPE_BIND_CONSTANT_BUFFER | PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) { - r600_bomgr_bo_init(radeon->bomgr, bo); - } + bo->buf = pb; + bo->cs_buf = radeon->ws->buffer_get_cs_handle(pb); pipe_reference_init(&bo->reference, 1); return bo; } -struct r600_bo *r600_bo_handle(struct radeon *radeon, - unsigned handle, unsigned *array_mode) +struct r600_bo *r600_bo_handle(struct radeon *radeon, struct winsys_handle *whandle, + unsigned *stride, unsigned *array_mode) { + struct pb_buffer *pb; struct r600_bo *bo = calloc(1, sizeof(struct r600_bo)); - struct radeon_bo *rbo; - rbo = bo->bo = radeon_bo(radeon, handle, 0, 0, 0); - if (rbo == NULL) { + pb = bo->buf = radeon->ws->buffer_from_handle(radeon->ws, whandle, stride, NULL); + if (!pb) { free(bo); return NULL; } - bo->size = rbo->size; - bo->domains = (RADEON_GEM_DOMAIN_CPU | - RADEON_GEM_DOMAIN_GTT | - RADEON_GEM_DOMAIN_VRAM); pipe_reference_init(&bo->reference, 1); + bo->domains = RADEON_DOMAIN_GTT | RADEON_DOMAIN_VRAM; + bo->cs_buf = radeon->ws->buffer_get_cs_handle(pb); + + if (stride) + *stride = whandle->stride; - radeon_bo_get_tiling_flags(radeon, rbo, &bo->tiling_flags, &bo->kernel_pitch); if (array_mode) { - if (bo->tiling_flags) { - if (bo->tiling_flags & RADEON_TILING_MACRO) - *array_mode = V_0280A0_ARRAY_2D_TILED_THIN1; - else if (bo->tiling_flags & RADEON_TILING_MICRO) - *array_mode = V_0280A0_ARRAY_1D_TILED_THIN1; - } else { + enum radeon_bo_layout micro, macro; + + radeon->ws->buffer_get_tiling(bo->buf, µ, ¯o); + + if (macro == RADEON_LAYOUT_TILED) + *array_mode = V_0280A0_ARRAY_2D_TILED_THIN1; + else if (micro == RADEON_LAYOUT_TILED) + *array_mode = V_0280A0_ARRAY_1D_TILED_THIN1; + else *array_mode = 0; - } } return bo; } -void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, unsigned usage, void *ctx) +void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, struct radeon_winsys_cs *cs, unsigned usage) { - struct pipe_context *pctx = ctx; - - if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) { - radeon_bo_map(radeon, bo->bo); - return (uint8_t *) bo->bo->data + bo->offset; - } - - if (p_atomic_read(&bo->bo->reference.count) > 1) { - if (usage & PIPE_TRANSFER_DONTBLOCK) { - return NULL; - } - if (ctx) { - pctx->flush(pctx, NULL); - } - } - - if (usage & PIPE_TRANSFER_DONTBLOCK) { - uint32_t domain; - - if (radeon_bo_busy(radeon, bo->bo, &domain)) - return NULL; - if (radeon_bo_map(radeon, bo->bo)) { - return NULL; - } - goto out; - } - - radeon_bo_map(radeon, bo->bo); - if (radeon_bo_wait(radeon, bo->bo)) { - radeon_bo_unmap(radeon, bo->bo); - return NULL; - } - -out: - return (uint8_t *) bo->bo->data + bo->offset; + return radeon->ws->buffer_map(bo->buf, cs, usage); } void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo) { - radeon_bo_unmap(radeon, bo->bo); + radeon->ws->buffer_unmap(bo->buf); } -void r600_bo_destroy(struct radeon *radeon, struct r600_bo *bo) +void r600_bo_destroy(struct r600_bo *bo) { - if (bo->manager_id) { - if (!r600_bomgr_bo_destroy(radeon->bomgr, bo)) { - /* destroy is delayed by buffer manager */ - return; - } - } - radeon_bo_reference(radeon, &bo->bo, NULL); + pb_reference(&bo->buf, NULL); free(bo); } boolean r600_bo_get_winsys_handle(struct radeon *radeon, struct r600_bo *bo, - unsigned stride, struct winsys_handle *whandle) + unsigned stride, struct winsys_handle *whandle) { - whandle->stride = stride; - switch(whandle->type) { - case DRM_API_HANDLE_TYPE_KMS: - whandle->handle = bo->bo->handle; - break; - case DRM_API_HANDLE_TYPE_SHARED: - if (radeon_bo_get_name(radeon, bo->bo, &whandle->handle)) - return FALSE; - break; - default: - return FALSE; - } - - return TRUE; + return radeon->ws->buffer_get_handle(bo->buf, stride, whandle); } diff --git a/src/gallium/winsys/r600/drm/r600_bomgr.c b/src/gallium/winsys/r600/drm/r600_bomgr.c deleted file mode 100644 index 4918d5eb0b1..00000000000 --- a/src/gallium/winsys/r600/drm/r600_bomgr.c +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Copyright 2010 VMWare. - * Copyright 2010 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jose Fonseca <jrfonseca-at-vmware-dot-com> - * Thomas Hellström <thomas-at-vmware-dot-com> - * Jerome Glisse <[email protected]> - */ -#include <util/u_memory.h> -#include <util/u_double_list.h> -#include <util/u_time.h> -#include <pipebuffer/pb_bufmgr.h> -#include "r600_priv.h" - -static void r600_bomgr_timeout_flush(struct r600_bomgr *mgr) -{ - struct r600_bo *bo, *tmp; - int64_t now; - - now = os_time_get(); - LIST_FOR_EACH_ENTRY_SAFE(bo, tmp, &mgr->delayed, list) { - if(!os_time_timeout(bo->start, bo->end, now)) - break; - - mgr->num_delayed--; - bo->manager_id = 0; - LIST_DEL(&bo->list); - r600_bo_destroy(mgr->radeon, bo); - } -} - -static INLINE int r600_bo_is_compat(struct r600_bomgr *mgr, - struct r600_bo *bo, - unsigned size, - unsigned alignment, - unsigned cfence) -{ - if(bo->size < size) { - return 0; - } - - /* be lenient with size */ - if(bo->size >= 2*size) { - return 0; - } - - if(!pb_check_alignment(alignment, bo->alignment)) { - return 0; - } - - if (!fence_is_after(cfence, bo->fence)) { - return 0; - } - - return 1; -} - -struct r600_bo *r600_bomgr_bo_create(struct r600_bomgr *mgr, - unsigned size, - unsigned alignment, - unsigned cfence) -{ - struct r600_bo *bo, *tmp; - int64_t now; - - - pipe_mutex_lock(mgr->mutex); - - now = os_time_get(); - LIST_FOR_EACH_ENTRY_SAFE(bo, tmp, &mgr->delayed, list) { - if(r600_bo_is_compat(mgr, bo, size, alignment, cfence)) { - LIST_DEL(&bo->list); - --mgr->num_delayed; - r600_bomgr_timeout_flush(mgr); - pipe_mutex_unlock(mgr->mutex); - LIST_INITHEAD(&bo->list); - pipe_reference_init(&bo->reference, 1); - return bo; - } - - if(os_time_timeout(bo->start, bo->end, now)) { - mgr->num_delayed--; - bo->manager_id = 0; - LIST_DEL(&bo->list); - r600_bo_destroy(mgr->radeon, bo); - } - } - - pipe_mutex_unlock(mgr->mutex); - return NULL; -} - -void r600_bomgr_bo_init(struct r600_bomgr *mgr, struct r600_bo *bo) -{ - LIST_INITHEAD(&bo->list); - bo->manager_id = 1; -} - -boolean r600_bomgr_bo_destroy(struct r600_bomgr *mgr, struct r600_bo *bo) -{ - bo->start = os_time_get(); - bo->end = bo->start + mgr->usecs; - pipe_mutex_lock(mgr->mutex); - LIST_ADDTAIL(&bo->list, &mgr->delayed); - ++mgr->num_delayed; - pipe_mutex_unlock(mgr->mutex); - return FALSE; -} - -void r600_bomgr_destroy(struct r600_bomgr *mgr) -{ - struct r600_bo *bo, *tmp; - - pipe_mutex_lock(mgr->mutex); - LIST_FOR_EACH_ENTRY_SAFE(bo, tmp, &mgr->delayed, list) { - mgr->num_delayed--; - bo->manager_id = 0; - LIST_DEL(&bo->list); - r600_bo_destroy(mgr->radeon, bo); - } - pipe_mutex_unlock(mgr->mutex); - - FREE(mgr); -} - -struct r600_bomgr *r600_bomgr_create(struct radeon *radeon, unsigned usecs) -{ - struct r600_bomgr *mgr; - - mgr = CALLOC_STRUCT(r600_bomgr); - if (mgr == NULL) - return NULL; - - mgr->radeon = radeon; - mgr->usecs = usecs; - LIST_INITHEAD(&mgr->delayed); - mgr->num_delayed = 0; - pipe_mutex_init(mgr->mutex); - - return mgr; -} diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c index ab0afea5bf5..7d5583fd287 100644 --- a/src/gallium/winsys/r600/drm/r600_drm.c +++ b/src/gallium/winsys/r600/drm/r600_drm.c @@ -25,29 +25,18 @@ * Corbin Simpson <[email protected]> * Joakim Sindholt <[email protected]> */ -#include <stdio.h> -#include <errno.h> -#include <sys/ioctl.h> -#include "util/u_inlines.h" -#include "util/u_debug.h" -#include "util/u_hash_table.h" -#include <pipebuffer/pb_bufmgr.h> -#include "r600.h" + #include "r600_priv.h" #include "r600_drm_public.h" -#include "xf86drm.h" -#include "radeon_drm.h" - -#ifndef RADEON_INFO_TILING_CONFIG -#define RADEON_INFO_TILING_CONFIG 0x6 -#endif +#include "util/u_memory.h" +#include <errno.h> -#ifndef RADEON_INFO_CLOCK_CRYSTAL_FREQ -#define RADEON_INFO_CLOCK_CRYSTAL_FREQ 0x9 +#ifndef RADEON_INFO_NUM_TILE_PIPES +#define RADEON_INFO_NUM_TILE_PIPES 0xb #endif -#ifndef RADEON_INFO_NUM_BACKENDS -#define RADEON_INFO_NUM_BACKENDS 0xa +#ifndef RADEON_INFO_BACKEND_MAP +#define RADEON_INFO_BACKEND_MAP 0xd #endif enum radeon_family r600_get_family(struct radeon *r600) @@ -67,31 +56,27 @@ struct r600_tiling_info *r600_get_tiling_info(struct radeon *radeon) unsigned r600_get_clock_crystal_freq(struct radeon *radeon) { - return radeon->clock_crystal_freq; + return radeon->info.r600_clock_crystal_freq; } unsigned r600_get_num_backends(struct radeon *radeon) { - return radeon->num_backends; + return radeon->info.r600_num_backends; } -unsigned r600_get_minor_version(struct radeon *radeon) +unsigned r600_get_num_tile_pipes(struct radeon *radeon) { - return radeon->minor_version; + return radeon->info.r600_num_tile_pipes; } - -static int radeon_get_device(struct radeon *radeon) +unsigned r600_get_backend_map(struct radeon *radeon) { - struct drm_radeon_info info = {}; - int r; + return radeon->info.r600_backend_map; +} - radeon->device = 0; - info.request = RADEON_INFO_DEVICE_ID; - info.value = (uintptr_t)&radeon->device; - r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info, - sizeof(struct drm_radeon_info)); - return r; +unsigned r600_get_minor_version(struct radeon *radeon) +{ + return radeon->info.drm_minor; } static int r600_interpret_tiling(struct radeon *radeon, uint32_t tiling_config) @@ -186,124 +171,32 @@ static int eg_interpret_tiling(struct radeon *radeon, uint32_t tiling_config) static int radeon_drm_get_tiling(struct radeon *radeon) { - struct drm_radeon_info info = {}; - int r; - uint32_t tiling_config = 0; + uint32_t tiling_config = radeon->info.r600_tiling_config; - info.request = RADEON_INFO_TILING_CONFIG; - info.value = (uintptr_t)&tiling_config; - r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info, - sizeof(struct drm_radeon_info)); - - if (r) + if (!tiling_config) return 0; if (radeon->chip_class == R600 || radeon->chip_class == R700) { - r = r600_interpret_tiling(radeon, tiling_config); + return r600_interpret_tiling(radeon, tiling_config); } else { - r = eg_interpret_tiling(radeon, tiling_config); - } - return r; -} - -static int radeon_get_clock_crystal_freq(struct radeon *radeon) -{ - struct drm_radeon_info info = {}; - uint32_t clock_crystal_freq = 0; - int r; - - info.request = RADEON_INFO_CLOCK_CRYSTAL_FREQ; - info.value = (uintptr_t)&clock_crystal_freq; - r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info, - sizeof(struct drm_radeon_info)); - if (r) - return r; - - radeon->clock_crystal_freq = clock_crystal_freq; - return 0; -} - - -static int radeon_get_num_backends(struct radeon *radeon) -{ - struct drm_radeon_info info = {}; - uint32_t num_backends = 0; - int r; - - info.request = RADEON_INFO_NUM_BACKENDS; - info.value = (uintptr_t)&num_backends; - r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info, - sizeof(struct drm_radeon_info)); - if (r) - return r; - - radeon->num_backends = num_backends; - return 0; -} - - -static int radeon_init_fence(struct radeon *radeon) -{ - radeon->fence = 1; - radeon->fence_bo = r600_bo(radeon, 4096, 0, 0, 0); - if (radeon->fence_bo == NULL) { - return -ENOMEM; + return eg_interpret_tiling(radeon, tiling_config); } - radeon->cfence = r600_bo_map(radeon, radeon->fence_bo, PIPE_TRANSFER_UNSYNCHRONIZED, NULL); - *radeon->cfence = 0; - return 0; -} - -#define PTR_TO_UINT(x) ((unsigned)((intptr_t)(x))) - -static unsigned handle_hash(void *key) -{ - return PTR_TO_UINT(key); -} - -static int handle_compare(void *key1, void *key2) -{ - return PTR_TO_UINT(key1) != PTR_TO_UINT(key2); } -static struct radeon *radeon_new(int fd, unsigned device) +struct radeon *radeon_create(struct radeon_winsys *ws) { - struct radeon *radeon; - int r; - drmVersionPtr version; - - radeon = calloc(1, sizeof(*radeon)); + struct radeon *radeon = CALLOC_STRUCT(radeon); if (radeon == NULL) { return NULL; } - radeon->fd = fd; - radeon->device = device; - radeon->refcount = 1; - - version = drmGetVersion(radeon->fd); - if (version->version_major != 2) { - fprintf(stderr, "%s: DRM version is %d.%d.%d but this driver is " - "only compatible with 2.x.x\n", __FUNCTION__, - version->version_major, version->version_minor, - version->version_patchlevel); - drmFreeVersion(version); - exit(1); - } - - radeon->minor_version = version->version_minor; - drmFreeVersion(version); + radeon->ws = ws; + ws->query_info(ws, &radeon->info); - r = radeon_get_device(radeon); - if (r) { - fprintf(stderr, "Failed to get device id\n"); - return radeon_decref(radeon); - } - - radeon->family = radeon_family_from_device(radeon->device); + radeon->family = radeon_family_from_device(radeon->info.pci_id); if (radeon->family == CHIP_UNKNOWN) { - fprintf(stderr, "Unknown chipset 0x%04X\n", radeon->device); - return radeon_decref(radeon); + fprintf(stderr, "Unknown chipset 0x%04X\n", radeon->info.pci_id); + return radeon_destroy(radeon); } /* setup class */ switch (radeon->family) { @@ -349,56 +242,21 @@ static struct radeon *radeon_new(int fd, unsigned device) break; default: fprintf(stderr, "%s unknown or unsupported chipset 0x%04X\n", - __func__, radeon->device); + __func__, radeon->info.pci_id); break; } if (radeon_drm_get_tiling(radeon)) return NULL; - /* get the GPU counter frequency, failure is non fatal */ - radeon_get_clock_crystal_freq(radeon); - - if (radeon->minor_version >= 9) - radeon_get_num_backends(radeon); - - radeon->bomgr = r600_bomgr_create(radeon, 1000000); - if (radeon->bomgr == NULL) { - return NULL; - } - r = radeon_init_fence(radeon); - if (r) { - radeon_decref(radeon); - return NULL; - } - - radeon->bo_handles = util_hash_table_create(handle_hash, handle_compare); - pipe_mutex_init(radeon->bo_handles_mutex); return radeon; } -struct radeon *r600_drm_winsys_create(int drmfd) -{ - return radeon_new(drmfd, 0); -} - -struct radeon *radeon_decref(struct radeon *radeon) +struct radeon *radeon_destroy(struct radeon *radeon) { if (radeon == NULL) return NULL; - if (--radeon->refcount > 0) { - return NULL; - } - - util_hash_table_destroy(radeon->bo_handles); - pipe_mutex_destroy(radeon->bo_handles_mutex); - if (radeon->fence_bo) { - r600_bo_reference(radeon, &radeon->fence_bo, NULL); - } - - if (radeon->bomgr) - r600_bomgr_destroy(radeon->bomgr); - free(radeon); + FREE(radeon); return NULL; } diff --git a/src/gallium/winsys/r600/drm/r600_drm_public.h b/src/gallium/winsys/r600/drm/r600_drm_public.h index cfce8df9c2c..b8a37c7574f 100644 --- a/src/gallium/winsys/r600/drm/r600_drm_public.h +++ b/src/gallium/winsys/r600/drm/r600_drm_public.h @@ -26,8 +26,8 @@ #ifndef R600_DRM_PUBLIC_H #define R600_DRM_PUBLIC_H -struct radeon; +struct radeon_winsys; -struct radeon *r600_drm_winsys_create(int drmFD); +struct radeon *radeon_create(struct radeon_winsys *ws); #endif diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c index 07bd544d1a0..6c5b4b8953a 100644 --- a/src/gallium/winsys/r600/drm/r600_hw_context.c +++ b/src/gallium/winsys/r600/drm/r600_hw_context.c @@ -23,23 +23,97 @@ * Authors: * Jerome Glisse */ -#include <errno.h> -#include <stdint.h> -#include <string.h> -#include <stdlib.h> -#include <assert.h> -#include <pipe/p_compiler.h> -#include <util/u_inlines.h> -#include <util/u_memory.h> -#include <pipebuffer/pb_bufmgr.h> -#include "xf86drm.h" -#include "radeon_drm.h" #include "r600_priv.h" -#include "bof.h" #include "r600d.h" +#include "util/u_memory.h" +#include <errno.h> #define GROUP_FORCE_NEW_BLOCK 0 +/* Get backends mask */ +void r600_get_backend_mask(struct r600_context *ctx) +{ + struct r600_bo * buffer; + u32 * results; + unsigned num_backends = r600_get_num_backends(ctx->radeon); + unsigned i, mask = 0; + + /* if backend_map query is supported by the kernel */ + if (ctx->radeon->info.r600_backend_map_valid) { + unsigned num_tile_pipes = r600_get_num_tile_pipes(ctx->radeon); + unsigned backend_map = r600_get_backend_map(ctx->radeon); + unsigned item_width, item_mask; + + if (ctx->radeon->chip_class >= EVERGREEN) { + item_width = 4; + item_mask = 0x7; + } else { + item_width = 2; + item_mask = 0x3; + } + + while(num_tile_pipes--) { + i = backend_map & item_mask; + mask |= (1<<i); + backend_map >>= item_width; + } + if (mask != 0) { + ctx->backend_mask = mask; + return; + } + } + + /* otherwise backup path for older kernels */ + + /* create buffer for event data */ + buffer = r600_bo(ctx->radeon, ctx->max_db*16, 1, 0, + PIPE_USAGE_STAGING); + if (!buffer) + goto err; + + /* initialize buffer with zeroes */ + results = r600_bo_map(ctx->radeon, buffer, ctx->cs, PIPE_TRANSFER_WRITE); + if (results) { + memset(results, 0, ctx->max_db * 4 * 4); + r600_bo_unmap(ctx->radeon, buffer); + + /* emit EVENT_WRITE for ZPASS_DONE */ + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2, 0); + ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); + ctx->pm4[ctx->pm4_cdwords++] = 0; + ctx->pm4[ctx->pm4_cdwords++] = 0; + + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); + ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, buffer, RADEON_USAGE_WRITE); + + /* execute */ + r600_context_flush(ctx, 0); + + /* analyze results */ + results = r600_bo_map(ctx->radeon, buffer, ctx->cs, PIPE_TRANSFER_READ); + if (results) { + for(i = 0; i < ctx->max_db; i++) { + /* at least highest bit will be set if backend is used */ + if (results[i*4 + 1]) + mask |= (1<<i); + } + r600_bo_unmap(ctx->radeon, buffer); + } + } + + r600_bo_reference(&buffer, NULL); + + if (mask != 0) { + ctx->backend_mask = mask; + return; + } + +err: + /* fallback to old method - set num_backends lower bits to 1 */ + ctx->backend_mask = (~((u32)0))>>(32-num_backends); + return; +} + static inline void r600_context_ps_partial_flush(struct r600_context *ctx) { if (!(ctx->flags & R600_CONTEXT_DRAW_PENDING)) @@ -66,32 +140,6 @@ void r600_init_cs(struct r600_context *ctx) ctx->init_dwords = ctx->pm4_cdwords; } -static void INLINE r600_context_update_fenced_list(struct r600_context *ctx) -{ - for (int i = 0; i < ctx->creloc; i++) { - if (!LIST_IS_EMPTY(&ctx->bo[i]->fencedlist)) - LIST_DELINIT(&ctx->bo[i]->fencedlist); - LIST_ADDTAIL(&ctx->bo[i]->fencedlist, &ctx->fenced_bo); - ctx->bo[i]->fence = ctx->radeon->fence; - ctx->bo[i]->ctx = ctx; - } -} - -static void INLINE r600_context_fence_wraparound(struct r600_context *ctx, unsigned fence) -{ - struct radeon_bo *bo = NULL; - struct radeon_bo *tmp; - - LIST_FOR_EACH_ENTRY_SAFE(bo, tmp, &ctx->fenced_bo, fencedlist) { - if (bo->fence <= *ctx->radeon->cfence) { - LIST_DELINIT(&bo->fencedlist); - bo->fence = 0; - } else { - bo->fence = fence; - } - } -} - static void r600_init_block(struct r600_context *ctx, struct r600_block *block, const struct r600_reg *reg, int index, int nreg, @@ -680,17 +728,6 @@ static int r600_loop_const_init(struct r600_context *ctx, u32 offset) return r600_context_add_block(ctx, r600_loop_consts, nreg, PKT3_SET_LOOP_CONST, R600_LOOP_CONST_OFFSET); } -static void r600_context_clear_fenced_bo(struct r600_context *ctx) -{ - struct radeon_bo *bo, *tmp; - - LIST_FOR_EACH_ENTRY_SAFE(bo, tmp, &ctx->fenced_bo, fencedlist) { - LIST_DELINIT(&bo->fencedlist); - bo->fence = 0; - bo->ctx = NULL; - } -} - static void r600_free_resource_range(struct r600_context *ctx, struct r600_range *range, int nblocks) { struct r600_block *block; @@ -699,7 +736,7 @@ static void r600_free_resource_range(struct r600_context *ctx, struct r600_range block = range->blocks[i]; if (block) { for (int k = 1; k <= block->nbo; k++) - r600_bo_reference(ctx->radeon, &block->reloc[k].bo, NULL); + r600_bo_reference(&block->reloc[k].bo, NULL); free(block); } } @@ -724,7 +761,7 @@ void r600_context_fini(struct r600_context *ctx) range->blocks[CTX_BLOCK_ID(offset)] = NULL; } for (int k = 1; k <= block->nbo; k++) { - r600_bo_reference(ctx->radeon, &block->reloc[k].bo, NULL); + r600_bo_reference(&block->reloc[k].bo, NULL); } free(block); } @@ -736,11 +773,9 @@ void r600_context_fini(struct r600_context *ctx) r600_free_resource_range(ctx, &ctx->fs_resources, ctx->num_fs_resources); free(ctx->range); free(ctx->blocks); - free(ctx->reloc); free(ctx->bo); - free(ctx->pm4); + ctx->radeon->ws->cs_destroy(ctx->cs); - r600_context_clear_fenced_bo(ctx); memset(ctx, 0, sizeof(struct r600_context)); } @@ -797,6 +832,7 @@ int r600_context_init(struct r600_context *ctx, struct radeon *radeon) memset(ctx, 0, sizeof(struct r600_context)); ctx->radeon = radeon; + LIST_INITHEAD(&ctx->query_list); /* init dirty list */ @@ -872,33 +908,23 @@ int r600_context_init(struct r600_context *ctx, struct radeon *radeon) if (r) goto out_err; + ctx->cs = radeon->ws->cs_create(radeon->ws); + /* allocate cs variables */ - ctx->nreloc = RADEON_CTX_MAX_PM4; - ctx->reloc = calloc(ctx->nreloc, sizeof(struct r600_reloc)); - if (ctx->reloc == NULL) { - r = -ENOMEM; - goto out_err; - } - ctx->bo = calloc(ctx->nreloc, sizeof(void *)); + ctx->bo = calloc(RADEON_MAX_CMDBUF_DWORDS, sizeof(void *)); if (ctx->bo == NULL) { r = -ENOMEM; goto out_err; } - ctx->pm4_ndwords = RADEON_CTX_MAX_PM4; - ctx->pm4 = calloc(ctx->pm4_ndwords, 4); - if (ctx->pm4 == NULL) { - r = -ENOMEM; - goto out_err; - } + ctx->pm4_ndwords = RADEON_MAX_CMDBUF_DWORDS; + ctx->pm4 = ctx->cs->buf; r600_init_cs(ctx); /* save 16dwords space for fence mecanism */ ctx->pm4_ndwords -= 16; - - LIST_INITHEAD(&ctx->fenced_bo); - ctx->max_db = 4; + r600_get_backend_mask(ctx); return 0; out_err: r600_context_fini(ctx); @@ -912,7 +938,7 @@ void r600_context_flush_all(struct r600_context *ctx, unsigned flush_flags) if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) { /* need to flush */ - r600_context_flush(ctx); + r600_context_flush(ctx, RADEON_FLUSH_ASYNC); } ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SURFACE_SYNC, 3, ctx->predicate_drawing); @@ -923,11 +949,8 @@ void r600_context_flush_all(struct r600_context *ctx, unsigned flush_flags) } void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags, - unsigned flush_mask, struct r600_bo *rbo) + unsigned flush_mask, struct r600_bo *bo) { - struct radeon_bo *bo; - - bo = rbo->bo; /* if bo has already been flushed */ if (!(~bo->last_flush & flush_flags)) { bo->last_flush &= flush_mask; @@ -959,29 +982,15 @@ void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags, } else { ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SURFACE_SYNC, 3, ctx->predicate_drawing); ctx->pm4[ctx->pm4_cdwords++] = flush_flags; - ctx->pm4[ctx->pm4_cdwords++] = (bo->size + 255) >> 8; + ctx->pm4[ctx->pm4_cdwords++] = (bo->buf->base.size + 255) >> 8; ctx->pm4[ctx->pm4_cdwords++] = 0x00000000; ctx->pm4[ctx->pm4_cdwords++] = 0x0000000A; ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing); - ctx->pm4[ctx->pm4_cdwords++] = bo->reloc_id; + ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, bo, RADEON_USAGE_WRITE); } bo->last_flush = (bo->last_flush | flush_flags) & flush_mask; } -void r600_context_get_reloc(struct r600_context *ctx, struct r600_bo *rbo) -{ - struct radeon_bo *bo = rbo->bo; - bo->reloc = &ctx->reloc[ctx->creloc]; - bo->reloc_id = ctx->creloc * sizeof(struct r600_reloc) / 4; - ctx->reloc[ctx->creloc].handle = bo->handle; - ctx->reloc[ctx->creloc].read_domain = rbo->domains & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM); - ctx->reloc[ctx->creloc].write_domain = rbo->domains & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM); - ctx->reloc[ctx->creloc].flags = 0; - radeon_bo_reference(ctx->radeon, &ctx->bo[ctx->creloc], bo); - rbo->fence = ctx->radeon->fence; - ctx->creloc++; -} - void r600_context_reg(struct r600_context *ctx, unsigned offset, unsigned value, unsigned mask) @@ -1057,8 +1066,8 @@ void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_stat if (block->pm4_bo_index[id]) { /* find relocation */ reloc_id = block->pm4_bo_index[id]; - r600_bo_reference(ctx->radeon, &block->reloc[reloc_id].bo, reg->bo); - reg->bo->fence = ctx->radeon->fence; + r600_bo_reference(&block->reloc[reloc_id].bo, reg->bo); + block->reloc[reloc_id].bo_usage = reg->bo_usage; /* always force dirty for relocs for now */ dirty |= R600_BLOCK_STATUS_DIRTY; } @@ -1094,10 +1103,10 @@ void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_ if (state == NULL) { block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_RESOURCE_DIRTY); if (block->reloc[1].bo) - block->reloc[1].bo->bo->binding &= ~BO_BOUND_TEXTURE; + block->reloc[1].bo->binding &= ~BO_BOUND_TEXTURE; - r600_bo_reference(ctx->radeon, &block->reloc[1].bo, NULL); - r600_bo_reference(ctx->radeon, &block->reloc[2].bo, NULL); + r600_bo_reference(&block->reloc[1].bo, NULL); + r600_bo_reference(&block->reloc[2].bo, NULL); LIST_DELINIT(&block->list); LIST_DELINIT(&block->enable_list); return; @@ -1117,39 +1126,32 @@ void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_ if (!dirty) { if (is_vertex) { - if (block->reloc[1].bo->bo->handle != state->bo[0]->bo->handle) + if (block->reloc[1].bo->buf != state->bo[0]->buf) dirty |= R600_BLOCK_STATUS_RESOURCE_DIRTY; } else { - if ((block->reloc[1].bo->bo->handle != state->bo[0]->bo->handle) || - (block->reloc[2].bo->bo->handle != state->bo[1]->bo->handle)) + if ((block->reloc[1].bo->buf != state->bo[0]->buf) || + (block->reloc[2].bo->buf != state->bo[1]->buf)) dirty |= R600_BLOCK_STATUS_RESOURCE_DIRTY; } } - if (!dirty) { - if (is_vertex) - state->bo[0]->fence = ctx->radeon->fence; - else { - state->bo[0]->fence = ctx->radeon->fence; - state->bo[1]->fence = ctx->radeon->fence; - } - } else { + + if (dirty) { if (is_vertex) { /* VERTEX RESOURCE, we preted there is 2 bo to relocate so * we have single case btw VERTEX & TEXTURE resource */ - r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->bo[0]); - r600_bo_reference(ctx->radeon, &block->reloc[2].bo, NULL); - state->bo[0]->fence = ctx->radeon->fence; + r600_bo_reference(&block->reloc[1].bo, state->bo[0]); + block->reloc[1].bo_usage = state->bo_usage[0]; + r600_bo_reference(&block->reloc[2].bo, NULL); } else { /* TEXTURE RESOURCE */ - r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->bo[0]); - r600_bo_reference(ctx->radeon, &block->reloc[2].bo, state->bo[1]); - state->bo[0]->fence = ctx->radeon->fence; - state->bo[1]->fence = ctx->radeon->fence; - state->bo[0]->bo->binding |= BO_BOUND_TEXTURE; + r600_bo_reference(&block->reloc[1].bo, state->bo[0]); + block->reloc[1].bo_usage = state->bo_usage[0]; + r600_bo_reference(&block->reloc[2].bo, state->bo[1]); + block->reloc[2].bo_usage = state->bo_usage[1]; + state->bo[0]->binding |= BO_BOUND_TEXTURE; } - } - if (dirty) { + if (is_vertex) block->status |= R600_BLOCK_STATUS_RESOURCE_VERTEX; else @@ -1281,7 +1283,6 @@ struct r600_bo *r600_context_reg_bo(struct r600_context *ctx, unsigned offset) void r600_context_block_emit_dirty(struct r600_context *ctx, struct r600_block *block) { - int id; int optional = block->nbo == 0 && !(block->flags & REG_FLAG_DIRTY_ALWAYS); int cp_dwords = block->pm4_ndwords, start_dword = 0; int new_dwords = 0; @@ -1297,14 +1298,13 @@ void r600_context_block_emit_dirty(struct r600_context *ctx, struct r600_block * for (int j = 0; j < block->nreg; j++) { if (block->pm4_bo_index[j]) { /* find relocation */ - id = block->pm4_bo_index[j]; - r600_context_bo_reloc(ctx, - &block->pm4[block->reloc[id].bo_pm4_index], - block->reloc[id].bo); + struct r600_block_reloc *reloc = &block->reloc[block->pm4_bo_index[j]]; + block->pm4[reloc->bo_pm4_index] = + r600_context_bo_reloc(ctx, reloc->bo, reloc->bo_usage); r600_context_bo_flush(ctx, - block->reloc[id].flush_flags, - block->reloc[id].flush_mask, - block->reloc[id].bo); + reloc->flush_flags, + reloc->flush_mask, + reloc->bo); nbo--; if (nbo == 0) break; @@ -1338,7 +1338,6 @@ out: void r600_context_block_resource_emit_dirty(struct r600_context *ctx, struct r600_block *block) { - int id; int cp_dwords = block->pm4_ndwords; int nbo = block->nbo; @@ -1352,14 +1351,13 @@ void r600_context_block_resource_emit_dirty(struct r600_context *ctx, struct r60 for (int j = 0; j < nbo; j++) { if (block->pm4_bo_index[j]) { /* find relocation */ - id = block->pm4_bo_index[j]; - r600_context_bo_reloc(ctx, - &block->pm4[block->reloc[id].bo_pm4_index], - block->reloc[id].bo); + struct r600_block_reloc *reloc = &block->reloc[block->pm4_bo_index[j]]; + block->pm4[reloc->bo_pm4_index] = + r600_context_bo_reloc(ctx, reloc->bo, reloc->bo_usage); r600_context_bo_flush(ctx, - block->reloc[id].flush_flags, - block->reloc[id].flush_mask, - block->reloc[id].bo); + reloc->flush_flags, + reloc->flush_mask, + reloc->bo); } } ctx->flags &= ~R600_CONTEXT_CHECK_EVENT_FLUSH; @@ -1418,10 +1416,6 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw) if (draw->indices) { ndwords = 11; - /* make sure there is enough relocation space before scheduling draw */ - if (ctx->creloc >= (ctx->nreloc - 1)) { - r600_context_flush(ctx); - } } /* queries need some special values */ @@ -1440,11 +1434,11 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw) /* update the max dword count to make sure we have enough space * reserved for flushing the destination caches */ - ctx->pm4_ndwords = RADEON_CTX_MAX_PM4 - ctx->num_dest_buffers * 7 - 16; + ctx->pm4_ndwords = RADEON_MAX_CMDBUF_DWORDS - ctx->num_dest_buffers * 7 - 16; if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) { /* need to flush */ - r600_context_flush(ctx); + r600_context_flush(ctx, RADEON_FLUSH_ASYNC); } /* at that point everythings is flushed and ctx->pm4_cdwords = 0 */ if ((ctx->pm4_dirty_cdwords + ndwords) > ctx->pm4_ndwords) { @@ -1469,13 +1463,12 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw) pm4[3] = draw->vgt_num_instances; if (draw->indices) { pm4[4] = PKT3(PKT3_DRAW_INDEX, 3, ctx->predicate_drawing); - pm4[5] = draw->indices_bo_offset + r600_bo_offset(draw->indices); + pm4[5] = draw->indices_bo_offset; pm4[6] = 0; pm4[7] = draw->vgt_num_indices; pm4[8] = draw->vgt_draw_initiator; pm4[9] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing); - pm4[10] = 0; - r600_context_bo_reloc(ctx, &pm4[10], draw->indices); + pm4[10] = r600_context_bo_reloc(ctx, draw->indices, RADEON_USAGE_READ); } else { pm4[4] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, ctx->predicate_drawing); pm4[5] = draw->vgt_num_indices; @@ -1489,13 +1482,8 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw) ctx->pm4_dirty_cdwords = 0; } -void r600_context_flush(struct r600_context *ctx) +void r600_context_flush(struct r600_context *ctx, unsigned flags) { - struct drm_radeon_cs drmib = {}; - struct drm_radeon_cs_chunk chunks[2]; - uint64_t chunk_array[2]; - unsigned fence; - int r; struct r600_block *enable_block = NULL; if (ctx->pm4_cdwords == ctx->init_dwords) @@ -1512,54 +1500,19 @@ void r600_context_flush(struct r600_context *ctx) /* partial flush is needed to avoid lockups on some chips with user fences */ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, 0); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4); - /* emit fence */ - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); - ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); - ctx->pm4[ctx->pm4_cdwords++] = 0; - ctx->pm4[ctx->pm4_cdwords++] = (1 << 29) | (0 << 24); - ctx->pm4[ctx->pm4_cdwords++] = ctx->radeon->fence; - ctx->pm4[ctx->pm4_cdwords++] = 0; - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); - ctx->pm4[ctx->pm4_cdwords++] = 0; - r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], ctx->radeon->fence_bo); - -#if 1 - /* emit cs */ - drmib.num_chunks = 2; - drmib.chunks = (uint64_t)(uintptr_t)chunk_array; - chunks[0].chunk_id = RADEON_CHUNK_ID_IB; - chunks[0].length_dw = ctx->pm4_cdwords; - chunks[0].chunk_data = (uint64_t)(uintptr_t)ctx->pm4; - chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS; - chunks[1].length_dw = ctx->creloc * sizeof(struct r600_reloc) / 4; - chunks[1].chunk_data = (uint64_t)(uintptr_t)ctx->reloc; - chunk_array[0] = (uint64_t)(uintptr_t)&chunks[0]; - chunk_array[1] = (uint64_t)(uintptr_t)&chunks[1]; - r = drmCommandWriteRead(ctx->radeon->fd, DRM_RADEON_CS, &drmib, - sizeof(struct drm_radeon_cs)); - if (r) { - fprintf(stderr, "radeon: The kernel rejected CS, " - "see dmesg for more information.\n"); - } -#else - *ctx->radeon->cfence = ctx->radeon->fence; -#endif - - r600_context_update_fenced_list(ctx); - - fence = ctx->radeon->fence + 1; - if (fence < ctx->radeon->fence) { - /* wrap around */ - fence = 1; - r600_context_fence_wraparound(ctx, fence); - } - ctx->radeon->fence = fence; + + /* Flush the CS. */ + ctx->cs->cdw = ctx->pm4_cdwords; + ctx->radeon->ws->cs_flush(ctx->cs, flags); + + /* We need to get the pointer to the other CS, + * the command streams are double-buffered. */ + ctx->pm4 = ctx->cs->buf; /* restart */ for (int i = 0; i < ctx->creloc; i++) { - ctx->bo[i]->reloc = NULL; ctx->bo[i]->last_flush = 0; - radeon_bo_reference(ctx->radeon, &ctx->bo[i], NULL); + r600_bo_reference(&ctx->bo[i], NULL); } ctx->creloc = 0; ctx->pm4_dirty_cdwords = 0; @@ -1596,10 +1549,9 @@ void r600_context_emit_fence(struct r600_context *ctx, struct r600_bo *fence_bo, { unsigned ndwords = 10; - if (((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) || - (ctx->creloc >= (ctx->nreloc - 1))) { + if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) { /* need to flush */ - r600_context_flush(ctx); + r600_context_flush(ctx, RADEON_FLUSH_ASYNC); } ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, 0); @@ -1611,90 +1563,7 @@ void r600_context_emit_fence(struct r600_context *ctx, struct r600_bo *fence_bo, ctx->pm4[ctx->pm4_cdwords++] = value; /* DATA_LO */ ctx->pm4[ctx->pm4_cdwords++] = 0; /* DATA_HI */ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); - ctx->pm4[ctx->pm4_cdwords++] = 0; - r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], fence_bo); -} - -void r600_context_dump_bof(struct r600_context *ctx, const char *file) -{ - bof_t *bcs, *blob, *array, *bo, *size, *handle, *device_id, *root; - unsigned i; - - root = device_id = bcs = blob = array = bo = size = handle = NULL; - root = bof_object(); - if (root == NULL) - goto out_err; - device_id = bof_int32(ctx->radeon->device); - if (device_id == NULL) - goto out_err; - if (bof_object_set(root, "device_id", device_id)) - goto out_err; - bof_decref(device_id); - device_id = NULL; - /* dump relocs */ - blob = bof_blob(ctx->creloc * 16, ctx->reloc); - if (blob == NULL) - goto out_err; - if (bof_object_set(root, "reloc", blob)) - goto out_err; - bof_decref(blob); - blob = NULL; - /* dump cs */ - blob = bof_blob(ctx->pm4_cdwords * 4, ctx->pm4); - if (blob == NULL) - goto out_err; - if (bof_object_set(root, "pm4", blob)) - goto out_err; - bof_decref(blob); - blob = NULL; - /* dump bo */ - array = bof_array(); - if (array == NULL) - goto out_err; - for (i = 0; i < ctx->creloc; i++) { - struct radeon_bo *rbo = ctx->bo[i]; - bo = bof_object(); - if (bo == NULL) - goto out_err; - size = bof_int32(rbo->size); - if (size == NULL) - goto out_err; - if (bof_object_set(bo, "size", size)) - goto out_err; - bof_decref(size); - size = NULL; - handle = bof_int32(rbo->handle); - if (handle == NULL) - goto out_err; - if (bof_object_set(bo, "handle", handle)) - goto out_err; - bof_decref(handle); - handle = NULL; - radeon_bo_map(ctx->radeon, rbo); - blob = bof_blob(rbo->size, rbo->data); - radeon_bo_unmap(ctx->radeon, rbo); - if (blob == NULL) - goto out_err; - if (bof_object_set(bo, "data", blob)) - goto out_err; - bof_decref(blob); - blob = NULL; - if (bof_array_append(array, bo)) - goto out_err; - bof_decref(bo); - bo = NULL; - } - if (bof_object_set(root, "bo", array)) - goto out_err; - bof_dump_file(root, file); -out_err: - bof_decref(blob); - bof_decref(array); - bof_decref(bo); - bof_decref(size); - bof_decref(handle); - bof_decref(device_id); - bof_decref(root); + ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, fence_bo, RADEON_USAGE_WRITE); } static boolean r600_query_result(struct r600_context *ctx, struct r600_query *query, boolean wait) @@ -1704,9 +1573,9 @@ static boolean r600_query_result(struct r600_context *ctx, struct r600_query *qu u32 *results, *current_result; if (wait) - results = r600_bo_map(ctx->radeon, query->buffer, PIPE_TRANSFER_READ, NULL); + results = r600_bo_map(ctx->radeon, query->buffer, ctx->cs, PIPE_TRANSFER_READ); else - results = r600_bo_map(ctx->radeon, query->buffer, PIPE_TRANSFER_DONTBLOCK | PIPE_TRANSFER_READ, NULL); + results = r600_bo_map(ctx->radeon, query->buffer, ctx->cs, PIPE_TRANSFER_DONTBLOCK | PIPE_TRANSFER_READ); if (!results) return FALSE; @@ -1735,7 +1604,6 @@ static boolean r600_query_result(struct r600_context *ctx, struct r600_query *qu void r600_query_begin(struct r600_context *ctx, struct r600_query *query) { unsigned required_space, new_results_end; - int num_backends = r600_get_num_backends(ctx->radeon); /* query request needs 6/8 dwords for begin + 6/8 dwords for end */ if (query->type == PIPE_QUERY_TIME_ELAPSED) @@ -1745,7 +1613,7 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query) if ((required_space + ctx->pm4_cdwords) > ctx->pm4_ndwords) { /* need to flush */ - r600_context_flush(ctx); + r600_context_flush(ctx, RADEON_FLUSH_ASYNC); } if (query->type == PIPE_QUERY_OCCLUSION_COUNTER) { @@ -1756,7 +1624,7 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query) query->queries_emitted = 1; } else { if (++query->queries_emitted > query->buffer_size / query->result_size / 2) - r600_context_flush(ctx); + r600_context_flush(ctx, RADEON_FLUSH_ASYNC); } } @@ -1767,7 +1635,7 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query) /* collect current results if query buffer is full */ if (new_results_end == query->results_start) { if (!(query->state & R600_QUERY_STATE_FLUSHED)) - r600_context_flush(ctx); + r600_context_flush(ctx, 0); r600_query_result(ctx, query, TRUE); } @@ -1775,15 +1643,17 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query) u32 *results; int i; - results = r600_bo_map(ctx->radeon, query->buffer, PIPE_TRANSFER_WRITE, NULL); + results = r600_bo_map(ctx->radeon, query->buffer, ctx->cs, PIPE_TRANSFER_WRITE); if (results) { results = (u32*)((char*)results + query->results_end); memset(results, 0, query->result_size); /* Set top bits for unused backends */ - for (i = num_backends; i < ctx->max_db; i++) { - results[(i * 4)+1] = 0x80000000; - results[(i * 4)+3] = 0x80000000; + for (i = 0; i < ctx->max_db; i++) { + if (!(ctx->backend_mask & (1<<i))) { + results[(i * 4)+1] = 0x80000000; + results[(i * 4)+3] = 0x80000000; + } } r600_bo_unmap(ctx->radeon, query->buffer); } @@ -1793,19 +1663,18 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query) if (query->type == PIPE_QUERY_TIME_ELAPSED) { ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); - ctx->pm4[ctx->pm4_cdwords++] = query->results_end + r600_bo_offset(query->buffer); + ctx->pm4[ctx->pm4_cdwords++] = query->results_end; ctx->pm4[ctx->pm4_cdwords++] = (3 << 29); ctx->pm4[ctx->pm4_cdwords++] = 0; ctx->pm4[ctx->pm4_cdwords++] = 0; } else { ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2, 0); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); - ctx->pm4[ctx->pm4_cdwords++] = query->results_end + r600_bo_offset(query->buffer); + ctx->pm4[ctx->pm4_cdwords++] = query->results_end; ctx->pm4[ctx->pm4_cdwords++] = 0; } ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); - ctx->pm4[ctx->pm4_cdwords++] = 0; - r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], query->buffer); + ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, query->buffer, RADEON_USAGE_WRITE); query->state |= R600_QUERY_STATE_STARTED; query->state ^= R600_QUERY_STATE_ENDED; @@ -1818,19 +1687,18 @@ void r600_query_end(struct r600_context *ctx, struct r600_query *query) if (query->type == PIPE_QUERY_TIME_ELAPSED) { ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); - ctx->pm4[ctx->pm4_cdwords++] = query->results_end + 8 + r600_bo_offset(query->buffer); + ctx->pm4[ctx->pm4_cdwords++] = query->results_end + 8; ctx->pm4[ctx->pm4_cdwords++] = (3 << 29); ctx->pm4[ctx->pm4_cdwords++] = 0; ctx->pm4[ctx->pm4_cdwords++] = 0; } else { ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2, 0); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); - ctx->pm4[ctx->pm4_cdwords++] = query->results_end + 8 + r600_bo_offset(query->buffer); + ctx->pm4[ctx->pm4_cdwords++] = query->results_end + 8; ctx->pm4[ctx->pm4_cdwords++] = 0; } ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); - ctx->pm4[ctx->pm4_cdwords++] = 0; - r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], query->buffer); + ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, query->buffer, RADEON_USAGE_WRITE); query->results_end += query->result_size; if (query->results_end >= query->buffer_size) @@ -1848,7 +1716,7 @@ void r600_query_predication(struct r600_context *ctx, struct r600_query *query, { if (operation == PREDICATION_OP_CLEAR) { if (ctx->pm4_cdwords + 3 > ctx->pm4_ndwords) - r600_context_flush(ctx); + r600_context_flush(ctx, RADEON_FLUSH_ASYNC); ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_PREDICATION, 1, 0); ctx->pm4[ctx->pm4_cdwords++] = 0; @@ -1864,7 +1732,7 @@ void r600_query_predication(struct r600_context *ctx, struct r600_query *query, count /= query->result_size; if (ctx->pm4_cdwords + 5 * count > ctx->pm4_ndwords) - r600_context_flush(ctx); + r600_context_flush(ctx, RADEON_FLUSH_ASYNC); op = PRED_OP(operation) | PREDICATION_DRAW_VISIBLE | (flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW); @@ -1872,11 +1740,11 @@ void r600_query_predication(struct r600_context *ctx, struct r600_query *query, /* emit predicate packets for all data blocks */ while (results_base != query->results_end) { ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_PREDICATION, 1, 0); - ctx->pm4[ctx->pm4_cdwords++] = results_base + r600_bo_offset(query->buffer); + ctx->pm4[ctx->pm4_cdwords++] = results_base; ctx->pm4[ctx->pm4_cdwords++] = op; ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); - ctx->pm4[ctx->pm4_cdwords++] = 0; - r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], query->buffer); + ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, query->buffer, + RADEON_USAGE_READ); results_base += query->result_size; if (results_base >= query->buffer_size) results_base = 0; @@ -1926,7 +1794,7 @@ struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned void r600_context_query_destroy(struct r600_context *ctx, struct r600_query *query) { - r600_bo_reference(ctx->radeon, &query->buffer, NULL); + r600_bo_reference(&query->buffer, NULL); LIST_DELINIT(&query->list); free(query); } @@ -1938,7 +1806,7 @@ boolean r600_context_query_result(struct r600_context *ctx, uint64_t *result = (uint64_t*)vresult; if (!(query->state & R600_QUERY_STATE_FLUSHED)) { - r600_context_flush(ctx); + r600_context_flush(ctx, 0); } if (!r600_query_result(ctx, query, wait)) return FALSE; diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h index 69f7251c043..1e901897efd 100644 --- a/src/gallium/winsys/r600/drm/r600_priv.h +++ b/src/gallium/winsys/r600/drm/r600_priv.h @@ -26,40 +26,20 @@ #ifndef R600_PRIV_H #define R600_PRIV_H -#include <errno.h> -#include <stdint.h> -#include <stdlib.h> -#include <assert.h> -#include <util/u_double_list.h> -#include <util/u_inlines.h> -#include "util/u_hash_table.h" -#include <os/os_thread.h> #include "r600.h" +#include "../../radeon/drm/radeon_winsys.h" +#include "util/u_hash_table.h" +#include "os/os_thread.h" #define PKT_COUNT_C 0xC000FFFF #define PKT_COUNT_S(x) (((x) & 0x3FFF) << 16) -struct r600_bomgr; -struct r600_bo; - struct radeon { - int fd; - int refcount; - unsigned device; + struct radeon_winsys *ws; + struct radeon_info info; unsigned family; enum chip_class chip_class; struct r600_tiling_info tiling_info; - struct r600_bomgr *bomgr; - unsigned fence; - unsigned *cfence; - struct r600_bo *fence_bo; - unsigned clock_crystal_freq; - unsigned num_backends; - unsigned minor_version; - - /* List of buffer handles and its mutex. */ - struct util_hash_table *bo_handles; - pipe_mutex bo_handles_mutex; }; /* these flags are used in register flags and added into block flags */ @@ -79,85 +59,25 @@ struct r600_reg { }; #define BO_BOUND_TEXTURE 1 -struct radeon_bo { - struct pipe_reference reference; - unsigned handle; - unsigned size; - unsigned alignment; - int map_count; - void *data; - struct list_head fencedlist; - unsigned fence; - struct r600_context *ctx; - boolean shared; - struct r600_reloc *reloc; - unsigned reloc_id; - unsigned last_flush; - unsigned name; - unsigned binding; -}; struct r600_bo { struct pipe_reference reference; /* this must be the first member for the r600_bo_reference inline to work */ /* DO NOT MOVE THIS ^ */ - unsigned size; - unsigned tiling_flags; - unsigned kernel_pitch; + struct pb_buffer *buf; + struct radeon_winsys_cs_handle *cs_buf; unsigned domains; - struct radeon_bo *bo; - unsigned fence; - /* manager data */ - struct list_head list; - unsigned manager_id; - unsigned alignment; - unsigned offset; - int64_t start; - int64_t end; -}; - -struct r600_bomgr { - struct radeon *radeon; - unsigned usecs; - pipe_mutex mutex; - struct list_head delayed; - unsigned num_delayed; + unsigned last_flush; + unsigned binding; }; /* - * r600_drm.c - */ -struct radeon *r600_new(int fd, unsigned device); -void r600_delete(struct radeon *r600); - -/* * radeon_pciid.c */ unsigned radeon_family_from_device(unsigned device); /* - * radeon_bo.c - */ -struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle, - unsigned size, unsigned alignment, unsigned initial_domain); -void radeon_bo_reference(struct radeon *radeon, struct radeon_bo **dst, - struct radeon_bo *src); -int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo); -int radeon_bo_busy(struct radeon *radeon, struct radeon_bo *bo, uint32_t *domain); -int radeon_bo_fencelist(struct radeon *radeon, struct radeon_bo **bolist, uint32_t num_bo); -int radeon_bo_get_tiling_flags(struct radeon *radeon, - struct radeon_bo *bo, - uint32_t *tiling_flags, - uint32_t *pitch); -int radeon_bo_get_name(struct radeon *radeon, - struct radeon_bo *bo, - uint32_t *name); -int radeon_bo_fixed_map(struct radeon *radeon, struct radeon_bo *bo); - -/* * r600_hw_context.c */ -int r600_context_init_fence(struct r600_context *ctx); -void r600_context_get_reloc(struct r600_context *ctx, struct r600_bo *rbo); void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags, unsigned flush_mask, struct r600_bo *rbo); struct r600_bo *r600_context_reg_bo(struct r600_context *ctx, unsigned offset); @@ -175,70 +95,23 @@ void r600_context_reg(struct r600_context *ctx, void r600_init_cs(struct r600_context *ctx); int r600_resource_init(struct r600_context *ctx, struct r600_range *range, unsigned offset, unsigned nblocks, unsigned stride, struct r600_reg *reg, int nreg, unsigned offset_base); -static INLINE void r600_context_bo_reloc(struct r600_context *ctx, u32 *pm4, struct r600_bo *rbo) +static INLINE unsigned r600_context_bo_reloc(struct r600_context *ctx, struct r600_bo *rbo, + enum radeon_bo_usage usage) { - struct radeon_bo *bo = rbo->bo; + enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? rbo->domains : 0; + enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? rbo->domains : 0; - assert(bo != NULL); + assert(usage); - if (!bo->reloc) - r600_context_get_reloc(ctx, rbo); + unsigned reloc_index = + ctx->radeon->ws->cs_add_reloc(ctx->cs, rbo->cs_buf, + rd, wd); - /* set PKT3 to point to proper reloc */ - *pm4 = bo->reloc_id; -} + if (reloc_index >= ctx->creloc) + ctx->creloc = reloc_index+1; -/* - * r600_bo.c - */ -void r600_bo_destroy(struct radeon *radeon, struct r600_bo *bo); - -/* - * r600_bomgr.c - */ -struct r600_bomgr *r600_bomgr_create(struct radeon *radeon, unsigned usecs); -void r600_bomgr_destroy(struct r600_bomgr *mgr); -boolean r600_bomgr_bo_destroy(struct r600_bomgr *mgr, struct r600_bo *bo); -void r600_bomgr_bo_init(struct r600_bomgr *mgr, struct r600_bo *bo); -struct r600_bo *r600_bomgr_bo_create(struct r600_bomgr *mgr, - unsigned size, - unsigned alignment, - unsigned cfence); - - -/* - * helpers - */ - - -/* - * radeon_bo.c - */ -static inline int radeon_bo_map(struct radeon *radeon, struct radeon_bo *bo) -{ - if (bo->map_count == 0 && !bo->data) - return radeon_bo_fixed_map(radeon, bo); - bo->map_count++; - return 0; -} - -static inline void radeon_bo_unmap(struct radeon *radeon, struct radeon_bo *bo) -{ - bo->map_count--; - assert(bo->map_count >= 0); -} - -/* - * fence - */ -static inline boolean fence_is_after(unsigned fence, unsigned ofence) -{ - /* handle wrap around */ - if (fence < 0x80000000 && ofence > 0x80000000) - return TRUE; - if (fence > ofence) - return TRUE; - return FALSE; + r600_bo_reference(&ctx->bo[reloc_index], rbo); + return reloc_index * 4; } #endif diff --git a/src/gallium/winsys/r600/drm/r600d.h b/src/gallium/winsys/r600/drm/r600d.h deleted file mode 100644 index 4a19dcf8ddf..00000000000 --- a/src/gallium/winsys/r600/drm/r600d.h +++ /dev/null @@ -1,2241 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse - */ -#ifndef R600D_H -#define R600D_H - -/* evergreen values */ -#define EG_RESOURCE_OFFSET 0x00030000 -#define EG_RESOURCE_END 0x00034000 -#define EG_LOOP_CONST_OFFSET 0x0003A200 -#define EG_LOOP_CONST_END 0x0003A26C -#define EG_BOOL_CONST_OFFSET 0x0003A500 -#define EG_BOOL_CONST_END 0x0003A506 - - -#define R600_CONFIG_REG_OFFSET 0X00008000 -#define R600_CONFIG_REG_END 0X0000AC00 -#define R600_CONTEXT_REG_OFFSET 0X00028000 -#define R600_CONTEXT_REG_END 0X00029000 -#define R600_ALU_CONST_OFFSET 0X00030000 -#define R600_ALU_CONST_END 0X00032000 -#define R600_RESOURCE_OFFSET 0X00038000 -#define R600_RESOURCE_END 0X0003C000 -#define R600_SAMPLER_OFFSET 0X0003C000 -#define R600_SAMPLER_END 0X0003CFF0 -#define R600_CTL_CONST_OFFSET 0X0003CFF0 -#define R600_CTL_CONST_END 0X0003E200 -#define R600_LOOP_CONST_OFFSET 0X0003E200 -#define R600_LOOP_CONST_END 0X0003E380 -#define R600_BOOL_CONST_OFFSET 0X0003E380 -#define R600_BOOL_CONST_END 0X00040000 - -#define PKT3_NOP 0x10 -#define PKT3_INDIRECT_BUFFER_END 0x17 -#define PKT3_SET_PREDICATION 0x20 -#define PKT3_REG_RMW 0x21 -#define PKT3_COND_EXEC 0x22 -#define PKT3_PRED_EXEC 0x23 -#define PKT3_START_3D_CMDBUF 0x24 -#define PKT3_DRAW_INDEX_2 0x27 -#define PKT3_CONTEXT_CONTROL 0x28 -#define PKT3_DRAW_INDEX_IMMD_BE 0x29 -#define PKT3_INDEX_TYPE 0x2A -#define PKT3_DRAW_INDEX 0x2B -#define PKT3_DRAW_INDEX_AUTO 0x2D -#define PKT3_DRAW_INDEX_IMMD 0x2E -#define PKT3_NUM_INSTANCES 0x2F -#define PKT3_STRMOUT_BUFFER_UPDATE 0x34 -#define PKT3_INDIRECT_BUFFER_MP 0x38 -#define PKT3_MEM_SEMAPHORE 0x39 -#define PKT3_MPEG_INDEX 0x3A -#define PKT3_WAIT_REG_MEM 0x3C -#define PKT3_MEM_WRITE 0x3D -#define PKT3_INDIRECT_BUFFER 0x32 -#define PKT3_CP_INTERRUPT 0x40 -#define PKT3_SURFACE_SYNC 0x43 -#define PKT3_ME_INITIALIZE 0x44 -#define PKT3_COND_WRITE 0x45 -#define PKT3_EVENT_WRITE 0x46 -#define PKT3_EVENT_WRITE_EOP 0x47 -#define PKT3_ONE_REG_WRITE 0x57 -#define PKT3_SET_CONFIG_REG 0x68 -#define PKT3_SET_CONTEXT_REG 0x69 -#define PKT3_SET_ALU_CONST 0x6A -#define PKT3_SET_BOOL_CONST 0x6B -#define PKT3_SET_LOOP_CONST 0x6C -#define PKT3_SET_RESOURCE 0x6D -#define PKT3_SET_SAMPLER 0x6E -#define PKT3_SET_CTL_CONST 0x6F -#define PKT3_SURFACE_BASE_UPDATE 0x73 -#define SURFACE_BASE_UPDATE_DEPTH (1 << 0) -#define SURFACE_BASE_UPDATE_COLOR(x) (2 << (x)) -#define SURFACE_BASE_UPDATE_STRMOUT(x) (0x200 << (x)) - -#define EVENT_TYPE_PS_PARTIAL_FLUSH 0x10 -#define EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT 0x14 -#define EVENT_TYPE_ZPASS_DONE 0x15 -#define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT 0x16 -#define EVENT_TYPE(x) ((x) << 0) -#define EVENT_INDEX(x) ((x) << 8) - /* 0 - any non-TS event - * 1 - ZPASS_DONE - * 2 - SAMPLE_PIPELINESTAT - * 3 - SAMPLE_STREAMOUTSTAT* - * 4 - *S_PARTIAL_FLUSH - * 5 - TS events - */ - -#define PREDICATION_OP_CLEAR 0x0 -#define PREDICATION_OP_ZPASS 0x1 -#define PREDICATION_OP_PRIMCOUNT 0x2 - -#define PRED_OP(x) ((x) << 16) - -#define PREDICATION_CONTINUE (1 << 31) - -#define PREDICATION_HINT_WAIT (0 << 12) -#define PREDICATION_HINT_NOWAIT_DRAW (1 << 12) - -#define PREDICATION_DRAW_NOT_VISIBLE (0 << 8) -#define PREDICATION_DRAW_VISIBLE (1 << 8) - -#define PKT_TYPE_S(x) (((x) & 0x3) << 30) -#define PKT_TYPE_G(x) (((x) >> 30) & 0x3) -#define PKT_TYPE_C 0x3FFFFFFF -#define PKT_COUNT_S(x) (((x) & 0x3FFF) << 16) -#define PKT_COUNT_G(x) (((x) >> 16) & 0x3FFF) -#define PKT_COUNT_C 0xC000FFFF -#define PKT0_BASE_INDEX_S(x) (((x) & 0xFFFF) << 0) -#define PKT0_BASE_INDEX_G(x) (((x) >> 0) & 0xFFFF) -#define PKT0_BASE_INDEX_C 0xFFFF0000 -#define PKT3_IT_OPCODE_S(x) (((x) & 0xFF) << 8) -#define PKT3_IT_OPCODE_G(x) (((x) >> 8) & 0xFF) -#define PKT3_IT_OPCODE_C 0xFFFF00FF -#define PKT3_PRED_S(x) (((x) >> 0) & 0x1) -#define PKT0(index, count) (PKT_TYPE_S(0) | PKT0_BASE_INDEX_S(index) | PKT_COUNT_S(count)) -#define PKT3(op, count, predicate) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count) | PKT3_PRED_S(predicate)) - -/* Registers */ -#define R_0280A0_CB_COLOR0_INFO 0x0280A0 -#define S_0280A0_ENDIAN(x) (((x) & 0x3) << 0) -#define G_0280A0_ENDIAN(x) (((x) >> 0) & 0x3) -#define C_0280A0_ENDIAN 0xFFFFFFFC -#define S_0280A0_FORMAT(x) (((x) & 0x3F) << 2) -#define G_0280A0_FORMAT(x) (((x) >> 2) & 0x3F) -#define C_0280A0_FORMAT 0xFFFFFF03 -#define V_0280A0_COLOR_INVALID 0x00000000 -#define V_0280A0_COLOR_8 0x00000001 -#define V_0280A0_COLOR_4_4 0x00000002 -#define V_0280A0_COLOR_3_3_2 0x00000003 -#define V_0280A0_COLOR_16 0x00000005 -#define V_0280A0_COLOR_16_FLOAT 0x00000006 -#define V_0280A0_COLOR_8_8 0x00000007 -#define V_0280A0_COLOR_5_6_5 0x00000008 -#define V_0280A0_COLOR_6_5_5 0x00000009 -#define V_0280A0_COLOR_1_5_5_5 0x0000000A -#define V_0280A0_COLOR_4_4_4_4 0x0000000B -#define V_0280A0_COLOR_5_5_5_1 0x0000000C -#define V_0280A0_COLOR_32 0x0000000D -#define V_0280A0_COLOR_32_FLOAT 0x0000000E -#define V_0280A0_COLOR_16_16 0x0000000F -#define V_0280A0_COLOR_16_16_FLOAT 0x00000010 -#define V_0280A0_COLOR_8_24 0x00000011 -#define V_0280A0_COLOR_8_24_FLOAT 0x00000012 -#define V_0280A0_COLOR_24_8 0x00000013 -#define V_0280A0_COLOR_24_8_FLOAT 0x00000014 -#define V_0280A0_COLOR_10_11_11 0x00000015 -#define V_0280A0_COLOR_10_11_11_FLOAT 0x00000016 -#define V_0280A0_COLOR_11_11_10 0x00000017 -#define V_0280A0_COLOR_11_11_10_FLOAT 0x00000018 -#define V_0280A0_COLOR_2_10_10_10 0x00000019 -#define V_0280A0_COLOR_8_8_8_8 0x0000001A -#define V_0280A0_COLOR_10_10_10_2 0x0000001B -#define V_0280A0_COLOR_X24_8_32_FLOAT 0x0000001C -#define V_0280A0_COLOR_32_32 0x0000001D -#define V_0280A0_COLOR_32_32_FLOAT 0x0000001E -#define V_0280A0_COLOR_16_16_16_16 0x0000001F -#define V_0280A0_COLOR_16_16_16_16_FLOAT 0x00000020 -#define V_0280A0_COLOR_32_32_32_32 0x00000022 -#define V_0280A0_COLOR_32_32_32_32_FLOAT 0x00000023 -#define S_0280A0_ARRAY_MODE(x) (((x) & 0xF) << 8) -#define G_0280A0_ARRAY_MODE(x) (((x) >> 8) & 0xF) -#define C_0280A0_ARRAY_MODE 0xFFFFF0FF -#define V_0280A0_ARRAY_LINEAR_GENERAL 0x00000000 -#define V_0280A0_ARRAY_LINEAR_ALIGNED 0x00000001 -#define V_0280A0_ARRAY_1D_TILED_THIN1 0x00000002 -#define V_0280A0_ARRAY_2D_TILED_THIN1 0x00000004 -#define S_0280A0_NUMBER_TYPE(x) (((x) & 0x7) << 12) -#define G_0280A0_NUMBER_TYPE(x) (((x) >> 12) & 0x7) -#define C_0280A0_NUMBER_TYPE 0xFFFF8FFF -#define S_0280A0_READ_SIZE(x) (((x) & 0x1) << 15) -#define G_0280A0_READ_SIZE(x) (((x) >> 15) & 0x1) -#define C_0280A0_READ_SIZE 0xFFFF7FFF -#define S_0280A0_COMP_SWAP(x) (((x) & 0x3) << 16) -#define G_0280A0_COMP_SWAP(x) (((x) >> 16) & 0x3) -#define C_0280A0_COMP_SWAP 0xFFFCFFFF -#define S_0280A0_TILE_MODE(x) (((x) & 0x3) << 18) -#define G_0280A0_TILE_MODE(x) (((x) >> 18) & 0x3) -#define C_0280A0_TILE_MODE 0xFFF3FFFF -#define S_0280A0_BLEND_CLAMP(x) (((x) & 0x1) << 20) -#define G_0280A0_BLEND_CLAMP(x) (((x) >> 20) & 0x1) -#define C_0280A0_BLEND_CLAMP 0xFFEFFFFF -#define S_0280A0_CLEAR_COLOR(x) (((x) & 0x1) << 21) -#define G_0280A0_CLEAR_COLOR(x) (((x) >> 21) & 0x1) -#define C_0280A0_CLEAR_COLOR 0xFFDFFFFF -#define S_0280A0_BLEND_BYPASS(x) (((x) & 0x1) << 22) -#define G_0280A0_BLEND_BYPASS(x) (((x) >> 22) & 0x1) -#define C_0280A0_BLEND_BYPASS 0xFFBFFFFF -#define S_0280A0_BLEND_FLOAT32(x) (((x) & 0x1) << 23) -#define G_0280A0_BLEND_FLOAT32(x) (((x) >> 23) & 0x1) -#define C_0280A0_BLEND_FLOAT32 0xFF7FFFFF -#define S_0280A0_SIMPLE_FLOAT(x) (((x) & 0x1) << 24) -#define G_0280A0_SIMPLE_FLOAT(x) (((x) >> 24) & 0x1) -#define C_0280A0_SIMPLE_FLOAT 0xFEFFFFFF -#define S_0280A0_ROUND_MODE(x) (((x) & 0x1) << 25) -#define G_0280A0_ROUND_MODE(x) (((x) >> 25) & 0x1) -#define C_0280A0_ROUND_MODE 0xFDFFFFFF -#define S_0280A0_TILE_COMPACT(x) (((x) & 0x1) << 26) -#define G_0280A0_TILE_COMPACT(x) (((x) >> 26) & 0x1) -#define C_0280A0_TILE_COMPACT 0xFBFFFFFF -#define S_0280A0_SOURCE_FORMAT(x) (((x) & 0x1) << 27) -#define G_0280A0_SOURCE_FORMAT(x) (((x) >> 27) & 0x1) -#define C_0280A0_SOURCE_FORMAT 0xF7FFFFFF -#define R_028060_CB_COLOR0_SIZE 0x028060 -#define S_028060_PITCH_TILE_MAX(x) (((x) & 0x3FF) << 0) -#define G_028060_PITCH_TILE_MAX(x) (((x) >> 0) & 0x3FF) -#define C_028060_PITCH_TILE_MAX 0xFFFFFC00 -#define S_028060_SLICE_TILE_MAX(x) (((x) & 0xFFFFF) << 10) -#define G_028060_SLICE_TILE_MAX(x) (((x) >> 10) & 0xFFFFF) -#define C_028060_SLICE_TILE_MAX 0xC00003FF -#define R_028800_DB_DEPTH_CONTROL 0x028800 -#define S_028800_STENCIL_ENABLE(x) (((x) & 0x1) << 0) -#define G_028800_STENCIL_ENABLE(x) (((x) >> 0) & 0x1) -#define C_028800_STENCIL_ENABLE 0xFFFFFFFE -#define S_028800_Z_ENABLE(x) (((x) & 0x1) << 1) -#define G_028800_Z_ENABLE(x) (((x) >> 1) & 0x1) -#define C_028800_Z_ENABLE 0xFFFFFFFD -#define S_028800_Z_WRITE_ENABLE(x) (((x) & 0x1) << 2) -#define G_028800_Z_WRITE_ENABLE(x) (((x) >> 2) & 0x1) -#define C_028800_Z_WRITE_ENABLE 0xFFFFFFFB -#define S_028800_ZFUNC(x) (((x) & 0x7) << 4) -#define G_028800_ZFUNC(x) (((x) >> 4) & 0x7) -#define C_028800_ZFUNC 0xFFFFFF8F -#define S_028800_BACKFACE_ENABLE(x) (((x) & 0x1) << 7) -#define G_028800_BACKFACE_ENABLE(x) (((x) >> 7) & 0x1) -#define C_028800_BACKFACE_ENABLE 0xFFFFFF7F -#define S_028800_STENCILFUNC(x) (((x) & 0x7) << 8) -#define G_028800_STENCILFUNC(x) (((x) >> 8) & 0x7) -#define C_028800_STENCILFUNC 0xFFFFF8FF -#define S_028800_STENCILFAIL(x) (((x) & 0x7) << 11) -#define G_028800_STENCILFAIL(x) (((x) >> 11) & 0x7) -#define C_028800_STENCILFAIL 0xFFFFC7FF -#define S_028800_STENCILZPASS(x) (((x) & 0x7) << 14) -#define G_028800_STENCILZPASS(x) (((x) >> 14) & 0x7) -#define C_028800_STENCILZPASS 0xFFFE3FFF -#define S_028800_STENCILZFAIL(x) (((x) & 0x7) << 17) -#define G_028800_STENCILZFAIL(x) (((x) >> 17) & 0x7) -#define C_028800_STENCILZFAIL 0xFFF1FFFF -#define S_028800_STENCILFUNC_BF(x) (((x) & 0x7) << 20) -#define G_028800_STENCILFUNC_BF(x) (((x) >> 20) & 0x7) -#define C_028800_STENCILFUNC_BF 0xFF8FFFFF -#define S_028800_STENCILFAIL_BF(x) (((x) & 0x7) << 23) -#define G_028800_STENCILFAIL_BF(x) (((x) >> 23) & 0x7) -#define C_028800_STENCILFAIL_BF 0xFC7FFFFF -#define S_028800_STENCILZPASS_BF(x) (((x) & 0x7) << 26) -#define G_028800_STENCILZPASS_BF(x) (((x) >> 26) & 0x7) -#define C_028800_STENCILZPASS_BF 0xE3FFFFFF -#define S_028800_STENCILZFAIL_BF(x) (((x) & 0x7) << 29) -#define G_028800_STENCILZFAIL_BF(x) (((x) >> 29) & 0x7) -#define C_028800_STENCILZFAIL_BF 0x1FFFFFFF -#define R_028010_DB_DEPTH_INFO 0x028010 -#define S_028010_FORMAT(x) (((x) & 0x7) << 0) -#define G_028010_FORMAT(x) (((x) >> 0) & 0x7) -#define C_028010_FORMAT 0xFFFFFFF8 -#define V_028010_DEPTH_INVALID 0x00000000 -#define V_028010_DEPTH_16 0x00000001 -#define V_028010_DEPTH_X8_24 0x00000002 -#define V_028010_DEPTH_8_24 0x00000003 -#define V_028010_DEPTH_X8_24_FLOAT 0x00000004 -#define V_028010_DEPTH_8_24_FLOAT 0x00000005 -#define V_028010_DEPTH_32_FLOAT 0x00000006 -#define V_028010_DEPTH_X24_8_32_FLOAT 0x00000007 -#define S_028010_READ_SIZE(x) (((x) & 0x1) << 3) -#define G_028010_READ_SIZE(x) (((x) >> 3) & 0x1) -#define C_028010_READ_SIZE 0xFFFFFFF7 -#define S_028010_ARRAY_MODE(x) (((x) & 0xF) << 15) -#define G_028010_ARRAY_MODE(x) (((x) >> 15) & 0xF) -#define C_028010_ARRAY_MODE 0xFFF87FFF -#define S_028010_TILE_SURFACE_ENABLE(x) (((x) & 0x1) << 25) -#define G_028010_TILE_SURFACE_ENABLE(x) (((x) >> 25) & 0x1) -#define C_028010_TILE_SURFACE_ENABLE 0xFDFFFFFF -#define S_028010_TILE_COMPACT(x) (((x) & 0x1) << 26) -#define G_028010_TILE_COMPACT(x) (((x) >> 26) & 0x1) -#define C_028010_TILE_COMPACT 0xFBFFFFFF -#define S_028010_ZRANGE_PRECISION(x) (((x) & 0x1) << 31) -#define G_028010_ZRANGE_PRECISION(x) (((x) >> 31) & 0x1) -#define C_028010_ZRANGE_PRECISION 0x7FFFFFFF -#define R_028000_DB_DEPTH_SIZE 0x028000 -#define S_028000_PITCH_TILE_MAX(x) (((x) & 0x3FF) << 0) -#define G_028000_PITCH_TILE_MAX(x) (((x) >> 0) & 0x3FF) -#define C_028000_PITCH_TILE_MAX 0xFFFFFC00 -#define S_028000_SLICE_TILE_MAX(x) (((x) & 0xFFFFF) << 10) -#define G_028000_SLICE_TILE_MAX(x) (((x) >> 10) & 0xFFFFF) -#define C_028000_SLICE_TILE_MAX 0xC00003FF -#define R_028004_DB_DEPTH_VIEW 0x028004 -#define S_028004_SLICE_START(x) (((x) & 0x7FF) << 0) -#define G_028004_SLICE_START(x) (((x) >> 0) & 0x7FF) -#define C_028004_SLICE_START 0xFFFFF800 -#define S_028004_SLICE_MAX(x) (((x) & 0x7FF) << 13) -#define G_028004_SLICE_MAX(x) (((x) >> 13) & 0x7FF) -#define C_028004_SLICE_MAX 0xFF001FFF -#define R_028D24_DB_HTILE_SURFACE 0x028D24 -#define S_028D24_HTILE_WIDTH(x) (((x) & 0x1) << 0) -#define G_028D24_HTILE_WIDTH(x) (((x) >> 0) & 0x1) -#define C_028D24_HTILE_WIDTH 0xFFFFFFFE -#define S_028D24_HTILE_HEIGHT(x) (((x) & 0x1) << 1) -#define G_028D24_HTILE_HEIGHT(x) (((x) >> 1) & 0x1) -#define C_028D24_HTILE_HEIGHT 0xFFFFFFFD -#define S_028D24_LINEAR(x) (((x) & 0x1) << 2) -#define G_028D24_LINEAR(x) (((x) >> 2) & 0x1) -#define C_028D24_LINEAR 0xFFFFFFFB -#define S_028D24_FULL_CACHE(x) (((x) & 0x1) << 3) -#define G_028D24_FULL_CACHE(x) (((x) >> 3) & 0x1) -#define C_028D24_FULL_CACHE 0xFFFFFFF7 -#define S_028D24_HTILE_USES_PRELOAD_WIN(x) (((x) & 0x1) << 4) -#define G_028D24_HTILE_USES_PRELOAD_WIN(x) (((x) >> 4) & 0x1) -#define C_028D24_HTILE_USES_PRELOAD_WIN 0xFFFFFFEF -#define S_028D24_PRELOAD(x) (((x) & 0x1) << 5) -#define G_028D24_PRELOAD(x) (((x) >> 5) & 0x1) -#define C_028D24_PRELOAD 0xFFFFFFDF -#define S_028D24_PREFETCH_WIDTH(x) (((x) & 0x3F) << 6) -#define G_028D24_PREFETCH_WIDTH(x) (((x) >> 6) & 0x3F) -#define C_028D24_PREFETCH_WIDTH 0xFFFFF03F -#define S_028D24_PREFETCH_HEIGHT(x) (((x) & 0x3F) << 12) -#define G_028D24_PREFETCH_HEIGHT(x) (((x) >> 12) & 0x3F) -#define C_028D24_PREFETCH_HEIGHT 0xFFFC0FFF -#define R_028D34_DB_PREFETCH_LIMIT 0x028D34 -#define S_028D34_DEPTH_HEIGHT_TILE_MAX(x) (((x) & 0x3FF) << 0) -#define G_028D34_DEPTH_HEIGHT_TILE_MAX(x) (((x) >> 0) & 0x3FF) -#define C_028D34_DEPTH_HEIGHT_TILE_MAX 0xFFFFFC00 -#define R_028D10_DB_RENDER_OVERRIDE 0x028D10 -#define S_028D10_FORCE_HIZ_ENABLE(x) (((x) & 0x3) << 0) -#define G_028D10_FORCE_HIZ_ENABLE(x) (((x) >> 0) & 0x3) -#define C_028D10_FORCE_HIZ_ENABLE 0xFFFFFFFC -#define S_028D10_FORCE_HIS_ENABLE0(x) (((x) & 0x3) << 2) -#define G_028D10_FORCE_HIS_ENABLE0(x) (((x) >> 2) & 0x3) -#define C_028D10_FORCE_HIS_ENABLE0 0xFFFFFFF3 -#define S_028D10_FORCE_HIS_ENABLE1(x) (((x) & 0x3) << 4) -#define G_028D10_FORCE_HIS_ENABLE1(x) (((x) >> 4) & 0x3) -#define C_028D10_FORCE_HIS_ENABLE1 0xFFFFFFCF -#define S_028D10_FORCE_SHADER_Z_ORDER(x) (((x) & 0x1) << 6) -#define G_028D10_FORCE_SHADER_Z_ORDER(x) (((x) >> 6) & 0x1) -#define C_028D10_FORCE_SHADER_Z_ORDER 0xFFFFFFBF -#define S_028D10_FAST_Z_DISABLE(x) (((x) & 0x1) << 7) -#define G_028D10_FAST_Z_DISABLE(x) (((x) >> 7) & 0x1) -#define C_028D10_FAST_Z_DISABLE 0xFFFFFF7F -#define S_028D10_FAST_STENCIL_DISABLE(x) (((x) & 0x1) << 8) -#define G_028D10_FAST_STENCIL_DISABLE(x) (((x) >> 8) & 0x1) -#define C_028D10_FAST_STENCIL_DISABLE 0xFFFFFEFF -#define S_028D10_NOOP_CULL_DISABLE(x) (((x) & 0x1) << 9) -#define G_028D10_NOOP_CULL_DISABLE(x) (((x) >> 9) & 0x1) -#define C_028D10_NOOP_CULL_DISABLE 0xFFFFFDFF -#define S_028D10_FORCE_COLOR_KILL(x) (((x) & 0x1) << 10) -#define G_028D10_FORCE_COLOR_KILL(x) (((x) >> 10) & 0x1) -#define C_028D10_FORCE_COLOR_KILL 0xFFFFFBFF -#define S_028D10_FORCE_Z_READ(x) (((x) & 0x1) << 11) -#define G_028D10_FORCE_Z_READ(x) (((x) >> 11) & 0x1) -#define C_028D10_FORCE_Z_READ 0xFFFFF7FF -#define S_028D10_FORCE_STENCIL_READ(x) (((x) & 0x1) << 12) -#define G_028D10_FORCE_STENCIL_READ(x) (((x) >> 12) & 0x1) -#define C_028D10_FORCE_STENCIL_READ 0xFFFFEFFF -#define S_028D10_FORCE_FULL_Z_RANGE(x) (((x) & 0x3) << 13) -#define G_028D10_FORCE_FULL_Z_RANGE(x) (((x) >> 13) & 0x3) -#define C_028D10_FORCE_FULL_Z_RANGE 0xFFFF9FFF -#define S_028D10_FORCE_QC_SMASK_CONFLICT(x) (((x) & 0x1) << 15) -#define G_028D10_FORCE_QC_SMASK_CONFLICT(x) (((x) >> 15) & 0x1) -#define C_028D10_FORCE_QC_SMASK_CONFLICT 0xFFFF7FFF -#define S_028D10_DISABLE_VIEWPORT_CLAMP(x) (((x) & 0x1) << 16) -#define G_028D10_DISABLE_VIEWPORT_CLAMP(x) (((x) >> 16) & 0x1) -#define C_028D10_DISABLE_VIEWPORT_CLAMP 0xFFFEFFFF -#define S_028D10_IGNORE_SC_ZRANGE(x) (((x) & 0x1) << 17) -#define G_028D10_IGNORE_SC_ZRANGE(x) (((x) >> 17) & 0x1) -#define C_028D10_IGNORE_SC_ZRANGE 0xFFFDFFFF -#define R_028A40_VGT_GS_MODE 0x028A40 -#define S_028A40_MODE(x) (((x) & 0x3) << 0) -#define G_028A40_MODE(x) (((x) >> 0) & 0x3) -#define C_028A40_MODE 0xFFFFFFFC -#define S_028A40_ES_PASSTHRU(x) (((x) & 0x1) << 2) -#define G_028A40_ES_PASSTHRU(x) (((x) >> 2) & 0x1) -#define C_028A40_ES_PASSTHRU 0xFFFFFFFB -#define S_028A40_CUT_MODE(x) (((x) & 0x3) << 3) -#define G_028A40_CUT_MODE(x) (((x) >> 3) & 0x3) -#define C_028A40_CUT_MODE 0xFFFFFFE7 -#define R_008DFC_SQ_CF_WORD0 0x008DFC -#define S_008DFC_ADDR(x) (((x) & 0xFFFFFFFF) << 0) -#define G_008DFC_ADDR(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_008DFC_ADDR 0x00000000 -#define R_008DFC_SQ_CF_WORD1 0x008DFC -#define S_008DFC_POP_COUNT(x) (((x) & 0x7) << 0) -#define G_008DFC_POP_COUNT(x) (((x) >> 0) & 0x7) -#define C_008DFC_POP_COUNT 0xFFFFFFF8 -#define S_008DFC_CF_CONST(x) (((x) & 0x1F) << 3) -#define G_008DFC_CF_CONST(x) (((x) >> 3) & 0x1F) -#define C_008DFC_CF_CONST 0xFFFFFF07 -#define S_008DFC_COND(x) (((x) & 0x3) << 8) -#define G_008DFC_COND(x) (((x) >> 8) & 0x3) -#define C_008DFC_COND 0xFFFFFCFF -#define S_008DFC_COUNT(x) (((x) & 0x7) << 10) -#define G_008DFC_COUNT(x) (((x) >> 10) & 0x7) -#define C_008DFC_COUNT 0xFFFFE3FF -#define S_008DFC_CALL_COUNT(x) (((x) & 0x3F) << 13) -#define G_008DFC_CALL_COUNT(x) (((x) >> 13) & 0x3F) -#define C_008DFC_CALL_COUNT 0xFFF81FFF -#define S_008DFC_END_OF_PROGRAM(x) (((x) & 0x1) << 21) -#define G_008DFC_END_OF_PROGRAM(x) (((x) >> 21) & 0x1) -#define C_008DFC_END_OF_PROGRAM 0xFFDFFFFF -#define S_008DFC_VALID_PIXEL_MODE(x) (((x) & 0x1) << 22) -#define G_008DFC_VALID_PIXEL_MODE(x) (((x) >> 22) & 0x1) -#define C_008DFC_VALID_PIXEL_MODE 0xFFBFFFFF -#define S_008DFC_CF_INST(x) (((x) & 0x7F) << 23) -#define G_008DFC_CF_INST(x) (((x) >> 23) & 0x7F) -#define C_008DFC_CF_INST 0xC07FFFFF -#define V_008DFC_SQ_CF_INST_NOP 0x00000000 -#define V_008DFC_SQ_CF_INST_TEX 0x00000001 -#define V_008DFC_SQ_CF_INST_VTX 0x00000002 -#define V_008DFC_SQ_CF_INST_VTX_TC 0x00000003 -#define V_008DFC_SQ_CF_INST_LOOP_START 0x00000004 -#define V_008DFC_SQ_CF_INST_LOOP_END 0x00000005 -#define V_008DFC_SQ_CF_INST_LOOP_START_DX10 0x00000006 -#define V_008DFC_SQ_CF_INST_LOOP_START_NO_AL 0x00000007 -#define V_008DFC_SQ_CF_INST_LOOP_CONTINUE 0x00000008 -#define V_008DFC_SQ_CF_INST_LOOP_BREAK 0x00000009 -#define V_008DFC_SQ_CF_INST_JUMP 0x0000000A -#define V_008DFC_SQ_CF_INST_PUSH 0x0000000B -#define V_008DFC_SQ_CF_INST_PUSH_ELSE 0x0000000C -#define V_008DFC_SQ_CF_INST_ELSE 0x0000000D -#define V_008DFC_SQ_CF_INST_POP 0x0000000E -#define V_008DFC_SQ_CF_INST_POP_JUMP 0x0000000F -#define V_008DFC_SQ_CF_INST_POP_PUSH 0x00000010 -#define V_008DFC_SQ_CF_INST_POP_PUSH_ELSE 0x00000011 -#define V_008DFC_SQ_CF_INST_CALL 0x00000012 -#define V_008DFC_SQ_CF_INST_CALL_FS 0x00000013 -#define V_008DFC_SQ_CF_INST_RETURN 0x00000014 -#define V_008DFC_SQ_CF_INST_EMIT_VERTEX 0x00000015 -#define V_008DFC_SQ_CF_INST_EMIT_CUT_VERTEX 0x00000016 -#define V_008DFC_SQ_CF_INST_CUT_VERTEX 0x00000017 -#define V_008DFC_SQ_CF_INST_KILL 0x00000018 -#define S_008DFC_WHOLE_QUAD_MODE(x) (((x) & 0x1) << 30) -#define G_008DFC_WHOLE_QUAD_MODE(x) (((x) >> 30) & 0x1) -#define C_008DFC_WHOLE_QUAD_MODE 0xBFFFFFFF -#define S_008DFC_BARRIER(x) (((x) & 0x1) << 31) -#define G_008DFC_BARRIER(x) (((x) >> 31) & 0x1) -#define C_008DFC_BARRIER 0x7FFFFFFF -#define R_008DFC_SQ_CF_ALU_WORD0 0x008DFC -#define S_008DFC_ALU_ADDR(x) (((x) & 0x3FFFFF) << 0) -#define G_008DFC_ALU_ADDR(x) (((x) >> 0) & 0x3FFFFF) -#define C_008DFC_ALU_ADDR 0xFFC00000 -#define S_008DFC_KCACHE_BANK0(x) (((x) & 0xF) << 22) -#define G_008DFC_KCACHE_BANK0(x) (((x) >> 22) & 0xF) -#define C_008DFC_KCACHE_BANK0 0xFC3FFFFF -#define S_008DFC_KCACHE_BANK1(x) (((x) & 0xF) << 26) -#define G_008DFC_KCACHE_BANK1(x) (((x) >> 26) & 0xF) -#define C_008DFC_KCACHE_BANK1 0xC3FFFFFF -#define S_008DFC_KCACHE_MODE0(x) (((x) & 0x3) << 30) -#define G_008DFC_KCACHE_MODE0(x) (((x) >> 30) & 0x3) -#define C_008DFC_KCACHE_MODE0 0x3FFFFFFF -#define R_008DFC_SQ_CF_ALU_WORD1 0x008DFC -#define S_008DFC_KCACHE_MODE1(x) (((x) & 0x3) << 0) -#define G_008DFC_KCACHE_MODE1(x) (((x) >> 0) & 0x3) -#define C_008DFC_KCACHE_MODE1 0xFFFFFFFC -#define S_008DFC_KCACHE_ADDR0(x) (((x) & 0xFF) << 2) -#define G_008DFC_KCACHE_ADDR0(x) (((x) >> 2) & 0xFF) -#define C_008DFC_KCACHE_ADDR0 0xFFFFFC03 -#define S_008DFC_KCACHE_ADDR1(x) (((x) & 0xFF) << 10) -#define G_008DFC_KCACHE_ADDR1(x) (((x) >> 10) & 0xFF) -#define C_008DFC_KCACHE_ADDR1 0xFFFC03FF -#define S_008DFC_ALU_COUNT(x) (((x) & 0x7F) << 18) -#define G_008DFC_ALU_COUNT(x) (((x) >> 18) & 0x7F) -#define C_008DFC_ALU_COUNT 0xFE03FFFF -#define S_008DFC_USES_WATERFALL(x) (((x) & 0x1) << 25) -#define G_008DFC_USES_WATERFALL(x) (((x) >> 25) & 0x1) -#define C_008DFC_USES_WATERFALL 0xFDFFFFFF -#define S_008DFC_CF_ALU_INST(x) (((x) & 0xF) << 26) -#define G_008DFC_CF_ALU_INST(x) (((x) >> 26) & 0xF) -#define C_008DFC_CF_ALU_INST 0xC3FFFFFF -#define V_008DFC_SQ_CF_INST_ALU 0x00000008 -#define V_008DFC_SQ_CF_INST_ALU_PUSH_BEFORE 0x00000009 -#define V_008DFC_SQ_CF_INST_ALU_POP_AFTER 0x0000000A -#define V_008DFC_SQ_CF_INST_ALU_POP2_AFTER 0x0000000B -#define V_008DFC_SQ_CF_INST_ALU_CONTINUE 0x0000000D -#define V_008DFC_SQ_CF_INST_ALU_BREAK 0x0000000E -#define V_008DFC_SQ_CF_INST_ALU_ELSE_AFTER 0x0000000F -#define S_008DFC_WHOLE_QUAD_MODE(x) (((x) & 0x1) << 30) -#define G_008DFC_WHOLE_QUAD_MODE(x) (((x) >> 30) & 0x1) -#define C_008DFC_WHOLE_QUAD_MODE 0xBFFFFFFF -#define S_008DFC_BARRIER(x) (((x) & 0x1) << 31) -#define G_008DFC_BARRIER(x) (((x) >> 31) & 0x1) -#define C_008DFC_BARRIER 0x7FFFFFFF -#define R_008DFC_SQ_CF_ALLOC_EXPORT_WORD0 0x008DFC -#define S_008DFC_ARRAY_BASE(x) (((x) & 0x1FFF) << 0) -#define G_008DFC_ARRAY_BASE(x) (((x) >> 0) & 0x1FFF) -#define C_008DFC_ARRAY_BASE 0xFFFFE000 -#define S_008DFC_TYPE(x) (((x) & 0x3) << 13) -#define G_008DFC_TYPE(x) (((x) >> 13) & 0x3) -#define C_008DFC_TYPE 0xFFFF9FFF -#define S_008DFC_RW_GPR(x) (((x) & 0x7F) << 15) -#define G_008DFC_RW_GPR(x) (((x) >> 15) & 0x7F) -#define C_008DFC_RW_GPR 0xFFC07FFF -#define S_008DFC_RW_REL(x) (((x) & 0x1) << 22) -#define G_008DFC_RW_REL(x) (((x) >> 22) & 0x1) -#define C_008DFC_RW_REL 0xFFBFFFFF -#define S_008DFC_INDEX_GPR(x) (((x) & 0x7F) << 23) -#define G_008DFC_INDEX_GPR(x) (((x) >> 23) & 0x7F) -#define C_008DFC_INDEX_GPR 0xC07FFFFF -#define S_008DFC_ELEM_SIZE(x) (((x) & 0x3) << 30) -#define G_008DFC_ELEM_SIZE(x) (((x) >> 30) & 0x3) -#define C_008DFC_ELEM_SIZE 0x3FFFFFFF -#define R_008DFC_SQ_CF_ALLOC_EXPORT_WORD1 0x008DFC -#define S_008DFC_BURST_COUNT(x) (((x) & 0xF) << 17) -#define G_008DFC_BURST_COUNT(x) (((x) >> 17) & 0xF) -#define C_008DFC_BURST_COUNT 0xFFE1FFFF -#define S_008DFC_END_OF_PROGRAM(x) (((x) & 0x1) << 21) -#define G_008DFC_END_OF_PROGRAM(x) (((x) >> 21) & 0x1) -#define C_008DFC_END_OF_PROGRAM 0xFFDFFFFF -#define S_008DFC_VALID_PIXEL_MODE(x) (((x) & 0x1) << 22) -#define G_008DFC_VALID_PIXEL_MODE(x) (((x) >> 22) & 0x1) -#define C_008DFC_VALID_PIXEL_MODE 0xFFBFFFFF -#define S_008DFC_CF_INST(x) (((x) & 0x7F) << 23) -#define G_008DFC_CF_INST(x) (((x) >> 23) & 0x7F) -#define C_008DFC_CF_INST 0xC07FFFFF -#define V_008DFC_SQ_CF_INST_MEM_STREAM0 0x00000020 -#define V_008DFC_SQ_CF_INST_MEM_STREAM1 0x00000021 -#define V_008DFC_SQ_CF_INST_MEM_STREAM2 0x00000022 -#define V_008DFC_SQ_CF_INST_MEM_STREAM3 0x00000023 -#define V_008DFC_SQ_CF_INST_MEM_SCRATCH 0x00000024 -#define V_008DFC_SQ_CF_INST_MEM_REDUCTION 0x00000025 -#define V_008DFC_SQ_CF_INST_MEM_RING 0x00000026 -#define V_008DFC_SQ_CF_INST_EXPORT 0x00000027 -#define V_008DFC_SQ_CF_INST_EXPORT_DONE 0x00000028 -#define S_008DFC_WHOLE_QUAD_MODE(x) (((x) & 0x1) << 30) -#define G_008DFC_WHOLE_QUAD_MODE(x) (((x) >> 30) & 0x1) -#define C_008DFC_WHOLE_QUAD_MODE 0xBFFFFFFF -#define S_008DFC_BARRIER(x) (((x) & 0x1) << 31) -#define G_008DFC_BARRIER(x) (((x) >> 31) & 0x1) -#define C_008DFC_BARRIER 0x7FFFFFFF -#define R_008DFC_SQ_CF_ALLOC_EXPORT_WORD1_BUF 0x008DFC -#define S_008DFC_ARRAY_SIZE(x) (((x) & 0xFFF) << 0) -#define G_008DFC_ARRAY_SIZE(x) (((x) >> 0) & 0xFFF) -#define C_008DFC_ARRAY_SIZE 0xFFFFF000 -#define S_008DFC_COMP_MASK(x) (((x) & 0xF) << 12) -#define G_008DFC_COMP_MASK(x) (((x) >> 12) & 0xF) -#define C_008DFC_COMP_MASK 0xFFFF0FFF -#define R_008DFC_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ 0x008DFC -#define S_008DFC_SEL_X(x) (((x) & 0x7) << 0) -#define G_008DFC_SEL_X(x) (((x) >> 0) & 0x7) -#define C_008DFC_SEL_X 0xFFFFFFF8 -#define S_008DFC_SEL_Y(x) (((x) & 0x7) << 3) -#define G_008DFC_SEL_Y(x) (((x) >> 3) & 0x7) -#define C_008DFC_SEL_Y 0xFFFFFFC7 -#define S_008DFC_SEL_Z(x) (((x) & 0x7) << 6) -#define G_008DFC_SEL_Z(x) (((x) >> 6) & 0x7) -#define C_008DFC_SEL_Z 0xFFFFFE3F -#define S_008DFC_SEL_W(x) (((x) & 0x7) << 9) -#define G_008DFC_SEL_W(x) (((x) >> 9) & 0x7) -#define C_008DFC_SEL_W 0xFFFFF1FF -#define R_008DFC_SQ_VTX_WORD0 0x008DFC -#define S_008DFC_VTX_INST(x) (((x) & 0x1F) << 0) -#define G_008DFC_VTX_INST(x) (((x) >> 0) & 0x1F) -#define C_008DFC_VTX_INST 0xFFFFFFE0 -#define S_008DFC_FETCH_TYPE(x) (((x) & 0x3) << 5) -#define G_008DFC_FETCH_TYPE(x) (((x) >> 5) & 0x3) -#define C_008DFC_FETCH_TYPE 0xFFFFFF9F -#define S_008DFC_FETCH_WHOLE_QUAD(x) (((x) & 0x1) << 7) -#define G_008DFC_FETCH_WHOLE_QUAD(x) (((x) >> 7) & 0x1) -#define C_008DFC_FETCH_WHOLE_QUAD 0xFFFFFF7F -#define S_008DFC_BUFFER_ID(x) (((x) & 0xFF) << 8) -#define G_008DFC_BUFFER_ID(x) (((x) >> 8) & 0xFF) -#define C_008DFC_BUFFER_ID 0xFFFF00FF -#define S_008DFC_SRC_GPR(x) (((x) & 0x7F) << 16) -#define G_008DFC_SRC_GPR(x) (((x) >> 16) & 0x7F) -#define C_008DFC_SRC_GPR 0xFF80FFFF -#define S_008DFC_SRC_REL(x) (((x) & 0x1) << 23) -#define G_008DFC_SRC_REL(x) (((x) >> 23) & 0x1) -#define C_008DFC_SRC_REL 0xFF7FFFFF -#define S_008DFC_SRC_SEL_X(x) (((x) & 0x3) << 24) -#define G_008DFC_SRC_SEL_X(x) (((x) >> 24) & 0x3) -#define C_008DFC_SRC_SEL_X 0xFCFFFFFF -#define S_008DFC_MEGA_FETCH_COUNT(x) (((x) & 0x3F) << 26) -#define G_008DFC_MEGA_FETCH_COUNT(x) (((x) >> 26) & 0x3F) -#define C_008DFC_MEGA_FETCH_COUNT 0x03FFFFFF -#define R_008DFC_SQ_VTX_WORD1 0x008DFC -#define S_008DFC_DST_SEL_X(x) (((x) & 0x7) << 9) -#define G_008DFC_DST_SEL_X(x) (((x) >> 9) & 0x7) -#define C_008DFC_DST_SEL_X 0xFFFFF1FF -#define S_008DFC_DST_SEL_Y(x) (((x) & 0x7) << 12) -#define G_008DFC_DST_SEL_Y(x) (((x) >> 12) & 0x7) -#define C_008DFC_DST_SEL_Y 0xFFFF8FFF -#define S_008DFC_DST_SEL_Z(x) (((x) & 0x7) << 15) -#define G_008DFC_DST_SEL_Z(x) (((x) >> 15) & 0x7) -#define C_008DFC_DST_SEL_Z 0xFFFC7FFF -#define S_008DFC_DST_SEL_W(x) (((x) & 0x7) << 18) -#define G_008DFC_DST_SEL_W(x) (((x) >> 18) & 0x7) -#define C_008DFC_DST_SEL_W 0xFFE3FFFF -#define S_008DFC_USE_CONST_FIELDS(x) (((x) & 0x1) << 21) -#define G_008DFC_USE_CONST_FIELDS(x) (((x) >> 21) & 0x1) -#define C_008DFC_USE_CONST_FIELDS 0xFFDFFFFF -#define S_008DFC_DATA_FORMAT(x) (((x) & 0x3F) << 22) -#define G_008DFC_DATA_FORMAT(x) (((x) >> 22) & 0x3F) -#define C_008DFC_DATA_FORMAT 0xF03FFFFF -#define S_008DFC_NUM_FORMAT_ALL(x) (((x) & 0x3) << 28) -#define G_008DFC_NUM_FORMAT_ALL(x) (((x) >> 28) & 0x3) -#define C_008DFC_NUM_FORMAT_ALL 0xCFFFFFFF -#define S_008DFC_FORMAT_COMP_ALL(x) (((x) & 0x1) << 30) -#define G_008DFC_FORMAT_COMP_ALL(x) (((x) >> 30) & 0x1) -#define C_008DFC_FORMAT_COMP_ALL 0xBFFFFFFF -#define S_008DFC_SRF_MODE_ALL(x) (((x) & 0x1) << 31) -#define G_008DFC_SRF_MODE_ALL(x) (((x) >> 31) & 0x1) -#define C_008DFC_SRF_MODE_ALL 0x7FFFFFFF -#define R_008DFC_SQ_VTX_WORD1_GPR 0x008DFC -#define S_008DFC_DST_GPR(x) (((x) & 0x7F) << 0) -#define G_008DFC_DST_GPR(x) (((x) >> 0) & 0x7F) -#define C_008DFC_DST_GPR 0xFFFFFF80 -#define S_008DFC_DST_REL(x) (((x) & 0x1) << 7) -#define G_008DFC_DST_REL(x) (((x) >> 7) & 0x1) -#define C_008DFC_DST_REL 0xFFFFFF7F -#define R_008DFC_SQ_VTX_WORD2 0x008DFC -#define S_008DFC_OFFSET(x) (((x) & 0xFFFF) << 0) -#define G_008DFC_OFFSET(x) (((x) >> 0) & 0xFFFF) -#define C_008DFC_OFFSET 0xFFFF0000 -#define S_008DFC_ENDIAN_SWAP(x) (((x) & 0x3) << 16) -#define G_008DFC_ENDIAN_SWAP(x) (((x) >> 16) & 0x3) -#define C_008DFC_ENDIAN_SWAP 0xFFFCFFFF -#define S_008DFC_CONST_BUF_NO_STRIDE(x) (((x) & 0x1) << 18) -#define G_008DFC_CONST_BUF_NO_STRIDE(x) (((x) >> 18) & 0x1) -#define C_008DFC_CONST_BUF_NO_STRIDE 0xFFFBFFFF -#define S_008DFC_MEGA_FETCH(x) (((x) & 0x1) << 19) -#define G_008DFC_MEGA_FETCH(x) (((x) >> 19) & 0x1) -#define C_008DFC_MEGA_FETCH 0xFFF7FFFF -#define S_008DFC_ALT_CONST(x) (((x) & 0x1) << 20) -#define G_008DFC_ALT_CONST(x) (((x) >> 20) & 0x1) -#define C_008DFC_ALT_CONST 0xFFEFFFFF -#define R_008040_WAIT_UNTIL 0x008040 -#define S_008040_WAIT_CP_DMA_IDLE(x) (((x) & 0x1) << 8) -#define G_008040_WAIT_CP_DMA_IDLE(x) (((x) >> 8) & 0x1) -#define C_008040_WAIT_CP_DMA_IDLE 0xFFFFFEFF -#define S_008040_WAIT_CMDFIFO(x) (((x) & 0x1) << 10) -#define G_008040_WAIT_CMDFIFO(x) (((x) >> 10) & 0x1) -#define C_008040_WAIT_CMDFIFO 0xFFFFFBFF -#define S_008040_WAIT_2D_IDLE(x) (((x) & 0x1) << 14) -#define G_008040_WAIT_2D_IDLE(x) (((x) >> 14) & 0x1) -#define C_008040_WAIT_2D_IDLE 0xFFFFBFFF -#define S_008040_WAIT_3D_IDLE(x) (((x) & 0x1) << 15) -#define G_008040_WAIT_3D_IDLE(x) (((x) >> 15) & 0x1) -#define C_008040_WAIT_3D_IDLE 0xFFFF7FFF -#define S_008040_WAIT_2D_IDLECLEAN(x) (((x) & 0x1) << 16) -#define G_008040_WAIT_2D_IDLECLEAN(x) (((x) >> 16) & 0x1) -#define C_008040_WAIT_2D_IDLECLEAN 0xFFFEFFFF -#define S_008040_WAIT_3D_IDLECLEAN(x) (((x) & 0x1) << 17) -#define G_008040_WAIT_3D_IDLECLEAN(x) (((x) >> 17) & 0x1) -#define C_008040_WAIT_3D_IDLECLEAN 0xFFFDFFFF -#define S_008040_WAIT_EXTERN_SIG(x) (((x) & 0x1) << 19) -#define G_008040_WAIT_EXTERN_SIG(x) (((x) >> 19) & 0x1) -#define C_008040_WAIT_EXTERN_SIG 0xFFF7FFFF -#define S_008040_CMDFIFO_ENTRIES(x) (((x) & 0x1F) << 20) -#define G_008040_CMDFIFO_ENTRIES(x) (((x) >> 20) & 0x1F) -#define C_008040_CMDFIFO_ENTRIES 0xFE0FFFFF -#define R_0286CC_SPI_PS_IN_CONTROL_0 0x0286CC -#define S_0286CC_NUM_INTERP(x) (((x) & 0x3F) << 0) -#define G_0286CC_NUM_INTERP(x) (((x) >> 0) & 0x3F) -#define C_0286CC_NUM_INTERP 0xFFFFFFC0 -#define S_0286CC_POSITION_ENA(x) (((x) & 0x1) << 8) -#define G_0286CC_POSITION_ENA(x) (((x) >> 8) & 0x1) -#define C_0286CC_POSITION_ENA 0xFFFFFEFF -#define S_0286CC_POSITION_CENTROID(x) (((x) & 0x1) << 9) -#define G_0286CC_POSITION_CENTROID(x) (((x) >> 9) & 0x1) -#define C_0286CC_POSITION_CENTROID 0xFFFFFDFF -#define S_0286CC_POSITION_ADDR(x) (((x) & 0x1F) << 10) -#define G_0286CC_POSITION_ADDR(x) (((x) >> 10) & 0x1F) -#define C_0286CC_POSITION_ADDR 0xFFFF83FF -#define S_0286CC_PARAM_GEN(x) (((x) & 0xF) << 15) -#define G_0286CC_PARAM_GEN(x) (((x) >> 15) & 0xF) -#define C_0286CC_PARAM_GEN 0xFFF87FFF -#define S_0286CC_PARAM_GEN_ADDR(x) (((x) & 0x7F) << 19) -#define G_0286CC_PARAM_GEN_ADDR(x) (((x) >> 19) & 0x7F) -#define C_0286CC_PARAM_GEN_ADDR 0xFC07FFFF -#define S_0286CC_BARYC_SAMPLE_CNTL(x) (((x) & 0x3) << 26) -#define G_0286CC_BARYC_SAMPLE_CNTL(x) (((x) >> 26) & 0x3) -#define C_0286CC_BARYC_SAMPLE_CNTL 0xF3FFFFFF -#define S_0286CC_PERSP_GRADIENT_ENA(x) (((x) & 0x1) << 28) -#define G_0286CC_PERSP_GRADIENT_ENA(x) (((x) >> 28) & 0x1) -#define C_0286CC_PERSP_GRADIENT_ENA 0xEFFFFFFF -#define S_0286CC_LINEAR_GRADIENT_ENA(x) (((x) & 0x1) << 29) -#define G_0286CC_LINEAR_GRADIENT_ENA(x) (((x) >> 29) & 0x1) -#define C_0286CC_LINEAR_GRADIENT_ENA 0xDFFFFFFF -#define S_0286CC_POSITION_SAMPLE(x) (((x) & 0x1) << 30) -#define G_0286CC_POSITION_SAMPLE(x) (((x) >> 30) & 0x1) -#define C_0286CC_POSITION_SAMPLE 0xBFFFFFFF -#define S_0286CC_BARYC_AT_SAMPLE_ENA(x) (((x) & 0x1) << 31) -#define G_0286CC_BARYC_AT_SAMPLE_ENA(x) (((x) >> 31) & 0x1) -#define C_0286CC_BARYC_AT_SAMPLE_ENA 0x7FFFFFFF -#define R_0286D0_SPI_PS_IN_CONTROL_1 0x0286D0 -#define S_0286D0_GEN_INDEX_PIX(x) (((x) & 0x1) << 0) -#define G_0286D0_GEN_INDEX_PIX(x) (((x) >> 0) & 0x1) -#define C_0286D0_GEN_INDEX_PIX 0xFFFFFFFE -#define S_0286D0_GEN_INDEX_PIX_ADDR(x) (((x) & 0x7F) << 1) -#define G_0286D0_GEN_INDEX_PIX_ADDR(x) (((x) >> 1) & 0x7F) -#define C_0286D0_GEN_INDEX_PIX_ADDR 0xFFFFFF01 -#define S_0286D0_FRONT_FACE_ENA(x) (((x) & 0x1) << 8) -#define G_0286D0_FRONT_FACE_ENA(x) (((x) >> 8) & 0x1) -#define C_0286D0_FRONT_FACE_ENA 0xFFFFFEFF -#define S_0286D0_FRONT_FACE_CHAN(x) (((x) & 0x3) << 9) -#define G_0286D0_FRONT_FACE_CHAN(x) (((x) >> 9) & 0x3) -#define C_0286D0_FRONT_FACE_CHAN 0xFFFFF9FF -#define S_0286D0_FRONT_FACE_ALL_BITS(x) (((x) & 0x1) << 11) -#define G_0286D0_FRONT_FACE_ALL_BITS(x) (((x) >> 11) & 0x1) -#define C_0286D0_FRONT_FACE_ALL_BITS 0xFFFFF7FF -#define S_0286D0_FRONT_FACE_ADDR(x) (((x) & 0x1F) << 12) -#define G_0286D0_FRONT_FACE_ADDR(x) (((x) >> 12) & 0x1F) -#define C_0286D0_FRONT_FACE_ADDR 0xFFFE0FFF -#define S_0286D0_FOG_ADDR(x) (((x) & 0x7F) << 17) -#define G_0286D0_FOG_ADDR(x) (((x) >> 17) & 0x7F) -#define C_0286D0_FOG_ADDR 0xFF01FFFF -#define S_0286D0_FIXED_PT_POSITION_ENA(x) (((x) & 0x1) << 24) -#define G_0286D0_FIXED_PT_POSITION_ENA(x) (((x) >> 24) & 0x1) -#define C_0286D0_FIXED_PT_POSITION_ENA 0xFEFFFFFF -#define S_0286D0_FIXED_PT_POSITION_ADDR(x) (((x) & 0x1F) << 25) -#define G_0286D0_FIXED_PT_POSITION_ADDR(x) (((x) >> 25) & 0x1F) -#define C_0286D0_FIXED_PT_POSITION_ADDR 0xC1FFFFFF -#define R_0286C4_SPI_VS_OUT_CONFIG 0x0286C4 -#define S_0286C4_VS_PER_COMPONENT(x) (((x) & 0x1) << 0) -#define G_0286C4_VS_PER_COMPONENT(x) (((x) >> 0) & 0x1) -#define C_0286C4_VS_PER_COMPONENT 0xFFFFFFFE -#define S_0286C4_VS_EXPORT_COUNT(x) (((x) & 0x1F) << 1) -#define G_0286C4_VS_EXPORT_COUNT(x) (((x) >> 1) & 0x1F) -#define C_0286C4_VS_EXPORT_COUNT 0xFFFFFFC1 -#define S_0286C4_VS_EXPORTS_FOG(x) (((x) & 0x1) << 8) -#define G_0286C4_VS_EXPORTS_FOG(x) (((x) >> 8) & 0x1) -#define C_0286C4_VS_EXPORTS_FOG 0xFFFFFEFF -#define S_0286C4_VS_OUT_FOG_VEC_ADDR(x) (((x) & 0x1F) << 9) -#define G_0286C4_VS_OUT_FOG_VEC_ADDR(x) (((x) >> 9) & 0x1F) -#define C_0286C4_VS_OUT_FOG_VEC_ADDR 0xFFFFC1FF -#define R_028240_PA_SC_GENERIC_SCISSOR_TL 0x028240 -#define S_028240_TL_X(x) (((x) & 0x3FFF) << 0) -#define G_028240_TL_X(x) (((x) >> 0) & 0x3FFF) -#define C_028240_TL_X 0xFFFFC000 -#define S_028240_TL_Y(x) (((x) & 0x3FFF) << 16) -#define G_028240_TL_Y(x) (((x) >> 16) & 0x3FFF) -#define C_028240_TL_Y 0xC000FFFF -#define S_028240_WINDOW_OFFSET_DISABLE(x) (((x) & 0x1) << 31) -#define G_028240_WINDOW_OFFSET_DISABLE(x) (((x) >> 31) & 0x1) -#define C_028240_WINDOW_OFFSET_DISABLE 0x7FFFFFFF -#define R_028244_PA_SC_GENERIC_SCISSOR_BR 0x028244 -#define S_028244_BR_X(x) (((x) & 0x3FFF) << 0) -#define G_028244_BR_X(x) (((x) >> 0) & 0x3FFF) -#define C_028244_BR_X 0xFFFFC000 -#define S_028244_BR_Y(x) (((x) & 0x3FFF) << 16) -#define G_028244_BR_Y(x) (((x) >> 16) & 0x3FFF) -#define C_028244_BR_Y 0xC000FFFF -#define R_028030_PA_SC_SCREEN_SCISSOR_TL 0x028030 -#define S_028030_TL_X(x) (((x) & 0x7FFF) << 0) -#define G_028030_TL_X(x) (((x) >> 0) & 0x7FFF) -#define C_028030_TL_X 0xFFFF8000 -#define S_028030_TL_Y(x) (((x) & 0x7FFF) << 16) -#define G_028030_TL_Y(x) (((x) >> 16) & 0x7FFF) -#define C_028030_TL_Y 0x8000FFFF -#define R_028034_PA_SC_SCREEN_SCISSOR_BR 0x028034 -#define S_028034_BR_X(x) (((x) & 0x7FFF) << 0) -#define G_028034_BR_X(x) (((x) >> 0) & 0x7FFF) -#define C_028034_BR_X 0xFFFF8000 -#define S_028034_BR_Y(x) (((x) & 0x7FFF) << 16) -#define G_028034_BR_Y(x) (((x) >> 16) & 0x7FFF) -#define C_028034_BR_Y 0x8000FFFF -#define R_028204_PA_SC_WINDOW_SCISSOR_TL 0x028204 -#define S_028204_TL_X(x) (((x) & 0x3FFF) << 0) -#define G_028204_TL_X(x) (((x) >> 0) & 0x3FFF) -#define C_028204_TL_X 0xFFFFC000 -#define S_028204_TL_Y(x) (((x) & 0x3FFF) << 16) -#define G_028204_TL_Y(x) (((x) >> 16) & 0x3FFF) -#define C_028204_TL_Y 0xC000FFFF -#define S_028204_WINDOW_OFFSET_DISABLE(x) (((x) & 0x1) << 31) -#define G_028204_WINDOW_OFFSET_DISABLE(x) (((x) >> 31) & 0x1) -#define C_028204_WINDOW_OFFSET_DISABLE 0x7FFFFFFF -#define R_028208_PA_SC_WINDOW_SCISSOR_BR 0x028208 -#define S_028208_BR_X(x) (((x) & 0x3FFF) << 0) -#define G_028208_BR_X(x) (((x) >> 0) & 0x3FFF) -#define C_028208_BR_X 0xFFFFC000 -#define S_028208_BR_Y(x) (((x) & 0x3FFF) << 16) -#define G_028208_BR_Y(x) (((x) >> 16) & 0x3FFF) -#define C_028208_BR_Y 0xC000FFFF -#define R_0287F0_VGT_DRAW_INITIATOR 0x0287F0 -#define S_0287F0_SOURCE_SELECT(x) (((x) & 0x3) << 0) -#define G_0287F0_SOURCE_SELECT(x) (((x) >> 0) & 0x3) -#define C_0287F0_SOURCE_SELECT 0xFFFFFFFC -#define S_0287F0_MAJOR_MODE(x) (((x) & 0x3) << 2) -#define G_0287F0_MAJOR_MODE(x) (((x) >> 2) & 0x3) -#define C_0287F0_MAJOR_MODE 0xFFFFFFF3 -#define S_0287F0_SPRITE_EN(x) (((x) & 0x1) << 4) -#define G_0287F0_SPRITE_EN(x) (((x) >> 4) & 0x1) -#define C_0287F0_SPRITE_EN 0xFFFFFFEF -#define S_0287F0_NOT_EOP(x) (((x) & 0x1) << 5) -#define G_0287F0_NOT_EOP(x) (((x) >> 5) & 0x1) -#define C_0287F0_NOT_EOP 0xFFFFFFDF -#define S_0287F0_USE_OPAQUE(x) (((x) & 0x1) << 6) -#define G_0287F0_USE_OPAQUE(x) (((x) >> 6) & 0x1) -#define C_0287F0_USE_OPAQUE 0xFFFFFFBF -#define R_0280A0_CB_COLOR0_INFO 0x0280A0 -#define R_0280A4_CB_COLOR1_INFO 0x0280A4 -#define R_0280A8_CB_COLOR2_INFO 0x0280A8 -#define R_0280AC_CB_COLOR3_INFO 0x0280AC -#define R_0280B0_CB_COLOR4_INFO 0x0280B0 -#define R_0280B4_CB_COLOR5_INFO 0x0280B4 -#define R_0280B8_CB_COLOR6_INFO 0x0280B8 -#define R_0280BC_CB_COLOR7_INFO 0x0280BC -#define R_02800C_DB_DEPTH_BASE 0x02800C -#define R_028000_DB_DEPTH_SIZE 0x028000 -#define R_028004_DB_DEPTH_VIEW 0x028004 -#define R_028010_DB_DEPTH_INFO 0x028010 -#define R_028D24_DB_HTILE_SURFACE 0x028D24 -#define R_028D34_DB_PREFETCH_LIMIT 0x028D34 -#define R_0286D4_SPI_INTERP_CONTROL_0 0x0286D4 -#define R_028A48_PA_SC_MPASS_PS_CNTL 0x028A48 -#define R_028C00_PA_SC_LINE_CNTL 0x028C00 -#define R_028C04_PA_SC_AA_CONFIG 0x028C04 -#define R_028C08_PA_SU_VTX_CNTL 0x028C08 -#define R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX 0x028C1C -#define R_028C48_PA_SC_AA_MASK 0x028C48 -#define R_028810_PA_CL_CLIP_CNTL 0x028810 -#define R_02881C_PA_CL_VS_OUT_CNTL 0x02881C -#define R_028820_PA_CL_NANINF_CNTL 0x028820 -#define R_028C0C_PA_CL_GB_VERT_CLIP_ADJ 0x028C0C -#define R_028C10_PA_CL_GB_VERT_DISC_ADJ 0x028C10 -#define R_028C14_PA_CL_GB_HORZ_CLIP_ADJ 0x028C14 -#define R_028C18_PA_CL_GB_HORZ_DISC_ADJ 0x028C18 -#define R_028814_PA_SU_SC_MODE_CNTL 0x028814 -#define R_028A00_PA_SU_POINT_SIZE 0x028A00 -#define R_028A04_PA_SU_POINT_MINMAX 0x028A04 -#define R_028A08_PA_SU_LINE_CNTL 0x028A08 -#define R_028A0C_PA_SC_LINE_STIPPLE 0x028A0C -#define R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL 0x028DF8 -#define R_028DFC_PA_SU_POLY_OFFSET_CLAMP 0x028DFC -#define R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE 0x028E00 -#define R_028E04_PA_SU_POLY_OFFSET_FRONT_OFFSET 0x028E04 -#define R_028E08_PA_SU_POLY_OFFSET_BACK_SCALE 0x028E08 -#define R_028E0C_PA_SU_POLY_OFFSET_BACK_OFFSET 0x028E0C -#define R_028818_PA_CL_VTE_CNTL 0x028818 -#define R_02843C_PA_CL_VPORT_XSCALE_0 0x02843C -#define R_028444_PA_CL_VPORT_YSCALE_0 0x028444 -#define R_02844C_PA_CL_VPORT_ZSCALE_0 0x02844C -#define R_028440_PA_CL_VPORT_XOFFSET_0 0x028440 -#define R_028448_PA_CL_VPORT_YOFFSET_0 0x028448 -#define R_028450_PA_CL_VPORT_ZOFFSET_0 0x028450 -#define R_028250_PA_SC_VPORT_SCISSOR_0_TL 0x028250 -#define R_028254_PA_SC_VPORT_SCISSOR_0_BR 0x028254 -#define R_028780_CB_BLEND0_CONTROL 0x028780 -#define R_028784_CB_BLEND1_CONTROL 0x028784 -#define R_028788_CB_BLEND2_CONTROL 0x028788 -#define R_02878C_CB_BLEND3_CONTROL 0x02878C -#define R_028790_CB_BLEND4_CONTROL 0x028790 -#define R_028794_CB_BLEND5_CONTROL 0x028794 -#define R_028798_CB_BLEND6_CONTROL 0x028798 -#define R_02879C_CB_BLEND7_CONTROL 0x02879C -#define R_028804_CB_BLEND_CONTROL 0x028804 -#define R_028028_DB_STENCIL_CLEAR 0x028028 -#define R_02802C_DB_DEPTH_CLEAR 0x02802C -#define R_028430_DB_STENCILREFMASK 0x028430 -#define R_028434_DB_STENCILREFMASK_BF 0x028434 -#define R_028800_DB_DEPTH_CONTROL 0x028800 -#define R_02880C_DB_SHADER_CONTROL 0x02880C -#define R_028D0C_DB_RENDER_CONTROL 0x028D0C -#define S_028D0C_DEPTH_CLEAR_ENABLE(x) (((x) & 0x1) << 0) -#define S_028D0C_STENCIL_CLEAR_ENABLE(x) (((x) & 0x1) << 1) -#define S_028D0C_DEPTH_COPY_ENABLE(x) (((x) & 0x1) << 2) -#define S_028D0C_STENCIL_COPY_ENABLE(x) (((x) & 0x1) << 3) -#define S_028D0C_RESUMMARIZE_ENABLE(x) (((x) & 0x1) << 4) -#define S_028D0C_STENCIL_COMPRESS_DISABLE(x) (((x) & 0x1) << 5) -#define S_028D0C_DEPTH_COMPRESS_DISABLE(x) (((x) & 0x1) << 6) -#define S_028D0C_COPY_CENTROID(x) (((x) & 0x1) << 7) -#define S_028D0C_COPY_SAMPLE(x) (((x) & 0x1) << 8) -#define S_028D0C_R700_PERFECT_ZPASS_COUNTS(x) (((x) & 0x1) << 15) -#define R_028D10_DB_RENDER_OVERRIDE 0x028D10 -#define R_028D2C_DB_SRESULTS_COMPARE_STATE1 0x028D2C -#define R_028D30_DB_PRELOAD_CONTROL 0x028D30 -#define R_028D44_DB_ALPHA_TO_MASK 0x028D44 -#define R_028868_SQ_PGM_RESOURCES_VS 0x028868 -#define R_0286CC_SPI_PS_IN_CONTROL_0 0x0286CC -#define R_0286D0_SPI_PS_IN_CONTROL_1 0x0286D0 -#define R_028644_SPI_PS_INPUT_CNTL_0 0x028644 -#define R_028648_SPI_PS_INPUT_CNTL_1 0x028648 -#define R_02864C_SPI_PS_INPUT_CNTL_2 0x02864C -#define R_028650_SPI_PS_INPUT_CNTL_3 0x028650 -#define R_028654_SPI_PS_INPUT_CNTL_4 0x028654 -#define R_028658_SPI_PS_INPUT_CNTL_5 0x028658 -#define R_02865C_SPI_PS_INPUT_CNTL_6 0x02865C -#define R_028660_SPI_PS_INPUT_CNTL_7 0x028660 -#define R_028664_SPI_PS_INPUT_CNTL_8 0x028664 -#define R_028668_SPI_PS_INPUT_CNTL_9 0x028668 -#define R_02866C_SPI_PS_INPUT_CNTL_10 0x02866C -#define R_028670_SPI_PS_INPUT_CNTL_11 0x028670 -#define R_028674_SPI_PS_INPUT_CNTL_12 0x028674 -#define R_028678_SPI_PS_INPUT_CNTL_13 0x028678 -#define R_02867C_SPI_PS_INPUT_CNTL_14 0x02867C -#define R_028680_SPI_PS_INPUT_CNTL_15 0x028680 -#define R_028684_SPI_PS_INPUT_CNTL_16 0x028684 -#define R_028688_SPI_PS_INPUT_CNTL_17 0x028688 -#define R_02868C_SPI_PS_INPUT_CNTL_18 0x02868C -#define R_028690_SPI_PS_INPUT_CNTL_19 0x028690 -#define R_028694_SPI_PS_INPUT_CNTL_20 0x028694 -#define R_028698_SPI_PS_INPUT_CNTL_21 0x028698 -#define R_02869C_SPI_PS_INPUT_CNTL_22 0x02869C -#define R_0286A0_SPI_PS_INPUT_CNTL_23 0x0286A0 -#define R_0286A4_SPI_PS_INPUT_CNTL_24 0x0286A4 -#define R_0286A8_SPI_PS_INPUT_CNTL_25 0x0286A8 -#define R_0286AC_SPI_PS_INPUT_CNTL_26 0x0286AC -#define R_0286B0_SPI_PS_INPUT_CNTL_27 0x0286B0 -#define R_0286B4_SPI_PS_INPUT_CNTL_28 0x0286B4 -#define R_0286B8_SPI_PS_INPUT_CNTL_29 0x0286B8 -#define R_0286BC_SPI_PS_INPUT_CNTL_30 0x0286BC -#define R_0286C0_SPI_PS_INPUT_CNTL_31 0x0286C0 -#define R_028850_SQ_PGM_RESOURCES_PS 0x028850 -#define R_028854_SQ_PGM_EXPORTS_PS 0x028854 -#define R_008958_VGT_PRIMITIVE_TYPE 0x008958 -#define R_028A7C_VGT_DMA_INDEX_TYPE 0x028A7C -#define R_028A88_VGT_DMA_NUM_INSTANCES 0x028A88 -#define R_008970_VGT_NUM_INDICES 0x008970 -#define R_0287F0_VGT_DRAW_INITIATOR 0x0287F0 -#define R_028238_CB_TARGET_MASK 0x028238 -#define R_02823C_CB_SHADER_MASK 0x02823C -#define R_028060_CB_COLOR0_SIZE 0x028060 -#define S_028060_PITCH_TILE_MAX(x) (((x) & 0x3FF) << 0) -#define G_028060_PITCH_TILE_MAX(x) (((x) >> 0) & 0x3FF) -#define C_028060_PITCH_TILE_MAX 0xFFFFFC00 -#define S_028060_SLICE_TILE_MAX(x) (((x) & 0xFFFFF) << 10) -#define G_028060_SLICE_TILE_MAX(x) (((x) >> 10) & 0xFFFFF) -#define C_028060_SLICE_TILE_MAX 0xC00003FF -#define R_028064_CB_COLOR1_SIZE 0x028064 -#define R_028068_CB_COLOR2_SIZE 0x028068 -#define R_02806C_CB_COLOR3_SIZE 0x02806C -#define R_028070_CB_COLOR4_SIZE 0x028070 -#define R_028074_CB_COLOR5_SIZE 0x028074 -#define R_028078_CB_COLOR6_SIZE 0x028078 -#define R_02807C_CB_COLOR7_SIZE 0x02807C -#define R_028040_CB_COLOR0_BASE 0x028040 -#define R_028044_CB_COLOR1_BASE 0x028044 -#define R_028048_CB_COLOR2_BASE 0x028048 -#define R_02804C_CB_COLOR3_BASE 0x02804C -#define R_028050_CB_COLOR4_BASE 0x028050 -#define R_028054_CB_COLOR5_BASE 0x028054 -#define R_028058_CB_COLOR6_BASE 0x028058 -#define R_02805C_CB_COLOR7_BASE 0x02805C -#define R_028240_PA_SC_GENERIC_SCISSOR_TL 0x028240 -#define S_028240_TL_X(x) (((x) & 0x3FFF) << 0) -#define G_028240_TL_X(x) (((x) >> 0) & 0x3FFF) -#define C_028240_TL_X 0xFFFFC000 -#define S_028240_TL_Y(x) (((x) & 0x3FFF) << 16) -#define G_028240_TL_Y(x) (((x) >> 16) & 0x3FFF) -#define C_028240_TL_Y 0xC000FFFF -#define R_028C04_PA_SC_AA_CONFIG 0x028C04 -#define S_028C04_MSAA_NUM_SAMPLES(x) (((x) & 0x3) << 0) -#define G_028C04_MSAA_NUM_SAMPLES(x) (((x) >> 0) & 0x3) -#define C_028C04_MSAA_NUM_SAMPLES 0xFFFFFFFC -#define S_028C04_AA_MASK_CENTROID_DTMN(x) (((x) & 0x1) << 4) -#define G_028C04_AA_MASK_CENTROID_DTMN(x) (((x) >> 4) & 0x1) -#define C_028C04_AA_MASK_CENTROID_DTMN 0xFFFFFFEF -#define S_028C04_MAX_SAMPLE_DIST(x) (((x) & 0xF) << 13) -#define G_028C04_MAX_SAMPLE_DIST(x) (((x) >> 13) & 0xF) -#define C_028C04_MAX_SAMPLE_DIST 0xFFFE1FFF -#define R_0288CC_SQ_PGM_CF_OFFSET_PS 0x0288CC -#define R_0288DC_SQ_PGM_CF_OFFSET_FS 0x0288DC -#define R_0288D0_SQ_PGM_CF_OFFSET_VS 0x0288D0 -#define R_028840_SQ_PGM_START_PS 0x028840 -#define R_028894_SQ_PGM_START_FS 0x028894 -#define R_028858_SQ_PGM_START_VS 0x028858 -#define R_028080_CB_COLOR0_VIEW 0x028080 -#define S_028080_SLICE_START(x) (((x) & 0x7FF) << 0) -#define G_028080_SLICE_START(x) (((x) >> 0) & 0x7FF) -#define C_028080_SLICE_START 0xFFFFF800 -#define S_028080_SLICE_MAX(x) (((x) & 0x7FF) << 13) -#define G_028080_SLICE_MAX(x) (((x) >> 13) & 0x7FF) -#define C_028080_SLICE_MAX 0xFF001FFF -#define R_028084_CB_COLOR1_VIEW 0x028084 -#define R_028088_CB_COLOR2_VIEW 0x028088 -#define R_02808C_CB_COLOR3_VIEW 0x02808C -#define R_028090_CB_COLOR4_VIEW 0x028090 -#define R_028094_CB_COLOR5_VIEW 0x028094 -#define R_028098_CB_COLOR6_VIEW 0x028098 -#define R_02809C_CB_COLOR7_VIEW 0x02809C -#define R_028100_CB_COLOR0_MASK 0x028100 -#define S_028100_CMASK_BLOCK_MAX(x) (((x) & 0xFFF) << 0) -#define G_028100_CMASK_BLOCK_MAX(x) (((x) >> 0) & 0xFFF) -#define C_028100_CMASK_BLOCK_MAX 0xFFFFF000 -#define S_028100_FMASK_TILE_MAX(x) (((x) & 0xFFFFF) << 12) -#define G_028100_FMASK_TILE_MAX(x) (((x) >> 12) & 0xFFFFF) -#define C_028100_FMASK_TILE_MAX 0x00000FFF -#define R_028104_CB_COLOR1_MASK 0x028104 -#define R_028108_CB_COLOR2_MASK 0x028108 -#define R_02810C_CB_COLOR3_MASK 0x02810C -#define R_028110_CB_COLOR4_MASK 0x028110 -#define R_028114_CB_COLOR5_MASK 0x028114 -#define R_028118_CB_COLOR6_MASK 0x028118 -#define R_02811C_CB_COLOR7_MASK 0x02811C -#define R_028040_CB_COLOR0_BASE 0x028040 -#define S_028040_BASE_256B(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028040_BASE_256B(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028040_BASE_256B 0x00000000 -#define R_0280E0_CB_COLOR0_FRAG 0x0280E0 -#define S_0280E0_BASE_256B(x) (((x) & 0xFFFFFFFF) << 0) -#define G_0280E0_BASE_256B(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_0280E0_BASE_256B 0x00000000 -#define R_0280E4_CB_COLOR1_FRAG 0x0280E4 -#define R_0280E8_CB_COLOR2_FRAG 0x0280E8 -#define R_0280EC_CB_COLOR3_FRAG 0x0280EC -#define R_0280F0_CB_COLOR4_FRAG 0x0280F0 -#define R_0280F4_CB_COLOR5_FRAG 0x0280F4 -#define R_0280F8_CB_COLOR6_FRAG 0x0280F8 -#define R_0280FC_CB_COLOR7_FRAG 0x0280FC -#define R_0280C0_CB_COLOR0_TILE 0x0280C0 -#define S_0280C0_BASE_256B(x) (((x) & 0xFFFFFFFF) << 0) -#define G_0280C0_BASE_256B(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_0280C0_BASE_256B 0x00000000 -#define R_0280C4_CB_COLOR1_TILE 0x0280C4 -#define R_0280C8_CB_COLOR2_TILE 0x0280C8 -#define R_0280CC_CB_COLOR3_TILE 0x0280CC -#define R_0280D0_CB_COLOR4_TILE 0x0280D0 -#define R_0280D4_CB_COLOR5_TILE 0x0280D4 -#define R_0280D8_CB_COLOR6_TILE 0x0280D8 -#define R_0280DC_CB_COLOR7_TILE 0x0280DC -#define R_028808_CB_COLOR_CONTROL 0x028808 -#define S_028808_FOG_ENABLE(x) (((x) & 0x1) << 0) -#define G_028808_FOG_ENABLE(x) (((x) >> 0) & 0x1) -#define C_028808_FOG_ENABLE 0xFFFFFFFE -#define S_028808_MULTIWRITE_ENABLE(x) (((x) & 0x1) << 1) -#define G_028808_MULTIWRITE_ENABLE(x) (((x) >> 1) & 0x1) -#define C_028808_MULTIWRITE_ENABLE 0xFFFFFFFD -#define S_028808_DITHER_ENABLE(x) (((x) & 0x1) << 2) -#define G_028808_DITHER_ENABLE(x) (((x) >> 2) & 0x1) -#define C_028808_DITHER_ENABLE 0xFFFFFFFB -#define S_028808_DEGAMMA_ENABLE(x) (((x) & 0x1) << 3) -#define G_028808_DEGAMMA_ENABLE(x) (((x) >> 3) & 0x1) -#define C_028808_DEGAMMA_ENABLE 0xFFFFFFF7 -#define S_028808_SPECIAL_OP(x) (((x) & 0x7) << 4) -#define G_028808_SPECIAL_OP(x) (((x) >> 4) & 0x7) -#define C_028808_SPECIAL_OP 0xFFFFFF8F -#define S_028808_PER_MRT_BLEND(x) (((x) & 0x1) << 7) -#define G_028808_PER_MRT_BLEND(x) (((x) >> 7) & 0x1) -#define C_028808_PER_MRT_BLEND 0xFFFFFF7F -#define S_028808_TARGET_BLEND_ENABLE(x) (((x) & 0xFF) << 8) -#define G_028808_TARGET_BLEND_ENABLE(x) (((x) >> 8) & 0xFF) -#define C_028808_TARGET_BLEND_ENABLE 0xFFFF00FF -#define S_028808_ROP3(x) (((x) & 0xFF) << 16) -#define G_028808_ROP3(x) (((x) >> 16) & 0xFF) -#define C_028808_ROP3 0xFF00FFFF -#define R_028614_SPI_VS_OUT_ID_0 0x028614 -#define S_028614_SEMANTIC_0(x) (((x) & 0xFF) << 0) -#define G_028614_SEMANTIC_0(x) (((x) >> 0) & 0xFF) -#define C_028614_SEMANTIC_0 0xFFFFFF00 -#define S_028614_SEMANTIC_1(x) (((x) & 0xFF) << 8) -#define G_028614_SEMANTIC_1(x) (((x) >> 8) & 0xFF) -#define C_028614_SEMANTIC_1 0xFFFF00FF -#define S_028614_SEMANTIC_2(x) (((x) & 0xFF) << 16) -#define G_028614_SEMANTIC_2(x) (((x) >> 16) & 0xFF) -#define C_028614_SEMANTIC_2 0xFF00FFFF -#define S_028614_SEMANTIC_3(x) (((x) & 0xFF) << 24) -#define G_028614_SEMANTIC_3(x) (((x) >> 24) & 0xFF) -#define C_028614_SEMANTIC_3 0x00FFFFFF -#define R_028618_SPI_VS_OUT_ID_1 0x028618 -#define R_02861C_SPI_VS_OUT_ID_2 0x02861C -#define R_028620_SPI_VS_OUT_ID_3 0x028620 -#define R_028624_SPI_VS_OUT_ID_4 0x028624 -#define R_028628_SPI_VS_OUT_ID_5 0x028628 -#define R_02862C_SPI_VS_OUT_ID_6 0x02862C -#define R_028630_SPI_VS_OUT_ID_7 0x028630 -#define R_028634_SPI_VS_OUT_ID_8 0x028634 -#define R_028638_SPI_VS_OUT_ID_9 0x028638 -#define R_038000_SQ_TEX_RESOURCE_WORD0_0 0x038000 -#define S_038000_DIM(x) (((x) & 0x7) << 0) -#define G_038000_DIM(x) (((x) >> 0) & 0x7) -#define C_038000_DIM 0xFFFFFFF8 -#define S_038000_TILE_MODE(x) (((x) & 0xF) << 3) -#define G_038000_TILE_MODE(x) (((x) >> 3) & 0xF) -#define C_038000_TILE_MODE 0xFFFFFF87 -#define S_038000_TILE_TYPE(x) (((x) & 0x1) << 7) -#define G_038000_TILE_TYPE(x) (((x) >> 7) & 0x1) -#define C_038000_TILE_TYPE 0xFFFFFF7F -#define S_038000_PITCH(x) (((x) & 0x7FF) << 8) -#define G_038000_PITCH(x) (((x) >> 8) & 0x7FF) -#define C_038000_PITCH 0xFFF800FF -#define S_038000_TEX_WIDTH(x) (((x) & 0x1FFF) << 19) -#define G_038000_TEX_WIDTH(x) (((x) >> 19) & 0x1FFF) -#define C_038000_TEX_WIDTH 0x0007FFFF -#define R_038004_SQ_TEX_RESOURCE_WORD1_0 0x038004 -#define S_038004_TEX_HEIGHT(x) (((x) & 0x1FFF) << 0) -#define G_038004_TEX_HEIGHT(x) (((x) >> 0) & 0x1FFF) -#define C_038004_TEX_HEIGHT 0xFFFFE000 -#define S_038004_TEX_DEPTH(x) (((x) & 0x1FFF) << 13) -#define G_038004_TEX_DEPTH(x) (((x) >> 13) & 0x1FFF) -#define C_038004_TEX_DEPTH 0xFC001FFF -#define S_038004_DATA_FORMAT(x) (((x) & 0x3F) << 26) -#define G_038004_DATA_FORMAT(x) (((x) >> 26) & 0x3F) -#define C_038004_DATA_FORMAT 0x03FFFFFF -#define V_038004_COLOR_INVALID 0x00000000 -#define V_038004_COLOR_8 0x00000001 -#define V_038004_COLOR_4_4 0x00000002 -#define V_038004_COLOR_3_3_2 0x00000003 -#define V_038004_COLOR_16 0x00000005 -#define V_038004_COLOR_16_FLOAT 0x00000006 -#define V_038004_COLOR_8_8 0x00000007 -#define V_038004_COLOR_5_6_5 0x00000008 -#define V_038004_COLOR_6_5_5 0x00000009 -#define V_038004_COLOR_1_5_5_5 0x0000000A -#define V_038004_COLOR_4_4_4_4 0x0000000B -#define V_038004_COLOR_5_5_5_1 0x0000000C -#define V_038004_COLOR_32 0x0000000D -#define V_038004_COLOR_32_FLOAT 0x0000000E -#define V_038004_COLOR_16_16 0x0000000F -#define V_038004_COLOR_16_16_FLOAT 0x00000010 -#define V_038004_COLOR_8_24 0x00000011 -#define V_038004_COLOR_8_24_FLOAT 0x00000012 -#define V_038004_COLOR_24_8 0x00000013 -#define V_038004_COLOR_24_8_FLOAT 0x00000014 -#define V_038004_COLOR_10_11_11 0x00000015 -#define V_038004_COLOR_10_11_11_FLOAT 0x00000016 -#define V_038004_COLOR_11_11_10 0x00000017 -#define V_038004_COLOR_11_11_10_FLOAT 0x00000018 -#define V_038004_COLOR_2_10_10_10 0x00000019 -#define V_038004_COLOR_8_8_8_8 0x0000001A -#define V_038004_COLOR_10_10_10_2 0x0000001B -#define V_038004_COLOR_X24_8_32_FLOAT 0x0000001C -#define V_038004_COLOR_32_32 0x0000001D -#define V_038004_COLOR_32_32_FLOAT 0x0000001E -#define V_038004_COLOR_16_16_16_16 0x0000001F -#define V_038004_COLOR_16_16_16_16_FLOAT 0x00000020 -#define V_038004_COLOR_32_32_32_32 0x00000022 -#define V_038004_COLOR_32_32_32_32_FLOAT 0x00000023 -#define R_038008_SQ_TEX_RESOURCE_WORD2_0 0x038008 -#define S_038008_BASE_ADDRESS(x) (((x) & 0xFFFFFFFF) << 0) -#define G_038008_BASE_ADDRESS(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_038008_BASE_ADDRESS 0x00000000 -#define R_03800C_SQ_TEX_RESOURCE_WORD3_0 0x03800C -#define S_03800C_MIP_ADDRESS(x) (((x) & 0xFFFFFFFF) << 0) -#define G_03800C_MIP_ADDRESS(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_03800C_MIP_ADDRESS 0x00000000 -#define R_038010_SQ_TEX_RESOURCE_WORD4_0 0x038010 -#define S_038010_FORMAT_COMP_X(x) (((x) & 0x3) << 0) -#define G_038010_FORMAT_COMP_X(x) (((x) >> 0) & 0x3) -#define C_038010_FORMAT_COMP_X 0xFFFFFFFC -#define S_038010_FORMAT_COMP_Y(x) (((x) & 0x3) << 2) -#define G_038010_FORMAT_COMP_Y(x) (((x) >> 2) & 0x3) -#define C_038010_FORMAT_COMP_Y 0xFFFFFFF3 -#define S_038010_FORMAT_COMP_Z(x) (((x) & 0x3) << 4) -#define G_038010_FORMAT_COMP_Z(x) (((x) >> 4) & 0x3) -#define C_038010_FORMAT_COMP_Z 0xFFFFFFCF -#define S_038010_FORMAT_COMP_W(x) (((x) & 0x3) << 6) -#define G_038010_FORMAT_COMP_W(x) (((x) >> 6) & 0x3) -#define C_038010_FORMAT_COMP_W 0xFFFFFF3F -#define S_038010_NUM_FORMAT_ALL(x) (((x) & 0x3) << 8) -#define G_038010_NUM_FORMAT_ALL(x) (((x) >> 8) & 0x3) -#define C_038010_NUM_FORMAT_ALL 0xFFFFFCFF -#define S_038010_SRF_MODE_ALL(x) (((x) & 0x1) << 10) -#define G_038010_SRF_MODE_ALL(x) (((x) >> 10) & 0x1) -#define C_038010_SRF_MODE_ALL 0xFFFFFBFF -#define S_038010_FORCE_DEGAMMA(x) (((x) & 0x1) << 11) -#define G_038010_FORCE_DEGAMMA(x) (((x) >> 11) & 0x1) -#define C_038010_FORCE_DEGAMMA 0xFFFFF7FF -#define S_038010_ENDIAN_SWAP(x) (((x) & 0x3) << 12) -#define G_038010_ENDIAN_SWAP(x) (((x) >> 12) & 0x3) -#define C_038010_ENDIAN_SWAP 0xFFFFCFFF -#define S_038010_REQUEST_SIZE(x) (((x) & 0x3) << 14) -#define G_038010_REQUEST_SIZE(x) (((x) >> 14) & 0x3) -#define C_038010_REQUEST_SIZE 0xFFFF3FFF -#define S_038010_DST_SEL_X(x) (((x) & 0x7) << 16) -#define G_038010_DST_SEL_X(x) (((x) >> 16) & 0x7) -#define C_038010_DST_SEL_X 0xFFF8FFFF -#define S_038010_DST_SEL_Y(x) (((x) & 0x7) << 19) -#define G_038010_DST_SEL_Y(x) (((x) >> 19) & 0x7) -#define C_038010_DST_SEL_Y 0xFFC7FFFF -#define S_038010_DST_SEL_Z(x) (((x) & 0x7) << 22) -#define G_038010_DST_SEL_Z(x) (((x) >> 22) & 0x7) -#define C_038010_DST_SEL_Z 0xFE3FFFFF -#define S_038010_DST_SEL_W(x) (((x) & 0x7) << 25) -#define G_038010_DST_SEL_W(x) (((x) >> 25) & 0x7) -#define C_038010_DST_SEL_W 0xF1FFFFFF -#define S_038010_BASE_LEVEL(x) (((x) & 0xF) << 28) -#define G_038010_BASE_LEVEL(x) (((x) >> 28) & 0xF) -#define C_038010_BASE_LEVEL 0x0FFFFFFF -#define R_038014_SQ_TEX_RESOURCE_WORD5_0 0x038014 -#define S_038014_LAST_LEVEL(x) (((x) & 0xF) << 0) -#define G_038014_LAST_LEVEL(x) (((x) >> 0) & 0xF) -#define C_038014_LAST_LEVEL 0xFFFFFFF0 -#define S_038014_BASE_ARRAY(x) (((x) & 0x1FFF) << 4) -#define G_038014_BASE_ARRAY(x) (((x) >> 4) & 0x1FFF) -#define C_038014_BASE_ARRAY 0xFFFE000F -#define S_038014_LAST_ARRAY(x) (((x) & 0x1FFF) << 17) -#define G_038014_LAST_ARRAY(x) (((x) >> 17) & 0x1FFF) -#define C_038014_LAST_ARRAY 0xC001FFFF -#define R_038018_SQ_TEX_RESOURCE_WORD6_0 0x038018 -#define S_038018_MPEG_CLAMP(x) (((x) & 0x3) << 0) -#define G_038018_MPEG_CLAMP(x) (((x) >> 0) & 0x3) -#define C_038018_MPEG_CLAMP 0xFFFFFFFC -#define S_038018_PERF_MODULATION(x) (((x) & 0x7) << 5) -#define G_038018_PERF_MODULATION(x) (((x) >> 5) & 0x7) -#define C_038018_PERF_MODULATION 0xFFFFFF1F -#define S_038018_INTERLACED(x) (((x) & 0x1) << 8) -#define G_038018_INTERLACED(x) (((x) >> 8) & 0x1) -#define C_038018_INTERLACED 0xFFFFFEFF -#define S_038018_TYPE(x) (((x) & 0x3) << 30) -#define G_038018_TYPE(x) (((x) >> 30) & 0x3) -#define C_038018_TYPE 0x3FFFFFFF -#define R_008040_WAIT_UNTIL 0x008040 -#define S_008040_WAIT_CP_DMA_IDLE(x) (((x) & 0x1) << 8) -#define G_008040_WAIT_CP_DMA_IDLE(x) (((x) >> 8) & 0x1) -#define C_008040_WAIT_CP_DMA_IDLE 0xFFFFFEFF -#define S_008040_WAIT_CMDFIFO(x) (((x) & 0x1) << 10) -#define G_008040_WAIT_CMDFIFO(x) (((x) >> 10) & 0x1) -#define C_008040_WAIT_CMDFIFO 0xFFFFFBFF -#define S_008040_WAIT_2D_IDLE(x) (((x) & 0x1) << 14) -#define G_008040_WAIT_2D_IDLE(x) (((x) >> 14) & 0x1) -#define C_008040_WAIT_2D_IDLE 0xFFFFBFFF -#define S_008040_WAIT_3D_IDLE(x) (((x) & 0x1) << 15) -#define G_008040_WAIT_3D_IDLE(x) (((x) >> 15) & 0x1) -#define C_008040_WAIT_3D_IDLE 0xFFFF7FFF -#define S_008040_WAIT_2D_IDLECLEAN(x) (((x) & 0x1) << 16) -#define G_008040_WAIT_2D_IDLECLEAN(x) (((x) >> 16) & 0x1) -#define C_008040_WAIT_2D_IDLECLEAN 0xFFFEFFFF -#define S_008040_WAIT_3D_IDLECLEAN(x) (((x) & 0x1) << 17) -#define G_008040_WAIT_3D_IDLECLEAN(x) (((x) >> 17) & 0x1) -#define C_008040_WAIT_3D_IDLECLEAN 0xFFFDFFFF -#define S_008040_WAIT_EXTERN_SIG(x) (((x) & 0x1) << 19) -#define G_008040_WAIT_EXTERN_SIG(x) (((x) >> 19) & 0x1) -#define C_008040_WAIT_EXTERN_SIG 0xFFF7FFFF -#define S_008040_CMDFIFO_ENTRIES(x) (((x) & 0x1F) << 20) -#define G_008040_CMDFIFO_ENTRIES(x) (((x) >> 20) & 0x1F) -#define C_008040_CMDFIFO_ENTRIES 0xFE0FFFFF -#define R_008958_VGT_PRIMITIVE_TYPE 0x008958 -#define S_008958_PRIM_TYPE(x) (((x) & 0x3F) << 0) -#define G_008958_PRIM_TYPE(x) (((x) >> 0) & 0x3F) -#define C_008958_PRIM_TYPE 0xFFFFFFC0 -#define R_008C00_SQ_CONFIG 0x008C00 -#define S_008C00_VC_ENABLE(x) (((x) & 0x1) << 0) -#define G_008C00_VC_ENABLE(x) (((x) >> 0) & 0x1) -#define C_008C00_VC_ENABLE 0xFFFFFFFE -#define S_008C00_EXPORT_SRC_C(x) (((x) & 0x1) << 1) -#define G_008C00_EXPORT_SRC_C(x) (((x) >> 1) & 0x1) -#define C_008C00_EXPORT_SRC_C 0xFFFFFFFD -#define S_008C00_DX9_CONSTS(x) (((x) & 0x1) << 2) -#define G_008C00_DX9_CONSTS(x) (((x) >> 2) & 0x1) -#define C_008C00_DX9_CONSTS 0xFFFFFFFB -#define S_008C00_ALU_INST_PREFER_VECTOR(x) (((x) & 0x1) << 3) -#define G_008C00_ALU_INST_PREFER_VECTOR(x) (((x) >> 3) & 0x1) -#define C_008C00_ALU_INST_PREFER_VECTOR 0xFFFFFFF7 -#define S_008C00_DX10_CLAMP(x) (((x) & 0x1) << 4) -#define G_008C00_DX10_CLAMP(x) (((x) >> 4) & 0x1) -#define C_008C00_DX10_CLAMP 0xFFFFFFEF -#define S_008C00_ALU_PREFER_ONE_WATERFALL(x) (((x) & 0x1) << 5) -#define G_008C00_ALU_PREFER_ONE_WATERFALL(x) (((x) >> 5) & 0x1) -#define C_008C00_ALU_PREFER_ONE_WATERFALL 0xFFFFFFDF -#define S_008C00_ALU_MAX_ONE_WATERFALL(x) (((x) & 0x1) << 6) -#define G_008C00_ALU_MAX_ONE_WATERFALL(x) (((x) >> 6) & 0x1) -#define C_008C00_ALU_MAX_ONE_WATERFALL 0xFFFFFFBF -#define S_008C00_CLAUSE_SEQ_PRIO(x) (((x) & 0x3) << 8) -#define G_008C00_CLAUSE_SEQ_PRIO(x) (((x) >> 8) & 0x3) -#define C_008C00_CLAUSE_SEQ_PRIO 0xFFFFFCFF -#define S_008C00_PS_PRIO(x) (((x) & 0x3) << 24) -#define G_008C00_PS_PRIO(x) (((x) >> 24) & 0x3) -#define C_008C00_PS_PRIO 0xFCFFFFFF -#define S_008C00_VS_PRIO(x) (((x) & 0x3) << 26) -#define G_008C00_VS_PRIO(x) (((x) >> 26) & 0x3) -#define C_008C00_VS_PRIO 0xF3FFFFFF -#define S_008C00_GS_PRIO(x) (((x) & 0x3) << 28) -#define G_008C00_GS_PRIO(x) (((x) >> 28) & 0x3) -#define C_008C00_GS_PRIO 0xCFFFFFFF -#define S_008C00_ES_PRIO(x) (((x) & 0x3) << 30) -#define G_008C00_ES_PRIO(x) (((x) >> 30) & 0x3) -#define C_008C00_ES_PRIO 0x3FFFFFFF -#define R_008C04_SQ_GPR_RESOURCE_MGMT_1 0x008C04 -#define S_008C04_NUM_PS_GPRS(x) (((x) & 0xFF) << 0) -#define G_008C04_NUM_PS_GPRS(x) (((x) >> 0) & 0xFF) -#define C_008C04_NUM_PS_GPRS 0xFFFFFF00 -#define S_008C04_NUM_VS_GPRS(x) (((x) & 0xFF) << 16) -#define G_008C04_NUM_VS_GPRS(x) (((x) >> 16) & 0xFF) -#define C_008C04_NUM_VS_GPRS 0xFF00FFFF -#define S_008C04_NUM_CLAUSE_TEMP_GPRS(x) (((x) & 0xF) << 28) -#define G_008C04_NUM_CLAUSE_TEMP_GPRS(x) (((x) >> 28) & 0xF) -#define C_008C04_NUM_CLAUSE_TEMP_GPRS 0x0FFFFFFF -#define R_008C08_SQ_GPR_RESOURCE_MGMT_2 0x008C08 -#define S_008C08_NUM_GS_GPRS(x) (((x) & 0xFF) << 0) -#define G_008C08_NUM_GS_GPRS(x) (((x) >> 0) & 0xFF) -#define C_008C08_NUM_GS_GPRS 0xFFFFFF00 -#define S_008C08_NUM_ES_GPRS(x) (((x) & 0xFF) << 16) -#define G_008C08_NUM_ES_GPRS(x) (((x) >> 16) & 0xFF) -#define C_008C08_NUM_ES_GPRS 0xFF00FFFF -#define R_008C0C_SQ_THREAD_RESOURCE_MGMT 0x008C0C -#define S_008C0C_NUM_PS_THREADS(x) (((x) & 0xFF) << 0) -#define G_008C0C_NUM_PS_THREADS(x) (((x) >> 0) & 0xFF) -#define C_008C0C_NUM_PS_THREADS 0xFFFFFF00 -#define S_008C0C_NUM_VS_THREADS(x) (((x) & 0xFF) << 8) -#define G_008C0C_NUM_VS_THREADS(x) (((x) >> 8) & 0xFF) -#define C_008C0C_NUM_VS_THREADS 0xFFFF00FF -#define S_008C0C_NUM_GS_THREADS(x) (((x) & 0xFF) << 16) -#define G_008C0C_NUM_GS_THREADS(x) (((x) >> 16) & 0xFF) -#define C_008C0C_NUM_GS_THREADS 0xFF00FFFF -#define S_008C0C_NUM_ES_THREADS(x) (((x) & 0xFF) << 24) -#define G_008C0C_NUM_ES_THREADS(x) (((x) >> 24) & 0xFF) -#define C_008C0C_NUM_ES_THREADS 0x00FFFFFF -#define R_008C10_SQ_STACK_RESOURCE_MGMT_1 0x008C10 -#define S_008C10_NUM_PS_STACK_ENTRIES(x) (((x) & 0xFFF) << 0) -#define G_008C10_NUM_PS_STACK_ENTRIES(x) (((x) >> 0) & 0xFFF) -#define C_008C10_NUM_PS_STACK_ENTRIES 0xFFFFF000 -#define S_008C10_NUM_VS_STACK_ENTRIES(x) (((x) & 0xFFF) << 16) -#define G_008C10_NUM_VS_STACK_ENTRIES(x) (((x) >> 16) & 0xFFF) -#define C_008C10_NUM_VS_STACK_ENTRIES 0xF000FFFF -#define R_008C14_SQ_STACK_RESOURCE_MGMT_2 0x008C14 -#define S_008C14_NUM_GS_STACK_ENTRIES(x) (((x) & 0xFFF) << 0) -#define G_008C14_NUM_GS_STACK_ENTRIES(x) (((x) >> 0) & 0xFFF) -#define C_008C14_NUM_GS_STACK_ENTRIES 0xFFFFF000 -#define S_008C14_NUM_ES_STACK_ENTRIES(x) (((x) & 0xFFF) << 16) -#define G_008C14_NUM_ES_STACK_ENTRIES(x) (((x) >> 16) & 0xFFF) -#define C_008C14_NUM_ES_STACK_ENTRIES 0xF000FFFF -#define R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ 0x008D8C -#define S_008D8C_RING0_OFFSET(x) (((x) & 0xFF) << 0) -#define G_008D8C_RING0_OFFSET(x) (((x) >> 0) & 0xFF) -#define C_008D8C_RING0_OFFSET 0xFFFFFF00 -#define S_008D8C_ISOLATE_ES_ENABLE(x) (((x) & 0x1) << 12) -#define G_008D8C_ISOLATE_ES_ENABLE(x) (((x) >> 12) & 0x1) -#define C_008D8C_ISOLATE_ES_ENABLE 0xFFFFEFFF -#define S_008D8C_ISOLATE_GS_ENABLE(x) (((x) & 0x1) << 13) -#define G_008D8C_ISOLATE_GS_ENABLE(x) (((x) >> 13) & 0x1) -#define C_008D8C_ISOLATE_GS_ENABLE 0xFFFFDFFF -#define S_008D8C_VS_PC_LIMIT_ENABLE(x) (((x) & 0x1) << 14) -#define G_008D8C_VS_PC_LIMIT_ENABLE(x) (((x) >> 14) & 0x1) -#define C_008D8C_VS_PC_LIMIT_ENABLE 0xFFFFBFFF -#define R_009508_TA_CNTL_AUX 0x009508 -#define S_009508_DISABLE_CUBE_WRAP(x) (((x) & 0x1) << 0) -#define G_009508_DISABLE_CUBE_WRAP(x) (((x) >> 0) & 0x1) -#define C_009508_DISABLE_CUBE_WRAP 0xFFFFFFFE -#define S_009508_SYNC_GRADIENT(x) (((x) & 0x1) << 24) -#define G_009508_SYNC_GRADIENT(x) (((x) >> 24) & 0x1) -#define C_009508_SYNC_GRADIENT 0xFEFFFFFF -#define S_009508_SYNC_WALKER(x) (((x) & 0x1) << 25) -#define G_009508_SYNC_WALKER(x) (((x) >> 25) & 0x1) -#define C_009508_SYNC_WALKER 0xFDFFFFFF -#define S_009508_SYNC_ALIGNER(x) (((x) & 0x1) << 26) -#define G_009508_SYNC_ALIGNER(x) (((x) >> 26) & 0x1) -#define C_009508_SYNC_ALIGNER 0xFBFFFFFF -#define S_009508_BILINEAR_PRECISION(x) (((x) & 0x1) << 31) -#define G_009508_BILINEAR_PRECISION(x) (((x) >> 31) & 0x1) -#define C_009508_BILINEAR_PRECISION 0x7FFFFFFF -#define R_009714_VC_ENHANCE 0x009714 -#define R_009830_DB_DEBUG 0x009830 -#define R_009838_DB_WATERMARKS 0x009838 -#define S_009838_DEPTH_FREE(x) (((x) & 0x1F) << 0) -#define G_009838_DEPTH_FREE(x) (((x) >> 0) & 0x1F) -#define C_009838_DEPTH_FREE 0xFFFFFFE0 -#define S_009838_DEPTH_FLUSH(x) (((x) & 0x3F) << 5) -#define G_009838_DEPTH_FLUSH(x) (((x) >> 5) & 0x3F) -#define C_009838_DEPTH_FLUSH 0xFFFFF81F -#define S_009838_FORCE_SUMMARIZE(x) (((x) & 0xF) << 11) -#define G_009838_FORCE_SUMMARIZE(x) (((x) >> 11) & 0xF) -#define C_009838_FORCE_SUMMARIZE 0xFFFF87FF -#define S_009838_DEPTH_PENDING_FREE(x) (((x) & 0x1F) << 15) -#define G_009838_DEPTH_PENDING_FREE(x) (((x) >> 15) & 0x1F) -#define C_009838_DEPTH_PENDING_FREE 0xFFF07FFF -#define S_009838_DEPTH_CACHELINE_FREE(x) (((x) & 0x1F) << 20) -#define G_009838_DEPTH_CACHELINE_FREE(x) (((x) >> 20) & 0x1F) -#define C_009838_DEPTH_CACHELINE_FREE 0xFE0FFFFF -#define S_009838_EARLY_Z_PANIC_DISABLE(x) (((x) & 0x1) << 25) -#define G_009838_EARLY_Z_PANIC_DISABLE(x) (((x) >> 25) & 0x1) -#define C_009838_EARLY_Z_PANIC_DISABLE 0xFDFFFFFF -#define S_009838_LATE_Z_PANIC_DISABLE(x) (((x) & 0x1) << 26) -#define G_009838_LATE_Z_PANIC_DISABLE(x) (((x) >> 26) & 0x1) -#define C_009838_LATE_Z_PANIC_DISABLE 0xFBFFFFFF -#define S_009838_RE_Z_PANIC_DISABLE(x) (((x) & 0x1) << 27) -#define G_009838_RE_Z_PANIC_DISABLE(x) (((x) >> 27) & 0x1) -#define C_009838_RE_Z_PANIC_DISABLE 0xF7FFFFFF -#define S_009838_DB_EXTRA_DEBUG(x) (((x) & 0xF) << 28) -#define G_009838_DB_EXTRA_DEBUG(x) (((x) >> 28) & 0xF) -#define C_009838_DB_EXTRA_DEBUG 0x0FFFFFFF -#define R_028030_PA_SC_SCREEN_SCISSOR_TL 0x028030 -#define S_028030_TL_X(x) (((x) & 0x7FFF) << 0) -#define G_028030_TL_X(x) (((x) >> 0) & 0x7FFF) -#define C_028030_TL_X 0xFFFF8000 -#define S_028030_TL_Y(x) (((x) & 0x7FFF) << 16) -#define G_028030_TL_Y(x) (((x) >> 16) & 0x7FFF) -#define C_028030_TL_Y 0x8000FFFF -#define R_028034_PA_SC_SCREEN_SCISSOR_BR 0x028034 -#define S_028034_BR_X(x) (((x) & 0x7FFF) << 0) -#define G_028034_BR_X(x) (((x) >> 0) & 0x7FFF) -#define C_028034_BR_X 0xFFFF8000 -#define S_028034_BR_Y(x) (((x) & 0x7FFF) << 16) -#define G_028034_BR_Y(x) (((x) >> 16) & 0x7FFF) -#define C_028034_BR_Y 0x8000FFFF -#define R_028200_PA_SC_WINDOW_OFFSET 0x028200 -#define S_028200_WINDOW_X_OFFSET(x) (((x) & 0x7FFF) << 0) -#define G_028200_WINDOW_X_OFFSET(x) (((x) >> 0) & 0x7FFF) -#define C_028200_WINDOW_X_OFFSET 0xFFFF8000 -#define S_028200_WINDOW_Y_OFFSET(x) (((x) & 0x7FFF) << 16) -#define G_028200_WINDOW_Y_OFFSET(x) (((x) >> 16) & 0x7FFF) -#define C_028200_WINDOW_Y_OFFSET 0x8000FFFF -#define R_028204_PA_SC_WINDOW_SCISSOR_TL 0x028204 -#define S_028204_TL_X(x) (((x) & 0x3FFF) << 0) -#define G_028204_TL_X(x) (((x) >> 0) & 0x3FFF) -#define C_028204_TL_X 0xFFFFC000 -#define S_028204_TL_Y(x) (((x) & 0x3FFF) << 16) -#define G_028204_TL_Y(x) (((x) >> 16) & 0x3FFF) -#define C_028204_TL_Y 0xC000FFFF -#define S_028204_WINDOW_OFFSET_DISABLE(x) (((x) & 0x1) << 31) -#define G_028204_WINDOW_OFFSET_DISABLE(x) (((x) >> 31) & 0x1) -#define C_028204_WINDOW_OFFSET_DISABLE 0x7FFFFFFF -#define R_028208_PA_SC_WINDOW_SCISSOR_BR 0x028208 -#define S_028208_BR_X(x) (((x) & 0x3FFF) << 0) -#define G_028208_BR_X(x) (((x) >> 0) & 0x3FFF) -#define C_028208_BR_X 0xFFFFC000 -#define S_028208_BR_Y(x) (((x) & 0x3FFF) << 16) -#define G_028208_BR_Y(x) (((x) >> 16) & 0x3FFF) -#define C_028208_BR_Y 0xC000FFFF -#define R_02820C_PA_SC_CLIPRECT_RULE 0x02820C -#define S_02820C_CLIP_RULE(x) (((x) & 0xFFFF) << 0) -#define G_02820C_CLIP_RULE(x) (((x) >> 0) & 0xFFFF) -#define C_02820C_CLIP_RULE 0xFFFF0000 -#define R_028210_PA_SC_CLIPRECT_0_TL 0x028210 -#define S_028210_TL_X(x) (((x) & 0x3FFF) << 0) -#define G_028210_TL_X(x) (((x) >> 0) & 0x3FFF) -#define C_028210_TL_X 0xFFFFC000 -#define S_028210_TL_Y(x) (((x) & 0x3FFF) << 16) -#define G_028210_TL_Y(x) (((x) >> 16) & 0x3FFF) -#define C_028210_TL_Y 0xC000FFFF -#define R_028214_PA_SC_CLIPRECT_0_BR 0x028214 -#define S_028214_BR_X(x) (((x) & 0x3FFF) << 0) -#define G_028214_BR_X(x) (((x) >> 0) & 0x3FFF) -#define C_028214_BR_X 0xFFFFC000 -#define S_028214_BR_Y(x) (((x) & 0x3FFF) << 16) -#define G_028214_BR_Y(x) (((x) >> 16) & 0x3FFF) -#define C_028214_BR_Y 0xC000FFFF -#define R_028218_PA_SC_CLIPRECT_1_TL 0x028218 -#define R_02821C_PA_SC_CLIPRECT_1_BR 0x02821C -#define R_028220_PA_SC_CLIPRECT_2_TL 0x028220 -#define R_028224_PA_SC_CLIPRECT_2_BR 0x028224 -#define R_028228_PA_SC_CLIPRECT_3_TL 0x028228 -#define R_02822C_PA_SC_CLIPRECT_3_BR 0x02822C -#define R_028230_PA_SC_EDGERULE 0x028230 -#define R_028240_PA_SC_GENERIC_SCISSOR_TL 0x028240 -#define S_028240_TL_X(x) (((x) & 0x3FFF) << 0) -#define G_028240_TL_X(x) (((x) >> 0) & 0x3FFF) -#define C_028240_TL_X 0xFFFFC000 -#define S_028240_TL_Y(x) (((x) & 0x3FFF) << 16) -#define G_028240_TL_Y(x) (((x) >> 16) & 0x3FFF) -#define C_028240_TL_Y 0xC000FFFF -#define S_028240_WINDOW_OFFSET_DISABLE(x) (((x) & 0x1) << 31) -#define G_028240_WINDOW_OFFSET_DISABLE(x) (((x) >> 31) & 0x1) -#define C_028240_WINDOW_OFFSET_DISABLE 0x7FFFFFFF -#define R_028244_PA_SC_GENERIC_SCISSOR_BR 0x028244 -#define S_028244_BR_X(x) (((x) & 0x3FFF) << 0) -#define G_028244_BR_X(x) (((x) >> 0) & 0x3FFF) -#define C_028244_BR_X 0xFFFFC000 -#define S_028244_BR_Y(x) (((x) & 0x3FFF) << 16) -#define G_028244_BR_Y(x) (((x) >> 16) & 0x3FFF) -#define C_028244_BR_Y 0xC000FFFF -#define R_0282D0_PA_SC_VPORT_ZMIN_0 0x0282D0 -#define S_0282D0_VPORT_ZMIN(x) (((x) & 0xFFFFFFFF) << 0) -#define G_0282D0_VPORT_ZMIN(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_0282D0_VPORT_ZMIN 0x00000000 -#define R_0282D4_PA_SC_VPORT_ZMAX_0 0x0282D4 -#define S_0282D4_VPORT_ZMAX(x) (((x) & 0xFFFFFFFF) << 0) -#define G_0282D4_VPORT_ZMAX(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_0282D4_VPORT_ZMAX 0x00000000 -#define R_028350_SX_MISC 0x028350 -#define S_028350_MULTIPASS(x) (((x) & 0x1) << 0) -#define G_028350_MULTIPASS(x) (((x) >> 0) & 0x1) -#define C_028350_MULTIPASS 0xFFFFFFFE -#define R_028380_SQ_VTX_SEMANTIC_0 0x028380 -#define S_028380_SEMANTIC_ID(x) (((x) & 0xFF) << 0) -#define G_028380_SEMANTIC_ID(x) (((x) >> 0) & 0xFF) -#define C_028380_SEMANTIC_ID 0xFFFFFF00 -#define R_028384_SQ_VTX_SEMANTIC_1 0x028384 -#define R_028388_SQ_VTX_SEMANTIC_2 0x028388 -#define R_02838C_SQ_VTX_SEMANTIC_3 0x02838C -#define R_028390_SQ_VTX_SEMANTIC_4 0x028390 -#define R_028394_SQ_VTX_SEMANTIC_5 0x028394 -#define R_028398_SQ_VTX_SEMANTIC_6 0x028398 -#define R_02839C_SQ_VTX_SEMANTIC_7 0x02839C -#define R_0283A0_SQ_VTX_SEMANTIC_8 0x0283A0 -#define R_0283A4_SQ_VTX_SEMANTIC_9 0x0283A4 -#define R_0283A8_SQ_VTX_SEMANTIC_10 0x0283A8 -#define R_0283AC_SQ_VTX_SEMANTIC_11 0x0283AC -#define R_0283B0_SQ_VTX_SEMANTIC_12 0x0283B0 -#define R_0283B4_SQ_VTX_SEMANTIC_13 0x0283B4 -#define R_0283B8_SQ_VTX_SEMANTIC_14 0x0283B8 -#define R_0283BC_SQ_VTX_SEMANTIC_15 0x0283BC -#define R_0283C0_SQ_VTX_SEMANTIC_16 0x0283C0 -#define R_0283C4_SQ_VTX_SEMANTIC_17 0x0283C4 -#define R_0283C8_SQ_VTX_SEMANTIC_18 0x0283C8 -#define R_0283CC_SQ_VTX_SEMANTIC_19 0x0283CC -#define R_0283D0_SQ_VTX_SEMANTIC_20 0x0283D0 -#define R_0283D4_SQ_VTX_SEMANTIC_21 0x0283D4 -#define R_0283D8_SQ_VTX_SEMANTIC_22 0x0283D8 -#define R_0283DC_SQ_VTX_SEMANTIC_23 0x0283DC -#define R_0283E0_SQ_VTX_SEMANTIC_24 0x0283E0 -#define R_0283E4_SQ_VTX_SEMANTIC_25 0x0283E4 -#define R_0283E8_SQ_VTX_SEMANTIC_26 0x0283E8 -#define R_0283EC_SQ_VTX_SEMANTIC_27 0x0283EC -#define R_0283F0_SQ_VTX_SEMANTIC_28 0x0283F0 -#define R_0283F4_SQ_VTX_SEMANTIC_29 0x0283F4 -#define R_0283F8_SQ_VTX_SEMANTIC_30 0x0283F8 -#define R_0283FC_SQ_VTX_SEMANTIC_31 0x0283FC -#define R_028400_VGT_MAX_VTX_INDX 0x028400 -#define S_028400_MAX_INDX(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028400_MAX_INDX(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028400_MAX_INDX 0x00000000 -#define R_028404_VGT_MIN_VTX_INDX 0x028404 -#define S_028404_MIN_INDX(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028404_MIN_INDX(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028404_MIN_INDX 0x00000000 -#define R_028408_VGT_INDX_OFFSET 0x028408 -#define S_028408_INDX_OFFSET(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028408_INDX_OFFSET(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028408_INDX_OFFSET 0x00000000 -#define R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX 0x02840C -#define S_02840C_RESET_INDX(x) (((x) & 0xFFFFFFFF) << 0) -#define G_02840C_RESET_INDX(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_02840C_RESET_INDX 0x00000000 -#define R_028410_SX_ALPHA_TEST_CONTROL 0x028410 -#define S_028410_ALPHA_FUNC(x) (((x) & 0x7) << 0) -#define G_028410_ALPHA_FUNC(x) (((x) >> 0) & 0x7) -#define C_028410_ALPHA_FUNC 0xFFFFFFF8 -#define S_028410_ALPHA_TEST_ENABLE(x) (((x) & 0x1) << 3) -#define G_028410_ALPHA_TEST_ENABLE(x) (((x) >> 3) & 0x1) -#define C_028410_ALPHA_TEST_ENABLE 0xFFFFFFF7 -#define S_028410_ALPHA_TEST_BYPASS(x) (((x) & 0x1) << 8) -#define G_028410_ALPHA_TEST_BYPASS(x) (((x) >> 8) & 0x1) -#define C_028410_ALPHA_TEST_BYPASS 0xFFFFFEFF -#define R_028414_CB_BLEND_RED 0x028414 -#define S_028414_BLEND_RED(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028414_BLEND_RED(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028414_BLEND_RED 0x00000000 -#define R_028418_CB_BLEND_GREEN 0x028418 -#define S_028418_BLEND_GREEN(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028418_BLEND_GREEN(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028418_BLEND_GREEN 0x00000000 -#define R_02841C_CB_BLEND_BLUE 0x02841C -#define S_02841C_BLEND_BLUE(x) (((x) & 0xFFFFFFFF) << 0) -#define G_02841C_BLEND_BLUE(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_02841C_BLEND_BLUE 0x00000000 -#define R_028420_CB_BLEND_ALPHA 0x028420 -#define S_028420_BLEND_ALPHA(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028420_BLEND_ALPHA(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028420_BLEND_ALPHA 0x00000000 -#define R_028438_SX_ALPHA_REF 0x028438 -#define S_028438_ALPHA_REF(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028438_ALPHA_REF(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028438_ALPHA_REF 0x00000000 -#define R_0286C8_SPI_THREAD_GROUPING 0x0286C8 -#define S_0286C8_PS_GROUPING(x) (((x) & 0x1F) << 0) -#define G_0286C8_PS_GROUPING(x) (((x) >> 0) & 0x1F) -#define C_0286C8_PS_GROUPING 0xFFFFFFE0 -#define S_0286C8_VS_GROUPING(x) (((x) & 0x1F) << 8) -#define G_0286C8_VS_GROUPING(x) (((x) >> 8) & 0x1F) -#define C_0286C8_VS_GROUPING 0xFFFFE0FF -#define S_0286C8_GS_GROUPING(x) (((x) & 0x1F) << 16) -#define G_0286C8_GS_GROUPING(x) (((x) >> 16) & 0x1F) -#define C_0286C8_GS_GROUPING 0xFFE0FFFF -#define S_0286C8_ES_GROUPING(x) (((x) & 0x1F) << 24) -#define G_0286C8_ES_GROUPING(x) (((x) >> 24) & 0x1F) -#define C_0286C8_ES_GROUPING 0xE0FFFFFF -#define R_0286D8_SPI_INPUT_Z 0x0286D8 -#define S_0286D8_PROVIDE_Z_TO_SPI(x) (((x) & 0x1) << 0) -#define G_0286D8_PROVIDE_Z_TO_SPI(x) (((x) >> 0) & 0x1) -#define C_0286D8_PROVIDE_Z_TO_SPI 0xFFFFFFFE -#define R_0286DC_SPI_FOG_CNTL 0x0286DC -#define S_0286DC_PASS_FOG_THROUGH_PS(x) (((x) & 0x1) << 0) -#define G_0286DC_PASS_FOG_THROUGH_PS(x) (((x) >> 0) & 0x1) -#define C_0286DC_PASS_FOG_THROUGH_PS 0xFFFFFFFE -#define S_0286DC_PIXEL_FOG_FUNC(x) (((x) & 0x3) << 1) -#define G_0286DC_PIXEL_FOG_FUNC(x) (((x) >> 1) & 0x3) -#define C_0286DC_PIXEL_FOG_FUNC 0xFFFFFFF9 -#define S_0286DC_PIXEL_FOG_SRC_SEL(x) (((x) & 0x1) << 3) -#define G_0286DC_PIXEL_FOG_SRC_SEL(x) (((x) >> 3) & 0x1) -#define C_0286DC_PIXEL_FOG_SRC_SEL 0xFFFFFFF7 -#define S_0286DC_VS_FOG_CLAMP_DISABLE(x) (((x) & 0x1) << 4) -#define G_0286DC_VS_FOG_CLAMP_DISABLE(x) (((x) >> 4) & 0x1) -#define C_0286DC_VS_FOG_CLAMP_DISABLE 0xFFFFFFEF -#define R_0286E0_SPI_FOG_FUNC_SCALE 0x0286E0 -#define S_0286E0_VALUE(x) (((x) & 0xFFFFFFFF) << 0) -#define G_0286E0_VALUE(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_0286E0_VALUE 0x00000000 -#define R_0286E4_SPI_FOG_FUNC_BIAS 0x0286E4 -#define S_0286E4_VALUE(x) (((x) & 0xFFFFFFFF) << 0) -#define G_0286E4_VALUE(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_0286E4_VALUE 0x00000000 -#define R_0287A0_CB_SHADER_CONTROL 0x0287A0 -#define S_0287A0_RT0_ENABLE(x) (((x) & 0x1) << 0) -#define G_0287A0_RT0_ENABLE(x) (((x) >> 0) & 0x1) -#define C_0287A0_RT0_ENABLE 0xFFFFFFFE -#define S_0287A0_RT1_ENABLE(x) (((x) & 0x1) << 1) -#define G_0287A0_RT1_ENABLE(x) (((x) >> 1) & 0x1) -#define C_0287A0_RT1_ENABLE 0xFFFFFFFD -#define S_0287A0_RT2_ENABLE(x) (((x) & 0x1) << 2) -#define G_0287A0_RT2_ENABLE(x) (((x) >> 2) & 0x1) -#define C_0287A0_RT2_ENABLE 0xFFFFFFFB -#define S_0287A0_RT3_ENABLE(x) (((x) & 0x1) << 3) -#define G_0287A0_RT3_ENABLE(x) (((x) >> 3) & 0x1) -#define C_0287A0_RT3_ENABLE 0xFFFFFFF7 -#define S_0287A0_RT4_ENABLE(x) (((x) & 0x1) << 4) -#define G_0287A0_RT4_ENABLE(x) (((x) >> 4) & 0x1) -#define C_0287A0_RT4_ENABLE 0xFFFFFFEF -#define S_0287A0_RT5_ENABLE(x) (((x) & 0x1) << 5) -#define G_0287A0_RT5_ENABLE(x) (((x) >> 5) & 0x1) -#define C_0287A0_RT5_ENABLE 0xFFFFFFDF -#define S_0287A0_RT6_ENABLE(x) (((x) & 0x1) << 6) -#define G_0287A0_RT6_ENABLE(x) (((x) >> 6) & 0x1) -#define C_0287A0_RT6_ENABLE 0xFFFFFFBF -#define S_0287A0_RT7_ENABLE(x) (((x) & 0x1) << 7) -#define G_0287A0_RT7_ENABLE(x) (((x) >> 7) & 0x1) -#define C_0287A0_RT7_ENABLE 0xFFFFFF7F -#define R_028894_SQ_PGM_START_FS 0x028894 -#define S_028894_PGM_START(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028894_PGM_START(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028894_PGM_START 0x00000000 -#define R_0288A4_SQ_PGM_RESOURCES_FS 0x0288A4 -#define S_0288A4_NUM_GPRS(x) (((x) & 0xFF) << 0) -#define G_0288A4_NUM_GPRS(x) (((x) >> 0) & 0xFF) -#define C_0288A4_NUM_GPRS 0xFFFFFF00 -#define S_0288A4_STACK_SIZE(x) (((x) & 0xFF) << 8) -#define G_0288A4_STACK_SIZE(x) (((x) >> 8) & 0xFF) -#define C_0288A4_STACK_SIZE 0xFFFF00FF -#define S_0288A4_DX10_CLAMP(x) (((x) & 0x1) << 21) -#define G_0288A4_DX10_CLAMP(x) (((x) >> 21) & 0x1) -#define C_0288A4_DX10_CLAMP 0xFFDFFFFF -#define R_0288A8_SQ_ESGS_RING_ITEMSIZE 0x0288A8 -#define S_0288A8_ITEMSIZE(x) (((x) & 0x7FFF) << 0) -#define G_0288A8_ITEMSIZE(x) (((x) >> 0) & 0x7FFF) -#define C_0288A8_ITEMSIZE 0xFFFF8000 -#define R_0288AC_SQ_GSVS_RING_ITEMSIZE 0x0288AC -#define S_0288AC_ITEMSIZE(x) (((x) & 0x7FFF) << 0) -#define G_0288AC_ITEMSIZE(x) (((x) >> 0) & 0x7FFF) -#define C_0288AC_ITEMSIZE 0xFFFF8000 -#define R_0288B0_SQ_ESTMP_RING_ITEMSIZE 0x0288B0 -#define S_0288B0_ITEMSIZE(x) (((x) & 0x7FFF) << 0) -#define G_0288B0_ITEMSIZE(x) (((x) >> 0) & 0x7FFF) -#define C_0288B0_ITEMSIZE 0xFFFF8000 -#define R_0288B4_SQ_GSTMP_RING_ITEMSIZE 0x0288B4 -#define S_0288B4_ITEMSIZE(x) (((x) & 0x7FFF) << 0) -#define G_0288B4_ITEMSIZE(x) (((x) >> 0) & 0x7FFF) -#define C_0288B4_ITEMSIZE 0xFFFF8000 -#define R_0288B8_SQ_VSTMP_RING_ITEMSIZE 0x0288B8 -#define S_0288B8_ITEMSIZE(x) (((x) & 0x7FFF) << 0) -#define G_0288B8_ITEMSIZE(x) (((x) >> 0) & 0x7FFF) -#define C_0288B8_ITEMSIZE 0xFFFF8000 -#define R_0288BC_SQ_PSTMP_RING_ITEMSIZE 0x0288BC -#define S_0288BC_ITEMSIZE(x) (((x) & 0x7FFF) << 0) -#define G_0288BC_ITEMSIZE(x) (((x) >> 0) & 0x7FFF) -#define C_0288BC_ITEMSIZE 0xFFFF8000 -#define R_0288C0_SQ_FBUF_RING_ITEMSIZE 0x0288C0 -#define S_0288C0_ITEMSIZE(x) (((x) & 0x7FFF) << 0) -#define G_0288C0_ITEMSIZE(x) (((x) >> 0) & 0x7FFF) -#define C_0288C0_ITEMSIZE 0xFFFF8000 -#define R_0288C4_SQ_REDUC_RING_ITEMSIZE 0x0288C4 -#define S_0288C4_ITEMSIZE(x) (((x) & 0x7FFF) << 0) -#define G_0288C4_ITEMSIZE(x) (((x) >> 0) & 0x7FFF) -#define C_0288C4_ITEMSIZE 0xFFFF8000 -#define R_0288C8_SQ_GS_VERT_ITEMSIZE 0x0288C8 -#define S_0288C8_ITEMSIZE(x) (((x) & 0x7FFF) << 0) -#define G_0288C8_ITEMSIZE(x) (((x) >> 0) & 0x7FFF) -#define C_0288C8_ITEMSIZE 0xFFFF8000 -#define R_0288DC_SQ_PGM_CF_OFFSET_FS 0x0288DC -#define S_0288DC_PGM_CF_OFFSET(x) (((x) & 0xFFFFF) << 0) -#define G_0288DC_PGM_CF_OFFSET(x) (((x) >> 0) & 0xFFFFF) -#define C_0288DC_PGM_CF_OFFSET 0xFFF00000 -#define R_028A10_VGT_OUTPUT_PATH_CNTL 0x028A10 -#define S_028A10_PATH_SELECT(x) (((x) & 0x3) << 0) -#define G_028A10_PATH_SELECT(x) (((x) >> 0) & 0x3) -#define C_028A10_PATH_SELECT 0xFFFFFFFC -#define R_028A14_VGT_HOS_CNTL 0x028A14 -#define S_028A14_TESS_MODE(x) (((x) & 0x3) << 0) -#define G_028A14_TESS_MODE(x) (((x) >> 0) & 0x3) -#define C_028A14_TESS_MODE 0xFFFFFFFC -#define R_028A18_VGT_HOS_MAX_TESS_LEVEL 0x028A18 -#define S_028A18_MAX_TESS(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028A18_MAX_TESS(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028A18_MAX_TESS 0x00000000 -#define R_028A1C_VGT_HOS_MIN_TESS_LEVEL 0x028A1C -#define S_028A1C_MIN_TESS(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028A1C_MIN_TESS(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028A1C_MIN_TESS 0x00000000 -#define R_028A20_VGT_HOS_REUSE_DEPTH 0x028A20 -#define S_028A20_REUSE_DEPTH(x) (((x) & 0xFF) << 0) -#define G_028A20_REUSE_DEPTH(x) (((x) >> 0) & 0xFF) -#define C_028A20_REUSE_DEPTH 0xFFFFFF00 -#define R_028A24_VGT_GROUP_PRIM_TYPE 0x028A24 -#define S_028A24_PRIM_TYPE(x) (((x) & 0x1F) << 0) -#define G_028A24_PRIM_TYPE(x) (((x) >> 0) & 0x1F) -#define C_028A24_PRIM_TYPE 0xFFFFFFE0 -#define S_028A24_RETAIN_ORDER(x) (((x) & 0x1) << 14) -#define G_028A24_RETAIN_ORDER(x) (((x) >> 14) & 0x1) -#define C_028A24_RETAIN_ORDER 0xFFFFBFFF -#define S_028A24_RETAIN_QUADS(x) (((x) & 0x1) << 15) -#define G_028A24_RETAIN_QUADS(x) (((x) >> 15) & 0x1) -#define C_028A24_RETAIN_QUADS 0xFFFF7FFF -#define S_028A24_PRIM_ORDER(x) (((x) & 0x7) << 16) -#define G_028A24_PRIM_ORDER(x) (((x) >> 16) & 0x7) -#define C_028A24_PRIM_ORDER 0xFFF8FFFF -#define R_028A28_VGT_GROUP_FIRST_DECR 0x028A28 -#define S_028A28_FIRST_DECR(x) (((x) & 0xF) << 0) -#define G_028A28_FIRST_DECR(x) (((x) >> 0) & 0xF) -#define C_028A28_FIRST_DECR 0xFFFFFFF0 -#define R_028A2C_VGT_GROUP_DECR 0x028A2C -#define S_028A2C_DECR(x) (((x) & 0xF) << 0) -#define G_028A2C_DECR(x) (((x) >> 0) & 0xF) -#define C_028A2C_DECR 0xFFFFFFF0 -#define R_028A30_VGT_GROUP_VECT_0_CNTL 0x028A30 -#define S_028A30_COMP_X_EN(x) (((x) & 0x1) << 0) -#define G_028A30_COMP_X_EN(x) (((x) >> 0) & 0x1) -#define C_028A30_COMP_X_EN 0xFFFFFFFE -#define S_028A30_COMP_Y_EN(x) (((x) & 0x1) << 1) -#define G_028A30_COMP_Y_EN(x) (((x) >> 1) & 0x1) -#define C_028A30_COMP_Y_EN 0xFFFFFFFD -#define S_028A30_COMP_Z_EN(x) (((x) & 0x1) << 2) -#define G_028A30_COMP_Z_EN(x) (((x) >> 2) & 0x1) -#define C_028A30_COMP_Z_EN 0xFFFFFFFB -#define S_028A30_COMP_W_EN(x) (((x) & 0x1) << 3) -#define G_028A30_COMP_W_EN(x) (((x) >> 3) & 0x1) -#define C_028A30_COMP_W_EN 0xFFFFFFF7 -#define S_028A30_STRIDE(x) (((x) & 0xFF) << 8) -#define G_028A30_STRIDE(x) (((x) >> 8) & 0xFF) -#define C_028A30_STRIDE 0xFFFF00FF -#define S_028A30_SHIFT(x) (((x) & 0xFF) << 16) -#define G_028A30_SHIFT(x) (((x) >> 16) & 0xFF) -#define C_028A30_SHIFT 0xFF00FFFF -#define R_028A34_VGT_GROUP_VECT_1_CNTL 0x028A34 -#define S_028A34_COMP_X_EN(x) (((x) & 0x1) << 0) -#define G_028A34_COMP_X_EN(x) (((x) >> 0) & 0x1) -#define C_028A34_COMP_X_EN 0xFFFFFFFE -#define S_028A34_COMP_Y_EN(x) (((x) & 0x1) << 1) -#define G_028A34_COMP_Y_EN(x) (((x) >> 1) & 0x1) -#define C_028A34_COMP_Y_EN 0xFFFFFFFD -#define S_028A34_COMP_Z_EN(x) (((x) & 0x1) << 2) -#define G_028A34_COMP_Z_EN(x) (((x) >> 2) & 0x1) -#define C_028A34_COMP_Z_EN 0xFFFFFFFB -#define S_028A34_COMP_W_EN(x) (((x) & 0x1) << 3) -#define G_028A34_COMP_W_EN(x) (((x) >> 3) & 0x1) -#define C_028A34_COMP_W_EN 0xFFFFFFF7 -#define S_028A34_STRIDE(x) (((x) & 0xFF) << 8) -#define G_028A34_STRIDE(x) (((x) >> 8) & 0xFF) -#define C_028A34_STRIDE 0xFFFF00FF -#define S_028A34_SHIFT(x) (((x) & 0xFF) << 16) -#define G_028A34_SHIFT(x) (((x) >> 16) & 0xFF) -#define C_028A34_SHIFT 0xFF00FFFF -#define R_028A38_VGT_GROUP_VECT_0_FMT_CNTL 0x028A38 -#define S_028A38_X_CONV(x) (((x) & 0xF) << 0) -#define G_028A38_X_CONV(x) (((x) >> 0) & 0xF) -#define C_028A38_X_CONV 0xFFFFFFF0 -#define S_028A38_X_OFFSET(x) (((x) & 0xF) << 4) -#define G_028A38_X_OFFSET(x) (((x) >> 4) & 0xF) -#define C_028A38_X_OFFSET 0xFFFFFF0F -#define S_028A38_Y_CONV(x) (((x) & 0xF) << 8) -#define G_028A38_Y_CONV(x) (((x) >> 8) & 0xF) -#define C_028A38_Y_CONV 0xFFFFF0FF -#define S_028A38_Y_OFFSET(x) (((x) & 0xF) << 12) -#define G_028A38_Y_OFFSET(x) (((x) >> 12) & 0xF) -#define C_028A38_Y_OFFSET 0xFFFF0FFF -#define S_028A38_Z_CONV(x) (((x) & 0xF) << 16) -#define G_028A38_Z_CONV(x) (((x) >> 16) & 0xF) -#define C_028A38_Z_CONV 0xFFF0FFFF -#define S_028A38_Z_OFFSET(x) (((x) & 0xF) << 20) -#define G_028A38_Z_OFFSET(x) (((x) >> 20) & 0xF) -#define C_028A38_Z_OFFSET 0xFF0FFFFF -#define S_028A38_W_CONV(x) (((x) & 0xF) << 24) -#define G_028A38_W_CONV(x) (((x) >> 24) & 0xF) -#define C_028A38_W_CONV 0xF0FFFFFF -#define S_028A38_W_OFFSET(x) (((x) & 0xF) << 28) -#define G_028A38_W_OFFSET(x) (((x) >> 28) & 0xF) -#define C_028A38_W_OFFSET 0x0FFFFFFF -#define R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL 0x028A3C -#define S_028A3C_X_CONV(x) (((x) & 0xF) << 0) -#define G_028A3C_X_CONV(x) (((x) >> 0) & 0xF) -#define C_028A3C_X_CONV 0xFFFFFFF0 -#define S_028A3C_X_OFFSET(x) (((x) & 0xF) << 4) -#define G_028A3C_X_OFFSET(x) (((x) >> 4) & 0xF) -#define C_028A3C_X_OFFSET 0xFFFFFF0F -#define S_028A3C_Y_CONV(x) (((x) & 0xF) << 8) -#define G_028A3C_Y_CONV(x) (((x) >> 8) & 0xF) -#define C_028A3C_Y_CONV 0xFFFFF0FF -#define S_028A3C_Y_OFFSET(x) (((x) & 0xF) << 12) -#define G_028A3C_Y_OFFSET(x) (((x) >> 12) & 0xF) -#define C_028A3C_Y_OFFSET 0xFFFF0FFF -#define S_028A3C_Z_CONV(x) (((x) & 0xF) << 16) -#define G_028A3C_Z_CONV(x) (((x) >> 16) & 0xF) -#define C_028A3C_Z_CONV 0xFFF0FFFF -#define S_028A3C_Z_OFFSET(x) (((x) & 0xF) << 20) -#define G_028A3C_Z_OFFSET(x) (((x) >> 20) & 0xF) -#define C_028A3C_Z_OFFSET 0xFF0FFFFF -#define S_028A3C_W_CONV(x) (((x) & 0xF) << 24) -#define G_028A3C_W_CONV(x) (((x) >> 24) & 0xF) -#define C_028A3C_W_CONV 0xF0FFFFFF -#define S_028A3C_W_OFFSET(x) (((x) & 0xF) << 28) -#define G_028A3C_W_OFFSET(x) (((x) >> 28) & 0xF) -#define C_028A3C_W_OFFSET 0x0FFFFFFF -#define R_028A40_VGT_GS_MODE 0x028A40 -#define S_028A40_MODE(x) (((x) & 0x3) << 0) -#define G_028A40_MODE(x) (((x) >> 0) & 0x3) -#define C_028A40_MODE 0xFFFFFFFC -#define S_028A40_ES_PASSTHRU(x) (((x) & 0x1) << 2) -#define G_028A40_ES_PASSTHRU(x) (((x) >> 2) & 0x1) -#define C_028A40_ES_PASSTHRU 0xFFFFFFFB -#define S_028A40_CUT_MODE(x) (((x) & 0x3) << 3) -#define G_028A40_CUT_MODE(x) (((x) >> 3) & 0x3) -#define C_028A40_CUT_MODE 0xFFFFFFE7 -#define R_028A4C_PA_SC_MODE_CNTL 0x028A4C -#define S_028A4C_MSAA_ENABLE(x) (((x) & 0x1) << 0) -#define G_028A4C_MSAA_ENABLE(x) (((x) >> 0) & 0x1) -#define C_028A4C_MSAA_ENABLE 0xFFFFFFFE -#define S_028A4C_CLIPRECT_ENABLE(x) (((x) & 0x1) << 1) -#define G_028A4C_CLIPRECT_ENABLE(x) (((x) >> 1) & 0x1) -#define C_028A4C_CLIPRECT_ENABLE 0xFFFFFFFD -#define S_028A4C_LINE_STIPPLE_ENABLE(x) (((x) & 0x1) << 2) -#define G_028A4C_LINE_STIPPLE_ENABLE(x) (((x) >> 2) & 0x1) -#define C_028A4C_LINE_STIPPLE_ENABLE 0xFFFFFFFB -#define S_028A4C_MULTI_CHIP_PRIM_DISCARD_ENAB(x) (((x) & 0x1) << 3) -#define G_028A4C_MULTI_CHIP_PRIM_DISCARD_ENAB(x) (((x) >> 3) & 0x1) -#define C_028A4C_MULTI_CHIP_PRIM_DISCARD_ENAB 0xFFFFFFF7 -#define S_028A4C_WALK_ORDER_ENABLE(x) (((x) & 0x1) << 4) -#define G_028A4C_WALK_ORDER_ENABLE(x) (((x) >> 4) & 0x1) -#define C_028A4C_WALK_ORDER_ENABLE 0xFFFFFFEF -#define S_028A4C_HALVE_DETAIL_SAMPLE_PERF(x) (((x) & 0x1) << 5) -#define G_028A4C_HALVE_DETAIL_SAMPLE_PERF(x) (((x) >> 5) & 0x1) -#define C_028A4C_HALVE_DETAIL_SAMPLE_PERF 0xFFFFFFDF -#define S_028A4C_WALK_SIZE(x) (((x) & 0x1) << 6) -#define G_028A4C_WALK_SIZE(x) (((x) >> 6) & 0x1) -#define C_028A4C_WALK_SIZE 0xFFFFFFBF -#define S_028A4C_WALK_ALIGNMENT(x) (((x) & 0x1) << 7) -#define G_028A4C_WALK_ALIGNMENT(x) (((x) >> 7) & 0x1) -#define C_028A4C_WALK_ALIGNMENT 0xFFFFFF7F -#define S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(x) (((x) & 0x1) << 8) -#define G_028A4C_WALK_ALIGN8_PRIM_FITS_ST(x) (((x) >> 8) & 0x1) -#define C_028A4C_WALK_ALIGN8_PRIM_FITS_ST 0xFFFFFEFF -#define S_028A4C_TILE_COVER_NO_SCISSOR(x) (((x) & 0x1) << 9) -#define G_028A4C_TILE_COVER_NO_SCISSOR(x) (((x) >> 9) & 0x1) -#define C_028A4C_TILE_COVER_NO_SCISSOR 0xFFFFFDFF -#define S_028A4C_KILL_PIX_POST_HI_Z(x) (((x) & 0x1) << 10) -#define G_028A4C_KILL_PIX_POST_HI_Z(x) (((x) >> 10) & 0x1) -#define C_028A4C_KILL_PIX_POST_HI_Z 0xFFFFFBFF -#define S_028A4C_KILL_PIX_POST_DETAIL_MASK(x) (((x) & 0x1) << 11) -#define G_028A4C_KILL_PIX_POST_DETAIL_MASK(x) (((x) >> 11) & 0x1) -#define C_028A4C_KILL_PIX_POST_DETAIL_MASK 0xFFFFF7FF -#define S_028A4C_MULTI_CHIP_SUPERTILE_ENABLE(x) (((x) & 0x1) << 12) -#define G_028A4C_MULTI_CHIP_SUPERTILE_ENABLE(x) (((x) >> 12) & 0x1) -#define C_028A4C_MULTI_CHIP_SUPERTILE_ENABLE 0xFFFFEFFF -#define S_028A4C_TILE_COVER_DISABLE(x) (((x) & 0x1) << 13) -#define G_028A4C_TILE_COVER_DISABLE(x) (((x) >> 13) & 0x1) -#define C_028A4C_TILE_COVER_DISABLE 0xFFFFDFFF -#define S_028A4C_FORCE_EOV_CNTDWN_ENABLE(x) (((x) & 0x1) << 14) -#define G_028A4C_FORCE_EOV_CNTDWN_ENABLE(x) (((x) >> 14) & 0x1) -#define C_028A4C_FORCE_EOV_CNTDWN_ENABLE 0xFFFFBFFF -#define S_028A4C_FORCE_EOV_TILE_ENABLE(x) (((x) & 0x1) << 15) -#define G_028A4C_FORCE_EOV_TILE_ENABLE(x) (((x) >> 15) & 0x1) -#define C_028A4C_FORCE_EOV_TILE_ENABLE 0xFFFF7FFF -#define S_028A4C_FORCE_EOV_REZ_ENABLE(x) (((x) & 0x1) << 16) -#define G_028A4C_FORCE_EOV_REZ_ENABLE(x) (((x) >> 16) & 0x1) -#define C_028A4C_FORCE_EOV_REZ_ENABLE 0xFFFEFFFF -#define S_028A4C_PS_ITER_SAMPLE(x) (((x) & 0x1) << 17) -#define G_028A4C_PS_ITER_SAMPLE(x) (((x) >> 17) & 0x1) -#define C_028A4C_PS_ITER_SAMPLE 0xFFFDFFFF -#define R_028A84_VGT_PRIMITIVEID_EN 0x028A84 -#define S_028A84_PRIMITIVEID_EN(x) (((x) & 0x1) << 0) -#define G_028A84_PRIMITIVEID_EN(x) (((x) >> 0) & 0x1) -#define C_028A84_PRIMITIVEID_EN 0xFFFFFFFE -#define R_028A94_VGT_MULTI_PRIM_IB_RESET_EN 0x028A94 -#define S_028A94_RESET_EN(x) (((x) & 0x1) << 0) -#define G_028A94_RESET_EN(x) (((x) >> 0) & 0x1) -#define C_028A94_RESET_EN 0xFFFFFFFE -#define R_028AA0_VGT_INSTANCE_STEP_RATE_0 0x028AA0 -#define S_028AA0_STEP_RATE(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028AA0_STEP_RATE(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028AA0_STEP_RATE 0x00000000 -#define R_028AA4_VGT_INSTANCE_STEP_RATE_1 0x028AA4 -#define S_028AA4_STEP_RATE(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028AA4_STEP_RATE(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028AA4_STEP_RATE 0x00000000 -#define R_028AB0_VGT_STRMOUT_EN 0x028AB0 -#define S_028AB0_STREAMOUT(x) (((x) & 0x1) << 0) -#define G_028AB0_STREAMOUT(x) (((x) >> 0) & 0x1) -#define C_028AB0_STREAMOUT 0xFFFFFFFE -#define R_028AB4_VGT_REUSE_OFF 0x028AB4 -#define S_028AB4_REUSE_OFF(x) (((x) & 0x1) << 0) -#define G_028AB4_REUSE_OFF(x) (((x) >> 0) & 0x1) -#define C_028AB4_REUSE_OFF 0xFFFFFFFE -#define R_028AB8_VGT_VTX_CNT_EN 0x028AB8 -#define S_028AB8_VTX_CNT_EN(x) (((x) & 0x1) << 0) -#define G_028AB8_VTX_CNT_EN(x) (((x) >> 0) & 0x1) -#define C_028AB8_VTX_CNT_EN 0xFFFFFFFE -#define R_028B20_VGT_STRMOUT_BUFFER_EN 0x028B20 -#define S_028B20_BUFFER_0_EN(x) (((x) & 0x1) << 0) -#define G_028B20_BUFFER_0_EN(x) (((x) >> 0) & 0x1) -#define C_028B20_BUFFER_0_EN 0xFFFFFFFE -#define S_028B20_BUFFER_1_EN(x) (((x) & 0x1) << 1) -#define G_028B20_BUFFER_1_EN(x) (((x) >> 1) & 0x1) -#define C_028B20_BUFFER_1_EN 0xFFFFFFFD -#define S_028B20_BUFFER_2_EN(x) (((x) & 0x1) << 2) -#define G_028B20_BUFFER_2_EN(x) (((x) >> 2) & 0x1) -#define C_028B20_BUFFER_2_EN 0xFFFFFFFB -#define S_028B20_BUFFER_3_EN(x) (((x) & 0x1) << 3) -#define G_028B20_BUFFER_3_EN(x) (((x) >> 3) & 0x1) -#define C_028B20_BUFFER_3_EN 0xFFFFFFF7 -#define R_028C20_PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX 0x028C20 -#define S_028C20_S4_X(x) (((x) & 0xF) << 0) -#define G_028C20_S4_X(x) (((x) >> 0) & 0xF) -#define C_028C20_S4_X 0xFFFFFFF0 -#define S_028C20_S4_Y(x) (((x) & 0xF) << 4) -#define G_028C20_S4_Y(x) (((x) >> 4) & 0xF) -#define C_028C20_S4_Y 0xFFFFFF0F -#define S_028C20_S5_X(x) (((x) & 0xF) << 8) -#define G_028C20_S5_X(x) (((x) >> 8) & 0xF) -#define C_028C20_S5_X 0xFFFFF0FF -#define S_028C20_S5_Y(x) (((x) & 0xF) << 12) -#define G_028C20_S5_Y(x) (((x) >> 12) & 0xF) -#define C_028C20_S5_Y 0xFFFF0FFF -#define S_028C20_S6_X(x) (((x) & 0xF) << 16) -#define G_028C20_S6_X(x) (((x) >> 16) & 0xF) -#define C_028C20_S6_X 0xFFF0FFFF -#define S_028C20_S6_Y(x) (((x) & 0xF) << 20) -#define G_028C20_S6_Y(x) (((x) >> 20) & 0xF) -#define C_028C20_S6_Y 0xFF0FFFFF -#define S_028C20_S7_X(x) (((x) & 0xF) << 24) -#define G_028C20_S7_X(x) (((x) >> 24) & 0xF) -#define C_028C20_S7_X 0xF0FFFFFF -#define S_028C20_S7_Y(x) (((x) & 0xF) << 28) -#define G_028C20_S7_Y(x) (((x) >> 28) & 0xF) -#define C_028C20_S7_Y 0x0FFFFFFF -#define R_028C30_CB_CLRCMP_CONTROL 0x028C30 -#define S_028C30_CLRCMP_FCN_SRC(x) (((x) & 0x7) << 0) -#define G_028C30_CLRCMP_FCN_SRC(x) (((x) >> 0) & 0x7) -#define C_028C30_CLRCMP_FCN_SRC 0xFFFFFFF8 -#define S_028C30_CLRCMP_FCN_DST(x) (((x) & 0x7) << 8) -#define G_028C30_CLRCMP_FCN_DST(x) (((x) >> 8) & 0x7) -#define C_028C30_CLRCMP_FCN_DST 0xFFFFF8FF -#define S_028C30_CLRCMP_FCN_SEL(x) (((x) & 0x3) << 24) -#define G_028C30_CLRCMP_FCN_SEL(x) (((x) >> 24) & 0x3) -#define C_028C30_CLRCMP_FCN_SEL 0xFCFFFFFF -#define R_028C34_CB_CLRCMP_SRC 0x028C34 -#define S_028C34_CLRCMP_SRC(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028C34_CLRCMP_SRC(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028C34_CLRCMP_SRC 0x00000000 -#define R_028C38_CB_CLRCMP_DST 0x028C38 -#define S_028C38_CLRCMP_DST(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028C38_CLRCMP_DST(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028C38_CLRCMP_DST 0x00000000 -#define R_028C3C_CB_CLRCMP_MSK 0x028C3C -#define S_028C3C_CLRCMP_MSK(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028C3C_CLRCMP_MSK(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028C3C_CLRCMP_MSK 0x00000000 -#define R_0085F0_CP_COHER_CNTL 0x0085F0 -#define S_0085F0_DEST_BASE_0_ENA(x) (((x) & 0x1) << 0) -#define G_0085F0_DEST_BASE_0_ENA(x) (((x) >> 0) & 0x1) -#define C_0085F0_DEST_BASE_0_ENA 0xFFFFFFFE -#define S_0085F0_DEST_BASE_1_ENA(x) (((x) & 0x1) << 1) -#define G_0085F0_DEST_BASE_1_ENA(x) (((x) >> 1) & 0x1) -#define C_0085F0_DEST_BASE_1_ENA 0xFFFFFFFD -#define S_0085F0_SO0_DEST_BASE_ENA(x) (((x) & 0x1) << 2) -#define G_0085F0_SO0_DEST_BASE_ENA(x) (((x) >> 2) & 0x1) -#define C_0085F0_SO0_DEST_BASE_ENA 0xFFFFFFFB -#define S_0085F0_SO1_DEST_BASE_ENA(x) (((x) & 0x1) << 3) -#define G_0085F0_SO1_DEST_BASE_ENA(x) (((x) >> 3) & 0x1) -#define C_0085F0_SO1_DEST_BASE_ENA 0xFFFFFFF7 -#define S_0085F0_SO2_DEST_BASE_ENA(x) (((x) & 0x1) << 4) -#define G_0085F0_SO2_DEST_BASE_ENA(x) (((x) >> 4) & 0x1) -#define C_0085F0_SO2_DEST_BASE_ENA 0xFFFFFFEF -#define S_0085F0_SO3_DEST_BASE_ENA(x) (((x) & 0x1) << 5) -#define G_0085F0_SO3_DEST_BASE_ENA(x) (((x) >> 5) & 0x1) -#define C_0085F0_SO3_DEST_BASE_ENA 0xFFFFFFDF -#define S_0085F0_CB0_DEST_BASE_ENA(x) (((x) & 0x1) << 6) -#define G_0085F0_CB0_DEST_BASE_ENA(x) (((x) >> 6) & 0x1) -#define C_0085F0_CB0_DEST_BASE_ENA 0xFFFFFFBF -#define S_0085F0_CB1_DEST_BASE_ENA(x) (((x) & 0x1) << 7) -#define G_0085F0_CB1_DEST_BASE_ENA(x) (((x) >> 7) & 0x1) -#define C_0085F0_CB1_DEST_BASE_ENA 0xFFFFFF7F -#define S_0085F0_CB2_DEST_BASE_ENA(x) (((x) & 0x1) << 8) -#define G_0085F0_CB2_DEST_BASE_ENA(x) (((x) >> 8) & 0x1) -#define C_0085F0_CB2_DEST_BASE_ENA 0xFFFFFEFF -#define S_0085F0_CB3_DEST_BASE_ENA(x) (((x) & 0x1) << 9) -#define G_0085F0_CB3_DEST_BASE_ENA(x) (((x) >> 9) & 0x1) -#define C_0085F0_CB3_DEST_BASE_ENA 0xFFFFFDFF -#define S_0085F0_CB4_DEST_BASE_ENA(x) (((x) & 0x1) << 10) -#define G_0085F0_CB4_DEST_BASE_ENA(x) (((x) >> 10) & 0x1) -#define C_0085F0_CB4_DEST_BASE_ENA 0xFFFFFBFF -#define S_0085F0_CB5_DEST_BASE_ENA(x) (((x) & 0x1) << 11) -#define G_0085F0_CB5_DEST_BASE_ENA(x) (((x) >> 11) & 0x1) -#define C_0085F0_CB5_DEST_BASE_ENA 0xFFFFF7FF -#define S_0085F0_CB6_DEST_BASE_ENA(x) (((x) & 0x1) << 12) -#define G_0085F0_CB6_DEST_BASE_ENA(x) (((x) >> 12) & 0x1) -#define C_0085F0_CB6_DEST_BASE_ENA 0xFFFFEFFF -#define S_0085F0_CB7_DEST_BASE_ENA(x) (((x) & 0x1) << 13) -#define G_0085F0_CB7_DEST_BASE_ENA(x) (((x) >> 13) & 0x1) -#define C_0085F0_CB7_DEST_BASE_ENA 0xFFFFDFFF -#define S_0085F0_DB_DEST_BASE_ENA(x) (((x) & 0x1) << 14) -#define G_0085F0_DB_DEST_BASE_ENA(x) (((x) >> 14) & 0x1) -#define C_0085F0_DB_DEST_BASE_ENA 0xFFFFBFFF -#define S_0085F0_CR_DEST_BASE_ENA(x) (((x) & 0x1) << 15) -#define G_0085F0_CR_DEST_BASE_ENA(x) (((x) >> 15) & 0x1) -#define C_0085F0_CR_DEST_BASE_ENA 0xFFFF7FFF -#define S_0085F0_TC_ACTION_ENA(x) (((x) & 0x1) << 23) -#define G_0085F0_TC_ACTION_ENA(x) (((x) >> 23) & 0x1) -#define C_0085F0_TC_ACTION_ENA 0xFF7FFFFF -#define S_0085F0_VC_ACTION_ENA(x) (((x) & 0x1) << 24) -#define G_0085F0_VC_ACTION_ENA(x) (((x) >> 24) & 0x1) -#define C_0085F0_VC_ACTION_ENA 0xFEFFFFFF -#define S_0085F0_CB_ACTION_ENA(x) (((x) & 0x1) << 25) -#define G_0085F0_CB_ACTION_ENA(x) (((x) >> 25) & 0x1) -#define C_0085F0_CB_ACTION_ENA 0xFDFFFFFF -#define S_0085F0_DB_ACTION_ENA(x) (((x) & 0x1) << 26) -#define G_0085F0_DB_ACTION_ENA(x) (((x) >> 26) & 0x1) -#define C_0085F0_DB_ACTION_ENA 0xFBFFFFFF -#define S_0085F0_SH_ACTION_ENA(x) (((x) & 0x1) << 27) -#define G_0085F0_SH_ACTION_ENA(x) (((x) >> 27) & 0x1) -#define C_0085F0_SH_ACTION_ENA 0xF7FFFFFF -#define S_0085F0_SMX_ACTION_ENA(x) (((x) & 0x1) << 28) -#define G_0085F0_SMX_ACTION_ENA(x) (((x) >> 28) & 0x1) -#define C_0085F0_SMX_ACTION_ENA 0xEFFFFFFF -#define S_0085F0_CR0_ACTION_ENA(x) (((x) & 0x1) << 29) -#define G_0085F0_CR0_ACTION_ENA(x) (((x) >> 29) & 0x1) -#define C_0085F0_CR0_ACTION_ENA 0xDFFFFFFF -#define S_0085F0_CR1_ACTION_ENA(x) (((x) & 0x1) << 30) -#define G_0085F0_CR1_ACTION_ENA(x) (((x) >> 30) & 0x1) -#define C_0085F0_CR1_ACTION_ENA 0xBFFFFFFF -#define S_0085F0_CR2_ACTION_ENA(x) (((x) & 0x1) << 31) -#define G_0085F0_CR2_ACTION_ENA(x) (((x) >> 31) & 0x1) -#define C_0085F0_CR2_ACTION_ENA 0x7FFFFFFF - - -#define R_02812C_CB_CLEAR_ALPHA 0x02812C -#define S_02812C_CLEAR_ALPHA(x) (((x) & 0xFFFFFFFF) << 0) -#define G_02812C_CLEAR_ALPHA(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_02812C_CLEAR_ALPHA 0x00000000 -#define R_028128_CB_CLEAR_BLUE 0x028128 -#define S_028128_CLEAR_BLUE(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028128_CLEAR_BLUE(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028128_CLEAR_BLUE 0x00000000 -#define R_028124_CB_CLEAR_GREEN 0x028124 -#define S_028124_CLEAR_GREEN(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028124_CLEAR_GREEN(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028124_CLEAR_GREEN 0x00000000 -#define R_028120_CB_CLEAR_RED 0x028120 -#define S_028120_CLEAR_RED(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028120_CLEAR_RED(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028120_CLEAR_RED 0x00000000 -#define R_02842C_CB_FOG_BLUE 0x02842C -#define S_02842C_FOG_BLUE(x) (((x) & 0xFFFFFFFF) << 0) -#define G_02842C_FOG_BLUE(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_02842C_FOG_BLUE 0x00000000 -#define R_028428_CB_FOG_GREEN 0x028428 -#define S_028428_FOG_GREEN(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028428_FOG_GREEN(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028428_FOG_GREEN 0x00000000 -#define R_028424_CB_FOG_RED 0x028424 -#define S_028424_FOG_RED(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028424_FOG_RED(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028424_FOG_RED 0x00000000 -#define R_03C000_SQ_TEX_SAMPLER_WORD0_0 0x03C000 -#define S_03C000_CLAMP_X(x) (((x) & 0x7) << 0) -#define G_03C000_CLAMP_X(x) (((x) >> 0) & 0x7) -#define C_03C000_CLAMP_X 0xFFFFFFF8 -#define S_03C000_CLAMP_Y(x) (((x) & 0x7) << 3) -#define G_03C000_CLAMP_Y(x) (((x) >> 3) & 0x7) -#define C_03C000_CLAMP_Y 0xFFFFFFC7 -#define S_03C000_CLAMP_Z(x) (((x) & 0x7) << 6) -#define G_03C000_CLAMP_Z(x) (((x) >> 6) & 0x7) -#define C_03C000_CLAMP_Z 0xFFFFFE3F -#define S_03C000_XY_MAG_FILTER(x) (((x) & 0x7) << 9) -#define G_03C000_XY_MAG_FILTER(x) (((x) >> 9) & 0x7) -#define C_03C000_XY_MAG_FILTER 0xFFFFF1FF -#define S_03C000_XY_MIN_FILTER(x) (((x) & 0x7) << 12) -#define G_03C000_XY_MIN_FILTER(x) (((x) >> 12) & 0x7) -#define C_03C000_XY_MIN_FILTER 0xFFFF8FFF -#define S_03C000_Z_FILTER(x) (((x) & 0x3) << 15) -#define G_03C000_Z_FILTER(x) (((x) >> 15) & 0x3) -#define C_03C000_Z_FILTER 0xFFFE7FFF -#define S_03C000_MIP_FILTER(x) (((x) & 0x3) << 17) -#define G_03C000_MIP_FILTER(x) (((x) >> 17) & 0x3) -#define C_03C000_MIP_FILTER 0xFFF9FFFF -#define S_03C000_BORDER_COLOR_TYPE(x) (((x) & 0x3) << 22) -#define G_03C000_BORDER_COLOR_TYPE(x) (((x) >> 22) & 0x3) -#define C_03C000_BORDER_COLOR_TYPE 0xFF3FFFFF -#define S_03C000_POINT_SAMPLING_CLAMP(x) (((x) & 0x1) << 24) -#define G_03C000_POINT_SAMPLING_CLAMP(x) (((x) >> 24) & 0x1) -#define C_03C000_POINT_SAMPLING_CLAMP 0xFEFFFFFF -#define S_03C000_TEX_ARRAY_OVERRIDE(x) (((x) & 0x1) << 25) -#define G_03C000_TEX_ARRAY_OVERRIDE(x) (((x) >> 25) & 0x1) -#define C_03C000_TEX_ARRAY_OVERRIDE 0xFDFFFFFF -#define S_03C000_DEPTH_COMPARE_FUNCTION(x) (((x) & 0x7) << 26) -#define G_03C000_DEPTH_COMPARE_FUNCTION(x) (((x) >> 26) & 0x7) -#define C_03C000_DEPTH_COMPARE_FUNCTION 0xE3FFFFFF -#define S_03C000_CHROMA_KEY(x) (((x) & 0x3) << 29) -#define G_03C000_CHROMA_KEY(x) (((x) >> 29) & 0x3) -#define C_03C000_CHROMA_KEY 0x9FFFFFFF -#define S_03C000_LOD_USES_MINOR_AXIS(x) (((x) & 0x1) << 31) -#define G_03C000_LOD_USES_MINOR_AXIS(x) (((x) >> 31) & 0x1) -#define C_03C000_LOD_USES_MINOR_AXIS 0x7FFFFFFF -#define R_03C004_SQ_TEX_SAMPLER_WORD1_0 0x03C004 -#define S_03C004_MIN_LOD(x) (((x) & 0x3FF) << 0) -#define G_03C004_MIN_LOD(x) (((x) >> 0) & 0x3FF) -#define C_03C004_MIN_LOD 0xFFFFFC00 -#define S_03C004_MAX_LOD(x) (((x) & 0x3FF) << 10) -#define G_03C004_MAX_LOD(x) (((x) >> 10) & 0x3FF) -#define C_03C004_MAX_LOD 0xFFF003FF -#define S_03C004_LOD_BIAS(x) (((x) & 0xFFF) << 20) -#define G_03C004_LOD_BIAS(x) (((x) >> 20) & 0xFFF) -#define C_03C004_LOD_BIAS 0x000FFFFF -#define R_03C008_SQ_TEX_SAMPLER_WORD2_0 0x03C008 -#define S_03C008_LOD_BIAS_SEC(x) (((x) & 0xFFF) << 0) -#define G_03C008_LOD_BIAS_SEC(x) (((x) >> 0) & 0xFFF) -#define C_03C008_LOD_BIAS_SEC 0xFFFFF000 -#define S_03C008_MC_COORD_TRUNCATE(x) (((x) & 0x1) << 12) -#define G_03C008_MC_COORD_TRUNCATE(x) (((x) >> 12) & 0x1) -#define C_03C008_MC_COORD_TRUNCATE 0xFFFFEFFF -#define S_03C008_FORCE_DEGAMMA(x) (((x) & 0x1) << 13) -#define G_03C008_FORCE_DEGAMMA(x) (((x) >> 13) & 0x1) -#define C_03C008_FORCE_DEGAMMA 0xFFFFDFFF -#define S_03C008_HIGH_PRECISION_FILTER(x) (((x) & 0x1) << 14) -#define G_03C008_HIGH_PRECISION_FILTER(x) (((x) >> 14) & 0x1) -#define C_03C008_HIGH_PRECISION_FILTER 0xFFFFBFFF -#define S_03C008_PERF_MIP(x) (((x) & 0x7) << 15) -#define G_03C008_PERF_MIP(x) (((x) >> 15) & 0x7) -#define C_03C008_PERF_MIP 0xFFFC7FFF -#define S_03C008_PERF_Z(x) (((x) & 0x3) << 18) -#define G_03C008_PERF_Z(x) (((x) >> 18) & 0x3) -#define C_03C008_PERF_Z 0xFFF3FFFF -#define S_03C008_FETCH_4(x) (((x) & 0x1) << 26) -#define G_03C008_FETCH_4(x) (((x) >> 26) & 0x1) -#define C_03C008_FETCH_4 0xFBFFFFFF -#define S_03C008_SAMPLE_IS_PCF(x) (((x) & 0x1) << 27) -#define G_03C008_SAMPLE_IS_PCF(x) (((x) >> 27) & 0x1) -#define C_03C008_SAMPLE_IS_PCF 0xF7FFFFFF -#define S_03C008_TYPE(x) (((x) & 0x1) << 31) -#define G_03C008_TYPE(x) (((x) >> 31) & 0x1) -#define C_03C008_TYPE 0x7FFFFFFF -#define R_00A40C_TD_PS_SAMPLER0_BORDER_ALPHA 0x00A40C -#define S_00A40C_BORDER_ALPHA(x) (((x) & 0xFFFFFFFF) << 0) -#define G_00A40C_BORDER_ALPHA(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_00A40C_BORDER_ALPHA 0x00000000 -#define R_00A408_TD_PS_SAMPLER0_BORDER_BLUE 0x00A408 -#define S_00A408_BORDER_BLUE(x) (((x) & 0xFFFFFFFF) << 0) -#define G_00A408_BORDER_BLUE(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_00A408_BORDER_BLUE 0x00000000 -#define R_00A404_TD_PS_SAMPLER0_BORDER_GREEN 0x00A404 -#define S_00A404_BORDER_GREEN(x) (((x) & 0xFFFFFFFF) << 0) -#define G_00A404_BORDER_GREEN(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_00A404_BORDER_GREEN 0x00000000 -#define R_00A400_TD_PS_SAMPLER0_BORDER_RED 0x00A400 -#define S_00A400_BORDER_RED(x) (((x) & 0xFFFFFFFF) << 0) -#define G_00A400_BORDER_RED(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_00A400_BORDER_RED 0x00000000 -#define R_00A60C_TD_VS_SAMPLER0_BORDER_ALPHA 0x00A60C -#define S_00A60C_BORDER_ALPHA(x) (((x) & 0xFFFFFFFF) << 0) -#define G_00A60C_BORDER_ALPHA(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_00A60C_BORDER_ALPHA 0x00000000 -#define R_00A608_TD_VS_SAMPLER0_BORDER_BLUE 0x00A608 -#define S_00A608_BORDER_BLUE(x) (((x) & 0xFFFFFFFF) << 0) -#define G_00A608_BORDER_BLUE(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_00A608_BORDER_BLUE 0x00000000 -#define R_00A604_TD_VS_SAMPLER0_BORDER_GREEN 0x00A604 -#define S_00A604_BORDER_GREEN(x) (((x) & 0xFFFFFFFF) << 0) -#define G_00A604_BORDER_GREEN(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_00A604_BORDER_GREEN 0x00000000 -#define R_00A600_TD_VS_SAMPLER0_BORDER_RED 0x00A600 -#define S_00A600_BORDER_RED(x) (((x) & 0xFFFFFFFF) << 0) -#define G_00A600_BORDER_RED(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_00A600_BORDER_RED 0x00000000 -#define R_00A80C_TD_GS_SAMPLER0_BORDER_ALPHA 0x00A80C -#define S_00A80C_BORDER_ALPHA(x) (((x) & 0xFFFFFFFF) << 0) -#define G_00A80C_BORDER_ALPHA(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_00A80C_BORDER_ALPHA 0x00000000 -#define R_00A808_TD_GS_SAMPLER0_BORDER_BLUE 0x00A808 -#define S_00A808_BORDER_BLUE(x) (((x) & 0xFFFFFFFF) << 0) -#define G_00A808_BORDER_BLUE(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_00A808_BORDER_BLUE 0x00000000 -#define R_00A804_TD_GS_SAMPLER0_BORDER_GREEN 0x00A804 -#define S_00A804_BORDER_GREEN(x) (((x) & 0xFFFFFFFF) << 0) -#define G_00A804_BORDER_GREEN(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_00A804_BORDER_GREEN 0x00000000 -#define R_00A800_TD_GS_SAMPLER0_BORDER_RED 0x00A800 -#define S_00A800_BORDER_RED(x) (((x) & 0xFFFFFFFF) << 0) -#define G_00A800_BORDER_RED(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_00A800_BORDER_RED 0x00000000 -#define R_030000_SQ_ALU_CONSTANT0_0 0x030000 -#define S_030000_X(x) (((x) & 0xFFFFFFFF) << 0) -#define G_030000_X(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_030000_X 0x00000000 -#define R_030004_SQ_ALU_CONSTANT1_0 0x030004 -#define S_030004_Y(x) (((x) & 0xFFFFFFFF) << 0) -#define G_030004_Y(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_030004_Y 0x00000000 -#define R_030008_SQ_ALU_CONSTANT2_0 0x030008 -#define S_030008_Z(x) (((x) & 0xFFFFFFFF) << 0) -#define G_030008_Z(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_030008_Z 0x00000000 -#define R_03000C_SQ_ALU_CONSTANT3_0 0x03000C -#define S_03000C_W(x) (((x) & 0xFFFFFFFF) << 0) -#define G_03000C_W(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_03000C_W 0x00000000 -#define R_0287E4_VGT_DMA_BASE_HI 0x0287E4 -#define R_0287E8_VGT_DMA_BASE 0x0287E8 -#define R_028E20_PA_CL_UCP0_X 0x028E20 -#define R_028E24_PA_CL_UCP0_Y 0x028E24 -#define R_028E28_PA_CL_UCP0_Z 0x028E28 -#define R_028E2C_PA_CL_UCP0_W 0x028E2C -#define R_028E30_PA_CL_UCP1_X 0x028E30 -#define R_028E34_PA_CL_UCP1_Y 0x028E34 -#define R_028E38_PA_CL_UCP1_Z 0x028E38 -#define R_028E3C_PA_CL_UCP1_W 0x028E3C -#define R_028E40_PA_CL_UCP2_X 0x028E40 -#define R_028E44_PA_CL_UCP2_Y 0x028E44 -#define R_028E48_PA_CL_UCP2_Z 0x028E48 -#define R_028E4C_PA_CL_UCP2_W 0x028E4C -#define R_028E50_PA_CL_UCP3_X 0x028E50 -#define R_028E54_PA_CL_UCP3_Y 0x028E54 -#define R_028E58_PA_CL_UCP3_Z 0x028E58 -#define R_028E5C_PA_CL_UCP3_W 0x028E5C -#define R_028E60_PA_CL_UCP4_X 0x028E60 -#define R_028E64_PA_CL_UCP4_Y 0x028E64 -#define R_028E68_PA_CL_UCP4_Z 0x028E68 -#define R_028E6C_PA_CL_UCP4_W 0x028E6C -#define R_028E70_PA_CL_UCP5_X 0x028E70 -#define R_028E74_PA_CL_UCP5_Y 0x028E74 -#define R_028E78_PA_CL_UCP5_Z 0x028E78 -#define R_028E7C_PA_CL_UCP5_W 0x028E7C -#define R_038000_RESOURCE0_WORD0 0x038000 -#define R_038004_RESOURCE0_WORD1 0x038004 -#define R_038008_RESOURCE0_WORD2 0x038008 -#define R_03800C_RESOURCE0_WORD3 0x03800C -#define R_038010_RESOURCE0_WORD4 0x038010 -#define R_038014_RESOURCE0_WORD5 0x038014 -#define R_038018_RESOURCE0_WORD6 0x038018 - -#define R_028140_ALU_CONST_BUFFER_SIZE_PS_0 0x00028140 -#define R_028180_ALU_CONST_BUFFER_SIZE_VS_0 0x00028180 -#define R_028940_ALU_CONST_CACHE_PS_0 0x00028940 -#define R_028980_ALU_CONST_CACHE_VS_0 0x00028980 - -#define R_03CFF0_SQ_VTX_BASE_VTX_LOC 0x03CFF0 -#define R_03CFF4_SQ_VTX_START_INST_LOC 0x03CFF4 - -#endif diff --git a/src/gallium/winsys/r600/drm/radeon_bo.c b/src/gallium/winsys/r600/drm/radeon_bo.c deleted file mode 100644 index 45cf6f09671..00000000000 --- a/src/gallium/winsys/r600/drm/radeon_bo.c +++ /dev/null @@ -1,256 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse - */ -#define _FILE_OFFSET_BITS 64 -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/mman.h> -#include <errno.h> -#include "r600_priv.h" -#include "xf86drm.h" -#include "radeon_drm.h" - -int radeon_bo_fixed_map(struct radeon *radeon, struct radeon_bo *bo) -{ - struct drm_radeon_gem_mmap args; - void *ptr; - int r; - - /* Zero out args to make valgrind happy */ - memset(&args, 0, sizeof(args)); - args.handle = bo->handle; - args.offset = 0; - args.size = (uint64_t)bo->size; - r = drmCommandWriteRead(radeon->fd, DRM_RADEON_GEM_MMAP, - &args, sizeof(args)); - if (r) { - fprintf(stderr, "error mapping %p 0x%08X (error = %d)\n", - bo, bo->handle, r); - return r; - } - ptr = mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED, radeon->fd, args.addr_ptr); - if (ptr == MAP_FAILED) { - fprintf(stderr, "%s failed to map bo\n", __func__); - return -errno; - } - bo->data = ptr; - - bo->map_count++; - return 0; -} - -static void radeon_bo_fixed_unmap(struct radeon *radeon, struct radeon_bo *bo) -{ - if (bo->data) { - munmap(bo->data, bo->size); - bo->data = NULL; - } -} - -struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle, - unsigned size, unsigned alignment, unsigned initial_domain) -{ - struct radeon_bo *bo; - int r; - - if (handle) { - pipe_mutex_lock(radeon->bo_handles_mutex); - bo = util_hash_table_get(radeon->bo_handles, - (void *)(uintptr_t)handle); - if (bo) { - struct radeon_bo *b = NULL; - radeon_bo_reference(radeon, &b, bo); - goto done; - } - } - bo = calloc(1, sizeof(*bo)); - if (bo == NULL) { - return NULL; - } - bo->size = size; - bo->handle = handle; - pipe_reference_init(&bo->reference, 1); - bo->alignment = alignment; - LIST_INITHEAD(&bo->fencedlist); - - if (handle) { - struct drm_gem_open open_arg; - - memset(&open_arg, 0, sizeof(open_arg)); - open_arg.name = handle; - r = drmIoctl(radeon->fd, DRM_IOCTL_GEM_OPEN, &open_arg); - if (r != 0) { - free(bo); - return NULL; - } - bo->name = handle; - bo->handle = open_arg.handle; - bo->size = open_arg.size; - bo->shared = TRUE; - } else { - struct drm_radeon_gem_create args = {}; - - args.size = size; - args.alignment = alignment; - args.initial_domain = initial_domain; - args.flags = 0; - args.handle = 0; - r = drmCommandWriteRead(radeon->fd, DRM_RADEON_GEM_CREATE, - &args, sizeof(args)); - bo->handle = args.handle; - if (r) { - fprintf(stderr, "Failed to allocate :\n"); - fprintf(stderr, " size : %d bytes\n", size); - fprintf(stderr, " alignment : %d bytes\n", alignment); - free(bo); - return NULL; - } - } - - if (handle) - util_hash_table_set(radeon->bo_handles, (void *)(uintptr_t)handle, bo); -done: - if (handle) - pipe_mutex_unlock(radeon->bo_handles_mutex); - - return bo; -} - -static void radeon_bo_destroy(struct radeon *radeon, struct radeon_bo *bo) -{ - struct drm_gem_close args; - - if (bo->name) { - pipe_mutex_lock(radeon->bo_handles_mutex); - util_hash_table_remove(radeon->bo_handles, - (void *)(uintptr_t)bo->name); - pipe_mutex_unlock(radeon->bo_handles_mutex); - } - LIST_DEL(&bo->fencedlist); - radeon_bo_fixed_unmap(radeon, bo); - memset(&args, 0, sizeof(args)); - args.handle = bo->handle; - drmIoctl(radeon->fd, DRM_IOCTL_GEM_CLOSE, &args); - memset(bo, 0, sizeof(struct radeon_bo)); - free(bo); -} - -void radeon_bo_reference(struct radeon *radeon, - struct radeon_bo **dst, - struct radeon_bo *src) -{ - struct radeon_bo *old = *dst; - if (pipe_reference(&(*dst)->reference, &src->reference)) { - radeon_bo_destroy(radeon, old); - } - *dst = src; -} - -int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo) -{ - struct drm_radeon_gem_wait_idle args; - int ret; - - if (!bo->shared) { - if (!bo->fence) - return 0; - if (bo->fence <= *radeon->cfence) { - LIST_DELINIT(&bo->fencedlist); - bo->fence = 0; - return 0; - } - } - - /* Zero out args to make valgrind happy */ - memset(&args, 0, sizeof(args)); - args.handle = bo->handle; - do { - ret = drmCommandWriteRead(radeon->fd, DRM_RADEON_GEM_WAIT_IDLE, - &args, sizeof(args)); - } while (ret == -EBUSY); - return ret; -} - -int radeon_bo_busy(struct radeon *radeon, struct radeon_bo *bo, uint32_t *domain) -{ - struct drm_radeon_gem_busy args; - int ret; - - if (!bo->shared) { - if (!bo->fence) - return 0; - if (bo->fence <= *radeon->cfence) { - LIST_DELINIT(&bo->fencedlist); - bo->fence = 0; - return 0; - } - } - - memset(&args, 0, sizeof(args)); - args.handle = bo->handle; - args.domain = 0; - - ret = drmCommandWriteRead(radeon->fd, DRM_RADEON_GEM_BUSY, - &args, sizeof(args)); - - *domain = args.domain; - return ret; -} - -int radeon_bo_get_tiling_flags(struct radeon *radeon, - struct radeon_bo *bo, - uint32_t *tiling_flags, - uint32_t *pitch) -{ - struct drm_radeon_gem_get_tiling args = {}; - int ret; - - args.handle = bo->handle; - ret = drmCommandWriteRead(radeon->fd, DRM_RADEON_GEM_GET_TILING, - &args, sizeof(args)); - if (ret) - return ret; - - *tiling_flags = args.tiling_flags; - *pitch = args.pitch; - return ret; -} - -int radeon_bo_get_name(struct radeon *radeon, - struct radeon_bo *bo, - uint32_t *name) -{ - struct drm_gem_flink flink; - int ret; - - flink.handle = bo->handle; - ret = drmIoctl(radeon->fd, DRM_IOCTL_GEM_FLINK, &flink); - if (ret) - return ret; - - *name = flink.name; - return ret; -} diff --git a/src/gallium/winsys/r600/drm/radeon_pciid.c b/src/gallium/winsys/r600/drm/radeon_pciid.c index f54a7c8fe72..87572417c80 100644 --- a/src/gallium/winsys/r600/drm/radeon_pciid.c +++ b/src/gallium/winsys/r600/drm/radeon_pciid.c @@ -23,7 +23,6 @@ * Authors: * Jerome Glisse */ -#include <stdlib.h> #include "r600_priv.h" struct pci_id { diff --git a/src/gallium/winsys/radeon/drm/Android.mk b/src/gallium/winsys/radeon/drm/Android.mk new file mode 100644 index 00000000000..c1922498225 --- /dev/null +++ b/src/gallium/winsys/radeon/drm/Android.mk @@ -0,0 +1,40 @@ +# Mesa 3-D graphics library +# +# Copyright (C) 2011 Chia-I Wu <[email protected]> +# Copyright (C) 2011 LunarG Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +LOCAL_PATH := $(call my-dir) + +# get C_SOURCES +include $(LOCAL_PATH)/Makefile.sources + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(C_SOURCES) + +LOCAL_C_INCLUDES := \ + $(DRM_TOP) \ + $(DRM_TOP)/include/drm + +LOCAL_MODULE := libmesa_winsys_radeon + +include $(GALLIUM_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/src/gallium/winsys/radeon/drm/Makefile b/src/gallium/winsys/radeon/drm/Makefile index 913e6ad186a..68b9efebfa4 100644 --- a/src/gallium/winsys/radeon/drm/Makefile +++ b/src/gallium/winsys/radeon/drm/Makefile @@ -4,10 +4,8 @@ include $(TOP)/configs/current LIBNAME = radeonwinsys -C_SOURCES = \ - radeon_drm_bo.c \ - radeon_drm_cs.c \ - radeon_drm_winsys.c +# get C_SOURCES +include Makefile.sources LIBRARY_INCLUDES = -I$(TOP)/include \ $(shell pkg-config libdrm --cflags-only-I) diff --git a/src/gallium/winsys/radeon/drm/Makefile.sources b/src/gallium/winsys/radeon/drm/Makefile.sources new file mode 100644 index 00000000000..1d18d6164d5 --- /dev/null +++ b/src/gallium/winsys/radeon/drm/Makefile.sources @@ -0,0 +1,4 @@ +C_SOURCES := \ + radeon_drm_bo.c \ + radeon_drm_cs.c \ + radeon_drm_winsys.c diff --git a/src/gallium/winsys/radeon/drm/SConscript b/src/gallium/winsys/radeon/drm/SConscript index 2edb1e94645..e5048d6255d 100644 --- a/src/gallium/winsys/radeon/drm/SConscript +++ b/src/gallium/winsys/radeon/drm/SConscript @@ -2,11 +2,7 @@ Import('*') env = env.Clone() -radeon_sources = [ - 'radeon_drm_bo.c', - 'radeon_drm_cs.c', - 'radeon_drm_winsys.c', -] +radeon_sources = env.ParseSourceList('Makefile.sources', 'C_SOURCES') env.PkgUseModules('DRM') diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c index b6f12727e81..b45efe5f49c 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c @@ -31,11 +31,11 @@ #include "util/u_memory.h" #include "util/u_simple_list.h" #include "os/os_thread.h" +#include "os/os_mman.h" #include "state_tracker/drm_driver.h" #include <sys/ioctl.h> -#include <sys/mman.h> #include <xf86drm.h> #include <errno.h> @@ -43,6 +43,21 @@ #define RADEON_BO_FLAGS_MICRO_TILE 2 #define RADEON_BO_FLAGS_MICRO_TILE_SQUARE 0x20 +#ifndef DRM_RADEON_GEM_WAIT +#define DRM_RADEON_GEM_WAIT 0x2b + +#define RADEON_GEM_NO_WAIT 0x1 +#define RADEON_GEM_USAGE_READ 0x2 +#define RADEON_GEM_USAGE_WRITE 0x4 + +struct drm_radeon_gem_wait { + uint32_t handle; + uint32_t flags; /* one of RADEON_GEM_* */ +}; + +#endif + + extern const struct pb_vtbl radeon_bo_vtbl; @@ -87,39 +102,49 @@ static struct radeon_bo *get_radeon_bo(struct pb_buffer *_buf) return bo; } -static void radeon_bo_wait(struct pb_buffer *_buf) +static void radeon_bo_wait(struct pb_buffer *_buf, enum radeon_bo_usage usage) { - struct radeon_bo *bo = get_radeon_bo(pb_buffer(_buf)); - struct drm_radeon_gem_wait_idle args = {}; + struct radeon_bo *bo = get_radeon_bo(_buf); while (p_atomic_read(&bo->num_active_ioctls)) { sched_yield(); } - args.handle = bo->handle; - while (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE, - &args, sizeof(args)) == -EBUSY); - - bo->busy_for_write = FALSE; + if (bo->rws->info.drm_minor >= 12) { + struct drm_radeon_gem_wait args = {}; + args.handle = bo->handle; + args.flags = usage; + while (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_WAIT, + &args, sizeof(args)) == -EBUSY); + } else { + struct drm_radeon_gem_wait_idle args = {}; + args.handle = bo->handle; + while (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE, + &args, sizeof(args)) == -EBUSY); + } } -static boolean radeon_bo_is_busy(struct pb_buffer *_buf) +static boolean radeon_bo_is_busy(struct pb_buffer *_buf, + enum radeon_bo_usage usage) { - struct radeon_bo *bo = get_radeon_bo(pb_buffer(_buf)); - struct drm_radeon_gem_busy args = {}; - boolean busy; + struct radeon_bo *bo = get_radeon_bo(_buf); if (p_atomic_read(&bo->num_active_ioctls)) { return TRUE; } - args.handle = bo->handle; - busy = drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY, - &args, sizeof(args)) != 0; - - if (!busy) - bo->busy_for_write = FALSE; - return busy; + if (bo->rws->info.drm_minor >= 12) { + struct drm_radeon_gem_wait args = {}; + args.handle = bo->handle; + args.flags = usage | RADEON_GEM_NO_WAIT; + return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_WAIT, + &args, sizeof(args)) != 0; + } else { + struct drm_radeon_gem_busy args = {}; + args.handle = bo->handle; + return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY, + &args, sizeof(args)) != 0; + } } static void radeon_bo_destroy(struct pb_buffer *_buf) @@ -135,7 +160,7 @@ static void radeon_bo_destroy(struct pb_buffer *_buf) } if (bo->ptr) - munmap(bo->ptr, bo->size); + os_munmap(bo->ptr, bo->size); /* Close object. */ args.handle = bo->handle; @@ -172,13 +197,33 @@ static void *radeon_bo_map_internal(struct pb_buffer *_buf, if (!(flags & PB_USAGE_UNSYNCHRONIZED)) { /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */ if (flags & PB_USAGE_DONTBLOCK) { - if (radeon_bo_is_referenced_by_cs(cs, bo)) { - cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC); - return NULL; - } + if (!(flags & PB_USAGE_CPU_WRITE)) { + /* Mapping for read. + * + * Since we are mapping for read, we don't need to wait + * if the GPU is using the buffer for read too + * (neither one is changing it). + * + * Only check whether the buffer is being used for write. */ + if (radeon_bo_is_referenced_by_cs_for_write(cs, bo)) { + cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC); + return NULL; + } + + if (radeon_bo_is_busy((struct pb_buffer*)bo, + RADEON_USAGE_WRITE)) { + return NULL; + } + } else { + if (radeon_bo_is_referenced_by_cs(cs, bo)) { + cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC); + return NULL; + } - if (radeon_bo_is_busy((struct pb_buffer*)bo)) { - return NULL; + if (radeon_bo_is_busy((struct pb_buffer*)bo, + RADEON_USAGE_READWRITE)) { + return NULL; + } } } else { if (!(flags & PB_USAGE_CPU_WRITE)) { @@ -191,14 +236,9 @@ static void *radeon_bo_map_internal(struct pb_buffer *_buf, * Only check whether the buffer is being used for write. */ if (radeon_bo_is_referenced_by_cs_for_write(cs, bo)) { cs->flush_cs(cs->flush_data, 0); - radeon_bo_wait((struct pb_buffer*)bo); - } else if (bo->busy_for_write) { - /* Update the busy_for_write field (done by radeon_bo_is_busy) - * and wait if needed. */ - if (radeon_bo_is_busy((struct pb_buffer*)bo)) { - radeon_bo_wait((struct pb_buffer*)bo); - } } + radeon_bo_wait((struct pb_buffer*)bo, + RADEON_USAGE_WRITE); } else { /* Mapping for write. */ if (radeon_bo_is_referenced_by_cs(cs, bo)) { @@ -209,7 +249,7 @@ static void *radeon_bo_map_internal(struct pb_buffer *_buf, radeon_drm_cs_sync_flush(cs); } - radeon_bo_wait((struct pb_buffer*)bo); + radeon_bo_wait((struct pb_buffer*)bo, RADEON_USAGE_READWRITE); } } } @@ -238,7 +278,7 @@ static void *radeon_bo_map_internal(struct pb_buffer *_buf, return NULL; } - ptr = mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED, + ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED, bo->rws->fd, args.addr_ptr); if (ptr == MAP_FAILED) { pipe_mutex_unlock(bo->map_mutex); @@ -345,7 +385,7 @@ static boolean radeon_bomgr_is_buffer_busy(struct pb_manager *_mgr, return TRUE; } - if (radeon_bo_is_busy((struct pb_buffer*)bo)) { + if (radeon_bo_is_busy((struct pb_buffer*)bo, RADEON_USAGE_READWRITE)) { return TRUE; } @@ -395,16 +435,14 @@ static void *radeon_bo_map(struct pb_buffer *buf, struct radeon_winsys_cs *cs, enum pipe_transfer_usage usage) { - struct pb_buffer *_buf = pb_buffer(buf); - - return pb_map(_buf, get_pb_usage_from_transfer_flags(usage), cs); + return pb_map(buf, get_pb_usage_from_transfer_flags(usage), cs); } static void radeon_bo_get_tiling(struct pb_buffer *_buf, enum radeon_bo_layout *microtiled, enum radeon_bo_layout *macrotiled) { - struct radeon_bo *bo = get_radeon_bo(pb_buffer(_buf)); + struct radeon_bo *bo = get_radeon_bo(_buf); struct drm_radeon_gem_set_tiling args = {}; args.handle = bo->handle; @@ -429,7 +467,7 @@ static void radeon_bo_set_tiling(struct pb_buffer *_buf, enum radeon_bo_layout macrotiled, uint32_t pitch) { - struct radeon_bo *bo = get_radeon_bo(pb_buffer(_buf)); + struct radeon_bo *bo = get_radeon_bo(_buf); struct radeon_drm_cs *cs = radeon_drm_cs(rcs); struct drm_radeon_gem_set_tiling args = {}; @@ -464,12 +502,10 @@ static struct radeon_winsys_cs_handle *radeon_drm_get_cs_handle( struct pb_buffer *_buf) { /* return radeon_bo. */ - return (struct radeon_winsys_cs_handle*) - get_radeon_bo(pb_buffer(_buf)); + return (struct radeon_winsys_cs_handle*)get_radeon_bo(_buf); } -static unsigned get_pb_usage_from_create_flags(unsigned bind, unsigned usage, - enum radeon_bo_domain domain) +static unsigned get_pb_usage_from_create_flags(enum radeon_bo_domain domain) { unsigned res = 0; @@ -487,7 +523,6 @@ radeon_winsys_bo_create(struct radeon_winsys *rws, unsigned size, unsigned alignment, unsigned bind, - unsigned usage, enum radeon_bo_domain domain) { struct radeon_drm_winsys *ws = radeon_drm_winsys(rws); @@ -497,10 +532,11 @@ radeon_winsys_bo_create(struct radeon_winsys *rws, memset(&desc, 0, sizeof(desc)); desc.alignment = alignment; - desc.usage = get_pb_usage_from_create_flags(bind, usage, domain); + desc.usage = get_pb_usage_from_create_flags(domain); /* Assign a buffer manager. */ - if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) + if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER | + PIPE_BIND_CONSTANT_BUFFER)) provider = ws->cman; else provider = ws->kman; @@ -587,7 +623,7 @@ static boolean radeon_winsys_bo_get_handle(struct pb_buffer *buffer, struct winsys_handle *whandle) { struct drm_gem_flink flink = {}; - struct radeon_bo *bo = get_radeon_bo(pb_buffer(buffer)); + struct radeon_bo *bo = get_radeon_bo(buffer); if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) { if (!bo->flinked) { diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h index b94881bc4ce..047ea6b1cf2 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h @@ -60,13 +60,6 @@ struct radeon_bo { * thread, is this bo referenced in? */ int num_active_ioctls; - /* Whether the buffer has been relocated for write and is busy since then. - * This field is updated in: - * - radeon_drm_cs_flush (to TRUE if it's relocated for write) - * - radeon_bo_is_busy (to FALSE if it's not busy) - * - radeon_bo_wait (to FALSE) */ - boolean busy_for_write; - boolean flinked; uint32_t flink; }; @@ -80,10 +73,4 @@ void radeon_bo_reference(struct radeon_bo **dst, struct radeon_bo *src) pb_reference((struct pb_buffer**)dst, (struct pb_buffer*)src); } -static INLINE struct pb_buffer * -pb_buffer(struct pb_buffer *buffer) -{ - return (struct pb_buffer *)buffer; -} - #endif diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index 0139de1973a..c309354785a 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -115,6 +115,7 @@ static void radeon_cs_context_cleanup(struct radeon_cs_context *csc) } csc->crelocs = 0; + csc->validated_crelocs = 0; csc->chunks[0].length_dw = 0; csc->chunks[1].length_dw = 0; csc->used_gart = 0; @@ -218,11 +219,11 @@ int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo) return -1; } -static void radeon_add_reloc(struct radeon_cs_context *csc, - struct radeon_bo *bo, - enum radeon_bo_domain rd, - enum radeon_bo_domain wd, - enum radeon_bo_domain *added_domains) +static unsigned radeon_add_reloc(struct radeon_cs_context *csc, + struct radeon_bo *bo, + enum radeon_bo_domain rd, + enum radeon_bo_domain wd, + enum radeon_bo_domain *added_domains) { struct drm_radeon_cs_reloc *reloc; unsigned i; @@ -232,7 +233,7 @@ static void radeon_add_reloc(struct radeon_cs_context *csc, reloc = csc->relocs_hashlist[hash]; if (reloc->handle == bo->handle) { update_domains(reloc, rd, wd, added_domains); - return; + return csc->reloc_indices_hashlist[hash]; } /* Hash collision, look for the BO in the list of relocs linearly. */ @@ -245,7 +246,7 @@ static void radeon_add_reloc(struct radeon_cs_context *csc, csc->relocs_hashlist[hash] = reloc; csc->reloc_indices_hashlist[hash] = i; /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/ - return; + return i; } } } @@ -279,37 +280,64 @@ static void radeon_add_reloc(struct radeon_cs_context *csc, csc->reloc_indices_hashlist[hash] = csc->crelocs; csc->chunks[1].length_dw += RELOC_DWORDS; - csc->crelocs++; *added_domains = rd | wd; + return csc->crelocs++; } -static void radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs, - struct radeon_winsys_cs_handle *buf, - enum radeon_bo_domain rd, - enum radeon_bo_domain wd) +static unsigned radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs, + struct radeon_winsys_cs_handle *buf, + enum radeon_bo_domain rd, + enum radeon_bo_domain wd) { struct radeon_drm_cs *cs = radeon_drm_cs(rcs); struct radeon_bo *bo = (struct radeon_bo*)buf; enum radeon_bo_domain added_domains; - radeon_add_reloc(cs->csc, bo, rd, wd, &added_domains); - - if (!added_domains) - return; + unsigned index = radeon_add_reloc(cs->csc, bo, rd, wd, &added_domains); if (added_domains & RADEON_DOMAIN_GTT) cs->csc->used_gart += bo->size; if (added_domains & RADEON_DOMAIN_VRAM) cs->csc->used_vram += bo->size; + + return index; } static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs) { struct radeon_drm_cs *cs = radeon_drm_cs(rcs); + boolean status = + cs->csc->used_gart < cs->ws->info.gart_size * 0.8 && + cs->csc->used_vram < cs->ws->info.vram_size * 0.8; + + if (status) { + cs->csc->validated_crelocs = cs->csc->crelocs; + } else { + /* Remove lately-added relocations. The validation failed with them + * and the CS is about to be flushed because of that. Keep only + * the already-validated relocations. */ + unsigned i; + + for (i = cs->csc->validated_crelocs; i < cs->csc->crelocs; i++) { + p_atomic_dec(&cs->csc->relocs_bo[i]->num_cs_references); + radeon_bo_reference(&cs->csc->relocs_bo[i], NULL); + } + cs->csc->crelocs = cs->csc->validated_crelocs; + + /* Flush if there are any relocs. Clean up otherwise. */ + if (cs->csc->crelocs) { + cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC); + } else { + radeon_cs_context_cleanup(cs->csc); - return cs->csc->used_gart < cs->ws->gart_size * 0.8 && - cs->csc->used_vram < cs->ws->vram_size * 0.8; + assert(cs->base.cdw == 0); + if (cs->base.cdw != 0) { + fprintf(stderr, "radeon: Unexpected error in %s.\n", __func__); + } + } + } + return status; } static void radeon_drm_cs_write_reloc(struct radeon_winsys_cs *rcs, @@ -351,6 +379,8 @@ static PIPE_THREAD_ROUTINE(radeon_drm_cs_emit_ioctl, param) for (i = 0; i < csc->crelocs; i++) p_atomic_dec(&csc->relocs_bo[i]->num_active_ioctls); + + radeon_cs_context_cleanup(csc); return NULL; } @@ -381,11 +411,6 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags) for (i = 0; i < crelocs; i++) { /* Update the number of active asynchronous CS ioctls for the buffer. */ p_atomic_inc(&cs->csc->relocs_bo[i]->num_active_ioctls); - - /* Update whether the buffer is busy for write. */ - if (cs->csc->relocs[i].write_domain) { - cs->csc->relocs_bo[i]->busy_for_write = TRUE; - } } if (cs->ws->num_cpus > 1 && debug_get_option_thread() && @@ -395,6 +420,8 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags) } else { radeon_drm_cs_emit_ioctl(cs->csc); } + } else { + radeon_cs_context_cleanup(cs->csc); } /* Flip command streams. */ @@ -403,8 +430,6 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags) cs->cst = tmp; /* Prepare a new CS. */ - radeon_cs_context_cleanup(cs->csc); - cs->base.buf = cs->csc->buf; cs->base.cdw = 0; } @@ -447,6 +472,6 @@ void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws) ws->base.cs_validate = radeon_drm_cs_validate; ws->base.cs_write_reloc = radeon_drm_cs_write_reloc; ws->base.cs_flush = radeon_drm_cs_flush; - ws->base.cs_set_flush = radeon_drm_cs_set_flush; + ws->base.cs_set_flush_callback = radeon_drm_cs_set_flush; ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced; } diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h index 339beedc6ab..fe285326884 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h @@ -41,6 +41,7 @@ struct radeon_cs_context { /* Relocs. */ unsigned nrelocs; unsigned crelocs; + unsigned validated_crelocs; struct radeon_bo **relocs_bo; struct drm_radeon_cs_reloc *relocs; @@ -88,8 +89,9 @@ static INLINE boolean radeon_bo_is_referenced_by_cs(struct radeon_drm_cs *cs, struct radeon_bo *bo) { - return bo->num_cs_references == bo->rws->num_cs || - (bo->num_cs_references && radeon_get_reloc(cs->csc, bo) != -1); + int num_refs = bo->num_cs_references; + return num_refs == bo->rws->num_cs || + (num_refs && radeon_get_reloc(cs->csc, bo) != -1); } static INLINE boolean @@ -111,7 +113,7 @@ radeon_bo_is_referenced_by_cs_for_write(struct radeon_drm_cs *cs, static INLINE boolean radeon_bo_is_referenced_by_any_cs(struct radeon_bo *bo) { - return bo->num_cs_references; + return bo->num_cs_references != 0; } void radeon_drm_cs_sync_flush(struct radeon_drm_cs *cs); diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index 0474b381ade..e234321d934 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -41,13 +41,34 @@ #include <xf86drm.h> #include <stdio.h> +#ifndef RADEON_INFO_TILING_CONFIG +#define RADEON_INFO_TILING_CONFIG 6 +#endif + #ifndef RADEON_INFO_WANT_HYPERZ #define RADEON_INFO_WANT_HYPERZ 7 #endif + #ifndef RADEON_INFO_WANT_CMASK #define RADEON_INFO_WANT_CMASK 8 #endif +#ifndef RADEON_INFO_CLOCK_CRYSTAL_FREQ +#define RADEON_INFO_CLOCK_CRYSTAL_FREQ 9 +#endif + +#ifndef RADEON_INFO_NUM_BACKENDS +#define RADEON_INFO_NUM_BACKENDS 0xa +#endif + +#ifndef RADEON_INFO_NUM_TILE_PIPES +#define RADEON_INFO_NUM_TILE_PIPES 0xb +#endif + +#ifndef RADEON_INFO_BACKEND_MAP +#define RADEON_INFO_BACKEND_MAP 0xd +#endif + /* Enable/disable feature access for one command stream. * If enable == TRUE, return TRUE on success. * Otherwise, return FALSE. @@ -103,17 +124,31 @@ static boolean radeon_set_fd_access(struct radeon_drm_cs *applier, return FALSE; } +static boolean radeon_get_drm_value(int fd, unsigned request, + const char *errname, uint32_t *out) +{ + struct drm_radeon_info info = {0}; + int retval; + + info.value = (unsigned long)out; + info.request = request; + + retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)); + if (retval && errname) { + fprintf(stderr, "radeon: Failed to get %s, error number %d\n", + errname, retval); + return FALSE; + } + return TRUE; +} + /* Helper function to do the ioctls needed for setup and init. */ -static void do_ioctls(struct radeon_drm_winsys *winsys) +static boolean do_winsys_init(struct radeon_drm_winsys *ws) { struct drm_radeon_gem_info gem_info = {0}; - struct drm_radeon_info info = {0}; - int target = 0; int retval; drmVersionPtr version; - info.value = (unsigned long)⌖ - /* We do things in a specific order here. * * DRM version first. We need to be sure we're running on a KMS chipset. @@ -123,71 +158,108 @@ static void do_ioctls(struct radeon_drm_winsys *winsys) * for all Radeons. If this fails, we probably got handed an FD for some * non-Radeon card. * + * The GEM info is actually bogus on the kernel side, as well as our side + * (see radeon_gem_info_ioctl in radeon_gem.c) but that's alright because + * we don't actually use the info for anything yet. + * * The GB and Z pipe requests should always succeed, but they might not * return sensical values for all chipsets, but that's alright because * the pipe drivers already know that. - * - * The GEM info is actually bogus on the kernel side, as well as our side - * (see radeon_gem_info_ioctl in radeon_gem.c) but that's alright because - * we don't actually use the info for anything yet. */ + */ - version = drmGetVersion(winsys->fd); + /* Get DRM version. */ + version = drmGetVersion(ws->fd); if (version->version_major != 2 || version->version_minor < 3) { fprintf(stderr, "%s: DRM version is %d.%d.%d but this driver is " - "only compatible with 2.3.x (kernel 2.6.34) and later.\n", + "only compatible with 2.3.x (kernel 2.6.34) or later.\n", __FUNCTION__, version->version_major, version->version_minor, version->version_patchlevel); drmFreeVersion(version); - exit(1); + return FALSE; } - winsys->drm_major = version->version_major; - winsys->drm_minor = version->version_minor; - winsys->drm_patchlevel = version->version_patchlevel; + ws->info.drm_major = version->version_major; + ws->info.drm_minor = version->version_minor; + ws->info.drm_patchlevel = version->version_patchlevel; + drmFreeVersion(version); - info.request = RADEON_INFO_DEVICE_ID; - retval = drmCommandWriteRead(winsys->fd, DRM_RADEON_INFO, &info, sizeof(info)); - if (retval) { - fprintf(stderr, "%s: Failed to get PCI ID, " - "error number %d\n", __FUNCTION__, retval); - exit(1); - } - winsys->pci_id = target; + /* Get PCI ID. */ + if (!radeon_get_drm_value(ws->fd, RADEON_INFO_DEVICE_ID, "PCI ID", + &ws->info.pci_id)) + return FALSE; - info.request = RADEON_INFO_NUM_GB_PIPES; - retval = drmCommandWriteRead(winsys->fd, DRM_RADEON_INFO, &info, sizeof(info)); - if (retval) { - fprintf(stderr, "%s: Failed to get GB pipe count, " - "error number %d\n", __FUNCTION__, retval); - exit(1); - } - winsys->gb_pipes = target; + /* Check PCI ID. */ + switch (ws->info.pci_id) { +#define CHIPSET(pci_id, name, family) case pci_id: +#include "pci_ids/r300_pci_ids.h" +#undef CHIPSET + ws->gen = R300; + break; - info.request = RADEON_INFO_NUM_Z_PIPES; - retval = drmCommandWriteRead(winsys->fd, DRM_RADEON_INFO, &info, sizeof(info)); - if (retval) { - fprintf(stderr, "%s: Failed to get Z pipe count, " - "error number %d\n", __FUNCTION__, retval); - exit(1); +#define CHIPSET(pci_id, name, family) case pci_id: +#include "pci_ids/r600_pci_ids.h" +#undef CHIPSET + ws->gen = R600; + break; + + default: + fprintf(stderr, "radeon: Invalid PCI ID.\n"); + return FALSE; } - winsys->z_pipes = target; - retval = drmCommandWriteRead(winsys->fd, DRM_RADEON_GEM_INFO, + /* Get GEM info. */ + retval = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_INFO, &gem_info, sizeof(gem_info)); if (retval) { - fprintf(stderr, "%s: Failed to get MM info, error number %d\n", - __FUNCTION__, retval); - exit(1); + fprintf(stderr, "radeon: Failed to get MM info, error number %d\n", + retval); + return FALSE; } - winsys->gart_size = gem_info.gart_size; - winsys->vram_size = gem_info.vram_size; + ws->info.gart_size = gem_info.gart_size; + ws->info.vram_size = gem_info.vram_size; - drmFreeVersion(version); + ws->num_cpus = sysconf(_SC_NPROCESSORS_ONLN); + + /* Generation-specific queries. */ + if (ws->gen == R300) { + if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_GB_PIPES, + "GB pipe count", + &ws->info.r300_num_gb_pipes)) + return FALSE; - winsys->num_cpus = sysconf(_SC_NPROCESSORS_ONLN); + if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_Z_PIPES, + "Z pipe count", + &ws->info.r300_num_z_pipes)) + return FALSE; + } + else if (ws->gen == R600) { + if (ws->info.drm_minor >= 9 && + !radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_BACKENDS, + "num backends", + &ws->info.r600_num_backends)) + return FALSE; + + /* get the GPU counter frequency, failure is not fatal */ + radeon_get_drm_value(ws->fd, RADEON_INFO_CLOCK_CRYSTAL_FREQ, NULL, + &ws->info.r600_clock_crystal_freq); + + radeon_get_drm_value(ws->fd, RADEON_INFO_TILING_CONFIG, NULL, + &ws->info.r600_tiling_config); + + if (ws->info.drm_minor >= 11) { + radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_TILE_PIPES, NULL, + &ws->info.r600_num_tile_pipes); + + if (radeon_get_drm_value(ws->fd, RADEON_INFO_BACKEND_MAP, NULL, + &ws->info.r600_backend_map)) + ws->info.r600_backend_map_valid = TRUE; + } + } + + return TRUE; } static void radeon_winsys_destroy(struct radeon_winsys *rws) @@ -202,34 +274,10 @@ static void radeon_winsys_destroy(struct radeon_winsys *rws) FREE(rws); } -static uint32_t radeon_get_value(struct radeon_winsys *rws, - enum radeon_value_id id) +static void radeon_query_info(struct radeon_winsys *rws, + struct radeon_info *info) { - struct radeon_drm_winsys *ws = (struct radeon_drm_winsys *)rws; - - switch(id) { - case RADEON_VID_PCI_ID: - return ws->pci_id; - case RADEON_VID_R300_GB_PIPES: - return ws->gb_pipes; - case RADEON_VID_R300_Z_PIPES: - return ws->z_pipes; - case RADEON_VID_GART_SIZE: - return ws->gart_size; - case RADEON_VID_VRAM_SIZE: - return ws->vram_size; - case RADEON_VID_DRM_MAJOR: - return ws->drm_major; - case RADEON_VID_DRM_MINOR: - return ws->drm_minor; - case RADEON_VID_DRM_PATCHLEVEL: - return ws->drm_patchlevel; - case RADEON_VID_DRM_2_6_0: - return ws->drm_major*100 + ws->drm_minor >= 206; - case RADEON_VID_DRM_2_8_0: - return ws->drm_major*100 + ws->drm_minor >= 208; - } - return 0; + *info = ((struct radeon_drm_winsys *)rws)->info; } static boolean radeon_cs_request_feature(struct radeon_winsys_cs *rcs, @@ -239,7 +287,7 @@ static boolean radeon_cs_request_feature(struct radeon_winsys_cs *rcs, struct radeon_drm_cs *cs = radeon_drm_cs(rcs); switch (fid) { - case RADEON_FID_HYPERZ_RAM_ACCESS: + case RADEON_FID_R300_HYPERZ_ACCESS: if (debug_get_bool_option("RADEON_HYPERZ", FALSE)) { return radeon_set_fd_access(cs, &cs->ws->hyperz_owner, &cs->ws->hyperz_owner_mutex, @@ -248,7 +296,7 @@ static boolean radeon_cs_request_feature(struct radeon_winsys_cs *rcs, return FALSE; } - case RADEON_FID_CMASK_RAM_ACCESS: + case RADEON_FID_R300_CMASK_ACCESS: if (debug_get_bool_option("RADEON_CMASK", FALSE)) { return radeon_set_fd_access(cs, &cs->ws->cmask_owner, &cs->ws->cmask_owner_mutex, @@ -268,16 +316,9 @@ struct radeon_winsys *radeon_drm_winsys_create(int fd) } ws->fd = fd; - do_ioctls(ws); - switch (ws->pci_id) { -#define CHIPSET(pci_id, name, family) case pci_id: -#include "pci_ids/r300_pci_ids.h" -#undef CHIPSET - break; - default: - goto fail; - } + if (!do_winsys_init(ws)) + goto fail; /* Create managers. */ ws->kman = radeon_bomgr_create(ws); @@ -289,7 +330,7 @@ struct radeon_winsys *radeon_drm_winsys_create(int fd) /* Set functions. */ ws->base.destroy = radeon_winsys_destroy; - ws->base.get_value = radeon_get_value; + ws->base.query_info = radeon_query_info; ws->base.cs_request_feature = radeon_cs_request_feature; radeon_bomgr_init_functions(ws); diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h index d5186bc4d17..69216448496 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h @@ -31,29 +31,27 @@ #define RADEON_DRM_WINSYS_H #include "radeon_winsys.h" - #include "os/os_thread.h" +enum radeon_generation { + R300, + R600 +}; + struct radeon_drm_winsys { struct radeon_winsys base; int fd; /* DRM file descriptor */ int num_cs; /* The number of command streams created. */ + enum radeon_generation gen; + struct radeon_info info; + struct pb_manager *kman; struct pb_manager *cman; - uint32_t pci_id; /* PCI ID */ - uint32_t gb_pipes; /* GB pipe count */ - uint32_t z_pipes; /* Z pipe count (rv530 only) */ - uint32_t gart_size; /* GART size. */ - uint32_t vram_size; /* VRAM size. */ uint32_t num_cpus; /* Number of CPUs. */ - unsigned drm_major; - unsigned drm_minor; - unsigned drm_patchlevel; - struct radeon_drm_cs *hyperz_owner; pipe_mutex hyperz_owner_mutex; struct radeon_drm_cs *cmask_owner; diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h index 3a64e4abc35..90583e3ab8c 100644 --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h @@ -26,6 +26,20 @@ /* The public winsys interface header for the radeon driver. */ +/* R300 features in DRM. + * + * 2.6.0: + * - Hyper-Z + * - GB_Z_PEQ_CONFIG on rv350->r4xx + * - R500 FG_ALPHA_VALUE + * + * 2.8.0: + * - R500 US_FORMAT regs + * - R500 ARGB2101010 colorbuffer + * - CMask and AA regs + * - R16F/RG16F + */ + #include "pipebuffer/pb_bufmgr.h" #include "pipe/p_defines.h" #include "pipe/p_state.h" @@ -47,6 +61,12 @@ enum radeon_bo_domain { /* bitfield */ RADEON_DOMAIN_VRAM = 4 }; +enum radeon_bo_usage { /* bitfield */ + RADEON_USAGE_READ = 2, + RADEON_USAGE_WRITE = 4, + RADEON_USAGE_READWRITE = RADEON_USAGE_READ | RADEON_USAGE_WRITE +}; + struct winsys_handle; struct radeon_winsys_cs_handle; /* for write_reloc etc. */ @@ -55,43 +75,29 @@ struct radeon_winsys_cs { uint32_t *buf; /* The command buffer. */ }; -enum radeon_value_id { - RADEON_VID_PCI_ID, - RADEON_VID_R300_GB_PIPES, - RADEON_VID_R300_Z_PIPES, - RADEON_VID_GART_SIZE, - RADEON_VID_VRAM_SIZE, - RADEON_VID_DRM_MAJOR, - RADEON_VID_DRM_MINOR, - RADEON_VID_DRM_PATCHLEVEL, - - /* These should probably go away: */ - - /* R300 features: - * - Hyper-Z - * - GB_Z_PEQ_CONFIG on rv350->r4xx - * - R500 FG_ALPHA_VALUE - * - * R600 features: - * - TBD - */ - RADEON_VID_DRM_2_6_0, +struct radeon_info { + uint32_t pci_id; + uint32_t gart_size; + uint32_t vram_size; - /* R300 features: - * - R500 US_FORMAT regs - * - R500 ARGB2101010 colorbuffer - * - CMask and AA regs - * - R16F/RG16F - * - * R600 features: - * - TBD - */ - RADEON_VID_DRM_2_8_0, + uint32_t drm_major; /* version */ + uint32_t drm_minor; + uint32_t drm_patchlevel; + + uint32_t r300_num_gb_pipes; + uint32_t r300_num_z_pipes; + + uint32_t r600_num_backends; + uint32_t r600_clock_crystal_freq; + uint32_t r600_tiling_config; + uint32_t r600_num_tile_pipes; + uint32_t r600_backend_map; + boolean r600_backend_map_valid; }; enum radeon_feature_id { - RADEON_FID_HYPERZ_RAM_ACCESS, /* ZMask + HiZ */ - RADEON_FID_CMASK_RAM_ACCESS, + RADEON_FID_R300_HYPERZ_ACCESS, /* ZMask + HiZ */ + RADEON_FID_R300_CMASK_ACCESS, }; struct radeon_winsys { @@ -103,13 +109,13 @@ struct radeon_winsys { void (*destroy)(struct radeon_winsys *ws); /** - * Query a system value from a winsys. + * Query an info structure from winsys. * * \param ws The winsys this function is called from. - * \param vid One of the RADEON_VID_* enums. + * \param info Return structure */ - uint32_t (*get_value)(struct radeon_winsys *ws, - enum radeon_value_id vid); + void (*query_info)(struct radeon_winsys *ws, + struct radeon_info *info); /************************************************************************** * Buffer management. Buffer attributes are mostly fixed over its lifetime. @@ -126,7 +132,6 @@ struct radeon_winsys { * \param size The size to allocate. * \param alignment An alignment of the buffer in memory. * \param bind A bitmask of the PIPE_BIND_* flags. - * \param usage A bitmask of the PIPE_USAGE_* flags. * \param domain A bitmask of the RADEON_DOMAIN_* flags. * \return The created buffer object. */ @@ -134,7 +139,6 @@ struct radeon_winsys { unsigned size, unsigned alignment, unsigned bind, - unsigned usage, enum radeon_bo_domain domain); struct radeon_winsys_cs_handle *(*buffer_get_cs_handle)( @@ -164,8 +168,10 @@ struct radeon_winsys { * Return TRUE if a buffer object is being used by the GPU. * * \param buf A winsys buffer object. + * \param usage Only check whether the buffer is busy for the given usage. */ - boolean (*buffer_is_busy)(struct pb_buffer *buf); + boolean (*buffer_is_busy)(struct pb_buffer *buf, + enum radeon_bo_usage usage); /** * Wait for a buffer object until it is not used by a GPU. This is @@ -173,8 +179,10 @@ struct radeon_winsys { * and synchronizing to the fence. * * \param buf A winsys buffer object to wait for. + * \param usage Only wait until the buffer is idle for the given usage, + * but may still be busy for some other usage. */ - void (*buffer_wait)(struct pb_buffer *buf); + void (*buffer_wait)(struct pb_buffer *buf, enum radeon_bo_usage usage); /** * Return tiling flags describing a memory layout of a buffer object. @@ -263,15 +271,18 @@ struct radeon_winsys { * \param buf A winsys buffer to validate. * \param rd A read domain containing a bitmask of the RADEON_DOMAIN_* flags. * \param wd A write domain containing a bitmask of the RADEON_DOMAIN_* flags. + * \return Relocation index. */ - void (*cs_add_reloc)(struct radeon_winsys_cs *cs, - struct radeon_winsys_cs_handle *buf, - enum radeon_bo_domain rd, - enum radeon_bo_domain wd); + unsigned (*cs_add_reloc)(struct radeon_winsys_cs *cs, + struct radeon_winsys_cs_handle *buf, + enum radeon_bo_domain rd, + enum radeon_bo_domain wd); /** * Return TRUE if there is enough memory in VRAM and GTT for the relocs - * added so far. + * added so far. If the validation fails, all the relocations which have + * been added since the last call of cs_validate will be removed and + * the CS will be flushed (provided there are still any relocations). * * \param cs A command stream to validate. */ @@ -304,9 +315,9 @@ struct radeon_winsys { * \param flush A flush callback function associated with the command stream. * \param user A user pointer that will be passed to the flush callback. */ - void (*cs_set_flush)(struct radeon_winsys_cs *cs, - void (*flush)(void *ctx, unsigned flags), - void *user); + void (*cs_set_flush_callback)(struct radeon_winsys_cs *cs, + void (*flush)(void *ctx, unsigned flags), + void *ctx); /** * Return TRUE if a buffer is referenced by a command stream. @@ -321,7 +332,8 @@ struct radeon_winsys { * Request access to a feature for a command stream. * * \param cs A command stream. - * \param fid A winsys buffer. + * \param fid Feature ID, one of RADEON_FID_* + * \param enable Whether to enable or disable the feature. */ boolean (*cs_request_feature)(struct radeon_winsys_cs *cs, enum radeon_feature_id fid, diff --git a/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c b/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c index d92ba389d35..afdbd44458d 100644 --- a/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c +++ b/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c @@ -42,7 +42,8 @@ #include "xf86drm.h" #include "vmwgfx_drm.h" -#include <sys/mman.h> +#include "os/os_mman.h" + #include <errno.h> #include <unistd.h> @@ -94,7 +95,7 @@ static void vmw_ioctl_fifo_unmap(struct vmw_winsys_screen *vws, void *mapping) { VMW_FUNC; - (void)munmap(mapping, getpagesize()); + (void)os_munmap(mapping, getpagesize()); } @@ -106,7 +107,7 @@ vmw_ioctl_fifo_map(struct vmw_winsys_screen *vws, VMW_FUNC; - map = mmap(NULL, getpagesize(), PROT_READ, MAP_SHARED, + map = os_mmap(NULL, getpagesize(), PROT_READ, MAP_SHARED, vws->ioctl.drm_fd, fifo_offset); if (map == MAP_FAILED) { @@ -362,7 +363,7 @@ vmw_ioctl_region_destroy(struct vmw_region *region) region->ptr.gmrId, region->ptr.offset); if (region->data) { - munmap(region->data, region->size); + os_munmap(region->data, region->size); region->data = NULL; } @@ -388,7 +389,7 @@ vmw_ioctl_region_map(struct vmw_region *region) region->ptr.gmrId, region->ptr.offset); if (region->data == NULL) { - map = mmap(NULL, region->size, PROT_READ | PROT_WRITE, MAP_SHARED, + map = os_mmap(NULL, region->size, PROT_READ | PROT_WRITE, MAP_SHARED, region->drm_fd, region->map_handle); if (map == MAP_FAILED) { debug_printf("%s: Map failed.\n", __FUNCTION__); diff --git a/src/gallium/winsys/sw/android/Android.mk b/src/gallium/winsys/sw/android/Android.mk new file mode 100644 index 00000000000..4fb2715a56c --- /dev/null +++ b/src/gallium/winsys/sw/android/Android.mk @@ -0,0 +1,34 @@ +# Mesa 3-D graphics library +# +# Copyright (C) 2010-2011 Chia-I Wu <[email protected]> +# Copyright (C) 2010-2011 LunarG Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +LOCAL_PATH := $(call my-dir) + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := \ + android_sw_winsys.cpp + +LOCAL_MODULE := libmesa_winsys_sw_android + +include $(GALLIUM_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/src/gallium/winsys/sw/android/android_sw_winsys.cpp b/src/gallium/winsys/sw/android/android_sw_winsys.cpp new file mode 100644 index 00000000000..02faf1e0cca --- /dev/null +++ b/src/gallium/winsys/sw/android/android_sw_winsys.cpp @@ -0,0 +1,255 @@ +/* + * Mesa 3-D graphics library + * Version: 7.12 + * + * Copyright (C) 2010-2011 LunarG Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu <[email protected]> + */ + +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" +#include "util/u_memory.h" +#include "util/u_format.h" +#include "state_tracker/sw_winsys.h" + +#include <utils/Errors.h> +#include <private/ui/sw_gralloc_handle.h> + +#include <hardware/gralloc.h> + +#include "android_sw_winsys.h" + +struct android_sw_winsys +{ + struct sw_winsys base; + + const gralloc_module_t *grmod; +}; + +struct android_sw_displaytarget +{ + buffer_handle_t handle; + int stride; + int width, height; + int usage; /* gralloc usage */ + + void *mapped; +}; + +static INLINE struct android_sw_winsys * +android_sw_winsys(struct sw_winsys *ws) +{ + return (struct android_sw_winsys *) ws; +} + +static INLINE struct android_sw_displaytarget * +android_sw_displaytarget(struct sw_displaytarget *dt) +{ + return (struct android_sw_displaytarget *) dt; +} + +namespace android { + +static void +android_displaytarget_display(struct sw_winsys *ws, + struct sw_displaytarget *dt, + void *context_private) +{ +} + +static struct sw_displaytarget * +android_displaytarget_create(struct sw_winsys *ws, + unsigned tex_usage, + enum pipe_format format, + unsigned width, unsigned height, + unsigned alignment, + unsigned *stride) +{ + return NULL; +} + +static void +android_displaytarget_destroy(struct sw_winsys *ws, + struct sw_displaytarget *dt) +{ + struct android_sw_displaytarget *adt = android_sw_displaytarget(dt); + + assert(!adt->mapped); + FREE(adt); +} + +static void +android_displaytarget_unmap(struct sw_winsys *ws, + struct sw_displaytarget *dt) +{ + struct android_sw_winsys *droid = android_sw_winsys(ws); + struct android_sw_displaytarget *adt = android_sw_displaytarget(dt); + + if (adt->mapped) { + if (sw_gralloc_handle_t::validate(adt->handle) >= 0) { + adt->mapped = NULL; + } + else { + droid->grmod->unlock(droid->grmod, adt->handle); + adt->mapped = NULL; + } + } +} + +static void * +android_displaytarget_map(struct sw_winsys *ws, + struct sw_displaytarget *dt, + unsigned flags) +{ + struct android_sw_winsys *droid = android_sw_winsys(ws); + struct android_sw_displaytarget *adt = android_sw_displaytarget(dt); + + if (!adt->mapped) { + if (sw_gralloc_handle_t::validate(adt->handle) >= 0) { + const sw_gralloc_handle_t *swhandle = + reinterpret_cast<const sw_gralloc_handle_t *>(adt->handle); + adt->mapped = reinterpret_cast<void *>(swhandle->base); + } + else { + /* lock the buffer for CPU access */ + droid->grmod->lock(droid->grmod, adt->handle, + adt->usage, 0, 0, adt->width, adt->height, &adt->mapped); + } + } + + return adt->mapped; +} + +static struct sw_displaytarget * +android_displaytarget_from_handle(struct sw_winsys *ws, + const struct pipe_resource *templ, + struct winsys_handle *whandle, + unsigned *stride) +{ + struct android_winsys_handle *ahandle = + (struct android_winsys_handle *) whandle; + struct android_sw_displaytarget *adt; + + adt = CALLOC_STRUCT(android_sw_displaytarget); + if (!adt) + return NULL; + + adt->handle = ahandle->handle; + adt->stride = ahandle->stride; + adt->width = templ->width0; + adt->height = templ->height0; + + if (templ->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_TRANSFER_WRITE)) + adt->usage |= GRALLOC_USAGE_SW_WRITE_OFTEN; + if (templ->bind & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_TRANSFER_READ)) + adt->usage |= GRALLOC_USAGE_SW_READ_OFTEN; + + if (stride) + *stride = adt->stride; + + return reinterpret_cast<struct sw_displaytarget *>(adt); +} + +static boolean +android_displaytarget_get_handle(struct sw_winsys *ws, + struct sw_displaytarget *dt, + struct winsys_handle *whandle) +{ + return FALSE; +} + +static boolean +android_is_displaytarget_format_supported(struct sw_winsys *ws, + unsigned tex_usage, + enum pipe_format format) +{ + struct android_sw_winsys *droid = android_sw_winsys(ws); + int fmt = -1; + + switch (format) { + case PIPE_FORMAT_R8G8B8A8_UNORM: + fmt = HAL_PIXEL_FORMAT_RGBA_8888; + break; + case PIPE_FORMAT_R8G8B8X8_UNORM: + fmt = HAL_PIXEL_FORMAT_RGBX_8888; + break; + case PIPE_FORMAT_R8G8B8_UNORM: + fmt = HAL_PIXEL_FORMAT_RGB_888; + break; + case PIPE_FORMAT_B5G6R5_UNORM: + fmt = HAL_PIXEL_FORMAT_RGB_565; + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + fmt = HAL_PIXEL_FORMAT_BGRA_8888; + break; + default: + break; + } + + return (fmt != -1); +} + +static void +android_destroy(struct sw_winsys *ws) +{ + struct android_sw_winsys *droid = android_sw_winsys(ws); + + FREE(droid); +} + +}; /* namespace android */ + +using namespace android; + +struct sw_winsys * +android_create_sw_winsys(void) +{ + struct android_sw_winsys *droid; + const hw_module_t *mod; + + droid = CALLOC_STRUCT(android_sw_winsys); + if (!droid) + return NULL; + + if (hw_get_module(GRALLOC_HARDWARE_MODULE_ID, &mod)) { + FREE(droid); + return NULL; + } + + droid->grmod = (const gralloc_module_t *) mod; + + droid->base.destroy = android_destroy; + droid->base.is_displaytarget_format_supported = + android_is_displaytarget_format_supported; + + droid->base.displaytarget_create = android_displaytarget_create; + droid->base.displaytarget_destroy = android_displaytarget_destroy; + droid->base.displaytarget_from_handle = android_displaytarget_from_handle; + droid->base.displaytarget_get_handle = android_displaytarget_get_handle; + + droid->base.displaytarget_map = android_displaytarget_map; + droid->base.displaytarget_unmap = android_displaytarget_unmap; + droid->base.displaytarget_display = android_displaytarget_display; + + return &droid->base; +} diff --git a/src/gallium/winsys/sw/android/android_sw_winsys.h b/src/gallium/winsys/sw/android/android_sw_winsys.h new file mode 100644 index 00000000000..79392dc0f97 --- /dev/null +++ b/src/gallium/winsys/sw/android/android_sw_winsys.h @@ -0,0 +1,49 @@ +/* + * Mesa 3-D graphics library + * Version: 7.12 + * + * Copyright (C) 2010-2011 LunarG Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu <[email protected]> + */ + +#ifndef ANDROID_SW_WINSYS +#define ANDROID_SW_WINSYS + +#include <sys/cdefs.h> +#include <hardware/gralloc.h> + +__BEGIN_DECLS + +struct sw_winsys; + +struct android_winsys_handle { + buffer_handle_t handle; + int stride; +}; + +struct sw_winsys * +android_create_sw_winsys(void); + +__END_DECLS + +#endif /* ANDROID_SW_WINSYS */ |