summaryrefslogtreecommitdiffstats
path: root/src/gallium/auxiliary
diff options
context:
space:
mode:
authorChristian König <[email protected]>2010-10-12 23:05:25 +0200
committerChristian König <[email protected]>2010-10-12 23:07:29 +0200
commit695cc370a280a637f411f5ff3877b3fd1c05e424 (patch)
tree69ae2a8fbecfa553faba59274688ffe11ee1a612 /src/gallium/auxiliary
parentf3e34ba6fba76870b1c91a27adb706d1b87aeec8 (diff)
parent48156b87bc9d3e09ec34372d69504a787332ea0b (diff)
Merge branch 'master' of ssh://git.freedesktop.org/git/mesa/mesa into pipe-video
Conflicts: configure.ac src/gallium/drivers/nvfx/Makefile src/gallium/include/pipe/p_defines.h src/gallium/include/pipe/p_screen.h src/gallium/include/state_tracker/dri1_api.h src/gallium/include/state_tracker/drm_api.h src/gallium/winsys/nouveau/drm/nouveau_drm_api.c
Diffstat (limited to 'src/gallium/auxiliary')
-rw-r--r--src/gallium/auxiliary/Makefile53
-rw-r--r--src/gallium/auxiliary/SConscript60
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_context.c71
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_context.h2
-rw-r--r--src/gallium/auxiliary/draw/draw_cliptest_tmp.h114
-rw-r--r--src/gallium/auxiliary/draw/draw_context.c273
-rw-r--r--src/gallium/auxiliary/draw/draw_context.h81
-rw-r--r--src/gallium/auxiliary/draw/draw_decompose_tmp.h431
-rw-r--r--src/gallium/auxiliary/draw/draw_fs.c73
-rw-r--r--src/gallium/auxiliary/draw/draw_fs.h42
-rw-r--r--src/gallium/auxiliary/draw/draw_gs.c418
-rw-r--r--src/gallium/auxiliary/draw/draw_gs.h36
-rw-r--r--src/gallium/auxiliary/draw/draw_gs_tmp.h32
-rw-r--r--src/gallium/auxiliary/draw/draw_llvm.c416
-rw-r--r--src/gallium/auxiliary/draw/draw_llvm.h196
-rw-r--r--src/gallium/auxiliary/draw/draw_llvm_sample.c223
-rw-r--r--src/gallium/auxiliary/draw/draw_llvm_translate.c5
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe.c227
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_aaline.c30
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_aapoint.c16
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_clip.c134
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_cull.c19
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_flatshade.c6
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_offset.c13
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_pstipple.c26
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_stipple.c13
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_twoside.c8
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_unfilled.c34
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_validate.c25
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_vbuf.c24
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_wide_line.c74
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_wide_point.c125
-rw-r--r--src/gallium/auxiliary/draw/draw_private.h128
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.c198
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.h83
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_decompose.h167
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_elts.c89
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_emit.c53
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch.c45
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_emit.c49
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c52
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c428
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c419
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_post_vs.c304
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_so_emit.c293
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_util.c13
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_varray.c193
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_varray_tmp.h238
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h98
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_vcache.c523
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h197
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_vsplit.c208
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h309
-rw-r--r--src/gallium/auxiliary/draw/draw_so_emit_tmp.h31
-rw-r--r--src/gallium/auxiliary/draw/draw_split_tmp.h176
-rw-r--r--src/gallium/auxiliary/draw/draw_vbuf.h20
-rw-r--r--src/gallium/auxiliary/draw/draw_vertex.h2
-rw-r--r--src/gallium/auxiliary/draw/draw_vs.c37
-rw-r--r--src/gallium/auxiliary/draw/draw_vs.h9
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_aos.h2
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_exec.c8
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_llvm.c127
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_ppc.c4
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_sse.c1
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_varient.c6
-rw-r--r--src/gallium/auxiliary/gallivm/f.cpp85
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld.h11
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_arit.c1418
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_arit.h15
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_assert.c101
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_assert.h41
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_bitarit.c187
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_bitarit.h69
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_const.c56
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_const.h14
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_conv.c157
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_debug.c2
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_debug.h18
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_flow.c19
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_format.h39
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_format_aos.c481
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_format_soa.c201
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c445
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_gather.c148
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_gather.h61
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_init.c98
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_init.h3
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_limits.h55
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_logic.c183
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_logic.h13
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_misc.cpp180
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_pack.c143
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_pack.h10
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_quad.c101
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_quad.h96
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample.c906
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample.h265
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c1092
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h56
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c1828
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_struct.c85
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_struct.h41
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_swizzle.c403
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_swizzle.h43
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_tgsi.h44
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c1176
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c831
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_type.c147
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_type.h72
-rw-r--r--src/gallium/auxiliary/indices/.gitignore2
-rw-r--r--src/gallium/auxiliary/os/os_stream.c58
-rw-r--r--src/gallium/auxiliary/os/os_stream.h25
-rw-r--r--src/gallium/auxiliary/os/os_stream_log.c3
-rw-r--r--src/gallium/auxiliary/os/os_stream_null.c8
-rw-r--r--src/gallium/auxiliary/os/os_stream_stdc.c9
-rw-r--r--src/gallium/auxiliary/os/os_stream_str.c1
-rw-r--r--src/gallium/auxiliary/os/os_thread.h97
-rw-r--r--src/gallium/auxiliary/os/os_time.c2
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_buffer.h6
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c8
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c3
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr.h3
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c39
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c8
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c7
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c6
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c6
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c7
-rw-r--r--src/gallium/auxiliary/rbug/README2
-rw-r--r--src/gallium/auxiliary/rbug/rbug_context.c20
-rw-r--r--src/gallium/auxiliary/rbug/rbug_core.c10
-rw-r--r--src/gallium/auxiliary/rbug/rbug_demarshal.c64
-rw-r--r--src/gallium/auxiliary/rbug/rbug_proto.h5
-rw-r--r--src/gallium/auxiliary/rbug/rbug_shader.c12
-rw-r--r--src/gallium/auxiliary/rbug/rbug_texture.c14
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_cpu.c6
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_execmem.c1
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_x86sse.c520
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_x86sse.h107
-rw-r--r--src/gallium/auxiliary/target-helpers/inline_debug_helper.h44
-rw-r--r--src/gallium/auxiliary/target-helpers/inline_sw_helper.h63
-rw-r--r--src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h34
-rw-r--r--src/gallium/auxiliary/target-helpers/wrap_screen.c4
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt1127
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_build.c66
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_build.h1
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_dump.c248
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_dump.h1
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.c236
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.h70
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_info.c15
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_info.h4
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h4
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_parse.c61
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_parse.h20
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ppc.c8
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sanity.c35
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sanity.h3
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_scan.c50
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_scan.h10
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sse2.c150
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sse2.h5
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_text.c342
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ureg.c77
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ureg.h75
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_util.c147
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_util.h5
-rw-r--r--src/gallium/auxiliary/translate/translate.c7
-rw-r--r--src/gallium/auxiliary/translate/translate.h15
-rw-r--r--src/gallium/auxiliary/translate/translate_generic.c644
-rw-r--r--src/gallium/auxiliary/translate/translate_sse.c1378
-rw-r--r--src/gallium/auxiliary/util/u_atomic.h47
-rw-r--r--src/gallium/auxiliary/util/u_bitmask.h3
-rw-r--r--src/gallium/auxiliary/util/u_blit.c240
-rw-r--r--src/gallium/auxiliary/util/u_blit.h22
-rw-r--r--src/gallium/auxiliary/util/u_blitter.c839
-rw-r--r--src/gallium/auxiliary/util/u_blitter.h139
-rw-r--r--src/gallium/auxiliary/util/u_box.h19
-rw-r--r--src/gallium/auxiliary/util/u_caps.c271
-rw-r--r--src/gallium/auxiliary/util/u_caps.h71
-rw-r--r--src/gallium/auxiliary/util/u_clear.h25
-rw-r--r--src/gallium/auxiliary/util/u_cpu_detect.c245
-rw-r--r--src/gallium/auxiliary/util/u_cpu_detect.h13
-rw-r--r--src/gallium/auxiliary/util/u_debug.c20
-rw-r--r--src/gallium/auxiliary/util/u_debug.h6
-rw-r--r--src/gallium/auxiliary/util/u_debug_describe.c81
-rw-r--r--src/gallium/auxiliary/util/u_debug_describe.h49
-rw-r--r--src/gallium/auxiliary/util/u_debug_refcnt.c181
-rw-r--r--src/gallium/auxiliary/util/u_debug_refcnt.h63
-rw-r--r--src/gallium/auxiliary/util/u_debug_symbol.c192
-rw-r--r--src/gallium/auxiliary/util/u_debug_symbol.h7
-rw-r--r--src/gallium/auxiliary/util/u_dirty_flags.h28
-rw-r--r--src/gallium/auxiliary/util/u_dirty_surfaces.h30
-rw-r--r--src/gallium/auxiliary/util/u_dl.h3
-rw-r--r--src/gallium/auxiliary/util/u_double_list.h17
-rw-r--r--src/gallium/auxiliary/util/u_draw.h139
-rw-r--r--src/gallium/auxiliary/util/u_draw_quad.c2
-rw-r--r--src/gallium/auxiliary/util/u_draw_quad.h6
-rw-r--r--src/gallium/auxiliary/util/u_dump.h6
-rw-r--r--src/gallium/auxiliary/util/u_dump_defines.c72
-rw-r--r--src/gallium/auxiliary/util/u_dump_state.c13
-rw-r--r--src/gallium/auxiliary/util/u_dynarray.h3
-rw-r--r--src/gallium/auxiliary/util/u_format.c73
-rw-r--r--src/gallium/auxiliary/util/u_format.h143
-rw-r--r--src/gallium/auxiliary/util/u_format_other.c15
-rw-r--r--src/gallium/auxiliary/util/u_format_pack.py3
-rwxr-xr-xsrc/gallium/auxiliary/util/u_format_parse.py2
-rw-r--r--src/gallium/auxiliary/util/u_format_s3tc.c4
-rw-r--r--src/gallium/auxiliary/util/u_format_srgb.py1
-rwxr-xr-xsrc/gallium/auxiliary/util/u_format_table.py5
-rw-r--r--src/gallium/auxiliary/util/u_framebuffer.c148
-rw-r--r--src/gallium/auxiliary/util/u_framebuffer.h54
-rw-r--r--src/gallium/auxiliary/util/u_gen_mipmap.c12
-rw-r--r--src/gallium/auxiliary/util/u_half.py8
-rw-r--r--src/gallium/auxiliary/util/u_index_modify.c127
-rw-r--r--src/gallium/auxiliary/util/u_index_modify.h41
-rw-r--r--src/gallium/auxiliary/util/u_inlines.h42
-rw-r--r--src/gallium/auxiliary/util/u_linear.h1
-rw-r--r--src/gallium/auxiliary/util/u_linkage.c149
-rw-r--r--src/gallium/auxiliary/util/u_linkage.h66
-rw-r--r--src/gallium/auxiliary/util/u_math.h37
-rw-r--r--src/gallium/auxiliary/util/u_mempool.c169
-rw-r--r--src/gallium/auxiliary/util/u_mempool.h87
-rw-r--r--src/gallium/auxiliary/util/u_network.c2
-rw-r--r--src/gallium/auxiliary/util/u_pack_color.h83
-rw-r--r--src/gallium/auxiliary/util/u_pointer.h23
-rw-r--r--src/gallium/auxiliary/util/u_prim.h60
-rw-r--r--src/gallium/auxiliary/util/u_rect.c207
-rw-r--r--src/gallium/auxiliary/util/u_rect.h77
-rw-r--r--src/gallium/auxiliary/util/u_simple_list.h2
-rw-r--r--src/gallium/auxiliary/util/u_simple_shaders.c20
-rw-r--r--src/gallium/auxiliary/util/u_simple_shaders.h7
-rw-r--r--src/gallium/auxiliary/util/u_split_prim.h114
-rw-r--r--src/gallium/auxiliary/util/u_sse.h28
-rw-r--r--src/gallium/auxiliary/util/u_staging.c117
-rw-r--r--src/gallium/auxiliary/util/u_staging.h63
-rw-r--r--src/gallium/auxiliary/util/u_surface.c257
-rw-r--r--src/gallium/auxiliary/util/u_surface.h44
-rw-r--r--src/gallium/auxiliary/util/u_surfaces.c105
-rw-r--r--src/gallium/auxiliary/util/u_surfaces.h48
-rw-r--r--src/gallium/auxiliary/util/u_tile.h3
-rw-r--r--src/gallium/auxiliary/util/u_transfer.c4
-rw-r--r--src/gallium/auxiliary/util/u_transfer.h1
-rw-r--r--src/gallium/auxiliary/util/u_upload_mgr.c2
-rw-r--r--src/gallium/auxiliary/util/u_upload_mgr.h5
245 files changed, 21512 insertions, 9056 deletions
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index 16bb50fe168..05096b12a86 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -4,10 +4,11 @@ include $(TOP)/configs/current
LIBNAME = gallium
C_SOURCES = \
- cso_cache/cso_context.c \
cso_cache/cso_cache.c \
+ cso_cache/cso_context.c \
cso_cache/cso_hash.c \
draw/draw_context.c \
+ draw/draw_fs.c \
draw/draw_gs.c \
draw/draw_pipe.c \
draw/draw_pipe_aaline.c \
@@ -26,32 +27,32 @@ C_SOURCES = \
draw/draw_pipe_wide_line.c \
draw/draw_pipe_wide_point.c \
draw/draw_pt.c \
- draw/draw_pt_elts.c \
draw/draw_pt_emit.c \
draw/draw_pt_fetch.c \
draw/draw_pt_fetch_emit.c \
draw/draw_pt_fetch_shade_emit.c \
draw/draw_pt_fetch_shade_pipeline.c \
draw/draw_pt_post_vs.c \
+ draw/draw_pt_so_emit.c \
draw/draw_pt_util.c \
- draw/draw_pt_varray.c \
- draw/draw_pt_vcache.c \
+ draw/draw_pt_vsplit.c \
draw/draw_vertex.c \
draw/draw_vs.c \
- draw/draw_vs_varient.c \
draw/draw_vs_aos.c \
draw/draw_vs_aos_io.c \
draw/draw_vs_aos_machine.c \
draw/draw_vs_exec.c \
draw/draw_vs_ppc.c \
draw/draw_vs_sse.c \
+ draw/draw_vs_varient.c \
indices/u_indices_gen.c \
indices/u_unfilled_gen.c \
os/os_misc.c \
+ os/os_stream.c \
os/os_stream_log.c \
+ os/os_stream_null.c \
os/os_stream_stdc.c \
os/os_stream_str.c \
- os/os_stream_null.c \
os/os_time.c \
pipebuffer/pb_buffer_fenced.c \
pipebuffer/pb_buffer_malloc.c \
@@ -64,17 +65,16 @@ C_SOURCES = \
pipebuffer/pb_bufmgr_slab.c \
pipebuffer/pb_validate.c \
rbug/rbug_connection.c \
+ rbug/rbug_context.c \
rbug/rbug_core.c \
+ rbug/rbug_demarshal.c \
rbug/rbug_texture.c \
- rbug/rbug_context.c \
rbug/rbug_shader.c \
- rbug/rbug_demarshal.c \
rtasm/rtasm_cpu.c \
rtasm/rtasm_execmem.c \
- rtasm/rtasm_x86sse.c \
rtasm/rtasm_ppc.c \
rtasm/rtasm_ppc_spe.c \
- tgsi/tgsi_sanity.c \
+ rtasm/rtasm_x86sse.c \
tgsi/tgsi_build.c \
tgsi/tgsi_dump.c \
tgsi/tgsi_exec.c \
@@ -82,25 +82,29 @@ C_SOURCES = \
tgsi/tgsi_iterate.c \
tgsi/tgsi_parse.c \
tgsi/tgsi_ppc.c \
+ tgsi/tgsi_sanity.c \
tgsi/tgsi_scan.c \
tgsi/tgsi_sse2.c \
tgsi/tgsi_text.c \
tgsi/tgsi_transform.c \
tgsi/tgsi_ureg.c \
tgsi/tgsi_util.c \
- translate/translate_generic.c \
- translate/translate_sse.c \
translate/translate.c \
translate/translate_cache.c \
+ translate/translate_generic.c \
+ translate/translate_sse.c \
util/u_debug.c \
- util/u_debug_symbol.c \
+ util/u_debug_describe.c \
+ util/u_debug_refcnt.c \
util/u_debug_stack.c \
+ util/u_debug_symbol.c \
util/u_dump_defines.c \
util/u_dump_state.c \
util/u_bitmask.c \
util/u_blit.c \
util/u_blitter.c \
util/u_cache.c \
+ util/u_caps.c \
util/u_cpu_detect.c \
util/u_dl.c \
util/u_draw_quad.c \
@@ -112,21 +116,26 @@ C_SOURCES = \
util/u_format_tests.c \
util/u_format_yuv.c \
util/u_format_zs.c \
+ util/u_framebuffer.c \
util/u_gen_mipmap.c \
util/u_half.c \
util/u_handle_table.c \
- util/u_hash_table.c \
util/u_hash.c \
+ util/u_hash_table.c \
+ util/u_index_modify.c \
util/u_keymap.c \
util/u_linear.c \
+ util/u_linkage.c \
util/u_network.c \
util/u_math.c \
+ util/u_mempool.c \
util/u_mm.c \
util/u_rect.c \
util/u_ringbuffer.c \
util/u_sampler.c \
util/u_simple_shaders.c \
util/u_snprintf.c \
+ util/u_staging.c \
util/u_surface.c \
util/u_surfaces.c \
util/u_texture.c \
@@ -142,28 +151,38 @@ C_SOURCES = \
GALLIVM_SOURCES = \
gallivm/lp_bld_arit.c \
+ gallivm/lp_bld_assert.c \
+ gallivm/lp_bld_bitarit.c \
gallivm/lp_bld_const.c \
gallivm/lp_bld_conv.c \
gallivm/lp_bld_debug.c \
gallivm/lp_bld_flow.c \
gallivm/lp_bld_format_aos.c \
gallivm/lp_bld_format_soa.c \
+ gallivm/lp_bld_format_yuv.c \
+ gallivm/lp_bld_gather.c \
gallivm/lp_bld_init.c \
gallivm/lp_bld_intr.c \
gallivm/lp_bld_logic.c \
gallivm/lp_bld_pack.c \
gallivm/lp_bld_printf.c \
+ gallivm/lp_bld_quad.c \
gallivm/lp_bld_sample.c \
+ gallivm/lp_bld_sample_aos.c \
gallivm/lp_bld_sample_soa.c \
gallivm/lp_bld_struct.c \
gallivm/lp_bld_swizzle.c \
+ gallivm/lp_bld_tgsi_aos.c \
gallivm/lp_bld_tgsi_soa.c \
gallivm/lp_bld_type.c \
draw/draw_llvm.c \
- draw/draw_pt_fetch_shade_pipeline_llvm.c \
- draw/draw_llvm_translate.c
+ draw/draw_llvm_sample.c \
+ draw/draw_llvm_translate.c \
+ draw/draw_vs_llvm.c \
+ draw/draw_pt_fetch_shade_pipeline_llvm.c
-GALLIVM_CPP_SOURCES =
+GALLIVM_CPP_SOURCES = \
+ gallivm/lp_bld_misc.cpp
GENERATED_SOURCES = \
indices/u_indices_gen.c \
diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript
index 6cea83060a9..a18f7c0b2a3 100644
--- a/src/gallium/auxiliary/SConscript
+++ b/src/gallium/auxiliary/SConscript
@@ -32,28 +32,30 @@ env.CodeGenerate(
env.CodeGenerate(
target = 'util/u_format_table.c',
- script = 'util/u_format_table.py',
- source = ['util/u_format.csv'],
- command = 'python $SCRIPT $SOURCE > $TARGET'
+ script = '#src/gallium/auxiliary/util/u_format_table.py',
+ source = ['#src/gallium/auxiliary/util/u_format.csv'],
+ command = python_cmd + ' $SCRIPT $SOURCE > $TARGET'
)
env.CodeGenerate(
target = 'util/u_half.c',
script = 'util/u_half.py',
source = [],
- command = 'python $SCRIPT > $TARGET'
+ command = python_cmd + ' $SCRIPT > $TARGET'
)
env.Depends('util/u_format_table.c', [
- 'util/u_format_parse.py',
+ '#src/gallium/auxiliary/util/u_format_parse.py',
'util/u_format_pack.py',
])
source = [
- 'cso_cache/cso_context.c',
'cso_cache/cso_cache.c',
+ 'cso_cache/cso_context.c',
'cso_cache/cso_hash.c',
'draw/draw_context.c',
+ 'draw/draw_fs.c',
+ 'draw/draw_gs.c',
'draw/draw_pipe.c',
'draw/draw_pipe_aaline.c',
'draw/draw_pipe_aapoint.c',
@@ -71,16 +73,15 @@ source = [
'draw/draw_pipe_wide_line.c',
'draw/draw_pipe_wide_point.c',
'draw/draw_pt.c',
- 'draw/draw_pt_elts.c',
'draw/draw_pt_emit.c',
'draw/draw_pt_fetch.c',
'draw/draw_pt_fetch_emit.c',
'draw/draw_pt_fetch_shade_emit.c',
'draw/draw_pt_fetch_shade_pipeline.c',
'draw/draw_pt_post_vs.c',
+ 'draw/draw_pt_so_emit.c',
'draw/draw_pt_util.c',
- 'draw/draw_pt_varray.c',
- 'draw/draw_pt_vcache.c',
+ 'draw/draw_pt_vsplit.c',
'draw/draw_vertex.c',
'draw/draw_vs.c',
'draw/draw_vs_aos.c',
@@ -90,16 +91,16 @@ source = [
'draw/draw_vs_ppc.c',
'draw/draw_vs_sse.c',
'draw/draw_vs_varient.c',
- 'draw/draw_gs.c',
#'indices/u_indices.c',
#'indices/u_unfilled_indices.c',
'indices/u_indices_gen.c',
'indices/u_unfilled_gen.c',
'os/os_misc.c',
+ 'os/os_stream.c',
'os/os_stream_log.c',
+ 'os/os_stream_null.c',
'os/os_stream_stdc.c',
'os/os_stream_str.c',
- 'os/os_stream_null.c',
'os/os_time.c',
'pipebuffer/pb_buffer_fenced.c',
'pipebuffer/pb_buffer_malloc.c',
@@ -111,42 +112,45 @@ source = [
'pipebuffer/pb_bufmgr_pool.c',
'pipebuffer/pb_bufmgr_slab.c',
'pipebuffer/pb_validate.c',
+ 'rbug/rbug_connection.c',
+ 'rbug/rbug_context.c',
'rbug/rbug_core.c',
+ 'rbug/rbug_demarshal.c',
'rbug/rbug_shader.c',
- 'rbug/rbug_context.c',
'rbug/rbug_texture.c',
- 'rbug/rbug_demarshal.c',
- 'rbug/rbug_connection.c',
'rtasm/rtasm_cpu.c',
'rtasm/rtasm_execmem.c',
- 'rtasm/rtasm_x86sse.c',
'rtasm/rtasm_ppc.c',
'rtasm/rtasm_ppc_spe.c',
+ 'rtasm/rtasm_x86sse.c',
'tgsi/tgsi_build.c',
'tgsi/tgsi_dump.c',
'tgsi/tgsi_exec.c',
'tgsi/tgsi_info.c',
'tgsi/tgsi_iterate.c',
'tgsi/tgsi_parse.c',
+ 'tgsi/tgsi_ppc.c',
'tgsi/tgsi_sanity.c',
'tgsi/tgsi_scan.c',
- 'tgsi/tgsi_ppc.c',
'tgsi/tgsi_sse2.c',
'tgsi/tgsi_text.c',
'tgsi/tgsi_transform.c',
'tgsi/tgsi_ureg.c',
'tgsi/tgsi_util.c',
- 'translate/translate_generic.c',
- 'translate/translate_sse.c',
'translate/translate.c',
'translate/translate_cache.c',
+ 'translate/translate_generic.c',
+ 'translate/translate_sse.c',
'util/u_bitmask.c',
'util/u_blit.c',
'util/u_blitter.c',
'util/u_cache.c',
+ 'util/u_caps.c',
'util/u_cpu_detect.c',
'util/u_debug.c',
+ 'util/u_debug_describe.c',
'util/u_debug_memory.c',
+ 'util/u_debug_refcnt.c',
'util/u_debug_stack.c',
'util/u_debug_symbol.c',
'util/u_dump_defines.c',
@@ -161,14 +165,19 @@ source = [
'util/u_format_tests.c',
'util/u_format_yuv.c',
'util/u_format_zs.c',
+ 'util/u_framebuffer.c',
'util/u_gen_mipmap.c',
'util/u_half.c',
'util/u_handle_table.c',
'util/u_hash.c',
'util/u_hash_table.c',
+ 'util/u_index_modify.c',
'util/u_keymap.c',
+ 'util/u_linear.c',
+ 'util/u_linkage.c',
'util/u_network.c',
'util/u_math.c',
+ 'util/u_mempool.c',
'util/u_mm.c',
'util/u_rect.c',
'util/u_resource.c',
@@ -176,6 +185,7 @@ source = [
'util/u_sampler.c',
'util/u_simple_shaders.c',
'util/u_snprintf.c',
+ 'util/u_staging.c',
'util/u_surface.c',
'util/u_surfaces.c',
'util/u_texture.c',
@@ -192,26 +202,36 @@ source = [
if env['llvm']:
source += [
'gallivm/lp_bld_arit.c',
+ 'gallivm/lp_bld_assert.c',
+ 'gallivm/lp_bld_bitarit.c',
'gallivm/lp_bld_const.c',
'gallivm/lp_bld_conv.c',
'gallivm/lp_bld_debug.c',
'gallivm/lp_bld_flow.c',
'gallivm/lp_bld_format_aos.c',
'gallivm/lp_bld_format_soa.c',
+ 'gallivm/lp_bld_format_yuv.c',
+ 'gallivm/lp_bld_gather.c',
+ 'gallivm/lp_bld_init.c',
'gallivm/lp_bld_intr.c',
'gallivm/lp_bld_logic.c',
- 'gallivm/lp_bld_init.c',
+ 'gallivm/lp_bld_misc.cpp',
'gallivm/lp_bld_pack.c',
'gallivm/lp_bld_printf.c',
+ 'gallivm/lp_bld_quad.c',
'gallivm/lp_bld_sample.c',
+ 'gallivm/lp_bld_sample_aos.c',
'gallivm/lp_bld_sample_soa.c',
'gallivm/lp_bld_struct.c',
'gallivm/lp_bld_swizzle.c',
+ 'gallivm/lp_bld_tgsi_aos.c',
'gallivm/lp_bld_tgsi_soa.c',
'gallivm/lp_bld_type.c',
'draw/draw_llvm.c',
+ 'draw/draw_llvm_sample.c',
+ 'draw/draw_llvm_translate.c',
'draw/draw_pt_fetch_shade_pipeline_llvm.c',
- 'draw/draw_llvm_translate.c'
+ 'draw/draw_vs_llvm.c'
]
gallium = env.ConvenienceLibrary(
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c
index 6d0b4207986..58b022d531d 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.c
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -36,6 +36,7 @@
*/
#include "pipe/p_state.h"
+#include "util/u_framebuffer.h"
#include "util/u_inlines.h"
#include "util/u_math.h"
#include "util/u_memory.h"
@@ -98,14 +99,11 @@ struct cso_context {
struct pipe_framebuffer_state fb, fb_saved;
struct pipe_viewport_state vp, vp_saved;
struct pipe_blend_color blend_color;
+ unsigned sample_mask;
struct pipe_stencil_ref stencil_ref, stencil_ref_saved;
};
-static void
-free_framebuffer_state(struct pipe_framebuffer_state *fb);
-
-
static boolean delete_blend_state(struct cso_context *ctx, void *state)
{
struct cso_blend *cso = (struct cso_blend *)state;
@@ -291,6 +289,9 @@ void cso_release_all( struct cso_context *ctx )
ctx->pipe->bind_fs_state( ctx->pipe, NULL );
ctx->pipe->bind_vs_state( ctx->pipe, NULL );
ctx->pipe->bind_vertex_elements_state( ctx->pipe, NULL );
+ ctx->pipe->set_fragment_sampler_views(ctx->pipe, 0, NULL);
+ if (ctx->pipe->set_vertex_sampler_views)
+ ctx->pipe->set_vertex_sampler_views(ctx->pipe, 0, NULL);
}
for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
@@ -303,8 +304,8 @@ void cso_release_all( struct cso_context *ctx )
pipe_sampler_view_reference(&ctx->vertex_sampler_views_saved[i], NULL);
}
- free_framebuffer_state(&ctx->fb);
- free_framebuffer_state(&ctx->fb_saved);
+ util_unreference_framebuffer_state(&ctx->fb);
+ util_unreference_framebuffer_state(&ctx->fb_saved);
if (ctx->cache) {
cso_cache_delete( ctx->cache );
@@ -313,10 +314,13 @@ void cso_release_all( struct cso_context *ctx )
}
+/**
+ * Free the CSO context. NOTE: the state tracker should have previously called
+ * cso_release_all().
+ */
void cso_destroy_context( struct cso_context *ctx )
{
if (ctx) {
- /*cso_release_all( ctx );*/
FREE( ctx );
}
}
@@ -893,42 +897,11 @@ void cso_restore_vertex_shader(struct cso_context *ctx)
}
-/**
- * Copy framebuffer state from src to dst with refcounting of surfaces.
- */
-static void
-copy_framebuffer_state(struct pipe_framebuffer_state *dst,
- const struct pipe_framebuffer_state *src)
-{
- uint i;
-
- dst->width = src->width;
- dst->height = src->height;
- dst->nr_cbufs = src->nr_cbufs;
- for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
- pipe_surface_reference(&dst->cbufs[i], src->cbufs[i]);
- }
- pipe_surface_reference(&dst->zsbuf, src->zsbuf);
-}
-
-
-static void
-free_framebuffer_state(struct pipe_framebuffer_state *fb)
-{
- uint i;
-
- for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
- pipe_surface_reference(&fb->cbufs[i], NULL);
- }
- pipe_surface_reference(&fb->zsbuf, NULL);
-}
-
-
enum pipe_error cso_set_framebuffer(struct cso_context *ctx,
const struct pipe_framebuffer_state *fb)
{
if (memcmp(&ctx->fb, fb, sizeof(*fb)) != 0) {
- copy_framebuffer_state(&ctx->fb, fb);
+ util_copy_framebuffer_state(&ctx->fb, fb);
ctx->pipe->set_framebuffer_state(ctx->pipe, fb);
}
return PIPE_OK;
@@ -936,15 +909,15 @@ enum pipe_error cso_set_framebuffer(struct cso_context *ctx,
void cso_save_framebuffer(struct cso_context *ctx)
{
- copy_framebuffer_state(&ctx->fb_saved, &ctx->fb);
+ util_copy_framebuffer_state(&ctx->fb_saved, &ctx->fb);
}
void cso_restore_framebuffer(struct cso_context *ctx)
{
if (memcmp(&ctx->fb, &ctx->fb_saved, sizeof(ctx->fb))) {
- copy_framebuffer_state(&ctx->fb, &ctx->fb_saved);
+ util_copy_framebuffer_state(&ctx->fb, &ctx->fb_saved);
ctx->pipe->set_framebuffer_state(ctx->pipe, &ctx->fb);
- free_framebuffer_state(&ctx->fb_saved);
+ util_unreference_framebuffer_state(&ctx->fb_saved);
}
}
@@ -984,6 +957,16 @@ enum pipe_error cso_set_blend_color(struct cso_context *ctx,
return PIPE_OK;
}
+enum pipe_error cso_set_sample_mask(struct cso_context *ctx,
+ unsigned sample_mask)
+{
+ if (ctx->sample_mask != sample_mask) {
+ ctx->sample_mask = sample_mask;
+ ctx->pipe->set_sample_mask(ctx->pipe, sample_mask);
+ }
+ return PIPE_OK;
+}
+
enum pipe_error cso_set_stencil_ref(struct cso_context *ctx,
const struct pipe_stencil_ref *sr)
{
@@ -1049,6 +1032,7 @@ static INLINE void
clip_state_cpy(struct pipe_clip_state *dst,
const struct pipe_clip_state *src)
{
+ dst->depth_clamp = src->depth_clamp;
dst->nr = src->nr;
if (src->nr) {
memcpy(dst->ucp, src->ucp, src->nr * sizeof(src->ucp[0]));
@@ -1059,6 +1043,9 @@ static INLINE int
clip_state_cmp(const struct pipe_clip_state *a,
const struct pipe_clip_state *b)
{
+ if (a->depth_clamp != b->depth_clamp) {
+ return 1;
+ }
if (a->nr != b->nr) {
return 1;
}
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h
index d6bcb1fe8f7..f0b07f73765 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.h
+++ b/src/gallium/auxiliary/cso_cache/cso_context.h
@@ -159,6 +159,8 @@ void cso_restore_viewport(struct cso_context *cso);
enum pipe_error cso_set_blend_color(struct cso_context *cso,
const struct pipe_blend_color *bc);
+enum pipe_error cso_set_sample_mask(struct cso_context *cso,
+ unsigned stencil_mask);
enum pipe_error cso_set_stencil_ref(struct cso_context *cso,
const struct pipe_stencil_ref *sr);
diff --git a/src/gallium/auxiliary/draw/draw_cliptest_tmp.h b/src/gallium/auxiliary/draw/draw_cliptest_tmp.h
new file mode 100644
index 00000000000..958ed20dc84
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_cliptest_tmp.h
@@ -0,0 +1,114 @@
+/**************************************************************************
+ *
+ * Copyright 2010, VMware, inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+
+static boolean TAG(do_cliptest)( struct pt_post_vs *pvs,
+ struct draw_vertex_info *info )
+{
+ struct vertex_header *out = info->verts;
+ const float *scale = pvs->draw->viewport.scale;
+ const float *trans = pvs->draw->viewport.translate;
+ /* const */ float (*plane)[4] = pvs->draw->plane;
+ const unsigned pos = draw_current_shader_position_output(pvs->draw);
+ const unsigned ef = pvs->draw->vs.edgeflag_output;
+ const unsigned nr = pvs->draw->nr_planes;
+ const unsigned flags = (FLAGS);
+ unsigned need_pipeline = 0;
+ unsigned j;
+
+ for (j = 0; j < info->count; j++) {
+ float *position = out->data[pos];
+ unsigned mask = 0x0;
+
+ initialize_vertex_header(out);
+
+ if (flags & (DO_CLIP_XY | DO_CLIP_FULL_Z | DO_CLIP_HALF_Z | DO_CLIP_USER)) {
+ out->clip[0] = position[0];
+ out->clip[1] = position[1];
+ out->clip[2] = position[2];
+ out->clip[3] = position[3];
+
+ /* Do the hardwired planes first:
+ */
+ if (flags & DO_CLIP_XY) {
+ if (-position[0] + position[3] < 0) mask |= (1<<0);
+ if ( position[0] + position[3] < 0) mask |= (1<<1);
+ if (-position[1] + position[3] < 0) mask |= (1<<2);
+ if ( position[1] + position[3] < 0) mask |= (1<<3);
+ }
+
+ /* Clip Z planes according to full cube, half cube or none.
+ */
+ if (flags & DO_CLIP_FULL_Z) {
+ if ( position[2] + position[3] < 0) mask |= (1<<4);
+ if (-position[2] + position[3] < 0) mask |= (1<<5);
+ }
+ else if (flags & DO_CLIP_HALF_Z) {
+ if ( position[2] < 0) mask |= (1<<4);
+ if (-position[2] + position[3] < 0) mask |= (1<<5);
+ }
+
+ if (flags & DO_CLIP_USER) {
+ unsigned i;
+ for (i = 6; i < nr; i++) {
+ if (dot4(position, plane[i]) < 0)
+ mask |= (1<<i);
+ }
+ }
+
+ out->clipmask = mask;
+ need_pipeline |= out->clipmask;
+ }
+
+ if ((flags & DO_VIEWPORT) && mask == 0)
+ {
+ /* divide by w */
+ float w = 1.0f / position[3];
+
+ /* Viewport mapping */
+ position[0] = position[0] * w * scale[0] + trans[0];
+ position[1] = position[1] * w * scale[1] + trans[1];
+ position[2] = position[2] * w * scale[2] + trans[2];
+ position[3] = w;
+ }
+
+ if ((flags & DO_EDGEFLAG) && ef) {
+ const float *edgeflag = out->data[ef];
+ out->edgeflag = !(edgeflag[0] != 1.0f);
+ need_pipeline |= !out->edgeflag;
+ }
+
+ out = (struct vertex_header *)( (char *)out + info->stride );
+ }
+
+ return need_pipeline != 0;
+}
+
+
+#undef FLAGS
+#undef TAG
diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c
index 02abddf1491..032fcbbc70a 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -34,12 +34,33 @@
#include "pipe/p_context.h"
#include "util/u_memory.h"
#include "util/u_math.h"
+#include "util/u_cpu_detect.h"
#include "draw_context.h"
#include "draw_vs.h"
#include "draw_gs.h"
#if HAVE_LLVM
#include "gallivm/lp_bld_init.h"
+#include "draw_llvm.h"
+
+static boolean
+draw_get_option_use_llvm(void)
+{
+ static boolean first = TRUE;
+ static boolean value;
+ if (first) {
+ first = FALSE;
+ value = debug_get_bool_option("DRAW_USE_LLVM", TRUE);
+
+#ifdef PIPE_ARCH_X86
+ util_cpu_detect();
+ /* require SSE2 due to LLVM PR6960. */
+ if (!util_cpu_caps.has_sse2)
+ value = FALSE;
+#endif
+ }
+ return value;
+}
#endif
struct draw_context *draw_create( struct pipe_context *pipe )
@@ -49,9 +70,13 @@ struct draw_context *draw_create( struct pipe_context *pipe )
goto fail;
#if HAVE_LLVM
- lp_build_init();
- assert(lp_build_engine);
- draw->engine = lp_build_engine;
+ if(draw_get_option_use_llvm())
+ {
+ lp_build_init();
+ assert(lp_build_engine);
+ draw->engine = lp_build_engine;
+ draw->llvm = draw_llvm_create(draw);
+ }
#endif
if (!draw_init(draw))
@@ -81,6 +106,8 @@ boolean draw_init(struct draw_context *draw)
ASSIGN_4V( draw->plane[4], 0, 0, 1, 1 ); /* yes these are correct */
ASSIGN_4V( draw->plane[5], 0, 0, -1, 1 ); /* mesa's a bit wonky */
draw->nr_planes = 6;
+ draw->clip_xy = 1;
+ draw->clip_z = 1;
draw->reduced_prim = ~0; /* != any of PIPE_PRIM_x */
@@ -132,6 +159,10 @@ void draw_destroy( struct draw_context *draw )
draw_pt_destroy( draw );
draw_vs_destroy( draw );
draw_gs_destroy( draw );
+#ifdef HAVE_LLVM
+ if(draw->llvm)
+ draw_llvm_destroy( draw->llvm );
+#endif
FREE( draw );
}
@@ -157,6 +188,14 @@ void draw_set_mrd(struct draw_context *draw, double mrd)
}
+static void update_clip_flags( struct draw_context *draw )
+{
+ draw->clip_xy = !draw->driver.bypass_clip_xy;
+ draw->clip_z = (!draw->driver.bypass_clip_z &&
+ !draw->depth_clamp);
+ draw->clip_user = (draw->nr_planes > 6);
+}
+
/**
* Register new primitive rasterization/rendering state.
* This causes the drawing pipeline to be rebuilt.
@@ -171,18 +210,25 @@ void draw_set_rasterizer_state( struct draw_context *draw,
draw->rasterizer = raster;
draw->rast_handle = rast_handle;
- draw->bypass_clipping = draw->driver.bypass_clipping;
- }
+ }
}
-
+/* With a little more work, llvmpipe will be able to turn this off and
+ * do its own x/y clipping.
+ *
+ * Some hardware can turn off clipping altogether - in particular any
+ * hardware with a TNL unit can do its own clipping, even if it is
+ * relying on the draw module for some other reason.
+ */
void draw_set_driver_clipping( struct draw_context *draw,
- boolean bypass_clipping )
+ boolean bypass_clip_xy,
+ boolean bypass_clip_z )
{
draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
- draw->driver.bypass_clipping = bypass_clipping;
- draw->bypass_clipping = draw->driver.bypass_clipping;
+ draw->driver.bypass_clip_xy = bypass_clip_xy;
+ draw->driver.bypass_clip_z = bypass_clip_z;
+ update_clip_flags(draw);
}
@@ -211,6 +257,9 @@ void draw_set_clip_state( struct draw_context *draw,
assert(clip->nr <= PIPE_MAX_CLIP_PLANES);
memcpy(&draw->plane[6], clip->ucp, clip->nr * sizeof(clip->ucp[0]));
draw->nr_planes = 6 + clip->nr;
+ draw->depth_clamp = clip->depth_clamp;
+
+ update_clip_flags(draw);
}
@@ -282,12 +331,19 @@ draw_set_mapped_constant_buffer(struct draw_context *draw,
shader_type == PIPE_SHADER_GEOMETRY);
debug_assert(slot < PIPE_MAX_CONSTANT_BUFFERS);
- if (shader_type == PIPE_SHADER_VERTEX) {
+ switch (shader_type) {
+ case PIPE_SHADER_VERTEX:
draw->pt.user.vs_constants[slot] = buffer;
+ draw->pt.user.vs_constants_size[slot] = size;
draw_vs_set_constants(draw, slot, buffer, size);
- } else if (shader_type == PIPE_SHADER_GEOMETRY) {
+ break;
+ case PIPE_SHADER_GEOMETRY:
draw->pt.user.gs_constants[slot] = buffer;
+ draw->pt.user.gs_constants_size[slot] = size;
draw_gs_set_constants(draw, slot, buffer, size);
+ break;
+ default:
+ assert(0 && "invalid shader type in draw_set_mapped_constant_buffer");
}
}
@@ -357,6 +413,42 @@ draw_set_force_passthrough( struct draw_context *draw, boolean enable )
}
+
+/**
+ * Allocate an extra vertex/geometry shader vertex attribute.
+ * This is used by some of the optional draw module stages such
+ * as wide_point which may need to allocate additional generic/texcoord
+ * attributes.
+ */
+int
+draw_alloc_extra_vertex_attrib(struct draw_context *draw,
+ uint semantic_name, uint semantic_index)
+{
+ const int num_outputs = draw_current_shader_outputs(draw);
+ const int n = draw->extra_shader_outputs.num;
+
+ assert(n < Elements(draw->extra_shader_outputs.semantic_name));
+
+ draw->extra_shader_outputs.semantic_name[n] = semantic_name;
+ draw->extra_shader_outputs.semantic_index[n] = semantic_index;
+ draw->extra_shader_outputs.slot[n] = num_outputs + n;
+ draw->extra_shader_outputs.num++;
+
+ return draw->extra_shader_outputs.slot[n];
+}
+
+
+/**
+ * Remove all extra vertex attributes that were allocated with
+ * draw_alloc_extra_vertex_attrib().
+ */
+void
+draw_remove_extra_vertex_attribs(struct draw_context *draw)
+{
+ draw->extra_shader_outputs.num = 0;
+}
+
+
/**
* Ask the draw module for the location/slot of the given vertex attribute in
* a post-transformed vertex.
@@ -390,12 +482,12 @@ draw_find_shader_output(const struct draw_context *draw,
return i;
}
- /* XXX there may be more than one extra vertex attrib.
- * For example, simulated gl_FragCoord and gl_PointCoord.
- */
- if (draw->extra_shader_outputs.semantic_name == semantic_name &&
- draw->extra_shader_outputs.semantic_index == semantic_index) {
- return draw->extra_shader_outputs.slot;
+ /* Search the extra vertex attributes */
+ for (i = 0; i < draw->extra_shader_outputs.num; i++) {
+ if (draw->extra_shader_outputs.semantic_name[i] == semantic_name &&
+ draw->extra_shader_outputs.semantic_index[i] == semantic_index) {
+ return draw->extra_shader_outputs.slot[i];
+ }
}
return 0;
@@ -414,16 +506,18 @@ draw_find_shader_output(const struct draw_context *draw,
uint
draw_num_shader_outputs(const struct draw_context *draw)
{
- uint count = draw->vs.vertex_shader->info.num_outputs;
+ uint count;
/* If a geometry shader is present, its outputs go to the
* driver, else the vertex shader's outputs.
*/
if (draw->gs.geometry_shader)
count = draw->gs.geometry_shader->info.num_outputs;
+ else
+ count = draw->vs.vertex_shader->info.num_outputs;
+
+ count += draw->extra_shader_outputs.num;
- if (draw->extra_shader_outputs.slot > 0)
- count++;
return count;
}
@@ -435,13 +529,18 @@ draw_num_shader_outputs(const struct draw_context *draw)
*/
void
draw_texture_samplers(struct draw_context *draw,
+ uint shader,
uint num_samplers,
struct tgsi_sampler **samplers)
{
- draw->vs.num_samplers = num_samplers;
- draw->vs.samplers = samplers;
- draw->gs.num_samplers = num_samplers;
- draw->gs.samplers = samplers;
+ if (shader == PIPE_SHADER_VERTEX) {
+ draw->vs.num_samplers = num_samplers;
+ draw->vs.samplers = samplers;
+ } else {
+ debug_assert(shader == PIPE_SHADER_GEOMETRY);
+ draw->gs.num_samplers = num_samplers;
+ draw->gs.samplers = samplers;
+ }
}
@@ -454,47 +553,28 @@ void draw_set_render( struct draw_context *draw,
}
-
-/**
- * Tell the drawing context about the index/element buffer to use
- * (ala glDrawElements)
- * If no element buffer is to be used (i.e. glDrawArrays) then this
- * should be called with eltSize=0 and elements=NULL.
- *
- * \param draw the drawing context
- * \param eltSize size of each element (1, 2 or 4 bytes)
- * \param elements the element buffer ptr
- */
void
-draw_set_mapped_element_buffer_range( struct draw_context *draw,
- unsigned eltSize,
- int eltBias,
- unsigned min_index,
- unsigned max_index,
- const void *elements )
+draw_set_index_buffer(struct draw_context *draw,
+ const struct pipe_index_buffer *ib)
{
- draw->pt.user.elts = elements;
- draw->pt.user.eltSize = eltSize;
- draw->pt.user.eltBias = eltBias;
- draw->pt.user.min_index = min_index;
- draw->pt.user.max_index = max_index;
+ if (ib)
+ memcpy(&draw->pt.index_buffer, ib, sizeof(draw->pt.index_buffer));
+ else
+ memset(&draw->pt.index_buffer, 0, sizeof(draw->pt.index_buffer));
}
+/**
+ * Tell drawing context where to find mapped index/element buffer.
+ */
void
-draw_set_mapped_element_buffer( struct draw_context *draw,
- unsigned eltSize,
- int eltBias,
- const void *elements )
+draw_set_mapped_index_buffer(struct draw_context *draw,
+ const void *elements)
{
- draw->pt.user.elts = elements;
- draw->pt.user.eltSize = eltSize;
- draw->pt.user.eltBias = eltBias;
- draw->pt.user.min_index = 0;
- draw->pt.user.max_index = 0xffffffff;
+ draw->pt.user.elts = elements;
}
-
+
/* Revamp me please:
*/
void draw_do_flush( struct draw_context *draw, unsigned flags )
@@ -566,7 +646,7 @@ draw_get_rasterizer_no_cull( struct draw_context *draw,
memset(&rast, 0, sizeof(rast));
rast.scissor = scissor;
rast.flatshade = flatshade;
- rast.front_winding = PIPE_WINDING_CCW;
+ rast.front_ccw = 1;
rast.gl_rasterization_rules = draw->rasterizer->gl_rasterization_rules;
draw->rasterizer_no_cull[scissor][flatshade] =
@@ -574,3 +654,82 @@ draw_get_rasterizer_no_cull( struct draw_context *draw,
}
return draw->rasterizer_no_cull[scissor][flatshade];
}
+
+void
+draw_set_mapped_so_buffers(struct draw_context *draw,
+ void *buffers[PIPE_MAX_SO_BUFFERS],
+ unsigned num_buffers)
+{
+ int i;
+
+ for (i = 0; i < num_buffers; ++i) {
+ draw->so.buffers[i] = buffers[i];
+ }
+ draw->so.num_buffers = num_buffers;
+}
+
+void
+draw_set_so_state(struct draw_context *draw,
+ struct pipe_stream_output_state *state)
+{
+ memcpy(&draw->so.state,
+ state,
+ sizeof(struct pipe_stream_output_state));
+}
+
+void
+draw_set_sampler_views(struct draw_context *draw,
+ struct pipe_sampler_view **views,
+ unsigned num)
+{
+ unsigned i;
+
+ debug_assert(num <= PIPE_MAX_VERTEX_SAMPLERS);
+
+ for (i = 0; i < num; ++i)
+ draw->sampler_views[i] = views[i];
+ for (i = num; i < PIPE_MAX_VERTEX_SAMPLERS; ++i)
+ draw->sampler_views[i] = NULL;
+
+ draw->num_sampler_views = num;
+}
+
+void
+draw_set_samplers(struct draw_context *draw,
+ struct pipe_sampler_state **samplers,
+ unsigned num)
+{
+ unsigned i;
+
+ debug_assert(num <= PIPE_MAX_VERTEX_SAMPLERS);
+
+ for (i = 0; i < num; ++i)
+ draw->samplers[i] = samplers[i];
+ for (i = num; i < PIPE_MAX_VERTEX_SAMPLERS; ++i)
+ draw->samplers[i] = NULL;
+
+ draw->num_samplers = num;
+
+#ifdef HAVE_LLVM
+ if (draw->llvm)
+ draw_llvm_set_sampler_state(draw);
+#endif
+}
+
+void
+draw_set_mapped_texture(struct draw_context *draw,
+ unsigned sampler_idx,
+ uint32_t width, uint32_t height, uint32_t depth,
+ uint32_t last_level,
+ uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS],
+ uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS],
+ const void *data[DRAW_MAX_TEXTURE_LEVELS])
+{
+#ifdef HAVE_LLVM
+ if(draw->llvm)
+ draw_llvm_set_mapped_texture(draw,
+ sampler_idx,
+ width, height, depth, last_level,
+ row_stride, img_stride, data);
+#endif
+}
diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h
index b905c2f2da6..1f27cbf488a 100644
--- a/src/gallium/auxiliary/draw/draw_context.h
+++ b/src/gallium/auxiliary/draw/draw_context.h
@@ -39,19 +39,24 @@
#include "pipe/p_state.h"
+#include "tgsi/tgsi_exec.h"
struct pipe_context;
struct draw_context;
struct draw_stage;
struct draw_vertex_shader;
struct draw_geometry_shader;
+struct draw_fragment_shader;
struct tgsi_sampler;
+#define DRAW_MAX_TEXTURE_LEVELS 13 /* 4K x 4K for now */
struct draw_context *draw_create( struct pipe_context *pipe );
void draw_destroy( struct draw_context *draw );
+void draw_flush(struct draw_context *draw);
+
void draw_set_viewport_state( struct draw_context *draw,
const struct pipe_viewport_state *viewport );
@@ -97,9 +102,27 @@ draw_num_shader_outputs(const struct draw_context *draw);
void
draw_texture_samplers(struct draw_context *draw,
+ uint shader_type,
uint num_samplers,
struct tgsi_sampler **samplers);
+void
+draw_set_sampler_views(struct draw_context *draw,
+ struct pipe_sampler_view **views,
+ unsigned num);
+void
+draw_set_samplers(struct draw_context *draw,
+ struct pipe_sampler_state **samplers,
+ unsigned num);
+
+void
+draw_set_mapped_texture(struct draw_context *draw,
+ unsigned sampler_idx,
+ uint32_t width, uint32_t height, uint32_t depth,
+ uint32_t last_level,
+ uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS],
+ uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS],
+ const void *data[DRAW_MAX_TEXTURE_LEVELS]);
/*
@@ -116,6 +139,17 @@ void draw_delete_vertex_shader(struct draw_context *draw,
/*
+ * Fragment shader functions
+ */
+struct draw_fragment_shader *
+draw_create_fragment_shader(struct draw_context *draw,
+ const struct pipe_shader_state *shader);
+void draw_bind_fragment_shader(struct draw_context *draw,
+ struct draw_fragment_shader *dvs);
+void draw_delete_fragment_shader(struct draw_context *draw,
+ struct draw_fragment_shader *dvs);
+
+/*
* Geometry shader functions
*/
struct draw_geometry_shader *
@@ -139,18 +173,11 @@ void draw_set_vertex_elements(struct draw_context *draw,
unsigned count,
const struct pipe_vertex_element *elements);
-void
-draw_set_mapped_element_buffer_range( struct draw_context *draw,
- unsigned eltSize,
- int eltBias,
- unsigned min_index,
- unsigned max_index,
- const void *elements );
-
-void draw_set_mapped_element_buffer( struct draw_context *draw,
- unsigned eltSize,
- int eltBias,
- const void *elements );
+void draw_set_index_buffer(struct draw_context *draw,
+ const struct pipe_index_buffer *ib);
+
+void draw_set_mapped_index_buffer(struct draw_context *draw,
+ const void *elements);
void draw_set_mapped_vertex_buffer(struct draw_context *draw,
unsigned attr, const void *buffer);
@@ -162,11 +189,22 @@ draw_set_mapped_constant_buffer(struct draw_context *draw,
const void *buffer,
unsigned size);
+void
+draw_set_mapped_so_buffers(struct draw_context *draw,
+ void *buffers[PIPE_MAX_SO_BUFFERS],
+ unsigned num_buffers);
+void
+draw_set_so_state(struct draw_context *draw,
+ struct pipe_stream_output_state *state);
+
/***********************************************************************
- * draw_prim.c
+ * draw_pt.c
*/
+void draw_vbo(struct draw_context *draw,
+ const struct pipe_draw_info *info);
+
void draw_arrays(struct draw_context *draw, unsigned prim,
unsigned start, unsigned count);
@@ -178,8 +216,6 @@ draw_arrays_instanced(struct draw_context *draw,
unsigned startInstance,
unsigned instanceCount);
-void draw_flush(struct draw_context *draw);
-
/*******************************************************************************
* Driver backend interface
@@ -189,7 +225,8 @@ void draw_set_render( struct draw_context *draw,
struct vbuf_render *render );
void draw_set_driver_clipping( struct draw_context *draw,
- boolean bypass_clipping );
+ boolean bypass_clip_xy,
+ boolean bypass_clip_z );
void draw_set_force_passthrough( struct draw_context *draw,
boolean enable );
@@ -201,4 +238,16 @@ boolean draw_need_pipeline(const struct draw_context *draw,
const struct pipe_rasterizer_state *rasterizer,
unsigned prim );
+static INLINE int
+draw_get_shader_param(unsigned shader, enum pipe_cap param)
+{
+ switch(shader) {
+ case PIPE_SHADER_VERTEX:
+ case PIPE_SHADER_GEOMETRY:
+ return tgsi_exec_get_shader_param(param);
+ default:
+ return 0;
+ }
+}
+
#endif /* DRAW_CONTEXT_H */
diff --git a/src/gallium/auxiliary/draw/draw_decompose_tmp.h b/src/gallium/auxiliary/draw/draw_decompose_tmp.h
new file mode 100644
index 00000000000..a142563af97
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_decompose_tmp.h
@@ -0,0 +1,431 @@
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.9
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright (C) 2010 LunarG Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ * Chia-I Wu <[email protected]>
+ */
+
+/* these macros are optional */
+#ifndef LOCAL_VARS
+#define LOCAL_VARS
+#endif
+#ifndef FUNC_ENTER
+#define FUNC_ENTER do {} while (0)
+#endif
+#ifndef FUNC_EXIT
+#define FUNC_EXIT do {} while (0)
+#endif
+#ifndef LINE_ADJ
+#define LINE_ADJ(flags, a0, i0, i1, a1) LINE(flags, i0, i1)
+#endif
+#ifndef TRIANGLE_ADJ
+#define TRIANGLE_ADJ(flags, i0, a0, i1, a1, i2, a2) TRIANGLE(flags, i0, i1, i2)
+#endif
+
+static void
+FUNC(FUNC_VARS)
+{
+ unsigned idx[6], i;
+ ushort flags;
+ LOCAL_VARS
+
+ FUNC_ENTER;
+
+ /* prim, prim_flags, count, and last_vertex_last should have been defined */
+ if (0) {
+ debug_printf("%s: prim 0x%x, prim_flags 0x%x, count %d, last_vertex_last %d\n",
+ __FUNCTION__, prim, prim_flags, count, last_vertex_last);
+ }
+
+ switch (prim) {
+ case PIPE_PRIM_POINTS:
+ for (i = 0; i < count; i++) {
+ idx[0] = GET_ELT(i);
+ POINT(idx[0]);
+ }
+ break;
+
+ case PIPE_PRIM_LINES:
+ flags = DRAW_PIPE_RESET_STIPPLE;
+ for (i = 0; i + 1 < count; i += 2) {
+ idx[0] = GET_ELT(i);
+ idx[1] = GET_ELT(i + 1);
+ LINE(flags, idx[0], idx[1]);
+ }
+ break;
+
+ case PIPE_PRIM_LINE_LOOP:
+ case PIPE_PRIM_LINE_STRIP:
+ if (count >= 2) {
+ flags = (prim_flags & DRAW_SPLIT_BEFORE) ? 0 : DRAW_PIPE_RESET_STIPPLE;
+ idx[1] = GET_ELT(0);
+ idx[2] = idx[1];
+
+ for (i = 1; i < count; i++, flags = 0) {
+ idx[0] = idx[1];
+ idx[1] = GET_ELT(i);
+ LINE(flags, idx[0], idx[1]);
+ }
+ /* close the loop */
+ if (prim == PIPE_PRIM_LINE_LOOP && !prim_flags)
+ LINE(flags, idx[1], idx[2]);
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLES:
+ flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL;
+ for (i = 0; i + 2 < count; i += 3) {
+ idx[0] = GET_ELT(i);
+ idx[1] = GET_ELT(i + 1);
+ idx[2] = GET_ELT(i + 2);
+ TRIANGLE(flags, idx[0], idx[1], idx[2]);
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ if (count >= 3) {
+ flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL;
+ idx[1] = GET_ELT(0);
+ idx[2] = GET_ELT(1);
+
+ if (last_vertex_last) {
+ for (i = 0; i + 2 < count; i++) {
+ idx[0] = idx[1];
+ idx[1] = idx[2];
+ idx[2] = GET_ELT(i + 2);
+ /* always emit idx[2] last */
+ if (i & 1)
+ TRIANGLE(flags, idx[1], idx[0], idx[2]);
+ else
+ TRIANGLE(flags, idx[0], idx[1], idx[2]);
+ }
+ }
+ else {
+ for (i = 0; i + 2 < count; i++) {
+ idx[0] = idx[1];
+ idx[1] = idx[2];
+ idx[2] = GET_ELT(i + 2);
+ /* always emit idx[0] first */
+ if (i & 1)
+ TRIANGLE(flags, idx[0], idx[2], idx[1]);
+ else
+ TRIANGLE(flags, idx[0], idx[1], idx[2]);
+ }
+ }
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLE_FAN:
+ if (count >= 3) {
+ flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL;
+ idx[0] = GET_ELT(0);
+ idx[2] = GET_ELT(1);
+
+ /* idx[0] is neither the first nor the last vertex */
+ if (last_vertex_last) {
+ for (i = 0; i + 2 < count; i++) {
+ idx[1] = idx[2];
+ idx[2] = GET_ELT(i + 2);
+ /* always emit idx[2] last */
+ TRIANGLE(flags, idx[0], idx[1], idx[2]);
+ }
+ }
+ else {
+ for (i = 0; i + 2 < count; i++) {
+ idx[1] = idx[2];
+ idx[2] = GET_ELT(i + 2);
+ /* always emit idx[1] first */
+ TRIANGLE(flags, idx[1], idx[2], idx[0]);
+ }
+ }
+ }
+ break;
+
+ case PIPE_PRIM_QUADS:
+ if (last_vertex_last) {
+ for (i = 0; i + 3 < count; i += 4) {
+ idx[0] = GET_ELT(i);
+ idx[1] = GET_ELT(i + 1);
+ idx[2] = GET_ELT(i + 2);
+ idx[3] = GET_ELT(i + 3);
+
+ flags = DRAW_PIPE_RESET_STIPPLE |
+ DRAW_PIPE_EDGE_FLAG_0 |
+ DRAW_PIPE_EDGE_FLAG_2;
+ /* always emit idx[3] last */
+ TRIANGLE(flags, idx[0], idx[1], idx[3]);
+
+ flags = DRAW_PIPE_EDGE_FLAG_0 |
+ DRAW_PIPE_EDGE_FLAG_1;
+ TRIANGLE(flags, idx[1], idx[2], idx[3]);
+ }
+ }
+ else {
+ for (i = 0; i + 3 < count; i += 4) {
+ idx[0] = GET_ELT(i);
+ idx[1] = GET_ELT(i + 1);
+ idx[2] = GET_ELT(i + 2);
+ idx[3] = GET_ELT(i + 3);
+
+ flags = DRAW_PIPE_RESET_STIPPLE |
+ DRAW_PIPE_EDGE_FLAG_0 |
+ DRAW_PIPE_EDGE_FLAG_1;
+ /* XXX should always emit idx[0] first */
+ /* always emit idx[3] first */
+ TRIANGLE(flags, idx[3], idx[0], idx[1]);
+
+ flags = DRAW_PIPE_EDGE_FLAG_1 |
+ DRAW_PIPE_EDGE_FLAG_2;
+ TRIANGLE(flags, idx[3], idx[1], idx[2]);
+ }
+ }
+ break;
+
+ case PIPE_PRIM_QUAD_STRIP:
+ if (count >= 4) {
+ idx[2] = GET_ELT(0);
+ idx[3] = GET_ELT(1);
+
+ if (last_vertex_last) {
+ for (i = 0; i + 3 < count; i += 2) {
+ idx[0] = idx[2];
+ idx[1] = idx[3];
+ idx[2] = GET_ELT(i + 2);
+ idx[3] = GET_ELT(i + 3);
+
+ /* always emit idx[3] last */
+ flags = DRAW_PIPE_RESET_STIPPLE |
+ DRAW_PIPE_EDGE_FLAG_0 |
+ DRAW_PIPE_EDGE_FLAG_2;
+ TRIANGLE(flags, idx[2], idx[0], idx[3]);
+
+ flags = DRAW_PIPE_EDGE_FLAG_0 |
+ DRAW_PIPE_EDGE_FLAG_1;
+ TRIANGLE(flags, idx[0], idx[1], idx[3]);
+ }
+ }
+ else {
+ for (i = 0; i + 3 < count; i += 2) {
+ idx[0] = idx[2];
+ idx[1] = idx[3];
+ idx[2] = GET_ELT(i + 2);
+ idx[3] = GET_ELT(i + 3);
+
+ flags = DRAW_PIPE_RESET_STIPPLE |
+ DRAW_PIPE_EDGE_FLAG_0 |
+ DRAW_PIPE_EDGE_FLAG_1;
+ /* XXX should always emit idx[0] first */
+ /* always emit idx[3] first */
+ TRIANGLE(flags, idx[3], idx[2], idx[0]);
+
+ flags = DRAW_PIPE_EDGE_FLAG_1 |
+ DRAW_PIPE_EDGE_FLAG_2;
+ TRIANGLE(flags, idx[3], idx[0], idx[1]);
+ }
+ }
+ }
+ break;
+
+ case PIPE_PRIM_POLYGON:
+ if (count >= 3) {
+ ushort edge_next, edge_finish;
+
+ if (last_vertex_last) {
+ flags = (DRAW_PIPE_RESET_STIPPLE |
+ DRAW_PIPE_EDGE_FLAG_0);
+ if (!(prim_flags & DRAW_SPLIT_BEFORE))
+ flags |= DRAW_PIPE_EDGE_FLAG_2;
+
+ edge_next = DRAW_PIPE_EDGE_FLAG_0;
+ edge_finish =
+ (prim_flags & DRAW_SPLIT_AFTER) ? 0 : DRAW_PIPE_EDGE_FLAG_1;
+ }
+ else {
+ flags = (DRAW_PIPE_RESET_STIPPLE |
+ DRAW_PIPE_EDGE_FLAG_1);
+ if (!(prim_flags & DRAW_SPLIT_BEFORE))
+ flags |= DRAW_PIPE_EDGE_FLAG_0;
+
+ edge_next = DRAW_PIPE_EDGE_FLAG_1;
+ edge_finish =
+ (prim_flags & DRAW_SPLIT_AFTER) ? 0 : DRAW_PIPE_EDGE_FLAG_2;
+ }
+
+ idx[0] = GET_ELT(0);
+ idx[2] = GET_ELT(1);
+
+ for (i = 0; i + 2 < count; i++, flags = edge_next) {
+ idx[1] = idx[2];
+ idx[2] = GET_ELT(i + 2);
+
+ if (i + 3 == count)
+ flags |= edge_finish;
+
+ /* idx[0] is both the first and the last vertex */
+ if (last_vertex_last)
+ TRIANGLE(flags, idx[1], idx[2], idx[0]);
+ else
+ TRIANGLE(flags, idx[0], idx[1], idx[2]);
+ }
+ }
+ break;
+
+ case PIPE_PRIM_LINES_ADJACENCY:
+ flags = DRAW_PIPE_RESET_STIPPLE;
+ for (i = 0; i + 3 < count; i += 4) {
+ idx[0] = GET_ELT(i);
+ idx[1] = GET_ELT(i + 1);
+ idx[2] = GET_ELT(i + 2);
+ idx[3] = GET_ELT(i + 3);
+ LINE_ADJ(flags, idx[0], idx[1], idx[2], idx[3]);
+ }
+ break;
+
+ case PIPE_PRIM_LINE_STRIP_ADJACENCY:
+ if (count >= 4) {
+ flags = (prim_flags & DRAW_SPLIT_BEFORE) ? 0 : DRAW_PIPE_RESET_STIPPLE;
+ idx[1] = GET_ELT(0);
+ idx[2] = GET_ELT(1);
+ idx[3] = GET_ELT(2);
+
+ for (i = 1; i + 2 < count; i++, flags = 0) {
+ idx[0] = idx[1];
+ idx[1] = idx[2];
+ idx[2] = idx[3];
+ idx[3] = GET_ELT(i + 2);
+ LINE_ADJ(flags, idx[0], idx[1], idx[2], idx[3]);
+ }
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLES_ADJACENCY:
+ flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL;
+ for (i = 0; i + 5 < count; i += 6) {
+ idx[0] = GET_ELT(i);
+ idx[1] = GET_ELT(i + 1);
+ idx[2] = GET_ELT(i + 2);
+ idx[3] = GET_ELT(i + 3);
+ idx[4] = GET_ELT(i + 4);
+ idx[5] = GET_ELT(i + 5);
+ TRIANGLE_ADJ(flags, idx[0], idx[1], idx[2], idx[3], idx[4], idx[5]);
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
+ if (count >= 6) {
+ flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL;
+ idx[0] = GET_ELT(1);
+ idx[2] = GET_ELT(0);
+ idx[4] = GET_ELT(2);
+ idx[3] = GET_ELT(4);
+
+ /*
+ * The vertices of the i-th triangle are stored in
+ * idx[0,2,4] = { 2*i, 2*i+2, 2*i+4 };
+ *
+ * The adjacent vertices are stored in
+ * idx[1,3,5] = { 2*i-2, 2*i+6, 2*i+3 }.
+ *
+ * However, there are two exceptions:
+ *
+ * For the first triangle, idx[1] = 1;
+ * For the last triangle, idx[3] = 2*i+5.
+ */
+ if (last_vertex_last) {
+ for (i = 0; i + 5 < count; i += 2) {
+ idx[1] = idx[0];
+
+ idx[0] = idx[2];
+ idx[2] = idx[4];
+ idx[4] = idx[3];
+
+ idx[3] = GET_ELT(i + ((i + 7 < count) ? 6 : 5));
+ idx[5] = GET_ELT(i + 3);
+
+ /*
+ * alternate the first two vertices (idx[0] and idx[2]) and the
+ * corresponding adjacent vertices (idx[3] and idx[5]) to have
+ * the correct orientation
+ */
+ if (i & 2) {
+ TRIANGLE_ADJ(flags,
+ idx[2], idx[1], idx[0], idx[5], idx[4], idx[3]);
+ }
+ else {
+ TRIANGLE_ADJ(flags,
+ idx[0], idx[1], idx[2], idx[3], idx[4], idx[5]);
+ }
+ }
+ }
+ else {
+ for (i = 0; i + 5 < count; i += 2) {
+ idx[1] = idx[0];
+
+ idx[0] = idx[2];
+ idx[2] = idx[4];
+ idx[4] = idx[3];
+
+ idx[3] = GET_ELT(i + ((i + 7 < count) ? 6 : 5));
+ idx[5] = GET_ELT(i + 3);
+
+ /*
+ * alternate the last two vertices (idx[2] and idx[4]) and the
+ * corresponding adjacent vertices (idx[1] and idx[5]) to have
+ * the correct orientation
+ */
+ if (i & 2) {
+ TRIANGLE_ADJ(flags,
+ idx[0], idx[5], idx[4], idx[3], idx[2], idx[1]);
+ }
+ else {
+ TRIANGLE_ADJ(flags,
+ idx[0], idx[1], idx[2], idx[3], idx[4], idx[5]);
+ }
+ }
+ }
+ }
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+
+ FUNC_EXIT;
+}
+
+#undef LOCAL_VARS
+#undef FUNC_ENTER
+#undef FUNC_EXIT
+#undef LINE_ADJ
+#undef TRIANGLE_ADJ
+
+#undef FUNC
+#undef FUNC_VARS
+#undef GET_ELT
+#undef POINT
+#undef LINE
+#undef TRIANGLE
diff --git a/src/gallium/auxiliary/draw/draw_fs.c b/src/gallium/auxiliary/draw/draw_fs.c
new file mode 100644
index 00000000000..1543bd86f17
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_fs.c
@@ -0,0 +1,73 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "pipe/p_shader_tokens.h"
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_prim.h"
+
+#include "tgsi/tgsi_parse.h"
+
+#include "draw_fs.h"
+#include "draw_private.h"
+#include "draw_context.h"
+
+
+struct draw_fragment_shader *
+draw_create_fragment_shader(struct draw_context *draw,
+ const struct pipe_shader_state *shader)
+{
+ struct draw_fragment_shader *dfs;
+
+ dfs = CALLOC_STRUCT(draw_fragment_shader);
+ if (dfs) {
+ dfs->base = *shader;
+ tgsi_scan_shader(shader->tokens, &dfs->info);
+ }
+
+ return dfs;
+}
+
+
+void
+draw_bind_fragment_shader(struct draw_context *draw,
+ struct draw_fragment_shader *dfs)
+{
+ draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE);
+
+ draw->fs.fragment_shader = dfs;
+}
+
+
+void
+draw_delete_fragment_shader(struct draw_context *draw,
+ struct draw_fragment_shader *dfs)
+{
+ FREE(dfs);
+}
+
diff --git a/src/gallium/auxiliary/draw/draw_fs.h b/src/gallium/auxiliary/draw/draw_fs.h
new file mode 100644
index 00000000000..44995b8277f
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_fs.h
@@ -0,0 +1,42 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef DRAW_FS_H
+#define DRAW_FS_H
+
+
+#include "tgsi/tgsi_scan.h"
+
+
+struct draw_fragment_shader
+{
+ struct pipe_shader_state base;
+ struct tgsi_shader_info info;
+};
+
+
+#endif /* DRAW_FS_H */
diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c
index 131deed43e4..50a03ac95a5 100644
--- a/src/gallium/auxiliary/draw/draw_gs.c
+++ b/src/gallium/auxiliary/draw/draw_gs.c
@@ -1,6 +1,6 @@
/**************************************************************************
*
- * Copyright 2009 VMWare Inc.
+ * Copyright 2009 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -37,8 +37,8 @@
#include "util/u_math.h"
#include "util/u_memory.h"
+#include "util/u_prim.h"
-#define MAX_PRIM_VERTICES 6
/* fixme: move it from here */
#define MAX_PRIMITIVES 64
@@ -75,6 +75,10 @@ draw_gs_set_constants(struct draw_context *draw,
const void *constants,
unsigned size)
{
+ /* noop. added here for symmetry with the VS
+ * code and in case we'll ever want to allign
+ * the constants, e.g. when we'll change to a
+ * different interpreter */
}
@@ -90,6 +94,7 @@ draw_create_geometry_shader(struct draw_context *draw,
if (!gs)
return NULL;
+ gs->draw = draw;
gs->state = *state;
gs->state.tokens = tgsi_dup_tokens(state->tokens);
if (!gs->state.tokens) {
@@ -112,7 +117,7 @@ draw_create_geometry_shader(struct draw_context *draw,
TGSI_PROPERTY_GS_OUTPUT_PRIM)
gs->output_primitive = gs->info.properties[i].data[0];
else if (gs->info.properties[i].name ==
- TGSI_PROPERTY_GS_MAX_VERTICES)
+ TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES)
gs->max_output_vertices = gs->info.properties[i].data[0];
}
@@ -154,128 +159,37 @@ void draw_delete_geometry_shader(struct draw_context *draw,
FREE(dgs);
}
-static INLINE int num_vertices_for_prim(int prim)
-{
- switch(prim) {
- case PIPE_PRIM_POINTS:
- return 1;
- case PIPE_PRIM_LINES:
- return 2;
- case PIPE_PRIM_LINE_LOOP:
- return 2;
- case PIPE_PRIM_LINE_STRIP:
- return 2;
- case PIPE_PRIM_TRIANGLES:
- return 3;
- case PIPE_PRIM_TRIANGLE_STRIP:
- return 3;
- case PIPE_PRIM_TRIANGLE_FAN:
- return 3;
- case PIPE_PRIM_LINES_ADJACENCY:
- case PIPE_PRIM_LINE_STRIP_ADJACENCY:
- return 4;
- case PIPE_PRIM_TRIANGLES_ADJACENCY:
- case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
- return 6;
- default:
- assert(!"Bad geometry shader input");
- return 0;
- }
-}
-
-static void draw_fetch_geometry_input(struct draw_geometry_shader *shader,
- int start_primitive,
- int num_primitives,
- const float (*input_ptr)[4],
- unsigned input_vertex_stride,
- unsigned inputs_from_vs)
-{
- struct tgsi_exec_machine *machine = shader->machine;
- unsigned slot, vs_slot, k, j;
- unsigned num_vertices = num_vertices_for_prim(shader->input_primitive);
- int idx = 0;
-
- for (slot = 0, vs_slot = 0; slot < shader->info.num_inputs; slot++) {
- /*debug_printf("Slot = %d (semantic = %d)\n", slot,
- shader->info.input_semantic_name[slot]);*/
- if (shader->info.input_semantic_name[slot] ==
- TGSI_SEMANTIC_PRIMID) {
- for (j = 0; j < num_primitives; ++j) {
- machine->Inputs[idx].xyzw[0].f[j] = (float)start_primitive + j;
- machine->Inputs[idx].xyzw[1].f[j] = (float)start_primitive + j;
- machine->Inputs[idx].xyzw[2].f[j] = (float)start_primitive + j;
- machine->Inputs[idx].xyzw[3].f[j] = (float)start_primitive + j;
- }
- ++idx;
- } else {
- for (j = 0; j < num_primitives; ++j) {
- int vidx = idx;
- const float (*prim_ptr)[4];
- /*debug_printf(" %d) Prim (num_verts = %d)\n", start_primitive + j,
- num_vertices);*/
- prim_ptr = (const float (*)[4])(
- (const char *)input_ptr +
- (j * num_vertices * input_vertex_stride));
-
- for (k = 0; k < num_vertices; ++k, ++vidx) {
- const float (*input)[4];
- input = (const float (*)[4])(
- (const char *)prim_ptr + (k * input_vertex_stride));
- vidx = k * TGSI_EXEC_MAX_INPUT_ATTRIBS + slot;
- /*debug_printf("\t%d)(%d) Input vert:\n", vidx, k);*/
-#if 1
- assert(!util_is_inf_or_nan(input[vs_slot][0]));
- assert(!util_is_inf_or_nan(input[vs_slot][1]));
- assert(!util_is_inf_or_nan(input[vs_slot][2]));
- assert(!util_is_inf_or_nan(input[vs_slot][3]));
-#endif
- machine->Inputs[vidx].xyzw[0].f[j] = input[vs_slot][0];
- machine->Inputs[vidx].xyzw[1].f[j] = input[vs_slot][1];
- machine->Inputs[vidx].xyzw[2].f[j] = input[vs_slot][2];
- machine->Inputs[vidx].xyzw[3].f[j] = input[vs_slot][3];
-#if 0
- debug_printf("\t\t%d %f %f %f %f\n", slot,
- machine->Inputs[vidx].xyzw[0].f[j],
- machine->Inputs[vidx].xyzw[1].f[j],
- machine->Inputs[vidx].xyzw[2].f[j],
- machine->Inputs[vidx].xyzw[3].f[j]);
-#endif
- }
- }
- ++vs_slot;
- idx += num_vertices;
- }
- }
-}
-
+/*#define DEBUG_OUTPUTS 1*/
static INLINE void
draw_geometry_fetch_outputs(struct draw_geometry_shader *shader,
int num_primitives,
- float (*output)[4],
- unsigned vertex_size)
+ float (**p_output)[4])
{
struct tgsi_exec_machine *machine = shader->machine;
unsigned prim_idx, j, slot;
+ float (*output)[4];
+
+ output = *p_output;
/* Unswizzle all output results.
*/
- /* FIXME: handle all the primitives produced by the gs, not just
- * the first one
- unsigned prim_count =
- mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0];*/
+
for (prim_idx = 0; prim_idx < num_primitives; ++prim_idx) {
- unsigned num_verts_per_prim = machine->Primitives[0];
+ unsigned num_verts_per_prim = machine->Primitives[prim_idx];
+ shader->primitive_lengths[prim_idx + shader->emitted_primitives] =
+ machine->Primitives[prim_idx];
+ shader->emitted_vertices += num_verts_per_prim;
for (j = 0; j < num_verts_per_prim; j++) {
int idx = (prim_idx * num_verts_per_prim + j) *
shader->info.num_outputs;
#ifdef DEBUG_OUTPUTS
- debug_printf("%d) Output vert:\n", idx);
+ debug_printf("%d) Output vert:\n", idx / shader->info.num_outputs);
#endif
for (slot = 0; slot < shader->info.num_outputs; slot++) {
- output[slot][0] = machine->Outputs[idx + slot].xyzw[0].f[prim_idx];
- output[slot][1] = machine->Outputs[idx + slot].xyzw[1].f[prim_idx];
- output[slot][2] = machine->Outputs[idx + slot].xyzw[2].f[prim_idx];
- output[slot][3] = machine->Outputs[idx + slot].xyzw[3].f[prim_idx];
+ output[slot][0] = machine->Outputs[idx + slot].xyzw[0].f[0];
+ output[slot][1] = machine->Outputs[idx + slot].xyzw[1].f[0];
+ output[slot][2] = machine->Outputs[idx + slot].xyzw[2].f[0];
+ output[slot][3] = machine->Outputs[idx + slot].xyzw[3].f[0];
#ifdef DEBUG_OUTPUTS
debug_printf("\t%d: %f %f %f %f\n", slot,
output[slot][0],
@@ -285,52 +199,276 @@ draw_geometry_fetch_outputs(struct draw_geometry_shader *shader,
#endif
debug_assert(!util_is_inf_or_nan(output[slot][0]));
}
- output = (float (*)[4])((char *)output + vertex_size);
+ output = (float (*)[4])((char *)output + shader->vertex_size);
}
}
+ *p_output = output;
+ shader->emitted_primitives += num_primitives;
}
-void draw_geometry_shader_run(struct draw_geometry_shader *shader,
- const float (*input)[4],
- float (*output)[4],
- const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
- unsigned count,
- unsigned input_stride,
- unsigned vertex_size)
+/*#define DEBUG_INPUTS 1*/
+static void draw_fetch_gs_input(struct draw_geometry_shader *shader,
+ unsigned *indices,
+ unsigned num_vertices,
+ unsigned prim_idx)
{
struct tgsi_exec_machine *machine = shader->machine;
- unsigned int i;
- unsigned num_vertices = num_vertices_for_prim(shader->input_primitive);
- unsigned num_primitives = count/num_vertices;
- unsigned inputs_from_vs = 0;
+ unsigned slot, vs_slot, i;
+ unsigned input_vertex_stride = shader->input_vertex_stride;
+ const float (*input_ptr)[4];
- for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
- machine->Consts[i] = constants[i];
- }
+ input_ptr = shader->input;
- for (i = 0; i < shader->info.num_inputs; ++i) {
- if (shader->info.input_semantic_name[i] != TGSI_SEMANTIC_PRIMID)
- ++inputs_from_vs;
+ for (i = 0; i < num_vertices; ++i) {
+ const float (*input)[4];
+#if DEBUG_INPUTS
+ debug_printf("%d) vertex index = %d (prim idx = %d)\n",
+ i, indices[i], prim_idx);
+#endif
+ input = (const float (*)[4])(
+ (const char *)input_ptr + (indices[i] * input_vertex_stride));
+ for (slot = 0, vs_slot = 0; slot < shader->info.num_inputs; ++slot) {
+ unsigned idx = i * TGSI_EXEC_MAX_INPUT_ATTRIBS + slot;
+ if (shader->info.input_semantic_name[slot] == TGSI_SEMANTIC_PRIMID) {
+ machine->Inputs[idx].xyzw[0].f[prim_idx] =
+ (float)shader->in_prim_idx;
+ machine->Inputs[idx].xyzw[1].f[prim_idx] =
+ (float)shader->in_prim_idx;
+ machine->Inputs[idx].xyzw[2].f[prim_idx] =
+ (float)shader->in_prim_idx;
+ machine->Inputs[idx].xyzw[3].f[prim_idx] =
+ (float)shader->in_prim_idx;
+ } else {
+#if DEBUG_INPUTS
+ debug_printf("\tSlot = %d, vs_slot = %d, idx = %d:\n",
+ slot, vs_slot, idx);
+#endif
+#if 1
+ assert(!util_is_inf_or_nan(input[vs_slot][0]));
+ assert(!util_is_inf_or_nan(input[vs_slot][1]));
+ assert(!util_is_inf_or_nan(input[vs_slot][2]));
+ assert(!util_is_inf_or_nan(input[vs_slot][3]));
+#endif
+ machine->Inputs[idx].xyzw[0].f[prim_idx] = input[vs_slot][0];
+ machine->Inputs[idx].xyzw[1].f[prim_idx] = input[vs_slot][1];
+ machine->Inputs[idx].xyzw[2].f[prim_idx] = input[vs_slot][2];
+ machine->Inputs[idx].xyzw[3].f[prim_idx] = input[vs_slot][3];
+#if DEBUG_INPUTS
+ debug_printf("\t\t%f %f %f %f\n",
+ machine->Inputs[idx].xyzw[0].f[prim_idx],
+ machine->Inputs[idx].xyzw[1].f[prim_idx],
+ machine->Inputs[idx].xyzw[2].f[prim_idx],
+ machine->Inputs[idx].xyzw[3].f[prim_idx]);
+#endif
+ ++vs_slot;
+ }
+ }
}
+}
- for (i = 0; i < num_primitives; ++i) {
- unsigned int max_primitives = 1;
+static void gs_flush(struct draw_geometry_shader *shader,
+ unsigned input_primitives)
+{
+ unsigned out_prim_count;
+ struct tgsi_exec_machine *machine = shader->machine;
+
+ debug_assert(input_primitives > 0 &&
+ input_primitives < 4);
- draw_fetch_geometry_input(shader, i, max_primitives, input,
- input_stride, inputs_from_vs);
+ tgsi_set_exec_mask(machine,
+ 1,
+ input_primitives > 1,
+ input_primitives > 2,
+ input_primitives > 3);
- tgsi_set_exec_mask(machine,
- 1,
- max_primitives > 1,
- max_primitives > 2,
- max_primitives > 3);
+ /* run interpreter */
+ tgsi_exec_machine_run(machine);
- /* run interpreter */
- tgsi_exec_machine_run(machine);
+ out_prim_count =
+ machine->Temps[TGSI_EXEC_TEMP_PRIMITIVE_I].xyzw[TGSI_EXEC_TEMP_PRIMITIVE_C].u[0];
+
+#if 0
+ debug_printf("PRIM emitted prims = %d (verts=%d), cur prim count = %d\n",
+ shader->emitted_primitives, shader->emitted_vertices,
+ out_prim_count);
+#endif
+ draw_geometry_fetch_outputs(shader, out_prim_count,
+ &shader->tmp_output);
+}
+
+static void gs_point(struct draw_geometry_shader *shader,
+ int idx)
+{
+ unsigned indices[1];
+
+ indices[0] = idx;
+
+ draw_fetch_gs_input(shader, indices, 1, 0);
+ ++shader->in_prim_idx;
+
+ gs_flush(shader, 1);
+}
+
+static void gs_line(struct draw_geometry_shader *shader,
+ int i0, int i1)
+{
+ unsigned indices[2];
- draw_geometry_fetch_outputs(shader, max_primitives,
- output, vertex_size);
+ indices[0] = i0;
+ indices[1] = i1;
+
+ draw_fetch_gs_input(shader, indices, 2, 0);
+ ++shader->in_prim_idx;
+
+ gs_flush(shader, 1);
+}
+
+static void gs_line_adj(struct draw_geometry_shader *shader,
+ int i0, int i1, int i2, int i3)
+{
+ unsigned indices[4];
+
+ indices[0] = i0;
+ indices[1] = i1;
+ indices[2] = i2;
+ indices[3] = i3;
+
+ draw_fetch_gs_input(shader, indices, 4, 0);
+ ++shader->in_prim_idx;
+
+ gs_flush(shader, 1);
+}
+
+static void gs_tri(struct draw_geometry_shader *shader,
+ int i0, int i1, int i2)
+{
+ unsigned indices[3];
+
+ indices[0] = i0;
+ indices[1] = i1;
+ indices[2] = i2;
+
+ draw_fetch_gs_input(shader, indices, 3, 0);
+ ++shader->in_prim_idx;
+
+ gs_flush(shader, 1);
+}
+
+static void gs_tri_adj(struct draw_geometry_shader *shader,
+ int i0, int i1, int i2,
+ int i3, int i4, int i5)
+{
+ unsigned indices[6];
+
+ indices[0] = i0;
+ indices[1] = i1;
+ indices[2] = i2;
+ indices[3] = i3;
+ indices[4] = i4;
+ indices[5] = i5;
+
+ draw_fetch_gs_input(shader, indices, 6, 0);
+ ++shader->in_prim_idx;
+
+ gs_flush(shader, 1);
+}
+
+#define FUNC gs_run
+#define GET_ELT(idx) (idx)
+#include "draw_gs_tmp.h"
+
+
+#define FUNC gs_run_elts
+#define LOCAL_VARS const ushort *elts = input_prims->elts;
+#define GET_ELT(idx) (elts[idx])
+#include "draw_gs_tmp.h"
+
+
+/**
+ * Execute geometry shader using TGSI interpreter.
+ */
+int draw_geometry_shader_run(struct draw_geometry_shader *shader,
+ const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
+ const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS],
+ const struct draw_vertex_info *input_verts,
+ const struct draw_prim_info *input_prim,
+ struct draw_vertex_info *output_verts,
+ struct draw_prim_info *output_prims )
+{
+ const float (*input)[4] = (const float (*)[4])input_verts->verts->data;
+ unsigned input_stride = input_verts->vertex_size;
+ unsigned vertex_size = input_verts->vertex_size;
+ struct tgsi_exec_machine *machine = shader->machine;
+ unsigned num_input_verts = input_prim->linear ?
+ input_verts->count :
+ input_prim->count;
+ unsigned num_in_primitives =
+ MAX2(u_gs_prims_for_vertices(input_prim->prim, num_input_verts),
+ u_gs_prims_for_vertices(shader->input_primitive, num_input_verts));
+ unsigned max_out_prims = u_gs_prims_for_vertices(shader->output_primitive,
+ shader->max_output_vertices)
+ * num_in_primitives;
+
+ output_verts->vertex_size = input_verts->vertex_size;
+ output_verts->stride = input_verts->vertex_size;
+ output_verts->verts =
+ (struct vertex_header *)MALLOC(input_verts->vertex_size *
+ num_in_primitives *
+ shader->max_output_vertices);
+
+
+#if 0
+ debug_printf("%s count = %d (in prims # = %d)\n",
+ __FUNCTION__, num_input_verts, num_in_primitives);
+ debug_printf("\tlinear = %d, prim_info->count = %d\n",
+ input_prim->linear, input_prim->count);
+ debug_printf("\tprimt pipe = %d, shader in = %d, shader out = %d, max out = %d\n",
+ input_prim->prim, shader->input_primitive,
+ shader->output_primitive,
+ shader->max_output_vertices);
+#endif
+
+ shader->emitted_vertices = 0;
+ shader->emitted_primitives = 0;
+ shader->vertex_size = vertex_size;
+ shader->tmp_output = (float (*)[4])output_verts->verts->data;
+ shader->in_prim_idx = 0;
+ shader->input_vertex_stride = input_stride;
+ shader->input = input;
+ if (shader->primitive_lengths) {
+ FREE(shader->primitive_lengths);
}
+ shader->primitive_lengths = MALLOC(max_out_prims * sizeof(unsigned));
+
+ tgsi_exec_set_constant_buffers(machine, PIPE_MAX_CONSTANT_BUFFERS,
+ constants, constants_size);
+
+ if (input_prim->linear)
+ gs_run(shader, input_prim, input_verts,
+ output_prims, output_verts);
+ else
+ gs_run_elts(shader, input_prim, input_verts,
+ output_prims, output_verts);
+
+ /* Update prim_info:
+ */
+ output_prims->linear = TRUE;
+ output_prims->elts = NULL;
+ output_prims->start = 0;
+ output_prims->count = shader->emitted_vertices;
+ output_prims->prim = shader->output_primitive;
+ output_prims->flags = 0x0;
+ output_prims->primitive_lengths = shader->primitive_lengths;
+ output_prims->primitive_count = shader->emitted_primitives;
+ output_verts->count = shader->emitted_vertices;
+
+#if 0
+ debug_printf("GS finished, prims = %d, verts = %d\n",
+ output_prims->primitive_count,
+ output_verts->count);
+#endif
+
+ return shader->emitted_vertices;
}
void draw_geometry_shader_delete(struct draw_geometry_shader *shader)
diff --git a/src/gallium/auxiliary/draw/draw_gs.h b/src/gallium/auxiliary/draw/draw_gs.h
index d8eb2103433..67bc1aa73ff 100644
--- a/src/gallium/auxiliary/draw/draw_gs.h
+++ b/src/gallium/auxiliary/draw/draw_gs.h
@@ -1,6 +1,6 @@
/**************************************************************************
*
- * Copyright 2009 VMWare Inc.
+ * Copyright 2009 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -54,23 +54,37 @@ struct draw_geometry_shader {
unsigned input_primitive;
unsigned output_primitive;
- /* Extracted from shader:
- */
- const float (*immediates)[4];
+ unsigned *primitive_lengths;
+ unsigned emitted_vertices;
+ unsigned emitted_primitives;
+
+ float (*tmp_output)[4];
+ unsigned vertex_size;
+
+ unsigned in_prim_idx;
+ unsigned input_vertex_stride;
+ const float (*input)[4];
};
-void draw_geometry_shader_run(struct draw_geometry_shader *shader,
- const float (*input)[4],
- float (*output)[4],
- const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
- unsigned count,
- unsigned input_stride,
- unsigned output_stride);
+/*
+ * Returns the number of vertices emitted.
+ * The vertex shader can emit any number of vertices as long as it's
+ * smaller than the GS_MAX_OUTPUT_VERTICES shader property.
+ */
+int draw_geometry_shader_run(struct draw_geometry_shader *shader,
+ const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
+ const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS],
+ const struct draw_vertex_info *input_verts,
+ const struct draw_prim_info *input_prim,
+ struct draw_vertex_info *output_verts,
+ struct draw_prim_info *output_prims );
void draw_geometry_shader_prepare(struct draw_geometry_shader *shader,
struct draw_context *draw);
void draw_geometry_shader_delete(struct draw_geometry_shader *shader);
+int draw_gs_max_output_vertices(struct draw_geometry_shader *shader,
+ unsigned pipe_prim);
#endif
diff --git a/src/gallium/auxiliary/draw/draw_gs_tmp.h b/src/gallium/auxiliary/draw/draw_gs_tmp.h
new file mode 100644
index 00000000000..de7b02655a5
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_gs_tmp.h
@@ -0,0 +1,32 @@
+#define FUNC_VARS struct draw_geometry_shader *gs, \
+ const struct draw_prim_info *input_prims, \
+ const struct draw_vertex_info *input_verts, \
+ struct draw_prim_info *output_prims, \
+ struct draw_vertex_info *output_verts
+
+#define FUNC_ENTER \
+ /* declare more local vars */ \
+ const unsigned prim = input_prims->prim; \
+ const unsigned prim_flags = input_prims->flags; \
+ const unsigned count = input_prims->count; \
+ const boolean last_vertex_last = TRUE; \
+ do { \
+ debug_assert(input_prims->primitive_count == 1); \
+ switch (prim) { \
+ case PIPE_PRIM_QUADS: \
+ case PIPE_PRIM_QUAD_STRIP: \
+ case PIPE_PRIM_POLYGON: \
+ debug_assert(!"unexpected primitive type in GS"); \
+ return; \
+ default: \
+ break; \
+ } \
+ } while (0) \
+
+#define POINT(i0) gs_point(gs,i0)
+#define LINE(flags,i0,i1) gs_line(gs,i0,i1)
+#define TRIANGLE(flags,i0,i1,i2) gs_tri(gs,i0,i1,i2)
+#define LINE_ADJ(flags,i0,i1,i2,i3) gs_line_adj(gs,i0,i1,i2,i3)
+#define TRIANGLE_ADJ(flags,i0,i1,i2,i3,i4,i5) gs_tri_adj(gs,i0,i1,i2,i3,i4,i5)
+
+#include "draw_decompose_tmp.h"
diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c
index 27383221b9b..7fb86d7cb27 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -1,3 +1,30 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
#include "draw_llvm.h"
#include "draw_context.h"
@@ -10,17 +37,21 @@
#include "gallivm/lp_bld_debug.h"
#include "gallivm/lp_bld_tgsi.h"
#include "gallivm/lp_bld_printf.h"
+#include "gallivm/lp_bld_intr.h"
+#include "gallivm/lp_bld_init.h"
#include "tgsi/tgsi_exec.h"
+#include "tgsi/tgsi_dump.h"
#include "util/u_cpu_detect.h"
+#include "util/u_math.h"
+#include "util/u_pointer.h"
#include "util/u_string.h"
#include <llvm-c/Transforms/Scalar.h>
#define DEBUG_STORE 0
-
/* generates the draw jit function */
static void
draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var);
@@ -34,12 +65,24 @@ init_globals(struct draw_llvm *llvm)
/* struct draw_jit_texture */
{
- LLVMTypeRef elem_types[4];
+ LLVMTypeRef elem_types[DRAW_JIT_TEXTURE_NUM_FIELDS];
elem_types[DRAW_JIT_TEXTURE_WIDTH] = LLVMInt32Type();
elem_types[DRAW_JIT_TEXTURE_HEIGHT] = LLVMInt32Type();
- elem_types[DRAW_JIT_TEXTURE_STRIDE] = LLVMInt32Type();
- elem_types[DRAW_JIT_TEXTURE_DATA] = LLVMPointerType(LLVMInt8Type(), 0);
+ elem_types[DRAW_JIT_TEXTURE_DEPTH] = LLVMInt32Type();
+ elem_types[DRAW_JIT_TEXTURE_LAST_LEVEL] = LLVMInt32Type();
+ elem_types[DRAW_JIT_TEXTURE_ROW_STRIDE] =
+ LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS);
+ elem_types[DRAW_JIT_TEXTURE_IMG_STRIDE] =
+ LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS);
+ elem_types[DRAW_JIT_TEXTURE_DATA] =
+ LLVMArrayType(LLVMPointerType(LLVMInt8Type(), 0),
+ DRAW_MAX_TEXTURE_LEVELS);
+ elem_types[DRAW_JIT_TEXTURE_MIN_LOD] = LLVMFloatType();
+ elem_types[DRAW_JIT_TEXTURE_MAX_LOD] = LLVMFloatType();
+ elem_types[DRAW_JIT_TEXTURE_LOD_BIAS] = LLVMFloatType();
+ elem_types[DRAW_JIT_TEXTURE_BORDER_COLOR] =
+ LLVMArrayType(LLVMFloatType(), 4);
texture_type = LLVMStructType(elem_types, Elements(elem_types), 0);
@@ -49,12 +92,33 @@ init_globals(struct draw_llvm *llvm)
LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, height,
llvm->target, texture_type,
DRAW_JIT_TEXTURE_HEIGHT);
- LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, stride,
+ LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, depth,
+ llvm->target, texture_type,
+ DRAW_JIT_TEXTURE_DEPTH);
+ LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, last_level,
+ llvm->target, texture_type,
+ DRAW_JIT_TEXTURE_LAST_LEVEL);
+ LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, row_stride,
+ llvm->target, texture_type,
+ DRAW_JIT_TEXTURE_ROW_STRIDE);
+ LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, img_stride,
llvm->target, texture_type,
- DRAW_JIT_TEXTURE_STRIDE);
+ DRAW_JIT_TEXTURE_IMG_STRIDE);
LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, data,
llvm->target, texture_type,
DRAW_JIT_TEXTURE_DATA);
+ LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, min_lod,
+ llvm->target, texture_type,
+ DRAW_JIT_TEXTURE_MIN_LOD);
+ LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, max_lod,
+ llvm->target, texture_type,
+ DRAW_JIT_TEXTURE_MAX_LOD);
+ LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, lod_bias,
+ llvm->target, texture_type,
+ DRAW_JIT_TEXTURE_LOD_BIAS);
+ LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, border_color,
+ llvm->target, texture_type,
+ DRAW_JIT_TEXTURE_BORDER_COLOR);
LP_CHECK_STRUCT_SIZE(struct draw_jit_texture,
llvm->target, texture_type);
@@ -69,7 +133,8 @@ init_globals(struct draw_llvm *llvm)
elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */
elem_types[1] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */
- elem_types[2] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */
+ elem_types[2] = LLVMArrayType(texture_type,
+ PIPE_MAX_VERTEX_SAMPLERS); /* textures */
context_type = LLVMStructType(elem_types, Elements(elem_types), 0);
@@ -79,7 +144,7 @@ init_globals(struct draw_llvm *llvm)
llvm->target, context_type, 1);
LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, textures,
llvm->target, context_type,
- DRAW_JIT_CONTEXT_TEXTURES_INDEX);
+ DRAW_JIT_CTX_TEXTURES);
LP_CHECK_STRUCT_SIZE(struct draw_jit_context,
llvm->target, context_type);
@@ -161,9 +226,11 @@ create_vertex_header(struct draw_llvm *llvm, int data_elems)
struct draw_llvm *
draw_llvm_create(struct draw_context *draw)
{
- struct draw_llvm *llvm = CALLOC_STRUCT( draw_llvm );
+ struct draw_llvm *llvm;
- util_cpu_detect();
+ llvm = CALLOC_STRUCT( draw_llvm );
+ if (!llvm)
+ return NULL;
llvm->draw = draw;
llvm->engine = draw->engine;
@@ -179,27 +246,50 @@ draw_llvm_create(struct draw_context *draw)
llvm->pass = LLVMCreateFunctionPassManager(llvm->provider);
LLVMAddTargetData(llvm->target, llvm->pass);
- /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
- * but there are more on SVN. */
- /* TODO: Add more passes */
- LLVMAddConstantPropagationPass(llvm->pass);
- if(util_cpu_caps.has_sse4_1) {
- /* FIXME: There is a bug in this pass, whereby the combination of fptosi
- * and sitofp (necessary for trunc/floor/ceil/round implementation)
- * somehow becomes invalid code.
+
+ if ((gallivm_debug & GALLIVM_DEBUG_NO_OPT) == 0) {
+ /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
+ * but there are more on SVN. */
+ /* TODO: Add more passes */
+
+ LLVMAddCFGSimplificationPass(llvm->pass);
+
+ if (HAVE_LLVM >= 0x207 && sizeof(void*) == 4) {
+ /* For LLVM >= 2.7 and 32-bit build, use this order of passes to
+ * avoid generating bad code.
+ * Test with piglit glsl-vs-sqrt-zero test.
+ */
+ LLVMAddConstantPropagationPass(llvm->pass);
+ LLVMAddPromoteMemoryToRegisterPass(llvm->pass);
+ }
+ else {
+ LLVMAddPromoteMemoryToRegisterPass(llvm->pass);
+ LLVMAddConstantPropagationPass(llvm->pass);
+ }
+
+ if(util_cpu_caps.has_sse4_1) {
+ /* FIXME: There is a bug in this pass, whereby the combination of fptosi
+ * and sitofp (necessary for trunc/floor/ceil/round implementation)
+ * somehow becomes invalid code.
+ */
+ LLVMAddInstructionCombiningPass(llvm->pass);
+ }
+ LLVMAddGVNPass(llvm->pass);
+ } else {
+ /* We need at least this pass to prevent the backends to fail in
+ * unexpected ways.
*/
- LLVMAddInstructionCombiningPass(llvm->pass);
+ LLVMAddPromoteMemoryToRegisterPass(llvm->pass);
}
- LLVMAddPromoteMemoryToRegisterPass(llvm->pass);
- LLVMAddGVNPass(llvm->pass);
- LLVMAddCFGSimplificationPass(llvm->pass);
init_globals(llvm);
+ if (gallivm_debug & GALLIVM_DEBUG_IR) {
+ LLVMDumpModule(llvm->module);
+ }
-#if 0
- LLVMDumpModule(llvm->module);
-#endif
+ llvm->nr_variants = 0;
+ make_empty_list(&llvm->vs_variants_list);
return llvm;
}
@@ -207,21 +297,41 @@ draw_llvm_create(struct draw_context *draw)
void
draw_llvm_destroy(struct draw_llvm *llvm)
{
+ LLVMDisposePassManager(llvm->pass);
+
FREE(llvm);
}
struct draw_llvm_variant *
-draw_llvm_prepare(struct draw_llvm *llvm, int num_inputs)
+draw_llvm_create_variant(struct draw_llvm *llvm,
+ unsigned num_inputs,
+ const struct draw_llvm_variant_key *key)
{
- struct draw_llvm_variant *variant = MALLOC(sizeof(struct draw_llvm_variant));
+ struct draw_llvm_variant *variant;
+ struct llvm_vertex_shader *shader =
+ llvm_vertex_shader(llvm->draw->vs.vertex_shader);
- draw_llvm_make_variant_key(llvm, &variant->key);
+ variant = MALLOC(sizeof *variant +
+ shader->variant_key_size -
+ sizeof variant->key);
+ if (variant == NULL)
+ return NULL;
+
+ variant->llvm = llvm;
+
+ memcpy(&variant->key, key, shader->variant_key_size);
llvm->vertex_header_ptr_type = create_vertex_header(llvm, num_inputs);
draw_llvm_generate(llvm, variant);
draw_llvm_generate_elts(llvm, variant);
+ variant->shader = shader;
+ variant->list_item_global.base = variant;
+ variant->list_item_local.base = variant;
+ /*variant->no = */shader->variants_created++;
+ variant->list_item_global.base = variant;
+
return variant;
}
@@ -230,11 +340,13 @@ generate_vs(struct draw_llvm *llvm,
LLVMBuilderRef builder,
LLVMValueRef (*outputs)[NUM_CHANNELS],
const LLVMValueRef (*inputs)[NUM_CHANNELS],
- LLVMValueRef context_ptr)
+ LLVMValueRef context_ptr,
+ struct lp_build_sampler_soa *draw_sampler)
{
const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens;
struct lp_type vs_type;
LLVMValueRef consts_ptr = draw_jit_context_vs_constants(builder, context_ptr);
+ struct lp_build_sampler_soa *sampler = 0;
memset(&vs_type, 0, sizeof vs_type);
vs_type.floating = TRUE; /* floating point values */
@@ -246,7 +358,14 @@ generate_vs(struct draw_llvm *llvm,
num_vs = 4; /* number of vertices per block */
#endif
- /*tgsi_dump(tokens, 0);*/
+ if (gallivm_debug & GALLIVM_DEBUG_IR) {
+ tgsi_dump(tokens, 0);
+ }
+
+ if (llvm->draw->num_sampler_views &&
+ llvm->draw->num_samplers)
+ sampler = draw_sampler;
+
lp_build_tgsi_soa(builder,
tokens,
vs_type,
@@ -255,7 +374,7 @@ generate_vs(struct draw_llvm *llvm,
NULL /*pos*/,
inputs,
outputs,
- NULL/*sampler*/,
+ sampler,
&llvm->draw->vs.vertex_shader->info);
}
@@ -283,7 +402,8 @@ generate_fetch(LLVMBuilderRef builder,
LLVMValueRef *res,
struct pipe_vertex_element *velem,
LLVMValueRef vbuf,
- LLVMValueRef index)
+ LLVMValueRef index,
+ LLVMValueRef instance_id)
{
LLVMValueRef indices = LLVMConstInt(LLVMInt64Type(), velem->vertex_buffer_index, 0);
LLVMValueRef vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr,
@@ -294,8 +414,15 @@ generate_fetch(LLVMBuilderRef builder,
LLVMValueRef cond;
LLVMValueRef stride;
- cond = LLVMBuildICmp(builder, LLVMIntULE, index, vb_max_index, "");
+ if (velem->instance_divisor) {
+ /* array index = instance_id / instance_divisor */
+ index = LLVMBuildUDiv(builder, instance_id,
+ LLVMConstInt(LLVMInt32Type(), velem->instance_divisor, 0),
+ "instance_divisor");
+ }
+ /* limit index to min(inex, vb_max_index) */
+ cond = LLVMBuildICmp(builder, LLVMIntULE, index, vb_max_index, "");
index = LLVMBuildSelect(builder, cond, index, vb_max_index, "");
stride = LLVMBuildMul(builder, vb_stride, index, "");
@@ -563,20 +690,22 @@ convert_to_aos(LLVMBuilderRef builder,
static void
draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
{
- LLVMTypeRef arg_types[7];
+ LLVMTypeRef arg_types[8];
LLVMTypeRef func_type;
LLVMValueRef context_ptr;
LLVMBasicBlockRef block;
LLVMBuilderRef builder;
LLVMValueRef start, end, count, stride, step, io_itr;
LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
+ LLVMValueRef instance_id;
struct draw_context *draw = llvm->draw;
unsigned i, j;
struct lp_build_context bld;
struct lp_build_loop_state lp_loop;
- struct lp_type vs_type = lp_type_float_vec(32);
const int max_vertices = 4;
LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
+ void *code;
+ struct lp_build_sampler_soa *sampler = 0;
arg_types[0] = llvm->context_ptr_type; /* context */
arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */
@@ -585,6 +714,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
arg_types[4] = LLVMInt32Type(); /* count */
arg_types[5] = LLVMInt32Type(); /* stride */
arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */
+ arg_types[7] = LLVMInt32Type(); /* instance_id */
func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
@@ -601,6 +731,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
count = LLVMGetParam(variant->function, 4);
stride = LLVMGetParam(variant->function, 5);
vb_ptr = LLVMGetParam(variant->function, 6);
+ instance_id = LLVMGetParam(variant->function, 7);
lp_build_name(context_ptr, "context");
lp_build_name(io_ptr, "io");
@@ -609,6 +740,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
lp_build_name(count, "count");
lp_build_name(stride, "stride");
lp_build_name(vb_ptr, "vb");
+ lp_build_name(instance_id, "instance_id");
/*
* Function body
@@ -618,12 +750,17 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
builder = LLVMCreateBuilder();
LLVMPositionBuilderAtEnd(builder, block);
- lp_build_context_init(&bld, builder, vs_type);
+ lp_build_context_init(&bld, builder, lp_type_int(32));
end = lp_build_add(&bld, start, count);
step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0);
+ /* code generated texture sampling */
+ sampler = draw_llvm_sampler_soa_create(
+ draw_llvm_variant_key_samplers(&variant->key),
+ context_ptr);
+
#if DEBUG_STORE
lp_build_printf(builder, "start = %d, end = %d, step = %d\n",
start, end, step);
@@ -654,7 +791,8 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr,
&vb_index, 1, "");
generate_fetch(builder, vbuffers_ptr,
- &aos_attribs[j][i], velem, vb, true_index);
+ &aos_attribs[j][i], velem, vb, true_index,
+ instance_id);
}
}
convert_to_soa(builder, aos_attribs, inputs,
@@ -665,7 +803,8 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
builder,
outputs,
ptr_aos,
- context_ptr);
+ context_ptr,
+ sampler);
convert_to_aos(builder, io, outputs,
draw->vs.vertex_shader->info.num_outputs,
@@ -673,6 +812,13 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
}
lp_build_loop_end_cond(builder, end, step, LLVMIntUGE, &lp_loop);
+ sampler->destroy(sampler);
+
+#ifdef PIPE_ARCH_X86
+ /* Avoid corrupting the FPU stack on 32bit OSes. */
+ lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0);
+#endif
+
LLVMBuildRetVoid(builder);
LLVMDisposeBuilder(builder);
@@ -682,41 +828,48 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
*/
#ifdef DEBUG
if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) {
- LLVMDumpValue(variant->function);
+ lp_debug_dump_value(variant->function);
assert(0);
}
#endif
LLVMRunFunctionPassManager(llvm->pass, variant->function);
- if (0) {
- LLVMDumpValue(variant->function);
+ if (gallivm_debug & GALLIVM_DEBUG_IR) {
+ lp_debug_dump_value(variant->function);
debug_printf("\n");
}
- variant->jit_func = (draw_jit_vert_func)LLVMGetPointerToGlobal(llvm->draw->engine, variant->function);
- if (0)
- lp_disassemble(variant->jit_func);
+ code = LLVMGetPointerToGlobal(llvm->draw->engine, variant->function);
+ variant->jit_func = (draw_jit_vert_func)pointer_to_func(code);
+
+ if (gallivm_debug & GALLIVM_DEBUG_ASM) {
+ lp_disassemble(code);
+ }
+ lp_func_delete_body(variant->function);
}
static void
draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
{
- LLVMTypeRef arg_types[7];
+ LLVMTypeRef arg_types[8];
LLVMTypeRef func_type;
LLVMValueRef context_ptr;
LLVMBasicBlockRef block;
LLVMBuilderRef builder;
LLVMValueRef fetch_elts, fetch_count, stride, step, io_itr;
LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
+ LLVMValueRef instance_id;
struct draw_context *draw = llvm->draw;
unsigned i, j;
struct lp_build_context bld;
struct lp_build_loop_state lp_loop;
- struct lp_type vs_type = lp_type_float_vec(32);
const int max_vertices = 4;
LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
+ LLVMValueRef fetch_max;
+ void *code;
+ struct lp_build_sampler_soa *sampler = 0;
arg_types[0] = llvm->context_ptr_type; /* context */
arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */
@@ -725,14 +878,17 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
arg_types[4] = LLVMInt32Type(); /* fetch_count */
arg_types[5] = LLVMInt32Type(); /* stride */
arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */
+ arg_types[7] = LLVMInt32Type(); /* instance_id */
func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
- variant->function_elts = LLVMAddFunction(llvm->module, "draw_llvm_shader_elts", func_type);
+ variant->function_elts = LLVMAddFunction(llvm->module, "draw_llvm_shader_elts",
+ func_type);
LLVMSetFunctionCallConv(variant->function_elts, LLVMCCallConv);
for(i = 0; i < Elements(arg_types); ++i)
if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
- LLVMAddAttribute(LLVMGetParam(variant->function_elts, i), LLVMNoAliasAttribute);
+ LLVMAddAttribute(LLVMGetParam(variant->function_elts, i),
+ LLVMNoAliasAttribute);
context_ptr = LLVMGetParam(variant->function_elts, 0);
io_ptr = LLVMGetParam(variant->function_elts, 1);
@@ -741,6 +897,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
fetch_count = LLVMGetParam(variant->function_elts, 4);
stride = LLVMGetParam(variant->function_elts, 5);
vb_ptr = LLVMGetParam(variant->function_elts, 6);
+ instance_id = LLVMGetParam(variant->function_elts, 7);
lp_build_name(context_ptr, "context");
lp_build_name(io_ptr, "io");
@@ -749,6 +906,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
lp_build_name(fetch_count, "fetch_count");
lp_build_name(stride, "stride");
lp_build_name(vb_ptr, "vb");
+ lp_build_name(instance_id, "instance_id");
/*
* Function body
@@ -758,10 +916,19 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
builder = LLVMCreateBuilder();
LLVMPositionBuilderAtEnd(builder, block);
- lp_build_context_init(&bld, builder, vs_type);
+ lp_build_context_init(&bld, builder, lp_type_int(32));
step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0);
+ /* code generated texture sampling */
+ sampler = draw_llvm_sampler_soa_create(
+ draw_llvm_variant_key_samplers(&variant->key),
+ context_ptr);
+
+ fetch_max = LLVMBuildSub(builder, fetch_count,
+ LLVMConstInt(LLVMInt32Type(), 1, 0),
+ "fetch_max");
+
lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), &lp_loop);
{
LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
@@ -780,8 +947,15 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
builder,
lp_loop.counter,
LLVMConstInt(LLVMInt32Type(), i, 0), "");
- LLVMValueRef fetch_ptr = LLVMBuildGEP(builder, fetch_elts,
- &true_index, 1, "");
+ LLVMValueRef fetch_ptr;
+
+ /* make sure we're not out of bounds which can happen
+ * if fetch_count % 4 != 0, because on the last iteration
+ * a few of the 4 vertex fetches will be out of bounds */
+ true_index = lp_build_min(&bld, true_index, fetch_max);
+
+ fetch_ptr = LLVMBuildGEP(builder, fetch_elts,
+ &true_index, 1, "");
true_index = LLVMBuildLoad(builder, fetch_ptr, "fetch_elt");
for (j = 0; j < draw->pt.nr_vertex_elements; ++j) {
struct pipe_vertex_element *velem = &draw->pt.vertex_element[j];
@@ -791,7 +965,8 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr,
&vb_index, 1, "");
generate_fetch(builder, vbuffers_ptr,
- &aos_attribs[j][i], velem, vb, true_index);
+ &aos_attribs[j][i], velem, vb, true_index,
+ instance_id);
}
}
convert_to_soa(builder, aos_attribs, inputs,
@@ -802,7 +977,8 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
builder,
outputs,
ptr_aos,
- context_ptr);
+ context_ptr,
+ sampler);
convert_to_aos(builder, io, outputs,
draw->vs.vertex_shader->info.num_outputs,
@@ -810,6 +986,13 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
}
lp_build_loop_end_cond(builder, fetch_count, step, LLVMIntUGE, &lp_loop);
+ sampler->destroy(sampler);
+
+#ifdef PIPE_ARCH_X86
+ /* Avoid corrupting the FPU stack on 32bit OSes. */
+ lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0);
+#endif
+
LLVMBuildRetVoid(builder);
LLVMDisposeBuilder(builder);
@@ -819,37 +1002,136 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
*/
#ifdef DEBUG
if(LLVMVerifyFunction(variant->function_elts, LLVMPrintMessageAction)) {
- LLVMDumpValue(variant->function_elts);
+ lp_debug_dump_value(variant->function_elts);
assert(0);
}
#endif
LLVMRunFunctionPassManager(llvm->pass, variant->function_elts);
- if (0) {
- LLVMDumpValue(variant->function_elts);
+ if (gallivm_debug & GALLIVM_DEBUG_IR) {
+ lp_debug_dump_value(variant->function_elts);
debug_printf("\n");
}
- variant->jit_func_elts = (draw_jit_vert_func_elts)LLVMGetPointerToGlobal(
- llvm->draw->engine, variant->function_elts);
- if (0)
- lp_disassemble(variant->jit_func_elts);
+ code = LLVMGetPointerToGlobal(llvm->draw->engine, variant->function_elts);
+ variant->jit_func_elts = (draw_jit_vert_func_elts)pointer_to_func(code);
+
+ if (gallivm_debug & GALLIVM_DEBUG_ASM) {
+ lp_disassemble(code);
+ }
+ lp_func_delete_body(variant->function_elts);
}
-void
-draw_llvm_make_variant_key(struct draw_llvm *llvm,
- struct draw_llvm_variant_key *key)
+
+struct draw_llvm_variant_key *
+draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
{
- memset(key, 0, sizeof(struct draw_llvm_variant_key));
+ unsigned i;
+ struct draw_llvm_variant_key *key;
+ struct lp_sampler_static_state *sampler;
+
+ key = (struct draw_llvm_variant_key *)store;
+ /* Presumably all variants of the shader should have the same
+ * number of vertex elements - ie the number of shader inputs.
+ */
key->nr_vertex_elements = llvm->draw->pt.nr_vertex_elements;
+ /* All variants of this shader will have the same value for
+ * nr_samplers. Not yet trying to compact away holes in the
+ * sampler array.
+ */
+ key->nr_samplers = llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
+
+ sampler = draw_llvm_variant_key_samplers(key);
+
memcpy(key->vertex_element,
llvm->draw->pt.vertex_element,
sizeof(struct pipe_vertex_element) * key->nr_vertex_elements);
+
+ memset(sampler, 0, key->nr_samplers * sizeof *sampler);
+
+ for (i = 0 ; i < key->nr_samplers; i++) {
+ lp_sampler_static_state(&sampler[i],
+ llvm->draw->sampler_views[i],
+ llvm->draw->samplers[i]);
+ }
+
+ return key;
+}
+
+void
+draw_llvm_set_mapped_texture(struct draw_context *draw,
+ unsigned sampler_idx,
+ uint32_t width, uint32_t height, uint32_t depth,
+ uint32_t last_level,
+ uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS],
+ uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS],
+ const void *data[DRAW_MAX_TEXTURE_LEVELS])
+{
+ unsigned j;
+ struct draw_jit_texture *jit_tex;
+
+ assert(sampler_idx < PIPE_MAX_VERTEX_SAMPLERS);
+
+
+ jit_tex = &draw->llvm->jit_context.textures[sampler_idx];
+
+ jit_tex->width = width;
+ jit_tex->height = height;
+ jit_tex->depth = depth;
+ jit_tex->last_level = last_level;
+
+ for (j = 0; j <= last_level; j++) {
+ jit_tex->data[j] = data[j];
+ jit_tex->row_stride[j] = row_stride[j];
+ jit_tex->img_stride[j] = img_stride[j];
+ }
+}
+
+
+void
+draw_llvm_set_sampler_state(struct draw_context *draw)
+{
+ unsigned i;
+
+ for (i = 0; i < draw->num_samplers; i++) {
+ struct draw_jit_texture *jit_tex = &draw->llvm->jit_context.textures[i];
+
+ if (draw->samplers[i]) {
+ jit_tex->min_lod = draw->samplers[i]->min_lod;
+ jit_tex->max_lod = draw->samplers[i]->max_lod;
+ jit_tex->lod_bias = draw->samplers[i]->lod_bias;
+ COPY_4V(jit_tex->border_color, draw->samplers[i]->border_color);
+ }
+ }
+}
+
+
+void
+draw_llvm_destroy_variant(struct draw_llvm_variant *variant)
+{
+ struct draw_llvm *llvm = variant->llvm;
+ struct draw_context *draw = llvm->draw;
+
+ if (variant->function_elts) {
+ if (variant->function_elts)
+ LLVMFreeMachineCodeForFunction(draw->engine,
+ variant->function_elts);
+ LLVMDeleteFunction(variant->function_elts);
+ }
+
+ if (variant->function) {
+ if (variant->function)
+ LLVMFreeMachineCodeForFunction(draw->engine,
+ variant->function);
+ LLVMDeleteFunction(variant->function);
+ }
- memcpy(&key->vs,
- &llvm->draw->vs.vertex_shader->state,
- sizeof(struct pipe_shader_state));
+ remove_from_list(&variant->list_item_local);
+ variant->shader->variants_cached--;
+ remove_from_list(&variant->list_item_global);
+ llvm->nr_variants--;
+ FREE(variant);
}
diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h
index 58fee7f9d60..d0a68ae412d 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.h
+++ b/src/gallium/auxiliary/draw/draw_llvm.h
@@ -1,28 +1,79 @@
-#ifndef HAVE_LLVM_H
-#define HAVE_LLVM_H
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef DRAW_LLVM_H
+#define DRAW_LLVM_H
#include "draw/draw_private.h"
+#include "draw/draw_vs.h"
+#include "gallivm/lp_bld_sample.h"
+
#include "pipe/p_context.h"
+#include "util/u_simple_list.h"
#include <llvm-c/Core.h>
#include <llvm-c/Analysis.h>
#include <llvm-c/Target.h>
#include <llvm-c/ExecutionEngine.h>
+#define DRAW_MAX_TEXTURE_LEVELS 13 /* 4K x 4K for now */
+
+struct draw_llvm;
+struct llvm_vertex_shader;
+
struct draw_jit_texture
{
uint32_t width;
uint32_t height;
- uint32_t stride;
- const void *data;
+ uint32_t depth;
+ uint32_t last_level;
+ uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS];
+ uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS];
+ const void *data[DRAW_MAX_TEXTURE_LEVELS];
+ float min_lod;
+ float max_lod;
+ float lod_bias;
+ float border_color[4];
};
enum {
DRAW_JIT_TEXTURE_WIDTH = 0,
DRAW_JIT_TEXTURE_HEIGHT,
- DRAW_JIT_TEXTURE_STRIDE,
- DRAW_JIT_TEXTURE_DATA
+ DRAW_JIT_TEXTURE_DEPTH,
+ DRAW_JIT_TEXTURE_LAST_LEVEL,
+ DRAW_JIT_TEXTURE_ROW_STRIDE,
+ DRAW_JIT_TEXTURE_IMG_STRIDE,
+ DRAW_JIT_TEXTURE_DATA,
+ DRAW_JIT_TEXTURE_MIN_LOD,
+ DRAW_JIT_TEXTURE_MAX_LOD,
+ DRAW_JIT_TEXTURE_LOD_BIAS,
+ DRAW_JIT_TEXTURE_BORDER_COLOR,
+ DRAW_JIT_TEXTURE_NUM_FIELDS /* number of fields above */
};
enum {
@@ -48,7 +99,7 @@ struct draw_jit_context
const float *gs_constants;
- struct draw_jit_texture textures[PIPE_MAX_SAMPLERS];
+ struct draw_jit_texture textures[PIPE_MAX_VERTEX_SAMPLERS];
};
@@ -58,10 +109,10 @@ struct draw_jit_context
#define draw_jit_context_gs_constants(_builder, _ptr) \
lp_build_struct_get(_builder, _ptr, 1, "gs_constants")
-#define DRAW_JIT_CONTEXT_TEXTURES_INDEX 2
+#define DRAW_JIT_CTX_TEXTURES 2
#define draw_jit_context_textures(_builder, _ptr) \
- lp_build_struct_get_ptr(_builder, _ptr, DRAW_JIT_CONTEXT_TEXTURES_INDEX, "textures")
+ lp_build_struct_get_ptr(_builder, _ptr, DRAW_JIT_CTX_TEXTURES, "textures")
@@ -92,7 +143,8 @@ typedef void
unsigned start,
unsigned count,
unsigned stride,
- struct pipe_vertex_buffer *vertex_buffers);
+ struct pipe_vertex_buffer *vertex_buffers,
+ unsigned instance_id);
typedef void
@@ -102,13 +154,89 @@ typedef void
const unsigned *fetch_elts,
unsigned fetch_count,
unsigned stride,
- struct pipe_vertex_buffer *vertex_buffers);
+ struct pipe_vertex_buffer *vertex_buffers,
+ unsigned instance_id);
+
+struct draw_llvm_variant_key
+{
+ unsigned nr_vertex_elements:16;
+ unsigned nr_samplers:16;
+
+ /* Variable number of vertex elements:
+ */
+ struct pipe_vertex_element vertex_element[1];
+
+ /* Followed by variable number of samplers:
+ */
+/* struct lp_sampler_static_state sampler; */
+};
+
+#define DRAW_LLVM_MAX_VARIANT_KEY_SIZE \
+ (sizeof(struct draw_llvm_variant_key) + \
+ PIPE_MAX_VERTEX_SAMPLERS * sizeof(struct lp_sampler_static_state) + \
+ (PIPE_MAX_ATTRIBS-1) * sizeof(struct pipe_vertex_element))
+
+
+static INLINE size_t
+draw_llvm_variant_key_size(unsigned nr_vertex_elements,
+ unsigned nr_samplers)
+{
+ return (sizeof(struct draw_llvm_variant_key) +
+ nr_samplers * sizeof(struct lp_sampler_static_state) +
+ (nr_vertex_elements - 1) * sizeof(struct pipe_vertex_element));
+}
+
+
+static INLINE struct lp_sampler_static_state *
+draw_llvm_variant_key_samplers(struct draw_llvm_variant_key *key)
+{
+ return (struct lp_sampler_static_state *)
+ &key->vertex_element[key->nr_vertex_elements];
+}
+
+
+
+struct draw_llvm_variant_list_item
+{
+ struct draw_llvm_variant *base;
+ struct draw_llvm_variant_list_item *next, *prev;
+};
+
+struct draw_llvm_variant
+{
+ LLVMValueRef function;
+ LLVMValueRef function_elts;
+ draw_jit_vert_func jit_func;
+ draw_jit_vert_func_elts jit_func_elts;
+
+ struct llvm_vertex_shader *shader;
+
+ struct draw_llvm *llvm;
+ struct draw_llvm_variant_list_item list_item_global;
+ struct draw_llvm_variant_list_item list_item_local;
+
+ /* key is variable-sized, must be last */
+ struct draw_llvm_variant_key key;
+ /* key is variable-sized, must be last */
+};
+
+struct llvm_vertex_shader {
+ struct draw_vertex_shader base;
+
+ unsigned variant_key_size;
+ struct draw_llvm_variant_list_item variants;
+ unsigned variants_created;
+ unsigned variants_cached;
+};
struct draw_llvm {
struct draw_context *draw;
struct draw_jit_context jit_context;
+ struct draw_llvm_variant_list_item vs_variants_list;
+ int nr_variants;
+
LLVMModuleRef module;
LLVMExecutionEngineRef engine;
LLVMModuleProviderRef provider;
@@ -121,23 +249,12 @@ struct draw_llvm {
LLVMTypeRef vb_ptr_type;
};
-struct draw_llvm_variant_key
-{
- struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
- unsigned nr_vertex_elements;
- struct pipe_shader_state vs;
-};
-
-struct draw_llvm_variant
+static INLINE struct llvm_vertex_shader *
+llvm_vertex_shader(struct draw_vertex_shader *vs)
{
- struct draw_llvm_variant_key key;
- LLVMValueRef function;
- LLVMValueRef function_elts;
- draw_jit_vert_func jit_func;
- draw_jit_vert_func_elts jit_func_elts;
+ return (struct llvm_vertex_shader *)vs;
+}
- struct draw_llvm_variant *next;
-};
struct draw_llvm *
draw_llvm_create(struct draw_context *draw);
@@ -146,14 +263,35 @@ void
draw_llvm_destroy(struct draw_llvm *llvm);
struct draw_llvm_variant *
-draw_llvm_prepare(struct draw_llvm *llvm, int num_inputs);
+draw_llvm_create_variant(struct draw_llvm *llvm,
+ unsigned num_vertex_header_attribs,
+ const struct draw_llvm_variant_key *key);
void
-draw_llvm_make_variant_key(struct draw_llvm *llvm,
- struct draw_llvm_variant_key *key);
+draw_llvm_destroy_variant(struct draw_llvm_variant *variant);
+
+struct draw_llvm_variant_key *
+draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store);
LLVMValueRef
draw_llvm_translate_from(LLVMBuilderRef builder,
LLVMValueRef vbuffer,
enum pipe_format from_format);
+
+struct lp_build_sampler_soa *
+draw_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state,
+ LLVMValueRef context_ptr);
+
+void
+draw_llvm_set_sampler_state(struct draw_context *draw);
+
+void
+draw_llvm_set_mapped_texture(struct draw_context *draw,
+ unsigned sampler_idx,
+ uint32_t width, uint32_t height, uint32_t depth,
+ uint32_t last_level,
+ uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS],
+ uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS],
+ const void *data[DRAW_MAX_TEXTURE_LEVELS]);
+
#endif
diff --git a/src/gallium/auxiliary/draw/draw_llvm_sample.c b/src/gallium/auxiliary/draw/draw_llvm_sample.c
new file mode 100644
index 00000000000..ac1fbb179c6
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_llvm_sample.c
@@ -0,0 +1,223 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Texture sampling code generation
+ * @author Jose Fonseca <[email protected]>
+ */
+
+#include "pipe/p_defines.h"
+#include "pipe/p_shader_tokens.h"
+#include "gallivm/lp_bld_debug.h"
+#include "gallivm/lp_bld_type.h"
+#include "gallivm/lp_bld_sample.h"
+#include "gallivm/lp_bld_tgsi.h"
+
+
+#include "util/u_debug.h"
+#include "util/u_memory.h"
+#include "util/u_pointer.h"
+#include "util/u_string.h"
+
+#include "draw_llvm.h"
+
+
+/**
+ * This provides the bridge between the sampler state store in
+ * lp_jit_context and lp_jit_texture and the sampler code
+ * generator. It provides the texture layout information required by
+ * the texture sampler code generator in terms of the state stored in
+ * lp_jit_context and lp_jit_texture in runtime.
+ */
+struct draw_llvm_sampler_dynamic_state
+{
+ struct lp_sampler_dynamic_state base;
+
+ const struct lp_sampler_static_state *static_state;
+
+ LLVMValueRef context_ptr;
+};
+
+
+/**
+ * This is the bridge between our sampler and the TGSI translator.
+ */
+struct draw_llvm_sampler_soa
+{
+ struct lp_build_sampler_soa base;
+
+ struct draw_llvm_sampler_dynamic_state dynamic_state;
+};
+
+
+/**
+ * Fetch the specified member of the lp_jit_texture structure.
+ * \param emit_load if TRUE, emit the LLVM load instruction to actually
+ * fetch the field's value. Otherwise, just emit the
+ * GEP code to address the field.
+ *
+ * @sa http://llvm.org/docs/GetElementPtr.html
+ */
+static LLVMValueRef
+draw_llvm_texture_member(const struct lp_sampler_dynamic_state *base,
+ LLVMBuilderRef builder,
+ unsigned unit,
+ unsigned member_index,
+ const char *member_name,
+ boolean emit_load)
+{
+ struct draw_llvm_sampler_dynamic_state *state =
+ (struct draw_llvm_sampler_dynamic_state *)base;
+ LLVMValueRef indices[4];
+ LLVMValueRef ptr;
+ LLVMValueRef res;
+
+ debug_assert(unit < PIPE_MAX_VERTEX_SAMPLERS);
+
+ /* context[0] */
+ indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ /* context[0].textures */
+ indices[1] = LLVMConstInt(LLVMInt32Type(), DRAW_JIT_CTX_TEXTURES, 0);
+ /* context[0].textures[unit] */
+ indices[2] = LLVMConstInt(LLVMInt32Type(), unit, 0);
+ /* context[0].textures[unit].member */
+ indices[3] = LLVMConstInt(LLVMInt32Type(), member_index, 0);
+
+ ptr = LLVMBuildGEP(builder, state->context_ptr, indices, Elements(indices), "");
+
+ if (emit_load)
+ res = LLVMBuildLoad(builder, ptr, "");
+ else
+ res = ptr;
+
+ lp_build_name(res, "context.texture%u.%s", unit, member_name);
+
+ return res;
+}
+
+
+/**
+ * Helper macro to instantiate the functions that generate the code to
+ * fetch the members of lp_jit_texture to fulfill the sampler code
+ * generator requests.
+ *
+ * This complexity is the price we have to pay to keep the texture
+ * sampler code generator a reusable module without dependencies to
+ * llvmpipe internals.
+ */
+#define DRAW_LLVM_TEXTURE_MEMBER(_name, _index, _emit_load) \
+ static LLVMValueRef \
+ draw_llvm_texture_##_name( const struct lp_sampler_dynamic_state *base, \
+ LLVMBuilderRef builder, \
+ unsigned unit) \
+ { \
+ return draw_llvm_texture_member(base, builder, unit, _index, #_name, _emit_load ); \
+ }
+
+
+DRAW_LLVM_TEXTURE_MEMBER(width, DRAW_JIT_TEXTURE_WIDTH, TRUE)
+DRAW_LLVM_TEXTURE_MEMBER(height, DRAW_JIT_TEXTURE_HEIGHT, TRUE)
+DRAW_LLVM_TEXTURE_MEMBER(depth, DRAW_JIT_TEXTURE_DEPTH, TRUE)
+DRAW_LLVM_TEXTURE_MEMBER(last_level, DRAW_JIT_TEXTURE_LAST_LEVEL, TRUE)
+DRAW_LLVM_TEXTURE_MEMBER(row_stride, DRAW_JIT_TEXTURE_ROW_STRIDE, FALSE)
+DRAW_LLVM_TEXTURE_MEMBER(img_stride, DRAW_JIT_TEXTURE_IMG_STRIDE, FALSE)
+DRAW_LLVM_TEXTURE_MEMBER(data_ptr, DRAW_JIT_TEXTURE_DATA, FALSE)
+DRAW_LLVM_TEXTURE_MEMBER(min_lod, DRAW_JIT_TEXTURE_MIN_LOD, TRUE)
+DRAW_LLVM_TEXTURE_MEMBER(max_lod, DRAW_JIT_TEXTURE_MAX_LOD, TRUE)
+DRAW_LLVM_TEXTURE_MEMBER(lod_bias, DRAW_JIT_TEXTURE_LOD_BIAS, TRUE)
+DRAW_LLVM_TEXTURE_MEMBER(border_color, DRAW_JIT_TEXTURE_BORDER_COLOR, FALSE)
+
+
+static void
+draw_llvm_sampler_soa_destroy(struct lp_build_sampler_soa *sampler)
+{
+ FREE(sampler);
+}
+
+
+/**
+ * Fetch filtered values from texture.
+ * The 'texel' parameter returns four vectors corresponding to R, G, B, A.
+ */
+static void
+draw_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base,
+ LLVMBuilderRef builder,
+ struct lp_type type,
+ unsigned unit,
+ unsigned num_coords,
+ const LLVMValueRef *coords,
+ const LLVMValueRef *ddx,
+ const LLVMValueRef *ddy,
+ LLVMValueRef lod_bias, /* optional */
+ LLVMValueRef explicit_lod, /* optional */
+ LLVMValueRef *texel)
+{
+ struct draw_llvm_sampler_soa *sampler = (struct draw_llvm_sampler_soa *)base;
+
+ assert(unit < PIPE_MAX_VERTEX_SAMPLERS);
+
+ lp_build_sample_soa(builder,
+ &sampler->dynamic_state.static_state[unit],
+ &sampler->dynamic_state.base,
+ type,
+ unit,
+ num_coords, coords,
+ ddx, ddy,
+ lod_bias, explicit_lod,
+ texel);
+}
+
+
+struct lp_build_sampler_soa *
+draw_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state,
+ LLVMValueRef context_ptr)
+{
+ struct draw_llvm_sampler_soa *sampler;
+
+ sampler = CALLOC_STRUCT(draw_llvm_sampler_soa);
+ if(!sampler)
+ return NULL;
+
+ sampler->base.destroy = draw_llvm_sampler_soa_destroy;
+ sampler->base.emit_fetch_texel = draw_llvm_sampler_soa_emit_fetch_texel;
+ sampler->dynamic_state.base.width = draw_llvm_texture_width;
+ sampler->dynamic_state.base.height = draw_llvm_texture_height;
+ sampler->dynamic_state.base.depth = draw_llvm_texture_depth;
+ sampler->dynamic_state.base.last_level = draw_llvm_texture_last_level;
+ sampler->dynamic_state.base.row_stride = draw_llvm_texture_row_stride;
+ sampler->dynamic_state.base.img_stride = draw_llvm_texture_img_stride;
+ sampler->dynamic_state.base.data_ptr = draw_llvm_texture_data_ptr;
+ sampler->dynamic_state.base.min_lod = draw_llvm_texture_min_lod;
+ sampler->dynamic_state.base.max_lod = draw_llvm_texture_max_lod;
+ sampler->dynamic_state.base.lod_bias = draw_llvm_texture_lod_bias;
+ sampler->dynamic_state.base.border_color = draw_llvm_texture_border_color;
+ sampler->dynamic_state.static_state = static_state;
+ sampler->dynamic_state.context_ptr = context_ptr;
+
+ return &sampler->base;
+}
+
diff --git a/src/gallium/auxiliary/draw/draw_llvm_translate.c b/src/gallium/auxiliary/draw/draw_llvm_translate.c
index d7da7ed357d..5171327ce2d 100644
--- a/src/gallium/auxiliary/draw/draw_llvm_translate.c
+++ b/src/gallium/auxiliary/draw/draw_llvm_translate.c
@@ -3,10 +3,10 @@
#include "draw_llvm.h"
-#include "gallivm/lp_bld_arit.h"
#include "gallivm/lp_bld_struct.h"
#include "gallivm/lp_bld_format.h"
#include "gallivm/lp_bld_debug.h"
+#include "gallivm/lp_bld_type.h"
#include "util/u_memory.h"
#include "util/u_format.h"
@@ -466,6 +466,7 @@ draw_llvm_translate_from(LLVMBuilderRef builder,
const struct util_format_description *format_desc;
LLVMValueRef zero;
int i;
+ struct lp_type type = lp_float32_vec4_type();
/*
* The above can only cope with straight arrays: no bitfields,
@@ -493,5 +494,5 @@ draw_llvm_translate_from(LLVMBuilderRef builder,
format_desc = util_format_description(from_format);
zero = LLVMConstNull(LLVMInt32Type());
- return lp_build_fetch_rgba_aos(builder, format_desc, vbuffer, zero, zero);
+ return lp_build_fetch_rgba_aos(builder, format_desc, type, vbuffer, zero, zero, zero);
}
diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c
index 64c35025081..6206197dae9 100644
--- a/src/gallium/auxiliary/draw/draw_pipe.c
+++ b/src/gallium/auxiliary/draw/draw_pipe.c
@@ -170,58 +170,42 @@ static void do_triangle( struct draw_context *draw,
* Set up macros for draw_pt_decompose.h template code.
* This code uses vertex indexes / elements.
*/
-#define QUAD(i0,i1,i2,i3) \
- do_triangle( draw, \
- ( DRAW_PIPE_RESET_STIPPLE | \
- DRAW_PIPE_EDGE_FLAG_0 | \
- DRAW_PIPE_EDGE_FLAG_2 ), \
- verts + stride * elts[i0], \
- verts + stride * elts[i1], \
- verts + stride * elts[i3]); \
- do_triangle( draw, \
- ( DRAW_PIPE_EDGE_FLAG_0 | \
- DRAW_PIPE_EDGE_FLAG_1 ), \
- verts + stride * elts[i1], \
- verts + stride * elts[i2], \
- verts + stride * elts[i3])
-
-#define TRIANGLE(flags,i0,i1,i2) \
- do_triangle( draw, \
- elts[i0], /* flags */ \
- verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * (elts[i2] & ~DRAW_PIPE_FLAG_MASK) );
-
-#define LINE(flags,i0,i1) \
- do_line( draw, \
- elts[i0], \
- verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK) );
+
+#define TRIANGLE(flags,i0,i1,i2) \
+ do { \
+ do_triangle( draw, \
+ flags, \
+ verts + stride * (i0), \
+ verts + stride * (i1), \
+ verts + stride * (i2) ); \
+ } while (0)
+
+#define LINE(flags,i0,i1) \
+ do { \
+ do_line( draw, \
+ flags, \
+ verts + stride * (i0), \
+ verts + stride * (i1) ); \
+ } while (0)
#define POINT(i0) \
- do_point( draw, \
- verts + stride * elts[i0] )
+ do { \
+ do_point( draw, verts + stride * (i0) ); \
+ } while (0)
+
+#define GET_ELT(idx) (elts[idx])
-#define FUNC pipe_run
-#define ARGS \
+#define FUNC pipe_run_elts
+#define FUNC_VARS \
struct draw_context *draw, \
unsigned prim, \
+ unsigned prim_flags, \
struct vertex_header *vertices, \
unsigned stride, \
- const ushort *elts
-
-#define LOCAL_VARS \
- char *verts = (char *)vertices; \
- boolean flatfirst = (draw->rasterizer->flatshade && \
- draw->rasterizer->flatshade_first); \
- unsigned i; \
- ushort flags
-
-#define FLUSH
+ const ushort *elts, \
+ unsigned count
#include "draw_pt_decompose.h"
-#undef ARGS
-#undef LOCAL_VARS
@@ -238,78 +222,84 @@ static void do_triangle( struct draw_context *draw,
* draw_vbuf.c code uses when it has to perform a flush.
*/
void draw_pipeline_run( struct draw_context *draw,
- unsigned prim,
- struct vertex_header *vertices,
- unsigned vertex_count,
- unsigned stride,
- const ushort *elts,
- unsigned count )
+ const struct draw_vertex_info *vert_info,
+ const struct draw_prim_info *prim_info)
{
- char *verts = (char *)vertices;
+ unsigned i, start;
+
+ draw->pipeline.verts = (char *)vert_info->verts;
+ draw->pipeline.vertex_stride = vert_info->stride;
+ draw->pipeline.vertex_count = vert_info->count;
+
+ for (start = i = 0;
+ i < prim_info->primitive_count;
+ start += prim_info->primitive_lengths[i], i++)
+ {
+ const unsigned count = prim_info->primitive_lengths[i];
+
+#if DEBUG
+ /* Warn if one of the element indexes go outside the vertex buffer */
+ {
+ unsigned max_index = 0x0, i;
+ /* find the largest element index */
+ for (i = 0; i < count; i++) {
+ unsigned int index = prim_info->elts[start + i];
+ if (index > max_index)
+ max_index = index;
+ }
+ if (max_index >= vert_info->count) {
+ debug_printf("%s: max_index (%u) outside vertex buffer (%u)\n",
+ __FUNCTION__,
+ max_index,
+ vert_info->count);
+ }
+ }
+#endif
+
+ pipe_run_elts(draw,
+ prim_info->prim,
+ prim_info->flags,
+ vert_info->verts,
+ vert_info->stride,
+ prim_info->elts + start,
+ count);
+ }
- draw->pipeline.verts = verts;
- draw->pipeline.vertex_stride = stride;
- draw->pipeline.vertex_count = vertex_count;
-
- pipe_run(draw, prim, vertices, stride, elts, count);
-
draw->pipeline.verts = NULL;
draw->pipeline.vertex_count = 0;
}
-
/*
* Set up macros for draw_pt_decompose.h template code.
- * This code is for non-indexed rendering (no elts).
+ * This code is for non-indexed (aka linear) rendering (no elts).
*/
-#define QUAD(i0,i1,i2,i3) \
- do_triangle( draw, \
- ( DRAW_PIPE_RESET_STIPPLE | \
- DRAW_PIPE_EDGE_FLAG_0 | \
- DRAW_PIPE_EDGE_FLAG_2 ), \
- verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * (i1), \
- verts + stride * (i3)); \
- do_triangle( draw, \
- ( DRAW_PIPE_EDGE_FLAG_0 | \
- DRAW_PIPE_EDGE_FLAG_1 ), \
- verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * (i2), \
- verts + stride * (i3))
-
-#define TRIANGLE(flags,i0,i1,i2) \
- do_triangle( draw, \
- flags, /* flags */ \
- verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * (i1), \
- verts + stride * (i2))
-
-#define LINE(flags,i0,i1) \
- do_line( draw, \
- flags, \
- verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * (i1))
-#define POINT(i0) \
- do_point( draw, \
- verts + stride * i0 )
+#define TRIANGLE(flags,i0,i1,i2) \
+ do_triangle( draw, flags, \
+ verts + stride * (i0), \
+ verts + stride * (i1), \
+ verts + stride * (i2) )
-#define FUNC pipe_run_linear
-#define ARGS \
- struct draw_context *draw, \
- unsigned prim, \
- struct vertex_header *vertices, \
- unsigned stride
+#define LINE(flags,i0,i1) \
+ do_line( draw, flags, \
+ verts + stride * (i0), \
+ verts + stride * (i1) )
-#define LOCAL_VARS \
- char *verts = (char *)vertices; \
- boolean flatfirst = (draw->rasterizer->flatshade && \
- draw->rasterizer->flatshade_first); \
- unsigned i; \
- ushort flags
+#define POINT(i0) \
+ do_point( draw, verts + stride * (i0) )
-#define FLUSH
+
+#define GET_ELT(idx) (idx)
+
+#define FUNC pipe_run_linear
+#define FUNC_VARS \
+ struct draw_context *draw, \
+ unsigned prim, \
+ unsigned prim_flags, \
+ struct vertex_header *vertices, \
+ unsigned stride, \
+ unsigned count
#include "draw_pt_decompose.h"
@@ -318,17 +308,32 @@ void draw_pipeline_run( struct draw_context *draw,
* For drawing non-indexed primitives.
*/
void draw_pipeline_run_linear( struct draw_context *draw,
- unsigned prim,
- struct vertex_header *vertices,
- unsigned count,
- unsigned stride )
+ const struct draw_vertex_info *vert_info,
+ const struct draw_prim_info *prim_info)
{
- char *verts = (char *)vertices;
- draw->pipeline.verts = verts;
- draw->pipeline.vertex_stride = stride;
- draw->pipeline.vertex_count = count;
-
- pipe_run_linear(draw, prim, vertices, stride, count);
+ unsigned i, start;
+
+ for (start = i = 0;
+ i < prim_info->primitive_count;
+ start += prim_info->primitive_lengths[i], i++)
+ {
+ unsigned count = prim_info->primitive_lengths[i];
+ char *verts = ((char*)vert_info->verts) +
+ (start * vert_info->stride);
+
+ draw->pipeline.verts = verts;
+ draw->pipeline.vertex_stride = vert_info->stride;
+ draw->pipeline.vertex_count = count;
+
+ assert(count <= vert_info->count);
+
+ pipe_run_linear(draw,
+ prim_info->prim,
+ prim_info->flags,
+ (struct vertex_header*)verts,
+ vert_info->stride,
+ count);
+ }
draw->pipeline.verts = NULL;
draw->pipeline.vertex_count = 0;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
index 4faf0a779ca..d1aba763098 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
@@ -373,8 +373,7 @@ generate_aaline_fs(struct aaline_stage *aaline)
aaline->fs->sampler_unit = transform.freeSampler;
- aaline->fs->aaline_fs
- = aaline->driver_create_fs_state(pipe, &aaline_fs);
+ aaline->fs->aaline_fs = aaline->driver_create_fs_state(pipe, &aaline_fs);
if (aaline->fs->aaline_fs == NULL)
goto fail;
@@ -425,7 +424,8 @@ aaline_create_texture(struct aaline_stage *aaline)
/* Fill in mipmap images.
* Basically each level is solid opaque, except for the outermost
- * texels which are zero. Special case the 1x1 and 2x2 levels.
+ * texels which are zero. Special case the 1x1 and 2x2 levels
+ * (though, those levels shouldn't be used - see the max_lod setting).
*/
for (level = 0; level <= MAX_TEXTURE_LEVEL; level++) {
struct pipe_transfer *transfer;
@@ -497,7 +497,8 @@ aaline_create_sampler(struct aaline_stage *aaline)
sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
sampler.normalized_coords = 1;
sampler.min_lod = 0.0f;
- sampler.max_lod = MAX_TEXTURE_LEVEL;
+ /* avoid using the 1x1 and 2x2 mipmap levels */
+ sampler.max_lod = MAX_TEXTURE_LEVEL - 2;
aaline->sampler_cso = pipe->create_sampler_state(pipe, &sampler);
if (aaline->sampler_cso == NULL)
@@ -669,8 +670,8 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header)
assert(draw->rasterizer->line_smooth);
- if (draw->rasterizer->line_width <= 3.0)
- aaline->half_line_width = 1.5f;
+ if (draw->rasterizer->line_width <= 2.2)
+ aaline->half_line_width = 1.1f;
else
aaline->half_line_width = 0.5f * draw->rasterizer->line_width;
@@ -687,10 +688,9 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header)
aaline->tex_slot = draw_current_shader_outputs(draw);
aaline->pos_slot = draw_current_shader_position_output(draw);;
- /* advertise the extra post-transformed vertex attribute */
- draw->extra_shader_outputs.semantic_name = TGSI_SEMANTIC_GENERIC;
- draw->extra_shader_outputs.semantic_index = aaline->fs->generic_attrib;
- draw->extra_shader_outputs.slot = aaline->tex_slot;
+ /* allocate the extra post-transformed vertex attribute */
+ (void) draw_alloc_extra_vertex_attrib(draw, TGSI_SEMANTIC_GENERIC,
+ aaline->fs->generic_attrib);
/* how many samplers? */
/* we'll use sampler/texture[pstip->sampler_unit] for the stipple */
@@ -743,7 +743,7 @@ aaline_flush(struct draw_stage *stage, unsigned flags)
draw->suspend_flushing = FALSE;
- draw->extra_shader_outputs.slot = 0;
+ draw_remove_extra_vertex_attribs(draw);
}
@@ -788,9 +788,6 @@ draw_aaline_stage(struct draw_context *draw)
if (aaline == NULL)
return NULL;
- if (!draw_alloc_temp_verts( &aaline->stage, 8 ))
- goto fail;
-
aaline->stage.draw = draw;
aaline->stage.name = "aaline";
aaline->stage.next = NULL;
@@ -801,11 +798,14 @@ draw_aaline_stage(struct draw_context *draw)
aaline->stage.reset_stipple_counter = aaline_reset_stipple_counter;
aaline->stage.destroy = aaline_destroy;
+ if (!draw_alloc_temp_verts( &aaline->stage, 8 ))
+ goto fail;
+
return aaline;
fail:
if (aaline)
- aaline_destroy(&aaline->stage);
+ aaline->stage.destroy(&aaline->stage);
return NULL;
}
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
index bba6f50c020..5ea552f51c1 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
@@ -701,9 +701,9 @@ aapoint_first_point(struct draw_stage *stage, struct prim_header *header)
aapoint->pos_slot = draw_current_shader_position_output(draw);
- draw->extra_shader_outputs.semantic_name = TGSI_SEMANTIC_GENERIC;
- draw->extra_shader_outputs.semantic_index = aapoint->fs->generic_attrib;
- draw->extra_shader_outputs.slot = aapoint->tex_slot;
+ /* allocate the extra post-transformed vertex attribute */
+ (void) draw_alloc_extra_vertex_attrib(draw, TGSI_SEMANTIC_GENERIC,
+ aapoint->fs->generic_attrib);
/* find psize slot in post-transform vertex */
aapoint->psize_slot = -1;
@@ -754,7 +754,7 @@ aapoint_flush(struct draw_stage *stage, unsigned flags)
draw->suspend_flushing = FALSE;
- draw->extra_shader_outputs.slot = 0;
+ draw_remove_extra_vertex_attribs(draw);
}
@@ -780,9 +780,6 @@ draw_aapoint_stage(struct draw_context *draw)
if (aapoint == NULL)
goto fail;
- if (!draw_alloc_temp_verts( &aapoint->stage, 4 ))
- goto fail;
-
aapoint->stage.draw = draw;
aapoint->stage.name = "aapoint";
aapoint->stage.next = NULL;
@@ -793,11 +790,14 @@ draw_aapoint_stage(struct draw_context *draw)
aapoint->stage.reset_stipple_counter = aapoint_reset_stipple_counter;
aapoint->stage.destroy = aapoint_destroy;
+ if (!draw_alloc_temp_verts( &aapoint->stage, 4 ))
+ goto fail;
+
return aapoint;
fail:
if (aapoint)
- aapoint_destroy(&aapoint->stage);
+ aapoint->stage.destroy(&aapoint->stage);
return NULL;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c
index 51a6115ebf5..a10d8e9edc0 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_clip.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c
@@ -68,8 +68,7 @@ struct clip_stage {
};
-/* This is a bit confusing:
- */
+/** Cast wrapper */
static INLINE struct clip_stage *clip_stage( struct draw_stage *stage )
{
return (struct clip_stage *)stage;
@@ -81,18 +80,22 @@ static INLINE struct clip_stage *clip_stage( struct draw_stage *stage )
/* All attributes are float[4], so this is easy:
*/
-static void interp_attr( float *fdst,
+static void interp_attr( float dst[4],
float t,
- const float *fin,
- const float *fout )
+ const float in[4],
+ const float out[4] )
{
- fdst[0] = LINTERP( t, fout[0], fin[0] );
- fdst[1] = LINTERP( t, fout[1], fin[1] );
- fdst[2] = LINTERP( t, fout[2], fin[2] );
- fdst[3] = LINTERP( t, fout[3], fin[3] );
+ dst[0] = LINTERP( t, out[0], in[0] );
+ dst[1] = LINTERP( t, out[1], in[1] );
+ dst[2] = LINTERP( t, out[2], in[2] );
+ dst[3] = LINTERP( t, out[3], in[3] );
}
+/**
+ * Copy front/back, primary/secondary colors from src vertex to dst vertex.
+ * Used when flat shading.
+ */
static void copy_colors( struct draw_stage *stage,
struct vertex_header *dst,
const struct vertex_header *src )
@@ -121,20 +124,17 @@ static void interp( const struct clip_stage *clip,
/* Vertex header.
*/
- {
- dst->clipmask = 0;
- dst->edgeflag = 0; /* will get overwritten later */
- dst->pad = 0;
- dst->vertex_id = UNDEFINED_VERTEX_ID;
- }
+ dst->clipmask = 0;
+ dst->edgeflag = 0; /* will get overwritten later */
+ dst->pad = 0;
+ dst->vertex_id = UNDEFINED_VERTEX_ID;
- /* Clip coordinates: interpolate normally
+ /* Interpolate the clip-space coords.
*/
- {
- interp_attr(dst->clip, t, in->clip, out->clip);
- }
+ interp_attr(dst->clip, t, in->clip, out->clip);
- /* Do the projective divide and insert window coordinates:
+ /* Do the projective divide and viewport transformation to get
+ * new window coordinates:
*/
{
const float *pos = dst->clip;
@@ -157,6 +157,10 @@ static void interp( const struct clip_stage *clip,
}
+/**
+ * Emit a post-clip polygon to the next pipeline stage. The polygon
+ * will be convex and the provoking vertex will always be vertex[0].
+ */
static void emit_poly( struct draw_stage *stage,
struct vertex_header **inlist,
unsigned n,
@@ -164,10 +168,18 @@ static void emit_poly( struct draw_stage *stage,
{
struct prim_header header;
unsigned i;
+ ushort edge_first, edge_middle, edge_last;
- const ushort edge_first = DRAW_PIPE_EDGE_FLAG_2;
- const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_0;
- const ushort edge_last = DRAW_PIPE_EDGE_FLAG_1;
+ if (stage->draw->rasterizer->flatshade_first) {
+ edge_first = DRAW_PIPE_EDGE_FLAG_0;
+ edge_middle = DRAW_PIPE_EDGE_FLAG_1;
+ edge_last = DRAW_PIPE_EDGE_FLAG_2;
+ }
+ else {
+ edge_first = DRAW_PIPE_EDGE_FLAG_2;
+ edge_middle = DRAW_PIPE_EDGE_FLAG_0;
+ edge_last = DRAW_PIPE_EDGE_FLAG_1;
+ }
/* later stages may need the determinant, but only the sign matters */
header.det = origPrim->det;
@@ -175,9 +187,17 @@ static void emit_poly( struct draw_stage *stage,
header.pad = 0;
for (i = 2; i < n; i++, header.flags = edge_middle) {
- header.v[0] = inlist[i-1];
- header.v[1] = inlist[i];
- header.v[2] = inlist[0]; /* keep in v[2] for flatshading */
+ /* order the triangle verts to respect the provoking vertex mode */
+ if (stage->draw->rasterizer->flatshade_first) {
+ header.v[0] = inlist[0]; /* the provoking vertex */
+ header.v[1] = inlist[i-1];
+ header.v[2] = inlist[i];
+ }
+ else {
+ header.v[0] = inlist[i-1];
+ header.v[1] = inlist[i];
+ header.v[2] = inlist[0]; /* the provoking vertex */
+ }
if (i == n-1)
header.flags |= edge_last;
@@ -185,7 +205,8 @@ static void emit_poly( struct draw_stage *stage,
if (0) {
const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader;
uint j, k;
- debug_printf("Clipped tri:\n");
+ debug_printf("Clipped tri: (flat-shade-first = %d)\n",
+ stage->draw->rasterizer->flatshade_first);
for (j = 0; j < 3; j++) {
for (k = 0; k < vs->info.num_outputs; k++) {
debug_printf(" Vert %d: Attr %d: %f %f %f %f\n", j, k,
@@ -241,6 +262,9 @@ do_clip_tri( struct draw_stage *stage,
clipmask &= ~(1<<plane_idx);
+ assert(n < MAX_CLIPPED_VERTICES);
+ if (n >= MAX_CLIPPED_VERTICES)
+ return;
inlist[n] = inlist[0]; /* prevent rotation of vertices */
for (i = 1; i <= n; i++) {
@@ -249,11 +273,23 @@ do_clip_tri( struct draw_stage *stage,
float dp = dot4( vert->clip, plane );
if (!IS_NEGATIVE(dp_prev)) {
+ assert(outcount < MAX_CLIPPED_VERTICES);
+ if (outcount >= MAX_CLIPPED_VERTICES)
+ return;
outlist[outcount++] = vert_prev;
}
if (DIFFERENT_SIGNS(dp, dp_prev)) {
- struct vertex_header *new_vert = clipper->stage.tmp[tmpnr++];
+ struct vertex_header *new_vert;
+
+ assert(tmpnr < MAX_CLIPPED_VERTICES + 1);
+ if (tmpnr >= MAX_CLIPPED_VERTICES + 1)
+ return;
+ new_vert = clipper->stage.tmp[tmpnr++];
+
+ assert(outcount < MAX_CLIPPED_VERTICES);
+ if (outcount >= MAX_CLIPPED_VERTICES)
+ return;
outlist[outcount++] = new_vert;
if (IS_NEGATIVE(dp)) {
@@ -291,18 +327,34 @@ do_clip_tri( struct draw_stage *stage,
}
}
- /* If flat-shading, copy color to new provoking vertex.
+ /* If flat-shading, copy provoking vertex color to polygon vertex[0]
*/
- if (clipper->flat && inlist[0] != header->v[2]) {
- inlist[0] = dup_vert(stage, inlist[0], tmpnr++);
-
- copy_colors(stage, inlist[0], header->v[2]);
- }
-
- /* Emit the polygon as triangles to the setup stage:
- */
- if (n >= 3)
+ if (n >= 3) {
+ if (clipper->flat) {
+ if (stage->draw->rasterizer->flatshade_first) {
+ if (inlist[0] != header->v[0]) {
+ assert(tmpnr < MAX_CLIPPED_VERTICES + 1);
+ if (tmpnr >= MAX_CLIPPED_VERTICES + 1)
+ return;
+ inlist[0] = dup_vert(stage, inlist[0], tmpnr++);
+ copy_colors(stage, inlist[0], header->v[0]);
+ }
+ }
+ else {
+ if (inlist[0] != header->v[2]) {
+ assert(tmpnr < MAX_CLIPPED_VERTICES + 1);
+ if (tmpnr >= MAX_CLIPPED_VERTICES + 1)
+ return;
+ inlist[0] = dup_vert(stage, inlist[0], tmpnr++);
+ copy_colors(stage, inlist[0], header->v[2]);
+ }
+ }
+ }
+
+ /* Emit the polygon as triangles to the setup stage:
+ */
emit_poly( stage, inlist, n, header );
+ }
}
@@ -492,9 +544,6 @@ struct draw_stage *draw_clip_stage( struct draw_context *draw )
if (clipper == NULL)
goto fail;
- if (!draw_alloc_temp_verts( &clipper->stage, MAX_CLIPPED_VERTICES+1 ))
- goto fail;
-
clipper->stage.draw = draw;
clipper->stage.name = "clipper";
clipper->stage.point = clip_point;
@@ -506,6 +555,9 @@ struct draw_stage *draw_clip_stage( struct draw_context *draw )
clipper->plane = draw->plane;
+ if (!draw_alloc_temp_verts( &clipper->stage, MAX_CLIPPED_VERTICES+1 ))
+ goto fail;
+
return &clipper->stage;
fail:
diff --git a/src/gallium/auxiliary/draw/draw_pipe_cull.c b/src/gallium/auxiliary/draw/draw_pipe_cull.c
index dc66c65a56c..2f4d01d23ab 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_cull.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_cull.c
@@ -40,7 +40,8 @@
struct cull_stage {
struct draw_stage stage;
- unsigned winding; /**< which winding(s) to cull (one of PIPE_WINDING_x) */
+ unsigned cull_face; /**< which face(s) to cull (one of PIPE_FACE_x) */
+ unsigned front_ccw;
};
@@ -73,9 +74,12 @@ static void cull_tri( struct draw_stage *stage,
/* if det < 0 then Z points toward the camera and the triangle is
* counter-clockwise winding.
*/
- unsigned winding = (header->det < 0) ? PIPE_WINDING_CCW : PIPE_WINDING_CW;
+ unsigned ccw = (header->det < 0);
+ unsigned face = ((ccw == cull_stage(stage)->front_ccw) ?
+ PIPE_FACE_FRONT :
+ PIPE_FACE_BACK);
- if ((winding & cull_stage(stage)->winding) == 0) {
+ if ((face & cull_stage(stage)->cull_face) == 0) {
/* triangle is not culled, pass to next stage */
stage->next->tri( stage->next, header );
}
@@ -88,7 +92,8 @@ static void cull_first_tri( struct draw_stage *stage,
{
struct cull_stage *cull = cull_stage(stage);
- cull->winding = stage->draw->rasterizer->cull_mode;
+ cull->cull_face = stage->draw->rasterizer->cull_face;
+ cull->front_ccw = stage->draw->rasterizer->front_ccw;
stage->tri = cull_tri;
stage->tri( stage, header );
@@ -124,9 +129,6 @@ struct draw_stage *draw_cull_stage( struct draw_context *draw )
if (cull == NULL)
goto fail;
- if (!draw_alloc_temp_verts( &cull->stage, 0 ))
- goto fail;
-
cull->stage.draw = draw;
cull->stage.name = "cull";
cull->stage.next = NULL;
@@ -137,6 +139,9 @@ struct draw_stage *draw_cull_stage( struct draw_context *draw )
cull->stage.reset_stipple_counter = cull_reset_stipple_counter;
cull->stage.destroy = cull_destroy;
+ if (!draw_alloc_temp_verts( &cull->stage, 0 ))
+ goto fail;
+
return &cull->stage;
fail:
diff --git a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c
index 34afb1a0b60..693f2895aad 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c
@@ -257,9 +257,6 @@ struct draw_stage *draw_flatshade_stage( struct draw_context *draw )
if (flatshade == NULL)
goto fail;
- if (!draw_alloc_temp_verts( &flatshade->stage, 2 ))
- goto fail;
-
flatshade->stage.draw = draw;
flatshade->stage.name = "flatshade";
flatshade->stage.next = NULL;
@@ -270,6 +267,9 @@ struct draw_stage *draw_flatshade_stage( struct draw_context *draw )
flatshade->stage.reset_stipple_counter = flatshade_reset_stipple_counter;
flatshade->stage.destroy = flatshade_destroy;
+ if (!draw_alloc_temp_verts( &flatshade->stage, 2 ))
+ goto fail;
+
return &flatshade->stage;
fail:
diff --git a/src/gallium/auxiliary/draw/draw_pipe_offset.c b/src/gallium/auxiliary/draw/draw_pipe_offset.c
index 8e321946ced..8afbbfa1569 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_offset.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_offset.c
@@ -161,9 +161,7 @@ struct draw_stage *draw_offset_stage( struct draw_context *draw )
{
struct offset_stage *offset = CALLOC_STRUCT(offset_stage);
if (offset == NULL)
- return NULL;
-
- draw_alloc_temp_verts( &offset->stage, 3 );
+ goto fail;
offset->stage.draw = draw;
offset->stage.name = "offset";
@@ -175,5 +173,14 @@ struct draw_stage *draw_offset_stage( struct draw_context *draw )
offset->stage.reset_stipple_counter = offset_reset_stipple_counter;
offset->stage.destroy = offset_destroy;
+ if (!draw_alloc_temp_verts( &offset->stage, 3 ))
+ goto fail;
+
return &offset->stage;
+
+fail:
+ if (offset)
+ offset->stage.destroy( &offset->stage );
+
+ return NULL;
}
diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
index ef30db094fe..ed9a53e154d 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -363,8 +363,12 @@ generate_pstip_fs(struct pstip_stage *pstip)
assert(pstip->fs->sampler_unit < PIPE_MAX_SAMPLERS);
pstip->fs->pstip_fs = pstip->driver_create_fs_state(pstip->pipe, &pstip_fs);
-
+
FREE((void *)pstip_fs.tokens);
+
+ if (!pstip->fs->pstip_fs)
+ return FALSE;
+
return TRUE;
}
@@ -603,12 +607,15 @@ pstip_destroy(struct draw_stage *stage)
}
+/** Create a new polygon stipple drawing stage object */
static struct pstip_stage *
-draw_pstip_stage(struct draw_context *draw)
+draw_pstip_stage(struct draw_context *draw, struct pipe_context *pipe)
{
struct pstip_stage *pstip = CALLOC_STRUCT(pstip_stage);
+ if (pstip == NULL)
+ goto fail;
- draw_alloc_temp_verts( &pstip->stage, 8 );
+ pstip->pipe = pipe;
pstip->stage.draw = draw;
pstip->stage.name = "pstip";
@@ -620,7 +627,16 @@ draw_pstip_stage(struct draw_context *draw)
pstip->stage.reset_stipple_counter = pstip_reset_stipple_counter;
pstip->stage.destroy = pstip_destroy;
+ if (!draw_alloc_temp_verts( &pstip->stage, 8 ))
+ goto fail;
+
return pstip;
+
+fail:
+ if (pstip)
+ pstip->stage.destroy( &pstip->stage );
+
+ return NULL;
}
@@ -756,14 +772,12 @@ draw_install_pstipple_stage(struct draw_context *draw,
/*
* Create / install pgon stipple drawing / prim stage
*/
- pstip = draw_pstip_stage( draw );
+ pstip = draw_pstip_stage( draw, pipe );
if (pstip == NULL)
goto fail;
draw->pipeline.pstipple = &pstip->stage;
- pstip->pipe = pipe;
-
/* create special texture, sampler state */
if (!pstip_create_texture(pstip))
goto fail;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_stipple.c b/src/gallium/auxiliary/draw/draw_pipe_stipple.c
index 70fbab9ea76..4b3f4e7ae11 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_stipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_stipple.c
@@ -235,8 +235,8 @@ stipple_destroy( struct draw_stage *stage )
struct draw_stage *draw_stipple_stage( struct draw_context *draw )
{
struct stipple_stage *stipple = CALLOC_STRUCT(stipple_stage);
-
- draw_alloc_temp_verts( &stipple->stage, 2 );
+ if (stipple == NULL)
+ goto fail;
stipple->stage.draw = draw;
stipple->stage.name = "stipple";
@@ -248,5 +248,14 @@ struct draw_stage *draw_stipple_stage( struct draw_context *draw )
stipple->stage.flush = stipple_flush;
stipple->stage.destroy = stipple_destroy;
+ if (!draw_alloc_temp_verts( &stipple->stage, 2 ))
+ goto fail;
+
return &stipple->stage;
+
+fail:
+ if (stipple)
+ stipple->stage.destroy( &stipple->stage );
+
+ return NULL;
}
diff --git a/src/gallium/auxiliary/draw/draw_pipe_twoside.c b/src/gallium/auxiliary/draw/draw_pipe_twoside.c
index eef0238b157..9a3f3fee625 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_twoside.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_twoside.c
@@ -141,7 +141,7 @@ static void twoside_first_tri( struct draw_stage *stage,
* if the triangle is back-facing (negative).
* sign = -1 for CCW, +1 for CW
*/
- twoside->sign = (stage->draw->rasterizer->front_winding == PIPE_WINDING_CCW) ? -1.0f : 1.0f;
+ twoside->sign = stage->draw->rasterizer->front_ccw ? -1.0f : 1.0f;
stage->tri = twoside_tri;
stage->tri( stage, header );
@@ -177,9 +177,6 @@ struct draw_stage *draw_twoside_stage( struct draw_context *draw )
if (twoside == NULL)
goto fail;
- if (!draw_alloc_temp_verts( &twoside->stage, 3 ))
- goto fail;
-
twoside->stage.draw = draw;
twoside->stage.name = "twoside";
twoside->stage.next = NULL;
@@ -190,6 +187,9 @@ struct draw_stage *draw_twoside_stage( struct draw_context *draw )
twoside->stage.reset_stipple_counter = twoside_reset_stipple_counter;
twoside->stage.destroy = twoside_destroy;
+ if (!draw_alloc_temp_verts( &twoside->stage, 3 ))
+ goto fail;
+
return &twoside->stage;
fail:
diff --git a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
index 03bb842e20a..d87741b91e7 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
@@ -105,6 +105,23 @@ static void lines( struct draw_stage *stage,
}
+/** For debugging */
+static void
+print_header_flags(unsigned flags)
+{
+ debug_printf("header->flags = ");
+ if (flags & DRAW_PIPE_RESET_STIPPLE)
+ debug_printf("RESET_STIPPLE ");
+ if (flags & DRAW_PIPE_EDGE_FLAG_0)
+ debug_printf("EDGE_FLAG_0 ");
+ if (flags & DRAW_PIPE_EDGE_FLAG_1)
+ debug_printf("EDGE_FLAG_1 ");
+ if (flags & DRAW_PIPE_EDGE_FLAG_2)
+ debug_printf("EDGE_FLAG_2 ");
+ debug_printf("\n");
+}
+
+
/* Unfilled tri:
*
* Note edgeflags in the vertex struct is not sufficient as we will
@@ -117,8 +134,12 @@ static void unfilled_tri( struct draw_stage *stage,
struct prim_header *header )
{
struct unfilled_stage *unfilled = unfilled_stage(stage);
- unsigned mode = unfilled->mode[header->det >= 0.0];
+ unsigned cw = header->det >= 0.0;
+ unsigned mode = unfilled->mode[cw];
+ if (0)
+ print_header_flags(header->flags);
+
switch (mode) {
case PIPE_POLYGON_MODE_FILL:
stage->next->tri( stage->next, header );
@@ -139,9 +160,10 @@ static void unfilled_first_tri( struct draw_stage *stage,
struct prim_header *header )
{
struct unfilled_stage *unfilled = unfilled_stage(stage);
+ const struct pipe_rasterizer_state *rast = stage->draw->rasterizer;
- unfilled->mode[0] = stage->draw->rasterizer->fill_ccw; /* front */
- unfilled->mode[1] = stage->draw->rasterizer->fill_cw; /* back */
+ unfilled->mode[0] = rast->front_ccw ? rast->fill_front : rast->fill_back;
+ unfilled->mode[1] = rast->front_ccw ? rast->fill_back : rast->fill_front;
stage->tri = unfilled_tri;
stage->tri( stage, header );
@@ -180,9 +202,6 @@ struct draw_stage *draw_unfilled_stage( struct draw_context *draw )
if (unfilled == NULL)
goto fail;
- if (!draw_alloc_temp_verts( &unfilled->stage, 0 ))
- goto fail;
-
unfilled->stage.draw = draw;
unfilled->stage.name = "unfilled";
unfilled->stage.next = NULL;
@@ -194,6 +213,9 @@ struct draw_stage *draw_unfilled_stage( struct draw_context *draw )
unfilled->stage.reset_stipple_counter = unfilled_reset_stipple_counter;
unfilled->stage.destroy = unfilled_destroy;
+ if (!draw_alloc_temp_verts( &unfilled->stage, 0 ))
+ goto fail;
+
return &unfilled->stage;
fail:
diff --git a/src/gallium/auxiliary/draw/draw_pipe_validate.c b/src/gallium/auxiliary/draw/draw_pipe_validate.c
index 2a50af7a414..c575a8ac7ca 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_validate.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_validate.c
@@ -122,12 +122,14 @@ draw_need_pipeline(const struct draw_context *draw,
return TRUE;
/* unfilled polygons */
- if (rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL ||
- rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL)
+ if (rasterizer->fill_front != PIPE_POLYGON_MODE_FILL ||
+ rasterizer->fill_back != PIPE_POLYGON_MODE_FILL)
return TRUE;
/* polygon offset */
- if (rasterizer->offset_cw || rasterizer->offset_ccw)
+ if (rasterizer->offset_point ||
+ rasterizer->offset_line ||
+ rasterizer->offset_tri)
return TRUE;
/* two-side lighting */
@@ -170,7 +172,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage )
wide_lines = (rast->line_width > draw->pipeline.wide_line_threshold
&& !rast->line_smooth);
- /* drawing large points? */
+ /* drawing large/sprite points (but not AA points)? */
if (rast->sprite_coord_enable && draw->pipeline.point_sprite)
wide_points = TRUE;
else if (rast->point_smooth && draw->pipeline.aapoint)
@@ -205,7 +207,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage )
precalc_flat = TRUE;
}
- if (wide_points || rast->sprite_coord_enable) {
+ if (wide_points) {
draw->pipeline.wide_point->next = next;
next = draw->pipeline.wide_point;
}
@@ -222,8 +224,8 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage )
next = draw->pipeline.pstipple;
}
- if (rast->fill_cw != PIPE_POLYGON_MODE_FILL ||
- rast->fill_ccw != PIPE_POLYGON_MODE_FILL) {
+ if (rast->fill_front != PIPE_POLYGON_MODE_FILL ||
+ rast->fill_back != PIPE_POLYGON_MODE_FILL) {
draw->pipeline.unfilled->next = next;
next = draw->pipeline.unfilled;
precalc_flat = TRUE; /* only needed for triangles really */
@@ -235,8 +237,9 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage )
next = draw->pipeline.flatshade;
}
- if (rast->offset_cw ||
- rast->offset_ccw) {
+ if (rast->offset_point ||
+ rast->offset_line ||
+ rast->offset_tri) {
draw->pipeline.offset->next = next;
next = draw->pipeline.offset;
need_det = TRUE;
@@ -255,14 +258,14 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage )
* to less work emitting vertices, smaller vertex buffers, etc.
* It's difficult to say whether this will be true in general.
*/
- if (need_det || rast->cull_mode) {
+ if (need_det || rast->cull_face != PIPE_FACE_NONE) {
draw->pipeline.cull->next = next;
next = draw->pipeline.cull;
}
/* Clip stage
*/
- if (!draw->bypass_clipping)
+ if (draw->clip_xy || draw->clip_z || draw->clip_user)
{
draw->pipeline.clip->next = next;
next = draw->pipeline.clip;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
index abbf6247ab8..58c5858734a 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
@@ -159,19 +159,8 @@ vbuf_tri( struct draw_stage *stage,
check_space( vbuf, 3 );
- if (vbuf->stage.draw->rasterizer->flatshade_first) {
- /* Put provoking vertex in position expected by the driver.
- * Emit last provoking vertex in first pos.
- * Swap verts 0 & 1 to preserve polygon winding.
- */
- vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[2] );
- vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[0] );
- vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[1] );
- }
- else {
- for (i = 0; i < 3; i++) {
- vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[i] );
- }
+ for (i = 0; i < 3; i++) {
+ vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[i] );
}
}
@@ -335,9 +324,9 @@ vbuf_flush_vertices( struct vbuf_stage *vbuf )
if (vbuf->nr_indices)
{
- vbuf->render->draw(vbuf->render,
- vbuf->indices,
- vbuf->nr_indices );
+ vbuf->render->draw_elements(vbuf->render,
+ vbuf->indices,
+ vbuf->nr_indices );
vbuf->nr_indices = 0;
}
@@ -364,9 +353,6 @@ vbuf_alloc_vertices( struct vbuf_stage *vbuf )
/* Allocate a new vertex buffer */
vbuf->max_vertices = vbuf->render->max_vertex_buffer_bytes / vbuf->vertex_size;
- /* even number */
- vbuf->max_vertices = vbuf->max_vertices & ~1;
-
if(vbuf->max_vertices >= UNDEFINED_VERTEX_ID)
vbuf->max_vertices = UNDEFINED_VERTEX_ID - 1;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c
index ab167065815..98da9cfb999 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c
@@ -77,8 +77,11 @@ static void wideline_line( struct draw_stage *stage,
const float dx = fabsf(pos0[0] - pos2[0]);
const float dy = fabsf(pos0[1] - pos2[1]);
+ const boolean gl_rasterization_rules =
+ stage->draw->rasterizer->gl_rasterization_rules;
+
/* small tweak to meet GL specification */
- const float bias = 0.125f;
+ const float bias = gl_rasterization_rules ? 0.125f : 0.0f;
/*
* Draw wide line as a quad (two tris) by "stretching" the line along
@@ -92,19 +95,21 @@ static void wideline_line( struct draw_stage *stage,
pos1[1] = pos1[1] + half_width - bias;
pos2[1] = pos2[1] - half_width - bias;
pos3[1] = pos3[1] + half_width - bias;
- if (pos0[0] < pos2[0]) {
- /* left to right line */
- pos0[0] -= 0.5f;
- pos1[0] -= 0.5f;
- pos2[0] -= 0.5f;
- pos3[0] -= 0.5f;
- }
- else {
- /* right to left line */
- pos0[0] += 0.5f;
- pos1[0] += 0.5f;
- pos2[0] += 0.5f;
- pos3[0] += 0.5f;
+ if (gl_rasterization_rules) {
+ if (pos0[0] < pos2[0]) {
+ /* left to right line */
+ pos0[0] -= 0.5f;
+ pos1[0] -= 0.5f;
+ pos2[0] -= 0.5f;
+ pos3[0] -= 0.5f;
+ }
+ else {
+ /* right to left line */
+ pos0[0] += 0.5f;
+ pos1[0] += 0.5f;
+ pos2[0] += 0.5f;
+ pos3[0] += 0.5f;
+ }
}
}
else {
@@ -113,19 +118,21 @@ static void wideline_line( struct draw_stage *stage,
pos1[0] = pos1[0] + half_width + bias;
pos2[0] = pos2[0] - half_width + bias;
pos3[0] = pos3[0] + half_width + bias;
- if (pos0[1] < pos2[1]) {
- /* top to bottom line */
- pos0[1] -= 0.5f;
- pos1[1] -= 0.5f;
- pos2[1] -= 0.5f;
- pos3[1] -= 0.5f;
- }
- else {
- /* bottom to top line */
- pos0[1] += 0.5f;
- pos1[1] += 0.5f;
- pos2[1] += 0.5f;
- pos3[1] += 0.5f;
+ if (gl_rasterization_rules) {
+ if (pos0[1] < pos2[1]) {
+ /* top to bottom line */
+ pos0[1] -= 0.5f;
+ pos1[1] -= 0.5f;
+ pos2[1] -= 0.5f;
+ pos3[1] -= 0.5f;
+ }
+ else {
+ /* bottom to top line */
+ pos0[1] += 0.5f;
+ pos1[1] += 0.5f;
+ pos2[1] += 0.5f;
+ pos3[1] += 0.5f;
+ }
}
}
@@ -195,8 +202,8 @@ static void wideline_destroy( struct draw_stage *stage )
struct draw_stage *draw_wide_line_stage( struct draw_context *draw )
{
struct wideline_stage *wide = CALLOC_STRUCT(wideline_stage);
-
- draw_alloc_temp_verts( &wide->stage, 4 );
+ if (wide == NULL)
+ goto fail;
wide->stage.draw = draw;
wide->stage.name = "wide-line";
@@ -208,5 +215,14 @@ struct draw_stage *draw_wide_line_stage( struct draw_context *draw )
wide->stage.reset_stipple_counter = wideline_reset_stipple_counter;
wide->stage.destroy = wideline_destroy;
+ if (!draw_alloc_temp_verts( &wide->stage, 4 ))
+ goto fail;
+
return &wide->stage;
+
+fail:
+ if (wide)
+ wide->stage.destroy( &wide->stage );
+
+ return NULL;
}
diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
index a86fe19586c..3646c6a7145 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
@@ -57,26 +57,24 @@
#include "util/u_memory.h"
#include "pipe/p_defines.h"
#include "pipe/p_shader_tokens.h"
+#include "draw_fs.h"
#include "draw_vs.h"
#include "draw_pipe.h"
struct widepoint_stage {
- struct draw_stage stage;
+ struct draw_stage stage; /**< base class */
float half_point_size;
float xbias;
float ybias;
- uint texcoord_slot[PIPE_MAX_SHADER_OUTPUTS];
- uint texcoord_enable[PIPE_MAX_SHADER_OUTPUTS];
- uint num_texcoords;
- uint texcoord_mode;
+ /** for automatic texcoord generation/replacement */
+ uint num_texcoord_gen;
+ uint texcoord_gen_slot[PIPE_MAX_SHADER_OUTPUTS];
int psize_slot;
-
- int point_coord_fs_input; /**< input for pointcoord */
};
@@ -96,30 +94,20 @@ widepoint_stage( struct draw_stage *stage )
static void set_texcoords(const struct widepoint_stage *wide,
struct vertex_header *v, const float tc[4])
{
+ const struct draw_context *draw = wide->stage.draw;
+ const struct pipe_rasterizer_state *rast = draw->rasterizer;
+ const uint texcoord_mode = rast->sprite_coord_mode;
uint i;
- for (i = 0; i < wide->num_texcoords; i++) {
- if (wide->texcoord_enable[i]) {
- uint j = wide->texcoord_slot[i];
- v->data[j][0] = tc[0];
- if (wide->texcoord_mode == PIPE_SPRITE_COORD_LOWER_LEFT)
- v->data[j][1] = 1.0f - tc[1];
- else
- v->data[j][1] = tc[1];
- v->data[j][2] = tc[2];
- v->data[j][3] = tc[3];
- }
- }
- if (wide->point_coord_fs_input >= 0) {
- /* put gl_PointCoord into the extra vertex slot */
- uint slot = wide->stage.draw->extra_shader_outputs.slot;
+ for (i = 0; i < wide->num_texcoord_gen; i++) {
+ const uint slot = wide->texcoord_gen_slot[i];
v->data[slot][0] = tc[0];
- if (wide->texcoord_mode == PIPE_SPRITE_COORD_LOWER_LEFT)
+ if (texcoord_mode == PIPE_SPRITE_COORD_LOWER_LEFT)
v->data[slot][1] = 1.0f - tc[1];
else
v->data[slot][1] = tc[1];
- v->data[slot][2] = 0.0F;
- v->data[slot][3] = 1.0F;
+ v->data[slot][2] = tc[2];
+ v->data[slot][3] = tc[3];
}
}
@@ -201,18 +189,9 @@ static void widepoint_point( struct draw_stage *stage,
}
-static int
-find_pntc_input_attrib(struct draw_context *draw)
-{
- /* Scan the fragment program's input decls to find the pointcoord
- * attribute. The xy components will store the point coord.
- */
- return 0; /* XXX fix this */
-}
-
-
-static void widepoint_first_point( struct draw_stage *stage,
- struct prim_header *header )
+static void
+widepoint_first_point(struct draw_stage *stage,
+ struct prim_header *header)
{
struct widepoint_stage *wide = widepoint_stage(stage);
struct draw_context *draw = stage->draw;
@@ -226,6 +205,7 @@ static void widepoint_first_point( struct draw_stage *stage,
if (rast->gl_rasterization_rules) {
wide->xbias = 0.125;
+ wide->ybias = -0.125;
}
/* Disable triangle culling, stippling, unfilled mode etc. */
@@ -243,31 +223,49 @@ static void widepoint_first_point( struct draw_stage *stage,
stage->point = draw_pipe_passthrough_point;
}
+ draw_remove_extra_vertex_attribs(draw);
+
if (rast->point_quad_rasterization) {
- /* find vertex shader texcoord outputs */
- const struct draw_vertex_shader *vs = draw->vs.vertex_shader;
- uint i, j = 0;
- wide->texcoord_mode = rast->sprite_coord_mode;
- for (i = 0; i < vs->info.num_outputs; i++) {
- if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_GENERIC) {
- wide->texcoord_slot[j] = i;
- wide->texcoord_enable[j] = (rast->sprite_coord_enable >> j) & 1;
- j++;
+ const struct draw_fragment_shader *fs = draw->fs.fragment_shader;
+ uint i;
+
+ wide->num_texcoord_gen = 0;
+
+ /* Loop over fragment shader inputs looking for generic inputs
+ * for which bit 'k' in sprite_coord_enable is set.
+ */
+ for (i = 0; i < fs->info.num_inputs; i++) {
+ if (fs->info.input_semantic_name[i] == TGSI_SEMANTIC_GENERIC) {
+ const int generic_index = fs->info.input_semantic_index[i];
+ /* Note that sprite_coord enable is a bitfield of
+ * PIPE_MAX_SHADER_OUTPUTS bits.
+ */
+ if (generic_index < PIPE_MAX_SHADER_OUTPUTS &&
+ (rast->sprite_coord_enable & (1 << generic_index))) {
+ /* OK, this generic attribute needs to be replaced with a
+ * texcoord (see above).
+ */
+ int slot = draw_find_shader_output(draw,
+ TGSI_SEMANTIC_GENERIC,
+ generic_index);
+
+ if (slot > 0) {
+ /* there's already a post-vertex shader attribute
+ * for this fragment shader input attribute.
+ */
+ }
+ else {
+ /* need to allocate a new post-vertex shader attribute */
+ slot = draw_alloc_extra_vertex_attrib(draw,
+ TGSI_SEMANTIC_GENERIC,
+ generic_index);
+ }
+
+ /* add this slot to the texcoord-gen list */
+ wide->texcoord_gen_slot[wide->num_texcoord_gen++] = slot;
+ }
}
}
- wide->num_texcoords = j;
-
- /* find fragment shader PointCoord input */
- wide->point_coord_fs_input = find_pntc_input_attrib(draw);
-
- /* setup extra vp output (point coord implemented as a texcoord) */
- draw->extra_shader_outputs.semantic_name = TGSI_SEMANTIC_GENERIC;
- draw->extra_shader_outputs.semantic_index = 0;
- draw->extra_shader_outputs.slot = draw_current_shader_outputs(draw);
- }
- else {
- wide->point_coord_fs_input = -1;
- draw->extra_shader_outputs.slot = 0;
}
wide->psize_slot = -1;
@@ -294,7 +292,8 @@ static void widepoint_flush( struct draw_stage *stage, unsigned flags )
stage->point = widepoint_first_point;
stage->next->flush( stage->next, flags );
- stage->draw->extra_shader_outputs.slot = 0;
+
+ draw_remove_extra_vertex_attribs(draw);
/* restore original rasterizer state */
if (draw->rast_handle) {
@@ -324,9 +323,6 @@ struct draw_stage *draw_wide_point_stage( struct draw_context *draw )
if (wide == NULL)
goto fail;
- if (!draw_alloc_temp_verts( &wide->stage, 4 ))
- goto fail;
-
wide->stage.draw = draw;
wide->stage.name = "wide-point";
wide->stage.next = NULL;
@@ -337,6 +333,9 @@ struct draw_stage *draw_wide_point_stage( struct draw_context *draw )
wide->stage.reset_stipple_counter = widepoint_reset_stipple_counter;
wide->stage.destroy = widepoint_destroy;
+ if (!draw_alloc_temp_verts( &wide->stage, 4 ))
+ goto fail;
+
return &wide->stage;
fail:
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index a2bfb693c09..d417f825a0f 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -48,6 +48,7 @@
#ifdef HAVE_LLVM
#include <llvm-c/ExecutionEngine.h>
+struct draw_llvm;
#endif
@@ -81,6 +82,9 @@ struct vertex_header {
#define UNDEFINED_VERTEX_ID 0xffff
+/* maximum number of shader variants we can cache */
+#define DRAW_MAX_SHADER_VARIANTS 1024
+
/**
* Private context for the drawing module.
*/
@@ -136,8 +140,7 @@ struct draw_context
} middle;
struct {
- struct draw_pt_front_end *vcache;
- struct draw_pt_front_end *varray;
+ struct draw_pt_front_end *vsplit;
} front;
struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
@@ -146,6 +149,8 @@ struct draw_context
struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
unsigned nr_vertex_elements;
+ struct pipe_index_buffer index_buffer;
+
/* user-space vertex data, buffers */
struct {
/** vertex element/index buffer (ex: glDrawElements) */
@@ -159,9 +164,11 @@ struct draw_context
/** vertex arrays */
const void *vbuffer[PIPE_MAX_ATTRIBS];
- /** constant buffer (for vertex/geometry shader) */
+ /** constant buffers (for vertex/geometry shader) */
const void *vs_constants[PIPE_MAX_CONSTANT_BUFFERS];
+ unsigned vs_constants_size[PIPE_MAX_CONSTANT_BUFFERS];
const void *gs_constants[PIPE_MAX_CONSTANT_BUFFERS];
+ unsigned gs_constants_size[PIPE_MAX_CONSTANT_BUFFERS];
} user;
boolean test_fse; /* enable FSE even though its not correct (eg for softpipe) */
@@ -169,13 +176,19 @@ struct draw_context
} pt;
struct {
- boolean bypass_clipping;
- boolean bypass_vs;
+ boolean bypass_clip_xy;
+ boolean bypass_clip_z;
} driver;
boolean flushing; /**< debugging/sanity */
boolean suspend_flushing; /**< internally set */
- boolean bypass_clipping; /**< set if either api or driver bypass_clipping true */
+
+ /* Flags set if API requires clipping in these planes and the
+ * driver doesn't indicate that it can do it for us.
+ */
+ boolean clip_xy;
+ boolean clip_z;
+ boolean clip_user;
boolean force_passthrough; /**< never clip or shade */
@@ -194,6 +207,7 @@ struct draw_context
struct pipe_viewport_state viewport;
boolean identity_viewport;
+ /** Vertex shader state */
struct {
struct draw_vertex_shader *vertex_shader;
uint num_vs_outputs; /**< convenience, from vertex_shader */
@@ -223,6 +237,7 @@ struct draw_context
struct translate_cache *emit_cache;
} vs;
+ /** Geometry shader state */
struct {
struct draw_geometry_shader *geometry_shader;
uint num_gs_outputs; /**< convenience, from geometry_shader */
@@ -235,17 +250,31 @@ struct draw_context
struct tgsi_sampler **samplers;
} gs;
+ /** Fragment shader state */
+ struct {
+ struct draw_fragment_shader *fragment_shader;
+ } fs;
+
+ /** Stream output (vertex feedback) state */
+ struct {
+ struct pipe_stream_output_state state;
+ void *buffers[PIPE_MAX_SO_BUFFERS];
+ uint num_buffers;
+ } so;
+
/* Clip derived state:
*/
float plane[12][4];
unsigned nr_planes;
+ boolean depth_clamp;
/* If a prim stage introduces new vertex attributes, they'll be stored here
*/
struct {
- uint semantic_name;
- uint semantic_index;
- int slot;
+ uint num;
+ uint semantic_name[10];
+ uint semantic_index[10];
+ uint slot[10];
} extra_shader_outputs;
unsigned reduced_prim;
@@ -253,12 +282,51 @@ struct draw_context
unsigned instance_id;
#ifdef HAVE_LLVM
+ struct draw_llvm *llvm;
LLVMExecutionEngineRef engine;
#endif
+ struct pipe_sampler_view *sampler_views[PIPE_MAX_VERTEX_SAMPLERS];
+ unsigned num_sampler_views;
+ const struct pipe_sampler_state *samplers[PIPE_MAX_VERTEX_SAMPLERS];
+ unsigned num_samplers;
+
void *driver_private;
};
+
+struct draw_fetch_info {
+ boolean linear;
+ unsigned start;
+ const unsigned *elts;
+ unsigned count;
+};
+
+struct draw_vertex_info {
+ struct vertex_header *verts;
+ unsigned vertex_size;
+ unsigned stride;
+ unsigned count;
+};
+
+/* these flags are set if the primitive is a segment of a larger one */
+#define DRAW_SPLIT_BEFORE 0x1
+#define DRAW_SPLIT_AFTER 0x2
+
+struct draw_prim_info {
+ boolean linear;
+ unsigned start;
+
+ const ushort *elts;
+ unsigned count;
+
+ unsigned prim;
+ unsigned flags;
+ unsigned *primitive_lengths;
+ unsigned primitive_count;
+};
+
+
/*******************************************************************************
* Draw common initialization code
*/
@@ -300,6 +368,11 @@ void draw_gs_destroy( struct draw_context *draw );
uint draw_current_shader_outputs(const struct draw_context *draw);
uint draw_current_shader_position_output(const struct draw_context *draw);
+int draw_alloc_extra_vertex_attrib(struct draw_context *draw,
+ uint semantic_name, uint semantic_index);
+void draw_remove_extra_vertex_attribs(struct draw_context *draw);
+
+
/*******************************************************************************
* Vertex processing (was passthrough) code:
*/
@@ -319,35 +392,24 @@ void draw_pipeline_destroy( struct draw_context *draw );
-/* We use the top few bits in the elts[] parameter to convey a little
- * API information. This limits the number of vertices we can address
- * to only 4096 -- if that becomes a problem, we can switch to 32-bit
- * draw indices.
- *
- * These flags expected at first vertex of lines & triangles when
- * unfilled and/or line stipple modes are operational.
+/*
+ * These flags are used by the pipeline when unfilled and/or line stipple modes
+ * are operational.
*/
-#define DRAW_PIPE_MAX_VERTICES (0x1<<12)
-#define DRAW_PIPE_EDGE_FLAG_0 (0x1<<12)
-#define DRAW_PIPE_EDGE_FLAG_1 (0x2<<12)
-#define DRAW_PIPE_EDGE_FLAG_2 (0x4<<12)
-#define DRAW_PIPE_EDGE_FLAG_ALL (0x7<<12)
-#define DRAW_PIPE_RESET_STIPPLE (0x8<<12)
-#define DRAW_PIPE_FLAG_MASK (0xf<<12)
+#define DRAW_PIPE_EDGE_FLAG_0 0x1
+#define DRAW_PIPE_EDGE_FLAG_1 0x2
+#define DRAW_PIPE_EDGE_FLAG_2 0x4
+#define DRAW_PIPE_EDGE_FLAG_ALL 0x7
+#define DRAW_PIPE_RESET_STIPPLE 0x8
void draw_pipeline_run( struct draw_context *draw,
- unsigned prim,
- struct vertex_header *vertices,
- unsigned vertex_count,
- unsigned stride,
- const ushort *elts,
- unsigned count );
+ const struct draw_vertex_info *vert,
+ const struct draw_prim_info *prim);
void draw_pipeline_run_linear( struct draw_context *draw,
- unsigned prim,
- struct vertex_header *vertices,
- unsigned count,
- unsigned stride );
+ const struct draw_vertex_info *vert,
+ const struct draw_prim_info *prim);
+
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index b853f3a89f8..f44bf2507c6 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -31,30 +31,22 @@
*/
#include "draw/draw_context.h"
+#include "draw/draw_gs.h"
#include "draw/draw_private.h"
#include "draw/draw_pt.h"
+#include "draw/draw_vs.h"
#include "tgsi/tgsi_dump.h"
#include "util/u_math.h"
#include "util/u_prim.h"
+#include "util/u_format.h"
+#include "util/u_draw.h"
DEBUG_GET_ONCE_BOOL_OPTION(draw_fse, "DRAW_FSE", FALSE)
DEBUG_GET_ONCE_BOOL_OPTION(draw_no_fse, "DRAW_NO_FSE", FALSE)
-#ifdef HAVE_LLVM
-DEBUG_GET_ONCE_BOOL_OPTION(draw_use_llvm, "DRAW_USE_LLVM", TRUE)
-#endif
-
-static unsigned trim( unsigned count, unsigned first, unsigned incr )
-{
- if (count < first)
- return 0;
- return count - (count - first) % incr;
-}
-
-
/* Overall we split things into:
- * - frontend -- prepare fetch_elts, draw_elts - eg vcache
+ * - frontend -- prepare fetch_elts, draw_elts - eg vsplit
* - middle -- fetch, shade, cliptest, viewport
* - pipeline -- the prim pipeline: clipping, wide lines, etc
* - backend -- the vbuf_render provided by the driver.
@@ -74,29 +66,35 @@ draw_pt_arrays(struct draw_context *draw,
{
unsigned first, incr;
draw_pt_split_prim(prim, &first, &incr);
- count = trim(count, first, incr);
+ count = draw_pt_trim_count(count, first, incr);
if (count < first)
return TRUE;
}
if (!draw->force_passthrough) {
+ unsigned gs_out_prim = (draw->gs.geometry_shader ?
+ draw->gs.geometry_shader->output_primitive :
+ prim);
+
if (!draw->render) {
opt |= PT_PIPELINE;
}
-
+
if (draw_need_pipeline(draw,
draw->rasterizer,
- prim)) {
+ gs_out_prim)) {
opt |= PT_PIPELINE;
}
- if (!draw->bypass_clipping && !draw->pt.test_fse) {
+ if ((draw->clip_xy ||
+ draw->clip_z ||
+ draw->clip_user) && !draw->pt.test_fse) {
opt |= PT_CLIPTEST;
}
-
+
opt |= PT_SHADE;
}
-
+
if (draw->pt.middle.llvm) {
middle = draw->pt.middle.llvm;
} else {
@@ -108,22 +106,11 @@ draw_pt_arrays(struct draw_context *draw,
middle = draw->pt.middle.general;
}
-
- /* Pick the right frontend
- */
- if (draw->pt.user.elts || (opt & PT_PIPELINE)) {
- frontend = draw->pt.front.vcache;
- } else {
- frontend = draw->pt.front.varray;
- }
+ frontend = draw->pt.front.vsplit;
frontend->prepare( frontend, prim, middle, opt );
- frontend->run(frontend,
- draw_pt_elt_func(draw),
- draw_pt_elt_ptr(draw, start),
- draw->pt.user.eltBias,
- count);
+ frontend->run(frontend, start, count);
frontend->finish( frontend );
@@ -136,12 +123,8 @@ boolean draw_pt_init( struct draw_context *draw )
draw->pt.test_fse = debug_get_option_draw_fse();
draw->pt.no_fse = debug_get_option_draw_no_fse();
- draw->pt.front.vcache = draw_pt_vcache( draw );
- if (!draw->pt.front.vcache)
- return FALSE;
-
- draw->pt.front.varray = draw_pt_varray(draw);
- if (!draw->pt.front.varray)
+ draw->pt.front.vsplit = draw_pt_vsplit(draw);
+ if (!draw->pt.front.vsplit)
return FALSE;
draw->pt.middle.fetch_emit = draw_pt_fetch_emit( draw );
@@ -157,7 +140,7 @@ boolean draw_pt_init( struct draw_context *draw )
return FALSE;
#if HAVE_LLVM
- if (debug_get_option_draw_use_llvm())
+ if (draw->llvm)
draw->pt.middle.llvm = draw_pt_fetch_pipeline_or_emit_llvm( draw );
#endif
@@ -187,14 +170,9 @@ void draw_pt_destroy( struct draw_context *draw )
draw->pt.middle.fetch_shade_emit = NULL;
}
- if (draw->pt.front.vcache) {
- draw->pt.front.vcache->destroy( draw->pt.front.vcache );
- draw->pt.front.vcache = NULL;
- }
-
- if (draw->pt.front.varray) {
- draw->pt.front.varray->destroy( draw->pt.front.varray );
- draw->pt.front.varray = NULL;
+ if (draw->pt.front.vsplit) {
+ draw->pt.front.vsplit->destroy( draw->pt.front.vsplit );
+ draw->pt.front.vsplit = NULL;
}
}
@@ -214,24 +192,29 @@ draw_print_arrays(struct draw_context *draw, uint prim, int start, uint count)
uint ii = 0;
uint j;
- if (draw->pt.user.elts) {
+ if (draw->pt.user.eltSize) {
+ const char *elts;
+
/* indexed arrays */
+ elts = (const char *) draw->pt.user.elts;
+ elts += draw->pt.index_buffer.offset;
+
switch (draw->pt.user.eltSize) {
case 1:
{
- const ubyte *elem = (const ubyte *) draw->pt.user.elts;
+ const ubyte *elem = (const ubyte *) elts;
ii = elem[start + i];
}
break;
case 2:
{
- const ushort *elem = (const ushort *) draw->pt.user.elts;
+ const ushort *elem = (const ushort *) elts;
ii = elem[start + i];
}
break;
case 4:
{
- const uint *elem = (const uint *) draw->pt.user.elts;
+ const uint *elem = (const uint *) elts;
ii = elem[start + i];
}
break;
@@ -252,6 +235,12 @@ draw_print_arrays(struct draw_context *draw, uint prim, int start, uint count)
for (j = 0; j < draw->pt.nr_vertex_elements; j++) {
uint buf = draw->pt.vertex_element[j].vertex_buffer_index;
ubyte *ptr = (ubyte *) draw->pt.user.vbuffer[buf];
+
+ if (draw->pt.vertex_element[j].instance_divisor) {
+ ii = draw->instance_id / draw->pt.vertex_element[j].instance_divisor;
+ }
+
+ ptr += draw->pt.vertex_buffer[buf].buffer_offset;
ptr += draw->pt.vertex_buffer[buf].stride * ii;
ptr += draw->pt.vertex_element[j].src_offset;
@@ -260,31 +249,38 @@ draw_print_arrays(struct draw_context *draw, uint prim, int start, uint count)
case PIPE_FORMAT_R32_FLOAT:
{
float *v = (float *) ptr;
- debug_printf("%f @ %p\n", v[0], (void *) v);
+ debug_printf("R %f @ %p\n", v[0], (void *) v);
}
break;
case PIPE_FORMAT_R32G32_FLOAT:
{
float *v = (float *) ptr;
- debug_printf("%f %f @ %p\n", v[0], v[1], (void *) v);
+ debug_printf("RG %f %f @ %p\n", v[0], v[1], (void *) v);
}
break;
case PIPE_FORMAT_R32G32B32_FLOAT:
{
float *v = (float *) ptr;
- debug_printf("%f %f %f @ %p\n", v[0], v[1], v[2], (void *) v);
+ debug_printf("RGB %f %f %f @ %p\n", v[0], v[1], v[2], (void *) v);
}
break;
case PIPE_FORMAT_R32G32B32A32_FLOAT:
{
float *v = (float *) ptr;
- debug_printf("%f %f %f %f @ %p\n", v[0], v[1], v[2], v[3],
+ debug_printf("RGBA %f %f %f %f @ %p\n", v[0], v[1], v[2], v[3],
(void *) v);
}
break;
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ {
+ ubyte *u = (ubyte *) ptr;
+ debug_printf("BGRA %d %d %d %d @ %p\n", u[0], u[1], u[2], u[3],
+ (void *) u);
+ }
+ break;
default:
- debug_printf("other format (fix me)\n");
- ;
+ debug_printf("other format %s (fix me)\n",
+ util_format_name(draw->pt.vertex_element[j].src_format));
}
}
}
@@ -292,11 +288,8 @@ draw_print_arrays(struct draw_context *draw, uint prim, int start, uint count)
/**
- * Draw vertex arrays
- * This is the main entrypoint into the drawing module.
- * \param prim one of PIPE_PRIM_x
- * \param start index of first vertex to draw
- * \param count number of vertices to draw
+ * Non-instanced drawing.
+ * \sa draw_arrays_instanced
*/
void
draw_arrays(struct draw_context *draw, unsigned prim,
@@ -305,6 +298,11 @@ draw_arrays(struct draw_context *draw, unsigned prim,
draw_arrays_instanced(draw, prim, start, count, 0, 1);
}
+
+/**
+ * Instanced drawing.
+ * \sa draw_vbo
+ */
void
draw_arrays_instanced(struct draw_context *draw,
unsigned mode,
@@ -313,40 +311,90 @@ draw_arrays_instanced(struct draw_context *draw,
unsigned startInstance,
unsigned instanceCount)
{
- unsigned reduced_prim = u_reduced_prim(mode);
+ struct pipe_draw_info info;
+
+ util_draw_init_info(&info);
+
+ info.mode = mode;
+ info.start = start;
+ info.count = count;
+ info.start_instance = startInstance;
+ info.instance_count = instanceCount;
+
+ info.indexed = (draw->pt.user.elts != NULL);
+ if (!info.indexed) {
+ info.min_index = start;
+ info.max_index = start + count - 1;
+ }
+
+ draw_vbo(draw, &info);
+}
+
+
+/**
+ * Draw vertex arrays.
+ * This is the main entrypoint into the drawing module. If drawing an indexed
+ * primitive, the draw_set_index_buffer() and draw_set_mapped_index_buffer()
+ * functions should have already been called to specify the element/index
+ * buffer information.
+ */
+void
+draw_vbo(struct draw_context *draw,
+ const struct pipe_draw_info *info)
+{
+ unsigned reduced_prim = u_reduced_prim(info->mode);
unsigned instance;
+ assert(info->instance_count > 0);
+ if (info->indexed)
+ assert(draw->pt.user.elts);
+
+ draw->pt.user.eltSize =
+ (info->indexed) ? draw->pt.index_buffer.index_size : 0;
+
+ draw->pt.user.eltBias = info->index_bias;
+ draw->pt.user.min_index = info->min_index;
+ draw->pt.user.max_index = info->max_index;
+
if (reduced_prim != draw->reduced_prim) {
draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE);
draw->reduced_prim = reduced_prim;
}
if (0)
- draw_print_arrays(draw, mode, start, MIN2(count, 20));
+ debug_printf("draw_vbo(mode=%u start=%u count=%u):\n",
+ info->mode, info->start, info->count);
-#if 0
- {
- int i;
- debug_printf("draw_arrays(mode=%u start=%u count=%u):\n",
- mode, start, count);
+ if (0)
tgsi_dump(draw->vs.vertex_shader->state.tokens, 0);
+
+ if (0) {
+ unsigned int i;
debug_printf("Elements:\n");
for (i = 0; i < draw->pt.nr_vertex_elements; i++) {
- debug_printf(" format=%s\n",
+ debug_printf(" %u: src_offset=%u inst_div=%u vbuf=%u format=%s\n",
+ i,
+ draw->pt.vertex_element[i].src_offset,
+ draw->pt.vertex_element[i].instance_divisor,
+ draw->pt.vertex_element[i].vertex_buffer_index,
util_format_name(draw->pt.vertex_element[i].src_format));
}
debug_printf("Buffers:\n");
for (i = 0; i < draw->pt.nr_vertex_buffers; i++) {
- debug_printf(" stride=%u offset=%u ptr=%p\n",
+ debug_printf(" %u: stride=%u maxindex=%u offset=%u ptr=%p\n",
+ i,
draw->pt.vertex_buffer[i].stride,
+ draw->pt.vertex_buffer[i].max_index,
draw->pt.vertex_buffer[i].buffer_offset,
draw->pt.user.vbuffer[i]);
}
}
-#endif
- for (instance = 0; instance < instanceCount; instance++) {
- draw->instance_id = instance + startInstance;
- draw_pt_arrays(draw, mode, start, count);
+ if (0)
+ draw_print_arrays(draw, info->mode, info->start, MIN2(info->count, 20));
+
+ for (instance = 0; instance < info->instance_count; instance++) {
+ draw->instance_id = instance + info->start_instance;
+ draw_pt_arrays(draw, info->mode, info->start, info->count);
}
}
diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h
index 3e3ea320cc0..5fbb4242915 100644
--- a/src/gallium/auxiliary/draw/draw_pt.h
+++ b/src/gallium/auxiliary/draw/draw_pt.h
@@ -35,10 +35,10 @@
#include "pipe/p_compiler.h"
-typedef unsigned (*pt_elt_func)( const void *elts, unsigned idx );
-
struct draw_pt_middle_end;
struct draw_context;
+struct draw_prim_info;
+struct draw_vertex_info;
#define PT_SHADE 0x1
@@ -50,13 +50,18 @@ struct draw_context;
/* The "front end" - prepare sets of fetch, draw elements for the
* middle end.
*
- * Currenly one version of this:
- * - vcache - catchall implementation, decomposes to TRI/LINE/POINT prims
- * Later:
- * - varray, varray_split
- * - velement, velement_split
+ * The fetch elements are indices to the vertices. The draw elements are
+ * indices to the fetched vertices. When both arrays of elements are both
+ * linear, middle->run_linear is called; When only the fetch elements are
+ * linear, middle->run_linear_elts is called; Otherwise, middle->run is
+ * called.
+ *
+ * When the number of the draw elements exceeds max_vertex of the middle end,
+ * the draw elements (as well as the fetch elements) are splitted and the
+ * middle end is called multiple times.
*
- * Currenly only using the vcache version.
+ * Currenly there is:
+ * - vsplit - catchall implementation, splits big prims
*/
struct draw_pt_front_end {
void (*prepare)( struct draw_pt_front_end *,
@@ -65,9 +70,7 @@ struct draw_pt_front_end {
unsigned opt );
void (*run)( struct draw_pt_front_end *,
- pt_elt_func elt_func,
- const void *elt_ptr,
- int elt_bias,
+ unsigned start,
unsigned count );
void (*finish)( struct draw_pt_front_end * );
@@ -78,6 +81,8 @@ struct draw_pt_front_end {
/* The "middle end" - prepares actual hardware vertices for the
* hardware backend.
*
+ * prim_flags is as defined by pipe_draw_info::flags.
+ *
* Currently two versions of this:
* - fetch, vertex shade, cliptest, prim-pipeline
* - fetch, emit (ie passthrough)
@@ -92,11 +97,13 @@ struct draw_pt_middle_end {
const unsigned *fetch_elts,
unsigned fetch_count,
const ushort *draw_elts,
- unsigned draw_count );
+ unsigned draw_count,
+ unsigned prim_flags );
void (*run_linear)(struct draw_pt_middle_end *,
unsigned start,
- unsigned count);
+ unsigned count,
+ unsigned prim_flags );
/* Transform all vertices in a linear range and then draw them with
* the supplied element list. May fail and return FALSE.
@@ -105,7 +112,8 @@ struct draw_pt_middle_end {
unsigned fetch_start,
unsigned fetch_count,
const ushort *draw_elts,
- unsigned draw_count );
+ unsigned draw_count,
+ unsigned prim_flags );
int (*get_max_vertex_count)( struct draw_pt_middle_end * );
@@ -120,19 +128,11 @@ struct vbuf_render;
struct vertex_header;
-/* Helper functions.
- */
-pt_elt_func draw_pt_elt_func( struct draw_context *draw );
-const void *draw_pt_elt_ptr( struct draw_context *draw,
- unsigned start );
-
/* Frontends:
*
- * Currently only the general-purpose vcache implementation, could add
- * a special case for tiny vertex buffers.
+ * Currently only the general-purpose vsplit implementation.
*/
-struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw );
-struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw);
+struct draw_pt_front_end *draw_pt_vsplit(struct draw_context *draw);
/* Middle-ends:
@@ -162,21 +162,31 @@ void draw_pt_emit_prepare( struct pt_emit *emit,
unsigned *max_vertices );
void draw_pt_emit( struct pt_emit *emit,
- const float (*vertex_data)[4],
- unsigned vertex_count,
- unsigned stride,
- const ushort *elts,
- unsigned count );
+ const struct draw_vertex_info *vert_info,
+ const struct draw_prim_info *prim_info);
void draw_pt_emit_linear( struct pt_emit *emit,
- const float (*vertex_data)[4],
- unsigned stride,
- unsigned count );
+ const struct draw_vertex_info *vert_info,
+ const struct draw_prim_info *prim_info);
void draw_pt_emit_destroy( struct pt_emit *emit );
struct pt_emit *draw_pt_emit_create( struct draw_context *draw );
+/*******************************************************************************
+ * HW stream output emit:
+ */
+struct pt_so_emit;
+
+void draw_pt_so_emit_prepare( struct pt_so_emit *emit );
+
+void draw_pt_so_emit( struct pt_so_emit *emit,
+ const struct draw_vertex_info *vert_info,
+ const struct draw_prim_info *prim_info );
+
+void draw_pt_so_emit_destroy( struct pt_so_emit *emit );
+
+struct pt_so_emit *draw_pt_so_emit_create( struct draw_context *draw );
/*******************************************************************************
* API vertex fetch:
@@ -208,12 +218,12 @@ struct pt_fetch *draw_pt_fetch_create( struct draw_context *draw );
struct pt_post_vs;
boolean draw_pt_post_vs_run( struct pt_post_vs *pvs,
- struct vertex_header *pipeline_verts,
- unsigned stride,
- unsigned count );
+ struct draw_vertex_info *info );
void draw_pt_post_vs_prepare( struct pt_post_vs *pvs,
- boolean bypass_clipping,
+ boolean clip_xy,
+ boolean clip_z,
+ boolean clip_user,
boolean bypass_viewport,
boolean opengl,
boolean need_edgeflags );
@@ -227,6 +237,7 @@ void draw_pt_post_vs_destroy( struct pt_post_vs *pvs );
* Utils:
*/
void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr);
+unsigned draw_pt_trim_count(unsigned count, unsigned first, unsigned incr);
#endif
diff --git a/src/gallium/auxiliary/draw/draw_pt_decompose.h b/src/gallium/auxiliary/draw/draw_pt_decompose.h
index 3c44f7c11ee..3127aad7310 100644
--- a/src/gallium/auxiliary/draw/draw_pt_decompose.h
+++ b/src/gallium/auxiliary/draw/draw_pt_decompose.h
@@ -1,162 +1,7 @@
+#define LOCAL_VARS \
+ char *verts = (char *) vertices; \
+ const boolean last_vertex_last = \
+ !(draw->rasterizer->flatshade && \
+ draw->rasterizer->flatshade_first);
-
-static void FUNC( ARGS,
- unsigned count )
-{
- LOCAL_VARS;
-
- switch (prim) {
- case PIPE_PRIM_POINTS:
- for (i = 0; i < count; i ++) {
- POINT( (i + 0) );
- }
- break;
-
- case PIPE_PRIM_LINES:
- for (i = 0; i+1 < count; i += 2) {
- LINE( DRAW_PIPE_RESET_STIPPLE,
- (i + 0),
- (i + 1));
- }
- break;
-
- case PIPE_PRIM_LINE_LOOP:
- if (count >= 2) {
- flags = DRAW_PIPE_RESET_STIPPLE;
-
- for (i = 1; i < count; i++, flags = 0) {
- LINE( flags,
- (i - 1),
- (i ));
- }
-
- LINE( flags,
- (i - 1),
- (0 ));
- }
- break;
-
- case PIPE_PRIM_LINE_STRIP:
- flags = DRAW_PIPE_RESET_STIPPLE;
- for (i = 1; i < count; i++, flags = 0) {
- LINE( flags,
- (i - 1),
- (i ));
- }
- break;
-
- case PIPE_PRIM_TRIANGLES:
- for (i = 0; i+2 < count; i += 3) {
- if (flatfirst) {
- /* put provoking vertex in last pos for clipper */
- TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- (i + 1),
- (i + 2),
- (i + 0 ));
- }
- else {
- TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- (i + 0),
- (i + 1),
- (i + 2 ));
- }
- }
- break;
-
- case PIPE_PRIM_TRIANGLE_STRIP:
- if (flatfirst) {
- for (i = 0; i+2 < count; i++) {
- TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- (i + 1 + (i&1)),
- (i + 2 - (i&1)),
- (i + 0) );
- }
- }
- else {
- for (i = 0; i+2 < count; i++) {
- TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- (i + 0 + (i&1)),
- (i + 1 - (i&1)),
- (i + 2 ));
- }
- }
- break;
-
- case PIPE_PRIM_TRIANGLE_FAN:
- if (count >= 3) {
- if (flatfirst) {
- for (i = 0; i+2 < count; i++) {
- TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- (i + 2),
- 0,
- (i + 1) );
- }
- }
- else {
- for (i = 0; i+2 < count; i++) {
- TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- (0),
- (i + 1),
- (i + 2 ));
- }
- }
- }
- break;
-
-
- case PIPE_PRIM_QUADS:
- for (i = 0; i+3 < count; i += 4) {
- QUAD( (i + 0),
- (i + 1),
- (i + 2),
- (i + 3));
- }
- break;
-
- case PIPE_PRIM_QUAD_STRIP:
- for (i = 0; i+3 < count; i += 2) {
- QUAD( (i + 2),
- (i + 0),
- (i + 1),
- (i + 3));
- }
- break;
-
- case PIPE_PRIM_POLYGON:
- {
- /* These bitflags look a little odd because we submit the
- * vertices as (1,2,0) to satisfy flatshade requirements.
- */
- const ushort edge_first = DRAW_PIPE_EDGE_FLAG_2;
- const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_0;
- const ushort edge_last = DRAW_PIPE_EDGE_FLAG_1;
-
- flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle;
-
- for (i = 0; i+2 < count; i++, flags = edge_middle) {
-
- if (i + 3 == count)
- flags |= edge_last;
-
- TRIANGLE( flags,
- (i + 1),
- (i + 2),
- (0));
- }
- }
- break;
-
- default:
- assert(0);
- break;
- }
-
- FLUSH;
-}
-
-
-#undef TRIANGLE
-#undef QUAD
-#undef POINT
-#undef LINE
-#undef FUNC
+#include "draw_decompose_tmp.h"
diff --git a/src/gallium/auxiliary/draw/draw_pt_elts.c b/src/gallium/auxiliary/draw/draw_pt_elts.c
deleted file mode 100644
index 88f4d9f495a..00000000000
--- a/src/gallium/auxiliary/draw/draw_pt_elts.c
+++ /dev/null
@@ -1,89 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Keith Whitwell <[email protected]>
- */
-
-#include "draw/draw_pt.h"
-#include "draw/draw_private.h"
-
-/* Neat get_elt func that also works for varrays drawing by encoding
- * the start value into a pointer.
- */
-
-static unsigned elt_uint( const void *elts, unsigned idx )
-{
- return *(((const uint *)elts) + idx);
-}
-
-static unsigned elt_ushort( const void *elts, unsigned idx )
-{
- return *(((const ushort *)elts) + idx);
-}
-
-static unsigned elt_ubyte( const void *elts, unsigned idx )
-{
- return *(((const ubyte *)elts) + idx);
-}
-
-static unsigned elt_vert( const void *elts, unsigned idx )
-{
- /* unsigned index is packed in the pointer */
- return (unsigned)(uintptr_t)elts + idx;
-}
-
-pt_elt_func draw_pt_elt_func( struct draw_context *draw )
-{
- switch (draw->pt.user.eltSize) {
- case 0: return &elt_vert;
- case 1: return &elt_ubyte;
- case 2: return &elt_ushort;
- case 4: return &elt_uint;
- default: return NULL;
- }
-}
-
-const void *draw_pt_elt_ptr( struct draw_context *draw,
- unsigned start )
-{
- const char *elts = draw->pt.user.elts;
-
- switch (draw->pt.user.eltSize) {
- case 0:
- return (const void *)(((const ubyte *)NULL) + start);
- case 1:
- return (const void *)(((const ubyte *)elts) + start);
- case 2:
- return (const void *)(((const ushort *)elts) + start);
- case 4:
- return (const void *)(((const uint *)elts) + start);
- default:
- return NULL;
- }
-}
diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c
index ad48fa39a4f..c8dfc16911e 100644
--- a/src/gallium/auxiliary/draw/draw_pt_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_emit.c
@@ -120,22 +120,21 @@ void draw_pt_emit_prepare( struct pt_emit *emit,
*max_vertices = (draw->render->max_vertex_buffer_bytes /
(vinfo->size * 4));
-
- /* even number */
- *max_vertices = *max_vertices & ~1;
}
void draw_pt_emit( struct pt_emit *emit,
- const float (*vertex_data)[4],
- unsigned vertex_count,
- unsigned stride,
- const ushort *elts,
- unsigned count )
+ const struct draw_vertex_info *vert_info,
+ const struct draw_prim_info *prim_info)
{
+ const float (*vertex_data)[4] = (const float (*)[4])vert_info->verts->data;
+ unsigned vertex_count = vert_info->count;
+ unsigned stride = vert_info->stride;
+ const ushort *elts = prim_info->elts;
struct draw_context *draw = emit->draw;
struct translate *translate = emit->translate;
struct vbuf_render *render = draw->render;
+ unsigned start, i;
void *hw_verts;
/* XXX: need to flush to get prim_vbuf.c to release its allocation??
@@ -145,11 +144,6 @@ void draw_pt_emit( struct pt_emit *emit,
if (vertex_count == 0)
return;
- if (vertex_count >= UNDEFINED_VERTEX_ID) {
- assert(0);
- return;
- }
-
/* XXX: and work out some way to coordinate the render primitive
* between vbuf.c and here...
*/
@@ -180,6 +174,7 @@ void draw_pt_emit( struct pt_emit *emit,
0,
~0);
+ /* fetch/translate vertex attribs to fill hw_verts[] */
translate->run( translate,
0,
vertex_count,
@@ -190,23 +185,31 @@ void draw_pt_emit( struct pt_emit *emit,
0,
vertex_count - 1 );
- render->draw(render,
- elts,
- count);
+ for (start = i = 0;
+ i < prim_info->primitive_count;
+ start += prim_info->primitive_lengths[i], i++)
+ {
+ render->draw_elements(render,
+ elts + start,
+ prim_info->primitive_lengths[i]);
+ }
render->release_vertices(render);
}
void draw_pt_emit_linear(struct pt_emit *emit,
- const float (*vertex_data)[4],
- unsigned stride,
- unsigned count)
+ const struct draw_vertex_info *vert_info,
+ const struct draw_prim_info *prim_info)
{
+ const float (*vertex_data)[4] = (const float (*)[4])vert_info->verts->data;
+ unsigned stride = vert_info->stride;
+ unsigned count = vert_info->count;
struct draw_context *draw = emit->draw;
struct translate *translate = emit->translate;
struct vbuf_render *render = draw->render;
void *hw_verts;
+ unsigned start, i;
#if 0
debug_printf("Linear emit\n");
@@ -215,9 +218,6 @@ void draw_pt_emit_linear(struct pt_emit *emit,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- if (count >= UNDEFINED_VERTEX_ID)
- goto fail;
-
/* XXX: and work out some way to coordinate the render primitive
* between vbuf.c and here...
*/
@@ -258,7 +258,14 @@ void draw_pt_emit_linear(struct pt_emit *emit,
render->unmap_vertices( render, 0, count - 1 );
- render->draw_arrays(render, 0, count);
+ for (start = i = 0;
+ i < prim_info->primitive_count;
+ start += prim_info->primitive_lengths[i], i++)
+ {
+ render->draw_arrays(render,
+ start,
+ prim_info->primitive_lengths[i]);
+ }
render->release_vertices(render);
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c
index a1347221b5d..ae12ee24bdc 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c
@@ -29,7 +29,6 @@
#include "util/u_math.h"
#include "draw/draw_context.h"
#include "draw/draw_private.h"
-#include "draw/draw_vbuf.h"
#include "draw/draw_pt.h"
#include "translate/translate.h"
#include "translate/translate_cache.h"
@@ -69,31 +68,12 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch,
fetch->vertex_size = vertex_size;
- /* Always emit/leave space for a vertex header.
- *
- * It's worth considering whether the vertex headers should contain
- * a pointer to the 'data', rather than having it inline.
- * Something to look at after we've fully switched over to the pt
- * paths.
+ /* Leave the clipmask/edgeflags/pad/vertex_id untouched
*/
- {
- /* Need to set header->vertex_id = 0xffff somehow.
- */
- key.element[nr].type = TRANSLATE_ELEMENT_NORMAL;
- key.element[nr].input_format = PIPE_FORMAT_R32_FLOAT;
- key.element[nr].input_buffer = draw->pt.nr_vertex_buffers;
- key.element[nr].input_offset = 0;
- key.element[nr].instance_divisor = 0;
- key.element[nr].output_format = PIPE_FORMAT_R32_FLOAT;
- key.element[nr].output_offset = dst_offset;
- dst_offset += 1 * sizeof(float);
- nr++;
-
-
- /* Just leave the clip[] array untouched.
- */
- dst_offset += 4 * sizeof(float);
- }
+ dst_offset += 1 * sizeof(float);
+ /* Just leave the clip[] array untouched.
+ */
+ dst_offset += 4 * sizeof(float);
if (instance_id_index != ~0) {
num_extra_inputs++;
@@ -132,26 +112,11 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch,
key.nr_elements = nr;
key.output_stride = vertex_size;
-
if (!fetch->translate ||
translate_key_compare(&fetch->translate->key, &key) != 0)
{
translate_key_sanitize(&key);
fetch->translate = translate_cache_find(fetch->cache, &key);
-
- {
- static struct vertex_header vh = { 0,
- 1,
- 0,
- UNDEFINED_VERTEX_ID,
- { .0f, .0f, .0f, .0f } };
-
- fetch->translate->set_buffer(fetch->translate,
- draw->pt.nr_vertex_buffers,
- &vh,
- 0,
- ~0);
- }
}
}
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
index d7735bf1ac9..e706b7796f8 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
@@ -36,6 +36,7 @@
#include "draw/draw_vbuf.h"
#include "draw/draw_vertex.h"
#include "draw/draw_pt.h"
+#include "draw/draw_gs.h"
#include "translate/translate.h"
#include "translate/translate_cache.h"
@@ -100,9 +101,14 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle,
boolean ok;
struct translate_key key;
+ unsigned gs_out_prim = (draw->gs.geometry_shader ?
+ draw->gs.geometry_shader->output_primitive :
+ prim);
+
+
ok = draw->render->set_primitive( draw->render,
- prim );
+ gs_out_prim );
if (!ok) {
assert(0);
return;
@@ -185,15 +191,6 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle,
*max_vertices = (draw->render->max_vertex_buffer_bytes /
(vinfo->size * 4));
-
- /* Return an even number of verts.
- * This prevents "parity" errors when splitting long triangle strips which
- * can lead to front/back culling mix-ups.
- * Every other triangle in a strip has an alternate front/back orientation
- * so splitting at an odd position can cause the orientation of subsequent
- * triangles to get reversed.
- */
- *max_vertices = *max_vertices & ~1;
}
@@ -204,7 +201,8 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle,
const unsigned *fetch_elts,
unsigned fetch_count,
const ushort *draw_elts,
- unsigned draw_count )
+ unsigned draw_count,
+ unsigned prim_flags )
{
struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle;
struct draw_context *draw = feme->draw;
@@ -214,11 +212,6 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- if (fetch_count >= UNDEFINED_VERTEX_ID) {
- assert(0);
- return;
- }
-
draw->render->allocate_vertices( draw->render,
(ushort)feme->translate->key.output_stride,
(ushort)fetch_count );
@@ -254,9 +247,9 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle,
/* XXX: Draw arrays path to avoid re-emitting index list again and
* again.
*/
- draw->render->draw( draw->render,
- draw_elts,
- draw_count );
+ draw->render->draw_elements( draw->render,
+ draw_elts,
+ draw_count );
/* Done -- that was easy, wasn't it:
*/
@@ -267,7 +260,8 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle,
static void fetch_emit_run_linear( struct draw_pt_middle_end *middle,
unsigned start,
- unsigned count )
+ unsigned count,
+ unsigned prim_flags )
{
struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle;
struct draw_context *draw = feme->draw;
@@ -277,9 +271,6 @@ static void fetch_emit_run_linear( struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- if (count >= UNDEFINED_VERTEX_ID)
- goto fail;
-
if (!draw->render->allocate_vertices( draw->render,
(ushort)feme->translate->key.output_stride,
(ushort)count ))
@@ -328,7 +319,8 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle,
unsigned start,
unsigned count,
const ushort *draw_elts,
- unsigned draw_count )
+ unsigned draw_count,
+ unsigned prim_flags )
{
struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle;
struct draw_context *draw = feme->draw;
@@ -338,9 +330,6 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- if (count >= UNDEFINED_VERTEX_ID)
- return FALSE;
-
if (!draw->render->allocate_vertices( draw->render,
(ushort)feme->translate->key.output_stride,
(ushort)count ))
@@ -363,9 +352,9 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle,
/* XXX: Draw arrays path to avoid re-emitting index list again and
* again.
*/
- draw->render->draw( draw->render,
- draw_elts,
- draw_count );
+ draw->render->draw_elements( draw->render,
+ draw_elts,
+ draw_count );
/* Done -- that was easy, wasn't it:
*/
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
index cbb5b6c9605..7c198c6026d 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
@@ -67,9 +67,8 @@ struct fetch_shade_emit {
-
static void fse_prepare( struct draw_pt_middle_end *middle,
- unsigned prim,
+ unsigned prim,
unsigned opt,
unsigned *max_vertices )
{
@@ -79,9 +78,12 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
const struct vertex_info *vinfo;
unsigned i;
unsigned nr_vbs = 0;
-
- if (!draw->render->set_primitive( draw->render,
+ /* Can't support geometry shader on this path.
+ */
+ assert(!draw->gs.geometry_shader);
+
+ if (!draw->render->set_primitive( draw->render,
prim )) {
assert(0);
return;
@@ -90,7 +92,6 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
/* Must do this after set_primitive() above:
*/
fse->vinfo = vinfo = draw->render->get_vertex_info(draw->render);
-
fse->key.output_stride = vinfo->size * 4;
@@ -101,7 +102,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
fse->key.nr_inputs); /* inputs - fetch from api format */
fse->key.viewport = !draw->identity_viewport;
- fse->key.clip = !draw->bypass_clipping;
+ fse->key.clip = draw->clip_xy || draw->clip_z || draw->clip_user;
fse->key.const_vbuffers = 0;
memset(fse->key.element, 0,
@@ -174,15 +175,6 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
*max_vertices = (draw->render->max_vertex_buffer_bytes /
(vinfo->size * 4));
- /* Return an even number of verts.
- * This prevents "parity" errors when splitting long triangle strips which
- * can lead to front/back culling mix-ups.
- * Every other triangle in a strip has an alternate front/back orientation
- * so splitting at an odd position can cause the orientation of subsequent
- * triangles to get reversed.
- */
- *max_vertices = *max_vertices & ~1;
-
/* Probably need to do this somewhere (or fix exec shader not to
* need it):
*/
@@ -196,7 +188,8 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
static void fse_run_linear( struct draw_pt_middle_end *middle,
unsigned start,
- unsigned count )
+ unsigned count,
+ unsigned prim_flags )
{
struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;
struct draw_context *draw = fse->draw;
@@ -206,9 +199,6 @@ static void fse_run_linear( struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- if (count >= UNDEFINED_VERTEX_ID)
- goto fail;
-
if (!draw->render->allocate_vertices( draw->render,
(ushort)fse->key.output_stride,
(ushort)count ))
@@ -264,7 +254,8 @@ fse_run(struct draw_pt_middle_end *middle,
const unsigned *fetch_elts,
unsigned fetch_count,
const ushort *draw_elts,
- unsigned draw_count )
+ unsigned draw_count,
+ unsigned prim_flags )
{
struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;
struct draw_context *draw = fse->draw;
@@ -274,9 +265,6 @@ fse_run(struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- if (fetch_count >= UNDEFINED_VERTEX_ID)
- goto fail;
-
if (!draw->render->allocate_vertices( draw->render,
(ushort)fse->key.output_stride,
(ushort)fetch_count ))
@@ -307,9 +295,9 @@ fse_run(struct draw_pt_middle_end *middle,
draw->render->unmap_vertices( draw->render, 0, (ushort)(fetch_count - 1) );
- draw->render->draw( draw->render,
- draw_elts,
- draw_count );
+ draw->render->draw_elements( draw->render,
+ draw_elts,
+ draw_count );
draw->render->release_vertices( draw->render );
@@ -326,7 +314,8 @@ static boolean fse_run_linear_elts( struct draw_pt_middle_end *middle,
unsigned start,
unsigned count,
const ushort *draw_elts,
- unsigned draw_count )
+ unsigned draw_count,
+ unsigned prim_flags )
{
struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;
struct draw_context *draw = fse->draw;
@@ -336,9 +325,6 @@ static boolean fse_run_linear_elts( struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- if (count >= UNDEFINED_VERTEX_ID)
- return FALSE;
-
if (!draw->render->allocate_vertices( draw->render,
(ushort)fse->key.output_stride,
(ushort)count ))
@@ -357,9 +343,9 @@ static boolean fse_run_linear_elts( struct draw_pt_middle_end *middle,
hw_verts );
- draw->render->draw( draw->render,
- draw_elts,
- draw_count );
+ draw->render->draw_elements( draw->render,
+ draw_elts,
+ draw_count );
draw->render->unmap_vertices( draw->render, 0, (ushort)(count - 1) );
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
index da5106463a7..b72fd612451 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
@@ -27,6 +27,7 @@
#include "util/u_math.h"
#include "util/u_memory.h"
+#include "util/u_prim.h"
#include "draw/draw_context.h"
#include "draw/draw_vbuf.h"
#include "draw/draw_vertex.h"
@@ -40,16 +41,16 @@ struct fetch_pipeline_middle_end {
struct draw_context *draw;
struct pt_emit *emit;
+ struct pt_so_emit *so_emit;
struct pt_fetch *fetch;
struct pt_post_vs *post_vs;
unsigned vertex_data_offset;
unsigned vertex_size;
- unsigned prim;
+ unsigned input_prim;
unsigned opt;
};
-
static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
unsigned prim,
unsigned opt,
@@ -61,6 +62,10 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
unsigned i;
unsigned instance_id_index = ~0;
+ unsigned gs_out_prim = (draw->gs.geometry_shader ?
+ draw->gs.geometry_shader->output_primitive :
+ prim);
+
/* Add one to num_outputs because the pipeline occasionally tags on
* an additional texcoord, eg for AA lines.
*/
@@ -76,7 +81,7 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
}
}
- fpme->prim = prim;
+ fpme->input_prim = prim;
fpme->opt = opt;
/* Always leave room for the vertex header whether we need it or
@@ -95,173 +100,177 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
* but gl vs dx9 clip spaces.
*/
draw_pt_post_vs_prepare( fpme->post_vs,
- (boolean)draw->bypass_clipping,
- (boolean)draw->identity_viewport,
+ draw->clip_xy,
+ draw->clip_z,
+ draw->clip_user,
+ draw->identity_viewport,
(boolean)draw->rasterizer->gl_rasterization_rules,
- (draw->vs.edgeflag_output ? true : false) );
+ (draw->vs.edgeflag_output ? TRUE : FALSE) );
+
+ draw_pt_so_emit_prepare( fpme->so_emit );
if (!(opt & PT_PIPELINE)) {
- draw_pt_emit_prepare( fpme->emit,
- prim,
+ draw_pt_emit_prepare( fpme->emit,
+ gs_out_prim,
max_vertices );
- *max_vertices = MAX2( *max_vertices,
- DRAW_PIPE_MAX_VERTICES );
+ *max_vertices = MAX2( *max_vertices, 4096 );
}
else {
- *max_vertices = DRAW_PIPE_MAX_VERTICES;
+ /* limit max fetches by limiting max_vertices */
+ *max_vertices = 4096;
}
- /* return even number */
- *max_vertices = *max_vertices & ~1;
-
/* No need to prepare the shader.
*/
vs->prepare(vs, draw);
}
-
-static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
- const unsigned *fetch_elts,
- unsigned fetch_count,
- const ushort *draw_elts,
- unsigned draw_count )
+static void fetch( struct pt_fetch *fetch,
+ const struct draw_fetch_info *fetch_info,
+ char *output)
{
- struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
- struct draw_context *draw = fpme->draw;
- struct draw_vertex_shader *vshader = draw->vs.vertex_shader;
- struct draw_geometry_shader *gshader = draw->gs.geometry_shader;
- unsigned opt = fpme->opt;
- unsigned alloc_count = align( fetch_count, 4 );
-
- struct vertex_header *pipeline_verts =
- (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count);
-
- if (!pipeline_verts) {
- /* Not much we can do here - just skip the rendering.
- */
- assert(0);
- return;
+ if (fetch_info->linear) {
+ draw_pt_fetch_run_linear( fetch,
+ fetch_info->start,
+ fetch_info->count,
+ output );
}
-
- /* Fetch into our vertex buffer
- */
- draw_pt_fetch_run( fpme->fetch,
- fetch_elts,
- fetch_count,
- (char *)pipeline_verts );
-
- /* Run the shader, note that this overwrites the data[] parts of
- * the pipeline verts.
- */
- if (opt & PT_SHADE)
- {
- vshader->run_linear(vshader,
- (const float (*)[4])pipeline_verts->data,
- ( float (*)[4])pipeline_verts->data,
- draw->pt.user.vs_constants,
- fetch_count,
- fpme->vertex_size,
- fpme->vertex_size);
- if (gshader)
- draw_geometry_shader_run(gshader,
- (const float (*)[4])pipeline_verts->data,
- ( float (*)[4])pipeline_verts->data,
- draw->pt.user.gs_constants,
- fetch_count,
- fpme->vertex_size,
- fpme->vertex_size);
+ else {
+ draw_pt_fetch_run( fetch,
+ fetch_info->elts,
+ fetch_info->count,
+ output );
}
+}
- if (draw_pt_post_vs_run( fpme->post_vs,
- pipeline_verts,
- fetch_count,
- fpme->vertex_size ))
- {
- opt |= PT_PIPELINE;
- }
- /* Do we need to run the pipeline?
- */
- if (opt & PT_PIPELINE) {
+static void pipeline(struct fetch_pipeline_middle_end *fpme,
+ const struct draw_vertex_info *vert_info,
+ const struct draw_prim_info *prim_info)
+{
+ if (prim_info->linear)
+ draw_pipeline_run_linear( fpme->draw,
+ vert_info,
+ prim_info);
+ else
draw_pipeline_run( fpme->draw,
- fpme->prim,
- pipeline_verts,
- fetch_count,
- fpme->vertex_size,
- draw_elts,
- draw_count );
+ vert_info,
+ prim_info );
+}
+
+static void emit(struct pt_emit *emit,
+ const struct draw_vertex_info *vert_info,
+ const struct draw_prim_info *prim_info)
+{
+ if (prim_info->linear) {
+ draw_pt_emit_linear(emit, vert_info, prim_info);
}
else {
- draw_pt_emit( fpme->emit,
- (const float (*)[4])pipeline_verts->data,
- fetch_count,
- fpme->vertex_size,
- draw_elts,
- draw_count );
+ draw_pt_emit(emit, vert_info, prim_info);
}
+}
- FREE(pipeline_verts);
+static void draw_vertex_shader_run(struct draw_vertex_shader *vshader,
+ const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
+ unsigned const_size[PIPE_MAX_CONSTANT_BUFFERS],
+ const struct draw_vertex_info *input_verts,
+ struct draw_vertex_info *output_verts )
+{
+ output_verts->vertex_size = input_verts->vertex_size;
+ output_verts->stride = input_verts->vertex_size;
+ output_verts->count = input_verts->count;
+ output_verts->verts =
+ (struct vertex_header *)MALLOC(output_verts->vertex_size *
+ align(output_verts->count, 4));
+
+ vshader->run_linear(vshader,
+ (const float (*)[4])input_verts->verts->data,
+ ( float (*)[4])output_verts->verts->data,
+ constants,
+ const_size,
+ input_verts->count,
+ input_verts->vertex_size,
+ input_verts->vertex_size);
}
-
-static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle,
- unsigned start,
- unsigned count)
+static void fetch_pipeline_generic( struct draw_pt_middle_end *middle,
+ const struct draw_fetch_info *fetch_info,
+ const struct draw_prim_info *prim_info )
{
struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
struct draw_context *draw = fpme->draw;
- struct draw_vertex_shader *shader = draw->vs.vertex_shader;
- struct draw_geometry_shader *geometry_shader = draw->gs.geometry_shader;
+ struct draw_vertex_shader *vshader = draw->vs.vertex_shader;
+ struct draw_geometry_shader *gshader = draw->gs.geometry_shader;
+ struct draw_prim_info gs_prim_info;
+ struct draw_vertex_info fetched_vert_info;
+ struct draw_vertex_info vs_vert_info;
+ struct draw_vertex_info gs_vert_info;
+ struct draw_vertex_info *vert_info;
unsigned opt = fpme->opt;
- unsigned alloc_count = align( count, 4 );
- struct vertex_header *pipeline_verts =
- (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count);
-
- if (!pipeline_verts) {
- /* Not much we can do here - just skip the rendering.
- */
+ fetched_vert_info.count = fetch_info->count;
+ fetched_vert_info.vertex_size = fpme->vertex_size;
+ fetched_vert_info.stride = fpme->vertex_size;
+ fetched_vert_info.verts =
+ (struct vertex_header *)MALLOC(fpme->vertex_size *
+ align(fetch_info->count, 4));
+ if (!fetched_vert_info.verts) {
assert(0);
return;
}
- /* Fetch into our vertex buffer
+ /* Fetch into our vertex buffer.
+ */
+ fetch( fpme->fetch, fetch_info, (char *)fetched_vert_info.verts );
+
+ /* Finished with fetch:
*/
- draw_pt_fetch_run_linear( fpme->fetch,
- start,
- count,
- (char *)pipeline_verts );
+ fetch_info = NULL;
+ vert_info = &fetched_vert_info;
/* Run the shader, note that this overwrites the data[] parts of
* the pipeline verts.
*/
- if (opt & PT_SHADE)
- {
- shader->run_linear(shader,
- (const float (*)[4])pipeline_verts->data,
- ( float (*)[4])pipeline_verts->data,
- draw->pt.user.vs_constants,
- count,
- fpme->vertex_size,
- fpme->vertex_size);
-
- if (geometry_shader)
- draw_geometry_shader_run(geometry_shader,
- (const float (*)[4])pipeline_verts->data,
- ( float (*)[4])pipeline_verts->data,
- draw->pt.user.gs_constants,
- count,
- fpme->vertex_size,
- fpme->vertex_size);
+ if (fpme->opt & PT_SHADE) {
+ draw_vertex_shader_run(vshader,
+ draw->pt.user.vs_constants,
+ draw->pt.user.vs_constants_size,
+ vert_info,
+ &vs_vert_info);
+
+ FREE(vert_info->verts);
+ vert_info = &vs_vert_info;
+ }
+
+ if ((fpme->opt & PT_SHADE) && gshader) {
+ draw_geometry_shader_run(gshader,
+ draw->pt.user.gs_constants,
+ draw->pt.user.gs_constants_size,
+ vert_info,
+ prim_info,
+ &gs_vert_info,
+ &gs_prim_info);
+
+ FREE(vert_info->verts);
+ vert_info = &gs_vert_info;
+ prim_info = &gs_prim_info;
}
+
+ /* Stream output needs to be done before clipping.
+ *
+ * XXX: Stream output surely needs to respect the prim_info->elt
+ * lists.
+ */
+ draw_pt_so_emit( fpme->so_emit,
+ vert_info,
+ prim_info );
+
if (draw_pt_post_vs_run( fpme->post_vs,
- pipeline_verts,
- count,
- fpme->vertex_size ))
+ vert_info ))
{
opt |= PT_PIPELINE;
}
@@ -269,102 +278,102 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle,
/* Do we need to run the pipeline?
*/
if (opt & PT_PIPELINE) {
- draw_pipeline_run_linear( fpme->draw,
- fpme->prim,
- pipeline_verts,
- count,
- fpme->vertex_size);
+ pipeline( fpme,
+ vert_info,
+ prim_info );
}
else {
- draw_pt_emit_linear( fpme->emit,
- (const float (*)[4])pipeline_verts->data,
- fpme->vertex_size,
- count );
+ emit( fpme->emit,
+ vert_info,
+ prim_info );
}
-
- FREE(pipeline_verts);
+ FREE(vert_info->verts);
}
+static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
+ const unsigned *fetch_elts,
+ unsigned fetch_count,
+ const ushort *draw_elts,
+ unsigned draw_count,
+ unsigned prim_flags )
+{
+ struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
+ struct draw_fetch_info fetch_info;
+ struct draw_prim_info prim_info;
+
+ fetch_info.linear = FALSE;
+ fetch_info.start = 0;
+ fetch_info.elts = fetch_elts;
+ fetch_info.count = fetch_count;
+
+ prim_info.linear = FALSE;
+ prim_info.start = 0;
+ prim_info.count = draw_count;
+ prim_info.elts = draw_elts;
+ prim_info.prim = fpme->input_prim;
+ prim_info.flags = prim_flags;
+ prim_info.primitive_count = 1;
+ prim_info.primitive_lengths = &draw_count;
+
+ fetch_pipeline_generic( middle, &fetch_info, &prim_info );
+}
-static boolean fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle,
- unsigned start,
- unsigned count,
- const ushort *draw_elts,
- unsigned draw_count )
+static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle,
+ unsigned start,
+ unsigned count,
+ unsigned prim_flags)
{
struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
- struct draw_context *draw = fpme->draw;
- struct draw_vertex_shader *shader = draw->vs.vertex_shader;
- struct draw_geometry_shader *geometry_shader = draw->gs.geometry_shader;
- unsigned opt = fpme->opt;
- unsigned alloc_count = align( count, 4 );
+ struct draw_fetch_info fetch_info;
+ struct draw_prim_info prim_info;
+
+ fetch_info.linear = TRUE;
+ fetch_info.start = start;
+ fetch_info.count = count;
+ fetch_info.elts = NULL;
+
+ prim_info.linear = TRUE;
+ prim_info.start = 0;
+ prim_info.count = count;
+ prim_info.elts = NULL;
+ prim_info.prim = fpme->input_prim;
+ prim_info.flags = prim_flags;
+ prim_info.primitive_count = 1;
+ prim_info.primitive_lengths = &count;
+
+ fetch_pipeline_generic( middle, &fetch_info, &prim_info );
+}
- struct vertex_header *pipeline_verts =
- (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count);
- if (!pipeline_verts)
- return FALSE;
- /* Fetch into our vertex buffer
- */
- draw_pt_fetch_run_linear( fpme->fetch,
- start,
- count,
- (char *)pipeline_verts );
+static boolean fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle,
+ unsigned start,
+ unsigned count,
+ const ushort *draw_elts,
+ unsigned draw_count,
+ unsigned prim_flags )
+{
+ struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
+ struct draw_fetch_info fetch_info;
+ struct draw_prim_info prim_info;
- /* Run the shader, note that this overwrites the data[] parts of
- * the pipeline verts.
- */
- if (opt & PT_SHADE)
- {
- shader->run_linear(shader,
- (const float (*)[4])pipeline_verts->data,
- ( float (*)[4])pipeline_verts->data,
- draw->pt.user.vs_constants,
- count,
- fpme->vertex_size,
- fpme->vertex_size);
-
- if (geometry_shader)
- draw_geometry_shader_run(geometry_shader,
- (const float (*)[4])pipeline_verts->data,
- ( float (*)[4])pipeline_verts->data,
- draw->pt.user.gs_constants,
- count,
- fpme->vertex_size,
- fpme->vertex_size);
- }
+ fetch_info.linear = TRUE;
+ fetch_info.start = start;
+ fetch_info.count = count;
+ fetch_info.elts = NULL;
- if (draw_pt_post_vs_run( fpme->post_vs,
- pipeline_verts,
- count,
- fpme->vertex_size ))
- {
- opt |= PT_PIPELINE;
- }
+ prim_info.linear = FALSE;
+ prim_info.start = 0;
+ prim_info.count = draw_count;
+ prim_info.elts = draw_elts;
+ prim_info.prim = fpme->input_prim;
+ prim_info.flags = prim_flags;
+ prim_info.primitive_count = 1;
+ prim_info.primitive_lengths = &draw_count;
- /* Do we need to run the pipeline?
- */
- if (opt & PT_PIPELINE) {
- draw_pipeline_run( fpme->draw,
- fpme->prim,
- pipeline_verts,
- count,
- fpme->vertex_size,
- draw_elts,
- draw_count );
- }
- else {
- draw_pt_emit( fpme->emit,
- (const float (*)[4])pipeline_verts->data,
- count,
- fpme->vertex_size,
- draw_elts,
- draw_count );
- }
+ fetch_pipeline_generic( middle, &fetch_info, &prim_info );
- FREE(pipeline_verts);
return TRUE;
}
@@ -385,6 +394,9 @@ static void fetch_pipeline_destroy( struct draw_pt_middle_end *middle )
if (fpme->emit)
draw_pt_emit_destroy( fpme->emit );
+ if (fpme->so_emit)
+ draw_pt_so_emit_destroy( fpme->so_emit );
+
if (fpme->post_vs)
draw_pt_post_vs_destroy( fpme->post_vs );
@@ -416,7 +428,11 @@ struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit( struct draw_context *
goto fail;
fpme->emit = draw_pt_emit_create( draw );
- if (!fpme->emit)
+ if (!fpme->emit)
+ goto fail;
+
+ fpme->so_emit = draw_pt_so_emit_create( draw );
+ if (!fpme->so_emit)
goto fail;
return &fpme->base;
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
index d2a492f2b4c..77291e304e1 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
@@ -1,6 +1,6 @@
/**************************************************************************
*
- * Copyright 2010 VMWare, Inc.
+ * Copyright 2010 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -28,11 +28,11 @@
#include "util/u_math.h"
#include "util/u_memory.h"
#include "draw/draw_context.h"
+#include "draw/draw_gs.h"
#include "draw/draw_vbuf.h"
#include "draw/draw_vertex.h"
#include "draw/draw_pt.h"
#include "draw/draw_vs.h"
-#include "draw/draw_gs.h"
#include "draw/draw_llvm.h"
@@ -41,53 +41,59 @@ struct llvm_middle_end {
struct draw_context *draw;
struct pt_emit *emit;
+ struct pt_so_emit *so_emit;
struct pt_fetch *fetch;
struct pt_post_vs *post_vs;
unsigned vertex_data_offset;
unsigned vertex_size;
- unsigned prim;
+ unsigned input_prim;
unsigned opt;
struct draw_llvm *llvm;
- struct draw_llvm_variant *variants;
struct draw_llvm_variant *current_variant;
- int nr_variants;
};
static void
llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
- unsigned prim,
+ unsigned in_prim,
unsigned opt,
unsigned *max_vertices )
{
struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
struct draw_context *draw = fpme->draw;
- struct draw_vertex_shader *vs = draw->vs.vertex_shader;
- struct draw_geometry_shader *gs = draw->gs.geometry_shader;
- struct draw_llvm_variant_key key;
+ struct llvm_vertex_shader *shader =
+ llvm_vertex_shader(draw->vs.vertex_shader);
+ char store[DRAW_LLVM_MAX_VARIANT_KEY_SIZE];
+ struct draw_llvm_variant_key *key;
struct draw_llvm_variant *variant = NULL;
+ struct draw_llvm_variant_list_item *li;
unsigned i;
unsigned instance_id_index = ~0;
+
+ unsigned out_prim = (draw->gs.geometry_shader ?
+ draw->gs.geometry_shader->output_primitive :
+ in_prim);
+
/* Add one to num_outputs because the pipeline occasionally tags on
* an additional texcoord, eg for AA lines.
*/
- unsigned nr = MAX2( vs->info.num_inputs,
- vs->info.num_outputs + 1 );
+ unsigned nr = MAX2( shader->base.info.num_inputs,
+ shader->base.info.num_outputs + 1 );
/* Scan for instanceID system value.
*/
- for (i = 0; i < vs->info.num_inputs; i++) {
- if (vs->info.input_semantic_name[i] == TGSI_SEMANTIC_INSTANCEID) {
+ for (i = 0; i < shader->base.info.num_inputs; i++) {
+ if (shader->base.info.input_semantic_name[i] == TGSI_SEMANTIC_INSTANCEID) {
instance_id_index = i;
break;
}
}
- fpme->prim = prim;
+ fpme->input_prim = in_prim;
fpme->opt = opt;
/* Always leave room for the vertex header whether we need it or
@@ -97,57 +103,71 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
fpme->vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float);
-
- draw_pt_fetch_prepare( fpme->fetch,
- vs->info.num_inputs,
- fpme->vertex_size,
- instance_id_index );
- if (opt & PT_SHADE) {
- vs->prepare(vs, draw);
- draw_geometry_shader_prepare(gs, draw);
- }
-
-
/* XXX: it's not really gl rasterization rules we care about here,
* but gl vs dx9 clip spaces.
*/
draw_pt_post_vs_prepare( fpme->post_vs,
- (boolean)draw->bypass_clipping,
- (boolean)(draw->identity_viewport),
+ draw->clip_xy,
+ draw->clip_z,
+ draw->clip_user,
+ draw->identity_viewport,
(boolean)draw->rasterizer->gl_rasterization_rules,
- (draw->vs.edgeflag_output ? true : false) );
+ (draw->vs.edgeflag_output ? TRUE : FALSE) );
+
+ draw_pt_so_emit_prepare( fpme->so_emit );
if (!(opt & PT_PIPELINE)) {
draw_pt_emit_prepare( fpme->emit,
- prim,
+ out_prim,
max_vertices );
- *max_vertices = MAX2( *max_vertices,
- DRAW_PIPE_MAX_VERTICES );
+ *max_vertices = MAX2( *max_vertices, 4096 );
}
else {
- *max_vertices = DRAW_PIPE_MAX_VERTICES;
+ /* limit max fetches by limiting max_vertices */
+ *max_vertices = 4096;
}
/* return even number */
*max_vertices = *max_vertices & ~1;
+
+ key = draw_llvm_make_variant_key(fpme->llvm, store);
- draw_llvm_make_variant_key(fpme->llvm, &key);
-
- variant = fpme->variants;
- while(variant) {
- if(memcmp(&variant->key, &key, sizeof key) == 0)
+ li = first_elem(&shader->variants);
+ while(!at_end(&shader->variants, li)) {
+ if(memcmp(&li->base->key, key, shader->variant_key_size) == 0) {
+ variant = li->base;
break;
+ }
+ li = next_elem(li);
+ }
- variant = variant->next;
+ if (variant) {
+ move_to_head(&fpme->llvm->vs_variants_list, &variant->list_item_global);
}
+ else {
+ unsigned i;
+ if (fpme->llvm->nr_variants >= DRAW_MAX_SHADER_VARIANTS) {
+ /*
+ * XXX: should we flush here ?
+ */
+ for (i = 0; i < DRAW_MAX_SHADER_VARIANTS / 4; i++) {
+ struct draw_llvm_variant_list_item *item =
+ last_elem(&fpme->llvm->vs_variants_list);
+ draw_llvm_destroy_variant(item->base);
+ }
+ }
+
+ variant = draw_llvm_create_variant(fpme->llvm, nr, key);
- if (!variant) {
- variant = draw_llvm_prepare(fpme->llvm, nr);
- variant->next = fpme->variants;
- fpme->variants = variant;
- ++fpme->nr_variants;
+ if (variant) {
+ insert_at_head(&shader->variants, &variant->list_item_local);
+ insert_at_head(&fpme->llvm->vs_variants_list, &variant->list_item_global);
+ fpme->llvm->nr_variants++;
+ shader->variants_cached++;
+ }
}
+
fpme->current_variant = variant;
/*XXX we only support one constant buffer */
@@ -158,125 +178,174 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
}
+static void pipeline(struct llvm_middle_end *llvm,
+ const struct draw_vertex_info *vert_info,
+ const struct draw_prim_info *prim_info)
+{
+ if (prim_info->linear)
+ draw_pipeline_run_linear( llvm->draw,
+ vert_info,
+ prim_info);
+ else
+ draw_pipeline_run( llvm->draw,
+ vert_info,
+ prim_info );
+}
+
+static void emit(struct pt_emit *emit,
+ const struct draw_vertex_info *vert_info,
+ const struct draw_prim_info *prim_info)
+{
+ if (prim_info->linear) {
+ draw_pt_emit_linear(emit, vert_info, prim_info);
+ }
+ else {
+ draw_pt_emit(emit, vert_info, prim_info);
+ }
+}
-static void llvm_middle_end_run( struct draw_pt_middle_end *middle,
- const unsigned *fetch_elts,
- unsigned fetch_count,
- const ushort *draw_elts,
- unsigned draw_count )
+static void
+llvm_pipeline_generic( struct draw_pt_middle_end *middle,
+ const struct draw_fetch_info *fetch_info,
+ const struct draw_prim_info *prim_info )
{
struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
struct draw_context *draw = fpme->draw;
+ struct draw_geometry_shader *gshader = draw->gs.geometry_shader;
+ struct draw_prim_info gs_prim_info;
+ struct draw_vertex_info llvm_vert_info;
+ struct draw_vertex_info gs_vert_info;
+ struct draw_vertex_info *vert_info;
unsigned opt = fpme->opt;
- unsigned alloc_count = align( fetch_count, 4 );
-
- struct vertex_header *pipeline_verts =
- (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count);
- if (!pipeline_verts) {
- /* Not much we can do here - just skip the rendering.
- */
+ llvm_vert_info.count = fetch_info->count;
+ llvm_vert_info.vertex_size = fpme->vertex_size;
+ llvm_vert_info.stride = fpme->vertex_size;
+ llvm_vert_info.verts =
+ (struct vertex_header *)MALLOC(fpme->vertex_size *
+ align(fetch_info->count, 4));
+ if (!llvm_vert_info.verts) {
assert(0);
return;
}
- fpme->current_variant->jit_func_elts( &fpme->llvm->jit_context,
- pipeline_verts,
- (const char **)draw->pt.user.vbuffer,
- fetch_elts,
- fetch_count,
- fpme->vertex_size,
- draw->pt.vertex_buffer );
-
- if (draw_pt_post_vs_run( fpme->post_vs,
- pipeline_verts,
- fetch_count,
- fpme->vertex_size ))
- {
+ if (fetch_info->linear)
+ fpme->current_variant->jit_func( &fpme->llvm->jit_context,
+ llvm_vert_info.verts,
+ (const char **)draw->pt.user.vbuffer,
+ fetch_info->start,
+ fetch_info->count,
+ fpme->vertex_size,
+ draw->pt.vertex_buffer,
+ draw->instance_id);
+ else
+ fpme->current_variant->jit_func_elts( &fpme->llvm->jit_context,
+ llvm_vert_info.verts,
+ (const char **)draw->pt.user.vbuffer,
+ fetch_info->elts,
+ fetch_info->count,
+ fpme->vertex_size,
+ draw->pt.vertex_buffer,
+ draw->instance_id);
+
+ /* Finished with fetch and vs:
+ */
+ fetch_info = NULL;
+ vert_info = &llvm_vert_info;
+
+
+ if ((opt & PT_SHADE) && gshader) {
+ draw_geometry_shader_run(gshader,
+ draw->pt.user.gs_constants,
+ draw->pt.user.gs_constants_size,
+ vert_info,
+ prim_info,
+ &gs_vert_info,
+ &gs_prim_info);
+
+ FREE(vert_info->verts);
+ vert_info = &gs_vert_info;
+ prim_info = &gs_prim_info;
+ }
+
+ /* stream output needs to be done before clipping */
+ draw_pt_so_emit( fpme->so_emit,
+ vert_info,
+ prim_info );
+
+ if (draw_pt_post_vs_run( fpme->post_vs, vert_info )) {
opt |= PT_PIPELINE;
}
/* Do we need to run the pipeline?
*/
if (opt & PT_PIPELINE) {
- draw_pipeline_run( fpme->draw,
- fpme->prim,
- pipeline_verts,
- fetch_count,
- fpme->vertex_size,
- draw_elts,
- draw_count );
+ pipeline( fpme,
+ vert_info,
+ prim_info );
}
else {
- draw_pt_emit( fpme->emit,
- (const float (*)[4])pipeline_verts->data,
- fetch_count,
- fpme->vertex_size,
- draw_elts,
- draw_count );
+ emit( fpme->emit,
+ vert_info,
+ prim_info );
}
+ FREE(vert_info->verts);
+}
- FREE(pipeline_verts);
+static void llvm_middle_end_run( struct draw_pt_middle_end *middle,
+ const unsigned *fetch_elts,
+ unsigned fetch_count,
+ const ushort *draw_elts,
+ unsigned draw_count,
+ unsigned prim_flags )
+{
+ struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
+ struct draw_fetch_info fetch_info;
+ struct draw_prim_info prim_info;
+
+ fetch_info.linear = FALSE;
+ fetch_info.start = 0;
+ fetch_info.elts = fetch_elts;
+ fetch_info.count = fetch_count;
+
+ prim_info.linear = FALSE;
+ prim_info.start = 0;
+ prim_info.count = draw_count;
+ prim_info.elts = draw_elts;
+ prim_info.prim = fpme->input_prim;
+ prim_info.flags = prim_flags;
+ prim_info.primitive_count = 1;
+ prim_info.primitive_lengths = &draw_count;
+
+ llvm_pipeline_generic( middle, &fetch_info, &prim_info );
}
static void llvm_middle_end_linear_run( struct draw_pt_middle_end *middle,
unsigned start,
- unsigned count)
+ unsigned count,
+ unsigned prim_flags)
{
struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
- struct draw_context *draw = fpme->draw;
- unsigned opt = fpme->opt;
- unsigned alloc_count = align( count, 4 );
-
- struct vertex_header *pipeline_verts =
- (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count);
-
- if (!pipeline_verts) {
- /* Not much we can do here - just skip the rendering.
- */
- assert(0);
- return;
- }
-
-#if 0
- debug_printf("#### Pipeline = %p (data = %p)\n",
- pipeline_verts, pipeline_verts->data);
-#endif
- fpme->current_variant->jit_func( &fpme->llvm->jit_context,
- pipeline_verts,
- (const char **)draw->pt.user.vbuffer,
- start,
- count,
- fpme->vertex_size,
- draw->pt.vertex_buffer );
-
- if (draw_pt_post_vs_run( fpme->post_vs,
- pipeline_verts,
- count,
- fpme->vertex_size ))
- {
- opt |= PT_PIPELINE;
- }
-
- /* Do we need to run the pipeline?
- */
- if (opt & PT_PIPELINE) {
- draw_pipeline_run_linear( fpme->draw,
- fpme->prim,
- pipeline_verts,
- count,
- fpme->vertex_size);
- }
- else {
- draw_pt_emit_linear( fpme->emit,
- (const float (*)[4])pipeline_verts->data,
- fpme->vertex_size,
- count );
- }
-
- FREE(pipeline_verts);
+ struct draw_fetch_info fetch_info;
+ struct draw_prim_info prim_info;
+
+ fetch_info.linear = TRUE;
+ fetch_info.start = start;
+ fetch_info.count = count;
+ fetch_info.elts = NULL;
+
+ prim_info.linear = TRUE;
+ prim_info.start = 0;
+ prim_info.count = count;
+ prim_info.elts = NULL;
+ prim_info.prim = fpme->input_prim;
+ prim_info.flags = prim_flags;
+ prim_info.primitive_count = 1;
+ prim_info.primitive_lengths = &count;
+
+ llvm_pipeline_generic( middle, &fetch_info, &prim_info );
}
@@ -286,56 +355,29 @@ llvm_middle_end_linear_run_elts( struct draw_pt_middle_end *middle,
unsigned start,
unsigned count,
const ushort *draw_elts,
- unsigned draw_count )
+ unsigned draw_count,
+ unsigned prim_flags )
{
struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
- struct draw_context *draw = fpme->draw;
- unsigned opt = fpme->opt;
- unsigned alloc_count = align( count, 4 );
-
- struct vertex_header *pipeline_verts =
- (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count);
-
- if (!pipeline_verts)
- return FALSE;
-
- fpme->current_variant->jit_func( &fpme->llvm->jit_context,
- pipeline_verts,
- (const char **)draw->pt.user.vbuffer,
- start,
- count,
- fpme->vertex_size,
- draw->pt.vertex_buffer );
-
- if (draw_pt_post_vs_run( fpme->post_vs,
- pipeline_verts,
- count,
- fpme->vertex_size ))
- {
- opt |= PT_PIPELINE;
- }
+ struct draw_fetch_info fetch_info;
+ struct draw_prim_info prim_info;
- /* Do we need to run the pipeline?
- */
- if (opt & PT_PIPELINE) {
- draw_pipeline_run( fpme->draw,
- fpme->prim,
- pipeline_verts,
- count,
- fpme->vertex_size,
- draw_elts,
- draw_count );
- }
- else {
- draw_pt_emit( fpme->emit,
- (const float (*)[4])pipeline_verts->data,
- count,
- fpme->vertex_size,
- draw_elts,
- draw_count );
- }
+ fetch_info.linear = TRUE;
+ fetch_info.start = start;
+ fetch_info.count = count;
+ fetch_info.elts = NULL;
+
+ prim_info.linear = FALSE;
+ prim_info.start = 0;
+ prim_info.count = draw_count;
+ prim_info.elts = draw_elts;
+ prim_info.prim = fpme->input_prim;
+ prim_info.flags = prim_flags;
+ prim_info.primitive_count = 1;
+ prim_info.primitive_lengths = &draw_count;
+
+ llvm_pipeline_generic( middle, &fetch_info, &prim_info );
- FREE(pipeline_verts);
return TRUE;
}
@@ -356,17 +398,18 @@ static void llvm_middle_end_destroy( struct draw_pt_middle_end *middle )
if (fpme->emit)
draw_pt_emit_destroy( fpme->emit );
+ if (fpme->so_emit)
+ draw_pt_so_emit_destroy( fpme->so_emit );
+
if (fpme->post_vs)
draw_pt_post_vs_destroy( fpme->post_vs );
- if (fpme->llvm)
- draw_llvm_destroy( fpme->llvm );
-
FREE(middle);
}
-struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit_llvm( struct draw_context *draw )
+struct draw_pt_middle_end *
+draw_pt_fetch_pipeline_or_emit_llvm(struct draw_context *draw)
{
struct llvm_middle_end *fpme = 0;
@@ -398,13 +441,15 @@ struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit_llvm( struct draw_cont
if (!fpme->emit)
goto fail;
- fpme->llvm = draw_llvm_create(draw);
+ fpme->so_emit = draw_pt_so_emit_create( draw );
+ if (!fpme->so_emit)
+ goto fail;
+
+ fpme->llvm = draw->llvm;
if (!fpme->llvm)
goto fail;
- fpme->variants = NULL;
fpme->current_variant = NULL;
- fpme->nr_variants = 0;
return &fpme->base;
diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c
index 5525dfc748d..769409cfd67 100644
--- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c
+++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c
@@ -26,22 +26,38 @@
**************************************************************************/
#include "util/u_memory.h"
+#include "util/u_math.h"
#include "pipe/p_context.h"
#include "draw/draw_context.h"
#include "draw/draw_private.h"
-#include "draw/draw_vbuf.h"
#include "draw/draw_pt.h"
+
+#define DO_CLIP_XY 0x1
+#define DO_CLIP_FULL_Z 0x2
+#define DO_CLIP_HALF_Z 0x4
+#define DO_CLIP_USER 0x8
+#define DO_VIEWPORT 0x10
+#define DO_EDGEFLAG 0x20
+
+
struct pt_post_vs {
struct draw_context *draw;
+ unsigned flags;
+
boolean (*run)( struct pt_post_vs *pvs,
- struct vertex_header *vertices,
- unsigned count,
- unsigned stride );
+ struct draw_vertex_info *info );
};
-
+static INLINE void
+initialize_vertex_header(struct vertex_header *header)
+{
+ header->clipmask = 0;
+ header->edgeflag = 1;
+ header->pad = 0;
+ header->vertex_id = UNDEFINED_VERTEX_ID;
+}
static INLINE float
dot4(const float *a, const float *b)
@@ -52,215 +68,121 @@ dot4(const float *a, const float *b)
a[3]*b[3]);
}
+#define FLAGS (0)
+#define TAG(x) x##_none
+#include "draw_cliptest_tmp.h"
+#define FLAGS (DO_CLIP_XY | DO_CLIP_FULL_Z | DO_VIEWPORT)
+#define TAG(x) x##_xy_fullz_viewport
+#include "draw_cliptest_tmp.h"
-static INLINE unsigned
-compute_clipmask_gl(const float *clip, /*const*/ float plane[][4], unsigned nr)
-{
- unsigned mask = 0x0;
- unsigned i;
-
-#if 0
- debug_printf("compute clipmask %f %f %f %f\n",
- clip[0], clip[1], clip[2], clip[3]);
- assert(clip[3] != 0.0);
-#endif
-
- /* Do the hardwired planes first:
- */
- if (-clip[0] + clip[3] < 0) mask |= (1<<0);
- if ( clip[0] + clip[3] < 0) mask |= (1<<1);
- if (-clip[1] + clip[3] < 0) mask |= (1<<2);
- if ( clip[1] + clip[3] < 0) mask |= (1<<3);
- if ( clip[2] + clip[3] < 0) mask |= (1<<4); /* match mesa clipplane numbering - for now */
- if (-clip[2] + clip[3] < 0) mask |= (1<<5); /* match mesa clipplane numbering - for now */
-
- /* Followed by any remaining ones:
- */
- for (i = 6; i < nr; i++) {
- if (dot4(clip, plane[i]) < 0)
- mask |= (1<<i);
- }
-
- return mask;
-}
-
-
-/* The normal case - cliptest, rhw divide, viewport transform.
- *
- * Also handle identity viewport here at the expense of a few wasted
- * instructions
- */
-static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs,
- struct vertex_header *vertices,
- unsigned count,
- unsigned stride )
-{
- struct vertex_header *out = vertices;
- const float *scale = pvs->draw->viewport.scale;
- const float *trans = pvs->draw->viewport.translate;
- const unsigned pos = draw_current_shader_position_output(pvs->draw);
- unsigned clipped = 0;
- unsigned j;
-
- if (0) debug_printf("%s\n", __FUNCTION__);
-
- for (j = 0; j < count; j++) {
- float *position = out->data[pos];
-
-#if 0
- debug_printf("%d) io = %p, data = %p = [%f, %f, %f, %f]\n",
- j, out, position, position[0], position[1], position[2], position[3]);
-#endif
-
- out->clip[0] = position[0];
- out->clip[1] = position[1];
- out->clip[2] = position[2];
- out->clip[3] = position[3];
-
- out->vertex_id = 0xffff;
- out->clipmask = compute_clipmask_gl(out->clip,
- pvs->draw->plane,
- pvs->draw->nr_planes);
- clipped += out->clipmask;
-
- if (out->clipmask == 0)
- {
- /* divide by w */
- float w = 1.0f / position[3];
-
- /* Viewport mapping */
- position[0] = position[0] * w * scale[0] + trans[0];
- position[1] = position[1] * w * scale[1] + trans[1];
- position[2] = position[2] * w * scale[2] + trans[2];
- position[3] = w;
-#if 0
- debug_printf("post viewport: %f %f %f %f\n",
- position[0],
- position[1],
- position[2],
- position[3]);
-#endif
- }
-
- out = (struct vertex_header *)( (char *)out + stride );
- }
-
- return clipped != 0;
-}
-
-
+#define FLAGS (DO_CLIP_XY | DO_CLIP_HALF_Z | DO_VIEWPORT)
+#define TAG(x) x##_xy_halfz_viewport
+#include "draw_cliptest_tmp.h"
-/* As above plus edgeflags
- */
-static boolean
-post_vs_cliptest_viewport_gl_edgeflag(struct pt_post_vs *pvs,
- struct vertex_header *vertices,
- unsigned count,
- unsigned stride )
-{
- unsigned j;
- boolean needpipe;
+#define FLAGS (DO_CLIP_FULL_Z | DO_VIEWPORT)
+#define TAG(x) x##_fullz_viewport
+#include "draw_cliptest_tmp.h"
- needpipe = post_vs_cliptest_viewport_gl( pvs, vertices, count, stride);
+#define FLAGS (DO_CLIP_HALF_Z | DO_VIEWPORT)
+#define TAG(x) x##_halfz_viewport
+#include "draw_cliptest_tmp.h"
- /* If present, copy edgeflag VS output into vertex header.
- * Otherwise, leave header as is.
- */
- if (pvs->draw->vs.edgeflag_output) {
- struct vertex_header *out = vertices;
- int ef = pvs->draw->vs.edgeflag_output;
-
- for (j = 0; j < count; j++) {
- const float *edgeflag = out->data[ef];
- out->edgeflag = !(edgeflag[0] != 1.0f);
- needpipe |= !out->edgeflag;
- out = (struct vertex_header *)( (char *)out + stride );
- }
- }
- return needpipe;
-}
+#define FLAGS (DO_CLIP_XY | DO_CLIP_FULL_Z | DO_CLIP_USER | DO_VIEWPORT)
+#define TAG(x) x##_xy_fullz_user_viewport
+#include "draw_cliptest_tmp.h"
+#define FLAGS (DO_CLIP_XY | DO_CLIP_FULL_Z | DO_CLIP_USER | DO_VIEWPORT | DO_EDGEFLAG)
+#define TAG(x) x##_xy_fullz_user_viewport_edgeflag
+#include "draw_cliptest_tmp.h"
-/* If bypass_clipping is set, skip cliptest and rhw divide.
+/* Don't want to create 64 versions of this function, so catch the
+ * less common ones here. This is looking like something which should
+ * be code-generated, perhaps appended to the end of the vertex
+ * shader.
*/
-static boolean post_vs_viewport( struct pt_post_vs *pvs,
- struct vertex_header *vertices,
- unsigned count,
- unsigned stride )
-{
- struct vertex_header *out = vertices;
- const float *scale = pvs->draw->viewport.scale;
- const float *trans = pvs->draw->viewport.translate;
- const unsigned pos = draw_current_shader_position_output(pvs->draw);
- unsigned j;
-
- if (0) debug_printf("%s\n", __FUNCTION__);
- for (j = 0; j < count; j++) {
- float *position = out->data[pos];
-
- /* Viewport mapping only, no cliptest/rhw divide
- */
- position[0] = position[0] * scale[0] + trans[0];
- position[1] = position[1] * scale[1] + trans[1];
- position[2] = position[2] * scale[2] + trans[2];
-
- out = (struct vertex_header *)((char *)out + stride);
- }
-
- return FALSE;
-}
+#define FLAGS (pvs->flags)
+#define TAG(x) x##_generic
+#include "draw_cliptest_tmp.h"
-/* If bypass_clipping is set and we have an identity viewport, nothing
- * to do.
- */
-static boolean post_vs_none( struct pt_post_vs *pvs,
- struct vertex_header *vertices,
- unsigned count,
- unsigned stride )
-{
- if (0) debug_printf("%s\n", __FUNCTION__);
- return FALSE;
-}
boolean draw_pt_post_vs_run( struct pt_post_vs *pvs,
- struct vertex_header *pipeline_verts,
- unsigned count,
- unsigned stride )
+ struct draw_vertex_info *info )
{
- return pvs->run( pvs, pipeline_verts, count, stride );
+ return pvs->run( pvs, info );
}
void draw_pt_post_vs_prepare( struct pt_post_vs *pvs,
- boolean bypass_clipping,
+ boolean clip_xy,
+ boolean clip_z,
+ boolean clip_user,
boolean bypass_viewport,
boolean opengl,
boolean need_edgeflags )
{
- if (!need_edgeflags) {
- if (bypass_clipping) {
- if (bypass_viewport)
- pvs->run = post_vs_none;
- else
- pvs->run = post_vs_viewport;
- }
- else {
- /* if (opengl) */
- pvs->run = post_vs_cliptest_viewport_gl;
- }
+ pvs->flags = 0;
+
+ if (clip_xy)
+ pvs->flags |= DO_CLIP_XY;
+
+ if (clip_z && opengl) {
+ pvs->flags |= DO_CLIP_FULL_Z;
+ ASSIGN_4V( pvs->draw->plane[4], 0, 0, 1, 1 );
}
- else {
- /* If we need to copy edgeflags to the vertex header, it should
- * mean we're running the primitive pipeline. Hence the bypass
- * flags should be false.
- */
- assert(!bypass_clipping);
- assert(!bypass_viewport);
- pvs->run = post_vs_cliptest_viewport_gl_edgeflag;
+
+ if (clip_z && !opengl) {
+ pvs->flags |= DO_CLIP_HALF_Z;
+ ASSIGN_4V( pvs->draw->plane[4], 0, 0, 1, 0 );
+ }
+
+ if (clip_user)
+ pvs->flags |= DO_CLIP_USER;
+
+ if (!bypass_viewport)
+ pvs->flags |= DO_VIEWPORT;
+
+ if (need_edgeflags)
+ pvs->flags |= DO_EDGEFLAG;
+
+ /* Now select the relevant function:
+ */
+ switch (pvs->flags) {
+ case 0:
+ pvs->run = do_cliptest_none;
+ break;
+
+ case DO_CLIP_XY | DO_CLIP_FULL_Z | DO_VIEWPORT:
+ pvs->run = do_cliptest_xy_fullz_viewport;
+ break;
+
+ case DO_CLIP_XY | DO_CLIP_HALF_Z | DO_VIEWPORT:
+ pvs->run = do_cliptest_xy_halfz_viewport;
+ break;
+
+ case DO_CLIP_FULL_Z | DO_VIEWPORT:
+ pvs->run = do_cliptest_fullz_viewport;
+ break;
+
+ case DO_CLIP_HALF_Z | DO_VIEWPORT:
+ pvs->run = do_cliptest_halfz_viewport;
+ break;
+
+ case DO_CLIP_XY | DO_CLIP_FULL_Z | DO_CLIP_USER | DO_VIEWPORT:
+ pvs->run = do_cliptest_xy_fullz_user_viewport;
+ break;
+
+ case (DO_CLIP_XY | DO_CLIP_FULL_Z | DO_CLIP_USER |
+ DO_VIEWPORT | DO_EDGEFLAG):
+ pvs->run = do_cliptest_xy_fullz_user_viewport_edgeflag;
+ break;
+
+ default:
+ pvs->run = do_cliptest_generic;
+ break;
}
}
@@ -272,7 +194,7 @@ struct pt_post_vs *draw_pt_post_vs_create( struct draw_context *draw )
return NULL;
pvs->draw = draw;
-
+
return pvs;
}
diff --git a/src/gallium/auxiliary/draw/draw_pt_so_emit.c b/src/gallium/auxiliary/draw/draw_pt_so_emit.c
new file mode 100644
index 00000000000..c86bdd99a33
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_so_emit.c
@@ -0,0 +1,293 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "draw/draw_context.h"
+#include "draw/draw_private.h"
+#include "draw/draw_vbuf.h"
+#include "draw/draw_vertex.h"
+#include "draw/draw_pt.h"
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+struct pt_so_emit {
+ struct draw_context *draw;
+
+ void *buffers[PIPE_MAX_SO_BUFFERS];
+
+ unsigned input_vertex_stride;
+ const float (*inputs)[4];
+
+ boolean has_so;
+
+ boolean single_buffer;
+
+ unsigned emitted_primitives;
+ unsigned emitted_vertices;
+};
+
+
+void draw_pt_so_emit_prepare(struct pt_so_emit *emit)
+{
+ struct draw_context *draw = emit->draw;
+
+ emit->has_so = (draw->so.state.num_outputs > 0);
+
+ /* if we have a state with outputs make sure we have
+ * buffers to output to */
+ if (emit->has_so) {
+ boolean has_valid_buffer = FALSE;
+ unsigned i;
+ for (i = 0; i < draw->so.num_buffers; ++i) {
+ if (draw->so.buffers[i]) {
+ has_valid_buffer = TRUE;
+ break;
+ }
+ }
+ emit->has_so = has_valid_buffer;
+ }
+
+ if (!emit->has_so)
+ return;
+
+ /* XXX: need to flush to get prim_vbuf.c to release its allocation??
+ */
+ draw_do_flush( draw, DRAW_FLUSH_BACKEND );
+}
+
+static boolean
+is_component_writable(unsigned mask,
+ unsigned compo)
+{
+ switch (mask) {
+ case TGSI_WRITEMASK_NONE:
+ return FALSE;
+ case TGSI_WRITEMASK_X:
+ return compo == 0;
+ case TGSI_WRITEMASK_Y:
+ return compo == 1;
+ case TGSI_WRITEMASK_XY:
+ return compo == 0 || compo == 1;
+ case TGSI_WRITEMASK_Z:
+ return compo == 2;
+ case TGSI_WRITEMASK_XZ:
+ return compo == 0 || compo == 2;
+ case TGSI_WRITEMASK_YZ:
+ return compo == 1 || compo == 2;
+ case TGSI_WRITEMASK_XYZ:
+ return compo == 0 || compo == 1 || compo == 2;
+ case TGSI_WRITEMASK_W:
+ return compo == 3;
+ case TGSI_WRITEMASK_XW:
+ return compo == 0 || compo == 3;
+ case TGSI_WRITEMASK_YW:
+ return compo == 1 || compo == 3;
+ case TGSI_WRITEMASK_XYW:
+ return compo == 0 || compo == 1 || compo == 3;
+ case TGSI_WRITEMASK_ZW:
+ return compo == 2 || compo == 3;
+ case TGSI_WRITEMASK_XZW:
+ return compo == 0 || compo == 1 || compo == 3;
+ case TGSI_WRITEMASK_YZW:
+ return compo == 1 || compo == 2 || compo == 4;
+ case TGSI_WRITEMASK_XYZW:
+ return compo < 4;
+ default:
+ debug_assert(!"Unknown writemask in stream out");
+ return compo < 4;
+ }
+}
+
+static void so_emit_prim(struct pt_so_emit *so,
+ unsigned *indices,
+ unsigned num_vertices)
+{
+ unsigned slot, i;
+ unsigned input_vertex_stride = so->input_vertex_stride;
+ struct draw_context *draw = so->draw;
+ const float (*input_ptr)[4];
+ const struct pipe_stream_output_state *state =
+ &draw->so.state;
+ float **buffer = 0;
+
+ input_ptr = so->inputs;
+
+ for (i = 0; i < num_vertices; ++i) {
+ const float (*input)[4];
+ unsigned total_written_compos = 0;
+ /*debug_printf("%d) vertex index = %d (prim idx = %d)\n", i, indices[i], prim_idx);*/
+ input = (const float (*)[4])(
+ (const char *)input_ptr + (indices[i] * input_vertex_stride));
+ for (slot = 0; slot < state->num_outputs; ++slot) {
+ unsigned idx = state->register_index[slot];
+ unsigned writemask = state->register_mask[slot];
+ unsigned written_compos = 0;
+ unsigned compo;
+
+ buffer = (float**)&so->buffers[state->output_buffer[slot]];
+
+ /*debug_printf("\tSlot = %d, vs_slot = %d, idx = %d:\n",
+ slot, vs_slot, idx);*/
+#if 1
+ assert(!util_is_inf_or_nan(input[idx][0]));
+ assert(!util_is_inf_or_nan(input[idx][1]));
+ assert(!util_is_inf_or_nan(input[idx][2]));
+ assert(!util_is_inf_or_nan(input[idx][3]));
+#endif
+ for (compo = 0; compo < 4; ++compo) {
+ if (is_component_writable(writemask, compo)) {
+ float *buf = *buffer;
+ buf[written_compos++] = input[idx][compo];
+ }
+ }
+#if 0
+ debug_printf("\t\t(writemask = %d)%f %f %f %f\n",
+ writemask,
+ input[idx][0],
+ input[idx][1],
+ input[idx][2],
+ input[idx][3]);
+#endif
+ *buffer += written_compos;
+ total_written_compos += written_compos;
+ }
+ if (so->single_buffer) {
+ int stride = (int)state->stride -
+ sizeof(float) * total_written_compos;
+
+ debug_assert(stride >= 0);
+ *buffer = (float*) (((char*)*buffer) + stride);
+ }
+ }
+ so->emitted_vertices += num_vertices;
+ ++so->emitted_primitives;
+}
+
+static void so_point(struct pt_so_emit *so, int idx)
+{
+ unsigned indices[1];
+
+ indices[0] = idx;
+
+ so_emit_prim(so, indices, 1);
+}
+
+static void so_line(struct pt_so_emit *so, int i0, int i1)
+{
+ unsigned indices[2];
+
+ indices[0] = i0;
+ indices[1] = i1;
+
+ so_emit_prim(so, indices, 2);
+}
+
+static void so_tri(struct pt_so_emit *so, int i0, int i1, int i2)
+{
+ unsigned indices[3];
+
+ indices[0] = i0;
+ indices[1] = i1;
+ indices[2] = i2;
+
+ so_emit_prim(so, indices, 3);
+}
+
+
+#define FUNC so_run_linear
+#define GET_ELT(idx) (start + (idx))
+#include "draw_so_emit_tmp.h"
+
+
+#define FUNC so_run_elts
+#define LOCAL_VARS const ushort *elts = input_prims->elts;
+#define GET_ELT(idx) (elts[start + (idx)])
+#include "draw_so_emit_tmp.h"
+
+
+void draw_pt_so_emit( struct pt_so_emit *emit,
+ const struct draw_vertex_info *input_verts,
+ const struct draw_prim_info *input_prims )
+{
+ struct draw_context *draw = emit->draw;
+ struct vbuf_render *render = draw->render;
+ unsigned start, i;
+
+ if (!emit->has_so)
+ return;
+
+ emit->emitted_vertices = 0;
+ emit->emitted_primitives = 0;
+ emit->input_vertex_stride = input_verts->stride;
+ emit->inputs = (const float (*)[4])input_verts->verts->data;
+ for (i = 0; i < draw->so.num_buffers; ++i) {
+ emit->buffers[i] = draw->so.buffers[i];
+ }
+ emit->single_buffer = TRUE;
+ for (i = 0; i < draw->so.state.num_outputs; ++i) {
+ if (draw->so.state.output_buffer[i] != 0)
+ emit->single_buffer = FALSE;
+ }
+
+ /* XXX: need to flush to get prim_vbuf.c to release its allocation??*/
+ draw_do_flush( draw, DRAW_FLUSH_BACKEND );
+
+ for (start = i = 0; i < input_prims->primitive_count;
+ start += input_prims->primitive_lengths[i], i++)
+ {
+ unsigned count = input_prims->primitive_lengths[i];
+
+ if (input_prims->linear) {
+ so_run_linear(emit, input_prims, input_verts,
+ start, count);
+ } else {
+ so_run_elts(emit, input_prims, input_verts,
+ start, count);
+ }
+ }
+
+ render->set_stream_output_info(render,
+ emit->emitted_primitives,
+ emit->emitted_vertices);
+}
+
+
+struct pt_so_emit *draw_pt_so_emit_create( struct draw_context *draw )
+{
+ struct pt_so_emit *emit = CALLOC_STRUCT(pt_so_emit);
+ if (!emit)
+ return NULL;
+
+ emit->draw = draw;
+
+ return emit;
+}
+
+void draw_pt_so_emit_destroy( struct pt_so_emit *emit )
+{
+ FREE(emit);
+}
diff --git a/src/gallium/auxiliary/draw/draw_pt_util.c b/src/gallium/auxiliary/draw/draw_pt_util.c
index 3236d38e6ab..513bbbed216 100644
--- a/src/gallium/auxiliary/draw/draw_pt_util.c
+++ b/src/gallium/auxiliary/draw/draw_pt_util.c
@@ -53,7 +53,7 @@ void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr)
break;
case PIPE_PRIM_LINES_ADJACENCY:
*first = 4;
- *incr = 2;
+ *incr = 4;
break;
case PIPE_PRIM_LINE_STRIP_ADJACENCY:
*first = 4;
@@ -65,7 +65,7 @@ void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr)
break;
case PIPE_PRIM_TRIANGLES_ADJACENCY:
*first = 6;
- *incr = 3;
+ *incr = 6;
break;
case PIPE_PRIM_TRIANGLE_STRIP:
case PIPE_PRIM_TRIANGLE_FAN:
@@ -75,7 +75,7 @@ void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr)
break;
case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
*first = 6;
- *incr = 1;
+ *incr = 2;
break;
case PIPE_PRIM_QUADS:
*first = 4;
@@ -92,3 +92,10 @@ void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr)
break;
}
}
+
+unsigned draw_pt_trim_count(unsigned count, unsigned first, unsigned incr)
+{
+ if (count < first)
+ return 0;
+ return count - (count - first) % incr;
+}
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c
deleted file mode 100644
index d0e16c9bc3c..00000000000
--- a/src/gallium/auxiliary/draw/draw_pt_varray.c
+++ /dev/null
@@ -1,193 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#include "util/u_math.h"
-#include "util/u_memory.h"
-
-#include "draw/draw_context.h"
-#include "draw/draw_private.h"
-#include "draw/draw_pt.h"
-
-#define FETCH_MAX 256
-#define DRAW_MAX (FETCH_MAX+8)
-
-struct varray_frontend {
- struct draw_pt_front_end base;
- struct draw_context *draw;
-
- ushort draw_elts[DRAW_MAX];
- unsigned fetch_elts[FETCH_MAX];
-
- unsigned driver_fetch_max;
- unsigned fetch_max;
-
- struct draw_pt_middle_end *middle;
-
- unsigned input_prim;
- unsigned output_prim;
-};
-
-
-static void varray_flush_linear(struct varray_frontend *varray,
- unsigned start, unsigned count)
-{
- if (count) {
- assert(varray->middle->run_linear);
- varray->middle->run_linear(varray->middle, start, count);
- }
-}
-
-static void varray_line_loop_segment(struct varray_frontend *varray,
- unsigned start,
- unsigned segment_start,
- unsigned segment_count,
- boolean end )
-{
- assert(segment_count < varray->fetch_max);
- if (segment_count >= 1) {
- unsigned nr = 0, i;
-
- for (i = 0; i < segment_count; i++)
- varray->fetch_elts[nr++] = start + segment_start + i;
-
- if (end)
- varray->fetch_elts[nr++] = start;
-
- assert(nr <= FETCH_MAX);
-
- varray->middle->run(varray->middle,
- varray->fetch_elts,
- nr,
- varray->draw_elts, /* ie. linear */
- nr);
- }
-}
-
-
-
-static void varray_fan_segment(struct varray_frontend *varray,
- unsigned start,
- unsigned segment_start,
- unsigned segment_count )
-{
- assert(segment_count < varray->fetch_max);
- if (segment_count >= 2) {
- unsigned nr = 0, i;
-
- if (segment_start != 0)
- varray->fetch_elts[nr++] = start;
-
- for (i = 0 ; i < segment_count; i++)
- varray->fetch_elts[nr++] = start + segment_start + i;
-
- assert(nr <= FETCH_MAX);
-
- varray->middle->run(varray->middle,
- varray->fetch_elts,
- nr,
- varray->draw_elts, /* ie. linear */
- nr);
- }
-}
-
-
-
-
-#define FUNC varray_run
-#include "draw_pt_varray_tmp_linear.h"
-
-static unsigned decompose_prim[PIPE_PRIM_POLYGON + 1] = {
- PIPE_PRIM_POINTS,
- PIPE_PRIM_LINES,
- PIPE_PRIM_LINE_STRIP, /* decomposed LINELOOP */
- PIPE_PRIM_LINE_STRIP,
- PIPE_PRIM_TRIANGLES,
- PIPE_PRIM_TRIANGLE_STRIP,
- PIPE_PRIM_TRIANGLE_FAN,
- PIPE_PRIM_QUADS,
- PIPE_PRIM_QUAD_STRIP,
- PIPE_PRIM_POLYGON
-};
-
-
-
-static void varray_prepare(struct draw_pt_front_end *frontend,
- unsigned prim,
- struct draw_pt_middle_end *middle,
- unsigned opt)
-{
- struct varray_frontend *varray = (struct varray_frontend *)frontend;
-
- varray->base.run = varray_run;
-
- varray->input_prim = prim;
- varray->output_prim = decompose_prim[prim];
-
- varray->middle = middle;
- middle->prepare(middle, varray->output_prim, opt, &varray->driver_fetch_max );
-
- /* check that the max is even */
- assert((varray->driver_fetch_max & 1) == 0);
-
- varray->fetch_max = MIN2(FETCH_MAX, varray->driver_fetch_max);
-}
-
-
-
-
-static void varray_finish(struct draw_pt_front_end *frontend)
-{
- struct varray_frontend *varray = (struct varray_frontend *)frontend;
- varray->middle->finish(varray->middle);
- varray->middle = NULL;
-}
-
-static void varray_destroy(struct draw_pt_front_end *frontend)
-{
- FREE(frontend);
-}
-
-
-struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw)
-{
- ushort i;
- struct varray_frontend *varray = CALLOC_STRUCT(varray_frontend);
- if (varray == NULL)
- return NULL;
-
- varray->base.prepare = varray_prepare;
- varray->base.run = NULL;
- varray->base.finish = varray_finish;
- varray->base.destroy = varray_destroy;
- varray->draw = draw;
-
- for (i = 0; i < DRAW_MAX; i++) {
- varray->draw_elts[i] = i;
- }
-
- return &varray->base;
-}
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
deleted file mode 100644
index 7c722457c3c..00000000000
--- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
+++ /dev/null
@@ -1,238 +0,0 @@
-
-static void FUNC(struct draw_pt_front_end *frontend,
- pt_elt_func get_elt,
- const void *elts,
- unsigned count)
-{
- struct varray_frontend *varray = (struct varray_frontend *)frontend;
- struct draw_context *draw = varray->draw;
- unsigned start = (unsigned)elts;
-
- boolean flatfirst = (draw->rasterizer->flatshade &&
- draw->rasterizer->flatshade_first);
- unsigned i, j;
- ushort flags;
- unsigned first, incr;
-
- varray->fetch_start = start;
-
- draw_pt_split_prim(varray->input_prim, &first, &incr);
-
-#if 0
- debug_printf("%s (%d) %d/%d\n", __FUNCTION__,
- varray->input_prim,
- start, count);
-#endif
-
- switch (varray->input_prim) {
- case PIPE_PRIM_POINTS:
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i < end; i++) {
- POINT(varray, i + 0);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- break;
-
- case PIPE_PRIM_LINES:
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+1 < end; i += 2) {
- LINE(varray, DRAW_PIPE_RESET_STIPPLE,
- i + 0, i + 1);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- break;
-
- case PIPE_PRIM_LINE_LOOP:
- if (count >= 2) {
- flags = DRAW_PIPE_RESET_STIPPLE;
-
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 1; i < end; i++, flags = 0) {
- LINE(varray, flags, i - 1, i);
- }
- LINE(varray, flags, i - 1, 0);
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- }
- break;
-
- case PIPE_PRIM_LINE_STRIP:
- flags = DRAW_PIPE_RESET_STIPPLE;
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 1; i < end; i++, flags = 0) {
- LINE(varray, flags, i - 1, i);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- break;
-
- case PIPE_PRIM_TRIANGLES:
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+2 < end; i += 3) {
- TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- i + 0, i + 1, i + 2);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- break;
-
- case PIPE_PRIM_TRIANGLE_STRIP:
- if (flatfirst) {
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+2 < end; i++) {
- TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- i + 0, i + 1 + (i&1), i + 2 - (i&1));
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- if (j + first + i <= count) {
- varray->fetch_start -= 2;
- i -= 2;
- }
- }
- }
- else {
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i + 2 < end; i++) {
- TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- i + 0 + (i&1), i + 1 - (i&1), i + 2);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- if (j + first + i <= count) {
- varray->fetch_start -= 2;
- i -= 2;
- }
- }
- }
- break;
-
- case PIPE_PRIM_TRIANGLE_FAN:
- if (count >= 3) {
- if (flatfirst) {
- flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL;
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+2 < end; i++) {
- TRIANGLE(varray, flags, i + 1, i + 2, 0);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- }
- else {
- flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL;
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+2 < end; i++) {
- TRIANGLE(varray, flags, 0, i + 1, i + 2);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- }
- }
- break;
-
- case PIPE_PRIM_QUADS:
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+3 < end; i += 4) {
- QUAD(varray, i + 0, i + 1, i + 2, i + 3);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- break;
-
- case PIPE_PRIM_QUAD_STRIP:
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+3 < end; i += 2) {
- QUAD(varray, i + 2, i + 0, i + 1, i + 3);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- if (j + first + i <= count) {
- varray->fetch_start -= 2;
- i -= 2;
- }
- }
- break;
-
- case PIPE_PRIM_POLYGON:
- {
- /* These bitflags look a little odd because we submit the
- * vertices as (1,2,0) to satisfy flatshade requirements.
- */
- const ushort edge_first = DRAW_PIPE_EDGE_FLAG_2;
- const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_0;
- const ushort edge_last = DRAW_PIPE_EDGE_FLAG_1;
-
- flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle;
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+2 < end; i++, flags = edge_middle) {
-
- if (i + 3 == count)
- flags |= edge_last;
-
- TRIANGLE(varray, flags, i + 1, i + 2, 0);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- }
- break;
-
- default:
- assert(0);
- break;
- }
-
- varray_flush(varray);
-}
-
-#undef TRIANGLE
-#undef QUAD
-#undef POINT
-#undef LINE
-#undef FUNC
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
deleted file mode 100644
index a292346be95..00000000000
--- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
+++ /dev/null
@@ -1,98 +0,0 @@
-static unsigned trim( unsigned count, unsigned first, unsigned incr )
-{
- return count - (count - first) % incr;
-}
-
-static void FUNC(struct draw_pt_front_end *frontend,
- pt_elt_func get_elt,
- const void *elts,
- int elt_bias,
- unsigned count)
-{
- struct varray_frontend *varray = (struct varray_frontend *)frontend;
- unsigned start = (unsigned) ((char *) elts - (char *) NULL);
-
- unsigned j;
- unsigned first, incr;
-
- assert(elt_bias == 0);
-
- draw_pt_split_prim(varray->input_prim, &first, &incr);
-
- /* Sanitize primitive length:
- */
- count = trim(count, first, incr);
- if (count < first)
- return;
-
-#if 0
- debug_printf("%s (%d) %d/%d\n", __FUNCTION__,
- varray->input_prim,
- start, count);
-#endif
-
- switch (varray->input_prim) {
- case PIPE_PRIM_POINTS:
- case PIPE_PRIM_LINES:
- case PIPE_PRIM_TRIANGLES:
- case PIPE_PRIM_LINE_STRIP:
- case PIPE_PRIM_TRIANGLE_STRIP:
- case PIPE_PRIM_QUADS:
- case PIPE_PRIM_QUAD_STRIP:
- case PIPE_PRIM_LINES_ADJACENCY:
- case PIPE_PRIM_LINE_STRIP_ADJACENCY:
- case PIPE_PRIM_TRIANGLES_ADJACENCY:
- case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
- for (j = 0; j < count;) {
- unsigned remaining = count - j;
- unsigned nr = trim( MIN2(varray->driver_fetch_max, remaining), first, incr );
- varray_flush_linear(varray, start + j, nr);
- j += nr;
- if (nr != remaining)
- j -= (first - incr);
- }
- break;
-
- case PIPE_PRIM_LINE_LOOP:
- /* Always have to decompose as we've stated that this will be
- * emitted as a line-strip.
- */
- for (j = 0; j < count;) {
- unsigned remaining = count - j;
- unsigned nr = trim( MIN2(varray->fetch_max-1, remaining), first, incr );
- varray_line_loop_segment(varray, start, j, nr, nr == remaining);
- j += nr;
- if (nr != remaining)
- j -= (first - incr);
- }
- break;
-
-
- case PIPE_PRIM_POLYGON:
- case PIPE_PRIM_TRIANGLE_FAN:
- if (count < varray->driver_fetch_max) {
- varray_flush_linear(varray, start, count);
- }
- else {
- for ( j = 0; j < count;) {
- unsigned remaining = count - j;
- unsigned nr = trim( MIN2(varray->fetch_max-1, remaining), first, incr );
- varray_fan_segment(varray, start, j, nr);
- j += nr;
- if (nr != remaining)
- j -= (first - incr);
- }
- }
- break;
-
- default:
- assert(0);
- break;
- }
-}
-
-#undef TRIANGLE
-#undef QUAD
-#undef POINT
-#undef LINE
-#undef FUNC
diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c
deleted file mode 100644
index 37ffbac4f92..00000000000
--- a/src/gallium/auxiliary/draw/draw_pt_vcache.c
+++ /dev/null
@@ -1,523 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Keith Whitwell <[email protected]>
- */
-
-#include "util/u_memory.h"
-#include "util/u_prim.h"
-#include "draw/draw_context.h"
-#include "draw/draw_private.h"
-#include "draw/draw_pt.h"
-
-
-#define CACHE_MAX 256
-#define FETCH_MAX 256
-#define DRAW_MAX (16*1024)
-
-struct vcache_frontend {
- struct draw_pt_front_end base;
- struct draw_context *draw;
-
- unsigned in[CACHE_MAX];
- ushort out[CACHE_MAX];
-
- ushort draw_elts[DRAW_MAX];
- unsigned fetch_elts[FETCH_MAX];
-
- unsigned draw_count;
- unsigned fetch_count;
- unsigned fetch_max;
-
- struct draw_pt_middle_end *middle;
-
- unsigned input_prim;
- unsigned output_prim;
-
- unsigned middle_prim;
- unsigned opt;
-};
-
-static INLINE void
-vcache_flush( struct vcache_frontend *vcache )
-{
- if (vcache->middle_prim != vcache->output_prim) {
- vcache->middle_prim = vcache->output_prim;
- vcache->middle->prepare( vcache->middle,
- vcache->middle_prim,
- vcache->opt,
- &vcache->fetch_max );
- }
-
- if (vcache->draw_count) {
- vcache->middle->run( vcache->middle,
- vcache->fetch_elts,
- vcache->fetch_count,
- vcache->draw_elts,
- vcache->draw_count );
- }
-
- memset(vcache->in, ~0, sizeof(vcache->in));
- vcache->fetch_count = 0;
- vcache->draw_count = 0;
-}
-
-static INLINE void
-vcache_check_flush( struct vcache_frontend *vcache )
-{
- if ( vcache->draw_count + 6 >= DRAW_MAX ||
- vcache->fetch_count + 4 >= FETCH_MAX )
- {
- vcache_flush( vcache );
- }
-}
-
-
-static INLINE void
-vcache_elt( struct vcache_frontend *vcache,
- unsigned felt,
- ushort flags )
-{
- unsigned idx = felt % CACHE_MAX;
-
- if (vcache->in[idx] != felt) {
- assert(vcache->fetch_count < FETCH_MAX);
-
- vcache->in[idx] = felt;
- vcache->out[idx] = (ushort)vcache->fetch_count;
- vcache->fetch_elts[vcache->fetch_count++] = felt;
- }
-
- vcache->draw_elts[vcache->draw_count++] = vcache->out[idx] | flags;
-}
-
-
-
-static INLINE void
-vcache_triangle( struct vcache_frontend *vcache,
- unsigned i0,
- unsigned i1,
- unsigned i2 )
-{
- vcache_elt(vcache, i0, 0);
- vcache_elt(vcache, i1, 0);
- vcache_elt(vcache, i2, 0);
- vcache_check_flush(vcache);
-}
-
-
-static INLINE void
-vcache_triangle_flags( struct vcache_frontend *vcache,
- ushort flags,
- unsigned i0,
- unsigned i1,
- unsigned i2 )
-{
- vcache_elt(vcache, i0, flags);
- vcache_elt(vcache, i1, 0);
- vcache_elt(vcache, i2, 0);
- vcache_check_flush(vcache);
-}
-
-static INLINE void
-vcache_line( struct vcache_frontend *vcache,
- unsigned i0,
- unsigned i1 )
-{
- vcache_elt(vcache, i0, 0);
- vcache_elt(vcache, i1, 0);
- vcache_check_flush(vcache);
-}
-
-
-static INLINE void
-vcache_line_flags( struct vcache_frontend *vcache,
- ushort flags,
- unsigned i0,
- unsigned i1 )
-{
- vcache_elt(vcache, i0, flags);
- vcache_elt(vcache, i1, 0);
- vcache_check_flush(vcache);
-}
-
-
-static INLINE void
-vcache_point( struct vcache_frontend *vcache,
- unsigned i0 )
-{
- vcache_elt(vcache, i0, 0);
- vcache_check_flush(vcache);
-}
-
-static INLINE void
-vcache_quad( struct vcache_frontend *vcache,
- unsigned i0,
- unsigned i1,
- unsigned i2,
- unsigned i3 )
-{
- vcache_triangle( vcache, i0, i1, i3 );
- vcache_triangle( vcache, i1, i2, i3 );
-}
-
-static INLINE void
-vcache_ef_quad( struct vcache_frontend *vcache,
- unsigned i0,
- unsigned i1,
- unsigned i2,
- unsigned i3 )
-{
- if (vcache->draw->rasterizer->flatshade_first) {
- vcache_triangle_flags( vcache,
- ( DRAW_PIPE_RESET_STIPPLE |
- DRAW_PIPE_EDGE_FLAG_0 |
- DRAW_PIPE_EDGE_FLAG_1 ),
- i0, i1, i2 );
-
- vcache_triangle_flags( vcache,
- ( DRAW_PIPE_EDGE_FLAG_2 |
- DRAW_PIPE_EDGE_FLAG_1 ),
- i0, i2, i3 );
- }
- else {
- vcache_triangle_flags( vcache,
- ( DRAW_PIPE_RESET_STIPPLE |
- DRAW_PIPE_EDGE_FLAG_0 |
- DRAW_PIPE_EDGE_FLAG_2 ),
- i0, i1, i3 );
-
- vcache_triangle_flags( vcache,
- ( DRAW_PIPE_EDGE_FLAG_0 |
- DRAW_PIPE_EDGE_FLAG_1 ),
- i1, i2, i3 );
- }
-}
-
-/* At least for now, we're back to using a template include file for
- * this. The two paths aren't too different though - it may be
- * possible to reunify them.
- */
-#define TRIANGLE(vc,flags,i0,i1,i2) vcache_triangle_flags(vc,flags,i0,i1,i2)
-#define QUAD(vc,i0,i1,i2,i3) vcache_ef_quad(vc,i0,i1,i2,i3)
-#define LINE(vc,flags,i0,i1) vcache_line_flags(vc,flags,i0,i1)
-#define POINT(vc,i0) vcache_point(vc,i0)
-#define FUNC vcache_run_extras
-#include "draw_pt_vcache_tmp.h"
-
-#define TRIANGLE(vc,flags,i0,i1,i2) vcache_triangle(vc,i0,i1,i2)
-#define QUAD(vc,i0,i1,i2,i3) vcache_quad(vc,i0,i1,i2,i3)
-#define LINE(vc,flags,i0,i1) vcache_line(vc,i0,i1)
-#define POINT(vc,i0) vcache_point(vc,i0)
-#define FUNC vcache_run
-#include "draw_pt_vcache_tmp.h"
-
-static INLINE void
-rebase_uint_elts( const unsigned *src,
- unsigned count,
- int delta,
- ushort *dest )
-{
- unsigned i;
-
- for (i = 0; i < count; i++)
- dest[i] = (ushort)(src[i] + delta);
-}
-
-static INLINE void
-rebase_ushort_elts( const ushort *src,
- unsigned count,
- int delta,
- ushort *dest )
-{
- unsigned i;
-
- for (i = 0; i < count; i++)
- dest[i] = (ushort)(src[i] + delta);
-}
-
-static INLINE void
-rebase_ubyte_elts( const ubyte *src,
- unsigned count,
- int delta,
- ushort *dest )
-{
- unsigned i;
-
- for (i = 0; i < count; i++)
- dest[i] = (ushort)(src[i] + delta);
-}
-
-
-
-static INLINE void
-translate_uint_elts( const unsigned *src,
- unsigned count,
- ushort *dest )
-{
- unsigned i;
-
- for (i = 0; i < count; i++)
- dest[i] = (ushort)(src[i]);
-}
-
-static INLINE void
-translate_ushort_elts( const ushort *src,
- unsigned count,
- ushort *dest )
-{
- unsigned i;
-
- for (i = 0; i < count; i++)
- dest[i] = (ushort)(src[i]);
-}
-
-static INLINE void
-translate_ubyte_elts( const ubyte *src,
- unsigned count,
- ushort *dest )
-{
- unsigned i;
-
- for (i = 0; i < count; i++)
- dest[i] = (ushort)(src[i]);
-}
-
-
-
-
-#if 0
-static INLINE enum pipe_format
-format_from_get_elt( pt_elt_func get_elt )
-{
- switch (draw->pt.user.eltSize) {
- case 1: return PIPE_FORMAT_R8_UNORM;
- case 2: return PIPE_FORMAT_R16_UNORM;
- case 4: return PIPE_FORMAT_R32_UNORM;
- default: return PIPE_FORMAT_NONE;
- }
-}
-#endif
-
-static INLINE void
-vcache_check_run( struct draw_pt_front_end *frontend,
- pt_elt_func get_elt,
- const void *elts,
- int elt_bias,
- unsigned draw_count )
-{
- struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
- struct draw_context *draw = vcache->draw;
- unsigned min_index = draw->pt.user.min_index;
- unsigned max_index = draw->pt.user.max_index;
- unsigned index_size = draw->pt.user.eltSize;
- unsigned fetch_count = max_index + 1 - min_index;
- const ushort *transformed_elts;
- ushort *storage = NULL;
- boolean ok = FALSE;
-
-
- if (0) debug_printf("fetch_count %d fetch_max %d draw_count %d\n", fetch_count,
- vcache->fetch_max,
- draw_count);
-
- if (elt_bias + max_index >= DRAW_PIPE_MAX_VERTICES ||
- fetch_count >= UNDEFINED_VERTEX_ID ||
- fetch_count > draw_count) {
- if (0) debug_printf("fail\n");
- goto fail;
- }
-
- if (vcache->middle_prim != vcache->input_prim) {
- vcache->middle_prim = vcache->input_prim;
- vcache->middle->prepare( vcache->middle,
- vcache->middle_prim,
- vcache->opt,
- &vcache->fetch_max );
- }
-
-
- assert((elt_bias >= 0 && min_index + elt_bias >= min_index) ||
- (elt_bias < 0 && min_index + elt_bias < min_index));
-
- if (min_index == 0 &&
- index_size == 2)
- {
- transformed_elts = (const ushort *)elts;
- }
- else
- {
- storage = MALLOC( draw_count * sizeof(ushort) );
- if (!storage)
- goto fail;
-
- if (min_index == 0) {
- switch(index_size) {
- case 1:
- translate_ubyte_elts( (const ubyte *)elts,
- draw_count,
- storage );
- break;
-
- case 2:
- translate_ushort_elts( (const ushort *)elts,
- draw_count,
- storage );
- break;
-
- case 4:
- translate_uint_elts( (const uint *)elts,
- draw_count,
- storage );
- break;
-
- default:
- assert(0);
- FREE(storage);
- return;
- }
- }
- else {
- switch(index_size) {
- case 1:
- rebase_ubyte_elts( (const ubyte *)elts,
- draw_count,
- 0 - (int)min_index,
- storage );
- break;
-
- case 2:
- rebase_ushort_elts( (const ushort *)elts,
- draw_count,
- 0 - (int)min_index,
- storage );
- break;
-
- case 4:
- rebase_uint_elts( (const uint *)elts,
- draw_count,
- 0 - (int)min_index,
- storage );
- break;
-
- default:
- assert(0);
- FREE(storage);
- return;
- }
- }
- transformed_elts = storage;
- }
-
- if (fetch_count < UNDEFINED_VERTEX_ID)
- ok = vcache->middle->run_linear_elts( vcache->middle,
- min_index + elt_bias, /* start */
- fetch_count,
- transformed_elts,
- draw_count );
-
- FREE(storage);
-
- if (ok)
- return;
-
- debug_printf("failed to execute atomic draw elts for %d/%d, splitting up\n",
- fetch_count, draw_count);
-
- fail:
- vcache_run( frontend, get_elt, elts, elt_bias, draw_count );
-}
-
-
-
-
-static void
-vcache_prepare( struct draw_pt_front_end *frontend,
- unsigned prim,
- struct draw_pt_middle_end *middle,
- unsigned opt )
-{
- struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
-
- if (opt & PT_PIPELINE)
- {
- vcache->base.run = vcache_run_extras;
- }
- else
- {
- vcache->base.run = vcache_check_run;
- }
-
- vcache->input_prim = prim;
- vcache->output_prim = u_reduced_prim(prim);
-
- vcache->middle = middle;
- vcache->opt = opt;
-
- /* Have to run prepare here, but try and guess a good prim for
- * doing so:
- */
- vcache->middle_prim = (opt & PT_PIPELINE) ? vcache->output_prim : vcache->input_prim;
- middle->prepare( middle, vcache->middle_prim, opt, &vcache->fetch_max );
-}
-
-
-
-
-static void
-vcache_finish( struct draw_pt_front_end *frontend )
-{
- struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
- vcache->middle->finish( vcache->middle );
- vcache->middle = NULL;
-}
-
-static void
-vcache_destroy( struct draw_pt_front_end *frontend )
-{
- FREE(frontend);
-}
-
-
-struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw )
-{
- struct vcache_frontend *vcache = CALLOC_STRUCT( vcache_frontend );
- if (vcache == NULL)
- return NULL;
-
- vcache->base.prepare = vcache_prepare;
- vcache->base.run = NULL;
- vcache->base.finish = vcache_finish;
- vcache->base.destroy = vcache_destroy;
- vcache->draw = draw;
-
- memset(vcache->in, ~0, sizeof(vcache->in));
-
- return &vcache->base;
-}
diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h
deleted file mode 100644
index f7a63de3ba9..00000000000
--- a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h
+++ /dev/null
@@ -1,197 +0,0 @@
-
-
-static void FUNC( struct draw_pt_front_end *frontend,
- pt_elt_func get_elt,
- const void *elts,
- int elt_bias,
- unsigned count )
-{
- struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
- struct draw_context *draw = vcache->draw;
-
- boolean flatfirst = (draw->rasterizer->flatshade &&
- draw->rasterizer->flatshade_first);
- unsigned i;
- ushort flags;
-
- if (0) debug_printf("%s %d\n", __FUNCTION__, count);
-
-
- switch (vcache->input_prim) {
- case PIPE_PRIM_POINTS:
- for (i = 0; i < count; i ++) {
- POINT( vcache,
- get_elt(elts, i + 0) + elt_bias );
- }
- break;
-
- case PIPE_PRIM_LINES:
- for (i = 0; i+1 < count; i += 2) {
- LINE( vcache,
- DRAW_PIPE_RESET_STIPPLE,
- get_elt(elts, i + 0) + elt_bias,
- get_elt(elts, i + 1) + elt_bias);
- }
- break;
-
- case PIPE_PRIM_LINE_LOOP:
- if (count >= 2) {
- flags = DRAW_PIPE_RESET_STIPPLE;
-
- for (i = 1; i < count; i++, flags = 0) {
- LINE( vcache,
- flags,
- get_elt(elts, i - 1) + elt_bias,
- get_elt(elts, i ) + elt_bias);
- }
-
- LINE( vcache,
- flags,
- get_elt(elts, i - 1) + elt_bias,
- get_elt(elts, 0 ) + elt_bias);
- }
- break;
-
- case PIPE_PRIM_LINE_STRIP:
- flags = DRAW_PIPE_RESET_STIPPLE;
- for (i = 1; i < count; i++, flags = 0) {
- LINE( vcache,
- flags,
- get_elt(elts, i - 1) + elt_bias,
- get_elt(elts, i ) + elt_bias);
- }
- break;
-
- case PIPE_PRIM_TRIANGLES:
- for (i = 0; i+2 < count; i += 3) {
- TRIANGLE( vcache,
- DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- get_elt(elts, i + 0) + elt_bias,
- get_elt(elts, i + 1) + elt_bias,
- get_elt(elts, i + 2 ) + elt_bias);
- }
- break;
-
- case PIPE_PRIM_TRIANGLE_STRIP:
- if (flatfirst) {
- for (i = 0; i+2 < count; i++) {
- TRIANGLE( vcache,
- DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- get_elt(elts, i + 0) + elt_bias,
- get_elt(elts, i + 1 + (i&1)) + elt_bias,
- get_elt(elts, i + 2 - (i&1)) + elt_bias);
- }
- }
- else {
- for (i = 0; i+2 < count; i++) {
- TRIANGLE( vcache,
- DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- get_elt(elts, i + 0 + (i&1)) + elt_bias,
- get_elt(elts, i + 1 - (i&1)) + elt_bias,
- get_elt(elts, i + 2 ) + elt_bias);
- }
- }
- break;
-
- case PIPE_PRIM_TRIANGLE_FAN:
- if (count >= 3) {
- if (flatfirst) {
- for (i = 0; i+2 < count; i++) {
- TRIANGLE( vcache,
- DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- get_elt(elts, i + 1) + elt_bias,
- get_elt(elts, i + 2) + elt_bias,
- get_elt(elts, 0 ) + elt_bias);
- }
- }
- else {
- for (i = 0; i+2 < count; i++) {
- TRIANGLE( vcache,
- DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- get_elt(elts, 0) + elt_bias,
- get_elt(elts, i + 1) + elt_bias,
- get_elt(elts, i + 2 ) + elt_bias);
- }
- }
- }
- break;
-
-
- case PIPE_PRIM_QUADS:
- for (i = 0; i+3 < count; i += 4) {
- QUAD( vcache,
- get_elt(elts, i + 0) + elt_bias,
- get_elt(elts, i + 1) + elt_bias,
- get_elt(elts, i + 2) + elt_bias,
- get_elt(elts, i + 3) + elt_bias );
- }
- break;
-
- case PIPE_PRIM_QUAD_STRIP:
- for (i = 0; i+3 < count; i += 2) {
- QUAD( vcache,
- get_elt(elts, i + 2) + elt_bias,
- get_elt(elts, i + 0) + elt_bias,
- get_elt(elts, i + 1) + elt_bias,
- get_elt(elts, i + 3) + elt_bias );
- }
- break;
-
- case PIPE_PRIM_POLYGON:
- {
- /* These bitflags look a little odd because we submit the
- * vertices as (1,2,0) to satisfy flatshade requirements.
- */
- const ushort edge_first = DRAW_PIPE_EDGE_FLAG_2;
- const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_0;
- const ushort edge_last = DRAW_PIPE_EDGE_FLAG_1;
- ushort edge_next, edge_finish;
-
- if (flatfirst) {
- flags = DRAW_PIPE_RESET_STIPPLE | edge_middle | edge_last;
- edge_next = edge_last;
- edge_finish = edge_first;
- }
- else {
- flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle;
- edge_next = edge_middle;
- edge_finish = edge_last;
- }
-
- for (i = 0; i+2 < count; i++, flags = edge_next) {
-
- if (i + 3 == count)
- flags |= edge_finish;
-
- if (flatfirst) {
- TRIANGLE( vcache,
- flags,
- get_elt(elts, 0) + elt_bias,
- get_elt(elts, i + 1) + elt_bias,
- get_elt(elts, i + 2) + elt_bias );
- }
- else {
- TRIANGLE( vcache,
- flags,
- get_elt(elts, i + 1) + elt_bias,
- get_elt(elts, i + 2) + elt_bias,
- get_elt(elts, 0) + elt_bias);
- }
- }
- }
- break;
-
- default:
- assert(0);
- break;
- }
-
- vcache_flush( vcache );
-}
-
-
-#undef TRIANGLE
-#undef QUAD
-#undef POINT
-#undef LINE
-#undef FUNC
diff --git a/src/gallium/auxiliary/draw/draw_pt_vsplit.c b/src/gallium/auxiliary/draw/draw_pt_vsplit.c
new file mode 100644
index 00000000000..a6875253094
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_vsplit.c
@@ -0,0 +1,208 @@
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.9
+ *
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright (C) 2010 LunarG Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "draw/draw_context.h"
+#include "draw/draw_private.h"
+#include "draw/draw_pt.h"
+
+#define SEGMENT_SIZE 1024
+#define MAP_SIZE 256
+
+struct vsplit_frontend {
+ struct draw_pt_front_end base;
+ struct draw_context *draw;
+
+ unsigned prim;
+
+ struct draw_pt_middle_end *middle;
+
+ unsigned max_vertices;
+ ushort segment_size;
+
+ /* buffers for splitting */
+ unsigned fetch_elts[SEGMENT_SIZE];
+ ushort draw_elts[SEGMENT_SIZE];
+ ushort identity_draw_elts[SEGMENT_SIZE];
+
+ struct {
+ /* map a fetch element to a draw element */
+ unsigned fetches[MAP_SIZE];
+ ushort draws[MAP_SIZE];
+ boolean has_max_fetch;
+
+ ushort num_fetch_elts;
+ ushort num_draw_elts;
+ } cache;
+};
+
+
+static void
+vsplit_clear_cache(struct vsplit_frontend *vsplit)
+{
+ memset(vsplit->cache.fetches, 0xff, sizeof(vsplit->cache.fetches));
+ vsplit->cache.has_max_fetch = FALSE;
+ vsplit->cache.num_fetch_elts = 0;
+ vsplit->cache.num_draw_elts = 0;
+}
+
+static void
+vsplit_flush_cache(struct vsplit_frontend *vsplit, unsigned flags)
+{
+ vsplit->middle->run(vsplit->middle,
+ vsplit->fetch_elts, vsplit->cache.num_fetch_elts,
+ vsplit->draw_elts, vsplit->cache.num_draw_elts, flags);
+}
+
+/**
+ * Add a fetch element and add it to the draw elements.
+ */
+static INLINE void
+vsplit_add_cache(struct vsplit_frontend *vsplit, unsigned fetch)
+{
+ unsigned hash = fetch % MAP_SIZE;
+
+ if (vsplit->cache.fetches[hash] != fetch) {
+ /* update cache */
+ vsplit->cache.fetches[hash] = fetch;
+ vsplit->cache.draws[hash] = vsplit->cache.num_fetch_elts;
+
+ /* add fetch */
+ assert(vsplit->cache.num_fetch_elts < vsplit->segment_size);
+ vsplit->fetch_elts[vsplit->cache.num_fetch_elts++] = fetch;
+ }
+
+ vsplit->draw_elts[vsplit->cache.num_draw_elts++] = vsplit->cache.draws[hash];
+}
+
+
+/**
+ * Add a fetch element and add it to the draw elements. The fetch element is
+ * in full range (uint).
+ */
+static INLINE void
+vsplit_add_cache_uint(struct vsplit_frontend *vsplit, unsigned fetch)
+{
+ /* special care for 0xffffffff */
+ if (fetch == 0xffffffff && !vsplit->cache.has_max_fetch) {
+ unsigned hash = fetch % MAP_SIZE;
+ vsplit->cache.fetches[hash] = fetch - 1; /* force update */
+ vsplit->cache.has_max_fetch = TRUE;
+ }
+
+ vsplit_add_cache(vsplit, fetch);
+}
+
+
+#define FUNC vsplit_run_linear
+#include "draw_pt_vsplit_tmp.h"
+
+#define FUNC vsplit_run_ubyte
+#define ELT_TYPE ubyte
+#define ADD_CACHE(vsplit, fetch) vsplit_add_cache(vsplit, fetch)
+#include "draw_pt_vsplit_tmp.h"
+
+#define FUNC vsplit_run_ushort
+#define ELT_TYPE ushort
+#define ADD_CACHE(vsplit, fetch) vsplit_add_cache(vsplit, fetch)
+#include "draw_pt_vsplit_tmp.h"
+
+#define FUNC vsplit_run_uint
+#define ELT_TYPE uint
+#define ADD_CACHE(vsplit, fetch) vsplit_add_cache_uint(vsplit, fetch)
+#include "draw_pt_vsplit_tmp.h"
+
+
+static void vsplit_prepare(struct draw_pt_front_end *frontend,
+ unsigned in_prim,
+ struct draw_pt_middle_end *middle,
+ unsigned opt)
+{
+ struct vsplit_frontend *vsplit = (struct vsplit_frontend *) frontend;
+
+ switch (vsplit->draw->pt.user.eltSize) {
+ case 0:
+ vsplit->base.run = vsplit_run_linear;
+ break;
+ case 1:
+ vsplit->base.run = vsplit_run_ubyte;
+ break;
+ case 2:
+ vsplit->base.run = vsplit_run_ushort;
+ break;
+ case 4:
+ vsplit->base.run = vsplit_run_uint;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ /* split only */
+ vsplit->prim = in_prim;
+
+ vsplit->middle = middle;
+ middle->prepare(middle, vsplit->prim, opt, &vsplit->max_vertices);
+
+ vsplit->segment_size = MIN2(SEGMENT_SIZE, vsplit->max_vertices);
+}
+
+
+static void vsplit_finish(struct draw_pt_front_end *frontend)
+{
+ struct vsplit_frontend *vsplit = (struct vsplit_frontend *) frontend;
+ vsplit->middle->finish(vsplit->middle);
+ vsplit->middle = NULL;
+}
+
+
+static void vsplit_destroy(struct draw_pt_front_end *frontend)
+{
+ FREE(frontend);
+}
+
+
+struct draw_pt_front_end *draw_pt_vsplit(struct draw_context *draw)
+{
+ struct vsplit_frontend *vsplit = CALLOC_STRUCT(vsplit_frontend);
+ ushort i;
+
+ if (!vsplit)
+ return NULL;
+
+ vsplit->base.prepare = vsplit_prepare;
+ vsplit->base.run = NULL;
+ vsplit->base.finish = vsplit_finish;
+ vsplit->base.destroy = vsplit_destroy;
+ vsplit->draw = draw;
+
+ for (i = 0; i < SEGMENT_SIZE; i++)
+ vsplit->identity_draw_elts[i] = i;
+
+ return &vsplit->base;
+}
diff --git a/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h
new file mode 100644
index 00000000000..3f66f962e11
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h
@@ -0,0 +1,309 @@
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.9
+ *
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright (C) 2010 LunarG Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#define CONCAT2(name, elt_type) name ## elt_type
+#define CONCAT(name, elt_type) CONCAT2(name, elt_type)
+
+#ifdef ELT_TYPE
+
+/**
+ * Fetch all elements in [min_index, max_index] with bias, and use the
+ * (rebased) index buffer as the draw elements.
+ */
+static boolean
+CONCAT(vsplit_primitive_, ELT_TYPE)(struct vsplit_frontend *vsplit,
+ unsigned istart, unsigned icount)
+{
+ struct draw_context *draw = vsplit->draw;
+ const ELT_TYPE *ib = (const ELT_TYPE *)
+ ((const char *) draw->pt.user.elts + draw->pt.index_buffer.offset);
+ const unsigned min_index = draw->pt.user.min_index;
+ const unsigned max_index = draw->pt.user.max_index;
+ const int elt_bias = draw->pt.user.eltBias;
+ unsigned fetch_start, fetch_count;
+ const ushort *draw_elts = NULL;
+ unsigned i;
+
+ /* use the ib directly */
+ if (min_index == 0 && sizeof(ib[0]) == sizeof(draw_elts[0])) {
+ if (icount > vsplit->max_vertices)
+ return FALSE;
+
+ for (i = 0; i < icount; i++) {
+ ELT_TYPE idx = ib[istart + i];
+ assert(idx >= min_index && idx <= max_index);
+ }
+ draw_elts = (const ushort *) ib;
+ }
+ else {
+ /* have to go through vsplit->draw_elts */
+ if (icount > vsplit->segment_size)
+ return FALSE;
+ }
+
+ /* this is faster only when we fetch less elements than the normal path */
+ if (max_index - min_index > icount - 1)
+ return FALSE;
+
+ if (elt_bias < 0 && min_index < -elt_bias)
+ return FALSE;
+
+ /* why this check? */
+ for (i = 0; i < draw->pt.nr_vertex_elements; i++) {
+ if (draw->pt.vertex_element[i].instance_divisor)
+ return FALSE;
+ }
+
+ fetch_start = min_index + elt_bias;
+ fetch_count = max_index - min_index + 1;
+
+ if (!draw_elts) {
+ if (min_index == 0) {
+ for (i = 0; i < icount; i++) {
+ ELT_TYPE idx = ib[istart + i];
+
+ assert(idx >= min_index && idx <= max_index);
+ vsplit->draw_elts[i] = (ushort) idx;
+ }
+ }
+ else {
+ for (i = 0; i < icount; i++) {
+ ELT_TYPE idx = ib[istart + i];
+
+ assert(idx >= min_index && idx <= max_index);
+ vsplit->draw_elts[i] = (ushort) (idx - min_index);
+ }
+ }
+
+ draw_elts = vsplit->draw_elts;
+ }
+
+ return vsplit->middle->run_linear_elts(vsplit->middle,
+ fetch_start, fetch_count,
+ draw_elts, icount, 0x0);
+}
+
+/**
+ * Use the cache to prepare the fetch and draw elements, and flush.
+ *
+ * When spoken is TRUE, ispoken replaces istart; When close is TRUE, iclose is
+ * appended.
+ */
+static INLINE void
+CONCAT(vsplit_segment_cache_, ELT_TYPE)(struct vsplit_frontend *vsplit,
+ unsigned flags,
+ unsigned istart, unsigned icount,
+ boolean spoken, unsigned ispoken,
+ boolean close, unsigned iclose)
+{
+ struct draw_context *draw = vsplit->draw;
+ const ELT_TYPE *ib = (const ELT_TYPE *)
+ ((const char *) draw->pt.user.elts + draw->pt.index_buffer.offset);
+ const int ibias = draw->pt.user.eltBias;
+ unsigned i;
+
+ assert(icount + !!close <= vsplit->segment_size);
+
+ vsplit_clear_cache(vsplit);
+
+ spoken = !!spoken;
+ if (ibias == 0) {
+ if (spoken)
+ ADD_CACHE(vsplit, ib[ispoken]);
+
+ for (i = spoken; i < icount; i++)
+ ADD_CACHE(vsplit, ib[istart + i]);
+
+ if (close)
+ ADD_CACHE(vsplit, ib[iclose]);
+ }
+ else if (ibias > 0) {
+ if (spoken)
+ ADD_CACHE(vsplit, (uint) ib[ispoken] + ibias);
+
+ for (i = spoken; i < icount; i++)
+ ADD_CACHE(vsplit, (uint) ib[istart + i] + ibias);
+
+ if (close)
+ ADD_CACHE(vsplit, (uint) ib[iclose] + ibias);
+ }
+ else {
+ if (spoken) {
+ if (ib[ispoken] < -ibias)
+ return;
+ ADD_CACHE(vsplit, ib[ispoken] + ibias);
+ }
+
+ for (i = spoken; i < icount; i++) {
+ if (ib[istart + i] < -ibias)
+ return;
+ ADD_CACHE(vsplit, ib[istart + i] + ibias);
+ }
+
+ if (close) {
+ if (ib[iclose] < -ibias)
+ return;
+ ADD_CACHE(vsplit, ib[iclose] + ibias);
+ }
+ }
+
+ vsplit_flush_cache(vsplit, flags);
+}
+
+static void
+CONCAT(vsplit_segment_simple_, ELT_TYPE)(struct vsplit_frontend *vsplit,
+ unsigned flags,
+ unsigned istart,
+ unsigned icount)
+{
+ CONCAT(vsplit_segment_cache_, ELT_TYPE)(vsplit,
+ flags, istart, icount, FALSE, 0, FALSE, 0);
+}
+
+static void
+CONCAT(vsplit_segment_loop_, ELT_TYPE)(struct vsplit_frontend *vsplit,
+ unsigned flags,
+ unsigned istart,
+ unsigned icount,
+ unsigned i0)
+{
+ const boolean close_loop = ((flags) == DRAW_SPLIT_BEFORE);
+
+ CONCAT(vsplit_segment_cache_, ELT_TYPE)(vsplit,
+ flags, istart, icount, FALSE, 0, close_loop, i0);
+}
+
+static void
+CONCAT(vsplit_segment_fan_, ELT_TYPE)(struct vsplit_frontend *vsplit,
+ unsigned flags,
+ unsigned istart,
+ unsigned icount,
+ unsigned i0)
+{
+ const boolean use_spoken = (((flags) & DRAW_SPLIT_BEFORE) != 0);
+
+ CONCAT(vsplit_segment_cache_, ELT_TYPE)(vsplit,
+ flags, istart, icount, use_spoken, i0, FALSE, 0);
+}
+
+#define LOCAL_VARS \
+ struct vsplit_frontend *vsplit = (struct vsplit_frontend *) frontend; \
+ const unsigned prim = vsplit->prim; \
+ const unsigned max_count_simple = vsplit->segment_size; \
+ const unsigned max_count_loop = vsplit->segment_size - 1; \
+ const unsigned max_count_fan = vsplit->segment_size;
+
+#define PRIMITIVE(istart, icount) \
+ CONCAT(vsplit_primitive_, ELT_TYPE)(vsplit, istart, icount)
+
+#else /* ELT_TYPE */
+
+static void
+vsplit_segment_simple_linear(struct vsplit_frontend *vsplit, unsigned flags,
+ unsigned istart, unsigned icount)
+{
+ assert(icount <= vsplit->max_vertices);
+ vsplit->middle->run_linear(vsplit->middle, istart, icount, flags);
+}
+
+static void
+vsplit_segment_loop_linear(struct vsplit_frontend *vsplit, unsigned flags,
+ unsigned istart, unsigned icount, unsigned i0)
+{
+ boolean close_loop = (flags == DRAW_SPLIT_BEFORE);
+ unsigned nr;
+
+ assert(icount + !!close_loop <= vsplit->segment_size);
+
+ if (close_loop) {
+ for (nr = 0; nr < icount; nr++)
+ vsplit->fetch_elts[nr] = istart + nr;
+ vsplit->fetch_elts[nr++] = i0;
+
+ vsplit->middle->run(vsplit->middle, vsplit->fetch_elts, nr,
+ vsplit->identity_draw_elts, nr, flags);
+ }
+ else {
+ vsplit->middle->run_linear(vsplit->middle, istart, icount, flags);
+ }
+}
+
+static void
+vsplit_segment_fan_linear(struct vsplit_frontend *vsplit, unsigned flags,
+ unsigned istart, unsigned icount, unsigned i0)
+{
+ boolean use_spoken = ((flags & DRAW_SPLIT_BEFORE) != 0);
+ unsigned nr = 0, i;
+
+ assert(icount + !!use_spoken <= vsplit->segment_size);
+
+ if (use_spoken) {
+ vsplit->fetch_elts[nr++] = i0;
+ for (i = 1 ; i < icount; i++)
+ vsplit->fetch_elts[nr++] = istart + i;
+
+ vsplit->middle->run(vsplit->middle, vsplit->fetch_elts, nr,
+ vsplit->identity_draw_elts, nr, flags);
+ }
+ else {
+ vsplit->middle->run_linear(vsplit->middle, istart, icount, flags);
+ }
+}
+
+#define LOCAL_VARS \
+ struct vsplit_frontend *vsplit = (struct vsplit_frontend *) frontend; \
+ const unsigned prim = vsplit->prim; \
+ const unsigned max_count_simple = vsplit->max_vertices; \
+ const unsigned max_count_loop = vsplit->segment_size - 1; \
+ const unsigned max_count_fan = vsplit->segment_size;
+
+#define PRIMITIVE(istart, icount) FALSE
+
+#define ELT_TYPE linear
+
+#endif /* ELT_TYPE */
+
+#define FUNC_VARS \
+ struct draw_pt_front_end *frontend, \
+ unsigned start, \
+ unsigned count
+
+#define SEGMENT_SIMPLE(flags, istart, icount) \
+ CONCAT(vsplit_segment_simple_, ELT_TYPE)(vsplit, flags, istart, icount)
+
+#define SEGMENT_LOOP(flags, istart, icount, i0) \
+ CONCAT(vsplit_segment_loop_, ELT_TYPE)(vsplit, flags, istart, icount, i0)
+
+#define SEGMENT_FAN(flags, istart, icount, i0) \
+ CONCAT(vsplit_segment_fan_, ELT_TYPE)(vsplit, flags, istart, icount, i0)
+
+#include "draw_split_tmp.h"
+
+#undef CONCAT2
+#undef CONCAT
+
+#undef ELT_TYPE
+#undef ADD_CACHE
diff --git a/src/gallium/auxiliary/draw/draw_so_emit_tmp.h b/src/gallium/auxiliary/draw/draw_so_emit_tmp.h
new file mode 100644
index 00000000000..7fafde9d5e6
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_so_emit_tmp.h
@@ -0,0 +1,31 @@
+#define FUNC_VARS \
+ struct pt_so_emit *so, \
+ const struct draw_prim_info *input_prims, \
+ const struct draw_vertex_info *input_verts, \
+ unsigned start, \
+ unsigned count
+
+#define FUNC_ENTER \
+ /* declare more local vars */ \
+ const unsigned prim = input_prims->prim; \
+ const unsigned prim_flags = input_prims->flags; \
+ const boolean last_vertex_last = TRUE; \
+ do { \
+ debug_assert(input_prims->primitive_count == 1); \
+ switch (prim) { \
+ case PIPE_PRIM_LINES_ADJACENCY: \
+ case PIPE_PRIM_LINE_STRIP_ADJACENCY: \
+ case PIPE_PRIM_TRIANGLES_ADJACENCY: \
+ case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: \
+ debug_assert(!"unexpected primitive type in stream output"); \
+ return; \
+ default: \
+ break; \
+ } \
+ } while (0) \
+
+#define POINT(i0) so_point(so,i0)
+#define LINE(flags,i0,i1) so_line(so,i0,i1)
+#define TRIANGLE(flags,i0,i1,i2) so_tri(so,i0,i1,i2)
+
+#include "draw_decompose_tmp.h"
diff --git a/src/gallium/auxiliary/draw/draw_split_tmp.h b/src/gallium/auxiliary/draw/draw_split_tmp.h
new file mode 100644
index 00000000000..47defc62b96
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_split_tmp.h
@@ -0,0 +1,176 @@
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.9
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright (C) 2010 LunarG Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+static void
+FUNC(FUNC_VARS)
+{
+ unsigned first, incr;
+ LOCAL_VARS
+
+ /*
+ * prim, start, count, and max_count_{simple,loop,fan} should have been
+ * defined
+ */
+ if (0) {
+ debug_printf("%s: prim 0x%x, start %d, count %d, max_count_simple %d, "
+ "max_count_loop %d, max_count_fan %d\n",
+ __FUNCTION__, prim, start, count, max_count_simple,
+ max_count_loop, max_count_fan);
+ }
+
+ draw_pt_split_prim(prim, &first, &incr);
+ /* sanitize primitive length */
+ count = draw_pt_trim_count(count, first, incr);
+ if (count < first)
+ return;
+
+ /* try flushing the entire primitive */
+ if (PRIMITIVE(start, count))
+ return;
+
+ /* must be able to at least flush two complete primitives */
+ assert(max_count_simple >= first + incr &&
+ max_count_loop >= first + incr &&
+ max_count_fan >= first + incr);
+
+ /* no splitting required */
+ if (count <= max_count_simple) {
+ SEGMENT_SIMPLE(0x0, start, count);
+ }
+ else {
+ const unsigned rollback = first - incr;
+ unsigned flags = DRAW_SPLIT_AFTER, seg_start = 0, seg_max;
+
+ /*
+ * Both count and seg_max below are explicitly trimmed. Because
+ *
+ * seg_start = N * (seg_max - rollback) = N' * incr,
+ *
+ * we have
+ *
+ * remaining = count - seg_start = first + N'' * incr.
+ *
+ * That is, remaining is implicitly trimmed.
+ */
+ switch (prim) {
+ case PIPE_PRIM_POINTS:
+ case PIPE_PRIM_LINES:
+ case PIPE_PRIM_LINE_STRIP:
+ case PIPE_PRIM_TRIANGLES:
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ case PIPE_PRIM_QUADS:
+ case PIPE_PRIM_QUAD_STRIP:
+ case PIPE_PRIM_LINES_ADJACENCY:
+ case PIPE_PRIM_LINE_STRIP_ADJACENCY:
+ case PIPE_PRIM_TRIANGLES_ADJACENCY:
+ case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
+ seg_max =
+ draw_pt_trim_count(MIN2(max_count_simple, count), first, incr);
+ if (prim == PIPE_PRIM_TRIANGLE_STRIP ||
+ prim == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY) {
+ /* make sure we flush even number of triangles at a time */
+ if (seg_max < count && !(((seg_max - first) / incr) & 1))
+ seg_max -= incr;
+ }
+
+ do {
+ const unsigned remaining = count - seg_start;
+
+ if (remaining > seg_max) {
+ SEGMENT_SIMPLE(flags, start + seg_start, seg_max);
+ seg_start += seg_max - rollback;
+
+ flags |= DRAW_SPLIT_BEFORE;
+ }
+ else {
+ flags &= ~DRAW_SPLIT_AFTER;
+
+ SEGMENT_SIMPLE(flags, start + seg_start, remaining);
+ seg_start += remaining;
+ }
+ } while (seg_start < count);
+ break;
+
+ case PIPE_PRIM_LINE_LOOP:
+ seg_max =
+ draw_pt_trim_count(MIN2(max_count_loop, count), first, incr);
+
+ do {
+ const unsigned remaining = count - seg_start;
+
+ if (remaining > seg_max) {
+ SEGMENT_LOOP(flags, start + seg_start, seg_max, start);
+ seg_start += seg_max - rollback;
+
+ flags |= DRAW_SPLIT_BEFORE;
+ }
+ else {
+ flags &= ~DRAW_SPLIT_AFTER;
+
+ SEGMENT_LOOP(flags, start + seg_start, remaining, start);
+ seg_start += remaining;
+ }
+ } while (seg_start < count);
+ break;
+
+ case PIPE_PRIM_TRIANGLE_FAN:
+ case PIPE_PRIM_POLYGON:
+ seg_max =
+ draw_pt_trim_count(MIN2(max_count_fan, count), first, incr);
+
+ do {
+ const unsigned remaining = count - seg_start;
+
+ if (remaining > seg_max) {
+ SEGMENT_FAN(flags, start + seg_start, seg_max, start);
+ seg_start += seg_max - rollback;
+
+ flags |= DRAW_SPLIT_BEFORE;
+ }
+ else {
+ flags &= ~DRAW_SPLIT_AFTER;
+
+ SEGMENT_FAN(flags, start + seg_start, remaining, start);
+ seg_start += remaining;
+ }
+ } while (seg_start < count);
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+ }
+}
+
+#undef FUNC
+#undef FUNC_VARS
+#undef LOCAL_VARS
+
+#undef PRIMITIVE
+#undef SEGMENT_SIMPLE
+#undef SEGMENT_LOOP
+#undef SEGMENT_FAN
diff --git a/src/gallium/auxiliary/draw/draw_vbuf.h b/src/gallium/auxiliary/draw/draw_vbuf.h
index cccd3bf4358..e32803c0720 100644
--- a/src/gallium/auxiliary/draw/draw_vbuf.h
+++ b/src/gallium/auxiliary/draw/draw_vbuf.h
@@ -98,14 +98,14 @@ struct vbuf_render {
boolean (*set_primitive)( struct vbuf_render *, unsigned prim );
/**
- * DrawElements, note indices are ushort. The driver must complete
- * this call, if necessary splitting the index list itself.
+ * Draw indexed primitives. Note that indices are ushort. The driver
+ * must complete this call, if necessary splitting the index list itself.
*/
- void (*draw)( struct vbuf_render *,
- const ushort *indices,
- uint nr_indices );
+ void (*draw_elements)( struct vbuf_render *,
+ const ushort *indices,
+ uint nr_indices );
- /* Draw Arrays path too.
+ /* Draw non-indexed primitives.
*/
void (*draw_arrays)( struct vbuf_render *,
unsigned start,
@@ -117,6 +117,14 @@ struct vbuf_render {
void (*release_vertices)( struct vbuf_render * );
void (*destroy)( struct vbuf_render * );
+
+
+ /**
+ * Called after writing data to the stream out buffers
+ */
+ void (*set_stream_output_info)( struct vbuf_render *vbufr,
+ unsigned primitive_count,
+ unsigned vertices_count );
};
diff --git a/src/gallium/auxiliary/draw/draw_vertex.h b/src/gallium/auxiliary/draw/draw_vertex.h
index 3af31ffe126..e63cf5f4f98 100644
--- a/src/gallium/auxiliary/draw/draw_vertex.h
+++ b/src/gallium/auxiliary/draw/draw_vertex.h
@@ -166,7 +166,7 @@ static INLINE enum pipe_format draw_translate_vinfo_format(enum attrib_emit emit
}
}
-static INLINE enum attrib_emit draw_translate_vinfo_size(enum attrib_emit emit)
+static INLINE unsigned draw_translate_vinfo_size(enum attrib_emit emit)
{
switch (emit) {
case EMIT_OMIT:
diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c
index c2832eefa2a..fb665b08fff 100644
--- a/src/gallium/auxiliary/draw/draw_vs.c
+++ b/src/gallium/auxiliary/draw/draw_vs.c
@@ -48,18 +48,30 @@
DEBUG_GET_ONCE_BOOL_OPTION(gallium_dump_vs, "GALLIUM_DUMP_VS", FALSE)
+
+/**
+ * Set a vertex shader constant buffer.
+ * \param slot which constant buffer in [0, PIPE_MAX_CONSTANT_BUFFERS-1]
+ * \param constants the mapped buffer
+ * \param size size of buffer in bytes
+ */
void
draw_vs_set_constants(struct draw_context *draw,
unsigned slot,
const void *constants,
unsigned size)
{
- if (((uintptr_t)constants) & 0xf) {
+ const int alignment = 16;
+
+ /* check if buffer is 16-byte aligned */
+ if (((uintptr_t)constants) & (alignment - 1)) {
+ /* if not, copy the constants into a new, 16-byte aligned buffer */
if (size > draw->vs.const_storage_size[slot]) {
if (draw->vs.aligned_constant_storage[slot]) {
align_free((void *)draw->vs.aligned_constant_storage[slot]);
}
- draw->vs.aligned_constant_storage[slot] = align_malloc(size, 16);
+ draw->vs.aligned_constant_storage[slot] =
+ align_malloc(size, alignment);
}
assert(constants);
memcpy((void *)draw->vs.aligned_constant_storage[slot],
@@ -85,18 +97,27 @@ struct draw_vertex_shader *
draw_create_vertex_shader(struct draw_context *draw,
const struct pipe_shader_state *shader)
{
- struct draw_vertex_shader *vs;
+ struct draw_vertex_shader *vs = NULL;
if (draw->dump_vs) {
tgsi_dump(shader->tokens, 0);
}
- vs = draw_create_vs_sse( draw, shader );
- if (!vs) {
+ if (!draw->pt.middle.llvm) {
+#if defined(PIPE_ARCH_X86)
+ vs = draw_create_vs_sse( draw, shader );
+#elif defined(PIPE_ARCH_PPC)
vs = draw_create_vs_ppc( draw, shader );
- if (!vs) {
- vs = draw_create_vs_exec( draw, shader );
- }
+#endif
+ }
+#if HAVE_LLVM
+ else {
+ vs = draw_create_vs_llvm(draw, shader);
+ }
+#endif
+
+ if (!vs) {
+ vs = draw_create_vs_exec( draw, shader );
}
if (vs)
diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h
index 6c7e94db433..f9a038788fb 100644
--- a/src/gallium/auxiliary/draw/draw_vs.h
+++ b/src/gallium/auxiliary/draw/draw_vs.h
@@ -133,7 +133,8 @@ struct draw_vertex_shader {
void (*run_linear)( struct draw_vertex_shader *shader,
const float (*input)[4],
float (*output)[4],
- const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
+ const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
+ const unsigned const_size[PIPE_MAX_CONSTANT_BUFFERS],
unsigned count,
unsigned input_stride,
unsigned output_stride );
@@ -165,7 +166,6 @@ draw_create_vs_ppc(struct draw_context *draw,
const struct pipe_shader_state *templ);
-
struct draw_vs_varient_key;
struct draw_vertex_shader;
@@ -173,6 +173,11 @@ struct draw_vs_varient *
draw_vs_create_varient_aos_sse( struct draw_vertex_shader *vs,
const struct draw_vs_varient_key *key );
+#if HAVE_LLVM
+struct draw_vertex_shader *
+draw_create_vs_llvm(struct draw_context *draw,
+ const struct pipe_shader_state *state);
+#endif
/********************************************************************************
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h
index 1911242f825..68e8295b5e1 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.h
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.h
@@ -32,6 +32,8 @@
#define DRAW_VS_AOS_H
#include "pipe/p_config.h"
+#include "tgsi/tgsi_exec.h"
+#include "draw_vs.h"
#ifdef PIPE_ARCH_X86
diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c
index bc34d390dae..dab3eb1ca8e 100644
--- a/src/gallium/auxiliary/draw/draw_vs_exec.c
+++ b/src/gallium/auxiliary/draw/draw_vs_exec.c
@@ -85,7 +85,8 @@ static void
vs_exec_run_linear( struct draw_vertex_shader *shader,
const float (*input)[4],
float (*output)[4],
- const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
+ const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
+ const unsigned const_size[PIPE_MAX_CONSTANT_BUFFERS],
unsigned count,
unsigned input_stride,
unsigned output_stride )
@@ -95,9 +96,8 @@ vs_exec_run_linear( struct draw_vertex_shader *shader,
unsigned int i, j;
unsigned slot;
- for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
- machine->Consts[i] = constants[i];
- }
+ tgsi_exec_set_constant_buffers(machine, PIPE_MAX_CONSTANT_BUFFERS,
+ constants, const_size);
for (i = 0; i < count; i += MAX_TGSI_VERTICES) {
unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i);
diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c
new file mode 100644
index 00000000000..fa9992db783
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c
@@ -0,0 +1,127 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "pipe/p_shader_tokens.h"
+#include "pipe/p_screen.h"
+
+#include "draw_private.h"
+#include "draw_context.h"
+#include "draw_vs.h"
+#include "draw_llvm.h"
+
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_scan.h"
+
+static void
+vs_llvm_prepare(struct draw_vertex_shader *shader,
+ struct draw_context *draw)
+{
+ /*struct llvm_vertex_shader *evs = llvm_vertex_shader(shader);*/
+}
+
+static void
+vs_llvm_run_linear( struct draw_vertex_shader *shader,
+ const float (*input)[4],
+ float (*output)[4],
+ const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
+ const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS],
+ unsigned count,
+ unsigned input_stride,
+ unsigned output_stride )
+{
+ /* we should never get here since the entire pipeline is
+ * generated in draw_pt_fetch_shade_pipeline_llvm.c */
+ debug_assert(0);
+}
+
+
+static void
+vs_llvm_delete( struct draw_vertex_shader *dvs )
+{
+ struct llvm_vertex_shader *shader = llvm_vertex_shader(dvs);
+ struct pipe_fence_handle *fence = NULL;
+ struct draw_llvm_variant_list_item *li;
+ struct pipe_context *pipe = dvs->draw->pipe;
+
+ /*
+ * XXX: This might be not neccessary at all.
+ */
+ pipe->flush(pipe, 0, &fence);
+ if (fence) {
+ pipe->screen->fence_finish(pipe->screen, fence, 0);
+ pipe->screen->fence_reference(pipe->screen, &fence, NULL);
+ }
+
+
+ li = first_elem(&shader->variants);
+ while(!at_end(&shader->variants, li)) {
+ struct draw_llvm_variant_list_item *next = next_elem(li);
+ draw_llvm_destroy_variant(li->base);
+ li = next;
+ }
+
+ assert(shader->variants_cached == 0);
+ FREE((void*) dvs->state.tokens);
+ FREE( dvs );
+}
+
+
+struct draw_vertex_shader *
+draw_create_vs_llvm(struct draw_context *draw,
+ const struct pipe_shader_state *state)
+{
+ struct llvm_vertex_shader *vs = CALLOC_STRUCT( llvm_vertex_shader );
+
+ if (vs == NULL)
+ return NULL;
+
+ /* we make a private copy of the tokens */
+ vs->base.state.tokens = tgsi_dup_tokens(state->tokens);
+ if (!vs->base.state.tokens) {
+ FREE(vs);
+ return NULL;
+ }
+
+ tgsi_scan_shader(state->tokens, &vs->base.info);
+
+ vs->variant_key_size =
+ draw_llvm_variant_key_size(
+ vs->base.info.file_max[TGSI_FILE_INPUT]+1,
+ vs->base.info.file_max[TGSI_FILE_SAMPLER]+1);
+
+ vs->base.draw = draw;
+ vs->base.prepare = vs_llvm_prepare;
+ vs->base.run_linear = vs_llvm_run_linear;
+ vs->base.delete = vs_llvm_delete;
+ vs->base.create_varient = draw_vs_create_varient_generic;
+
+ make_empty_list(&vs->variants);
+
+ return &vs->base;
+}
diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c
index d869eecec5e..5df84916c51 100644
--- a/src/gallium/auxiliary/draw/draw_vs_ppc.c
+++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c
@@ -125,7 +125,7 @@ vs_ppc_run_linear( struct draw_vertex_shader *base,
*/
shader->func(inputs_soa, outputs_soa, temps_soa,
(float (*)[4]) shader->base.immediates,
- (const float (*)[4])constants[0],
+ (float (*)[4])constants[0],
ppc_builtin_constants);
/* convert (up to) four output verts from SoA back to AoS format */
@@ -190,7 +190,7 @@ draw_create_vs_ppc(struct draw_context *draw,
vs->base.create_varient = draw_vs_varient_aos_ppc;
else
#endif
- vs->base.create_varient = draw_vs_varient_generic;
+ vs->base.create_varient = draw_vs_create_varient_generic;
vs->base.prepare = vs_ppc_prepare;
vs->base.run_linear = vs_ppc_run_linear;
vs->base.delete = vs_ppc_delete;
diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c
index 14c95082a9d..0b0c6077c6f 100644
--- a/src/gallium/auxiliary/draw/draw_vs_sse.c
+++ b/src/gallium/auxiliary/draw/draw_vs_sse.c
@@ -84,6 +84,7 @@ vs_sse_run_linear( struct draw_vertex_shader *base,
const float (*input)[4],
float (*output)[4],
const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
+ const unsigned const_size[PIPE_MAX_CONSTANT_BUFFERS],
unsigned count,
unsigned input_stride,
unsigned output_stride )
diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c
index 6eb26927f27..eacd1601877 100644
--- a/src/gallium/auxiliary/draw/draw_vs_varient.c
+++ b/src/gallium/auxiliary/draw/draw_vs_varient.c
@@ -149,7 +149,8 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient,
vsvg->base.vs->run_linear( vsvg->base.vs,
temp_buffer,
temp_buffer,
- vsvg->base.vs->draw->pt.user.vs_constants,
+ vsvg->base.vs->draw->pt.user.vs_constants,
+ vsvg->base.vs->draw->pt.user.vs_constants_size,
count,
temp_vertex_stride,
temp_vertex_stride);
@@ -214,7 +215,8 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient,
vsvg->base.vs->run_linear( vsvg->base.vs,
temp_buffer,
temp_buffer,
- vsvg->base.vs->draw->pt.user.vs_constants,
+ vsvg->base.vs->draw->pt.user.vs_constants,
+ vsvg->base.vs->draw->pt.user.vs_constants_size,
count,
temp_vertex_stride,
temp_vertex_stride);
diff --git a/src/gallium/auxiliary/gallivm/f.cpp b/src/gallium/auxiliary/gallivm/f.cpp
new file mode 100644
index 00000000000..5eb09c01ab3
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/f.cpp
@@ -0,0 +1,85 @@
+/**************************************************************************
+ *
+ * (C) Copyright VMware, Inc 2010.
+ * (C) Copyright John Maddock 2006.
+ * Use, modification and distribution are subject to the
+ * Boost Software License, Version 1.0. (See accompanying file
+ * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+ *
+ **************************************************************************/
+
+
+/*
+ * This file allows to compute the minimax polynomial coefficients we use
+ * for fast exp2/log2.
+ *
+ * How to use this source:
+ *
+ * - Download and abuild the NTL library from
+ * http://shoup.net/ntl/download.html
+ *
+ * - Download boost source code matching to your distro.
+ *
+ * - Goto libs/math/minimax and replace f.cpp with this file.
+ *
+ * - Build as
+ *
+ * g++ -o minimax -I /path/to/ntl/include main.cpp f.cpp /path/to/ntl/src/ntl.a -lboost_math_tr1
+ *
+ * - Run as
+ *
+ * ./minimax
+ *
+ * - For example, to compute exp2 5th order polynomial between [0, 1] do:
+ *
+ * variant 1
+ * range 0 1
+ * order 5 0
+ * steps 200
+ * info
+ *
+ * - For more info see
+ * http://www.boost.org/doc/libs/1_36_0/libs/math/doc/sf_and_dist/html/math_toolkit/toolkit/internals2/minimax.html
+ */
+
+#define L22
+#include <boost/math/bindings/rr.hpp>
+#include <boost/math/tools/polynomial.hpp>
+
+#include <cmath>
+
+
+boost::math::ntl::RR f(const boost::math::ntl::RR& x, int variant)
+{
+ static const boost::math::ntl::RR tiny = boost::math::tools::min_value<boost::math::ntl::RR>() * 64;
+
+ switch(variant)
+ {
+ case 0:
+ // log2(x)/(x - 1)
+ return log(x)/log(2.0)/(x - 1.0);
+
+ case 1:
+ // exp2(x)
+ return exp(x*log(2.0));
+ }
+
+ return 0;
+}
+
+
+void show_extra(
+ const boost::math::tools::polynomial<boost::math::ntl::RR>& n,
+ const boost::math::tools::polynomial<boost::math::ntl::RR>& d,
+ const boost::math::ntl::RR& x_offset,
+ const boost::math::ntl::RR& y_offset,
+ int variant)
+{
+ switch(variant)
+ {
+ default:
+ // do nothing here...
+ ;
+ }
+}
+
diff --git a/src/gallium/auxiliary/gallivm/lp_bld.h b/src/gallium/auxiliary/gallivm/lp_bld.h
index 2fa682f4009..8103bc917fc 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld.h
@@ -35,6 +35,17 @@
#define LP_BLD_H
+/**
+ * @file
+ * LLVM IR building helpers interfaces.
+ *
+ * We use LLVM-C bindings for now. They are not documented, but follow the C++
+ * interfaces very closely, and appear to be complete enough for code
+ * genration. See
+ * http://npcontemplation.blogspot.com/2008/06/secret-of-llvm-c-bindings.html
+ * for a standalone example.
+ */
+
#include <llvm-c/Core.h>
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 20ae958714b..64c468c14d4 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -1,6 +1,6 @@
/**************************************************************************
*
- * Copyright 2009 VMware, Inc.
+ * Copyright 2009-2010 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -60,6 +60,11 @@
#include "lp_bld_arit.h"
+#define EXP_POLY_DEGREE 3
+
+#define LOG_POLY_DEGREE 5
+
+
/**
* Generate min(a, b)
* No checks for special case values of a or b = 1 or 0 are done.
@@ -73,6 +78,9 @@ lp_build_min_simple(struct lp_build_context *bld,
const char *intrinsic = NULL;
LLVMValueRef cond;
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+
/* TODO: optimize the constant case */
if(type.width * type.length == 128) {
@@ -119,6 +127,9 @@ lp_build_max_simple(struct lp_build_context *bld,
const char *intrinsic = NULL;
LLVMValueRef cond;
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+
/* TODO: optimize the constant case */
if(type.width * type.length == 128) {
@@ -161,6 +172,8 @@ lp_build_comp(struct lp_build_context *bld,
{
const struct lp_type type = bld->type;
+ assert(lp_check_value(type, a));
+
if(a == bld->one)
return bld->zero;
if(a == bld->zero)
@@ -174,9 +187,15 @@ lp_build_comp(struct lp_build_context *bld,
}
if(LLVMIsConstant(a))
- return LLVMConstSub(bld->one, a);
+ if (type.floating)
+ return LLVMConstFSub(bld->one, a);
+ else
+ return LLVMConstSub(bld->one, a);
else
- return LLVMBuildSub(bld->builder, bld->one, a, "");
+ if (type.floating)
+ return LLVMBuildFSub(bld->builder, bld->one, a, "");
+ else
+ return LLVMBuildSub(bld->builder, bld->one, a, "");
}
@@ -191,6 +210,9 @@ lp_build_add(struct lp_build_context *bld,
const struct lp_type type = bld->type;
LLVMValueRef res;
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+
if(a == bld->zero)
return b;
if(b == bld->zero)
@@ -218,9 +240,15 @@ lp_build_add(struct lp_build_context *bld,
}
if(LLVMIsConstant(a) && LLVMIsConstant(b))
- res = LLVMConstAdd(a, b);
+ if (type.floating)
+ res = LLVMConstFAdd(a, b);
+ else
+ res = LLVMConstAdd(a, b);
else
- res = LLVMBuildAdd(bld->builder, a, b, "");
+ if (type.floating)
+ res = LLVMBuildFAdd(bld->builder, a, b, "");
+ else
+ res = LLVMBuildAdd(bld->builder, a, b, "");
/* clamp to ceiling of 1.0 */
if(bld->type.norm && (bld->type.floating || bld->type.fixed))
@@ -232,20 +260,20 @@ lp_build_add(struct lp_build_context *bld,
}
-/** Return the sum of the elements of a */
+/** Return the scalar sum of the elements of a */
LLVMValueRef
lp_build_sum_vector(struct lp_build_context *bld,
LLVMValueRef a)
{
const struct lp_type type = bld->type;
LLVMValueRef index, res;
- int i;
+ unsigned i;
- if (a == bld->zero)
- return bld->zero;
- if (a == bld->undef)
- return bld->undef;
- assert(type.length > 1);
+ assert(lp_check_value(type, a));
+
+ if (type.length == 1) {
+ return a;
+ }
assert(!bld->type.norm);
@@ -254,9 +282,16 @@ lp_build_sum_vector(struct lp_build_context *bld,
for (i = 1; i < type.length; i++) {
index = LLVMConstInt(LLVMInt32Type(), i, 0);
- res = LLVMBuildAdd(bld->builder, res,
- LLVMBuildExtractElement(bld->builder, a, index, ""),
- "");
+ if (type.floating)
+ res = LLVMBuildFAdd(bld->builder, res,
+ LLVMBuildExtractElement(bld->builder,
+ a, index, ""),
+ "");
+ else
+ res = LLVMBuildAdd(bld->builder, res,
+ LLVMBuildExtractElement(bld->builder,
+ a, index, ""),
+ "");
}
return res;
@@ -274,6 +309,9 @@ lp_build_sub(struct lp_build_context *bld,
const struct lp_type type = bld->type;
LLVMValueRef res;
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+
if(b == bld->zero)
return a;
if(a == bld->undef || b == bld->undef)
@@ -301,9 +339,15 @@ lp_build_sub(struct lp_build_context *bld,
}
if(LLVMIsConstant(a) && LLVMIsConstant(b))
- res = LLVMConstSub(a, b);
+ if (type.floating)
+ res = LLVMConstFSub(a, b);
+ else
+ res = LLVMConstSub(a, b);
else
- res = LLVMBuildSub(bld->builder, a, b, "");
+ if (type.floating)
+ res = LLVMBuildFSub(bld->builder, a, b, "");
+ else
+ res = LLVMBuildSub(bld->builder, a, b, "");
if(bld->type.norm && (bld->type.floating || bld->type.fixed))
res = lp_build_max_simple(bld, res, bld->zero);
@@ -361,6 +405,10 @@ lp_build_mul_u8n(LLVMBuilderRef builder,
LLVMValueRef c8;
LLVMValueRef ab;
+ assert(!i16_type.floating);
+ assert(lp_check_value(i16_type, a));
+ assert(lp_check_value(i16_type, b));
+
c8 = lp_build_const_int_vec(i16_type, 8);
#if 0
@@ -396,6 +444,9 @@ lp_build_mul(struct lp_build_context *bld,
LLVMValueRef shift;
LLVMValueRef res;
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+
if(a == bld->zero)
return bld->zero;
if(a == bld->one)
@@ -434,7 +485,10 @@ lp_build_mul(struct lp_build_context *bld,
shift = NULL;
if(LLVMIsConstant(a) && LLVMIsConstant(b)) {
- res = LLVMConstMul(a, b);
+ if (type.floating)
+ res = LLVMConstFMul(a, b);
+ else
+ res = LLVMConstMul(a, b);
if(shift) {
if(type.sign)
res = LLVMConstAShr(res, shift);
@@ -443,7 +497,10 @@ lp_build_mul(struct lp_build_context *bld,
}
}
else {
- res = LLVMBuildMul(bld->builder, a, b, "");
+ if (type.floating)
+ res = LLVMBuildFMul(bld->builder, a, b, "");
+ else
+ res = LLVMBuildMul(bld->builder, a, b, "");
if(shift) {
if(type.sign)
res = LLVMBuildAShr(bld->builder, res, shift, "");
@@ -466,6 +523,8 @@ lp_build_mul_imm(struct lp_build_context *bld,
{
LLVMValueRef factor;
+ assert(lp_check_value(bld->type, a));
+
if(b == 0)
return bld->zero;
@@ -473,12 +532,12 @@ lp_build_mul_imm(struct lp_build_context *bld,
return a;
if(b == -1)
- return LLVMBuildNeg(bld->builder, a, "");
+ return lp_build_negate(bld, a);
if(b == 2 && bld->type.floating)
return lp_build_add(bld, a, a);
- if(util_is_pot(b)) {
+ if(util_is_power_of_two(b)) {
unsigned shift = ffs(b) - 1;
if(bld->type.floating) {
@@ -519,6 +578,9 @@ lp_build_div(struct lp_build_context *bld,
{
const struct lp_type type = bld->type;
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+
if(a == bld->zero)
return bld->zero;
if(a == bld->one)
@@ -530,44 +592,125 @@ lp_build_div(struct lp_build_context *bld,
if(a == bld->undef || b == bld->undef)
return bld->undef;
- if(LLVMIsConstant(a) && LLVMIsConstant(b))
- return LLVMConstFDiv(a, b);
+ if(LLVMIsConstant(a) && LLVMIsConstant(b)) {
+ if (type.floating)
+ return LLVMConstFDiv(a, b);
+ else if (type.sign)
+ return LLVMConstSDiv(a, b);
+ else
+ return LLVMConstUDiv(a, b);
+ }
if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4)
return lp_build_mul(bld, a, lp_build_rcp(bld, b));
- return LLVMBuildFDiv(bld->builder, a, b, "");
+ if (type.floating)
+ return LLVMBuildFDiv(bld->builder, a, b, "");
+ else if (type.sign)
+ return LLVMBuildSDiv(bld->builder, a, b, "");
+ else
+ return LLVMBuildUDiv(bld->builder, a, b, "");
}
/**
- * Linear interpolation.
- *
- * This also works for integer values with a few caveats.
+ * Linear interpolation -- without any checks.
*
* @sa http://www.stereopsis.com/doubleblend.html
*/
-LLVMValueRef
-lp_build_lerp(struct lp_build_context *bld,
- LLVMValueRef x,
- LLVMValueRef v0,
- LLVMValueRef v1)
+static INLINE LLVMValueRef
+lp_build_lerp_simple(struct lp_build_context *bld,
+ LLVMValueRef x,
+ LLVMValueRef v0,
+ LLVMValueRef v1)
{
LLVMValueRef delta;
LLVMValueRef res;
+ assert(lp_check_value(bld->type, x));
+ assert(lp_check_value(bld->type, v0));
+ assert(lp_check_value(bld->type, v1));
+
delta = lp_build_sub(bld, v1, v0);
res = lp_build_mul(bld, x, delta);
res = lp_build_add(bld, v0, res);
- if(bld->type.fixed)
+ if (bld->type.fixed) {
/* XXX: This step is necessary for lerping 8bit colors stored on 16bits,
* but it will be wrong for other uses. Basically we need a more
* powerful lp_type, capable of further distinguishing the values
* interpretation from the value storage. */
res = LLVMBuildAnd(bld->builder, res, lp_build_const_int_vec(bld->type, (1 << bld->type.width/2) - 1), "");
+ }
+
+ return res;
+}
+
+
+/**
+ * Linear interpolation.
+ */
+LLVMValueRef
+lp_build_lerp(struct lp_build_context *bld,
+ LLVMValueRef x,
+ LLVMValueRef v0,
+ LLVMValueRef v1)
+{
+ const struct lp_type type = bld->type;
+ LLVMValueRef res;
+
+ assert(lp_check_value(type, x));
+ assert(lp_check_value(type, v0));
+ assert(lp_check_value(type, v1));
+
+ if (type.norm) {
+ struct lp_type wide_type;
+ struct lp_build_context wide_bld;
+ LLVMValueRef xl, xh, v0l, v0h, v1l, v1h, resl, resh;
+ LLVMValueRef shift;
+
+ assert(type.length >= 2);
+ assert(!type.sign);
+
+ /*
+ * Create a wider type, enough to hold the intermediate result of the
+ * multiplication.
+ */
+ memset(&wide_type, 0, sizeof wide_type);
+ wide_type.fixed = TRUE;
+ wide_type.width = type.width*2;
+ wide_type.length = type.length/2;
+
+ lp_build_context_init(&wide_bld, bld->builder, wide_type);
+
+ lp_build_unpack2(bld->builder, type, wide_type, x, &xl, &xh);
+ lp_build_unpack2(bld->builder, type, wide_type, v0, &v0l, &v0h);
+ lp_build_unpack2(bld->builder, type, wide_type, v1, &v1l, &v1h);
+
+ /*
+ * Scale x from [0, 255] to [0, 256]
+ */
+
+ shift = lp_build_const_int_vec(wide_type, type.width - 1);
+
+ xl = lp_build_add(&wide_bld, xl,
+ LLVMBuildAShr(bld->builder, xl, shift, ""));
+ xh = lp_build_add(&wide_bld, xh,
+ LLVMBuildAShr(bld->builder, xh, shift, ""));
+
+ /*
+ * Lerp both halves.
+ */
+
+ resl = lp_build_lerp_simple(&wide_bld, xl, v0l, v1l);
+ resh = lp_build_lerp_simple(&wide_bld, xh, v0h, v1h);
+
+ res = lp_build_pack2(bld->builder, wide_type, type, resl, resh);
+ } else {
+ res = lp_build_lerp_simple(bld, x, v0, v1);
+ }
return res;
}
@@ -597,6 +740,9 @@ lp_build_min(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b)
{
+ assert(lp_check_value(bld->type, a));
+ assert(lp_check_value(bld->type, b));
+
if(a == bld->undef || b == bld->undef)
return bld->undef;
@@ -625,6 +771,9 @@ lp_build_max(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b)
{
+ assert(lp_check_value(bld->type, a));
+ assert(lp_check_value(bld->type, b));
+
if(a == bld->undef || b == bld->undef)
return bld->undef;
@@ -654,6 +803,10 @@ lp_build_clamp(struct lp_build_context *bld,
LLVMValueRef min,
LLVMValueRef max)
{
+ assert(lp_check_value(bld->type, a));
+ assert(lp_check_value(bld->type, min));
+ assert(lp_check_value(bld->type, max));
+
a = lp_build_min(bld, a, max);
a = lp_build_max(bld, a, min);
return a;
@@ -670,31 +823,20 @@ lp_build_abs(struct lp_build_context *bld,
const struct lp_type type = bld->type;
LLVMTypeRef vec_type = lp_build_vec_type(type);
+ assert(lp_check_value(type, a));
+
if(!type.sign)
return a;
if(type.floating) {
/* Mask out the sign bit */
- if (type.length == 1) {
- LLVMTypeRef int_type = LLVMIntType(type.width);
- LLVMTypeRef float_type = LLVMFloatType();
- unsigned long long absMask = ~(1ULL << (type.width - 1));
- LLVMValueRef mask = LLVMConstInt(int_type, absMask, 0);
- a = LLVMBuildBitCast(bld->builder, a, int_type, "");
- a = LLVMBuildAnd(bld->builder, a, mask, "");
- a = LLVMBuildBitCast(bld->builder, a, float_type, "");
- return a;
- }
- else {
- /* vector of floats */
- LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
- unsigned long long absMask = ~(1ULL << (type.width - 1));
- LLVMValueRef mask = lp_build_const_int_vec(type, ((unsigned long long) absMask));
- a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
- a = LLVMBuildAnd(bld->builder, a, mask, "");
- a = LLVMBuildBitCast(bld->builder, a, vec_type, "");
- return a;
- }
+ LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+ unsigned long long absMask = ~(1ULL << (type.width - 1));
+ LLVMValueRef mask = lp_build_const_int_vec(type, ((unsigned long long) absMask));
+ a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+ a = LLVMBuildAnd(bld->builder, a, mask, "");
+ a = LLVMBuildBitCast(bld->builder, a, vec_type, "");
+ return a;
}
if(type.width*type.length == 128 && util_cpu_caps.has_ssse3) {
@@ -716,7 +858,16 @@ LLVMValueRef
lp_build_negate(struct lp_build_context *bld,
LLVMValueRef a)
{
- return LLVMBuildNeg(bld->builder, a, "");
+ assert(lp_check_value(bld->type, a));
+
+#if HAVE_LLVM >= 0x0207
+ if (bld->type.floating)
+ a = LLVMBuildFNeg(bld->builder, a, "");
+ else
+#endif
+ a = LLVMBuildNeg(bld->builder, a, "");
+
+ return a;
}
@@ -729,6 +880,8 @@ lp_build_sgn(struct lp_build_context *bld,
LLVMValueRef cond;
LLVMValueRef res;
+ assert(lp_check_value(type, a));
+
/* Handle non-zero case */
if(!type.sign) {
/* if not zero then sign must be positive */
@@ -742,17 +895,9 @@ lp_build_sgn(struct lp_build_context *bld,
LLVMValueRef one;
unsigned long long maskBit = (unsigned long long)1 << (type.width - 1);
- if (type.length == 1) {
- int_type = lp_build_int_elem_type(type);
- vec_type = lp_build_elem_type(type);
- mask = LLVMConstInt(int_type, maskBit, 0);
- }
- else {
- /* vector */
- int_type = lp_build_int_vec_type(type);
- vec_type = lp_build_vec_type(type);
- mask = lp_build_const_int_vec(type, maskBit);
- }
+ int_type = lp_build_int_vec_type(type);
+ vec_type = lp_build_vec_type(type);
+ mask = lp_build_const_int_vec(type, maskBit);
/* Take the sign bit and add it to 1 constant */
sign = LLVMBuildBitCast(bld->builder, a, int_type, "");
@@ -795,6 +940,7 @@ lp_build_set_sign(struct lp_build_context *bld,
LLVMValueRef val, res;
assert(type.floating);
+ assert(lp_check_value(type, a));
/* val = reinterpret_cast<int>(a) */
val = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
@@ -819,21 +965,11 @@ lp_build_int_to_float(struct lp_build_context *bld,
LLVMValueRef a)
{
const struct lp_type type = bld->type;
+ LLVMTypeRef vec_type = lp_build_vec_type(type);
assert(type.floating);
- /*assert(lp_check_value(type, a));*/
- if (type.length == 1) {
- LLVMTypeRef float_type = LLVMFloatType();
- return LLVMBuildSIToFP(bld->builder, a, float_type, "");
- }
- else {
- LLVMTypeRef vec_type = lp_build_vec_type(type);
- /*LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);*/
- LLVMValueRef res;
- res = LLVMBuildSIToFP(bld->builder, a, vec_type, "");
- return res;
- }
+ return LLVMBuildSIToFP(bld->builder, a, vec_type, "");
}
@@ -853,31 +989,75 @@ lp_build_round_sse41(struct lp_build_context *bld,
enum lp_build_round_sse41_mode mode)
{
const struct lp_type type = bld->type;
- LLVMTypeRef vec_type = lp_build_vec_type(type);
+ LLVMTypeRef i32t = LLVMInt32Type();
const char *intrinsic;
+ LLVMValueRef res;
assert(type.floating);
- assert(type.width*type.length == 128);
+
assert(lp_check_value(type, a));
assert(util_cpu_caps.has_sse4_1);
- switch(type.width) {
- case 32:
- intrinsic = "llvm.x86.sse41.round.ps";
- break;
- case 64:
- intrinsic = "llvm.x86.sse41.round.pd";
- break;
- default:
- assert(0);
- return bld->undef;
+ if (type.length == 1) {
+ LLVMTypeRef vec_type;
+ LLVMValueRef undef;
+ LLVMValueRef args[3];
+ LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
+
+ switch(type.width) {
+ case 32:
+ intrinsic = "llvm.x86.sse41.round.ss";
+ break;
+ case 64:
+ intrinsic = "llvm.x86.sse41.round.sd";
+ break;
+ default:
+ assert(0);
+ return bld->undef;
+ }
+
+ vec_type = LLVMVectorType(bld->elem_type, 4);
+
+ undef = LLVMGetUndef(vec_type);
+
+ args[0] = undef;
+ args[1] = LLVMBuildInsertElement(bld->builder, undef, a, index0, "");
+ args[2] = LLVMConstInt(i32t, mode, 0);
+
+ res = lp_build_intrinsic(bld->builder, intrinsic,
+ vec_type, args, Elements(args));
+
+ res = LLVMBuildExtractElement(bld->builder, res, index0, "");
+ }
+ else {
+ assert(type.width*type.length == 128);
+
+ switch(type.width) {
+ case 32:
+ intrinsic = "llvm.x86.sse41.round.ps";
+ break;
+ case 64:
+ intrinsic = "llvm.x86.sse41.round.pd";
+ break;
+ default:
+ assert(0);
+ return bld->undef;
+ }
+
+ res = lp_build_intrinsic_binary(bld->builder, intrinsic,
+ bld->vec_type, a,
+ LLVMConstInt(i32t, mode, 0));
}
- return lp_build_intrinsic_binary(bld->builder, intrinsic, vec_type, a,
- LLVMConstInt(LLVMInt32Type(), mode, 0));
+ return res;
}
+/**
+ * Return the integer part of a float (vector) value. The returned value is
+ * a float (vector).
+ * Ex: trunc(-1.5) = 1.0
+ */
LLVMValueRef
lp_build_trunc(struct lp_build_context *bld,
LLVMValueRef a)
@@ -887,8 +1067,10 @@ lp_build_trunc(struct lp_build_context *bld,
assert(type.floating);
assert(lp_check_value(type, a));
- if(util_cpu_caps.has_sse4_1)
+ if (util_cpu_caps.has_sse4_1 &&
+ (type.length == 1 || type.width*type.length == 128)) {
return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_TRUNCATE);
+ }
else {
LLVMTypeRef vec_type = lp_build_vec_type(type);
LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
@@ -900,6 +1082,12 @@ lp_build_trunc(struct lp_build_context *bld,
}
+/**
+ * Return float (vector) rounded to nearest integer (vector). The returned
+ * value is a float (vector).
+ * Ex: round(0.9) = 1.0
+ * Ex: round(-1.5) = -2.0
+ */
LLVMValueRef
lp_build_round(struct lp_build_context *bld,
LLVMValueRef a)
@@ -909,8 +1097,10 @@ lp_build_round(struct lp_build_context *bld,
assert(type.floating);
assert(lp_check_value(type, a));
- if(util_cpu_caps.has_sse4_1)
+ if (util_cpu_caps.has_sse4_1 &&
+ (type.length == 1 || type.width*type.length == 128)) {
return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST);
+ }
else {
LLVMTypeRef vec_type = lp_build_vec_type(type);
LLVMValueRef res;
@@ -921,6 +1111,11 @@ lp_build_round(struct lp_build_context *bld,
}
+/**
+ * Return floor of float (vector), result is a float (vector)
+ * Ex: floor(1.1) = 1.0
+ * Ex: floor(-1.1) = -2.0
+ */
LLVMValueRef
lp_build_floor(struct lp_build_context *bld,
LLVMValueRef a)
@@ -928,16 +1123,12 @@ lp_build_floor(struct lp_build_context *bld,
const struct lp_type type = bld->type;
assert(type.floating);
+ assert(lp_check_value(type, a));
- if (type.length == 1) {
- LLVMValueRef res;
- res = lp_build_ifloor(bld, a);
- res = LLVMBuildSIToFP(bld->builder, res, LLVMFloatType(), "");
- return res;
- }
-
- if(util_cpu_caps.has_sse4_1)
+ if (util_cpu_caps.has_sse4_1 &&
+ (type.length == 1 || type.width*type.length == 128)) {
return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR);
+ }
else {
LLVMTypeRef vec_type = lp_build_vec_type(type);
LLVMValueRef res;
@@ -948,6 +1139,11 @@ lp_build_floor(struct lp_build_context *bld,
}
+/**
+ * Return ceiling of float (vector), returning float (vector).
+ * Ex: ceil( 1.1) = 2.0
+ * Ex: ceil(-1.1) = -1.0
+ */
LLVMValueRef
lp_build_ceil(struct lp_build_context *bld,
LLVMValueRef a)
@@ -957,8 +1153,10 @@ lp_build_ceil(struct lp_build_context *bld,
assert(type.floating);
assert(lp_check_value(type, a));
- if(util_cpu_caps.has_sse4_1)
+ if (util_cpu_caps.has_sse4_1 &&
+ (type.length == 1 || type.width*type.length == 128)) {
return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL);
+ }
else {
LLVMTypeRef vec_type = lp_build_vec_type(type);
LLVMValueRef res;
@@ -970,7 +1168,7 @@ lp_build_ceil(struct lp_build_context *bld,
/**
- * Return fractional part of 'a' computed as a - floor(f)
+ * Return fractional part of 'a' computed as a - floor(a)
* Typically used in texture coord arithmetic.
*/
LLVMValueRef
@@ -983,72 +1181,67 @@ lp_build_fract(struct lp_build_context *bld,
/**
- * Convert to integer, through whichever rounding method that's fastest,
- * typically truncating toward zero.
+ * Return the integer part of a float (vector) value. The returned value is
+ * an integer (vector).
+ * Ex: itrunc(-1.5) = 1
*/
LLVMValueRef
lp_build_itrunc(struct lp_build_context *bld,
LLVMValueRef a)
{
const struct lp_type type = bld->type;
+ LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
assert(type.floating);
+ assert(lp_check_value(type, a));
- if (type.length == 1) {
- LLVMTypeRef int_type = LLVMIntType(type.width);
- return LLVMBuildFPToSI(bld->builder, a, int_type, "");
- }
- else {
- LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
- assert(lp_check_value(type, a));
- return LLVMBuildFPToSI(bld->builder, a, int_vec_type, "");
- }
+ return LLVMBuildFPToSI(bld->builder, a, int_vec_type, "");
}
/**
- * Convert float[] to int[] with round().
+ * Return float (vector) rounded to nearest integer (vector). The returned
+ * value is an integer (vector).
+ * Ex: iround(0.9) = 1
+ * Ex: iround(-1.5) = -2
*/
LLVMValueRef
lp_build_iround(struct lp_build_context *bld,
LLVMValueRef a)
{
const struct lp_type type = bld->type;
- LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+ LLVMTypeRef int_vec_type = bld->int_vec_type;
LLVMValueRef res;
assert(type.floating);
- if (type.length == 1) {
- /* scalar float to int */
- LLVMTypeRef int_type = LLVMIntType(type.width);
- /* XXX we want rounding here! */
- res = LLVMBuildFPToSI(bld->builder, a, int_type, "");
- return res;
- }
-
assert(lp_check_value(type, a));
- if(util_cpu_caps.has_sse4_1) {
+ if (util_cpu_caps.has_sse4_1 &&
+ (type.length == 1 || type.width*type.length == 128)) {
res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST);
}
else {
- LLVMTypeRef vec_type = lp_build_vec_type(type);
- LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
- LLVMValueRef sign;
LLVMValueRef half;
- /* get sign bit */
- sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
- sign = LLVMBuildAnd(bld->builder, sign, mask, "");
-
- /* sign * 0.5 */
half = lp_build_const_vec(type, 0.5);
- half = LLVMBuildBitCast(bld->builder, half, int_vec_type, "");
- half = LLVMBuildOr(bld->builder, sign, half, "");
- half = LLVMBuildBitCast(bld->builder, half, vec_type, "");
- res = LLVMBuildAdd(bld->builder, a, half, "");
+ if (type.sign) {
+ LLVMTypeRef vec_type = bld->vec_type;
+ LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
+ LLVMValueRef sign;
+
+ /* get sign bit */
+ sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+ sign = LLVMBuildAnd(bld->builder, sign, mask, "");
+
+ /* sign * 0.5 */
+ half = LLVMBuildBitCast(bld->builder, half, int_vec_type, "");
+ half = LLVMBuildOr(bld->builder, sign, half, "");
+ half = LLVMBuildBitCast(bld->builder, half, vec_type, "");
+ }
+
+ res = LLVMBuildFAdd(bld->builder, a, half, "");
}
res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, "");
@@ -1058,89 +1251,156 @@ lp_build_iround(struct lp_build_context *bld,
/**
- * Convert float[] to int[] with floor().
+ * Return floor of float (vector), result is an int (vector)
+ * Ex: ifloor(1.1) = 1.0
+ * Ex: ifloor(-1.1) = -2.0
*/
LLVMValueRef
lp_build_ifloor(struct lp_build_context *bld,
LLVMValueRef a)
{
const struct lp_type type = bld->type;
- LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+ LLVMTypeRef int_vec_type = bld->int_vec_type;
LLVMValueRef res;
assert(type.floating);
-
- if (type.length == 1) {
- /* scalar float to int */
- LLVMTypeRef int_type = LLVMIntType(type.width);
- res = LLVMBuildFPToSI(bld->builder, a, int_type, "");
- return res;
- }
-
assert(lp_check_value(type, a));
- if(util_cpu_caps.has_sse4_1) {
+ if (util_cpu_caps.has_sse4_1 &&
+ (type.length == 1 || type.width*type.length == 128)) {
res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR);
}
else {
- /* Take the sign bit and add it to 1 constant */
- LLVMTypeRef vec_type = lp_build_vec_type(type);
- unsigned mantissa = lp_mantissa(type);
- LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
- LLVMValueRef sign;
- LLVMValueRef offset;
-
- /* sign = a < 0 ? ~0 : 0 */
- sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
- sign = LLVMBuildAnd(bld->builder, sign, mask, "");
- sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "");
- lp_build_name(sign, "floor.sign");
-
- /* offset = -0.99999(9)f */
- offset = lp_build_const_vec(type, -(double)(((unsigned long long)1 << mantissa) - 1)/((unsigned long long)1 << mantissa));
- offset = LLVMConstBitCast(offset, int_vec_type);
-
- /* offset = a < 0 ? -0.99999(9)f : 0.0f */
- offset = LLVMBuildAnd(bld->builder, offset, sign, "");
- offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "");
- lp_build_name(offset, "floor.offset");
-
- res = LLVMBuildAdd(bld->builder, a, offset, "");
- lp_build_name(res, "floor.res");
+ res = a;
+
+ if (type.sign) {
+ /* Take the sign bit and add it to 1 constant */
+ LLVMTypeRef vec_type = bld->vec_type;
+ unsigned mantissa = lp_mantissa(type);
+ LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
+ LLVMValueRef sign;
+ LLVMValueRef offset;
+
+ /* sign = a < 0 ? ~0 : 0 */
+ sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+ sign = LLVMBuildAnd(bld->builder, sign, mask, "");
+ sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "ifloor.sign");
+
+ /* offset = -0.99999(9)f */
+ offset = lp_build_const_vec(type, -(double)(((unsigned long long)1 << mantissa) - 10)/((unsigned long long)1 << mantissa));
+ offset = LLVMConstBitCast(offset, int_vec_type);
+
+ /* offset = a < 0 ? offset : 0.0f */
+ offset = LLVMBuildAnd(bld->builder, offset, sign, "");
+ offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "ifloor.offset");
+
+ res = LLVMBuildFAdd(bld->builder, res, offset, "ifloor.res");
+ }
}
- res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, "");
- lp_build_name(res, "floor");
+ /* round to nearest (toward zero) */
+ res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, "ifloor.res");
return res;
}
+/**
+ * Return ceiling of float (vector), returning int (vector).
+ * Ex: iceil( 1.1) = 2
+ * Ex: iceil(-1.1) = -1
+ */
LLVMValueRef
lp_build_iceil(struct lp_build_context *bld,
LLVMValueRef a)
{
const struct lp_type type = bld->type;
- LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+ LLVMTypeRef int_vec_type = bld->int_vec_type;
LLVMValueRef res;
assert(type.floating);
assert(lp_check_value(type, a));
- if(util_cpu_caps.has_sse4_1) {
+ if (util_cpu_caps.has_sse4_1 &&
+ (type.length == 1 || type.width*type.length == 128)) {
res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL);
}
else {
- assert(0);
- res = bld->undef;
+ LLVMTypeRef vec_type = bld->vec_type;
+ unsigned mantissa = lp_mantissa(type);
+ LLVMValueRef offset;
+
+ /* offset = 0.99999(9)f */
+ offset = lp_build_const_vec(type, (double)(((unsigned long long)1 << mantissa) - 10)/((unsigned long long)1 << mantissa));
+
+ if (type.sign) {
+ LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
+ LLVMValueRef sign;
+
+ /* sign = a < 0 ? 0 : ~0 */
+ sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+ sign = LLVMBuildAnd(bld->builder, sign, mask, "");
+ sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "iceil.sign");
+ sign = LLVMBuildNot(bld->builder, sign, "iceil.not");
+
+ /* offset = a < 0 ? 0.0 : offset */
+ offset = LLVMConstBitCast(offset, int_vec_type);
+ offset = LLVMBuildAnd(bld->builder, offset, sign, "");
+ offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "iceil.offset");
+ }
+
+ res = LLVMBuildFAdd(bld->builder, a, offset, "iceil.res");
}
- res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, "");
+ /* round to nearest (toward zero) */
+ res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, "iceil.res");
return res;
}
+/**
+ * Combined ifloor() & fract().
+ *
+ * Preferred to calling the functions separately, as it will ensure that the
+ * stratergy (floor() vs ifloor()) that results in less redundant work is used.
+ */
+void
+lp_build_ifloor_fract(struct lp_build_context *bld,
+ LLVMValueRef a,
+ LLVMValueRef *out_ipart,
+ LLVMValueRef *out_fpart)
+{
+
+
+ const struct lp_type type = bld->type;
+ LLVMValueRef ipart;
+
+ assert(type.floating);
+ assert(lp_check_value(type, a));
+
+ if (util_cpu_caps.has_sse4_1 &&
+ (type.length == 1 || type.width*type.length == 128)) {
+ /*
+ * floor() is easier.
+ */
+
+ ipart = lp_build_floor(bld, a);
+ *out_fpart = LLVMBuildFSub(bld->builder, a, ipart, "fpart");
+ *out_ipart = LLVMBuildFPToSI(bld->builder, ipart, bld->int_vec_type, "ipart");
+ }
+ else {
+ /*
+ * ifloor() is easier.
+ */
+
+ *out_ipart = lp_build_ifloor(bld, a);
+ ipart = LLVMBuildSIToFP(bld->builder, *out_ipart, bld->vec_type, "ipart");
+ *out_fpart = LLVMBuildFSub(bld->builder, a, ipart, "fpart");
+ }
+}
+
+
LLVMValueRef
lp_build_sqrt(struct lp_build_context *bld,
LLVMValueRef a)
@@ -1149,6 +1409,8 @@ lp_build_sqrt(struct lp_build_context *bld,
LLVMTypeRef vec_type = lp_build_vec_type(type);
char intrinsic[32];
+ assert(lp_check_value(type, a));
+
/* TODO: optimize the constant case */
/* TODO: optimize the constant case */
@@ -1159,12 +1421,44 @@ lp_build_sqrt(struct lp_build_context *bld,
}
+/**
+ * Do one Newton-Raphson step to improve reciprocate precision:
+ *
+ * x_{i+1} = x_i * (2 - a * x_i)
+ *
+ * XXX: Unfortunately this won't give IEEE-754 conformant results for 0 or
+ * +/-Inf, giving NaN instead. Certain applications rely on this behavior,
+ * such as Google Earth, which does RCP(RSQRT(0.0) when drawing the Earth's
+ * halo. It would be necessary to clamp the argument to prevent this.
+ *
+ * See also:
+ * - http://en.wikipedia.org/wiki/Division_(digital)#Newton.E2.80.93Raphson_division
+ * - http://softwarecommunity.intel.com/articles/eng/1818.htm
+ */
+static INLINE LLVMValueRef
+lp_build_rcp_refine(struct lp_build_context *bld,
+ LLVMValueRef a,
+ LLVMValueRef rcp_a)
+{
+ LLVMValueRef two = lp_build_const_vec(bld->type, 2.0);
+ LLVMValueRef res;
+
+ res = LLVMBuildFMul(bld->builder, a, rcp_a, "");
+ res = LLVMBuildFSub(bld->builder, two, res, "");
+ res = LLVMBuildFMul(bld->builder, rcp_a, res, "");
+
+ return res;
+}
+
+
LLVMValueRef
lp_build_rcp(struct lp_build_context *bld,
LLVMValueRef a)
{
const struct lp_type type = bld->type;
+ assert(lp_check_value(type, a));
+
if(a == bld->zero)
return bld->undef;
if(a == bld->one)
@@ -1177,15 +1471,65 @@ lp_build_rcp(struct lp_build_context *bld,
if(LLVMIsConstant(a))
return LLVMConstFDiv(bld->one, a);
- if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4)
- /* FIXME: improve precision */
- return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a);
+ /*
+ * We don't use RCPPS because:
+ * - it only has 10bits of precision
+ * - it doesn't even get the reciprocate of 1.0 exactly
+ * - doing Newton-Rapshon steps yields wrong (NaN) values for 0.0 or Inf
+ * - for recent processors the benefit over DIVPS is marginal, a case
+ * depedent
+ *
+ * We could still use it on certain processors if benchmarks show that the
+ * RCPPS plus necessary workarounds are still preferrable to DIVPS; or for
+ * particular uses that require less workarounds.
+ */
+
+ if (FALSE && util_cpu_caps.has_sse && type.width == 32 && type.length == 4) {
+ const unsigned num_iterations = 0;
+ LLVMValueRef res;
+ unsigned i;
+
+ res = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", bld->vec_type, a);
+
+ for (i = 0; i < num_iterations; ++i) {
+ res = lp_build_rcp_refine(bld, a, res);
+ }
+
+ return res;
+ }
return LLVMBuildFDiv(bld->builder, bld->one, a, "");
}
/**
+ * Do one Newton-Raphson step to improve rsqrt precision:
+ *
+ * x_{i+1} = 0.5 * x_i * (3.0 - a * x_i * x_i)
+ *
+ * See also:
+ * - http://softwarecommunity.intel.com/articles/eng/1818.htm
+ */
+static INLINE LLVMValueRef
+lp_build_rsqrt_refine(struct lp_build_context *bld,
+ LLVMValueRef a,
+ LLVMValueRef rsqrt_a)
+{
+ LLVMValueRef half = lp_build_const_vec(bld->type, 0.5);
+ LLVMValueRef three = lp_build_const_vec(bld->type, 3.0);
+ LLVMValueRef res;
+
+ res = LLVMBuildFMul(bld->builder, rsqrt_a, rsqrt_a, "");
+ res = LLVMBuildFMul(bld->builder, a, res, "");
+ res = LLVMBuildFSub(bld->builder, three, res, "");
+ res = LLVMBuildFMul(bld->builder, rsqrt_a, res, "");
+ res = LLVMBuildFMul(bld->builder, half, res, "");
+
+ return res;
+}
+
+
+/**
* Generate 1/sqrt(a)
*/
LLVMValueRef
@@ -1194,70 +1538,476 @@ lp_build_rsqrt(struct lp_build_context *bld,
{
const struct lp_type type = bld->type;
+ assert(lp_check_value(type, a));
+
assert(type.floating);
- if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4)
- return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rsqrt.ps", lp_build_vec_type(type), a);
+ if (util_cpu_caps.has_sse && type.width == 32 && type.length == 4) {
+ const unsigned num_iterations = 0;
+ LLVMValueRef res;
+ unsigned i;
+
+ res = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rsqrt.ps", bld->vec_type, a);
+
+ for (i = 0; i < num_iterations; ++i) {
+ res = lp_build_rsqrt_refine(bld, a, res);
+ }
+
+ return res;
+ }
return lp_build_rcp(bld, lp_build_sqrt(bld, a));
}
+static inline LLVMValueRef
+lp_build_const_v4si(unsigned long value)
+{
+ LLVMValueRef element = LLVMConstInt(LLVMInt32Type(), value, 0);
+ LLVMValueRef elements[4] = { element, element, element, element };
+ return LLVMConstVector(elements, 4);
+}
+
+static inline LLVMValueRef
+lp_build_const_v4sf(float value)
+{
+ LLVMValueRef element = LLVMConstReal(LLVMFloatType(), value);
+ LLVMValueRef elements[4] = { element, element, element, element };
+ return LLVMConstVector(elements, 4);
+}
+
+
/**
- * Generate cos(a)
+ * Generate sin(a) using SSE2
*/
LLVMValueRef
-lp_build_cos(struct lp_build_context *bld,
- LLVMValueRef a)
+lp_build_sin(struct lp_build_context *bld,
+ LLVMValueRef a)
{
-#ifdef PIPE_OS_WINDOWS
+ struct lp_type int_type = lp_int_type(bld->type);
+ LLVMBuilderRef b = bld->builder;
+ LLVMTypeRef v4sf = LLVMVectorType(LLVMFloatType(), 4);
+ LLVMTypeRef v4si = LLVMVectorType(LLVMInt32Type(), 4);
+
/*
- * FIXME: X86 backend translates llvm.cos.v4f32 to 4 calls to CRT's cosf()
- * which is neither efficient nor does the CRT linkage work on Windows
- * causing segmentation fault. So simply disable the code for now.
+ * take the absolute value,
+ * x = _mm_and_ps(x, *(v4sf*)_ps_inv_sign_mask);
*/
- return bld->one;
-#else
- const struct lp_type type = bld->type;
- LLVMTypeRef vec_type = lp_build_vec_type(type);
- char intrinsic[32];
- /* TODO: optimize the constant case */
+ LLVMValueRef inv_sig_mask = lp_build_const_v4si(~0x80000000);
+ LLVMValueRef a_v4si = LLVMBuildBitCast(b, a, v4si, "a_v4si");
- assert(type.floating);
- util_snprintf(intrinsic, sizeof intrinsic, "llvm.cos.v%uf%u", type.length, type.width);
+ LLVMValueRef absi = LLVMBuildAnd(b, a_v4si, inv_sig_mask, "absi");
+ LLVMValueRef x_abs = LLVMBuildBitCast(b, absi, v4sf, "x_abs");
- return lp_build_intrinsic_unary(bld->builder, intrinsic, vec_type, a);
-#endif
+ /*
+ * extract the sign bit (upper one)
+ * sign_bit = _mm_and_ps(sign_bit, *(v4sf*)_ps_sign_mask);
+ */
+ LLVMValueRef sig_mask = lp_build_const_v4si(0x80000000);
+ LLVMValueRef sign_bit_i = LLVMBuildAnd(b, a_v4si, sig_mask, "sign_bit_i");
+
+ /*
+ * scale by 4/Pi
+ * y = _mm_mul_ps(x, *(v4sf*)_ps_cephes_FOPI);
+ */
+
+ LLVMValueRef FOPi = lp_build_const_v4sf(1.27323954473516);
+ LLVMValueRef scale_y = LLVMBuildFMul(b, x_abs, FOPi, "scale_y");
+
+ /*
+ * store the integer part of y in mm0
+ * emm2 = _mm_cvttps_epi32(y);
+ */
+
+ LLVMValueRef emm2_i = LLVMBuildFPToSI(b, scale_y, v4si, "emm2_i");
+
+ /*
+ * j=(j+1) & (~1) (see the cephes sources)
+ * emm2 = _mm_add_epi32(emm2, *(v4si*)_pi32_1);
+ */
+
+ LLVMValueRef all_one = lp_build_const_v4si(1);
+ LLVMValueRef emm2_add = LLVMBuildAdd(b, emm2_i, all_one, "emm2_add");
+ /*
+ * emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_inv1);
+ */
+ LLVMValueRef inv_one = lp_build_const_v4si(~1);
+ LLVMValueRef emm2_and = LLVMBuildAnd(b, emm2_add, inv_one, "emm2_and");
+
+ /*
+ * y = _mm_cvtepi32_ps(emm2);
+ */
+ LLVMValueRef y_2 = LLVMBuildSIToFP(b, emm2_and, v4sf, "y_2");
+
+ /* get the swap sign flag
+ * emm0 = _mm_and_si128(emm2, *(v4si*)_pi32_4);
+ */
+ LLVMValueRef pi32_4 = lp_build_const_v4si(4);
+ LLVMValueRef emm0_and = LLVMBuildAnd(b, emm2_add, pi32_4, "emm0_and");
+
+ /*
+ * emm2 = _mm_slli_epi32(emm0, 29);
+ */
+ LLVMValueRef const_29 = lp_build_const_v4si(29);
+ LLVMValueRef swap_sign_bit = LLVMBuildShl(b, emm0_and, const_29, "swap_sign_bit");
+
+ /*
+ * get the polynom selection mask
+ * there is one polynom for 0 <= x <= Pi/4
+ * and another one for Pi/4<x<=Pi/2
+ * Both branches will be computed.
+ *
+ * emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_2);
+ * emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
+ */
+
+ LLVMValueRef pi32_2 = lp_build_const_v4si(2);
+ LLVMValueRef emm2_3 = LLVMBuildAnd(b, emm2_and, pi32_2, "emm2_3");
+ LLVMValueRef poly_mask = lp_build_compare(b, int_type, PIPE_FUNC_EQUAL,
+ emm2_3, lp_build_const_v4si(0));
+ /*
+ * sign_bit = _mm_xor_ps(sign_bit, swap_sign_bit);
+ */
+ LLVMValueRef sign_bit_1 = LLVMBuildXor(b, sign_bit_i, swap_sign_bit, "sign_bit");
+
+ /*
+ * _PS_CONST(minus_cephes_DP1, -0.78515625);
+ * _PS_CONST(minus_cephes_DP2, -2.4187564849853515625e-4);
+ * _PS_CONST(minus_cephes_DP3, -3.77489497744594108e-8);
+ */
+ LLVMValueRef DP1 = lp_build_const_v4sf(-0.78515625);
+ LLVMValueRef DP2 = lp_build_const_v4sf(-2.4187564849853515625e-4);
+ LLVMValueRef DP3 = lp_build_const_v4sf(-3.77489497744594108e-8);
+
+ /*
+ * The magic pass: "Extended precision modular arithmetic"
+ * x = ((x - y * DP1) - y * DP2) - y * DP3;
+ * xmm1 = _mm_mul_ps(y, xmm1);
+ * xmm2 = _mm_mul_ps(y, xmm2);
+ * xmm3 = _mm_mul_ps(y, xmm3);
+ */
+ LLVMValueRef xmm1 = LLVMBuildFMul(b, y_2, DP1, "xmm1");
+ LLVMValueRef xmm2 = LLVMBuildFMul(b, y_2, DP2, "xmm2");
+ LLVMValueRef xmm3 = LLVMBuildFMul(b, y_2, DP3, "xmm3");
+
+ /*
+ * x = _mm_add_ps(x, xmm1);
+ * x = _mm_add_ps(x, xmm2);
+ * x = _mm_add_ps(x, xmm3);
+ */
+
+ LLVMValueRef x_1 = LLVMBuildFAdd(b, x_abs, xmm1, "x_1");
+ LLVMValueRef x_2 = LLVMBuildFAdd(b, x_1, xmm2, "x_2");
+ LLVMValueRef x_3 = LLVMBuildFAdd(b, x_2, xmm3, "x_3");
+
+ /*
+ * Evaluate the first polynom (0 <= x <= Pi/4)
+ *
+ * z = _mm_mul_ps(x,x);
+ */
+ LLVMValueRef z = LLVMBuildFMul(b, x_3, x_3, "z");
+
+ /*
+ * _PS_CONST(coscof_p0, 2.443315711809948E-005);
+ * _PS_CONST(coscof_p1, -1.388731625493765E-003);
+ * _PS_CONST(coscof_p2, 4.166664568298827E-002);
+ */
+ LLVMValueRef coscof_p0 = lp_build_const_v4sf(2.443315711809948E-005);
+ LLVMValueRef coscof_p1 = lp_build_const_v4sf(-1.388731625493765E-003);
+ LLVMValueRef coscof_p2 = lp_build_const_v4sf(4.166664568298827E-002);
+
+ /*
+ * y = *(v4sf*)_ps_coscof_p0;
+ * y = _mm_mul_ps(y, z);
+ */
+ LLVMValueRef y_3 = LLVMBuildFMul(b, z, coscof_p0, "y_3");
+ LLVMValueRef y_4 = LLVMBuildFAdd(b, y_3, coscof_p1, "y_4");
+ LLVMValueRef y_5 = LLVMBuildFMul(b, y_4, z, "y_5");
+ LLVMValueRef y_6 = LLVMBuildFAdd(b, y_5, coscof_p2, "y_6");
+ LLVMValueRef y_7 = LLVMBuildFMul(b, y_6, z, "y_7");
+ LLVMValueRef y_8 = LLVMBuildFMul(b, y_7, z, "y_8");
+
+
+ /*
+ * tmp = _mm_mul_ps(z, *(v4sf*)_ps_0p5);
+ * y = _mm_sub_ps(y, tmp);
+ * y = _mm_add_ps(y, *(v4sf*)_ps_1);
+ */
+ LLVMValueRef half = lp_build_const_v4sf(0.5);
+ LLVMValueRef tmp = LLVMBuildFMul(b, z, half, "tmp");
+ LLVMValueRef y_9 = LLVMBuildFSub(b, y_8, tmp, "y_8");
+ LLVMValueRef one = lp_build_const_v4sf(1.0);
+ LLVMValueRef y_10 = LLVMBuildFAdd(b, y_9, one, "y_9");
+
+ /*
+ * _PS_CONST(sincof_p0, -1.9515295891E-4);
+ * _PS_CONST(sincof_p1, 8.3321608736E-3);
+ * _PS_CONST(sincof_p2, -1.6666654611E-1);
+ */
+ LLVMValueRef sincof_p0 = lp_build_const_v4sf(-1.9515295891E-4);
+ LLVMValueRef sincof_p1 = lp_build_const_v4sf(8.3321608736E-3);
+ LLVMValueRef sincof_p2 = lp_build_const_v4sf(-1.6666654611E-1);
+
+ /*
+ * Evaluate the second polynom (Pi/4 <= x <= 0)
+ *
+ * y2 = *(v4sf*)_ps_sincof_p0;
+ * y2 = _mm_mul_ps(y2, z);
+ * y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p1);
+ * y2 = _mm_mul_ps(y2, z);
+ * y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p2);
+ * y2 = _mm_mul_ps(y2, z);
+ * y2 = _mm_mul_ps(y2, x);
+ * y2 = _mm_add_ps(y2, x);
+ */
+
+ LLVMValueRef y2_3 = LLVMBuildFMul(b, z, sincof_p0, "y2_3");
+ LLVMValueRef y2_4 = LLVMBuildFAdd(b, y2_3, sincof_p1, "y2_4");
+ LLVMValueRef y2_5 = LLVMBuildFMul(b, y2_4, z, "y2_5");
+ LLVMValueRef y2_6 = LLVMBuildFAdd(b, y2_5, sincof_p2, "y2_6");
+ LLVMValueRef y2_7 = LLVMBuildFMul(b, y2_6, z, "y2_7");
+ LLVMValueRef y2_8 = LLVMBuildFMul(b, y2_7, x_3, "y2_8");
+ LLVMValueRef y2_9 = LLVMBuildFAdd(b, y2_8, x_3, "y2_9");
+
+ /*
+ * select the correct result from the two polynoms
+ * xmm3 = poly_mask;
+ * y2 = _mm_and_ps(xmm3, y2); //, xmm3);
+ * y = _mm_andnot_ps(xmm3, y);
+ * y = _mm_add_ps(y,y2);
+ */
+ LLVMValueRef y2_i = LLVMBuildBitCast(b, y2_9, v4si, "y2_i");
+ LLVMValueRef y_i = LLVMBuildBitCast(b, y_10, v4si, "y_i");
+ LLVMValueRef y2_and = LLVMBuildAnd(b, y2_i, poly_mask, "y2_and");
+ LLVMValueRef inv = lp_build_const_v4si(~0);
+ LLVMValueRef poly_mask_inv = LLVMBuildXor(b, poly_mask, inv, "poly_mask_inv");
+ LLVMValueRef y_and = LLVMBuildAnd(b, y_i, poly_mask_inv, "y_and");
+ LLVMValueRef y_combine = LLVMBuildAdd(b, y_and, y2_and, "y_combine");
+
+ /*
+ * update the sign
+ * y = _mm_xor_ps(y, sign_bit);
+ */
+ LLVMValueRef y_sign = LLVMBuildXor(b, y_combine, sign_bit_1, "y_sin");
+ LLVMValueRef y_result = LLVMBuildBitCast(b, y_sign, v4sf, "y_result");
+ return y_result;
}
/**
- * Generate sin(a)
+ * Generate cos(a) using SSE2
*/
LLVMValueRef
-lp_build_sin(struct lp_build_context *bld,
- LLVMValueRef a)
+lp_build_cos(struct lp_build_context *bld,
+ LLVMValueRef a)
{
-#ifdef PIPE_OS_WINDOWS
+ struct lp_type int_type = lp_int_type(bld->type);
+ LLVMBuilderRef b = bld->builder;
+ LLVMTypeRef v4sf = LLVMVectorType(LLVMFloatType(), 4);
+ LLVMTypeRef v4si = LLVMVectorType(LLVMInt32Type(), 4);
+
/*
- * FIXME: X86 backend translates llvm.sin.v4f32 to 4 calls to CRT's sinf()
- * which is neither efficient nor does the CRT linkage work on Windows
- * causing segmentation fault. So simply disable the code for now.
+ * take the absolute value,
+ * x = _mm_and_ps(x, *(v4sf*)_ps_inv_sign_mask);
*/
- return bld->zero;
-#else
- const struct lp_type type = bld->type;
- LLVMTypeRef vec_type = lp_build_vec_type(type);
- char intrinsic[32];
- /* TODO: optimize the constant case */
+ LLVMValueRef inv_sig_mask = lp_build_const_v4si(~0x80000000);
+ LLVMValueRef a_v4si = LLVMBuildBitCast(b, a, v4si, "a_v4si");
- assert(type.floating);
- util_snprintf(intrinsic, sizeof intrinsic, "llvm.sin.v%uf%u", type.length, type.width);
+ LLVMValueRef absi = LLVMBuildAnd(b, a_v4si, inv_sig_mask, "absi");
+ LLVMValueRef x_abs = LLVMBuildBitCast(b, absi, v4sf, "x_abs");
- return lp_build_intrinsic_unary(bld->builder, intrinsic, vec_type, a);
-#endif
+ /*
+ * scale by 4/Pi
+ * y = _mm_mul_ps(x, *(v4sf*)_ps_cephes_FOPI);
+ */
+
+ LLVMValueRef FOPi = lp_build_const_v4sf(1.27323954473516);
+ LLVMValueRef scale_y = LLVMBuildFMul(b, x_abs, FOPi, "scale_y");
+
+ /*
+ * store the integer part of y in mm0
+ * emm2 = _mm_cvttps_epi32(y);
+ */
+
+ LLVMValueRef emm2_i = LLVMBuildFPToSI(b, scale_y, v4si, "emm2_i");
+
+ /*
+ * j=(j+1) & (~1) (see the cephes sources)
+ * emm2 = _mm_add_epi32(emm2, *(v4si*)_pi32_1);
+ */
+
+ LLVMValueRef all_one = lp_build_const_v4si(1);
+ LLVMValueRef emm2_add = LLVMBuildAdd(b, emm2_i, all_one, "emm2_add");
+ /*
+ * emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_inv1);
+ */
+ LLVMValueRef inv_one = lp_build_const_v4si(~1);
+ LLVMValueRef emm2_and = LLVMBuildAnd(b, emm2_add, inv_one, "emm2_and");
+
+ /*
+ * y = _mm_cvtepi32_ps(emm2);
+ */
+ LLVMValueRef y_2 = LLVMBuildSIToFP(b, emm2_and, v4sf, "y_2");
+
+
+ /*
+ * emm2 = _mm_sub_epi32(emm2, *(v4si*)_pi32_2);
+ */
+ LLVMValueRef const_2 = lp_build_const_v4si(2);
+ LLVMValueRef emm2_2 = LLVMBuildSub(b, emm2_and, const_2, "emm2_2");
+
+
+ /* get the swap sign flag
+ * emm0 = _mm_andnot_si128(emm2, *(v4si*)_pi32_4);
+ */
+ LLVMValueRef inv = lp_build_const_v4si(~0);
+ LLVMValueRef emm0_not = LLVMBuildXor(b, emm2_2, inv, "emm0_not");
+ LLVMValueRef pi32_4 = lp_build_const_v4si(4);
+ LLVMValueRef emm0_and = LLVMBuildAnd(b, emm0_not, pi32_4, "emm0_and");
+
+ /*
+ * emm2 = _mm_slli_epi32(emm0, 29);
+ */
+ LLVMValueRef const_29 = lp_build_const_v4si(29);
+ LLVMValueRef sign_bit = LLVMBuildShl(b, emm0_and, const_29, "sign_bit");
+
+ /*
+ * get the polynom selection mask
+ * there is one polynom for 0 <= x <= Pi/4
+ * and another one for Pi/4<x<=Pi/2
+ * Both branches will be computed.
+ *
+ * emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_2);
+ * emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
+ */
+
+ LLVMValueRef pi32_2 = lp_build_const_v4si(2);
+ LLVMValueRef emm2_3 = LLVMBuildAnd(b, emm2_2, pi32_2, "emm2_3");
+ LLVMValueRef poly_mask = lp_build_compare(b, int_type, PIPE_FUNC_EQUAL,
+ emm2_3, lp_build_const_v4si(0));
+
+ /*
+ * _PS_CONST(minus_cephes_DP1, -0.78515625);
+ * _PS_CONST(minus_cephes_DP2, -2.4187564849853515625e-4);
+ * _PS_CONST(minus_cephes_DP3, -3.77489497744594108e-8);
+ */
+ LLVMValueRef DP1 = lp_build_const_v4sf(-0.78515625);
+ LLVMValueRef DP2 = lp_build_const_v4sf(-2.4187564849853515625e-4);
+ LLVMValueRef DP3 = lp_build_const_v4sf(-3.77489497744594108e-8);
+
+ /*
+ * The magic pass: "Extended precision modular arithmetic"
+ * x = ((x - y * DP1) - y * DP2) - y * DP3;
+ * xmm1 = _mm_mul_ps(y, xmm1);
+ * xmm2 = _mm_mul_ps(y, xmm2);
+ * xmm3 = _mm_mul_ps(y, xmm3);
+ */
+ LLVMValueRef xmm1 = LLVMBuildFMul(b, y_2, DP1, "xmm1");
+ LLVMValueRef xmm2 = LLVMBuildFMul(b, y_2, DP2, "xmm2");
+ LLVMValueRef xmm3 = LLVMBuildFMul(b, y_2, DP3, "xmm3");
+
+ /*
+ * x = _mm_add_ps(x, xmm1);
+ * x = _mm_add_ps(x, xmm2);
+ * x = _mm_add_ps(x, xmm3);
+ */
+
+ LLVMValueRef x_1 = LLVMBuildFAdd(b, x_abs, xmm1, "x_1");
+ LLVMValueRef x_2 = LLVMBuildFAdd(b, x_1, xmm2, "x_2");
+ LLVMValueRef x_3 = LLVMBuildFAdd(b, x_2, xmm3, "x_3");
+
+ /*
+ * Evaluate the first polynom (0 <= x <= Pi/4)
+ *
+ * z = _mm_mul_ps(x,x);
+ */
+ LLVMValueRef z = LLVMBuildFMul(b, x_3, x_3, "z");
+
+ /*
+ * _PS_CONST(coscof_p0, 2.443315711809948E-005);
+ * _PS_CONST(coscof_p1, -1.388731625493765E-003);
+ * _PS_CONST(coscof_p2, 4.166664568298827E-002);
+ */
+ LLVMValueRef coscof_p0 = lp_build_const_v4sf(2.443315711809948E-005);
+ LLVMValueRef coscof_p1 = lp_build_const_v4sf(-1.388731625493765E-003);
+ LLVMValueRef coscof_p2 = lp_build_const_v4sf(4.166664568298827E-002);
+
+ /*
+ * y = *(v4sf*)_ps_coscof_p0;
+ * y = _mm_mul_ps(y, z);
+ */
+ LLVMValueRef y_3 = LLVMBuildFMul(b, z, coscof_p0, "y_3");
+ LLVMValueRef y_4 = LLVMBuildFAdd(b, y_3, coscof_p1, "y_4");
+ LLVMValueRef y_5 = LLVMBuildFMul(b, y_4, z, "y_5");
+ LLVMValueRef y_6 = LLVMBuildFAdd(b, y_5, coscof_p2, "y_6");
+ LLVMValueRef y_7 = LLVMBuildFMul(b, y_6, z, "y_7");
+ LLVMValueRef y_8 = LLVMBuildFMul(b, y_7, z, "y_8");
+
+
+ /*
+ * tmp = _mm_mul_ps(z, *(v4sf*)_ps_0p5);
+ * y = _mm_sub_ps(y, tmp);
+ * y = _mm_add_ps(y, *(v4sf*)_ps_1);
+ */
+ LLVMValueRef half = lp_build_const_v4sf(0.5);
+ LLVMValueRef tmp = LLVMBuildFMul(b, z, half, "tmp");
+ LLVMValueRef y_9 = LLVMBuildFSub(b, y_8, tmp, "y_8");
+ LLVMValueRef one = lp_build_const_v4sf(1.0);
+ LLVMValueRef y_10 = LLVMBuildFAdd(b, y_9, one, "y_9");
+
+ /*
+ * _PS_CONST(sincof_p0, -1.9515295891E-4);
+ * _PS_CONST(sincof_p1, 8.3321608736E-3);
+ * _PS_CONST(sincof_p2, -1.6666654611E-1);
+ */
+ LLVMValueRef sincof_p0 = lp_build_const_v4sf(-1.9515295891E-4);
+ LLVMValueRef sincof_p1 = lp_build_const_v4sf(8.3321608736E-3);
+ LLVMValueRef sincof_p2 = lp_build_const_v4sf(-1.6666654611E-1);
+
+ /*
+ * Evaluate the second polynom (Pi/4 <= x <= 0)
+ *
+ * y2 = *(v4sf*)_ps_sincof_p0;
+ * y2 = _mm_mul_ps(y2, z);
+ * y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p1);
+ * y2 = _mm_mul_ps(y2, z);
+ * y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p2);
+ * y2 = _mm_mul_ps(y2, z);
+ * y2 = _mm_mul_ps(y2, x);
+ * y2 = _mm_add_ps(y2, x);
+ */
+
+ LLVMValueRef y2_3 = LLVMBuildFMul(b, z, sincof_p0, "y2_3");
+ LLVMValueRef y2_4 = LLVMBuildFAdd(b, y2_3, sincof_p1, "y2_4");
+ LLVMValueRef y2_5 = LLVMBuildFMul(b, y2_4, z, "y2_5");
+ LLVMValueRef y2_6 = LLVMBuildFAdd(b, y2_5, sincof_p2, "y2_6");
+ LLVMValueRef y2_7 = LLVMBuildFMul(b, y2_6, z, "y2_7");
+ LLVMValueRef y2_8 = LLVMBuildFMul(b, y2_7, x_3, "y2_8");
+ LLVMValueRef y2_9 = LLVMBuildFAdd(b, y2_8, x_3, "y2_9");
+
+ /*
+ * select the correct result from the two polynoms
+ * xmm3 = poly_mask;
+ * y2 = _mm_and_ps(xmm3, y2); //, xmm3);
+ * y = _mm_andnot_ps(xmm3, y);
+ * y = _mm_add_ps(y,y2);
+ */
+ LLVMValueRef y2_i = LLVMBuildBitCast(b, y2_9, v4si, "y2_i");
+ LLVMValueRef y_i = LLVMBuildBitCast(b, y_10, v4si, "y_i");
+ LLVMValueRef y2_and = LLVMBuildAnd(b, y2_i, poly_mask, "y2_and");
+ LLVMValueRef poly_mask_inv = LLVMBuildXor(b, poly_mask, inv, "poly_mask_inv");
+ LLVMValueRef y_and = LLVMBuildAnd(b, y_i, poly_mask_inv, "y_and");
+ LLVMValueRef y_combine = LLVMBuildAdd(b, y_and, y2_and, "y_combine");
+
+ /*
+ * update the sign
+ * y = _mm_xor_ps(y, sign_bit);
+ */
+ LLVMValueRef y_sign = LLVMBuildXor(b, y_combine, sign_bit, "y_sin");
+ LLVMValueRef y_result = LLVMBuildBitCast(b, y_sign, v4sf, "y_result");
+ return y_result;
}
@@ -1270,9 +2020,11 @@ lp_build_pow(struct lp_build_context *bld,
LLVMValueRef y)
{
/* TODO: optimize the constant case */
- if(LLVMIsConstant(x) && LLVMIsConstant(y))
+ if (gallivm_debug & GALLIVM_DEBUG_PERF &&
+ LLVMIsConstant(x) && LLVMIsConstant(y)) {
debug_printf("%s: inefficient/imprecise constant arithmetic\n",
__FUNCTION__);
+ }
return lp_build_exp2(bld, lp_build_mul(bld, lp_build_log2(bld, x), y));
}
@@ -1288,6 +2040,8 @@ lp_build_exp(struct lp_build_context *bld,
/* log2(e) = 1/log(2) */
LLVMValueRef log2e = lp_build_const_vec(bld->type, 1.4426950408889634);
+ assert(lp_check_value(bld->type, x));
+
return lp_build_mul(bld, log2e, lp_build_exp2(bld, x));
}
@@ -1302,14 +2056,12 @@ lp_build_log(struct lp_build_context *bld,
/* log(2) */
LLVMValueRef log2 = lp_build_const_vec(bld->type, 0.69314718055994529);
+ assert(lp_check_value(bld->type, x));
+
return lp_build_mul(bld, log2, lp_build_exp2(bld, x));
}
-#define EXP_POLY_DEGREE 3
-#define LOG_POLY_DEGREE 5
-
-
/**
* Generate polynomial.
* Ex: coeffs[0] + x * coeffs[1] + x^2 * coeffs[2].
@@ -1321,22 +2073,22 @@ lp_build_polynomial(struct lp_build_context *bld,
unsigned num_coeffs)
{
const struct lp_type type = bld->type;
- LLVMTypeRef float_type = LLVMFloatType();
LLVMValueRef res = NULL;
unsigned i;
+ assert(lp_check_value(bld->type, x));
+
/* TODO: optimize the constant case */
- if(LLVMIsConstant(x))
+ if (gallivm_debug & GALLIVM_DEBUG_PERF &&
+ LLVMIsConstant(x)) {
debug_printf("%s: inefficient/imprecise constant arithmetic\n",
__FUNCTION__);
+ }
for (i = num_coeffs; i--; ) {
LLVMValueRef coeff;
- if (type.length == 1)
- coeff = LLVMConstReal(float_type, coeffs[i]);
- else
- coeff = lp_build_const_vec(type, coeffs[i]);
+ coeff = lp_build_const_vec(type, coeffs[i]);
if(res)
res = lp_build_add(bld, coeff, lp_build_mul(bld, x, res));
@@ -1352,17 +2104,31 @@ lp_build_polynomial(struct lp_build_context *bld,
/**
- * Minimax polynomial fit of 2**x, in range [-0.5, 0.5[
+ * Minimax polynomial fit of 2**x, in range [0, 1[
*/
const double lp_build_exp2_polynomial[] = {
#if EXP_POLY_DEGREE == 5
- 9.9999994e-1, 6.9315308e-1, 2.4015361e-1, 5.5826318e-2, 8.9893397e-3, 1.8775767e-3
+ 0.999999999690134838155,
+ 0.583974334321735217258,
+ 0.164553105719676828492,
+ 0.0292811063701710962255,
+ 0.00354944426657875141846,
+ 0.000296253726543423377365
#elif EXP_POLY_DEGREE == 4
- 1.0000026, 6.9300383e-1, 2.4144275e-1, 5.2011464e-2, 1.3534167e-2
+ 1.00000001502262084505,
+ 0.563586057338685991394,
+ 0.150436017652442413623,
+ 0.0243220604213317927308,
+ 0.0025359088446580436489
#elif EXP_POLY_DEGREE == 3
- 9.9992520e-1, 6.9583356e-1, 2.2606716e-1, 7.8024521e-2
+ 0.999925218562710312959,
+ 0.695833540494823811697,
+ 0.226067155427249155588,
+ 0.0780245226406372992967
#elif EXP_POLY_DEGREE == 2
- 1.0017247, 6.5763628e-1, 3.3718944e-1
+ 1.00172476321474503578,
+ 0.657636275736077639316,
+ 0.33718943461968720704
#else
#error
#endif
@@ -1385,28 +2151,31 @@ lp_build_exp2_approx(struct lp_build_context *bld,
LLVMValueRef expfpart = NULL;
LLVMValueRef res = NULL;
+ assert(lp_check_value(bld->type, x));
+
if(p_exp2_int_part || p_frac_part || p_exp2) {
/* TODO: optimize the constant case */
- if(LLVMIsConstant(x))
+ if (gallivm_debug & GALLIVM_DEBUG_PERF &&
+ LLVMIsConstant(x)) {
debug_printf("%s: inefficient/imprecise constant arithmetic\n",
__FUNCTION__);
+ }
assert(type.floating && type.width == 32);
x = lp_build_min(bld, x, lp_build_const_vec(type, 129.0));
x = lp_build_max(bld, x, lp_build_const_vec(type, -126.99999));
- /* ipart = int(x - 0.5) */
- ipart = LLVMBuildSub(bld->builder, x, lp_build_const_vec(type, 0.5f), "");
- ipart = LLVMBuildFPToSI(bld->builder, ipart, int_vec_type, "");
+ /* ipart = floor(x) */
+ ipart = lp_build_floor(bld, x);
/* fpart = x - ipart */
- fpart = LLVMBuildSIToFP(bld->builder, ipart, vec_type, "");
- fpart = LLVMBuildSub(bld->builder, x, fpart, "");
+ fpart = LLVMBuildFSub(bld->builder, x, ipart, "");
}
if(p_exp2_int_part || p_exp2) {
/* expipart = (float) (1 << ipart) */
+ ipart = LLVMBuildFPToSI(bld->builder, ipart, int_vec_type, "");
expipart = LLVMBuildAdd(bld->builder, ipart, lp_build_const_int_vec(type, 127), "");
expipart = LLVMBuildShl(bld->builder, expipart, lp_build_const_int_vec(type, 23), "");
expipart = LLVMBuildBitCast(bld->builder, expipart, vec_type, "");
@@ -1416,7 +2185,7 @@ lp_build_exp2_approx(struct lp_build_context *bld,
expfpart = lp_build_polynomial(bld, fpart, lp_build_exp2_polynomial,
Elements(lp_build_exp2_polynomial));
- res = LLVMBuildMul(bld->builder, expipart, expfpart, "");
+ res = LLVMBuildFMul(bld->builder, expipart, expfpart, "");
}
if(p_exp2_int_part)
@@ -1447,13 +2216,27 @@ lp_build_exp2(struct lp_build_context *bld,
*/
const double lp_build_log2_polynomial[] = {
#if LOG_POLY_DEGREE == 6
- 3.11578814719469302614, -3.32419399085241980044, 2.59883907202499966007, -1.23152682416275988241, 0.318212422185251071475, -0.0344359067839062357313
+ 3.11578814719469302614,
+ -3.32419399085241980044,
+ 2.59883907202499966007,
+ -1.23152682416275988241,
+ 0.318212422185251071475,
+ -0.0344359067839062357313
#elif LOG_POLY_DEGREE == 5
- 2.8882704548164776201, -2.52074962577807006663, 1.48116647521213171641, -0.465725644288844778798, 0.0596515482674574969533
+ 2.8882704548164776201,
+ -2.52074962577807006663,
+ 1.48116647521213171641,
+ -0.465725644288844778798,
+ 0.0596515482674574969533
#elif LOG_POLY_DEGREE == 4
- 2.61761038894603480148, -1.75647175389045657003, 0.688243882994381274313, -0.107254423828329604454
+ 2.61761038894603480148,
+ -1.75647175389045657003,
+ 0.688243882994381274313,
+ -0.107254423828329604454
#elif LOG_POLY_DEGREE == 3
- 2.28330284476918490682, -1.04913055217340124191, 0.204446009836232697516
+ 2.28330284476918490682,
+ -1.04913055217340124191,
+ 0.204446009836232697516
#else
#error
#endif
@@ -1485,11 +2268,15 @@ lp_build_log2_approx(struct lp_build_context *bld,
LLVMValueRef logmant = NULL;
LLVMValueRef res = NULL;
+ assert(lp_check_value(bld->type, x));
+
if(p_exp || p_floor_log2 || p_log2) {
/* TODO: optimize the constant case */
- if(LLVMIsConstant(x))
+ if (gallivm_debug & GALLIVM_DEBUG_PERF &&
+ LLVMIsConstant(x)) {
debug_printf("%s: inefficient/imprecise constant arithmetic\n",
__FUNCTION__);
+ }
assert(type.floating && type.width == 32);
@@ -1515,9 +2302,9 @@ lp_build_log2_approx(struct lp_build_context *bld,
Elements(lp_build_log2_polynomial));
/* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/
- logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildSub(bld->builder, mant, bld->one, ""), "");
+ logmant = LLVMBuildFMul(bld->builder, logmant, LLVMBuildFSub(bld->builder, mant, bld->one, ""), "");
- res = LLVMBuildAdd(bld->builder, logmant, logexp, "");
+ res = LLVMBuildFAdd(bld->builder, logmant, logexp, "");
}
if(p_exp) {
@@ -1533,89 +2320,90 @@ lp_build_log2_approx(struct lp_build_context *bld,
}
-/** scalar version of above function */
-static void
-lp_build_float_log2_approx(struct lp_build_context *bld,
- LLVMValueRef x,
- LLVMValueRef *p_exp,
- LLVMValueRef *p_floor_log2,
- LLVMValueRef *p_log2)
+LLVMValueRef
+lp_build_log2(struct lp_build_context *bld,
+ LLVMValueRef x)
+{
+ LLVMValueRef res;
+ lp_build_log2_approx(bld, x, NULL, NULL, &res);
+ return res;
+}
+
+
+/**
+ * Faster (and less accurate) log2.
+ *
+ * log2(x) = floor(log2(x)) + frac(x)
+ *
+ * See http://www.flipcode.com/archives/Fast_log_Function.shtml
+ */
+LLVMValueRef
+lp_build_fast_log2(struct lp_build_context *bld,
+ LLVMValueRef x)
{
const struct lp_type type = bld->type;
- LLVMTypeRef float_type = LLVMFloatType();
- LLVMTypeRef int_type = LLVMIntType(type.width);
+ LLVMTypeRef vec_type = bld->vec_type;
+ LLVMTypeRef int_vec_type = bld->int_vec_type;
- LLVMValueRef expmask = LLVMConstInt(int_type, 0x7f800000, 0);
- LLVMValueRef mantmask = LLVMConstInt(int_type, 0x007fffff, 0);
- LLVMValueRef one = LLVMConstBitCast(bld->one, int_type);
+ unsigned mantissa = lp_mantissa(type);
+ LLVMValueRef mantmask = lp_build_const_int_vec(type, (1ULL << mantissa) - 1);
+ LLVMValueRef one = LLVMConstBitCast(bld->one, int_vec_type);
- LLVMValueRef i = NULL;
- LLVMValueRef exp = NULL;
- LLVMValueRef mant = NULL;
- LLVMValueRef logexp = NULL;
- LLVMValueRef logmant = NULL;
- LLVMValueRef res = NULL;
+ LLVMValueRef ipart;
+ LLVMValueRef fpart;
- if(p_exp || p_floor_log2 || p_log2) {
- /* TODO: optimize the constant case */
- if(LLVMIsConstant(x))
- debug_printf("%s: inefficient/imprecise constant arithmetic\n",
- __FUNCTION__);
+ assert(lp_check_value(bld->type, x));
- assert(type.floating && type.width == 32);
+ assert(type.floating);
- i = LLVMBuildBitCast(bld->builder, x, int_type, "");
+ x = LLVMBuildBitCast(bld->builder, x, int_vec_type, "");
- /* exp = (float) exponent(x) */
- exp = LLVMBuildAnd(bld->builder, i, expmask, "");
- }
+ /* ipart = floor(log2(x)) - 1 */
+ ipart = LLVMBuildLShr(bld->builder, x, lp_build_const_int_vec(type, mantissa), "");
+ ipart = LLVMBuildAnd(bld->builder, ipart, lp_build_const_int_vec(type, 255), "");
+ ipart = LLVMBuildSub(bld->builder, ipart, lp_build_const_int_vec(type, 128), "");
+ ipart = LLVMBuildSIToFP(bld->builder, ipart, vec_type, "");
- if(p_floor_log2 || p_log2) {
- LLVMValueRef c23 = LLVMConstInt(int_type, 23, 0);
- LLVMValueRef c127 = LLVMConstInt(int_type, 127, 0);
- logexp = LLVMBuildLShr(bld->builder, exp, c23, "");
- logexp = LLVMBuildSub(bld->builder, logexp, c127, "");
- logexp = LLVMBuildSIToFP(bld->builder, logexp, float_type, "");
- }
+ /* fpart = 1.0 + frac(x) */
+ fpart = LLVMBuildAnd(bld->builder, x, mantmask, "");
+ fpart = LLVMBuildOr(bld->builder, fpart, one, "");
+ fpart = LLVMBuildBitCast(bld->builder, fpart, vec_type, "");
- if(p_log2) {
- /* mant = (float) mantissa(x) */
- mant = LLVMBuildAnd(bld->builder, i, mantmask, "");
- mant = LLVMBuildOr(bld->builder, mant, one, "");
- mant = LLVMBuildBitCast(bld->builder, mant, float_type, "");
+ /* floor(log2(x)) + frac(x) */
+ return LLVMBuildFAdd(bld->builder, ipart, fpart, "");
+}
- logmant = lp_build_polynomial(bld, mant, lp_build_log2_polynomial,
- Elements(lp_build_log2_polynomial));
- /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/
- logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildSub(bld->builder, mant, bld->one, ""), "");
+/**
+ * Fast implementation of iround(log2(x)).
+ *
+ * Not an approximation -- it should give accurate results all the time.
+ */
+LLVMValueRef
+lp_build_ilog2(struct lp_build_context *bld,
+ LLVMValueRef x)
+{
+ const struct lp_type type = bld->type;
+ LLVMTypeRef int_vec_type = bld->int_vec_type;
- res = LLVMBuildAdd(bld->builder, logmant, logexp, "");
- }
+ unsigned mantissa = lp_mantissa(type);
+ LLVMValueRef sqrt2 = lp_build_const_vec(type, 1.4142135623730951);
- if(p_exp) {
- exp = LLVMBuildBitCast(bld->builder, exp, float_type, "");
- *p_exp = exp;
- }
+ LLVMValueRef ipart;
- if(p_floor_log2)
- *p_floor_log2 = logexp;
+ assert(lp_check_value(bld->type, x));
- if(p_log2)
- *p_log2 = res;
-}
+ assert(type.floating);
+ /* x * 2^(0.5) i.e., add 0.5 to the log2(x) */
+ x = LLVMBuildFMul(bld->builder, x, sqrt2, "");
-LLVMValueRef
-lp_build_log2(struct lp_build_context *bld,
- LLVMValueRef x)
-{
- LLVMValueRef res;
- if (bld->type.length == 1) {
- lp_build_float_log2_approx(bld, x, NULL, NULL, &res);
- }
- else {
- lp_build_log2_approx(bld, x, NULL, NULL, &res);
- }
- return res;
+ x = LLVMBuildBitCast(bld->builder, x, int_vec_type, "");
+
+ /* ipart = floor(log2(x) + 0.5) */
+ ipart = LLVMBuildLShr(bld->builder, x, lp_build_const_int_vec(type, mantissa), "");
+ ipart = LLVMBuildAnd(bld->builder, ipart, lp_build_const_int_vec(type, 255), "");
+ ipart = LLVMBuildSub(bld->builder, ipart, lp_build_const_int_vec(type, 127), "");
+
+ return ipart;
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
index 31efa9921ce..8424384f8f7 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
@@ -171,6 +171,12 @@ LLVMValueRef
lp_build_itrunc(struct lp_build_context *bld,
LLVMValueRef a);
+void
+lp_build_ifloor_fract(struct lp_build_context *bld,
+ LLVMValueRef a,
+ LLVMValueRef *out_ipart,
+ LLVMValueRef *out_fpart);
+
LLVMValueRef
lp_build_sqrt(struct lp_build_context *bld,
LLVMValueRef a);
@@ -212,6 +218,15 @@ LLVMValueRef
lp_build_log2(struct lp_build_context *bld,
LLVMValueRef a);
+LLVMValueRef
+lp_build_fast_log2(struct lp_build_context *bld,
+ LLVMValueRef a);
+
+LLVMValueRef
+lp_build_ilog2(struct lp_build_context *bld,
+ LLVMValueRef x);
+
+
void
lp_build_exp2_approx(struct lp_build_context *bld,
LLVMValueRef x,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_assert.c b/src/gallium/auxiliary/gallivm/lp_bld_assert.c
new file mode 100644
index 00000000000..f2ebd868a8d
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_assert.c
@@ -0,0 +1,101 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_debug.h"
+#include "util/u_memory.h"
+#include "lp_bld_assert.h"
+#include "lp_bld_init.h"
+#include "lp_bld_printf.h"
+
+
+/**
+ * A call to lp_build_assert() will build a function call to this function.
+ */
+static void
+lp_assert(int condition, const char *msg)
+{
+ if (!condition) {
+ debug_printf("LLVM assertion '%s' failed!\n", msg);
+ assert(condition);
+ }
+}
+
+
+
+/**
+ * lp_build_assert.
+ *
+ * Build an assertion in LLVM IR by building a function call to the
+ * lp_assert() function above.
+ *
+ * \param condition should be an 'i1' or 'i32' value
+ * \param msg a string to print if the assertion fails.
+ */
+LLVMValueRef
+lp_build_assert(LLVMBuilderRef builder, LLVMValueRef condition,
+ const char *msg)
+{
+ LLVMModuleRef module;
+ LLVMTypeRef arg_types[2];
+ LLVMValueRef msg_string, assert_func, params[2], r;
+
+ module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(
+ LLVMGetInsertBlock(builder)));
+
+ msg_string = lp_build_const_string_variable(module, msg, strlen(msg) + 1);
+
+ arg_types[0] = LLVMInt32Type();
+ arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0);
+
+ /* lookup the lp_assert function */
+ assert_func = LLVMGetNamedFunction(module, "lp_assert");
+
+ /* Create the assertion function if not found */
+ if (!assert_func) {
+ LLVMTypeRef func_type =
+ LLVMFunctionType(LLVMVoidType(), arg_types, 2, 0);
+
+ assert_func = LLVMAddFunction(module, "lp_assert", func_type);
+ LLVMSetFunctionCallConv(assert_func, LLVMCCallConv);
+ LLVMSetLinkage(assert_func, LLVMExternalLinkage);
+ LLVMAddGlobalMapping(lp_build_engine, assert_func,
+ func_to_pointer((func_pointer)lp_assert));
+ }
+ assert(assert_func);
+
+ /* build function call param list */
+ params[0] = LLVMBuildZExt(builder, condition, arg_types[0], "");
+ params[1] = LLVMBuildBitCast(builder, msg_string, arg_types[1], "");
+
+ /* check arg types */
+ assert(LLVMTypeOf(params[0]) == arg_types[0]);
+ assert(LLVMTypeOf(params[1]) == arg_types[1]);
+
+ r = LLVMBuildCall(builder, assert_func, params, 2, "");
+
+ return r;
+}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_assert.h b/src/gallium/auxiliary/gallivm/lp_bld_assert.h
new file mode 100644
index 00000000000..ddd879dc2c6
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_assert.h
@@ -0,0 +1,41 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef LP_BLD_ASSERT_H
+#define LP_BLD_ASSERT_H
+
+
+#include "lp_bld.h"
+
+
+LLVMValueRef
+lp_build_assert(LLVMBuilderRef builder, LLVMValueRef condition,
+ const char *msg);
+
+
+#endif
+
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_bitarit.c b/src/gallium/auxiliary/gallivm/lp_bld_bitarit.c
new file mode 100644
index 00000000000..706479b4d56
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_bitarit.c
@@ -0,0 +1,187 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+
+#include "util/u_debug.h"
+
+#include "lp_bld_type.h"
+#include "lp_bld_debug.h"
+#include "lp_bld_const.h"
+#include "lp_bld_bitarit.h"
+
+
+/**
+ * Return (a | b)
+ */
+LLVMValueRef
+lp_build_or(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b)
+{
+ const struct lp_type type = bld->type;
+ LLVMValueRef res;
+
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+
+ /* can't do bitwise ops on floating-point values */
+ if (type.floating) {
+ a = LLVMBuildBitCast(bld->builder, a, bld->int_vec_type, "");
+ b = LLVMBuildBitCast(bld->builder, b, bld->int_vec_type, "");
+ }
+
+ res = LLVMBuildOr(bld->builder, a, b, "");
+
+ if (type.floating) {
+ res = LLVMBuildBitCast(bld->builder, res, bld->vec_type, "");
+ }
+
+ return res;
+}
+
+
+/**
+ * Return (a & b)
+ */
+LLVMValueRef
+lp_build_and(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b)
+{
+ const struct lp_type type = bld->type;
+ LLVMValueRef res;
+
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+
+ /* can't do bitwise ops on floating-point values */
+ if (type.floating) {
+ a = LLVMBuildBitCast(bld->builder, a, bld->int_vec_type, "");
+ b = LLVMBuildBitCast(bld->builder, b, bld->int_vec_type, "");
+ }
+
+ res = LLVMBuildAnd(bld->builder, a, b, "");
+
+ if (type.floating) {
+ res = LLVMBuildBitCast(bld->builder, res, bld->vec_type, "");
+ }
+
+ return res;
+}
+
+
+/**
+ * Return (a & ~b)
+ */
+LLVMValueRef
+lp_build_andnot(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b)
+{
+ const struct lp_type type = bld->type;
+ LLVMValueRef res;
+
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+
+ /* can't do bitwise ops on floating-point values */
+ if (type.floating) {
+ a = LLVMBuildBitCast(bld->builder, a, bld->int_vec_type, "");
+ b = LLVMBuildBitCast(bld->builder, b, bld->int_vec_type, "");
+ }
+
+ res = LLVMBuildNot(bld->builder, b, "");
+ res = LLVMBuildAnd(bld->builder, a, res, "");
+
+ if (type.floating) {
+ res = LLVMBuildBitCast(bld->builder, res, bld->vec_type, "");
+ }
+
+ return res;
+}
+
+
+/**
+ * Shift left.
+ */
+LLVMValueRef
+lp_build_shl(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b)
+{
+ const struct lp_type type = bld->type;
+ LLVMValueRef res;
+
+ assert(!type.floating);
+
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+
+ res = LLVMBuildShl(bld->builder, a, b, "");
+
+ return res;
+}
+
+
+/**
+ * Shift right.
+ */
+LLVMValueRef
+lp_build_shr(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b)
+{
+ const struct lp_type type = bld->type;
+ LLVMValueRef res;
+
+ assert(!type.floating);
+
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+
+ if (type.sign) {
+ res = LLVMBuildAShr(bld->builder, a, b, "");
+ } else {
+ res = LLVMBuildLShr(bld->builder, a, b, "");
+ }
+
+ return res;
+}
+
+
+/**
+ * Shift left with immediate.
+ */
+LLVMValueRef
+lp_build_shl_imm(struct lp_build_context *bld, LLVMValueRef a, unsigned imm)
+{
+ LLVMValueRef b = lp_build_const_int_vec(bld->type, imm);
+ assert(imm <= bld->type.width);
+ return lp_build_shl(bld, a, b);
+}
+
+
+/**
+ * Shift right with immediate.
+ */
+LLVMValueRef
+lp_build_shr_imm(struct lp_build_context *bld, LLVMValueRef a, unsigned imm)
+{
+ LLVMValueRef b = lp_build_const_int_vec(bld->type, imm);
+ assert(imm <= bld->type.width);
+ return lp_build_shr(bld, a, b);
+}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_bitarit.h b/src/gallium/auxiliary/gallivm/lp_bld_bitarit.h
new file mode 100644
index 00000000000..5c5b9818519
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_bitarit.h
@@ -0,0 +1,69 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Helper bitwise arithmetic functions.
+ *
+ * @author Jose Fonseca <[email protected]>
+ */
+
+
+#ifndef LP_BLD_BITARIT_H
+#define LP_BLD_BITARIT_H
+
+
+#include "gallivm/lp_bld.h"
+
+
+struct lp_type;
+struct lp_build_context;
+
+
+LLVMValueRef
+lp_build_or(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b);
+
+LLVMValueRef
+lp_build_and(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b);
+
+LLVMValueRef
+lp_build_andnot(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b);
+
+LLVMValueRef
+lp_build_shl(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b);
+
+LLVMValueRef
+lp_build_shr(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b);
+
+LLVMValueRef
+lp_build_shl_imm(struct lp_build_context *bld, LLVMValueRef a, unsigned imm);
+
+LLVMValueRef
+lp_build_shr_imm(struct lp_build_context *bld, LLVMValueRef a, unsigned imm);
+
+
+#endif /* !LP_BLD_ARIT_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.c b/src/gallium/auxiliary/gallivm/lp_bld_const.c
index 57843e9a60c..dd839c0bea5 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_const.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_const.c
@@ -280,31 +280,45 @@ lp_build_one(struct lp_type type)
/**
- * Build constant-valued vector from a scalar value.
+ * Build constant-valued element from a scalar value.
*/
LLVMValueRef
-lp_build_const_vec(struct lp_type type,
- double val)
+lp_build_const_elem(struct lp_type type,
+ double val)
{
LLVMTypeRef elem_type = lp_build_elem_type(type);
- LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
- unsigned i;
-
- assert(type.length <= LP_MAX_VECTOR_LENGTH);
+ LLVMValueRef elem;
if(type.floating) {
- elems[0] = LLVMConstReal(elem_type, val);
+ elem = LLVMConstReal(elem_type, val);
}
else {
double dscale = lp_const_scale(type);
- elems[0] = LLVMConstInt(elem_type, val*dscale + 0.5, 0);
+ elem = LLVMConstInt(elem_type, val*dscale + 0.5, 0);
}
- for(i = 1; i < type.length; ++i)
- elems[i] = elems[0];
+ return elem;
+}
- return LLVMConstVector(elems, type.length);
+
+/**
+ * Build constant-valued vector from a scalar value.
+ */
+LLVMValueRef
+lp_build_const_vec(struct lp_type type,
+ double val)
+{
+ if (type.length == 1) {
+ return lp_build_const_elem(type, val);
+ } else {
+ LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
+ unsigned i;
+ elems[0] = lp_build_const_elem(type, val);
+ for(i = 1; i < type.length; ++i)
+ elems[i] = elems[0];
+ return LLVMConstVector(elems, type.length);
+ }
}
@@ -321,6 +335,9 @@ lp_build_const_int_vec(struct lp_type type,
for(i = 0; i < type.length; ++i)
elems[i] = LLVMConstInt(elem_type, val, type.sign ? 1 : 0);
+ if (type.length == 1)
+ return elems[0];
+
return LLVMConstVector(elems, type.length);
}
@@ -365,9 +382,12 @@ lp_build_const_aos(struct lp_type type,
}
+/**
+ * @param mask TGSI_WRITEMASK_xxx
+ */
LLVMValueRef
lp_build_const_mask_aos(struct lp_type type,
- const boolean cond[4])
+ unsigned mask)
{
LLVMTypeRef elem_type = LLVMIntType(type.width);
LLVMValueRef masks[LP_MAX_VECTOR_LENGTH];
@@ -375,9 +395,13 @@ lp_build_const_mask_aos(struct lp_type type,
assert(type.length <= LP_MAX_VECTOR_LENGTH);
- for(j = 0; j < type.length; j += 4)
- for(i = 0; i < 4; ++i)
- masks[j + i] = LLVMConstInt(elem_type, cond[i] ? ~0 : 0, 0);
+ for (j = 0; j < type.length; j += 4) {
+ for( i = 0; i < 4; ++i) {
+ masks[j + i] = LLVMConstInt(elem_type,
+ mask & (1 << i) ? ~0ULL : 0,
+ 1);
+ }
+ }
return LLVMConstVector(masks, type.length);
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.h b/src/gallium/auxiliary/gallivm/lp_bld_const.h
index 9ca2f0664eb..6b1fc590c17 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_const.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_const.h
@@ -85,6 +85,10 @@ lp_build_one(struct lp_type type);
LLVMValueRef
+lp_build_const_elem(struct lp_type type,
+ double val);
+
+LLVMValueRef
lp_build_const_vec(struct lp_type type, double val);
@@ -100,7 +104,15 @@ lp_build_const_aos(struct lp_type type,
LLVMValueRef
lp_build_const_mask_aos(struct lp_type type,
- const boolean cond[4]);
+ unsigned mask);
+
+
+static INLINE LLVMValueRef
+lp_build_const_int32(int i)
+{
+ return LLVMConstInt(LLVMInt32Type(), i, 0);
+}
+
#endif /* !LP_BLD_CONST_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
index 3f7f2ebde9c..127b13bc286 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
@@ -63,6 +63,7 @@
#include "util/u_debug.h"
#include "util/u_math.h"
+#include "util/u_cpu_detect.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
@@ -83,6 +84,9 @@
*
* Although the result values can be scaled to an arbitrary bit width specified
* by dst_width, the actual result type will have the same width.
+ *
+ * Ex: src = { float, float, float, float }
+ * return { i32, i32, i32, i32 } where each value is in [0, 2^dst_width-1].
*/
LLVMValueRef
lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
@@ -114,8 +118,8 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
scale = (double)mask/ubound;
bias = (double)((unsigned long long)1 << (mantissa - n));
- res = LLVMBuildMul(builder, src, lp_build_const_vec(src_type, scale), "");
- res = LLVMBuildAdd(builder, res, lp_build_const_vec(src_type, bias), "");
+ res = LLVMBuildFMul(builder, src, lp_build_const_vec(src_type, scale), "");
+ res = LLVMBuildFAdd(builder, res, lp_build_const_vec(src_type, bias), "");
res = LLVMBuildBitCast(builder, res, int_vec_type, "");
if(dst_width > n) {
@@ -152,6 +156,8 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
/**
* Inverse of lp_build_clamped_float_to_unsigned_norm above.
+ * Ex: src = { i32, i32, i32, i32 } with values in range [0, 2^src_width-1]
+ * return {float, float, float, float} with values in range [0, 1].
*/
LLVMValueRef
lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
@@ -170,6 +176,8 @@ lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
double scale;
double bias;
+ assert(dst_type.floating);
+
mantissa = lp_mantissa(dst_type);
n = MIN2(mantissa, src_width);
@@ -194,8 +202,8 @@ lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
res = LLVMBuildBitCast(builder, res, vec_type, "");
- res = LLVMBuildSub(builder, res, bias_, "");
- res = LLVMBuildMul(builder, res, lp_build_const_vec(dst_type, scale), "");
+ res = LLVMBuildFSub(builder, res, bias_, "");
+ res = LLVMBuildFMul(builder, res, lp_build_const_vec(dst_type, scale), "");
return res;
}
@@ -219,20 +227,116 @@ lp_build_conv(LLVMBuilderRef builder,
unsigned num_tmps;
unsigned i;
- /* Register width must remain constant */
- assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
-
/* We must not loose or gain channels. Only precision */
assert(src_type.length * num_srcs == dst_type.length * num_dsts);
assert(src_type.length <= LP_MAX_VECTOR_LENGTH);
assert(dst_type.length <= LP_MAX_VECTOR_LENGTH);
+ assert(num_srcs <= LP_MAX_VECTOR_LENGTH);
+ assert(num_dsts <= LP_MAX_VECTOR_LENGTH);
tmp_type = src_type;
- for(i = 0; i < num_srcs; ++i)
+ for(i = 0; i < num_srcs; ++i) {
+ assert(lp_check_value(src_type, src[i]));
tmp[i] = src[i];
+ }
num_tmps = num_srcs;
+
+ /* Special case 4x4f --> 1x16ub
+ */
+ if (src_type.floating == 1 &&
+ src_type.fixed == 0 &&
+ src_type.sign == 1 &&
+ src_type.norm == 0 &&
+ src_type.width == 32 &&
+ src_type.length == 4 &&
+
+ dst_type.floating == 0 &&
+ dst_type.fixed == 0 &&
+ dst_type.sign == 0 &&
+ dst_type.norm == 1 &&
+ dst_type.width == 8 &&
+ dst_type.length == 16)
+ {
+ int i;
+
+ for (i = 0; i < num_dsts; i++, src += 4) {
+ struct lp_type int16_type = dst_type;
+ struct lp_type int32_type = dst_type;
+ LLVMValueRef lo, hi;
+ LLVMValueRef src_int0;
+ LLVMValueRef src_int1;
+ LLVMValueRef src_int2;
+ LLVMValueRef src_int3;
+ LLVMTypeRef int16_vec_type;
+ LLVMTypeRef int32_vec_type;
+ LLVMTypeRef src_vec_type;
+ LLVMTypeRef dst_vec_type;
+ LLVMValueRef const_255f;
+ LLVMValueRef a, b, c, d;
+
+ int16_type.width *= 2;
+ int16_type.length /= 2;
+ int16_type.sign = 1;
+
+ int32_type.width *= 4;
+ int32_type.length /= 4;
+ int32_type.sign = 1;
+
+ src_vec_type = lp_build_vec_type(src_type);
+ dst_vec_type = lp_build_vec_type(dst_type);
+ int16_vec_type = lp_build_vec_type(int16_type);
+ int32_vec_type = lp_build_vec_type(int32_type);
+
+ const_255f = lp_build_const_vec(src_type, 255.0f);
+
+ a = LLVMBuildFMul(builder, src[0], const_255f, "");
+ b = LLVMBuildFMul(builder, src[1], const_255f, "");
+ c = LLVMBuildFMul(builder, src[2], const_255f, "");
+ d = LLVMBuildFMul(builder, src[3], const_255f, "");
+
+ /* lp_build_round generates excessively general code without
+ * sse4, so do rounding manually.
+ */
+ if (!util_cpu_caps.has_sse4_1) {
+ LLVMValueRef const_half = lp_build_const_vec(src_type, 0.5f);
+
+ a = LLVMBuildFAdd(builder, a, const_half, "");
+ b = LLVMBuildFAdd(builder, b, const_half, "");
+ c = LLVMBuildFAdd(builder, c, const_half, "");
+ d = LLVMBuildFAdd(builder, d, const_half, "");
+
+ src_int0 = LLVMBuildFPToSI(builder, a, int32_vec_type, "");
+ src_int1 = LLVMBuildFPToSI(builder, b, int32_vec_type, "");
+ src_int2 = LLVMBuildFPToSI(builder, c, int32_vec_type, "");
+ src_int3 = LLVMBuildFPToSI(builder, d, int32_vec_type, "");
+ }
+ else {
+ struct lp_build_context bld;
+
+ bld.builder = builder;
+ bld.type = src_type;
+ bld.vec_type = src_vec_type;
+ bld.int_elem_type = lp_build_elem_type(int32_type);
+ bld.int_vec_type = int32_vec_type;
+ bld.undef = lp_build_undef(src_type);
+ bld.zero = lp_build_zero(src_type);
+ bld.one = lp_build_one(src_type);
+
+ src_int0 = lp_build_iround(&bld, a);
+ src_int1 = lp_build_iround(&bld, b);
+ src_int2 = lp_build_iround(&bld, c);
+ src_int3 = lp_build_iround(&bld, d);
+ }
+
+ lo = lp_build_pack2(builder, int32_type, int16_type, src_int0, src_int1);
+ hi = lp_build_pack2(builder, int32_type, int16_type, src_int2, src_int3);
+ dst[i] = lp_build_pack2(builder, int16_type, dst_type, lo, hi);
+ }
+ return;
+ }
+
/*
* Clamp if necessary
*/
@@ -290,7 +394,7 @@ lp_build_conv(LLVMBuilderRef builder,
if (dst_scale != 1.0) {
LLVMValueRef scale = lp_build_const_vec(tmp_type, dst_scale);
for(i = 0; i < num_tmps; ++i)
- tmp[i] = LLVMBuildMul(builder, tmp[i], scale, "");
+ tmp[i] = LLVMBuildFMul(builder, tmp[i], scale, "");
}
/* Use an equally sized integer for intermediate computations */
@@ -326,30 +430,25 @@ lp_build_conv(LLVMBuilderRef builder,
/*
* Truncate or expand bit width
+ *
+ * No data conversion should happen here, although the sign bits are
+ * crucial to avoid bad clamping.
*/
- assert(!tmp_type.floating || tmp_type.width == dst_type.width);
+ {
+ struct lp_type new_type;
- if(tmp_type.width > dst_type.width) {
- assert(num_dsts == 1);
- tmp[0] = lp_build_pack(builder, tmp_type, dst_type, TRUE, tmp, num_tmps);
- tmp_type.width = dst_type.width;
- tmp_type.length = dst_type.length;
- num_tmps = 1;
- }
+ new_type = tmp_type;
+ new_type.sign = dst_type.sign;
+ new_type.width = dst_type.width;
+ new_type.length = dst_type.length;
+
+ lp_build_resize(builder, tmp_type, new_type, tmp, num_srcs, tmp, num_dsts);
- if(tmp_type.width < dst_type.width) {
- assert(num_tmps == 1);
- lp_build_unpack(builder, tmp_type, dst_type, tmp[0], tmp, num_dsts);
- tmp_type.width = dst_type.width;
- tmp_type.length = dst_type.length;
+ tmp_type = new_type;
num_tmps = num_dsts;
}
- assert(tmp_type.width == dst_type.width);
- assert(tmp_type.length == dst_type.length);
- assert(num_tmps == num_dsts);
-
/*
* Scale to the widest range
*/
@@ -390,7 +489,7 @@ lp_build_conv(LLVMBuilderRef builder,
if (src_scale != 1.0) {
LLVMValueRef scale = lp_build_const_vec(tmp_type, 1.0/src_scale);
for(i = 0; i < num_tmps; ++i)
- tmp[i] = LLVMBuildMul(builder, tmp[i], scale, "");
+ tmp[i] = LLVMBuildFMul(builder, tmp[i], scale, "");
}
}
}
@@ -406,8 +505,10 @@ lp_build_conv(LLVMBuilderRef builder,
}
}
- for(i = 0; i < num_dsts; ++i)
+ for(i = 0; i < num_dsts; ++i) {
dst[i] = tmp[i];
+ assert(lp_check_value(dst_type, dst[i]));
+ }
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.c b/src/gallium/auxiliary/gallivm/lp_bld_debug.c
index 39dfc51e503..d3a5afff8c2 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_debug.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.c
@@ -46,7 +46,7 @@
boolean
lp_check_alignment(const void *ptr, unsigned alignment)
{
- assert(util_is_pot(alignment));
+ assert(util_is_power_of_two(alignment));
return ((uintptr_t)ptr & (alignment - 1)) == 0;
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.h b/src/gallium/auxiliary/gallivm/lp_bld_debug.h
index 7b010cbdb09..369c1bbf09a 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_debug.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.h
@@ -36,6 +36,20 @@
#include "util/u_string.h"
+#define GALLIVM_DEBUG_TGSI 0x1
+#define GALLIVM_DEBUG_IR 0x2
+#define GALLIVM_DEBUG_ASM 0x4
+#define GALLIVM_DEBUG_NO_OPT 0x8
+#define GALLIVM_DEBUG_PERF 0x10
+
+
+#ifdef DEBUG
+extern unsigned gallivm_debug;
+#else
+#define gallivm_debug 0
+#endif
+
+
static INLINE void
lp_build_name(LLVMValueRef val, const char *format, ...)
{
@@ -53,6 +67,10 @@ lp_build_name(LLVMValueRef val, const char *format, ...)
}
+void
+lp_debug_dump_value(LLVMValueRef value);
+
+
boolean
lp_check_alignment(const void *ptr, unsigned alignment);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.c b/src/gallium/auxiliary/gallivm/lp_bld_flow.c
index 8f15b1d287d..5bc9c741a88 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_flow.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.c
@@ -38,7 +38,7 @@
#include "lp_bld_flow.h"
-#define LP_BUILD_FLOW_MAX_VARIABLES 32
+#define LP_BUILD_FLOW_MAX_VARIABLES 64
#define LP_BUILD_FLOW_MAX_DEPTH 32
/**
@@ -407,6 +407,7 @@ lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow,
/* for each variable, update the Phi node with a (variable, block) pair */
for(i = 0; i < skip->num_variables; ++i) {
assert(*flow->variables[i]);
+ assert(LLVMTypeOf(skip->phi[i]) == LLVMTypeOf(*flow->variables[i]));
LLVMAddIncoming(skip->phi[i], flow->variables[i], &current_block, 1);
}
@@ -433,6 +434,7 @@ lp_build_flow_skip_end(struct lp_build_flow_context *flow)
/* add (variable, block) tuples to the phi nodes */
for(i = 0; i < skip->num_variables; ++i) {
assert(*flow->variables[i]);
+ assert(LLVMTypeOf(skip->phi[i]) == LLVMTypeOf(*flow->variables[i]));
LLVMAddIncoming(skip->phi[i], flow->variables[i], &current_block, 1);
*flow->variables[i] = skip->phi[i];
}
@@ -821,8 +823,11 @@ lp_build_alloca(LLVMBuilderRef builder,
LLVMBuilderRef first_builder = LLVMCreateBuilder();
LLVMValueRef res;
- LLVMPositionBuilderAtEnd(first_builder, first_block);
- LLVMPositionBuilderBefore(first_builder, first_instr);
+ if (first_instr) {
+ LLVMPositionBuilderBefore(first_builder, first_instr);
+ } else {
+ LLVMPositionBuilderAtEnd(first_builder, first_block);
+ }
res = LLVMBuildAlloca(first_builder, type, name);
@@ -840,7 +845,7 @@ lp_build_alloca(LLVMBuilderRef builder,
* first block may prevent the X86 backend from successfully align the stack as
* required.
*
- * Also the scalarrepl pass is supossedly more powerful and can promote
+ * Also the scalarrepl pass is supposedly more powerful and can promote
* arrays in many cases.
*
* See also:
@@ -859,7 +864,11 @@ lp_build_array_alloca(LLVMBuilderRef builder,
LLVMBuilderRef first_builder = LLVMCreateBuilder();
LLVMValueRef res;
- LLVMPositionBuilderBefore(first_builder, first_instr);
+ if (first_instr) {
+ LLVMPositionBuilderBefore(first_builder, first_instr);
+ } else {
+ LLVMPositionBuilderAtEnd(first_builder, first_block);
+ }
res = LLVMBuildArrayAlloca(first_builder, type, count, name);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format.h b/src/gallium/auxiliary/gallivm/lp_bld_format.h
index 085937588ff..60e22d727ad 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format.h
@@ -40,6 +40,7 @@
struct util_format_description;
struct lp_type;
+struct lp_build_context;
/*
@@ -47,9 +48,9 @@ struct lp_type;
*/
LLVMValueRef
-lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
- const struct util_format_description *desc,
- LLVMValueRef packed);
+lp_build_format_swizzle_aos(const struct util_format_description *desc,
+ struct lp_build_context *bld,
+ LLVMValueRef unswizzled);
LLVMValueRef
lp_build_pack_rgba_aos(LLVMBuilderRef builder,
@@ -59,7 +60,9 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder,
LLVMValueRef
lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
const struct util_format_description *format_desc,
- LLVMValueRef ptr,
+ struct lp_type type,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offset,
LLVMValueRef i,
LLVMValueRef j);
@@ -70,17 +73,22 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
void
lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
- struct lp_type type,
- const LLVMValueRef *unswizzled,
- LLVMValueRef *swizzled);
+ struct lp_build_context *bld,
+ const LLVMValueRef unswizzled[4],
+ LLVMValueRef swizzled_out[4]);
void
lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
const struct util_format_description *format_desc,
struct lp_type type,
LLVMValueRef packed,
- LLVMValueRef *rgba);
+ LLVMValueRef rgba_out[4]);
+void
+lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder,
+ struct lp_type dst_type,
+ LLVMValueRef packed,
+ LLVMValueRef *rgba);
void
lp_build_fetch_rgba_soa(LLVMBuilderRef builder,
@@ -90,7 +98,20 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder,
LLVMValueRef offsets,
LLVMValueRef i,
LLVMValueRef j,
- LLVMValueRef *rgba);
+ LLVMValueRef rgba_out[4]);
+
+/*
+ * YUV
+ */
+LLVMValueRef
+lp_build_fetch_subsampled_rgba_aos(LLVMBuilderRef builder,
+ const struct util_format_description *format_desc,
+ unsigned n,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offset,
+ LLVMValueRef i,
+ LLVMValueRef j);
+
#endif /* !LP_BLD_FORMAT_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
index 6257e9a4047..6b9189e1da5 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
@@ -38,33 +38,124 @@
#include "util/u_math.h"
#include "util/u_string.h"
+#include "lp_bld_arit.h"
#include "lp_bld_init.h"
#include "lp_bld_type.h"
#include "lp_bld_flow.h"
+#include "lp_bld_const.h"
+#include "lp_bld_conv.h"
+#include "lp_bld_swizzle.h"
+#include "lp_bld_gather.h"
+#include "lp_bld_debug.h"
#include "lp_bld_format.h"
/**
+ * Basic swizzling. Rearrange the order of the unswizzled array elements
+ * according to the format description. PIPE_SWIZZLE_ZERO/ONE are supported
+ * too.
+ * Ex: if unswizzled[4] = {B, G, R, x}, then swizzled_out[4] = {R, G, B, 1}.
+ */
+LLVMValueRef
+lp_build_format_swizzle_aos(const struct util_format_description *desc,
+ struct lp_build_context *bld,
+ LLVMValueRef unswizzled)
+{
+ unsigned char swizzles[4];
+ unsigned chan;
+
+ assert(bld->type.length % 4 == 0);
+
+ for (chan = 0; chan < 4; ++chan) {
+ enum util_format_swizzle swizzle;
+
+ if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
+ /*
+ * For ZS formats do RGBA = ZZZ1
+ */
+ if (chan == 3) {
+ swizzle = UTIL_FORMAT_SWIZZLE_1;
+ } else if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_NONE) {
+ swizzle = UTIL_FORMAT_SWIZZLE_0;
+ } else {
+ swizzle = desc->swizzle[0];
+ }
+ } else {
+ swizzle = desc->swizzle[chan];
+ }
+ swizzles[chan] = swizzle;
+ }
+
+ return lp_build_swizzle_aos(bld, unswizzled, swizzles);
+}
+
+
+/**
+ * Whether the format matches the vector type, apart of swizzles.
+ */
+static INLINE boolean
+format_matches_type(const struct util_format_description *desc,
+ struct lp_type type)
+{
+ enum util_format_type chan_type;
+ unsigned chan;
+
+ assert(type.length % 4 == 0);
+
+ if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN ||
+ desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB ||
+ desc->block.width != 1 ||
+ desc->block.height != 1) {
+ return FALSE;
+ }
+
+ if (type.floating) {
+ chan_type = UTIL_FORMAT_TYPE_FLOAT;
+ } else if (type.fixed) {
+ chan_type = UTIL_FORMAT_TYPE_FIXED;
+ } else if (type.sign) {
+ chan_type = UTIL_FORMAT_TYPE_SIGNED;
+ } else {
+ chan_type = UTIL_FORMAT_TYPE_UNSIGNED;
+ }
+
+ for (chan = 0; chan < desc->nr_channels; ++chan) {
+ if (desc->channel[chan].size != type.width) {
+ return FALSE;
+ }
+
+ if (desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID) {
+ if (desc->channel[chan].type != chan_type ||
+ desc->channel[chan].normalized != type.norm) {
+ return FALSE;
+ }
+ }
+ }
+
+ return TRUE;
+}
+
+
+/**
* Unpack a single pixel into its RGBA components.
*
- * @param packed integer.
+ * @param desc the pixel format for the packed pixel value
+ * @param packed integer pixel in a format such as PIPE_FORMAT_B8G8R8A8_UNORM
*
- * @return RGBA in a 4 floats vector.
+ * @return RGBA in a float[4] or ubyte[4] or ushort[4] vector.
*/
-LLVMValueRef
-lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
- const struct util_format_description *desc,
- LLVMValueRef packed)
+static INLINE LLVMValueRef
+lp_build_unpack_arith_rgba_aos(LLVMBuilderRef builder,
+ const struct util_format_description *desc,
+ LLVMValueRef packed)
{
LLVMValueRef shifted, casted, scaled, masked;
LLVMValueRef shifts[4];
LLVMValueRef masks[4];
LLVMValueRef scales[4];
- LLVMValueRef swizzles[4];
- LLVMValueRef aux[4];
- bool normalized;
- int empty_channel;
- bool needs_uitofp;
+
+ boolean normalized;
+ boolean needs_uitofp;
unsigned shift;
unsigned i;
@@ -76,10 +167,12 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
/* Do the intermediate integer computations with 32bit integers since it
* matches floating point size */
- if (desc->block.bits < 32)
- packed = LLVMBuildZExt(builder, packed, LLVMInt32Type(), "");
+ assert (LLVMTypeOf(packed) == LLVMInt32Type());
- /* Broadcast the packed value to all four channels */
+ /* Broadcast the packed value to all four channels
+ * before: packed = BGRA
+ * after: packed = {BGRA, BGRA, BGRA, BGRA}
+ */
packed = LLVMBuildInsertElement(builder,
LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)),
packed,
@@ -94,8 +187,9 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
/* Initialize vector constants */
normalized = FALSE;
needs_uitofp = FALSE;
- empty_channel = -1;
shift = 0;
+
+ /* Loop over 4 color components */
for (i = 0; i < 4; ++i) {
unsigned bits = desc->channel[i].size;
@@ -103,7 +197,6 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
shifts[i] = LLVMGetUndef(LLVMInt32Type());
masks[i] = LLVMConstNull(LLVMInt32Type());
scales[i] = LLVMConstNull(LLVMFloatType());
- empty_channel = i;
}
else {
unsigned long long mask = (1ULL << bits) - 1;
@@ -128,8 +221,13 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
shift += bits;
}
+ /* Ex: convert packed = {BGRA, BGRA, BGRA, BGRA}
+ * into masked = {B, G, R, A}
+ */
shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), "");
masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), "");
+
+
if (!needs_uitofp) {
/* UIToFP can't be expressed in SSE2 */
casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), "");
@@ -137,55 +235,17 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
casted = LLVMBuildUIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), "");
}
+ /* At this point 'casted' may be a vector of floats such as
+ * {255.0, 255.0, 255.0, 255.0}. Next, if the pixel values are normalized
+ * we'll scale this to {1.0, 1.0, 1.0, 1.0}.
+ */
+
if (normalized)
- scaled = LLVMBuildMul(builder, casted, LLVMConstVector(scales, 4), "");
+ scaled = LLVMBuildFMul(builder, casted, LLVMConstVector(scales, 4), "");
else
scaled = casted;
- for (i = 0; i < 4; ++i)
- aux[i] = LLVMGetUndef(LLVMFloatType());
-
- for (i = 0; i < 4; ++i) {
- enum util_format_swizzle swizzle;
-
- if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
- /*
- * For ZS formats do RGBA = ZZZ1
- */
- if (i == 3) {
- swizzle = UTIL_FORMAT_SWIZZLE_1;
- } else if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_NONE) {
- swizzle = UTIL_FORMAT_SWIZZLE_0;
- } else {
- swizzle = desc->swizzle[0];
- }
- } else {
- swizzle = desc->swizzle[i];
- }
-
- switch (swizzle) {
- case UTIL_FORMAT_SWIZZLE_X:
- case UTIL_FORMAT_SWIZZLE_Y:
- case UTIL_FORMAT_SWIZZLE_Z:
- case UTIL_FORMAT_SWIZZLE_W:
- swizzles[i] = LLVMConstInt(LLVMInt32Type(), swizzle, 0);
- break;
- case UTIL_FORMAT_SWIZZLE_0:
- assert(empty_channel >= 0);
- swizzles[i] = LLVMConstInt(LLVMInt32Type(), empty_channel, 0);
- break;
- case UTIL_FORMAT_SWIZZLE_1:
- swizzles[i] = LLVMConstInt(LLVMInt32Type(), 4, 0);
- aux[0] = LLVMConstReal(LLVMFloatType(), 1.0);
- break;
- case UTIL_FORMAT_SWIZZLE_NONE:
- swizzles[i] = LLVMGetUndef(LLVMFloatType());
- assert(0);
- break;
- }
- }
-
- return LLVMBuildShuffleVector(builder, scaled, LLVMConstVector(aux, 4), LLVMConstVector(swizzles, 4), "");
+ return scaled;
}
@@ -208,7 +268,7 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder,
LLVMValueRef shifted, casted, scaled, unswizzled;
LLVMValueRef shifts[4];
LLVMValueRef scales[4];
- bool normalized;
+ boolean normalized;
unsigned shift;
unsigned i, j;
@@ -263,7 +323,7 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder,
}
if (normalized)
- scaled = LLVMBuildMul(builder, unswizzled, LLVMConstVector(scales, 4), "");
+ scaled = LLVMBuildFMul(builder, unswizzled, LLVMConstVector(scales, 4), "");
else
scaled = unswizzled;
@@ -292,44 +352,152 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder,
}
+
+
/**
* Fetch a pixel into a 4 float AoS.
*
- * i and j are the sub-block pixel coordinates.
+ * \param format_desc describes format of the image we're fetching from
+ * \param ptr address of the pixel block (or the texel if uncompressed)
+ * \param i, j the sub-block pixel coordinates. For non-compressed formats
+ * these will always be (0, 0).
+ * \return a 4 element vector with the pixel's RGBA values.
*/
LLVMValueRef
lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
const struct util_format_description *format_desc,
- LLVMValueRef ptr,
+ struct lp_type type,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offset,
LLVMValueRef i,
LLVMValueRef j)
{
+ unsigned num_pixels = type.length / 4;
+ struct lp_build_context bld;
+
+ assert(type.length <= LP_MAX_VECTOR_LENGTH);
+ assert(type.length % 4 == 0);
+
+ lp_build_context_init(&bld, builder, type);
+
+ /*
+ * Trivial case
+ *
+ * The format matches the type (apart of a swizzle) so no need for
+ * scaling or converting.
+ */
+
+ if (format_matches_type(format_desc, type) &&
+ format_desc->block.bits <= type.width * 4 &&
+ util_is_power_of_two(format_desc->block.bits)) {
+ LLVMValueRef packed;
+
+ /*
+ * The format matches the type (apart of a swizzle) so no need for
+ * scaling or converting.
+ */
+
+ packed = lp_build_gather(builder, type.length/4,
+ format_desc->block.bits, type.width*4,
+ base_ptr, offset);
+
+ assert(format_desc->block.bits <= type.width * type.length);
+
+ packed = LLVMBuildBitCast(builder, packed, lp_build_vec_type(type), "");
+
+ return lp_build_format_swizzle_aos(format_desc, &bld, packed);
+ }
+
+ /*
+ * Bit arithmetic
+ */
if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
format_desc->block.width == 1 &&
format_desc->block.height == 1 &&
- util_is_pot(format_desc->block.bits) &&
+ util_is_power_of_two(format_desc->block.bits) &&
format_desc->block.bits <= 32 &&
format_desc->is_bitmask &&
!format_desc->is_mixed &&
(format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED ||
- format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED))
- {
- LLVMValueRef packed;
+ format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED)) {
+
+ LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];
+ LLVMValueRef res;
+ unsigned k;
+
+ /*
+ * Unpack a pixel at a time into a <4 x float> RGBA vector
+ */
+
+ for (k = 0; k < num_pixels; ++k) {
+ LLVMValueRef packed;
+
+ packed = lp_build_gather_elem(builder, num_pixels,
+ format_desc->block.bits, 32,
+ base_ptr, offset, k);
+
+ tmps[k] = lp_build_unpack_arith_rgba_aos(builder, format_desc,
+ packed);
+ }
+
+ /*
+ * Type conversion.
+ *
+ * TODO: We could avoid floating conversion for integer to
+ * integer conversions.
+ */
- ptr = LLVMBuildBitCast(builder, ptr,
- LLVMPointerType(LLVMIntType(format_desc->block.bits), 0) ,
- "");
+ if (gallivm_debug & GALLIVM_DEBUG_PERF && !type.floating) {
+ debug_printf("%s: unpacking %s with floating point\n",
+ __FUNCTION__, format_desc->short_name);
+ }
- packed = LLVMBuildLoad(builder, ptr, "packed");
+ lp_build_conv(builder,
+ lp_float32_vec4_type(),
+ type,
+ tmps, num_pixels, &res, 1);
- return lp_build_unpack_rgba_aos(builder, format_desc, packed);
+ return lp_build_format_swizzle_aos(format_desc, &bld, res);
}
- else if (format_desc->fetch_rgba_float) {
+
+ /*
+ * YUV / subsampled formats
+ */
+
+ if (format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
+ struct lp_type tmp_type;
+ LLVMValueRef tmp;
+
+ memset(&tmp_type, 0, sizeof tmp_type);
+ tmp_type.width = 8;
+ tmp_type.length = num_pixels * 4;
+ tmp_type.norm = TRUE;
+
+ tmp = lp_build_fetch_subsampled_rgba_aos(builder,
+ format_desc,
+ num_pixels,
+ base_ptr,
+ offset,
+ i, j);
+
+ lp_build_conv(builder,
+ tmp_type, type,
+ &tmp, 1, &tmp, 1);
+
+ return tmp;
+ }
+
+ /*
+ * Fallback to util_format_description::fetch_rgba_8unorm().
+ */
+
+ if (format_desc->fetch_rgba_8unorm &&
+ !type.floating && type.width == 8 && !type.sign && type.norm) {
/*
- * Fallback to calling util_format_description::fetch_rgba_float.
+ * Fallback to calling util_format_description::fetch_rgba_8unorm.
*
* This is definitely not the most efficient way of fetching pixels, as
* we miss the opportunity to do vectorization, but this it is a
@@ -339,13 +507,125 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
char name[256];
+ LLVMTypeRef i8t = LLVMInt8Type();
+ LLVMTypeRef pi8t = LLVMPointerType(i8t, 0);
+ LLVMTypeRef i32t = LLVMInt32Type();
LLVMValueRef function;
+ LLVMValueRef tmp_ptr;
LLVMValueRef tmp;
- LLVMValueRef args[4];
+ LLVMValueRef res;
+ unsigned k;
+
+ util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_8unorm",
+ format_desc->short_name);
+
+ if (gallivm_debug & GALLIVM_DEBUG_PERF) {
+ debug_printf("%s: falling back to %s\n", __FUNCTION__, name);
+ }
+
+ /*
+ * Declare and bind format_desc->fetch_rgba_8unorm().
+ */
+
+ function = LLVMGetNamedFunction(module, name);
+ if (!function) {
+ LLVMTypeRef ret_type;
+ LLVMTypeRef arg_types[4];
+ LLVMTypeRef function_type;
+
+ ret_type = LLVMVoidType();
+ arg_types[0] = pi8t;
+ arg_types[1] = pi8t;
+ arg_types[3] = arg_types[2] = LLVMIntType(sizeof(unsigned) * 8);
+ function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0);
+ function = LLVMAddFunction(module, name, function_type);
+
+ LLVMSetFunctionCallConv(function, LLVMCCallConv);
+ LLVMSetLinkage(function, LLVMExternalLinkage);
+
+ assert(LLVMIsDeclaration(function));
+
+ LLVMAddGlobalMapping(lp_build_engine, function,
+ func_to_pointer((func_pointer)format_desc->fetch_rgba_8unorm));
+ }
+
+ tmp_ptr = lp_build_alloca(builder, i32t, "");
+
+ res = LLVMGetUndef(LLVMVectorType(i32t, num_pixels));
+
+ /*
+ * Invoke format_desc->fetch_rgba_8unorm() for each pixel and insert the result
+ * in the SoA vectors.
+ */
+
+ for (k = 0; k < num_pixels; ++k) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), k, 0);
+ LLVMValueRef args[4];
+
+ args[0] = LLVMBuildBitCast(builder, tmp_ptr, pi8t, "");
+ args[1] = lp_build_gather_elem_ptr(builder, num_pixels,
+ base_ptr, offset, k);
+
+ if (num_pixels == 1) {
+ args[2] = i;
+ args[3] = j;
+ }
+ else {
+ args[2] = LLVMBuildExtractElement(builder, i, index, "");
+ args[3] = LLVMBuildExtractElement(builder, j, index, "");
+ }
+
+ LLVMBuildCall(builder, function, args, Elements(args), "");
+
+ tmp = LLVMBuildLoad(builder, tmp_ptr, "");
+
+ if (num_pixels == 1) {
+ res = tmp;
+ }
+ else {
+ res = LLVMBuildInsertElement(builder, res, tmp, index, "");
+ }
+ }
+
+ /* Bitcast from <n x i32> to <4n x i8> */
+ res = LLVMBuildBitCast(builder, res, bld.vec_type, "");
+
+ return res;
+ }
+
+
+ /*
+ * Fallback to util_format_description::fetch_rgba_float().
+ */
+
+ if (format_desc->fetch_rgba_float) {
+ /*
+ * Fallback to calling util_format_description::fetch_rgba_float.
+ *
+ * This is definitely not the most efficient way of fetching pixels, as
+ * we miss the opportunity to do vectorization, but this it is a
+ * convenient for formats or scenarios for which there was no opportunity
+ * or incentive to optimize.
+ */
+
+ LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
+ char name[256];
+ LLVMTypeRef f32t = LLVMFloatType();
+ LLVMTypeRef f32x4t = LLVMVectorType(f32t, 4);
+ LLVMTypeRef pf32t = LLVMPointerType(f32t, 0);
+ LLVMValueRef function;
+ LLVMValueRef tmp_ptr;
+ LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];
+ LLVMValueRef res;
+ unsigned k;
util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float",
format_desc->short_name);
+ if (gallivm_debug & GALLIVM_DEBUG_PERF) {
+ debug_printf("%s: falling back to %s\n", __FUNCTION__, name);
+ }
+
/*
* Declare and bind format_desc->fetch_rgba_float().
*/
@@ -357,7 +637,7 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
LLVMTypeRef function_type;
ret_type = LLVMVoidType();
- arg_types[0] = LLVMPointerType(LLVMFloatType(), 0);
+ arg_types[0] = pf32t;
arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0);
arg_types[3] = arg_types[2] = LLVMIntType(sizeof(unsigned) * 8);
function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0);
@@ -368,28 +648,47 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
assert(LLVMIsDeclaration(function));
- LLVMAddGlobalMapping(lp_build_engine, function, format_desc->fetch_rgba_float);
+ LLVMAddGlobalMapping(lp_build_engine, function,
+ func_to_pointer((func_pointer)format_desc->fetch_rgba_float));
}
- tmp = lp_build_alloca(builder, LLVMVectorType(LLVMFloatType(), 4), "");
+ tmp_ptr = lp_build_alloca(builder, f32x4t, "");
/*
* Invoke format_desc->fetch_rgba_float() for each pixel and insert the result
* in the SoA vectors.
*/
- args[0] = LLVMBuildBitCast(builder, tmp,
- LLVMPointerType(LLVMFloatType(), 0), "");
- args[1] = ptr;
- args[2] = i;
- args[3] = j;
+ for (k = 0; k < num_pixels; ++k) {
+ LLVMValueRef args[4];
- LLVMBuildCall(builder, function, args, 4, "");
+ args[0] = LLVMBuildBitCast(builder, tmp_ptr, pf32t, "");
+ args[1] = lp_build_gather_elem_ptr(builder, num_pixels,
+ base_ptr, offset, k);
- return LLVMBuildLoad(builder, tmp, "");
- }
- else {
- assert(0);
- return LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4));
+ if (num_pixels == 1) {
+ args[2] = i;
+ args[3] = j;
+ }
+ else {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), k, 0);
+ args[2] = LLVMBuildExtractElement(builder, i, index, "");
+ args[3] = LLVMBuildExtractElement(builder, j, index, "");
+ }
+
+ LLVMBuildCall(builder, function, args, Elements(args), "");
+
+ tmps[k] = LLVMBuildLoad(builder, tmp_ptr, "");
+ }
+
+ lp_build_conv(builder,
+ lp_float32_vec4_type(),
+ type,
+ tmps, num_pixels, &res, 1);
+
+ return res;
}
+
+ assert(0);
+ return lp_build_undef(type);
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
index 26b947b3b1c..ce7e54afc76 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
@@ -26,6 +26,8 @@
**************************************************************************/
+#include "pipe/p_defines.h"
+
#include "util/u_format.h"
#include "util/u_memory.h"
#include "util/u_string.h"
@@ -33,51 +35,39 @@
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_conv.h"
-#include "lp_bld_sample.h" /* for lp_build_gather */
+#include "lp_bld_swizzle.h"
+#include "lp_bld_gather.h"
+#include "lp_bld_debug.h"
#include "lp_bld_format.h"
-static LLVMValueRef
-lp_build_format_swizzle_chan_soa(struct lp_type type,
- const LLVMValueRef *unswizzled,
- enum util_format_swizzle swizzle)
-{
- switch (swizzle) {
- case UTIL_FORMAT_SWIZZLE_X:
- case UTIL_FORMAT_SWIZZLE_Y:
- case UTIL_FORMAT_SWIZZLE_Z:
- case UTIL_FORMAT_SWIZZLE_W:
- return unswizzled[swizzle];
- case UTIL_FORMAT_SWIZZLE_0:
- return lp_build_zero(type);
- case UTIL_FORMAT_SWIZZLE_1:
- return lp_build_one(type);
- case UTIL_FORMAT_SWIZZLE_NONE:
- return lp_build_undef(type);
- default:
- assert(0);
- return lp_build_undef(type);
- }
-}
-
-
void
lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
- struct lp_type type,
+ struct lp_build_context *bld,
const LLVMValueRef *unswizzled,
- LLVMValueRef *swizzled)
+ LLVMValueRef swizzled_out[4])
{
- if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
+ assert(UTIL_FORMAT_SWIZZLE_0 == PIPE_SWIZZLE_ZERO);
+ assert(UTIL_FORMAT_SWIZZLE_1 == PIPE_SWIZZLE_ONE);
+
+ if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
+ /*
+ * Return zzz1 for depth-stencil formats.
+ *
+ * XXX: Allow to control the depth swizzle with an additional parameter,
+ * as the caller may wish another depth swizzle, or retain the stencil
+ * value.
+ */
enum util_format_swizzle swizzle = format_desc->swizzle[0];
- LLVMValueRef depth = lp_build_format_swizzle_chan_soa(type, unswizzled, swizzle);
- swizzled[2] = swizzled[1] = swizzled[0] = depth;
- swizzled[3] = lp_build_one(type);
+ LLVMValueRef depth = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
+ swizzled_out[2] = swizzled_out[1] = swizzled_out[0] = depth;
+ swizzled_out[3] = bld->one;
}
else {
unsigned chan;
for (chan = 0; chan < 4; ++chan) {
enum util_format_swizzle swizzle = format_desc->swizzle[chan];
- swizzled[chan] = lp_build_format_swizzle_chan_soa(type, unswizzled, swizzle);
+ swizzled_out[chan] = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
}
}
}
@@ -100,14 +90,20 @@ lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
* It requires that a packed pixel fits into an element of the output
* channels. The common case is when converting pixel with a depth of 32 bit or
* less into floats.
+ *
+ * \param format_desc the format of the 'packed' incoming pixel vector
+ * \param type the desired type for rgba_out (type.length = n, above)
+ * \param packed the incoming vector of packed pixels
+ * \param rgba_out returns the SoA R,G,B,A vectors
*/
void
lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
const struct util_format_description *format_desc,
struct lp_type type,
LLVMValueRef packed,
- LLVMValueRef *rgba)
+ LLVMValueRef rgba_out[4])
{
+ struct lp_build_context bld;
LLVMValueRef inputs[4];
unsigned start;
unsigned chan;
@@ -120,11 +116,13 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
assert(type.floating);
assert(type.width == 32);
+ lp_build_context_init(&bld, builder, type);
+
/* Decode the input vector components */
start = 0;
for (chan = 0; chan < format_desc->nr_channels; ++chan) {
- unsigned width = format_desc->channel[chan].size;
- unsigned stop = start + width;
+ const unsigned width = format_desc->channel[chan].size;
+ const unsigned stop = start + width;
LLVMValueRef input;
input = packed;
@@ -200,7 +198,7 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
if (format_desc->channel[chan].normalized) {
double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1);
LLVMValueRef scale_val = lp_build_const_vec(type, scale);
- input = LLVMBuildMul(builder, input, scale_val, "");
+ input = LLVMBuildFMul(builder, input, scale_val, "");
}
}
else {
@@ -230,7 +228,7 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1);
LLVMValueRef scale_val = lp_build_const_vec(type, scale);
input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), "");
- input = LLVMBuildMul(builder, input, scale_val, "");
+ input = LLVMBuildFMul(builder, input, scale_val, "");
}
else {
/* FIXME */
@@ -250,14 +248,59 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
start = stop;
}
- lp_build_format_swizzle_soa(format_desc, type, inputs, rgba);
+ lp_build_format_swizzle_soa(format_desc, &bld, inputs, rgba_out);
+}
+
+
+void
+lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder,
+ struct lp_type dst_type,
+ LLVMValueRef packed,
+ LLVMValueRef *rgba)
+{
+ LLVMValueRef mask = lp_build_const_int_vec(dst_type, 0xff);
+ unsigned chan;
+
+ packed = LLVMBuildBitCast(builder, packed,
+ lp_build_int_vec_type(dst_type), "");
+
+ /* Decode the input vector components */
+ for (chan = 0; chan < 4; ++chan) {
+ unsigned start = chan*8;
+ unsigned stop = start + 8;
+ LLVMValueRef input;
+
+ input = packed;
+
+ if (start)
+ input = LLVMBuildLShr(builder, input,
+ lp_build_const_int_vec(dst_type, start), "");
+
+ if (stop < 32)
+ input = LLVMBuildAnd(builder, input, mask, "");
+
+ input = lp_build_unsigned_norm_to_float(builder, 8, dst_type, input);
+
+ rgba[chan] = input;
+ }
}
+
/**
- * Fetch a pixel into a SoA.
+ * Fetch a texels from a texture, returning them in SoA layout.
*
- * i and j are the sub-block pixel coordinates.
+ * \param type the desired return type for 'rgba'. The vector length
+ * is the number of texels to fetch
+ *
+ * \param base_ptr points to start of the texture image block. For non-
+ * compressed formats, this simply points to the texel.
+ * For compressed formats, it points to the start of the
+ * compressed data block.
+ *
+ * \param i, j the sub-block pixel coordinates. For non-compressed formats
+ * these will always be (0,0). For compressed formats, i will
+ * be in [0, block_width-1] and j will be in [0, block_height-1].
*/
void
lp_build_fetch_rgba_soa(LLVMBuilderRef builder,
@@ -267,7 +310,7 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder,
LLVMValueRef offset,
LLVMValueRef i,
LLVMValueRef j,
- LLVMValueRef *rgba)
+ LLVMValueRef rgba_out[4])
{
if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
@@ -281,7 +324,7 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder,
{
/*
* The packed pixel fits into an element of the destination format. Put
- * the packed pixels into a vector and estract each component for all
+ * the packed pixels into a vector and extract each component for all
* vector elements in parallel.
*/
@@ -289,6 +332,7 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder,
/*
* gather the texels from the texture
+ * Ex: packed = {BGRA, BGRA, BGRA, BGRA}.
*/
packed = lp_build_gather(builder,
type.length,
@@ -302,49 +346,86 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder,
lp_build_unpack_rgba_soa(builder,
format_desc,
type,
- packed, rgba);
+ packed, rgba_out);
+ return;
}
- else {
- /*
- * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
- *
- * This is not the most efficient way of fetching pixels, as
- * we miss some opportunities to do vectorization, but this it is a
- * convenient for formats or scenarios for which there was no opportunity
- * or incentive to optimize.
- */
+ /*
+ * Try calling lp_build_fetch_rgba_aos for all pixels.
+ */
+
+ if (util_format_fits_8unorm(format_desc) &&
+ type.floating && type.width == 32 && type.length == 4) {
+ struct lp_type tmp_type;
+ LLVMValueRef tmp;
+
+ memset(&tmp_type, 0, sizeof tmp_type);
+ tmp_type.width = 8;
+ tmp_type.length = type.length * 4;
+ tmp_type.norm = TRUE;
+
+ tmp = lp_build_fetch_rgba_aos(builder, format_desc, tmp_type,
+ base_ptr, offset, i, j);
+
+ lp_build_rgba8_to_f32_soa(builder,
+ type,
+ tmp,
+ rgba_out);
+
+ return;
+ }
+
+ /*
+ * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
+ *
+ * This is not the most efficient way of fetching pixels, as we
+ * miss some opportunities to do vectorization, but this is
+ * convenient for formats or scenarios for which there was no
+ * opportunity or incentive to optimize.
+ */
+
+ {
unsigned k, chan;
+ struct lp_type tmp_type;
- assert(type.floating);
+ if (gallivm_debug & GALLIVM_DEBUG_PERF) {
+ debug_printf("%s: scalar unpacking of %s\n",
+ __FUNCTION__, format_desc->short_name);
+ }
+
+ tmp_type = type;
+ tmp_type.length = 4;
for (chan = 0; chan < 4; ++chan) {
- rgba[chan] = lp_build_undef(type);
+ rgba_out[chan] = lp_build_undef(type);
}
+ /* loop over number of pixels */
for(k = 0; k < type.length; ++k) {
LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), k, 0);
LLVMValueRef offset_elem;
- LLVMValueRef ptr;
LLVMValueRef i_elem, j_elem;
LLVMValueRef tmp;
offset_elem = LLVMBuildExtractElement(builder, offset, index, "");
- ptr = LLVMBuildGEP(builder, base_ptr, &offset_elem, 1, "");
i_elem = LLVMBuildExtractElement(builder, i, index, "");
j_elem = LLVMBuildExtractElement(builder, j, index, "");
- tmp = lp_build_fetch_rgba_aos(builder, format_desc, ptr, i_elem, j_elem);
+ /* Get a single float[4]={R,G,B,A} pixel */
+ tmp = lp_build_fetch_rgba_aos(builder, format_desc, tmp_type,
+ base_ptr, offset_elem,
+ i_elem, j_elem);
/*
- * AoS to SoA
+ * Insert the AoS tmp value channels into the SoA result vectors at
+ * position = 'index'.
*/
-
for (chan = 0; chan < 4; ++chan) {
LLVMValueRef chan_val = LLVMConstInt(LLVMInt32Type(), chan, 0),
tmp_chan = LLVMBuildExtractElement(builder, tmp, chan_val, "");
- rgba[chan] = LLVMBuildInsertElement(builder, rgba[chan], tmp_chan, index, "");
+ rgba_out[chan] = LLVMBuildInsertElement(builder, rgba_out[chan],
+ tmp_chan, index, "");
}
}
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c b/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c
new file mode 100644
index 00000000000..2bce2895551
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c
@@ -0,0 +1,445 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+
+/**
+ * @file
+ * YUV pixel format manipulation.
+ *
+ * @author Jose Fonseca <[email protected]>
+ */
+
+
+#include "util/u_format.h"
+#include "util/u_cpu_detect.h"
+
+#include "lp_bld_arit.h"
+#include "lp_bld_type.h"
+#include "lp_bld_const.h"
+#include "lp_bld_conv.h"
+#include "lp_bld_gather.h"
+#include "lp_bld_format.h"
+#include "lp_bld_logic.h"
+
+/**
+ * Extract Y, U, V channels from packed UYVY.
+ * @param packed is a <n x i32> vector with the packed UYVY blocks
+ * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1)
+ */
+static void
+uyvy_to_yuv_soa(LLVMBuilderRef builder,
+ unsigned n,
+ LLVMValueRef packed,
+ LLVMValueRef i,
+ LLVMValueRef *y,
+ LLVMValueRef *u,
+ LLVMValueRef *v)
+{
+ struct lp_type type;
+ LLVMValueRef mask;
+
+ memset(&type, 0, sizeof type);
+ type.width = 32;
+ type.length = n;
+
+ assert(lp_check_value(type, packed));
+ assert(lp_check_value(type, i));
+
+ /*
+ * y = (uyvy >> (16*i + 8)) & 0xff
+ * u = (uyvy ) & 0xff
+ * v = (uyvy >> 16 ) & 0xff
+ */
+
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+ /*
+ * Avoid shift with per-element count.
+ * No support on x86, gets translated to roughly 5 instructions
+ * per element. Didn't measure performance but cuts shader size
+ * by quite a bit (less difference if cpu has no sse4.1 support).
+ */
+ if (util_cpu_caps.has_sse2 && n == 4) {
+ LLVMValueRef sel, tmp, tmp2;
+ struct lp_build_context bld32;
+
+ lp_build_context_init(&bld32, builder, type);
+
+ tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 8), "");
+ tmp2 = LLVMBuildLShr(builder, tmp, lp_build_const_int_vec(type, 16), "");
+ sel = lp_build_compare(builder, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(type, 0));
+ *y = lp_build_select(&bld32, sel, tmp, tmp2);
+ } else
+#endif
+ {
+ LLVMValueRef shift;
+ shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), "");
+ shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(type, 8), "");
+ *y = LLVMBuildLShr(builder, packed, shift, "");
+ }
+
+ *u = packed;
+ *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 16), "");
+
+ mask = lp_build_const_int_vec(type, 0xff);
+
+ *y = LLVMBuildAnd(builder, *y, mask, "y");
+ *u = LLVMBuildAnd(builder, *u, mask, "u");
+ *v = LLVMBuildAnd(builder, *v, mask, "v");
+}
+
+
+/**
+ * Extract Y, U, V channels from packed YUYV.
+ * @param packed is a <n x i32> vector with the packed YUYV blocks
+ * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1)
+ */
+static void
+yuyv_to_yuv_soa(LLVMBuilderRef builder,
+ unsigned n,
+ LLVMValueRef packed,
+ LLVMValueRef i,
+ LLVMValueRef *y,
+ LLVMValueRef *u,
+ LLVMValueRef *v)
+{
+ struct lp_type type;
+ LLVMValueRef mask;
+
+ memset(&type, 0, sizeof type);
+ type.width = 32;
+ type.length = n;
+
+ assert(lp_check_value(type, packed));
+ assert(lp_check_value(type, i));
+
+ /*
+ * y = (yuyv >> 16*i) & 0xff
+ * u = (yuyv >> 8 ) & 0xff
+ * v = (yuyv >> 24 ) & 0xff
+ */
+
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+ /*
+ * Avoid shift with per-element count.
+ * No support on x86, gets translated to roughly 5 instructions
+ * per element. Didn't measure performance but cuts shader size
+ * by quite a bit (less difference if cpu has no sse4.1 support).
+ */
+ if (util_cpu_caps.has_sse2 && n == 4) {
+ LLVMValueRef sel, tmp;
+ struct lp_build_context bld32;
+
+ lp_build_context_init(&bld32, builder, type);
+
+ tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 16), "");
+ sel = lp_build_compare(builder, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(type, 0));
+ *y = lp_build_select(&bld32, sel, packed, tmp);
+ } else
+#endif
+ {
+ LLVMValueRef shift;
+ shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), "");
+ *y = LLVMBuildLShr(builder, packed, shift, "");
+ }
+
+ *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 8), "");
+ *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 24), "");
+
+ mask = lp_build_const_int_vec(type, 0xff);
+
+ *y = LLVMBuildAnd(builder, *y, mask, "y");
+ *u = LLVMBuildAnd(builder, *u, mask, "u");
+ *v = LLVMBuildAnd(builder, *v, mask, "v");
+}
+
+
+static INLINE void
+yuv_to_rgb_soa(LLVMBuilderRef builder,
+ unsigned n,
+ LLVMValueRef y, LLVMValueRef u, LLVMValueRef v,
+ LLVMValueRef *r, LLVMValueRef *g, LLVMValueRef *b)
+{
+ struct lp_type type;
+ struct lp_build_context bld;
+
+ LLVMValueRef c0;
+ LLVMValueRef c8;
+ LLVMValueRef c16;
+ LLVMValueRef c128;
+ LLVMValueRef c255;
+
+ LLVMValueRef cy;
+ LLVMValueRef cug;
+ LLVMValueRef cub;
+ LLVMValueRef cvr;
+ LLVMValueRef cvg;
+
+ memset(&type, 0, sizeof type);
+ type.sign = TRUE;
+ type.width = 32;
+ type.length = n;
+
+ lp_build_context_init(&bld, builder, type);
+
+ assert(lp_check_value(type, y));
+ assert(lp_check_value(type, u));
+ assert(lp_check_value(type, v));
+
+ /*
+ * Constants
+ */
+
+ c0 = lp_build_const_int_vec(type, 0);
+ c8 = lp_build_const_int_vec(type, 8);
+ c16 = lp_build_const_int_vec(type, 16);
+ c128 = lp_build_const_int_vec(type, 128);
+ c255 = lp_build_const_int_vec(type, 255);
+
+ cy = lp_build_const_int_vec(type, 298);
+ cug = lp_build_const_int_vec(type, -100);
+ cub = lp_build_const_int_vec(type, 516);
+ cvr = lp_build_const_int_vec(type, 409);
+ cvg = lp_build_const_int_vec(type, -208);
+
+ /*
+ * y -= 16;
+ * u -= 128;
+ * v -= 128;
+ */
+
+ y = LLVMBuildSub(builder, y, c16, "");
+ u = LLVMBuildSub(builder, u, c128, "");
+ v = LLVMBuildSub(builder, v, c128, "");
+
+ /*
+ * r = 298 * _y + 409 * _v + 128;
+ * g = 298 * _y - 100 * _u - 208 * _v + 128;
+ * b = 298 * _y + 516 * _u + 128;
+ */
+
+ y = LLVMBuildMul(builder, y, cy, "");
+ y = LLVMBuildAdd(builder, y, c128, "");
+
+ *r = LLVMBuildMul(builder, v, cvr, "");
+ *g = LLVMBuildAdd(builder,
+ LLVMBuildMul(builder, u, cug, ""),
+ LLVMBuildMul(builder, v, cvg, ""),
+ "");
+ *b = LLVMBuildMul(builder, u, cub, "");
+
+ *r = LLVMBuildAdd(builder, *r, y, "");
+ *g = LLVMBuildAdd(builder, *g, y, "");
+ *b = LLVMBuildAdd(builder, *b, y, "");
+
+ /*
+ * r >>= 8;
+ * g >>= 8;
+ * b >>= 8;
+ */
+
+ *r = LLVMBuildAShr(builder, *r, c8, "r");
+ *g = LLVMBuildAShr(builder, *g, c8, "g");
+ *b = LLVMBuildAShr(builder, *b, c8, "b");
+
+ /*
+ * Clamp
+ */
+
+ *r = lp_build_clamp(&bld, *r, c0, c255);
+ *g = lp_build_clamp(&bld, *g, c0, c255);
+ *b = lp_build_clamp(&bld, *b, c0, c255);
+}
+
+
+static LLVMValueRef
+rgb_to_rgba_aos(LLVMBuilderRef builder,
+ unsigned n,
+ LLVMValueRef r, LLVMValueRef g, LLVMValueRef b)
+{
+ struct lp_type type;
+ LLVMValueRef a;
+ LLVMValueRef rgba;
+
+ memset(&type, 0, sizeof type);
+ type.sign = TRUE;
+ type.width = 32;
+ type.length = n;
+
+ assert(lp_check_value(type, r));
+ assert(lp_check_value(type, g));
+ assert(lp_check_value(type, b));
+
+ /*
+ * Make a 4 x unorm8 vector
+ */
+
+ r = r;
+ g = LLVMBuildShl(builder, g, lp_build_const_int_vec(type, 8), "");
+ b = LLVMBuildShl(builder, b, lp_build_const_int_vec(type, 16), "");
+ a = lp_build_const_int_vec(type, 0xff000000);
+
+ rgba = r;
+ rgba = LLVMBuildOr(builder, rgba, g, "");
+ rgba = LLVMBuildOr(builder, rgba, b, "");
+ rgba = LLVMBuildOr(builder, rgba, a, "");
+
+ rgba = LLVMBuildBitCast(builder, rgba,
+ LLVMVectorType(LLVMInt8Type(), 4*n), "");
+
+ return rgba;
+}
+
+
+/**
+ * Convert from <n x i32> packed UYVY to <4n x i8> RGBA AoS
+ */
+static LLVMValueRef
+uyvy_to_rgba_aos(LLVMBuilderRef builder,
+ unsigned n,
+ LLVMValueRef packed,
+ LLVMValueRef i)
+{
+ LLVMValueRef y, u, v;
+ LLVMValueRef r, g, b;
+ LLVMValueRef rgba;
+
+ uyvy_to_yuv_soa(builder, n, packed, i, &y, &u, &v);
+ yuv_to_rgb_soa(builder, n, y, u, v, &r, &g, &b);
+ rgba = rgb_to_rgba_aos(builder, n, r, g, b);
+
+ return rgba;
+}
+
+
+/**
+ * Convert from <n x i32> packed YUYV to <4n x i8> RGBA AoS
+ */
+static LLVMValueRef
+yuyv_to_rgba_aos(LLVMBuilderRef builder,
+ unsigned n,
+ LLVMValueRef packed,
+ LLVMValueRef i)
+{
+ LLVMValueRef y, u, v;
+ LLVMValueRef r, g, b;
+ LLVMValueRef rgba;
+
+ yuyv_to_yuv_soa(builder, n, packed, i, &y, &u, &v);
+ yuv_to_rgb_soa(builder, n, y, u, v, &r, &g, &b);
+ rgba = rgb_to_rgba_aos(builder, n, r, g, b);
+
+ return rgba;
+}
+
+
+/**
+ * Convert from <n x i32> packed RG_BG to <4n x i8> RGBA AoS
+ */
+static LLVMValueRef
+rgbg_to_rgba_aos(LLVMBuilderRef builder,
+ unsigned n,
+ LLVMValueRef packed,
+ LLVMValueRef i)
+{
+ LLVMValueRef r, g, b;
+ LLVMValueRef rgba;
+
+ uyvy_to_yuv_soa(builder, n, packed, i, &g, &r, &b);
+ rgba = rgb_to_rgba_aos(builder, n, r, g, b);
+
+ return rgba;
+}
+
+
+/**
+ * Convert from <n x i32> packed GR_GB to <4n x i8> RGBA AoS
+ */
+static LLVMValueRef
+grgb_to_rgba_aos(LLVMBuilderRef builder,
+ unsigned n,
+ LLVMValueRef packed,
+ LLVMValueRef i)
+{
+ LLVMValueRef r, g, b;
+ LLVMValueRef rgba;
+
+ yuyv_to_yuv_soa(builder, n, packed, i, &g, &r, &b);
+ rgba = rgb_to_rgba_aos(builder, n, r, g, b);
+
+ return rgba;
+}
+
+
+/**
+ * @param n is the number of pixels processed
+ * @param packed is a <n x i32> vector with the packed YUYV blocks
+ * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1)
+ * @return a <4*n x i8> vector with the pixel RGBA values in AoS
+ */
+LLVMValueRef
+lp_build_fetch_subsampled_rgba_aos(LLVMBuilderRef builder,
+ const struct util_format_description *format_desc,
+ unsigned n,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offset,
+ LLVMValueRef i,
+ LLVMValueRef j)
+{
+ LLVMValueRef packed;
+ LLVMValueRef rgba;
+
+ assert(format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED);
+ assert(format_desc->block.bits == 32);
+ assert(format_desc->block.width == 2);
+ assert(format_desc->block.height == 1);
+
+ packed = lp_build_gather(builder, n, 32, 32, base_ptr, offset);
+
+ (void)j;
+
+ switch (format_desc->format) {
+ case PIPE_FORMAT_UYVY:
+ rgba = uyvy_to_rgba_aos(builder, n, packed, i);
+ break;
+ case PIPE_FORMAT_YUYV:
+ rgba = yuyv_to_rgba_aos(builder, n, packed, i);
+ break;
+ case PIPE_FORMAT_R8G8_B8G8_UNORM:
+ rgba = rgbg_to_rgba_aos(builder, n, packed, i);
+ break;
+ case PIPE_FORMAT_G8R8_G8B8_UNORM:
+ rgba = grgb_to_rgba_aos(builder, n, packed, i);
+ break;
+ default:
+ assert(0);
+ rgba = LLVMGetUndef(LLVMVectorType(LLVMInt8Type(), 4*n));
+ break;
+ }
+
+ return rgba;
+}
+
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_gather.c b/src/gallium/auxiliary/gallivm/lp_bld_gather.c
new file mode 100644
index 00000000000..d60472e0656
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_gather.c
@@ -0,0 +1,148 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+
+#include "util/u_debug.h"
+#include "lp_bld_debug.h"
+#include "lp_bld_const.h"
+#include "lp_bld_format.h"
+#include "lp_bld_gather.h"
+
+
+/**
+ * Get the pointer to one element from scatter positions in memory.
+ *
+ * @sa lp_build_gather()
+ */
+LLVMValueRef
+lp_build_gather_elem_ptr(LLVMBuilderRef builder,
+ unsigned length,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offsets,
+ unsigned i)
+{
+ LLVMValueRef offset;
+ LLVMValueRef ptr;
+
+ assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8Type(), 0));
+
+ if (length == 1) {
+ assert(i == 0);
+ offset = offsets;
+ } else {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ offset = LLVMBuildExtractElement(builder, offsets, index, "");
+ }
+
+ ptr = LLVMBuildGEP(builder, base_ptr, &offset, 1, "");
+
+ return ptr;
+}
+
+
+/**
+ * Gather one element from scatter positions in memory.
+ *
+ * @sa lp_build_gather()
+ */
+LLVMValueRef
+lp_build_gather_elem(LLVMBuilderRef builder,
+ unsigned length,
+ unsigned src_width,
+ unsigned dst_width,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offsets,
+ unsigned i)
+{
+ LLVMTypeRef src_type = LLVMIntType(src_width);
+ LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
+ LLVMTypeRef dst_elem_type = LLVMIntType(dst_width);
+ LLVMValueRef ptr;
+ LLVMValueRef res;
+
+ assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8Type(), 0));
+
+ ptr = lp_build_gather_elem_ptr(builder, length, base_ptr, offsets, i);
+ ptr = LLVMBuildBitCast(builder, ptr, src_ptr_type, "");
+ res = LLVMBuildLoad(builder, ptr, "");
+
+ assert(src_width <= dst_width);
+ if (src_width > dst_width)
+ res = LLVMBuildTrunc(builder, res, dst_elem_type, "");
+ if (src_width < dst_width)
+ res = LLVMBuildZExt(builder, res, dst_elem_type, "");
+
+ return res;
+}
+
+
+/**
+ * Gather elements from scatter positions in memory into a single vector.
+ * Use for fetching texels from a texture.
+ * For SSE, typical values are length=4, src_width=32, dst_width=32.
+ *
+ * @param length length of the offsets
+ * @param src_width src element width in bits
+ * @param dst_width result element width in bits (src will be expanded to fit)
+ * @param base_ptr base pointer, should be a i8 pointer type.
+ * @param offsets vector with offsets
+ */
+LLVMValueRef
+lp_build_gather(LLVMBuilderRef builder,
+ unsigned length,
+ unsigned src_width,
+ unsigned dst_width,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offsets)
+{
+ LLVMValueRef res;
+
+ if (length == 1) {
+ /* Scalar */
+ return lp_build_gather_elem(builder, length,
+ src_width, dst_width,
+ base_ptr, offsets, 0);
+ } else {
+ /* Vector */
+
+ LLVMTypeRef dst_elem_type = LLVMIntType(dst_width);
+ LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length);
+ unsigned i;
+
+ res = LLVMGetUndef(dst_vec_type);
+ for (i = 0; i < length; ++i) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ LLVMValueRef elem;
+ elem = lp_build_gather_elem(builder, length,
+ src_width, dst_width,
+ base_ptr, offsets, i);
+ res = LLVMBuildInsertElement(builder, res, elem, index, "");
+ }
+ }
+
+ return res;
+}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_gather.h b/src/gallium/auxiliary/gallivm/lp_bld_gather.h
new file mode 100644
index 00000000000..131af8ea07e
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_gather.h
@@ -0,0 +1,61 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+
+#ifndef LP_BLD_GATHER_H_
+#define LP_BLD_GATHER_H_
+
+
+#include "gallivm/lp_bld.h"
+
+
+LLVMValueRef
+lp_build_gather_elem_ptr(LLVMBuilderRef builder,
+ unsigned length,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offsets,
+ unsigned i);
+
+LLVMValueRef
+lp_build_gather_elem(LLVMBuilderRef builder,
+ unsigned length,
+ unsigned src_width,
+ unsigned dst_width,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offsets,
+ unsigned i);
+
+LLVMValueRef
+lp_build_gather(LLVMBuilderRef builder,
+ unsigned length,
+ unsigned src_width,
+ unsigned dst_width,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offsets);
+
+
+#endif /* LP_BLD_GATHER_H_ */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c
index 5067d0a164f..761f33b578d 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -29,18 +29,74 @@
#include "pipe/p_compiler.h"
#include "util/u_cpu_detect.h"
#include "util/u_debug.h"
+#include "lp_bld_debug.h"
#include "lp_bld_init.h"
+#include <llvm-c/Transforms/Scalar.h>
+
+
+#ifdef DEBUG
+unsigned gallivm_debug = 0;
+
+static const struct debug_named_value lp_bld_debug_flags[] = {
+ { "tgsi", GALLIVM_DEBUG_TGSI, NULL },
+ { "ir", GALLIVM_DEBUG_IR, NULL },
+ { "asm", GALLIVM_DEBUG_ASM, NULL },
+ { "nopt", GALLIVM_DEBUG_NO_OPT, NULL },
+ { "perf", GALLIVM_DEBUG_PERF, NULL },
+ DEBUG_NAMED_VALUE_END
+};
+
+DEBUG_GET_ONCE_FLAGS_OPTION(gallivm_debug, "GALLIVM_DEBUG", lp_bld_debug_flags, 0)
+#endif
+
LLVMModuleRef lp_build_module = NULL;
LLVMExecutionEngineRef lp_build_engine = NULL;
LLVMModuleProviderRef lp_build_provider = NULL;
LLVMTargetDataRef lp_build_target = NULL;
+LLVMPassManagerRef lp_build_pass = NULL;
+
+
+/*
+ * Optimization values are:
+ * - 0: None (-O0)
+ * - 1: Less (-O1)
+ * - 2: Default (-O2, -Os)
+ * - 3: Aggressive (-O3)
+ *
+ * See also CodeGenOpt::Level in llvm/Target/TargetMachine.h
+ */
+enum LLVM_CodeGenOpt_Level {
+#if HAVE_LLVM >= 0x207
+ None, // -O0
+ Less, // -O1
+ Default, // -O2, -Os
+ Aggressive // -O3
+#else
+ Default,
+ None,
+ Aggressive
+#endif
+};
+
+
+extern void
+lp_register_oprofile_jit_event_listener(LLVMExecutionEngineRef EE);
+
+extern void
+lp_set_target_options(void);
void
lp_build_init(void)
{
+#ifdef DEBUG
+ gallivm_debug = debug_get_option_gallivm_debug();
+#endif
+
+ lp_set_target_options();
+
LLVMInitializeNativeTarget();
LLVMLinkInJIT();
@@ -52,18 +108,58 @@ lp_build_init(void)
lp_build_provider = LLVMCreateModuleProviderForExistingModule(lp_build_module);
if (!lp_build_engine) {
+ enum LLVM_CodeGenOpt_Level optlevel;
char *error = NULL;
- if (LLVMCreateJITCompiler(&lp_build_engine, lp_build_provider, 1, &error)) {
+ if (gallivm_debug & GALLIVM_DEBUG_NO_OPT) {
+ optlevel = None;
+ }
+ else {
+ optlevel = Default;
+ }
+
+ if (LLVMCreateJITCompiler(&lp_build_engine, lp_build_provider,
+ (unsigned)optlevel, &error)) {
_debug_printf("%s\n", error);
LLVMDisposeMessage(error);
assert(0);
}
+
+#if defined(DEBUG) || defined(PROFILE)
+ lp_register_oprofile_jit_event_listener(lp_build_engine);
+#endif
}
if (!lp_build_target)
lp_build_target = LLVMGetExecutionEngineTargetData(lp_build_engine);
+ if (!lp_build_pass) {
+ lp_build_pass = LLVMCreateFunctionPassManager(lp_build_provider);
+ LLVMAddTargetData(lp_build_target, lp_build_pass);
+
+ if ((gallivm_debug & GALLIVM_DEBUG_NO_OPT) == 0) {
+ /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
+ * but there are more on SVN. */
+ /* TODO: Add more passes */
+ LLVMAddCFGSimplificationPass(lp_build_pass);
+ LLVMAddPromoteMemoryToRegisterPass(lp_build_pass);
+ LLVMAddConstantPropagationPass(lp_build_pass);
+ if(util_cpu_caps.has_sse4_1) {
+ /* FIXME: There is a bug in this pass, whereby the combination of fptosi
+ * and sitofp (necessary for trunc/floor/ceil/round implementation)
+ * somehow becomes invalid code.
+ */
+ LLVMAddInstructionCombiningPass(lp_build_pass);
+ }
+ LLVMAddGVNPass(lp_build_pass);
+ } else {
+ /* We need at least this pass to prevent the backends to fail in
+ * unexpected ways.
+ */
+ LLVMAddPromoteMemoryToRegisterPass(lp_build_pass);
+ }
+ }
+
util_cpu_detect();
#if 0
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.h b/src/gallium/auxiliary/gallivm/lp_bld_init.h
index 0ec2afcd1be..f26fdac4663 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.h
@@ -38,10 +38,13 @@ extern LLVMModuleRef lp_build_module;
extern LLVMExecutionEngineRef lp_build_engine;
extern LLVMModuleProviderRef lp_build_provider;
extern LLVMTargetDataRef lp_build_target;
+extern LLVMPassManagerRef lp_build_pass;
void
lp_build_init(void);
+extern void
+lp_func_delete_body(LLVMValueRef func);
#endif /* !LP_BLD_INIT_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_limits.h b/src/gallium/auxiliary/gallivm/lp_bld_limits.h
new file mode 100644
index 00000000000..369c8121b5c
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_limits.h
@@ -0,0 +1,55 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+
+#ifndef LP_BLD_LIMITS_H_
+#define LP_BLD_LIMITS_H_
+
+/*
+ * TGSI translation limits.
+ *
+ * Some are slightly above SM 3.0 requirements to give some wiggle room to
+ * the state trackers.
+ */
+
+#define LP_MAX_TGSI_TEMPS 256
+
+#define LP_MAX_TGSI_ADDRS 16
+
+#define LP_MAX_TGSI_IMMEDIATES 256
+
+#define LP_MAX_TGSI_PREDS 16
+
+/**
+ * Maximum control flow nesting
+ *
+ * SM3.0 requires 24
+ */
+#define LP_MAX_TGSI_NESTING 32
+
+
+#endif /* LP_BLD_LIMITS_H_ */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
index d13fa1a5d04..ce5d0214b43 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
@@ -34,11 +34,13 @@
#include "util/u_cpu_detect.h"
+#include "util/u_memory.h"
#include "util/u_debug.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_intr.h"
+#include "lp_bld_debug.h"
#include "lp_bld_logic.h"
@@ -82,15 +84,31 @@ lp_build_compare(LLVMBuilderRef builder,
assert(func >= PIPE_FUNC_NEVER);
assert(func <= PIPE_FUNC_ALWAYS);
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
if(func == PIPE_FUNC_NEVER)
return zeros;
if(func == PIPE_FUNC_ALWAYS)
return ones;
- /* TODO: optimize the constant case */
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+ /*
+ * There are no unsigned integer comparison instructions in SSE.
+ */
- /* XXX: It is not clear if we should use the ordered or unordered operators */
+ if (!type.floating && !type.sign &&
+ type.width * type.length == 128 &&
+ util_cpu_caps.has_sse2 &&
+ (func == PIPE_FUNC_LESS ||
+ func == PIPE_FUNC_LEQUAL ||
+ func == PIPE_FUNC_GREATER ||
+ func == PIPE_FUNC_GEQUAL) &&
+ (gallivm_debug & GALLIVM_DEBUG_PERF)) {
+ debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n",
+ __FUNCTION__, type.length, type.width);
+ }
+#endif
#if HAVE_LLVM < 0x0207
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
@@ -187,12 +205,10 @@ lp_build_compare(LLVMBuilderRef builder,
return lp_build_undef(type);
}
- /* There are no signed byte and unsigned word/dword comparison
- * instructions. So flip the sign bit so that the results match.
+ /* There are no unsigned comparison instructions. So flip the sign bit
+ * so that the results match.
*/
- if(table[func].gt &&
- ((type.width == 8 && type.sign) ||
- (type.width != 8 && !type.sign))) {
+ if (table[func].gt && !type.sign) {
LLVMValueRef msb = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
a = LLVMBuildXor(builder, a, msb, "");
b = LLVMBuildXor(builder, b, msb, "");
@@ -223,6 +239,8 @@ lp_build_compare(LLVMBuilderRef builder,
#endif
#endif /* HAVE_LLVM < 0x0207 */
+ /* XXX: It is not clear if we should use the ordered or unordered operators */
+
if(type.floating) {
LLVMRealPredicate op;
switch(func) {
@@ -324,8 +342,10 @@ lp_build_compare(LLVMBuilderRef builder,
res = LLVMGetUndef(int_vec_type);
- debug_printf("%s: warning: using slow element-wise int"
- " vector comparison\n", __FUNCTION__);
+ if (gallivm_debug & GALLIVM_DEBUG_PERF) {
+ debug_printf("%s: using slow element-wise int"
+ " vector comparison\n", __FUNCTION__);
+ }
for(i = 0; i < type.length; ++i) {
LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
@@ -364,9 +384,55 @@ lp_build_cmp(struct lp_build_context *bld,
/**
+ * Return (mask & a) | (~mask & b);
+ */
+LLVMValueRef
+lp_build_select_bitwise(struct lp_build_context *bld,
+ LLVMValueRef mask,
+ LLVMValueRef a,
+ LLVMValueRef b)
+{
+ struct lp_type type = bld->type;
+ LLVMValueRef res;
+
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+
+ if (a == b) {
+ return a;
+ }
+
+ if(type.floating) {
+ LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+ a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+ b = LLVMBuildBitCast(bld->builder, b, int_vec_type, "");
+ }
+
+ a = LLVMBuildAnd(bld->builder, a, mask, "");
+
+ /* This often gets translated to PANDN, but sometimes the NOT is
+ * pre-computed and stored in another constant. The best strategy depends
+ * on available registers, so it is not a big deal -- hopefully LLVM does
+ * the right decision attending the rest of the program.
+ */
+ b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), "");
+
+ res = LLVMBuildOr(bld->builder, a, b, "");
+
+ if(type.floating) {
+ LLVMTypeRef vec_type = lp_build_vec_type(type);
+ res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
+ }
+
+ return res;
+}
+
+
+/**
* Return mask ? a : b;
*
- * mask is a bitwise mask, composed of 0 or ~0 for each element.
+ * mask is a bitwise mask, composed of 0 or ~0 for each element. Any other value
+ * will yield unpredictable results.
*/
LLVMValueRef
lp_build_select(struct lp_build_context *bld,
@@ -377,6 +443,9 @@ lp_build_select(struct lp_build_context *bld,
struct lp_type type = bld->type;
LLVMValueRef res;
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+
if(a == b)
return a;
@@ -384,49 +453,78 @@ lp_build_select(struct lp_build_context *bld,
mask = LLVMBuildTrunc(bld->builder, mask, LLVMInt1Type(), "");
res = LLVMBuildSelect(bld->builder, mask, a, b, "");
}
- else {
- if(type.floating) {
- LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
- a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
- b = LLVMBuildBitCast(bld->builder, b, int_vec_type, "");
+ else if (util_cpu_caps.has_sse4_1 &&
+ type.width * type.length == 128 &&
+ !LLVMIsConstant(a) &&
+ !LLVMIsConstant(b) &&
+ !LLVMIsConstant(mask)) {
+ const char *intrinsic;
+ LLVMTypeRef arg_type;
+ LLVMValueRef args[3];
+
+ if (type.width == 64) {
+ intrinsic = "llvm.x86.sse41.blendvpd";
+ arg_type = LLVMVectorType(LLVMDoubleType(), 2);
+ } else if (type.width == 32) {
+ intrinsic = "llvm.x86.sse41.blendvps";
+ arg_type = LLVMVectorType(LLVMFloatType(), 4);
+ } else {
+ intrinsic = "llvm.x86.sse41.pblendvb";
+ arg_type = LLVMVectorType(LLVMInt8Type(), 16);
}
- a = LLVMBuildAnd(bld->builder, a, mask, "");
+ if (arg_type != bld->int_vec_type) {
+ mask = LLVMBuildBitCast(bld->builder, mask, arg_type, "");
+ }
- /* This often gets translated to PANDN, but sometimes the NOT is
- * pre-computed and stored in another constant. The best strategy depends
- * on available registers, so it is not a big deal -- hopefully LLVM does
- * the right decision attending the rest of the program.
- */
- b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), "");
+ if (arg_type != bld->vec_type) {
+ a = LLVMBuildBitCast(bld->builder, a, arg_type, "");
+ b = LLVMBuildBitCast(bld->builder, b, arg_type, "");
+ }
- res = LLVMBuildOr(bld->builder, a, b, "");
+ args[0] = b;
+ args[1] = a;
+ args[2] = mask;
- if(type.floating) {
- LLVMTypeRef vec_type = lp_build_vec_type(type);
- res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
+ res = lp_build_intrinsic(bld->builder, intrinsic,
+ arg_type, args, Elements(args));
+
+ if (arg_type != bld->vec_type) {
+ res = LLVMBuildBitCast(bld->builder, res, bld->vec_type, "");
}
}
+ else {
+ res = lp_build_select_bitwise(bld, mask, a, b);
+ }
return res;
}
+/**
+ * Return mask ? a : b;
+ *
+ * mask is a TGSI_WRITEMASK_xxx.
+ */
LLVMValueRef
lp_build_select_aos(struct lp_build_context *bld,
+ unsigned mask,
LLVMValueRef a,
- LLVMValueRef b,
- const boolean cond[4])
+ LLVMValueRef b)
{
const struct lp_type type = bld->type;
const unsigned n = type.length;
unsigned i, j;
+ assert((mask & ~0xf) == 0);
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+
if(a == b)
return a;
- if(cond[0] && cond[1] && cond[2] && cond[3])
+ if((mask & 0xf) == 0xf)
return a;
- if(!cond[0] && !cond[1] && !cond[2] && !cond[3])
+ if((mask & 0xf) == 0x0)
return b;
if(a == bld->undef || b == bld->undef)
return bld->undef;
@@ -449,7 +547,9 @@ lp_build_select_aos(struct lp_build_context *bld,
for(j = 0; j < n; j += 4)
for(i = 0; i < 4; ++i)
- shuffles[j + i] = LLVMConstInt(elem_type, (cond[i] ? 0 : n) + j + i, 0);
+ shuffles[j + i] = LLVMConstInt(elem_type,
+ (mask & (1 << i) ? 0 : n) + j + i,
+ 0);
return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), "");
}
@@ -458,26 +558,17 @@ lp_build_select_aos(struct lp_build_context *bld,
/* XXX: Unfortunately select of vectors do not work */
/* Use a select */
LLVMTypeRef elem_type = LLVMInt1Type();
- LLVMValueRef cond[LP_MAX_VECTOR_LENGTH];
+ LLVMValueRef cond_vec[LP_MAX_VECTOR_LENGTH];
for(j = 0; j < n; j += 4)
for(i = 0; i < 4; ++i)
- cond[j + i] = LLVMConstInt(elem_type, cond[i] ? 1 : 0, 0);
+ cond_vec[j + i] = LLVMConstInt(elem_type,
+ mask & (1 << i) ? 1 : 0, 0);
- return LLVMBuildSelect(bld->builder, LLVMConstVector(cond, n), a, b, "");
+ return LLVMBuildSelect(bld->builder, LLVMConstVector(cond_vec, n), a, b, "");
#else
- LLVMValueRef mask = lp_build_const_mask_aos(type, cond);
- return lp_build_select(bld, mask, a, b);
+ LLVMValueRef mask_vec = lp_build_const_mask_aos(type, mask);
+ return lp_build_select(bld, mask_vec, a, b);
#endif
}
}
-
-
-/** Return (a & ~b) */
-LLVMValueRef
-lp_build_andc(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b)
-{
- b = LLVMBuildNot(bld->builder, b, "");
- b = LLVMBuildAnd(bld->builder, a, b, "");
- return b;
-}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.h b/src/gallium/auxiliary/gallivm/lp_bld_logic.h
index 29f9fc3b205..141fb92058a 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_logic.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.h
@@ -63,6 +63,11 @@ lp_build_cmp(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b);
+LLVMValueRef
+lp_build_select_bitwise(struct lp_build_context *bld,
+ LLVMValueRef mask,
+ LLVMValueRef a,
+ LLVMValueRef b);
LLVMValueRef
lp_build_select(struct lp_build_context *bld,
@@ -72,13 +77,9 @@ lp_build_select(struct lp_build_context *bld,
LLVMValueRef
lp_build_select_aos(struct lp_build_context *bld,
+ unsigned mask,
LLVMValueRef a,
- LLVMValueRef b,
- const boolean cond[4]);
-
-
-LLVMValueRef
-lp_build_andc(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b);
+ LLVMValueRef b);
#endif /* !LP_BLD_LOGIC_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
new file mode 100644
index 00000000000..48baf7c425c
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
@@ -0,0 +1,180 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+
+#ifndef __STDC_LIMIT_MACROS
+#define __STDC_LIMIT_MACROS
+#endif
+
+#ifndef __STDC_CONSTANT_MACROS
+#define __STDC_CONSTANT_MACROS
+#endif
+
+#include <llvm-c/Core.h>
+#include <llvm-c/ExecutionEngine.h>
+#include <llvm/Target/TargetOptions.h>
+#include <llvm/ExecutionEngine/ExecutionEngine.h>
+#include <llvm/ExecutionEngine/JITEventListener.h>
+#include <llvm/Support/CommandLine.h>
+#include <llvm/Support/PrettyStackTrace.h>
+
+#include "pipe/p_config.h"
+#include "util/u_debug.h"
+
+
+#if (defined(PIPE_OS_WINDOWS) && !defined(PIPE_CC_MSVC)) || defined(PIPE_OS_EMBDDED)
+
+#include "llvm/Support/raw_ostream.h"
+
+class raw_debug_ostream :
+ public llvm::raw_ostream
+{
+ uint64_t pos;
+
+ void write_impl(const char *Ptr, size_t Size);
+ uint64_t current_pos() { return pos; }
+ uint64_t current_pos() const { return pos; }
+
+#if HAVE_LLVM >= 0x207
+ uint64_t preferred_buffer_size() { return 512; }
+#else
+ size_t preferred_buffer_size() { return 512; }
+#endif
+};
+
+
+void
+raw_debug_ostream::write_impl(const char *Ptr, size_t Size)
+{
+ if (Size > 0) {
+ char *lastPtr = (char *)&Ptr[Size];
+ char last = *lastPtr;
+ *lastPtr = 0;
+ _debug_printf("%*s", Size, Ptr);
+ *lastPtr = last;
+ pos += Size;
+ }
+}
+
+
+/**
+ * Same as LLVMDumpValue, but through our debugging channels.
+ */
+extern "C" void
+lp_debug_dump_value(LLVMValueRef value)
+{
+ raw_debug_ostream os;
+ llvm::unwrap(value)->print(os);
+ os.flush();
+}
+
+
+#else
+
+
+extern "C" void
+lp_debug_dump_value(LLVMValueRef value)
+{
+ LLVMDumpValue(value);
+}
+
+
+#endif
+
+
+/**
+ * Register the engine with oprofile.
+ *
+ * This allows to see the LLVM IR function names in oprofile output.
+ *
+ * To actually work LLVM needs to be built with the --with-oprofile configure
+ * option.
+ *
+ * Also a oprofile:oprofile user:group is necessary. Which is not created by
+ * default on some distributions.
+ */
+extern "C" void
+lp_register_oprofile_jit_event_listener(LLVMExecutionEngineRef EE)
+{
+ llvm::unwrap(EE)->RegisterJITEventListener(llvm::createOProfileJITEventListener());
+}
+
+
+extern "C" void
+lp_set_target_options(void)
+{
+#if defined(DEBUG)
+#if HAVE_LLVM >= 0x0207
+ llvm::JITEmitDebugInfo = true;
+#endif
+#endif
+
+#if defined(DEBUG) || defined(PROFILE)
+ llvm::NoFramePointerElim = true;
+#endif
+
+ llvm::NoExcessFPPrecision = false;
+
+ /* XXX: Investigate this */
+#if 0
+ llvm::UnsafeFPMath = true;
+#endif
+
+ /*
+ * LLVM will generate MMX instructions for vectors <= 64 bits, leading to
+ * innefficient code, and in 32bit systems, to the corruption of the FPU
+ * stack given that it expects the user to generate the EMMS instructions.
+ *
+ * See also:
+ * - http://llvm.org/bugs/show_bug.cgi?id=3287
+ * - http://l4.me.uk/post/2009/06/07/llvm-wrinkle-3-configuration-what-configuration/
+ */
+ static boolean first = TRUE;
+ if (first) {
+ static const char* options[] = {
+ "prog",
+ "-disable-mmx"
+ };
+ llvm::cl::ParseCommandLineOptions(2, const_cast<char**>(options));
+ first = FALSE;
+ }
+
+ /*
+ * By default LLVM adds a signal handler to output a pretty stack trace.
+ * This signal handler is never removed, causing problems when unloading the
+ * shared object where the gallium driver resides.
+ */
+ llvm::DisablePrettyStackTrace = true;
+}
+
+
+extern "C" void
+lp_func_delete_body(LLVMValueRef FF)
+{
+ llvm::Function *func = llvm::unwrap<llvm::Function>(FF);
+ func->deleteBody();
+}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
index 186f8849b8d..f7eb7148ab8 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
@@ -111,8 +111,6 @@ lp_build_const_pack_shuffle(unsigned n)
assert(n <= LP_MAX_VECTOR_LENGTH);
- /* TODO: cache results in a static table */
-
for(i = 0; i < n; ++i)
elems[i] = LLVMConstInt(LLVMInt32Type(), 2*i, 0);
@@ -171,14 +169,13 @@ lp_build_unpack2(LLVMBuilderRef builder,
msb = lp_build_zero(src_type);
/* Interleave bits */
- if(util_cpu_caps.little_endian) {
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
*dst_lo = lp_build_interleave2(builder, src_type, src, msb, 0);
*dst_hi = lp_build_interleave2(builder, src_type, src, msb, 1);
- }
- else {
+#else
*dst_lo = lp_build_interleave2(builder, src_type, msb, src, 0);
*dst_hi = lp_build_interleave2(builder, src_type, msb, src, 1);
- }
+#endif
/* Cast the result into the new type (twice as wide) */
@@ -261,13 +258,14 @@ lp_build_pack2(LLVMBuilderRef builder,
#endif
LLVMTypeRef dst_vec_type = lp_build_vec_type(dst_type);
LLVMValueRef shuffle;
- LLVMValueRef res;
+ LLVMValueRef res = NULL;
assert(!src_type.floating);
assert(!dst_type.floating);
assert(src_type.width == dst_type.width * 2);
assert(src_type.length * 2 == dst_type.length);
+ /* Check for special cases first */
if(util_cpu_caps.has_sse2 && src_type.width * src_type.length == 128) {
switch(src_type.width) {
case 32:
@@ -283,8 +281,8 @@ lp_build_pack2(LLVMBuilderRef builder,
return lp_build_intrinsic_binary(builder, "llvm.x86.sse41.packusdw", dst_vec_type, lo, hi);
}
else {
- assert(0);
- return LLVMGetUndef(dst_vec_type);
+ /* use generic shuffle below */
+ res = NULL;
}
}
break;
@@ -310,10 +308,13 @@ lp_build_pack2(LLVMBuilderRef builder,
break;
}
- res = LLVMBuildBitCast(builder, res, dst_vec_type, "");
- return res;
+ if (res) {
+ res = LLVMBuildBitCast(builder, res, dst_vec_type, "");
+ return res;
+ }
}
+ /* generic shuffle */
lo = LLVMBuildBitCast(builder, lo, dst_vec_type, "");
hi = LLVMBuildBitCast(builder, hi, dst_vec_type, "");
@@ -427,3 +428,123 @@ lp_build_pack(LLVMBuilderRef builder,
return tmp[0];
}
+
+
+/**
+ * Truncate or expand the bitwidth.
+ *
+ * NOTE: Getting the right sign flags is crucial here, as we employ some
+ * intrinsics that do saturation.
+ */
+void
+lp_build_resize(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ const LLVMValueRef *src, unsigned num_srcs,
+ LLVMValueRef *dst, unsigned num_dsts)
+{
+ LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
+ unsigned i;
+
+ /*
+ * We don't support float <-> int conversion here. That must be done
+ * before/after calling this function.
+ */
+ assert(src_type.floating == dst_type.floating);
+
+ /*
+ * We don't support double <-> float conversion yet, although it could be
+ * added with little effort.
+ */
+ assert((!src_type.floating && !dst_type.floating) ||
+ src_type.width == dst_type.width);
+
+ /* We must not loose or gain channels. Only precision */
+ assert(src_type.length * num_srcs == dst_type.length * num_dsts);
+
+ /* We don't support M:N conversion, only 1:N, M:1, or 1:1 */
+ assert(num_srcs == 1 || num_dsts == 1);
+
+ assert(src_type.length <= LP_MAX_VECTOR_LENGTH);
+ assert(dst_type.length <= LP_MAX_VECTOR_LENGTH);
+ assert(num_srcs <= LP_MAX_VECTOR_LENGTH);
+ assert(num_dsts <= LP_MAX_VECTOR_LENGTH);
+
+ if (src_type.width > dst_type.width) {
+ /*
+ * Truncate bit width.
+ */
+
+ assert(num_dsts == 1);
+
+ if (src_type.width * src_type.length == dst_type.width * dst_type.length) {
+ /*
+ * Register width remains constant -- use vector packing intrinsics
+ */
+
+ tmp[0] = lp_build_pack(builder, src_type, dst_type, TRUE, src, num_srcs);
+ }
+ else {
+ /*
+ * Do it element-wise.
+ */
+
+ assert(src_type.length == dst_type.length);
+ tmp[0] = lp_build_undef(dst_type);
+ for (i = 0; i < dst_type.length; ++i) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ LLVMValueRef val = LLVMBuildExtractElement(builder, src[0], index, "");
+ val = LLVMBuildTrunc(builder, val, lp_build_elem_type(dst_type), "");
+ tmp[0] = LLVMBuildInsertElement(builder, tmp[0], val, index, "");
+ }
+ }
+ }
+ else if (src_type.width < dst_type.width) {
+ /*
+ * Expand bit width.
+ */
+
+ assert(num_srcs == 1);
+
+ if (src_type.width * src_type.length == dst_type.width * dst_type.length) {
+ /*
+ * Register width remains constant -- use vector unpack intrinsics
+ */
+ lp_build_unpack(builder, src_type, dst_type, src[0], tmp, num_dsts);
+ }
+ else {
+ /*
+ * Do it element-wise.
+ */
+
+ assert(src_type.length == dst_type.length);
+ tmp[0] = lp_build_undef(dst_type);
+ for (i = 0; i < dst_type.length; ++i) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ LLVMValueRef val = LLVMBuildExtractElement(builder, src[0], index, "");
+
+ if (src_type.sign && dst_type.sign) {
+ val = LLVMBuildSExt(builder, val, lp_build_elem_type(dst_type), "");
+ } else {
+ val = LLVMBuildZExt(builder, val, lp_build_elem_type(dst_type), "");
+ }
+ tmp[0] = LLVMBuildInsertElement(builder, tmp[0], val, index, "");
+ }
+ }
+ }
+ else {
+ /*
+ * No-op
+ */
+
+ assert(num_srcs == 1);
+ assert(num_dsts == 1);
+
+ tmp[0] = src[0];
+ }
+
+ for(i = 0; i < num_dsts; ++i)
+ dst[i] = tmp[i];
+}
+
+
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.h b/src/gallium/auxiliary/gallivm/lp_bld_pack.h
index 41adeed220c..e947b90d164 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_pack.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.h
@@ -37,6 +37,8 @@
#define LP_BLD_PACK_H
+#include "pipe/p_compiler.h"
+
#include "gallivm/lp_bld.h"
@@ -92,4 +94,12 @@ lp_build_pack(LLVMBuilderRef builder,
const LLVMValueRef *src, unsigned num_srcs);
+void
+lp_build_resize(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ const LLVMValueRef *src, unsigned num_srcs,
+ LLVMValueRef *dst, unsigned num_dsts);
+
+
#endif /* !LP_BLD_PACK_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_quad.c b/src/gallium/auxiliary/gallivm/lp_bld_quad.c
new file mode 100644
index 00000000000..7b1088939b9
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_quad.c
@@ -0,0 +1,101 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+
+#include "lp_bld_type.h"
+#include "lp_bld_arit.h"
+#include "lp_bld_swizzle.h"
+#include "lp_bld_quad.h"
+
+
+static const unsigned char
+swizzle_left[4] = {
+ LP_BLD_QUAD_TOP_LEFT, LP_BLD_QUAD_TOP_LEFT,
+ LP_BLD_QUAD_BOTTOM_LEFT, LP_BLD_QUAD_BOTTOM_LEFT
+};
+
+static const unsigned char
+swizzle_right[4] = {
+ LP_BLD_QUAD_TOP_RIGHT, LP_BLD_QUAD_TOP_RIGHT,
+ LP_BLD_QUAD_BOTTOM_RIGHT, LP_BLD_QUAD_BOTTOM_RIGHT
+};
+
+static const unsigned char
+swizzle_top[4] = {
+ LP_BLD_QUAD_TOP_LEFT, LP_BLD_QUAD_TOP_RIGHT,
+ LP_BLD_QUAD_TOP_LEFT, LP_BLD_QUAD_TOP_RIGHT
+};
+
+static const unsigned char
+swizzle_bottom[4] = {
+ LP_BLD_QUAD_BOTTOM_LEFT, LP_BLD_QUAD_BOTTOM_RIGHT,
+ LP_BLD_QUAD_BOTTOM_LEFT, LP_BLD_QUAD_BOTTOM_RIGHT
+};
+
+
+LLVMValueRef
+lp_build_ddx(struct lp_build_context *bld,
+ LLVMValueRef a)
+{
+ LLVMValueRef a_left = lp_build_swizzle_aos(bld, a, swizzle_left);
+ LLVMValueRef a_right = lp_build_swizzle_aos(bld, a, swizzle_right);
+ return lp_build_sub(bld, a_right, a_left);
+}
+
+
+LLVMValueRef
+lp_build_ddy(struct lp_build_context *bld,
+ LLVMValueRef a)
+{
+ LLVMValueRef a_top = lp_build_swizzle_aos(bld, a, swizzle_top);
+ LLVMValueRef a_bottom = lp_build_swizzle_aos(bld, a, swizzle_bottom);
+ return lp_build_sub(bld, a_bottom, a_top);
+}
+
+
+LLVMValueRef
+lp_build_scalar_ddx(struct lp_build_context *bld,
+ LLVMValueRef a)
+{
+ LLVMValueRef idx_left = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_TOP_LEFT, 0);
+ LLVMValueRef idx_right = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_TOP_RIGHT, 0);
+ LLVMValueRef a_left = LLVMBuildExtractElement(bld->builder, a, idx_left, "");
+ LLVMValueRef a_right = LLVMBuildExtractElement(bld->builder, a, idx_right, "");
+ return lp_build_sub(bld, a_right, a_left);
+}
+
+
+LLVMValueRef
+lp_build_scalar_ddy(struct lp_build_context *bld,
+ LLVMValueRef a)
+{
+ LLVMValueRef idx_top = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_TOP_LEFT, 0);
+ LLVMValueRef idx_bottom = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_BOTTOM_LEFT, 0);
+ LLVMValueRef a_top = LLVMBuildExtractElement(bld->builder, a, idx_top, "");
+ LLVMValueRef a_bottom = LLVMBuildExtractElement(bld->builder, a, idx_bottom, "");
+ return lp_build_sub(bld, a_bottom, a_top);
+}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_quad.h b/src/gallium/auxiliary/gallivm/lp_bld_quad.h
new file mode 100644
index 00000000000..b7992912927
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_quad.h
@@ -0,0 +1,96 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+
+#ifndef LP_BLD_QUAD_H_
+#define LP_BLD_QUAD_H_
+
+
+#include "gallivm/lp_bld.h"
+
+
+struct lp_build_context;
+
+
+/*
+ * Each quad is composed of four elements.
+ *
+ * #########
+ * # 0 | 1 #
+ * #---+---#
+ * # 2 | 3 #
+ * #########
+ */
+
+enum lp_bld_quad {
+ LP_BLD_QUAD_TOP_LEFT = 0,
+ LP_BLD_QUAD_TOP_RIGHT = 1,
+ LP_BLD_QUAD_BOTTOM_LEFT = 2,
+ LP_BLD_QUAD_BOTTOM_RIGHT = 3
+};
+
+
+/*
+ * (Vector) derivates.
+ *
+ * More than one quad is supported. The only requirement is that the vector
+ * contains a whole number of quads:
+ *
+ * ######### ######### ...
+ * # 0 | 1 # # 4 | 5 #
+ * #---+---# #---+---# ...
+ * # 2 | 3 # # 6 | 7 #
+ * ######### ######### ...
+ */
+
+LLVMValueRef
+lp_build_ddx(struct lp_build_context *bld,
+ LLVMValueRef a);
+
+
+LLVMValueRef
+lp_build_ddy(struct lp_build_context *bld,
+ LLVMValueRef a);
+
+
+/*
+ * Scalar derivatives.
+ *
+ * Same as getting the first value of above.
+ */
+
+LLVMValueRef
+lp_build_scalar_ddx(struct lp_build_context *bld,
+ LLVMValueRef a);
+
+
+LLVMValueRef
+lp_build_scalar_ddy(struct lp_build_context *bld,
+ LLVMValueRef a);
+
+
+#endif /* LP_BLD_QUAD_H_ */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
index 195a4953ab1..7a64392d3c1 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -36,12 +36,54 @@
#include "pipe/p_state.h"
#include "util/u_format.h"
#include "util/u_math.h"
-#include "lp_bld_debug.h"
-#include "lp_bld_const.h"
#include "lp_bld_arit.h"
-#include "lp_bld_type.h"
-#include "lp_bld_format.h"
+#include "lp_bld_const.h"
+#include "lp_bld_debug.h"
+#include "lp_bld_printf.h"
+#include "lp_bld_flow.h"
#include "lp_bld_sample.h"
+#include "lp_bld_swizzle.h"
+#include "lp_bld_type.h"
+
+
+/*
+ * Bri-linear factor. Use zero or any other number less than one to force
+ * tri-linear filtering.
+ */
+#define BRILINEAR_FACTOR 2
+
+
+/**
+ * Does the given texture wrap mode allow sampling the texture border color?
+ * XXX maybe move this into gallium util code.
+ */
+boolean
+lp_sampler_wrap_mode_uses_border_color(unsigned mode,
+ unsigned min_img_filter,
+ unsigned mag_img_filter)
+{
+ switch (mode) {
+ case PIPE_TEX_WRAP_REPEAT:
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ return FALSE;
+ case PIPE_TEX_WRAP_CLAMP:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ if (min_img_filter == PIPE_TEX_FILTER_NEAREST &&
+ mag_img_filter == PIPE_TEX_FILTER_NEAREST) {
+ return FALSE;
+ } else {
+ return TRUE;
+ }
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ return TRUE;
+ default:
+ assert(0 && "unexpected wrap mode");
+ return FALSE;
+ }
+}
/**
@@ -83,34 +125,49 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
state->swizzle_a = view->swizzle_a;
state->target = texture->target;
- state->pot_width = util_is_pot(texture->width0);
- state->pot_height = util_is_pot(texture->height0);
- state->pot_depth = util_is_pot(texture->depth0);
+ state->pot_width = util_is_power_of_two(texture->width0);
+ state->pot_height = util_is_power_of_two(texture->height0);
+ state->pot_depth = util_is_power_of_two(texture->depth0);
state->wrap_s = sampler->wrap_s;
state->wrap_t = sampler->wrap_t;
state->wrap_r = sampler->wrap_r;
state->min_img_filter = sampler->min_img_filter;
state->mag_img_filter = sampler->mag_img_filter;
- if (texture->last_level) {
+
+ if (view->last_level && sampler->max_lod > 0.0f) {
state->min_mip_filter = sampler->min_mip_filter;
} else {
state->min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
}
+ if (state->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
+ if (sampler->lod_bias != 0.0f) {
+ state->lod_bias_non_zero = 1;
+ }
+
+ /* If min_lod == max_lod we can greatly simplify mipmap selection.
+ * This is a case that occurs during automatic mipmap generation.
+ */
+ if (sampler->min_lod == sampler->max_lod) {
+ state->min_max_lod_equal = 1;
+ } else {
+ if (sampler->min_lod > 0.0f) {
+ state->apply_min_lod = 1;
+ }
+
+ if (sampler->max_lod < (float)view->last_level) {
+ state->apply_max_lod = 1;
+ }
+ }
+ }
+
state->compare_mode = sampler->compare_mode;
if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) {
state->compare_func = sampler->compare_func;
}
state->normalized_coords = sampler->normalized_coords;
- state->lod_bias = sampler->lod_bias;
- state->min_lod = sampler->min_lod;
- state->max_lod = sampler->max_lod;
- state->border_color[0] = sampler->border_color[0];
- state->border_color[1] = sampler->border_color[1];
- state->border_color[2] = sampler->border_color[2];
- state->border_color[3] = sampler->border_color[3];
/*
* FIXME: Handle the remainder of pipe_sampler_view.
@@ -119,84 +176,817 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
/**
- * Gather elements from scatter positions in memory into a single vector.
+ * Generate code to compute coordinate gradient (rho).
+ * \param ddx partial derivatives of (s, t, r, q) with respect to X
+ * \param ddy partial derivatives of (s, t, r, q) with respect to Y
+ *
+ * XXX: The resulting rho is scalar, so we ignore all but the first element of
+ * derivatives that are passed by the shader.
+ */
+static LLVMValueRef
+lp_build_rho(struct lp_build_sample_context *bld,
+ const LLVMValueRef ddx[4],
+ const LLVMValueRef ddy[4])
+{
+ struct lp_build_context *float_size_bld = &bld->float_size_bld;
+ struct lp_build_context *float_bld = &bld->float_bld;
+ const unsigned dims = bld->dims;
+ LLVMTypeRef i32t = LLVMInt32Type();
+ LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
+ LLVMValueRef index1 = LLVMConstInt(i32t, 1, 0);
+ LLVMValueRef index2 = LLVMConstInt(i32t, 2, 0);
+ LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy;
+ LLVMValueRef rho_x, rho_y;
+ LLVMValueRef rho_vec;
+ LLVMValueRef float_size;
+ LLVMValueRef rho;
+
+ dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx");
+ dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy");
+
+ if (dims <= 1) {
+ rho_x = dsdx;
+ rho_y = dsdy;
+ }
+ else {
+ rho_x = float_size_bld->undef;
+ rho_y = float_size_bld->undef;
+
+ rho_x = LLVMBuildInsertElement(bld->builder, rho_x, dsdx, index0, "");
+ rho_y = LLVMBuildInsertElement(bld->builder, rho_y, dsdy, index0, "");
+
+ dtdx = LLVMBuildExtractElement(bld->builder, ddx[1], index0, "dtdx");
+ dtdy = LLVMBuildExtractElement(bld->builder, ddy[1], index0, "dtdy");
+
+ rho_x = LLVMBuildInsertElement(bld->builder, rho_x, dtdx, index1, "");
+ rho_y = LLVMBuildInsertElement(bld->builder, rho_y, dtdy, index1, "");
+
+ if (dims >= 3) {
+ drdx = LLVMBuildExtractElement(bld->builder, ddx[2], index0, "drdx");
+ drdy = LLVMBuildExtractElement(bld->builder, ddy[2], index0, "drdy");
+
+ rho_x = LLVMBuildInsertElement(bld->builder, rho_x, drdx, index2, "");
+ rho_y = LLVMBuildInsertElement(bld->builder, rho_y, drdy, index2, "");
+ }
+ }
+
+ rho_x = lp_build_abs(float_size_bld, rho_x);
+ rho_y = lp_build_abs(float_size_bld, rho_y);
+
+ rho_vec = lp_build_max(float_size_bld, rho_x, rho_y);
+
+ float_size = lp_build_int_to_float(float_size_bld, bld->int_size);
+
+ rho_vec = lp_build_mul(float_size_bld, rho_vec, float_size);
+
+ if (dims <= 1) {
+ rho = rho_vec;
+ }
+ else {
+ if (dims >= 2) {
+ LLVMValueRef rho_s, rho_t, rho_r;
+
+ rho_s = LLVMBuildExtractElement(bld->builder, rho_vec, index0, "");
+ rho_t = LLVMBuildExtractElement(bld->builder, rho_vec, index1, "");
+
+ rho = lp_build_max(float_bld, rho_s, rho_t);
+
+ if (dims >= 3) {
+ rho_r = LLVMBuildExtractElement(bld->builder, rho_vec, index0, "");
+ rho = lp_build_max(float_bld, rho, rho_r);
+ }
+ }
+ }
+
+ return rho;
+}
+
+
+/*
+ * Bri-linear lod computation
+ *
+ * Use a piece-wise linear approximation of log2 such that:
+ * - round to nearest, for values in the neighborhood of -1, 0, 1, 2, etc.
+ * - linear approximation for values in the neighborhood of 0.5, 1.5., etc,
+ * with the steepness specified in 'factor'
+ * - exact result for 0.5, 1.5, etc.
+ *
+ *
+ * 1.0 - /----*
+ * /
+ * /
+ * /
+ * 0.5 - *
+ * /
+ * /
+ * /
+ * 0.0 - *----/
+ *
+ * | |
+ * 2^0 2^1
+ *
+ * This is a technique also commonly used in hardware:
+ * - http://ixbtlabs.com/articles2/gffx/nv40-rx800-3.html
+ *
+ * TODO: For correctness, this should only be applied when texture is known to
+ * have regular mipmaps, i.e., mipmaps derived from the base level.
*
- * @param src_width src element width
- * @param dst_width result element width (source will be expanded to fit)
- * @param length length of the offsets,
- * @param base_ptr base pointer, should be a i8 pointer type.
- * @param offsets vector with offsets
+ * TODO: This could be done in fixed point, where applicable.
+ */
+static void
+lp_build_brilinear_lod(struct lp_build_sample_context *bld,
+ LLVMValueRef lod,
+ double factor,
+ LLVMValueRef *out_lod_ipart,
+ LLVMValueRef *out_lod_fpart)
+{
+ struct lp_build_context *float_bld = &bld->float_bld;
+ LLVMValueRef lod_fpart;
+ float pre_offset = (factor - 0.5)/factor - 0.5;
+ float post_offset = 1 - factor;
+
+ if (0) {
+ lp_build_printf(bld->builder, "lod = %f\n", lod);
+ }
+
+ lod = lp_build_add(float_bld, lod,
+ lp_build_const_vec(float_bld->type, pre_offset));
+
+ lp_build_ifloor_fract(float_bld, lod, out_lod_ipart, &lod_fpart);
+
+ lod_fpart = lp_build_mul(float_bld, lod_fpart,
+ lp_build_const_vec(float_bld->type, factor));
+
+ lod_fpart = lp_build_add(float_bld, lod_fpart,
+ lp_build_const_vec(float_bld->type, post_offset));
+
+ /*
+ * It's not necessary to clamp lod_fpart since:
+ * - the above expression will never produce numbers greater than one.
+ * - the mip filtering branch is only taken if lod_fpart is positive
+ */
+
+ *out_lod_fpart = lod_fpart;
+
+ if (0) {
+ lp_build_printf(bld->builder, "lod_ipart = %i\n", *out_lod_ipart);
+ lp_build_printf(bld->builder, "lod_fpart = %f\n\n", *out_lod_fpart);
+ }
+}
+
+
+/**
+ * Generate code to compute texture level of detail (lambda).
+ * \param ddx partial derivatives of (s, t, r, q) with respect to X
+ * \param ddy partial derivatives of (s, t, r, q) with respect to Y
+ * \param lod_bias optional float vector with the shader lod bias
+ * \param explicit_lod optional float vector with the explicit lod
+ * \param width scalar int texture width
+ * \param height scalar int texture height
+ * \param depth scalar int texture depth
+ *
+ * XXX: The resulting lod is scalar, so ignore all but the first element of
+ * derivatives, lod_bias, etc that are passed by the shader.
+ */
+void
+lp_build_lod_selector(struct lp_build_sample_context *bld,
+ unsigned unit,
+ const LLVMValueRef ddx[4],
+ const LLVMValueRef ddy[4],
+ LLVMValueRef lod_bias, /* optional */
+ LLVMValueRef explicit_lod, /* optional */
+ unsigned mip_filter,
+ LLVMValueRef *out_lod_ipart,
+ LLVMValueRef *out_lod_fpart)
+
+{
+ struct lp_build_context *float_bld = &bld->float_bld;
+ LLVMValueRef lod;
+
+ *out_lod_ipart = bld->int_bld.zero;
+ *out_lod_fpart = bld->float_bld.zero;
+
+ if (bld->static_state->min_max_lod_equal) {
+ /* User is forcing sampling from a particular mipmap level.
+ * This is hit during mipmap generation.
+ */
+ LLVMValueRef min_lod =
+ bld->dynamic_state->min_lod(bld->dynamic_state, bld->builder, unit);
+
+ lod = min_lod;
+ }
+ else {
+ LLVMValueRef sampler_lod_bias =
+ bld->dynamic_state->lod_bias(bld->dynamic_state, bld->builder, unit);
+ LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
+
+ if (explicit_lod) {
+ lod = LLVMBuildExtractElement(bld->builder, explicit_lod,
+ index0, "");
+ }
+ else {
+ LLVMValueRef rho;
+
+ rho = lp_build_rho(bld, ddx, ddy);
+
+ /* compute lod = log2(rho) */
+ if ((mip_filter == PIPE_TEX_MIPFILTER_NONE ||
+ mip_filter == PIPE_TEX_MIPFILTER_NEAREST) &&
+ !lod_bias &&
+ !bld->static_state->lod_bias_non_zero &&
+ !bld->static_state->apply_max_lod &&
+ !bld->static_state->apply_min_lod) {
+ *out_lod_ipart = lp_build_ilog2(float_bld, rho);
+ *out_lod_fpart = bld->float_bld.zero;
+ return;
+ }
+
+ if (0) {
+ lod = lp_build_log2(float_bld, rho);
+ }
+ else {
+ lod = lp_build_fast_log2(float_bld, rho);
+ }
+
+ /* add shader lod bias */
+ if (lod_bias) {
+ lod_bias = LLVMBuildExtractElement(bld->builder, lod_bias,
+ index0, "");
+ lod = LLVMBuildFAdd(bld->builder, lod, lod_bias, "shader_lod_bias");
+ }
+ }
+
+ /* add sampler lod bias */
+ if (bld->static_state->lod_bias_non_zero)
+ lod = LLVMBuildFAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias");
+
+
+ /* clamp lod */
+ if (bld->static_state->apply_max_lod) {
+ LLVMValueRef max_lod =
+ bld->dynamic_state->max_lod(bld->dynamic_state, bld->builder, unit);
+
+ lod = lp_build_min(float_bld, lod, max_lod);
+ }
+ if (bld->static_state->apply_min_lod) {
+ LLVMValueRef min_lod =
+ bld->dynamic_state->min_lod(bld->dynamic_state, bld->builder, unit);
+
+ lod = lp_build_max(float_bld, lod, min_lod);
+ }
+ }
+
+ if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
+ if (BRILINEAR_FACTOR > 1.0) {
+ lp_build_brilinear_lod(bld, lod, BRILINEAR_FACTOR,
+ out_lod_ipart, out_lod_fpart);
+ }
+ else {
+ lp_build_ifloor_fract(float_bld, lod, out_lod_ipart, out_lod_fpart);
+ }
+
+ lp_build_name(*out_lod_ipart, "lod_ipart");
+ lp_build_name(*out_lod_fpart, "lod_fpart");
+ }
+ else {
+ *out_lod_ipart = lp_build_iround(float_bld, lod);
+ }
+
+ return;
+}
+
+
+/**
+ * For PIPE_TEX_MIPFILTER_NEAREST, convert float LOD to integer
+ * mipmap level index.
+ * Note: this is all scalar code.
+ * \param lod scalar float texture level of detail
+ * \param level_out returns integer
+ */
+void
+lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
+ unsigned unit,
+ LLVMValueRef lod_ipart,
+ LLVMValueRef *level_out)
+{
+ struct lp_build_context *int_bld = &bld->int_bld;
+ LLVMValueRef last_level, level;
+
+ LLVMValueRef zero = LLVMConstInt(LLVMInt32Type(), 0, 0);
+
+ last_level = bld->dynamic_state->last_level(bld->dynamic_state,
+ bld->builder, unit);
+
+ /* convert float lod to integer */
+ level = lod_ipart;
+
+ /* clamp level to legal range of levels */
+ *level_out = lp_build_clamp(int_bld, level, zero, last_level);
+}
+
+
+/**
+ * For PIPE_TEX_MIPFILTER_LINEAR, convert float LOD to integer to
+ * two (adjacent) mipmap level indexes. Later, we'll sample from those
+ * two mipmap levels and interpolate between them.
+ */
+void
+lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
+ unsigned unit,
+ LLVMValueRef lod_ipart,
+ LLVMValueRef *lod_fpart_inout,
+ LLVMValueRef *level0_out,
+ LLVMValueRef *level1_out)
+{
+ LLVMBuilderRef builder = bld->builder;
+ struct lp_build_context *int_bld = &bld->int_bld;
+ struct lp_build_context *float_bld = &bld->float_bld;
+ LLVMValueRef last_level;
+ LLVMValueRef clamp_min;
+ LLVMValueRef clamp_max;
+
+ *level0_out = lod_ipart;
+ *level1_out = lp_build_add(int_bld, lod_ipart, int_bld->one);
+
+ last_level = bld->dynamic_state->last_level(bld->dynamic_state,
+ bld->builder, unit);
+
+ /*
+ * Clamp both lod_ipart and lod_ipart + 1 to [0, last_level], with the
+ * minimum number of comparisons, and zeroing lod_fpart in the extreme
+ * ends in the process.
+ */
+
+ /* lod_ipart < 0 */
+ clamp_min = LLVMBuildICmp(builder, LLVMIntSLT,
+ lod_ipart, int_bld->zero,
+ "clamp_lod_to_zero");
+
+ *level0_out = LLVMBuildSelect(builder, clamp_min,
+ int_bld->zero, *level0_out, "");
+
+ *level1_out = LLVMBuildSelect(builder, clamp_min,
+ int_bld->zero, *level1_out, "");
+
+ *lod_fpart_inout = LLVMBuildSelect(builder, clamp_min,
+ float_bld->zero, *lod_fpart_inout, "");
+
+ /* lod_ipart >= last_level */
+ clamp_max = LLVMBuildICmp(builder, LLVMIntSGE,
+ lod_ipart, last_level,
+ "clamp_lod_to_last");
+
+ *level0_out = LLVMBuildSelect(builder, clamp_max,
+ last_level, *level0_out, "");
+
+ *level1_out = LLVMBuildSelect(builder, clamp_max,
+ last_level, *level1_out, "");
+
+ *lod_fpart_inout = LLVMBuildSelect(builder, clamp_max,
+ float_bld->zero, *lod_fpart_inout, "");
+
+ lp_build_name(*level0_out, "sampler%u_miplevel0", unit);
+ lp_build_name(*level1_out, "sampler%u_miplevel1", unit);
+ lp_build_name(*lod_fpart_inout, "sampler%u_mipweight", unit);
+}
+
+
+/**
+ * Return pointer to a single mipmap level.
+ * \param data_array array of pointers to mipmap levels
+ * \param level integer mipmap level
*/
LLVMValueRef
-lp_build_gather(LLVMBuilderRef builder,
- unsigned length,
- unsigned src_width,
- unsigned dst_width,
- LLVMValueRef base_ptr,
- LLVMValueRef offsets)
-{
- LLVMTypeRef src_type = LLVMIntType(src_width);
- LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
- LLVMTypeRef dst_elem_type = LLVMIntType(dst_width);
- LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length);
- LLVMValueRef res;
- unsigned i;
+lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
+ LLVMValueRef level)
+{
+ LLVMValueRef indexes[2], data_ptr;
+ indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ indexes[1] = level;
+ data_ptr = LLVMBuildGEP(bld->builder, bld->data_array, indexes, 2, "");
+ data_ptr = LLVMBuildLoad(bld->builder, data_ptr, "");
+ return data_ptr;
+}
+
+
+LLVMValueRef
+lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld,
+ int level)
+{
+ LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0);
+ return lp_build_get_mipmap_level(bld, lvl);
+}
+
+
+/**
+ * Codegen equivalent for u_minify().
+ * Return max(1, base_size >> level);
+ */
+static LLVMValueRef
+lp_build_minify(struct lp_build_context *bld,
+ LLVMValueRef base_size,
+ LLVMValueRef level)
+{
+ assert(lp_check_value(bld->type, base_size));
+ assert(lp_check_value(bld->type, level));
+
+ if (level == bld->zero) {
+ /* if we're using mipmap level zero, no minification is needed */
+ return base_size;
+ }
+ else {
+ LLVMValueRef size =
+ LLVMBuildLShr(bld->builder, base_size, level, "minify");
+ assert(bld->type.sign);
+ size = lp_build_max(bld, size, bld->one);
+ return size;
+ }
+}
- res = LLVMGetUndef(dst_vec_type);
- for(i = 0; i < length; ++i) {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
- LLVMValueRef elem_offset;
- LLVMValueRef elem_ptr;
- LLVMValueRef elem;
- elem_offset = LLVMBuildExtractElement(builder, offsets, index, "");
- elem_ptr = LLVMBuildGEP(builder, base_ptr, &elem_offset, 1, "");
- elem_ptr = LLVMBuildBitCast(builder, elem_ptr, src_ptr_type, "");
- elem = LLVMBuildLoad(builder, elem_ptr, "");
+/**
+ * Dereference stride_array[mipmap_level] array to get a stride.
+ * Return stride as a vector.
+ */
+static LLVMValueRef
+lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
+ LLVMValueRef stride_array, LLVMValueRef level)
+{
+ LLVMValueRef indexes[2], stride;
+ indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ indexes[1] = level;
+ stride = LLVMBuildGEP(bld->builder, stride_array, indexes, 2, "");
+ stride = LLVMBuildLoad(bld->builder, stride, "");
+ stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride);
+ return stride;
+}
+
+
+/**
+ * When sampling a mipmap, we need to compute the width, height, depth
+ * of the source levels from the level indexes. This helper function
+ * does that.
+ */
+void
+lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
+ LLVMValueRef ilevel,
+ LLVMValueRef *out_width_vec,
+ LLVMValueRef *out_height_vec,
+ LLVMValueRef *out_depth_vec,
+ LLVMValueRef *row_stride_vec,
+ LLVMValueRef *img_stride_vec)
+{
+ const unsigned dims = bld->dims;
+ LLVMValueRef ilevel_vec;
+ LLVMValueRef size_vec;
+ LLVMTypeRef i32t = LLVMInt32Type();
+
+ ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel);
+
+ /*
+ * Compute width, height, depth at mipmap level 'ilevel'
+ */
+ size_vec = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec);
+
+ *out_width_vec = lp_build_extract_broadcast(bld->builder,
+ bld->int_size_type,
+ bld->int_coord_type,
+ size_vec,
+ LLVMConstInt(i32t, 0, 0));
+ if (dims >= 2) {
+
+ *out_height_vec = lp_build_extract_broadcast(bld->builder,
+ bld->int_size_type,
+ bld->int_coord_type,
+ size_vec,
+ LLVMConstInt(i32t, 1, 0));
+
+ *row_stride_vec = lp_build_get_level_stride_vec(bld,
+ bld->row_stride_array,
+ ilevel);
+ if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
+ *img_stride_vec = lp_build_get_level_stride_vec(bld,
+ bld->img_stride_array,
+ ilevel);
+ if (dims == 3) {
+ *out_depth_vec = lp_build_extract_broadcast(bld->builder,
+ bld->int_size_type,
+ bld->int_coord_type,
+ size_vec,
+ LLVMConstInt(i32t, 2, 0));
+ }
+ }
+ }
+}
+
+
+
+/** Helper used by lp_build_cube_lookup() */
+static LLVMValueRef
+lp_build_cube_ima(struct lp_build_context *coord_bld, LLVMValueRef coord)
+{
+ /* ima = -0.5 / abs(coord); */
+ LLVMValueRef negHalf = lp_build_const_vec(coord_bld->type, -0.5);
+ LLVMValueRef absCoord = lp_build_abs(coord_bld, coord);
+ LLVMValueRef ima = lp_build_div(coord_bld, negHalf, absCoord);
+ return ima;
+}
+
+
+/**
+ * Helper used by lp_build_cube_lookup()
+ * \param sign scalar +1 or -1
+ * \param coord float vector
+ * \param ima float vector
+ */
+static LLVMValueRef
+lp_build_cube_coord(struct lp_build_context *coord_bld,
+ LLVMValueRef sign, int negate_coord,
+ LLVMValueRef coord, LLVMValueRef ima)
+{
+ /* return negate(coord) * ima * sign + 0.5; */
+ LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5);
+ LLVMValueRef res;
+
+ assert(negate_coord == +1 || negate_coord == -1);
- assert(src_width <= dst_width);
- if(src_width > dst_width)
- elem = LLVMBuildTrunc(builder, elem, dst_elem_type, "");
- if(src_width < dst_width)
- elem = LLVMBuildZExt(builder, elem, dst_elem_type, "");
+ if (negate_coord == -1) {
+ coord = lp_build_negate(coord_bld, coord);
+ }
- res = LLVMBuildInsertElement(builder, res, elem, index, "");
+ res = lp_build_mul(coord_bld, coord, ima);
+ if (sign) {
+ sign = lp_build_broadcast_scalar(coord_bld, sign);
+ res = lp_build_mul(coord_bld, res, sign);
}
+ res = lp_build_add(coord_bld, res, half);
return res;
}
+/** Helper used by lp_build_cube_lookup()
+ * Return (major_coord >= 0) ? pos_face : neg_face;
+ */
+static LLVMValueRef
+lp_build_cube_face(struct lp_build_sample_context *bld,
+ LLVMValueRef major_coord,
+ unsigned pos_face, unsigned neg_face)
+{
+ LLVMValueRef cmp = LLVMBuildFCmp(bld->builder, LLVMRealUGE,
+ major_coord,
+ bld->float_bld.zero, "");
+ LLVMValueRef pos = LLVMConstInt(LLVMInt32Type(), pos_face, 0);
+ LLVMValueRef neg = LLVMConstInt(LLVMInt32Type(), neg_face, 0);
+ LLVMValueRef res = LLVMBuildSelect(bld->builder, cmp, pos, neg, "");
+ return res;
+}
+
+
+
+/**
+ * Generate code to do cube face selection and compute per-face texcoords.
+ */
+void
+lp_build_cube_lookup(struct lp_build_sample_context *bld,
+ LLVMValueRef s,
+ LLVMValueRef t,
+ LLVMValueRef r,
+ LLVMValueRef *face,
+ LLVMValueRef *face_s,
+ LLVMValueRef *face_t)
+{
+ struct lp_build_context *float_bld = &bld->float_bld;
+ struct lp_build_context *coord_bld = &bld->coord_bld;
+ LLVMValueRef rx, ry, rz;
+ LLVMValueRef arx, ary, arz;
+ LLVMValueRef c25 = LLVMConstReal(LLVMFloatType(), 0.25);
+ LLVMValueRef arx_ge_ary, arx_ge_arz;
+ LLVMValueRef ary_ge_arx, ary_ge_arz;
+ LLVMValueRef arx_ge_ary_arz, ary_ge_arx_arz;
+ LLVMValueRef rx_pos, ry_pos, rz_pos;
+
+ assert(bld->coord_bld.type.length == 4);
+
+ /*
+ * Use the average of the four pixel's texcoords to choose the face.
+ */
+ rx = lp_build_mul(float_bld, c25,
+ lp_build_sum_vector(&bld->coord_bld, s));
+ ry = lp_build_mul(float_bld, c25,
+ lp_build_sum_vector(&bld->coord_bld, t));
+ rz = lp_build_mul(float_bld, c25,
+ lp_build_sum_vector(&bld->coord_bld, r));
+
+ arx = lp_build_abs(float_bld, rx);
+ ary = lp_build_abs(float_bld, ry);
+ arz = lp_build_abs(float_bld, rz);
+
+ /*
+ * Compare sign/magnitude of rx,ry,rz to determine face
+ */
+ arx_ge_ary = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, ary, "");
+ arx_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, arz, "");
+ ary_ge_arx = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arx, "");
+ ary_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arz, "");
+
+ arx_ge_ary_arz = LLVMBuildAnd(bld->builder, arx_ge_ary, arx_ge_arz, "");
+ ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, "");
+
+ rx_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rx, float_bld->zero, "");
+ ry_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ry, float_bld->zero, "");
+ rz_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rz, float_bld->zero, "");
+
+ {
+ struct lp_build_flow_context *flow_ctx;
+ struct lp_build_if_state if_ctx;
+
+ flow_ctx = lp_build_flow_create(bld->builder);
+ lp_build_flow_scope_begin(flow_ctx);
+
+ *face_s = bld->coord_bld.undef;
+ *face_t = bld->coord_bld.undef;
+ *face = bld->int_bld.undef;
+
+ lp_build_name(*face_s, "face_s");
+ lp_build_name(*face_t, "face_t");
+ lp_build_name(*face, "face");
+
+ lp_build_flow_scope_declare(flow_ctx, face_s);
+ lp_build_flow_scope_declare(flow_ctx, face_t);
+ lp_build_flow_scope_declare(flow_ctx, face);
+
+ lp_build_if(&if_ctx, flow_ctx, bld->builder, arx_ge_ary_arz);
+ {
+ /* +/- X face */
+ LLVMValueRef sign = lp_build_sgn(float_bld, rx);
+ LLVMValueRef ima = lp_build_cube_ima(coord_bld, s);
+ *face_s = lp_build_cube_coord(coord_bld, sign, +1, r, ima);
+ *face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
+ *face = lp_build_cube_face(bld, rx,
+ PIPE_TEX_FACE_POS_X,
+ PIPE_TEX_FACE_NEG_X);
+ }
+ lp_build_else(&if_ctx);
+ {
+ struct lp_build_flow_context *flow_ctx2;
+ struct lp_build_if_state if_ctx2;
+
+ LLVMValueRef face_s2 = bld->coord_bld.undef;
+ LLVMValueRef face_t2 = bld->coord_bld.undef;
+ LLVMValueRef face2 = bld->int_bld.undef;
+
+ flow_ctx2 = lp_build_flow_create(bld->builder);
+ lp_build_flow_scope_begin(flow_ctx2);
+ lp_build_flow_scope_declare(flow_ctx2, &face_s2);
+ lp_build_flow_scope_declare(flow_ctx2, &face_t2);
+ lp_build_flow_scope_declare(flow_ctx2, &face2);
+
+ ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, "");
+
+ lp_build_if(&if_ctx2, flow_ctx2, bld->builder, ary_ge_arx_arz);
+ {
+ /* +/- Y face */
+ LLVMValueRef sign = lp_build_sgn(float_bld, ry);
+ LLVMValueRef ima = lp_build_cube_ima(coord_bld, t);
+ face_s2 = lp_build_cube_coord(coord_bld, NULL, -1, s, ima);
+ face_t2 = lp_build_cube_coord(coord_bld, sign, -1, r, ima);
+ face2 = lp_build_cube_face(bld, ry,
+ PIPE_TEX_FACE_POS_Y,
+ PIPE_TEX_FACE_NEG_Y);
+ }
+ lp_build_else(&if_ctx2);
+ {
+ /* +/- Z face */
+ LLVMValueRef sign = lp_build_sgn(float_bld, rz);
+ LLVMValueRef ima = lp_build_cube_ima(coord_bld, r);
+ face_s2 = lp_build_cube_coord(coord_bld, sign, -1, s, ima);
+ face_t2 = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
+ face2 = lp_build_cube_face(bld, rz,
+ PIPE_TEX_FACE_POS_Z,
+ PIPE_TEX_FACE_NEG_Z);
+ }
+ lp_build_endif(&if_ctx2);
+ lp_build_flow_scope_end(flow_ctx2);
+ lp_build_flow_destroy(flow_ctx2);
+ *face_s = face_s2;
+ *face_t = face_t2;
+ *face = face2;
+ }
+
+ lp_build_endif(&if_ctx);
+ lp_build_flow_scope_end(flow_ctx);
+ lp_build_flow_destroy(flow_ctx);
+ }
+}
+
+
+/**
+ * Compute the partial offset of a pixel block along an arbitrary axis.
+ *
+ * @param coord coordinate in pixels
+ * @param stride number of bytes between rows of successive pixel blocks
+ * @param block_length number of pixels in a pixels block along the coordinate
+ * axis
+ * @param out_offset resulting relative offset of the pixel block in bytes
+ * @param out_subcoord resulting sub-block pixel coordinate
+ */
+void
+lp_build_sample_partial_offset(struct lp_build_context *bld,
+ unsigned block_length,
+ LLVMValueRef coord,
+ LLVMValueRef stride,
+ LLVMValueRef *out_offset,
+ LLVMValueRef *out_subcoord)
+{
+ LLVMValueRef offset;
+ LLVMValueRef subcoord;
+
+ if (block_length == 1) {
+ subcoord = bld->zero;
+ }
+ else {
+ /*
+ * Pixel blocks have power of two dimensions. LLVM should convert the
+ * rem/div to bit arithmetic.
+ * TODO: Verify this.
+ * It does indeed BUT it does transform it to scalar (and back) when doing so
+ * (using roughly extract, shift/and, mov, unpack) (llvm 2.7).
+ * The generated code looks seriously unfunny and is quite expensive.
+ */
+#if 0
+ LLVMValueRef block_width = lp_build_const_int_vec(bld->type, block_length);
+ subcoord = LLVMBuildURem(bld->builder, coord, block_width, "");
+ coord = LLVMBuildUDiv(bld->builder, coord, block_width, "");
+#else
+ unsigned logbase2 = util_unsigned_logbase2(block_length);
+ LLVMValueRef block_shift = lp_build_const_int_vec(bld->type, logbase2);
+ LLVMValueRef block_mask = lp_build_const_int_vec(bld->type, block_length - 1);
+ subcoord = LLVMBuildAnd(bld->builder, coord, block_mask, "");
+ coord = LLVMBuildLShr(bld->builder, coord, block_shift, "");
+#endif
+ }
+
+ offset = lp_build_mul(bld, coord, stride);
+
+ assert(out_offset);
+ assert(out_subcoord);
+
+ *out_offset = offset;
+ *out_subcoord = subcoord;
+}
+
+
/**
* Compute the offset of a pixel block.
*
- * x, y, z, y_stride, z_stride are vectors, and they refer to pixel blocks, as
- * per format description, and not individual pixels.
+ * x, y, z, y_stride, z_stride are vectors, and they refer to pixels.
+ *
+ * Returns the relative offset and i,j sub-block coordinates
*/
-LLVMValueRef
+void
lp_build_sample_offset(struct lp_build_context *bld,
const struct util_format_description *format_desc,
LLVMValueRef x,
LLVMValueRef y,
LLVMValueRef z,
LLVMValueRef y_stride,
- LLVMValueRef z_stride)
+ LLVMValueRef z_stride,
+ LLVMValueRef *out_offset,
+ LLVMValueRef *out_i,
+ LLVMValueRef *out_j)
{
LLVMValueRef x_stride;
LLVMValueRef offset;
x_stride = lp_build_const_vec(bld->type, format_desc->block.bits/8);
- offset = lp_build_mul(bld, x, x_stride);
+
+ lp_build_sample_partial_offset(bld,
+ format_desc->block.width,
+ x, x_stride,
+ &offset, out_i);
if (y && y_stride) {
- LLVMValueRef y_offset = lp_build_mul(bld, y, y_stride);
+ LLVMValueRef y_offset;
+ lp_build_sample_partial_offset(bld,
+ format_desc->block.height,
+ y, y_stride,
+ &y_offset, out_j);
offset = lp_build_add(bld, offset, y_offset);
}
+ else {
+ *out_j = bld->zero;
+ }
if (z && z_stride) {
- LLVMValueRef z_offset = lp_build_mul(bld, z, z_stride);
+ LLVMValueRef z_offset;
+ LLVMValueRef k;
+ lp_build_sample_partial_offset(bld,
+ 1, /* pixel blocks are always 2D */
+ z, z_stride,
+ &z_offset, &k);
offset = lp_build_add(bld, offset, z_offset);
}
- return offset;
+ *out_offset = offset;
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
index 8ceb20473d5..d1a1aa143d8 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
@@ -36,7 +36,12 @@
#define LP_BLD_SAMPLE_H
+#include "pipe/p_format.h"
+#include "util/u_debug.h"
#include "gallivm/lp_bld.h"
+#include "gallivm/lp_bld_type.h"
+#include "gallivm/lp_bld_swizzle.h"
+
struct pipe_resource;
struct pipe_sampler_view;
@@ -56,14 +61,14 @@ struct lp_sampler_static_state
{
/* pipe_sampler_view's state */
enum pipe_format format;
- unsigned swizzle_r:3;
+ unsigned swizzle_r:3; /**< PIPE_SWIZZLE_* */
unsigned swizzle_g:3;
unsigned swizzle_b:3;
unsigned swizzle_a:3;
/* pipe_texture's state */
- unsigned target:3;
- unsigned pot_width:1;
+ unsigned target:3; /**< PIPE_TEXTURE_* */
+ unsigned pot_width:1; /**< is the width a power of two? */
unsigned pot_height:1;
unsigned pot_depth:1;
@@ -77,8 +82,10 @@ struct lp_sampler_static_state
unsigned compare_mode:1;
unsigned compare_func:3;
unsigned normalized_coords:1;
- float lod_bias, min_lod, max_lod;
- float border_color[4];
+ unsigned min_max_lod_equal:1; /**< min_lod == max_lod ? */
+ unsigned lod_bias_non_zero:1;
+ unsigned apply_min_lod:1; /**< min_lod > 0 ? */
+ unsigned apply_max_lod:1; /**< max_lod < last_level ? */
};
@@ -95,48 +102,191 @@ struct lp_sampler_static_state
struct lp_sampler_dynamic_state
{
- /** Obtain the base texture width. */
+ /** Obtain the base texture width (returns int32) */
LLVMValueRef
- (*width)( struct lp_sampler_dynamic_state *state,
+ (*width)( const struct lp_sampler_dynamic_state *state,
LLVMBuilderRef builder,
unsigned unit);
- /** Obtain the base texture height. */
+ /** Obtain the base texture height (returns int32) */
LLVMValueRef
- (*height)( struct lp_sampler_dynamic_state *state,
+ (*height)( const struct lp_sampler_dynamic_state *state,
LLVMBuilderRef builder,
unsigned unit);
- /** Obtain the base texture depth. */
+ /** Obtain the base texture depth (returns int32) */
LLVMValueRef
- (*depth)( struct lp_sampler_dynamic_state *state,
+ (*depth)( const struct lp_sampler_dynamic_state *state,
LLVMBuilderRef builder,
unsigned unit);
- /** Obtain the number of mipmap levels (minus one). */
+ /** Obtain the number of mipmap levels minus one (returns int32) */
LLVMValueRef
- (*last_level)( struct lp_sampler_dynamic_state *state,
+ (*last_level)( const struct lp_sampler_dynamic_state *state,
LLVMBuilderRef builder,
unsigned unit);
+ /** Obtain stride in bytes between image rows/blocks (returns int32) */
LLVMValueRef
- (*row_stride)( struct lp_sampler_dynamic_state *state,
+ (*row_stride)( const struct lp_sampler_dynamic_state *state,
LLVMBuilderRef builder,
unsigned unit);
+ /** Obtain stride in bytes between image slices (returns int32) */
LLVMValueRef
- (*img_stride)( struct lp_sampler_dynamic_state *state,
+ (*img_stride)( const struct lp_sampler_dynamic_state *state,
LLVMBuilderRef builder,
unsigned unit);
+ /** Obtain pointer to array of pointers to mimpap levels */
LLVMValueRef
- (*data_ptr)( struct lp_sampler_dynamic_state *state,
+ (*data_ptr)( const struct lp_sampler_dynamic_state *state,
LLVMBuilderRef builder,
unsigned unit);
+ /** Obtain texture min lod (returns float) */
+ LLVMValueRef
+ (*min_lod)(const struct lp_sampler_dynamic_state *state,
+ LLVMBuilderRef builder, unsigned unit);
+
+ /** Obtain texture max lod (returns float) */
+ LLVMValueRef
+ (*max_lod)(const struct lp_sampler_dynamic_state *state,
+ LLVMBuilderRef builder, unsigned unit);
+
+ /** Obtain texture lod bias (returns float) */
+ LLVMValueRef
+ (*lod_bias)(const struct lp_sampler_dynamic_state *state,
+ LLVMBuilderRef builder, unsigned unit);
+
+ /** Obtain texture border color (returns ptr to float[4]) */
+ LLVMValueRef
+ (*border_color)(const struct lp_sampler_dynamic_state *state,
+ LLVMBuilderRef builder, unsigned unit);
+};
+
+
+/**
+ * Keep all information for sampling code generation in a single place.
+ */
+struct lp_build_sample_context
+{
+ LLVMBuilderRef builder;
+
+ const struct lp_sampler_static_state *static_state;
+
+ struct lp_sampler_dynamic_state *dynamic_state;
+
+ const struct util_format_description *format_desc;
+
+ /* See texture_dims() */
+ unsigned dims;
+
+ /** regular scalar float type */
+ struct lp_type float_type;
+ struct lp_build_context float_bld;
+
+ /** float vector type */
+ struct lp_build_context float_vec_bld;
+
+ /** regular scalar float type */
+ struct lp_type int_type;
+ struct lp_build_context int_bld;
+
+ /** Incoming coordinates type and build context */
+ struct lp_type coord_type;
+ struct lp_build_context coord_bld;
+
+ /** Unsigned integer coordinates */
+ struct lp_type uint_coord_type;
+ struct lp_build_context uint_coord_bld;
+
+ /** Signed integer coordinates */
+ struct lp_type int_coord_type;
+ struct lp_build_context int_coord_bld;
+
+ /** Unsigned integer texture size */
+ struct lp_type int_size_type;
+ struct lp_build_context int_size_bld;
+
+ /** Unsigned integer texture size */
+ struct lp_type float_size_type;
+ struct lp_build_context float_size_bld;
+
+ /** Output texels type and build context */
+ struct lp_type texel_type;
+ struct lp_build_context texel_bld;
+
+ /* Common dynamic state values */
+ LLVMValueRef width;
+ LLVMValueRef height;
+ LLVMValueRef depth;
+ LLVMValueRef row_stride_array;
+ LLVMValueRef img_stride_array;
+ LLVMValueRef data_array;
+
+ /** Integer vector with texture width, height, depth */
+ LLVMValueRef int_size;
};
+
+/**
+ * We only support a few wrap modes in lp_build_sample_wrap_linear_int() at
+ * this time. Return whether the given mode is supported by that function.
+ */
+static INLINE boolean
+lp_is_simple_wrap_mode(unsigned mode)
+{
+ switch (mode) {
+ case PIPE_TEX_WRAP_REPEAT:
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ return TRUE;
+ default:
+ return FALSE;
+ }
+}
+
+
+static INLINE void
+apply_sampler_swizzle(struct lp_build_sample_context *bld,
+ LLVMValueRef *texel)
+{
+ unsigned char swizzles[4];
+
+ swizzles[0] = bld->static_state->swizzle_r;
+ swizzles[1] = bld->static_state->swizzle_g;
+ swizzles[2] = bld->static_state->swizzle_b;
+ swizzles[3] = bld->static_state->swizzle_a;
+
+ lp_build_swizzle_soa_inplace(&bld->texel_bld, texel, swizzles);
+}
+
+
+static INLINE unsigned
+texture_dims(enum pipe_texture_target tex)
+{
+ switch (tex) {
+ case PIPE_TEXTURE_1D:
+ return 1;
+ case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_RECT:
+ case PIPE_TEXTURE_CUBE:
+ return 2;
+ case PIPE_TEXTURE_3D:
+ return 3;
+ default:
+ assert(0 && "bad texture target in texture_dims()");
+ return 2;
+ }
+}
+
+
+boolean
+lp_sampler_wrap_mode_uses_border_color(unsigned mode,
+ unsigned min_img_filter,
+ unsigned mag_img_filter);
+
/**
* Derive the sampler static state.
*/
@@ -146,23 +296,80 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
const struct pipe_sampler_state *sampler);
-LLVMValueRef
-lp_build_gather(LLVMBuilderRef builder,
- unsigned length,
- unsigned src_width,
- unsigned dst_width,
- LLVMValueRef base_ptr,
- LLVMValueRef offsets);
+void
+lp_build_lod_selector(struct lp_build_sample_context *bld,
+ unsigned unit,
+ const LLVMValueRef ddx[4],
+ const LLVMValueRef ddy[4],
+ LLVMValueRef lod_bias, /* optional */
+ LLVMValueRef explicit_lod, /* optional */
+ unsigned mip_filter,
+ LLVMValueRef *out_lod_ipart,
+ LLVMValueRef *out_lod_fpart);
+
+void
+lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
+ unsigned unit,
+ LLVMValueRef lod,
+ LLVMValueRef *level_out);
+void
+lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
+ unsigned unit,
+ LLVMValueRef lod_ipart,
+ LLVMValueRef *lod_fpart_inout,
+ LLVMValueRef *level0_out,
+ LLVMValueRef *level1_out);
+
+LLVMValueRef
+lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
+ LLVMValueRef level);
LLVMValueRef
+lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld,
+ int level);
+
+
+void
+lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
+ LLVMValueRef ilevel,
+ LLVMValueRef *out_width_vec,
+ LLVMValueRef *out_height_vec,
+ LLVMValueRef *out_depth_vec,
+ LLVMValueRef *row_stride_vec,
+ LLVMValueRef *img_stride_vec);
+
+
+void
+lp_build_cube_lookup(struct lp_build_sample_context *bld,
+ LLVMValueRef s,
+ LLVMValueRef t,
+ LLVMValueRef r,
+ LLVMValueRef *face,
+ LLVMValueRef *face_s,
+ LLVMValueRef *face_t);
+
+
+void
+lp_build_sample_partial_offset(struct lp_build_context *bld,
+ unsigned block_length,
+ LLVMValueRef coord,
+ LLVMValueRef stride,
+ LLVMValueRef *out_offset,
+ LLVMValueRef *out_i);
+
+
+void
lp_build_sample_offset(struct lp_build_context *bld,
const struct util_format_description *format_desc,
LLVMValueRef x,
LLVMValueRef y,
LLVMValueRef z,
LLVMValueRef y_stride,
- LLVMValueRef z_stride);
+ LLVMValueRef z_stride,
+ LLVMValueRef *out_offset,
+ LLVMValueRef *out_i,
+ LLVMValueRef *out_j);
void
@@ -173,9 +380,15 @@ lp_build_sample_soa(LLVMBuilderRef builder,
unsigned unit,
unsigned num_coords,
const LLVMValueRef *coords,
- LLVMValueRef lodbias,
- LLVMValueRef *texel);
+ const LLVMValueRef *ddx,
+ const LLVMValueRef *ddy,
+ LLVMValueRef lod_bias,
+ LLVMValueRef explicit_lod,
+ LLVMValueRef texel_out[4]);
+void
+lp_build_sample_nop(struct lp_type type,
+ LLVMValueRef texel_out[4]);
#endif /* LP_BLD_SAMPLE_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
new file mode 100644
index 00000000000..e7410448c04
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
@@ -0,0 +1,1092 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Texture sampling -- SoA.
+ *
+ * @author Jose Fonseca <[email protected]>
+ * @author Brian Paul <[email protected]>
+ */
+
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "util/u_debug.h"
+#include "util/u_dump.h"
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "util/u_format.h"
+#include "lp_bld_debug.h"
+#include "lp_bld_type.h"
+#include "lp_bld_const.h"
+#include "lp_bld_conv.h"
+#include "lp_bld_arit.h"
+#include "lp_bld_logic.h"
+#include "lp_bld_swizzle.h"
+#include "lp_bld_pack.h"
+#include "lp_bld_flow.h"
+#include "lp_bld_gather.h"
+#include "lp_bld_format.h"
+#include "lp_bld_sample.h"
+#include "lp_bld_sample_aos.h"
+#include "lp_bld_quad.h"
+
+
+/**
+ * Build LLVM code for texture coord wrapping, for nearest filtering,
+ * for scaled integer texcoords.
+ * \param block_length is the length of the pixel block along the
+ * coordinate axis
+ * \param coord the incoming texcoord (s,t,r or q) scaled to the texture size
+ * \param length the texture size along one dimension
+ * \param stride pixel stride along the coordinate axis (in bytes)
+ * \param is_pot if TRUE, length is a power of two
+ * \param wrap_mode one of PIPE_TEX_WRAP_x
+ * \param out_offset byte offset for the wrapped coordinate
+ * \param out_i resulting sub-block pixel coordinate for coord0
+ */
+static void
+lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
+ unsigned block_length,
+ LLVMValueRef coord,
+ LLVMValueRef length,
+ LLVMValueRef stride,
+ boolean is_pot,
+ unsigned wrap_mode,
+ LLVMValueRef *out_offset,
+ LLVMValueRef *out_i)
+{
+ struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
+ struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
+ LLVMValueRef length_minus_one;
+
+ length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
+
+ switch(wrap_mode) {
+ case PIPE_TEX_WRAP_REPEAT:
+ if(is_pot)
+ coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, "");
+ else {
+ /* Add a bias to the texcoord to handle negative coords */
+ LLVMValueRef bias = lp_build_mul_imm(uint_coord_bld, length, 1024);
+ coord = LLVMBuildAdd(bld->builder, coord, bias, "");
+ coord = LLVMBuildURem(bld->builder, coord, length, "");
+ }
+ break;
+
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
+ coord = lp_build_min(int_coord_bld, coord, length_minus_one);
+ break;
+
+ case PIPE_TEX_WRAP_CLAMP:
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ default:
+ assert(0);
+ }
+
+ lp_build_sample_partial_offset(uint_coord_bld, block_length, coord, stride,
+ out_offset, out_i);
+}
+
+
+/**
+ * Build LLVM code for texture coord wrapping, for linear filtering,
+ * for scaled integer texcoords.
+ * \param block_length is the length of the pixel block along the
+ * coordinate axis
+ * \param coord0 the incoming texcoord (s,t,r or q) scaled to the texture size
+ * \param length the texture size along one dimension
+ * \param stride pixel stride along the coordinate axis (in bytes)
+ * \param is_pot if TRUE, length is a power of two
+ * \param wrap_mode one of PIPE_TEX_WRAP_x
+ * \param offset0 resulting relative offset for coord0
+ * \param offset1 resulting relative offset for coord0 + 1
+ * \param i0 resulting sub-block pixel coordinate for coord0
+ * \param i1 resulting sub-block pixel coordinate for coord0 + 1
+ */
+static void
+lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
+ unsigned block_length,
+ LLVMValueRef coord0,
+ LLVMValueRef length,
+ LLVMValueRef stride,
+ boolean is_pot,
+ unsigned wrap_mode,
+ LLVMValueRef *offset0,
+ LLVMValueRef *offset1,
+ LLVMValueRef *i0,
+ LLVMValueRef *i1)
+{
+ struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
+ struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
+ LLVMValueRef length_minus_one;
+ LLVMValueRef lmask, umask, mask;
+
+ if (block_length != 1) {
+ /*
+ * If the pixel block covers more than one pixel then there is no easy
+ * way to calculate offset1 relative to offset0. Instead, compute them
+ * independently.
+ */
+
+ LLVMValueRef coord1;
+
+ lp_build_sample_wrap_nearest_int(bld,
+ block_length,
+ coord0,
+ length,
+ stride,
+ is_pot,
+ wrap_mode,
+ offset0, i0);
+
+ coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
+
+ lp_build_sample_wrap_nearest_int(bld,
+ block_length,
+ coord1,
+ length,
+ stride,
+ is_pot,
+ wrap_mode,
+ offset1, i1);
+
+ return;
+ }
+
+ /*
+ * Scalar pixels -- try to compute offset0 and offset1 with a single stride
+ * multiplication.
+ */
+
+ *i0 = uint_coord_bld->zero;
+ *i1 = uint_coord_bld->zero;
+
+ length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
+
+ switch(wrap_mode) {
+ case PIPE_TEX_WRAP_REPEAT:
+ if (is_pot) {
+ coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, "");
+ }
+ else {
+ /* Add a bias to the texcoord to handle negative coords */
+ LLVMValueRef bias = lp_build_mul_imm(uint_coord_bld, length, 1024);
+ coord0 = LLVMBuildAdd(bld->builder, coord0, bias, "");
+ coord0 = LLVMBuildURem(bld->builder, coord0, length, "");
+ }
+
+ mask = lp_build_compare(bld->builder, int_coord_bld->type,
+ PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
+
+ *offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
+ *offset1 = LLVMBuildAnd(bld->builder,
+ lp_build_add(uint_coord_bld, *offset0, stride),
+ mask, "");
+ break;
+
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ lmask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type,
+ PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero);
+ umask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type,
+ PIPE_FUNC_LESS, coord0, length_minus_one);
+
+ coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero);
+ coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one);
+
+ mask = LLVMBuildAnd(bld->builder, lmask, umask, "");
+
+ *offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
+ *offset1 = lp_build_add(uint_coord_bld,
+ *offset0,
+ LLVMBuildAnd(bld->builder, stride, mask, ""));
+ break;
+
+ case PIPE_TEX_WRAP_CLAMP:
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ default:
+ assert(0);
+ *offset0 = uint_coord_bld->zero;
+ *offset1 = uint_coord_bld->zero;
+ break;
+ }
+}
+
+
+/**
+ * Sample a single texture image with nearest sampling.
+ * If sampling a cube texture, r = cube face in [0,5].
+ * Return filtered color as two vectors of 16-bit fixed point values.
+ */
+static void
+lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
+ LLVMValueRef width_vec,
+ LLVMValueRef height_vec,
+ LLVMValueRef depth_vec,
+ LLVMValueRef row_stride_vec,
+ LLVMValueRef img_stride_vec,
+ LLVMValueRef data_ptr,
+ LLVMValueRef s,
+ LLVMValueRef t,
+ LLVMValueRef r,
+ LLVMValueRef *colors_lo,
+ LLVMValueRef *colors_hi)
+{
+ const unsigned dims = bld->dims;
+ LLVMBuilderRef builder = bld->builder;
+ struct lp_build_context i32, h16, u8n;
+ LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
+ LLVMValueRef i32_c8;
+ LLVMValueRef s_ipart, t_ipart, r_ipart;
+ LLVMValueRef x_stride;
+ LLVMValueRef x_offset, offset;
+ LLVMValueRef x_subcoord, y_subcoord, z_subcoord;
+
+ lp_build_context_init(&i32, builder, lp_type_int_vec(32));
+ lp_build_context_init(&h16, builder, lp_type_ufixed(16));
+ lp_build_context_init(&u8n, builder, lp_type_unorm(8));
+
+ i32_vec_type = lp_build_vec_type(i32.type);
+ h16_vec_type = lp_build_vec_type(h16.type);
+ u8n_vec_type = lp_build_vec_type(u8n.type);
+
+ if (bld->static_state->normalized_coords) {
+ /* s = s * width, t = t * height */
+ LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type);
+ LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width_vec,
+ coord_vec_type, "");
+ s = lp_build_mul(&bld->coord_bld, s, fp_width);
+ if (dims >= 2) {
+ LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height_vec,
+ coord_vec_type, "");
+ t = lp_build_mul(&bld->coord_bld, t, fp_height);
+ if (dims >= 3) {
+ LLVMValueRef fp_depth = LLVMBuildSIToFP(bld->builder, depth_vec,
+ coord_vec_type, "");
+ r = lp_build_mul(&bld->coord_bld, r, fp_depth);
+ }
+ }
+ }
+
+ /* scale coords by 256 (8 fractional bits) */
+ s = lp_build_mul_imm(&bld->coord_bld, s, 256);
+ if (dims >= 2)
+ t = lp_build_mul_imm(&bld->coord_bld, t, 256);
+ if (dims >= 3)
+ r = lp_build_mul_imm(&bld->coord_bld, r, 256);
+
+ /* convert float to int */
+ s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
+ if (dims >= 2)
+ t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
+ if (dims >= 3)
+ r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");
+
+ /* compute floor (shift right 8) */
+ i32_c8 = lp_build_const_int_vec(i32.type, 8);
+ s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
+ if (dims >= 2)
+ t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
+ if (dims >= 3)
+ r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");
+
+ /* get pixel, row, image strides */
+ x_stride = lp_build_const_vec(bld->uint_coord_bld.type,
+ bld->format_desc->block.bits/8);
+
+ /* Do texcoord wrapping, compute texel offset */
+ lp_build_sample_wrap_nearest_int(bld,
+ bld->format_desc->block.width,
+ s_ipart, width_vec, x_stride,
+ bld->static_state->pot_width,
+ bld->static_state->wrap_s,
+ &x_offset, &x_subcoord);
+ offset = x_offset;
+ if (dims >= 2) {
+ LLVMValueRef y_offset;
+ lp_build_sample_wrap_nearest_int(bld,
+ bld->format_desc->block.height,
+ t_ipart, height_vec, row_stride_vec,
+ bld->static_state->pot_height,
+ bld->static_state->wrap_t,
+ &y_offset, &y_subcoord);
+ offset = lp_build_add(&bld->uint_coord_bld, offset, y_offset);
+ if (dims >= 3) {
+ LLVMValueRef z_offset;
+ lp_build_sample_wrap_nearest_int(bld,
+ 1, /* block length (depth) */
+ r_ipart, depth_vec, img_stride_vec,
+ bld->static_state->pot_height,
+ bld->static_state->wrap_r,
+ &z_offset, &z_subcoord);
+ offset = lp_build_add(&bld->uint_coord_bld, offset, z_offset);
+ }
+ else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
+ LLVMValueRef z_offset;
+ /* The r coord is the cube face in [0,5] */
+ z_offset = lp_build_mul(&bld->uint_coord_bld, r, img_stride_vec);
+ offset = lp_build_add(&bld->uint_coord_bld, offset, z_offset);
+ }
+ }
+
+ /*
+ * Fetch the pixels as 4 x 32bit (rgba order might differ):
+ *
+ * rgba0 rgba1 rgba2 rgba3
+ *
+ * bit cast them into 16 x u8
+ *
+ * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
+ *
+ * unpack them into two 8 x i16:
+ *
+ * r0 g0 b0 a0 r1 g1 b1 a1
+ * r2 g2 b2 a2 r3 g3 b3 a3
+ *
+ * The higher 8 bits of the resulting elements will be zero.
+ */
+ {
+ LLVMValueRef rgba8;
+
+ if (util_format_is_rgba8_variant(bld->format_desc)) {
+ /*
+ * Given the format is a rgba8, just read the pixels as is,
+ * without any swizzling. Swizzling will be done later.
+ */
+ rgba8 = lp_build_gather(bld->builder,
+ bld->texel_type.length,
+ bld->format_desc->block.bits,
+ bld->texel_type.width,
+ data_ptr, offset);
+
+ rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
+ }
+ else {
+ rgba8 = lp_build_fetch_rgba_aos(bld->builder,
+ bld->format_desc,
+ u8n.type,
+ data_ptr, offset,
+ x_subcoord,
+ y_subcoord);
+ }
+
+ /* Expand one 4*rgba8 to two 2*rgba16 */
+ lp_build_unpack2(builder, u8n.type, h16.type,
+ rgba8,
+ colors_lo, colors_hi);
+ }
+}
+
+
+/**
+ * Sample a single texture image with (bi-)(tri-)linear sampling.
+ * Return filtered color as two vectors of 16-bit fixed point values.
+ */
+static void
+lp_build_sample_image_linear(struct lp_build_sample_context *bld,
+ LLVMValueRef width_vec,
+ LLVMValueRef height_vec,
+ LLVMValueRef depth_vec,
+ LLVMValueRef row_stride_vec,
+ LLVMValueRef img_stride_vec,
+ LLVMValueRef data_ptr,
+ LLVMValueRef s,
+ LLVMValueRef t,
+ LLVMValueRef r,
+ LLVMValueRef *colors_lo,
+ LLVMValueRef *colors_hi)
+{
+ const unsigned dims = bld->dims;
+ LLVMBuilderRef builder = bld->builder;
+ struct lp_build_context i32, h16, u8n;
+ LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
+ LLVMValueRef i32_c8, i32_c128, i32_c255;
+ LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
+ LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
+ LLVMValueRef r_ipart, r_fpart, r_fpart_lo, r_fpart_hi;
+ LLVMValueRef x_stride, y_stride, z_stride;
+ LLVMValueRef x_offset0, x_offset1;
+ LLVMValueRef y_offset0, y_offset1;
+ LLVMValueRef z_offset0, z_offset1;
+ LLVMValueRef offset[2][2][2]; /* [z][y][x] */
+ LLVMValueRef x_subcoord[2], y_subcoord[2], z_subcoord[2];
+ LLVMValueRef neighbors_lo[2][2][2]; /* [z][y][x] */
+ LLVMValueRef neighbors_hi[2][2][2]; /* [z][y][x] */
+ LLVMValueRef packed_lo, packed_hi;
+ unsigned x, y, z;
+ unsigned i, j, k;
+ unsigned numj, numk;
+
+ lp_build_context_init(&i32, builder, lp_type_int_vec(32));
+ lp_build_context_init(&h16, builder, lp_type_ufixed(16));
+ lp_build_context_init(&u8n, builder, lp_type_unorm(8));
+
+ i32_vec_type = lp_build_vec_type(i32.type);
+ h16_vec_type = lp_build_vec_type(h16.type);
+ u8n_vec_type = lp_build_vec_type(u8n.type);
+
+ if (bld->static_state->normalized_coords) {
+ /* s = s * width, t = t * height */
+ LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type);
+ LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width_vec,
+ coord_vec_type, "");
+ s = lp_build_mul(&bld->coord_bld, s, fp_width);
+ if (dims >= 2) {
+ LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height_vec,
+ coord_vec_type, "");
+ t = lp_build_mul(&bld->coord_bld, t, fp_height);
+ }
+ if (dims >= 3) {
+ LLVMValueRef fp_depth = LLVMBuildSIToFP(bld->builder, depth_vec,
+ coord_vec_type, "");
+ r = lp_build_mul(&bld->coord_bld, r, fp_depth);
+ }
+ }
+
+ /* scale coords by 256 (8 fractional bits) */
+ s = lp_build_mul_imm(&bld->coord_bld, s, 256);
+ if (dims >= 2)
+ t = lp_build_mul_imm(&bld->coord_bld, t, 256);
+ if (dims >= 3)
+ r = lp_build_mul_imm(&bld->coord_bld, r, 256);
+
+ /* convert float to int */
+ s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
+ if (dims >= 2)
+ t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
+ if (dims >= 3)
+ r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");
+
+ /* subtract 0.5 (add -128) */
+ i32_c128 = lp_build_const_int_vec(i32.type, -128);
+ s = LLVMBuildAdd(builder, s, i32_c128, "");
+ if (dims >= 2) {
+ t = LLVMBuildAdd(builder, t, i32_c128, "");
+ }
+ if (dims >= 3) {
+ r = LLVMBuildAdd(builder, r, i32_c128, "");
+ }
+
+ /* compute floor (shift right 8) */
+ i32_c8 = lp_build_const_int_vec(i32.type, 8);
+ s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
+ if (dims >= 2)
+ t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
+ if (dims >= 3)
+ r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");
+
+ /* compute fractional part (AND with 0xff) */
+ i32_c255 = lp_build_const_int_vec(i32.type, 255);
+ s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
+ if (dims >= 2)
+ t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
+ if (dims >= 3)
+ r_fpart = LLVMBuildAnd(builder, r, i32_c255, "");
+
+ /* get pixel, row and image strides */
+ x_stride = lp_build_const_vec(bld->uint_coord_bld.type,
+ bld->format_desc->block.bits/8);
+ y_stride = row_stride_vec;
+ z_stride = img_stride_vec;
+
+ /* do texcoord wrapping and compute texel offsets */
+ lp_build_sample_wrap_linear_int(bld,
+ bld->format_desc->block.width,
+ s_ipart, width_vec, x_stride,
+ bld->static_state->pot_width,
+ bld->static_state->wrap_s,
+ &x_offset0, &x_offset1,
+ &x_subcoord[0], &x_subcoord[1]);
+ for (z = 0; z < 2; z++) {
+ for (y = 0; y < 2; y++) {
+ offset[z][y][0] = x_offset0;
+ offset[z][y][1] = x_offset1;
+ }
+ }
+
+ if (dims >= 2) {
+ lp_build_sample_wrap_linear_int(bld,
+ bld->format_desc->block.height,
+ t_ipart, height_vec, y_stride,
+ bld->static_state->pot_height,
+ bld->static_state->wrap_t,
+ &y_offset0, &y_offset1,
+ &y_subcoord[0], &y_subcoord[1]);
+
+ for (z = 0; z < 2; z++) {
+ for (x = 0; x < 2; x++) {
+ offset[z][0][x] = lp_build_add(&bld->uint_coord_bld,
+ offset[z][0][x], y_offset0);
+ offset[z][1][x] = lp_build_add(&bld->uint_coord_bld,
+ offset[z][1][x], y_offset1);
+ }
+ }
+ }
+
+ if (dims >= 3) {
+ lp_build_sample_wrap_linear_int(bld,
+ bld->format_desc->block.height,
+ r_ipart, depth_vec, z_stride,
+ bld->static_state->pot_depth,
+ bld->static_state->wrap_r,
+ &z_offset0, &z_offset1,
+ &z_subcoord[0], &z_subcoord[1]);
+ for (y = 0; y < 2; y++) {
+ for (x = 0; x < 2; x++) {
+ offset[0][y][x] = lp_build_add(&bld->uint_coord_bld,
+ offset[0][y][x], z_offset0);
+ offset[1][y][x] = lp_build_add(&bld->uint_coord_bld,
+ offset[1][y][x], z_offset1);
+ }
+ }
+ }
+ else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
+ LLVMValueRef z_offset;
+ z_offset = lp_build_mul(&bld->uint_coord_bld, r, img_stride_vec);
+ for (y = 0; y < 2; y++) {
+ for (x = 0; x < 2; x++) {
+ /* The r coord is the cube face in [0,5] */
+ offset[0][y][x] = lp_build_add(&bld->uint_coord_bld,
+ offset[0][y][x], z_offset);
+ }
+ }
+ }
+
+ /*
+ * Transform 4 x i32 in
+ *
+ * s_fpart = {s0, s1, s2, s3}
+ *
+ * into 8 x i16
+ *
+ * s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
+ *
+ * into two 8 x i16
+ *
+ * s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
+ * s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
+ *
+ * and likewise for t_fpart. There is no risk of loosing precision here
+ * since the fractional parts only use the lower 8bits.
+ */
+ s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, "");
+ if (dims >= 2)
+ t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, "");
+ if (dims >= 3)
+ r_fpart = LLVMBuildBitCast(builder, r_fpart, h16_vec_type, "");
+
+ {
+ LLVMTypeRef elem_type = LLVMInt32Type();
+ LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH];
+ LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
+ LLVMValueRef shuffle_lo;
+ LLVMValueRef shuffle_hi;
+
+ for (j = 0; j < h16.type.length; j += 4) {
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
+ unsigned subindex = 0;
+#else
+ unsigned subindex = 1;
+#endif
+ LLVMValueRef index;
+
+ index = LLVMConstInt(elem_type, j/2 + subindex, 0);
+ for (i = 0; i < 4; ++i)
+ shuffles_lo[j + i] = index;
+
+ index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0);
+ for (i = 0; i < 4; ++i)
+ shuffles_hi[j + i] = index;
+ }
+
+ shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length);
+ shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length);
+
+ s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef,
+ shuffle_lo, "");
+ s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef,
+ shuffle_hi, "");
+ if (dims >= 2) {
+ t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef,
+ shuffle_lo, "");
+ t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef,
+ shuffle_hi, "");
+ }
+ if (dims >= 3) {
+ r_fpart_lo = LLVMBuildShuffleVector(builder, r_fpart, h16.undef,
+ shuffle_lo, "");
+ r_fpart_hi = LLVMBuildShuffleVector(builder, r_fpart, h16.undef,
+ shuffle_hi, "");
+ }
+ }
+
+ /*
+ * Fetch the pixels as 4 x 32bit (rgba order might differ):
+ *
+ * rgba0 rgba1 rgba2 rgba3
+ *
+ * bit cast them into 16 x u8
+ *
+ * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
+ *
+ * unpack them into two 8 x i16:
+ *
+ * r0 g0 b0 a0 r1 g1 b1 a1
+ * r2 g2 b2 a2 r3 g3 b3 a3
+ *
+ * The higher 8 bits of the resulting elements will be zero.
+ */
+ numj = 1 + (dims >= 2);
+ numk = 1 + (dims >= 3);
+
+ for (k = 0; k < numk; k++) {
+ for (j = 0; j < numj; j++) {
+ for (i = 0; i < 2; i++) {
+ LLVMValueRef rgba8;
+
+ if (util_format_is_rgba8_variant(bld->format_desc)) {
+ /*
+ * Given the format is a rgba8, just read the pixels as is,
+ * without any swizzling. Swizzling will be done later.
+ */
+ rgba8 = lp_build_gather(bld->builder,
+ bld->texel_type.length,
+ bld->format_desc->block.bits,
+ bld->texel_type.width,
+ data_ptr, offset[k][j][i]);
+
+ rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
+ }
+ else {
+ rgba8 = lp_build_fetch_rgba_aos(bld->builder,
+ bld->format_desc,
+ u8n.type,
+ data_ptr, offset[k][j][i],
+ x_subcoord[i],
+ y_subcoord[j]);
+ }
+
+ /* Expand one 4*rgba8 to two 2*rgba16 */
+ lp_build_unpack2(builder, u8n.type, h16.type,
+ rgba8,
+ &neighbors_lo[k][j][i], &neighbors_hi[k][j][i]);
+ }
+ }
+ }
+
+ /*
+ * Linear interpolation with 8.8 fixed point.
+ */
+ if (dims == 1) {
+ /* 1-D lerp */
+ packed_lo = lp_build_lerp(&h16,
+ s_fpart_lo,
+ neighbors_lo[0][0][0],
+ neighbors_lo[0][0][1]);
+
+ packed_hi = lp_build_lerp(&h16,
+ s_fpart_hi,
+ neighbors_hi[0][0][0],
+ neighbors_hi[0][0][1]);
+ }
+ else {
+ /* 2-D lerp */
+ packed_lo = lp_build_lerp_2d(&h16,
+ s_fpart_lo, t_fpart_lo,
+ neighbors_lo[0][0][0],
+ neighbors_lo[0][0][1],
+ neighbors_lo[0][1][0],
+ neighbors_lo[0][1][1]);
+
+ packed_hi = lp_build_lerp_2d(&h16,
+ s_fpart_hi, t_fpart_hi,
+ neighbors_hi[0][0][0],
+ neighbors_hi[0][0][1],
+ neighbors_hi[0][1][0],
+ neighbors_hi[0][1][1]);
+
+ if (dims >= 3) {
+ LLVMValueRef packed_lo2, packed_hi2;
+
+ /* lerp in the second z slice */
+ packed_lo2 = lp_build_lerp_2d(&h16,
+ s_fpart_lo, t_fpart_lo,
+ neighbors_lo[1][0][0],
+ neighbors_lo[1][0][1],
+ neighbors_lo[1][1][0],
+ neighbors_lo[1][1][1]);
+
+ packed_hi2 = lp_build_lerp_2d(&h16,
+ s_fpart_hi, t_fpart_hi,
+ neighbors_hi[1][0][0],
+ neighbors_hi[1][0][1],
+ neighbors_hi[1][1][0],
+ neighbors_hi[1][1][1]);
+ /* interp between two z slices */
+ packed_lo = lp_build_lerp(&h16, r_fpart_lo,
+ packed_lo, packed_lo2);
+ packed_hi = lp_build_lerp(&h16, r_fpart_hi,
+ packed_hi, packed_hi2);
+ }
+ }
+
+ *colors_lo = packed_lo;
+ *colors_hi = packed_hi;
+}
+
+
+/**
+ * Sample the texture/mipmap using given image filter and mip filter.
+ * data0_ptr and data1_ptr point to the two mipmap levels to sample
+ * from. width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
+ * If we're using nearest miplevel sampling the '1' values will be null/unused.
+ */
+static void
+lp_build_sample_mipmap(struct lp_build_sample_context *bld,
+ unsigned img_filter,
+ unsigned mip_filter,
+ LLVMValueRef s,
+ LLVMValueRef t,
+ LLVMValueRef r,
+ LLVMValueRef ilevel0,
+ LLVMValueRef ilevel1,
+ LLVMValueRef lod_fpart,
+ LLVMValueRef colors_lo_var,
+ LLVMValueRef colors_hi_var)
+{
+ LLVMBuilderRef builder = bld->builder;
+ LLVMValueRef width0_vec;
+ LLVMValueRef width1_vec;
+ LLVMValueRef height0_vec;
+ LLVMValueRef height1_vec;
+ LLVMValueRef depth0_vec;
+ LLVMValueRef depth1_vec;
+ LLVMValueRef row_stride0_vec;
+ LLVMValueRef row_stride1_vec;
+ LLVMValueRef img_stride0_vec;
+ LLVMValueRef img_stride1_vec;
+ LLVMValueRef data_ptr0;
+ LLVMValueRef data_ptr1;
+ LLVMValueRef colors0_lo, colors0_hi;
+ LLVMValueRef colors1_lo, colors1_hi;
+
+
+ /* sample the first mipmap level */
+ lp_build_mipmap_level_sizes(bld, ilevel0,
+ &width0_vec, &height0_vec, &depth0_vec,
+ &row_stride0_vec, &img_stride0_vec);
+ data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
+ if (img_filter == PIPE_TEX_FILTER_NEAREST) {
+ lp_build_sample_image_nearest(bld,
+ width0_vec, height0_vec, depth0_vec,
+ row_stride0_vec, img_stride0_vec,
+ data_ptr0, s, t, r,
+ &colors0_lo, &colors0_hi);
+ }
+ else {
+ assert(img_filter == PIPE_TEX_FILTER_LINEAR);
+ lp_build_sample_image_linear(bld,
+ width0_vec, height0_vec, depth0_vec,
+ row_stride0_vec, img_stride0_vec,
+ data_ptr0, s, t, r,
+ &colors0_lo, &colors0_hi);
+ }
+
+ /* Store the first level's colors in the output variables */
+ LLVMBuildStore(builder, colors0_lo, colors_lo_var);
+ LLVMBuildStore(builder, colors0_hi, colors_hi_var);
+
+ if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
+ LLVMValueRef h16_scale = LLVMConstReal(LLVMFloatType(), 256.0);
+ LLVMTypeRef i32_type = LLVMIntType(32);
+ struct lp_build_flow_context *flow_ctx;
+ struct lp_build_if_state if_ctx;
+ LLVMValueRef need_lerp;
+
+ flow_ctx = lp_build_flow_create(builder);
+
+ lod_fpart = LLVMBuildFMul(builder, lod_fpart, h16_scale, "");
+ lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32_type, "lod_fpart.fixed16");
+
+ /* need_lerp = lod_fpart > 0 */
+ need_lerp = LLVMBuildICmp(builder, LLVMIntSGT,
+ lod_fpart, LLVMConstNull(i32_type),
+ "need_lerp");
+
+ lp_build_if(&if_ctx, flow_ctx, builder, need_lerp);
+ {
+ struct lp_build_context h16_bld;
+
+ lp_build_context_init(&h16_bld, builder, lp_type_ufixed(16));
+
+ /* sample the second mipmap level */
+ lp_build_mipmap_level_sizes(bld, ilevel1,
+ &width1_vec, &height1_vec, &depth1_vec,
+ &row_stride1_vec, &img_stride1_vec);
+ data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
+ if (img_filter == PIPE_TEX_FILTER_NEAREST) {
+ lp_build_sample_image_nearest(bld,
+ width1_vec, height1_vec, depth1_vec,
+ row_stride1_vec, img_stride1_vec,
+ data_ptr1, s, t, r,
+ &colors1_lo, &colors1_hi);
+ }
+ else {
+ lp_build_sample_image_linear(bld,
+ width1_vec, height1_vec, depth1_vec,
+ row_stride1_vec, img_stride1_vec,
+ data_ptr1, s, t, r,
+ &colors1_lo, &colors1_hi);
+ }
+
+ /* interpolate samples from the two mipmap levels */
+
+ lod_fpart = LLVMBuildTrunc(builder, lod_fpart, h16_bld.elem_type, "");
+ lod_fpart = lp_build_broadcast_scalar(&h16_bld, lod_fpart);
+
+ colors0_lo = lp_build_lerp(&h16_bld, lod_fpart,
+ colors0_lo, colors1_lo);
+ colors0_hi = lp_build_lerp(&h16_bld, lod_fpart,
+ colors0_hi, colors1_hi);
+
+ LLVMBuildStore(builder, colors0_lo, colors_lo_var);
+ LLVMBuildStore(builder, colors0_hi, colors_hi_var);
+ }
+ lp_build_endif(&if_ctx);
+
+ lp_build_flow_destroy(flow_ctx);
+ }
+}
+
+
+
+/**
+ * Texture sampling in AoS format. Used when sampling common 32-bit/texel
+ * formats. 1D/2D/3D/cube texture supported. All mipmap sampling modes
+ * but only limited texture coord wrap modes.
+ */
+void
+lp_build_sample_aos(struct lp_build_sample_context *bld,
+ unsigned unit,
+ LLVMValueRef s,
+ LLVMValueRef t,
+ LLVMValueRef r,
+ const LLVMValueRef *ddx,
+ const LLVMValueRef *ddy,
+ LLVMValueRef lod_bias, /* optional */
+ LLVMValueRef explicit_lod, /* optional */
+ LLVMValueRef texel_out[4])
+{
+ struct lp_build_context *int_bld = &bld->int_bld;
+ LLVMBuilderRef builder = bld->builder;
+ const unsigned mip_filter = bld->static_state->min_mip_filter;
+ const unsigned min_filter = bld->static_state->min_img_filter;
+ const unsigned mag_filter = bld->static_state->mag_img_filter;
+ const unsigned dims = bld->dims;
+ LLVMValueRef lod_ipart = NULL, lod_fpart = NULL;
+ LLVMValueRef ilevel0, ilevel1 = NULL;
+ LLVMValueRef packed, packed_lo, packed_hi;
+ LLVMValueRef unswizzled[4];
+ LLVMValueRef face_ddx[4], face_ddy[4];
+ struct lp_build_context h16_bld;
+ LLVMTypeRef i32t = LLVMInt32Type();
+ LLVMValueRef i32t_zero = LLVMConstInt(i32t, 0, 0);
+
+ /* we only support the common/simple wrap modes at this time */
+ assert(lp_is_simple_wrap_mode(bld->static_state->wrap_s));
+ if (dims >= 2)
+ assert(lp_is_simple_wrap_mode(bld->static_state->wrap_t));
+ if (dims >= 3)
+ assert(lp_is_simple_wrap_mode(bld->static_state->wrap_r));
+
+
+ /* make 16-bit fixed-pt builder context */
+ lp_build_context_init(&h16_bld, builder, lp_type_ufixed(16));
+
+ /* cube face selection, compute pre-face coords, etc. */
+ if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
+ LLVMValueRef face, face_s, face_t;
+ lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t);
+ s = face_s; /* vec */
+ t = face_t; /* vec */
+ /* use 'r' to indicate cube face */
+ r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */
+
+ /* recompute ddx, ddy using the new (s,t) face texcoords */
+ face_ddx[0] = lp_build_ddx(&bld->coord_bld, s);
+ face_ddx[1] = lp_build_ddx(&bld->coord_bld, t);
+ face_ddx[2] = NULL;
+ face_ddx[3] = NULL;
+ face_ddy[0] = lp_build_ddy(&bld->coord_bld, s);
+ face_ddy[1] = lp_build_ddy(&bld->coord_bld, t);
+ face_ddy[2] = NULL;
+ face_ddy[3] = NULL;
+ ddx = face_ddx;
+ ddy = face_ddy;
+ }
+
+ /*
+ * Compute the level of detail (float).
+ */
+ if (min_filter != mag_filter ||
+ mip_filter != PIPE_TEX_MIPFILTER_NONE) {
+ /* Need to compute lod either to choose mipmap levels or to
+ * distinguish between minification/magnification with one mipmap level.
+ */
+ lp_build_lod_selector(bld, unit, ddx, ddy,
+ lod_bias, explicit_lod,
+ mip_filter,
+ &lod_ipart, &lod_fpart);
+ } else {
+ lod_ipart = i32t_zero;
+ }
+
+ /*
+ * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
+ */
+ switch (mip_filter) {
+ default:
+ assert(0 && "bad mip_filter value in lp_build_sample_aos()");
+ /* fall-through */
+ case PIPE_TEX_MIPFILTER_NONE:
+ /* always use mip level 0 */
+ if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
+ /* XXX this is a work-around for an apparent bug in LLVM 2.7.
+ * We should be able to set ilevel0 = const(0) but that causes
+ * bad x86 code to be emitted.
+ */
+ assert(lod_ipart);
+ lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
+ }
+ else {
+ ilevel0 = i32t_zero;
+ }
+ break;
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ assert(lod_ipart);
+ lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
+ break;
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ assert(lod_ipart);
+ assert(lod_fpart);
+ lp_build_linear_mip_levels(bld, unit,
+ lod_ipart, &lod_fpart,
+ &ilevel0, &ilevel1);
+ break;
+ }
+
+ /*
+ * Get/interpolate texture colors.
+ */
+
+ packed_lo = lp_build_alloca(builder, h16_bld.vec_type, "packed_lo");
+ packed_hi = lp_build_alloca(builder, h16_bld.vec_type, "packed_hi");
+
+ if (min_filter == mag_filter) {
+ /* no need to distinquish between minification and magnification */
+ lp_build_sample_mipmap(bld,
+ min_filter, mip_filter,
+ s, t, r,
+ ilevel0, ilevel1, lod_fpart,
+ packed_lo, packed_hi);
+ }
+ else {
+ /* Emit conditional to choose min image filter or mag image filter
+ * depending on the lod being > 0 or <= 0, respectively.
+ */
+ struct lp_build_flow_context *flow_ctx;
+ struct lp_build_if_state if_ctx;
+ LLVMValueRef minify;
+
+ flow_ctx = lp_build_flow_create(builder);
+
+ /* minify = lod >= 0.0 */
+ minify = LLVMBuildICmp(builder, LLVMIntSGE,
+ lod_ipart, int_bld->zero, "");
+
+ lp_build_if(&if_ctx, flow_ctx, builder, minify);
+ {
+ /* Use the minification filter */
+ lp_build_sample_mipmap(bld,
+ min_filter, mip_filter,
+ s, t, r,
+ ilevel0, ilevel1, lod_fpart,
+ packed_lo, packed_hi);
+ }
+ lp_build_else(&if_ctx);
+ {
+ /* Use the magnification filter */
+ lp_build_sample_mipmap(bld,
+ mag_filter, PIPE_TEX_MIPFILTER_NONE,
+ s, t, r,
+ i32t_zero, NULL, NULL,
+ packed_lo, packed_hi);
+ }
+ lp_build_endif(&if_ctx);
+
+ lp_build_flow_destroy(flow_ctx);
+ }
+
+ /*
+ * combine the values stored in 'packed_lo' and 'packed_hi' variables
+ * into 'packed'
+ */
+ packed = lp_build_pack2(builder,
+ h16_bld.type, lp_type_unorm(8),
+ LLVMBuildLoad(builder, packed_lo, ""),
+ LLVMBuildLoad(builder, packed_hi, ""));
+
+ /*
+ * Convert to SoA and swizzle.
+ */
+ lp_build_rgba8_to_f32_soa(builder,
+ bld->texel_type,
+ packed, unswizzled);
+
+ if (util_format_is_rgba8_variant(bld->format_desc)) {
+ lp_build_format_swizzle_soa(bld->format_desc,
+ &bld->texel_bld,
+ unswizzled, texel_out);
+ }
+ else {
+ texel_out[0] = unswizzled[0];
+ texel_out[1] = unswizzled[1];
+ texel_out[2] = unswizzled[2];
+ texel_out[3] = unswizzled[3];
+ }
+
+ apply_sampler_swizzle(bld, texel_out);
+}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h
new file mode 100644
index 00000000000..5d9ecac4d50
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h
@@ -0,0 +1,56 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Texture sampling -- SoA.
+ *
+ * @author Jose Fonseca <[email protected]>
+ * @author Brian Paul <[email protected]>
+ */
+
+#ifndef LP_BLD_SAMPLE_AOS_H
+#define LP_BLD_SAMPLE_AOS_H
+
+
+#include "lp_bld_sample.h"
+
+
+void
+lp_build_sample_aos(struct lp_build_sample_context *bld,
+ unsigned unit,
+ LLVMValueRef s,
+ LLVMValueRef t,
+ LLVMValueRef r,
+ const LLVMValueRef *ddx,
+ const LLVMValueRef *ddy,
+ LLVMValueRef lod_bias, /* optional */
+ LLVMValueRef explicit_lod, /* optional */
+ LLVMValueRef texel_out[4]);
+
+
+#endif /* LP_BLD_SAMPLE_AOS_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 54ef921678d..b8cf938acfe 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -30,6 +30,7 @@
* Texture sampling -- SoA.
*
* @author Jose Fonseca <[email protected]>
+ * @author Brian Paul <[email protected]>
*/
#include "pipe/p_defines.h"
@@ -39,204 +40,28 @@
#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/u_format.h"
-#include "util/u_cpu_detect.h"
#include "lp_bld_debug.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_conv.h"
#include "lp_bld_arit.h"
+#include "lp_bld_bitarit.h"
#include "lp_bld_logic.h"
+#include "lp_bld_printf.h"
#include "lp_bld_swizzle.h"
-#include "lp_bld_pack.h"
#include "lp_bld_flow.h"
+#include "lp_bld_gather.h"
#include "lp_bld_format.h"
#include "lp_bld_sample.h"
-
-
-/**
- * Keep all information for sampling code generation in a single place.
- */
-struct lp_build_sample_context
-{
- LLVMBuilderRef builder;
-
- const struct lp_sampler_static_state *static_state;
-
- struct lp_sampler_dynamic_state *dynamic_state;
-
- const struct util_format_description *format_desc;
-
- /** regular scalar float type */
- struct lp_type float_type;
- struct lp_build_context float_bld;
-
- /** regular scalar float type */
- struct lp_type int_type;
- struct lp_build_context int_bld;
-
- /** Incoming coordinates type and build context */
- struct lp_type coord_type;
- struct lp_build_context coord_bld;
-
- /** Unsigned integer coordinates */
- struct lp_type uint_coord_type;
- struct lp_build_context uint_coord_bld;
-
- /** Signed integer coordinates */
- struct lp_type int_coord_type;
- struct lp_build_context int_coord_bld;
-
- /** Output texels type and build context */
- struct lp_type texel_type;
- struct lp_build_context texel_bld;
-};
-
-
-/**
- * Does the given texture wrap mode allow sampling the texture border color?
- * XXX maybe move this into gallium util code.
- */
-static boolean
-wrap_mode_uses_border_color(unsigned mode)
-{
- switch (mode) {
- case PIPE_TEX_WRAP_REPEAT:
- case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
- case PIPE_TEX_WRAP_MIRROR_REPEAT:
- case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
- return FALSE;
- case PIPE_TEX_WRAP_CLAMP:
- case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
- case PIPE_TEX_WRAP_MIRROR_CLAMP:
- case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
- return TRUE;
- default:
- assert(0 && "unexpected wrap mode");
- return FALSE;
- }
-}
-
-
-static LLVMValueRef
-lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
- LLVMValueRef data_array, LLVMValueRef level)
-{
- LLVMValueRef indexes[2], data_ptr;
- indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
- indexes[1] = level;
- data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, "");
- data_ptr = LLVMBuildLoad(bld->builder, data_ptr, "");
- return data_ptr;
-}
-
-
-static LLVMValueRef
-lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld,
- LLVMValueRef data_array, int level)
-{
- LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0);
- return lp_build_get_mipmap_level(bld, data_array, lvl);
-}
-
-
-/**
- * Dereference stride_array[mipmap_level] array to get a stride.
- * Return stride as a vector.
- */
-static LLVMValueRef
-lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
- LLVMValueRef stride_array, LLVMValueRef level)
-{
- LLVMValueRef indexes[2], stride;
- indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
- indexes[1] = level;
- stride = LLVMBuildGEP(bld->builder, stride_array, indexes, 2, "");
- stride = LLVMBuildLoad(bld->builder, stride, "");
- stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride);
- return stride;
-}
-
-
-/** Dereference stride_array[0] array to get a stride (as vector). */
-static LLVMValueRef
-lp_build_get_const_level_stride_vec(struct lp_build_sample_context *bld,
- LLVMValueRef stride_array, int level)
-{
- LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0);
- return lp_build_get_level_stride_vec(bld, stride_array, lvl);
-}
-
-
-static int
-texture_dims(enum pipe_texture_target tex)
-{
- switch (tex) {
- case PIPE_TEXTURE_1D:
- return 1;
- case PIPE_TEXTURE_2D:
- case PIPE_TEXTURE_CUBE:
- return 2;
- case PIPE_TEXTURE_3D:
- return 3;
- default:
- assert(0 && "bad texture target in texture_dims()");
- return 2;
- }
-}
-
-
-static LLVMValueRef
-lp_build_swizzle_chan_soa(struct lp_type type,
- const LLVMValueRef *unswizzled,
- enum util_format_swizzle swizzle)
-{
- switch (swizzle) {
- case PIPE_SWIZZLE_RED:
- case PIPE_SWIZZLE_GREEN:
- case PIPE_SWIZZLE_BLUE:
- case PIPE_SWIZZLE_ALPHA:
- return unswizzled[swizzle];
- case PIPE_SWIZZLE_ZERO:
- return lp_build_zero(type);
- case PIPE_SWIZZLE_ONE:
- return lp_build_one(type);
- default:
- assert(0);
- return lp_build_undef(type);
- }
-}
-
-
-static void
-lp_build_swizzle_soa(struct lp_build_sample_context *bld,
- LLVMValueRef *texel)
-{
- LLVMValueRef unswizzled[4];
- unsigned char swizzles[4];
- unsigned chan;
-
- for (chan = 0; chan < 4; ++chan) {
- unswizzled[chan] = texel[chan];
- }
-
- swizzles[0] = bld->static_state->swizzle_r;
- swizzles[1] = bld->static_state->swizzle_g;
- swizzles[2] = bld->static_state->swizzle_b;
- swizzles[3] = bld->static_state->swizzle_a;
-
- for (chan = 0; chan < 4; ++chan) {
- unsigned swizzle = swizzles[chan];
- texel[chan] = lp_build_swizzle_chan_soa(bld->texel_type,
- unswizzled, swizzle);
- }
-}
-
+#include "lp_bld_sample_aos.h"
+#include "lp_bld_struct.h"
+#include "lp_bld_quad.h"
/**
* Generate code to fetch a texel from a texture at int coords (x, y, z).
* The computation depends on whether the texture is 1D, 2D or 3D.
- * The result, texel, will be:
+ * The result, texel, will be float vectors:
* texel[0] = red values
* texel[1] = green values
* texel[2] = blue values
@@ -244,6 +69,7 @@ lp_build_swizzle_soa(struct lp_build_sample_context *bld,
*/
static void
lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
+ unsigned unit,
LLVMValueRef width,
LLVMValueRef height,
LLVMValueRef depth,
@@ -253,23 +79,29 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
LLVMValueRef y_stride,
LLVMValueRef z_stride,
LLVMValueRef data_ptr,
- LLVMValueRef *texel)
+ LLVMValueRef texel_out[4])
{
- const int dims = texture_dims(bld->static_state->target);
+ const struct lp_sampler_static_state *static_state = bld->static_state;
+ const unsigned dims = bld->dims;
struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
LLVMValueRef offset;
LLVMValueRef i, j;
LLVMValueRef use_border = NULL;
/* use_border = x < 0 || x >= width || y < 0 || y >= height */
- if (wrap_mode_uses_border_color(bld->static_state->wrap_s)) {
+ if (lp_sampler_wrap_mode_uses_border_color(static_state->wrap_s,
+ static_state->min_img_filter,
+ static_state->mag_img_filter)) {
LLVMValueRef b1, b2;
b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
}
- if (dims >= 2 && wrap_mode_uses_border_color(bld->static_state->wrap_t)) {
+ if (dims >= 2 &&
+ lp_sampler_wrap_mode_uses_border_color(static_state->wrap_t,
+ static_state->min_img_filter,
+ static_state->mag_img_filter)) {
LLVMValueRef b1, b2;
b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
@@ -282,7 +114,10 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
}
}
- if (dims == 3 && wrap_mode_uses_border_color(bld->static_state->wrap_r)) {
+ if (dims == 3 &&
+ lp_sampler_wrap_mode_uses_border_color(static_state->wrap_r,
+ static_state->min_img_filter,
+ static_state->mag_img_filter)) {
LLVMValueRef b1, b2;
b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
@@ -295,44 +130,30 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
}
}
- /*
- * Describe the coordinates in terms of pixel blocks.
- *
- * TODO: pixel blocks are power of two. LLVM should convert rem/div to
- * bit arithmetic. Verify this.
- */
-
- if (bld->format_desc->block.width == 1) {
- i = bld->uint_coord_bld.zero;
- }
- else {
- LLVMValueRef block_width = lp_build_const_int_vec(bld->uint_coord_bld.type, bld->format_desc->block.width);
- i = LLVMBuildURem(bld->builder, x, block_width, "");
- x = LLVMBuildUDiv(bld->builder, x, block_width, "");
- }
+ /* convert x,y,z coords to linear offset from start of texture, in bytes */
+ lp_build_sample_offset(&bld->uint_coord_bld,
+ bld->format_desc,
+ x, y, z, y_stride, z_stride,
+ &offset, &i, &j);
- if (bld->format_desc->block.height == 1) {
- j = bld->uint_coord_bld.zero;
- }
- else {
- LLVMValueRef block_height = lp_build_const_int_vec(bld->uint_coord_bld.type, bld->format_desc->block.height);
- j = LLVMBuildURem(bld->builder, y, block_height, "");
- y = LLVMBuildUDiv(bld->builder, y, block_height, "");
+ if (use_border) {
+ /* If we can sample the border color, it means that texcoords may
+ * lie outside the bounds of the texture image. We need to do
+ * something to prevent reading out of bounds and causing a segfault.
+ *
+ * Simply AND the texture coords with !use_border. This will cause
+ * coords which are out of bounds to become zero. Zero's guaranteed
+ * to be inside the texture image.
+ */
+ offset = lp_build_andnot(&bld->uint_coord_bld, offset, use_border);
}
- /* convert x,y,z coords to linear offset from start of texture, in bytes */
- offset = lp_build_sample_offset(&bld->uint_coord_bld,
- bld->format_desc,
- x, y, z, y_stride, z_stride);
-
lp_build_fetch_rgba_soa(bld->builder,
bld->format_desc,
bld->texel_type,
data_ptr, offset,
i, j,
- texel);
-
- lp_build_swizzle_soa(bld, texel);
+ texel_out);
/*
* Note: if we find an app which frequently samples the texture border
@@ -351,44 +172,22 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
if (use_border) {
/* select texel color or border color depending on use_border */
+ LLVMValueRef border_color_ptr =
+ bld->dynamic_state->border_color(bld->dynamic_state,
+ bld->builder, unit);
int chan;
for (chan = 0; chan < 4; chan++) {
LLVMValueRef border_chan =
- lp_build_const_vec(bld->texel_type,
- bld->static_state->border_color[chan]);
- texel[chan] = lp_build_select(&bld->texel_bld, use_border,
- border_chan, texel[chan]);
+ lp_build_array_get(bld->builder, border_color_ptr,
+ lp_build_const_int32(chan));
+ LLVMValueRef border_chan_vec =
+ lp_build_broadcast_scalar(&bld->float_vec_bld, border_chan);
+ texel_out[chan] = lp_build_select(&bld->texel_bld, use_border,
+ border_chan_vec, texel_out[chan]);
}
}
-}
-
-static LLVMValueRef
-lp_build_sample_packed(struct lp_build_sample_context *bld,
- LLVMValueRef x,
- LLVMValueRef y,
- LLVMValueRef y_stride,
- LLVMValueRef data_array)
-{
- LLVMValueRef offset;
- LLVMValueRef data_ptr;
-
- offset = lp_build_sample_offset(&bld->uint_coord_bld,
- bld->format_desc,
- x, y, NULL, y_stride, NULL);
-
- assert(bld->format_desc->block.width == 1);
- assert(bld->format_desc->block.height == 1);
- assert(bld->format_desc->block.bits <= bld->texel_type.width);
-
- /* get pointer to mipmap level 0 data */
- data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
-
- return lp_build_gather(bld->builder,
- bld->texel_type.length,
- bld->format_desc->block.bits,
- bld->texel_type.width,
- data_ptr, offset);
+ apply_sampler_swizzle(bld, texel_out);
}
@@ -426,81 +225,6 @@ lp_build_coord_mirror(struct lp_build_sample_context *bld,
/**
- * We only support a few wrap modes in lp_build_sample_wrap_int() at this time.
- * Return whether the given mode is supported by that function.
- */
-static boolean
-is_simple_wrap_mode(unsigned mode)
-{
- switch (mode) {
- case PIPE_TEX_WRAP_REPEAT:
- case PIPE_TEX_WRAP_CLAMP:
- case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
- return TRUE;
- case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
- default:
- return FALSE;
- }
-}
-
-
-/**
- * Build LLVM code for texture wrap mode, for scaled integer texcoords.
- * \param coord the incoming texcoord (s,t,r or q) scaled to the texture size
- * \param length the texture size along one dimension
- * \param is_pot if TRUE, length is a power of two
- * \param wrap_mode one of PIPE_TEX_WRAP_x
- */
-static LLVMValueRef
-lp_build_sample_wrap_int(struct lp_build_sample_context *bld,
- LLVMValueRef coord,
- LLVMValueRef length,
- boolean is_pot,
- unsigned wrap_mode)
-{
- struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
- struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
- LLVMValueRef length_minus_one;
-
- length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
-
- switch(wrap_mode) {
- case PIPE_TEX_WRAP_REPEAT:
- if(is_pot)
- coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, "");
- else
- /* Signed remainder won't give the right results for negative
- * dividends but unsigned remainder does.*/
- coord = LLVMBuildURem(bld->builder, coord, length, "");
- break;
-
- case PIPE_TEX_WRAP_CLAMP:
- case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
- case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
- coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
- coord = lp_build_min(int_coord_bld, coord, length_minus_one);
- break;
-
- case PIPE_TEX_WRAP_MIRROR_REPEAT:
- case PIPE_TEX_WRAP_MIRROR_CLAMP:
- case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
- case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
- /* FIXME */
- _debug_printf("llvmpipe: failed to translate texture wrap mode %s\n",
- util_dump_tex_wrap(wrap_mode, TRUE));
- coord = lp_build_max(uint_coord_bld, coord, uint_coord_bld->zero);
- coord = lp_build_min(uint_coord_bld, coord, length_minus_one);
- break;
-
- default:
- assert(0);
- }
-
- return coord;
-}
-
-
-/**
* Build LLVM code for texture wrap mode for linear filtering.
* \param x0_out returns first integer texcoord
* \param x1_out returns second integer texcoord
@@ -519,11 +243,9 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
struct lp_build_context *coord_bld = &bld->coord_bld;
struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
- LLVMValueRef two = lp_build_const_vec(coord_bld->type, 2.0);
LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5);
LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
- LLVMValueRef length_f_minus_one = lp_build_sub(coord_bld, length_f, coord_bld->one);
LLVMValueRef coord0, coord1, weight;
switch(wrap_mode) {
@@ -531,19 +253,19 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
/* mul by size and subtract 0.5 */
coord = lp_build_mul(coord_bld, coord, length_f);
coord = lp_build_sub(coord_bld, coord, half);
- /* convert to int */
- coord0 = lp_build_ifloor(coord_bld, coord);
+ /* convert to int, compute lerp weight */
+ lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
coord1 = lp_build_add(uint_coord_bld, coord0, uint_coord_bld->one);
- /* compute lerp weight */
- weight = lp_build_fract(coord_bld, coord);
/* repeat wrap */
if (is_pot) {
coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, "");
coord1 = LLVMBuildAnd(bld->builder, coord1, length_minus_one, "");
}
else {
- /* Signed remainder won't give the right results for negative
- * dividends but unsigned remainder does.*/
+ /* Add a bias to the texcoord to handle negative coords */
+ LLVMValueRef bias = lp_build_mul_imm(uint_coord_bld, length, 1024);
+ coord0 = LLVMBuildAdd(bld->builder, coord0, bias, "");
+ coord1 = LLVMBuildAdd(bld->builder, coord1, bias, "");
coord0 = LLVMBuildURem(bld->builder, coord0, length, "");
coord1 = LLVMBuildURem(bld->builder, coord1, length, "");
}
@@ -551,16 +273,18 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
case PIPE_TEX_WRAP_CLAMP:
if (bld->static_state->normalized_coords) {
+ /* scale coord to length */
coord = lp_build_mul(coord_bld, coord, length_f);
}
- weight = lp_build_fract(coord_bld, coord);
- coord0 = lp_build_clamp(coord_bld, coord, coord_bld->zero,
- length_f_minus_one);
- coord1 = lp_build_add(coord_bld, coord, coord_bld->one);
- coord1 = lp_build_clamp(coord_bld, coord1, coord_bld->zero,
- length_f_minus_one);
- coord0 = lp_build_ifloor(coord_bld, coord0);
- coord1 = lp_build_ifloor(coord_bld, coord1);
+
+ /* clamp to [0, length] */
+ coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f);
+
+ coord = lp_build_sub(coord_bld, coord, half);
+
+ /* convert to int, compute lerp weight */
+ lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
+ coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
break;
case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
@@ -574,14 +298,12 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
else {
LLVMValueRef min, max;
/* clamp to [0.5, length - 0.5] */
- min = lp_build_const_vec(coord_bld->type, 0.5F);
+ min = half;
max = lp_build_sub(coord_bld, length_f, min);
coord = lp_build_clamp(coord_bld, coord, min, max);
}
- /* compute lerp weight */
- weight = lp_build_fract(coord_bld, coord);
- /* coord0 = floor(coord); */
- coord0 = lp_build_ifloor(coord_bld, coord);
+ /* convert to int, compute lerp weight */
+ lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
/* coord0 = max(coord0, 0) */
coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
@@ -593,29 +315,16 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
{
LLVMValueRef min, max;
if (bld->static_state->normalized_coords) {
- /* min = -1.0 / (2 * length) = -0.5 / length */
- min = lp_build_mul(coord_bld,
- lp_build_const_vec(coord_bld->type, -0.5F),
- lp_build_rcp(coord_bld, length_f));
- /* max = 1.0 - min */
- max = lp_build_sub(coord_bld, coord_bld->one, min);
- /* coord = clamp(coord, min, max) */
- coord = lp_build_clamp(coord_bld, coord, min, max);
- /* scale coord to length (and sub 0.5?) */
+ /* scale coord to length */
coord = lp_build_mul(coord_bld, coord, length_f);
- coord = lp_build_sub(coord_bld, coord, half);
- }
- else {
- /* clamp to [-0.5, length + 0.5] */
- min = lp_build_const_vec(coord_bld->type, -0.5F);
- max = lp_build_sub(coord_bld, length_f, min);
- coord = lp_build_clamp(coord_bld, coord, min, max);
- coord = lp_build_sub(coord_bld, coord, half);
}
- /* compute lerp weight */
- weight = lp_build_fract(coord_bld, coord);
- /* convert to int */
- coord0 = lp_build_ifloor(coord_bld, coord);
+ /* clamp to [-0.5, length + 0.5] */
+ min = lp_build_const_vec(coord_bld->type, -0.5F);
+ max = lp_build_sub(coord_bld, length_f, min);
+ coord = lp_build_clamp(coord_bld, coord, min, max);
+ coord = lp_build_sub(coord_bld, coord, half);
+ /* convert to int, compute lerp weight */
+ lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
}
break;
@@ -628,11 +337,8 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
coord = lp_build_mul(coord_bld, coord, length_f);
coord = lp_build_sub(coord_bld, coord, half);
- /* compute lerp weight */
- weight = lp_build_fract(coord_bld, coord);
-
- /* convert to int coords */
- coord0 = lp_build_ifloor(coord_bld, coord);
+ /* convert to int, compute lerp weight */
+ lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
/* coord0 = max(coord0, 0) */
@@ -642,37 +348,43 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
break;
case PIPE_TEX_WRAP_MIRROR_CLAMP:
- {
- LLVMValueRef min, max;
- /* min = 1.0 / (2 * length) */
- min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
- /* max = 1.0 - min */
- max = lp_build_sub(coord_bld, coord_bld->one, min);
+ coord = lp_build_abs(coord_bld, coord);
- coord = lp_build_abs(coord_bld, coord);
- coord = lp_build_clamp(coord_bld, coord, min, max);
+ if (bld->static_state->normalized_coords) {
+ /* scale coord to length */
coord = lp_build_mul(coord_bld, coord, length_f);
- if(0)coord = lp_build_sub(coord_bld, coord, half);
- weight = lp_build_fract(coord_bld, coord);
- coord0 = lp_build_ifloor(coord_bld, coord);
- coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
}
+
+ /* clamp to [0, length] */
+ coord = lp_build_min(coord_bld, coord, length_f);
+
+ coord = lp_build_sub(coord_bld, coord, half);
+
+ /* convert to int, compute lerp weight */
+ lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
+ coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
break;
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
{
LLVMValueRef min, max;
- /* min = 1.0 / (2 * length) */
- min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
- /* max = 1.0 - min */
- max = lp_build_sub(coord_bld, coord_bld->one, min);
coord = lp_build_abs(coord_bld, coord);
+
+ if (bld->static_state->normalized_coords) {
+ /* scale coord to length */
+ coord = lp_build_mul(coord_bld, coord, length_f);
+ }
+
+ /* clamp to [0.5, length - 0.5] */
+ min = half;
+ max = lp_build_sub(coord_bld, length_f, min);
coord = lp_build_clamp(coord_bld, coord, min, max);
- coord = lp_build_mul(coord_bld, coord, length_f);
+
coord = lp_build_sub(coord_bld, coord, half);
- weight = lp_build_fract(coord_bld, coord);
- coord0 = lp_build_ifloor(coord_bld, coord);
+
+ /* convert to int, compute lerp weight */
+ lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
}
break;
@@ -680,19 +392,23 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
{
LLVMValueRef min, max;
- /* min = -1.0 / (2 * length) = -0.5 / length */
- min = lp_build_mul(coord_bld,
- lp_build_const_vec(coord_bld->type, -0.5F),
- lp_build_rcp(coord_bld, length_f));
- /* max = 1.0 - min */
- max = lp_build_sub(coord_bld, coord_bld->one, min);
coord = lp_build_abs(coord_bld, coord);
+
+ if (bld->static_state->normalized_coords) {
+ /* scale coord to length */
+ coord = lp_build_mul(coord_bld, coord, length_f);
+ }
+
+ /* clamp to [-0.5, length + 0.5] */
+ min = lp_build_negate(coord_bld, half);
+ max = lp_build_sub(coord_bld, length_f, min);
coord = lp_build_clamp(coord_bld, coord, min, max);
- coord = lp_build_mul(coord_bld, coord, length_f);
+
coord = lp_build_sub(coord_bld, coord, half);
- weight = lp_build_fract(coord_bld, coord);
- coord0 = lp_build_ifloor(coord_bld, coord);
+
+ /* convert to int, compute lerp weight */
+ lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
}
break;
@@ -713,7 +429,7 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
/**
* Build LLVM code for texture wrap mode for nearest filtering.
* \param coord the incoming texcoord (nominally in [0,1])
- * \param length the texture size along one dimension, as int
+ * \param length the texture size along one dimension, as int vector
* \param is_pot if TRUE, length is a power of two
* \param wrap_mode one of PIPE_TEX_WRAP_x
*/
@@ -727,10 +443,8 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
struct lp_build_context *coord_bld = &bld->coord_bld;
struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
- LLVMValueRef two = lp_build_const_vec(coord_bld->type, 2.0);
LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
- LLVMValueRef length_f_minus_one = lp_build_sub(coord_bld, length_f, coord_bld->one);
LLVMValueRef icoord;
switch(wrap_mode) {
@@ -739,127 +453,89 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
icoord = lp_build_ifloor(coord_bld, coord);
if (is_pot)
icoord = LLVMBuildAnd(bld->builder, icoord, length_minus_one, "");
- else
- /* Signed remainder won't give the right results for negative
- * dividends but unsigned remainder does.*/
+ else {
+ /* Add a bias to the texcoord to handle negative coords */
+ LLVMValueRef bias = lp_build_mul_imm(uint_coord_bld, length, 1024);
+ icoord = LLVMBuildAdd(bld->builder, icoord, bias, "");
icoord = LLVMBuildURem(bld->builder, icoord, length, "");
+ }
break;
case PIPE_TEX_WRAP_CLAMP:
- /* mul by size */
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
if (bld->static_state->normalized_coords) {
+ /* scale coord to length */
coord = lp_build_mul(coord_bld, coord, length_f);
}
+
/* floor */
icoord = lp_build_ifloor(coord_bld, coord);
- /* clamp to [0, size-1]. Note: int coord builder type */
+
+ /* clamp to [0, length - 1]. */
icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero,
length_minus_one);
break;
- case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
- {
- LLVMValueRef min, max;
- if (bld->static_state->normalized_coords) {
- /* min = 1.0 / (2 * length) */
- min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
- /* max = length - min */
- max = lp_build_sub(coord_bld, length_f, min);
- /* scale coord to length */
- coord = lp_build_mul(coord_bld, coord, length_f);
- }
- else {
- /* clamp to [0.5, length - 0.5] */
- min = lp_build_const_vec(coord_bld->type, 0.5F);
- max = lp_build_sub(coord_bld, length_f, min);
- }
- /* coord = clamp(coord, min, max) */
- coord = lp_build_clamp(coord_bld, coord, min, max);
- icoord = lp_build_ifloor(coord_bld, coord);
- }
- break;
-
case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
/* Note: this is the same as CLAMP_TO_EDGE, except min = -min */
{
LLVMValueRef min, max;
+
if (bld->static_state->normalized_coords) {
- /* min = -1.0 / (2 * length) = -0.5 / length */
- min = lp_build_mul(coord_bld,
- lp_build_const_vec(coord_bld->type, -0.5F),
- lp_build_rcp(coord_bld, length_f));
- /* max = length - min */
- max = lp_build_sub(coord_bld, length_f, min);
/* scale coord to length */
coord = lp_build_mul(coord_bld, coord, length_f);
}
- else {
- /* clamp to [-0.5, length + 0.5] */
- min = lp_build_const_vec(coord_bld->type, -0.5F);
- max = lp_build_sub(coord_bld, length_f, min);
- }
- /* coord = clamp(coord, min, max) */
- coord = lp_build_clamp(coord_bld, coord, min, max);
+
icoord = lp_build_ifloor(coord_bld, coord);
+
+ /* clamp to [-1, length] */
+ min = lp_build_negate(int_coord_bld, int_coord_bld->one);
+ max = length;
+ icoord = lp_build_clamp(int_coord_bld, icoord, min, max);
}
break;
case PIPE_TEX_WRAP_MIRROR_REPEAT:
- {
- LLVMValueRef min, max;
- /* min = 1.0 / (2 * length) */
- min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
- /* max = length - min */
- max = lp_build_sub(coord_bld, length_f, min);
+ /* compute mirror function */
+ coord = lp_build_coord_mirror(bld, coord);
- /* compute mirror function */
- coord = lp_build_coord_mirror(bld, coord);
+ /* scale coord to length */
+ assert(bld->static_state->normalized_coords);
+ coord = lp_build_mul(coord_bld, coord, length_f);
- /* scale coord to length */
- coord = lp_build_mul(coord_bld, coord, length_f);
+ icoord = lp_build_ifloor(coord_bld, coord);
- /* coord = clamp(coord, min, max) */
- coord = lp_build_clamp(coord_bld, coord, min, max);
- icoord = lp_build_ifloor(coord_bld, coord);
- }
+ /* clamp to [0, length - 1] */
+ icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
break;
case PIPE_TEX_WRAP_MIRROR_CLAMP:
- coord = lp_build_abs(coord_bld, coord);
- coord = lp_build_mul(coord_bld, coord, length_f);
- coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f_minus_one);
- icoord = lp_build_ifloor(coord_bld, coord);
- break;
-
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
- {
- LLVMValueRef min, max;
- /* min = 1.0 / (2 * length) */
- min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
- /* max = length - min */
- max = lp_build_sub(coord_bld, length_f, min);
+ coord = lp_build_abs(coord_bld, coord);
- coord = lp_build_abs(coord_bld, coord);
+ if (bld->static_state->normalized_coords) {
+ /* scale coord to length */
coord = lp_build_mul(coord_bld, coord, length_f);
- coord = lp_build_clamp(coord_bld, coord, min, max);
- icoord = lp_build_ifloor(coord_bld, coord);
}
+
+ icoord = lp_build_ifloor(coord_bld, coord);
+
+ /* clamp to [0, length - 1] */
+ icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
break;
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
- {
- LLVMValueRef min, max;
- /* min = 1.0 / (2 * length) */
- min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
- min = lp_build_negate(coord_bld, min);
- /* max = length - min */
- max = lp_build_sub(coord_bld, length_f, min);
+ coord = lp_build_abs(coord_bld, coord);
- coord = lp_build_abs(coord_bld, coord);
+ if (bld->static_state->normalized_coords) {
+ /* scale coord to length */
coord = lp_build_mul(coord_bld, coord, length_f);
- coord = lp_build_clamp(coord_bld, coord, min, max);
- icoord = lp_build_ifloor(coord_bld, coord);
}
+
+ icoord = lp_build_ifloor(coord_bld, coord);
+
+ /* clamp to [0, length] */
+ icoord = lp_build_min(int_coord_bld, icoord, length);
break;
default:
@@ -872,213 +548,12 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
/**
- * Codegen equivalent for u_minify().
- * Return max(1, base_size >> level);
- */
-static LLVMValueRef
-lp_build_minify(struct lp_build_sample_context *bld,
- LLVMValueRef base_size,
- LLVMValueRef level)
-{
- LLVMValueRef size = LLVMBuildAShr(bld->builder, base_size, level, "minify");
- size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one);
- return size;
-}
-
-
-/**
- * Generate code to compute texture level of detail (lambda).
- * \param s vector of texcoord s values
- * \param t vector of texcoord t values
- * \param r vector of texcoord r values
- * \param shader_lod_bias vector float with the shader lod bias,
- * \param width scalar int texture width
- * \param height scalar int texture height
- * \param depth scalar int texture depth
- */
-static LLVMValueRef
-lp_build_lod_selector(struct lp_build_sample_context *bld,
- LLVMValueRef s,
- LLVMValueRef t,
- LLVMValueRef r,
- LLVMValueRef shader_lod_bias,
- LLVMValueRef width,
- LLVMValueRef height,
- LLVMValueRef depth)
-
-{
- if (bld->static_state->min_lod == bld->static_state->max_lod) {
- /* User is forcing sampling from a particular mipmap level.
- * This is hit during mipmap generation.
- */
- return LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod);
- }
- else {
- const int dims = texture_dims(bld->static_state->target);
- struct lp_build_context *float_bld = &bld->float_bld;
- LLVMValueRef sampler_lod_bias = LLVMConstReal(LLVMFloatType(),
- bld->static_state->lod_bias);
- LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(),
- bld->static_state->min_lod);
- LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(),
- bld->static_state->max_lod);
-
- LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
- LLVMValueRef index1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
- LLVMValueRef index2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
-
- LLVMValueRef s0, s1, s2;
- LLVMValueRef t0, t1, t2;
- LLVMValueRef r0, r1, r2;
- LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy;
- LLVMValueRef rho, lod;
-
- /*
- * dsdx = abs(s[1] - s[0]);
- * dsdy = abs(s[2] - s[0]);
- * dtdx = abs(t[1] - t[0]);
- * dtdy = abs(t[2] - t[0]);
- * drdx = abs(r[1] - r[0]);
- * drdy = abs(r[2] - r[0]);
- * XXX we're assuming a four-element quad in 2x2 layout here.
- */
- s0 = LLVMBuildExtractElement(bld->builder, s, index0, "s0");
- s1 = LLVMBuildExtractElement(bld->builder, s, index1, "s1");
- s2 = LLVMBuildExtractElement(bld->builder, s, index2, "s2");
- dsdx = LLVMBuildSub(bld->builder, s1, s0, "");
- dsdx = lp_build_abs(float_bld, dsdx);
- dsdy = LLVMBuildSub(bld->builder, s2, s0, "");
- dsdy = lp_build_abs(float_bld, dsdy);
- if (dims > 1) {
- t0 = LLVMBuildExtractElement(bld->builder, t, index0, "t0");
- t1 = LLVMBuildExtractElement(bld->builder, t, index1, "t1");
- t2 = LLVMBuildExtractElement(bld->builder, t, index2, "t2");
- dtdx = LLVMBuildSub(bld->builder, t1, t0, "");
- dtdx = lp_build_abs(float_bld, dtdx);
- dtdy = LLVMBuildSub(bld->builder, t2, t0, "");
- dtdy = lp_build_abs(float_bld, dtdy);
- if (dims > 2) {
- r0 = LLVMBuildExtractElement(bld->builder, r, index0, "r0");
- r1 = LLVMBuildExtractElement(bld->builder, r, index1, "r1");
- r2 = LLVMBuildExtractElement(bld->builder, r, index2, "r2");
- drdx = LLVMBuildSub(bld->builder, r1, r0, "");
- drdx = lp_build_abs(float_bld, drdx);
- drdy = LLVMBuildSub(bld->builder, r2, r0, "");
- drdy = lp_build_abs(float_bld, drdy);
- }
- }
-
- /* Compute rho = max of all partial derivatives scaled by texture size.
- * XXX this could be vectorized somewhat
- */
- rho = LLVMBuildMul(bld->builder,
- lp_build_max(float_bld, dsdx, dsdy),
- lp_build_int_to_float(float_bld, width), "");
- if (dims > 1) {
- LLVMValueRef max;
- max = LLVMBuildMul(bld->builder,
- lp_build_max(float_bld, dtdx, dtdy),
- lp_build_int_to_float(float_bld, height), "");
- rho = lp_build_max(float_bld, rho, max);
- if (dims > 2) {
- max = LLVMBuildMul(bld->builder,
- lp_build_max(float_bld, drdx, drdy),
- lp_build_int_to_float(float_bld, depth), "");
- rho = lp_build_max(float_bld, rho, max);
- }
- }
-
- /* compute lod = log2(rho) */
- lod = lp_build_log2(float_bld, rho);
-
- /* add sampler lod bias */
- lod = LLVMBuildAdd(bld->builder, lod, sampler_lod_bias, "sampler LOD bias");
-
- /* add shader lod bias */
- /* XXX for now we take only the first element since our lod is scalar */
- shader_lod_bias = LLVMBuildExtractElement(bld->builder, shader_lod_bias,
- LLVMConstInt(LLVMInt32Type(), 0, 0), "");
- lod = LLVMBuildAdd(bld->builder, lod, shader_lod_bias, "shader LOD bias");
-
- /* clamp lod */
- lod = lp_build_clamp(float_bld, lod, min_lod, max_lod);
-
- return lod;
- }
-}
-
-
-/**
- * For PIPE_TEX_MIPFILTER_NEAREST, convert float LOD to integer
- * mipmap level index.
- * Note: this is all scalar code.
- * \param lod scalar float texture level of detail
- * \param level_out returns integer
- */
-static void
-lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
- unsigned unit,
- LLVMValueRef lod,
- LLVMValueRef *level_out)
-{
- struct lp_build_context *float_bld = &bld->float_bld;
- struct lp_build_context *int_bld = &bld->int_bld;
- LLVMValueRef last_level, level;
-
- LLVMValueRef zero = LLVMConstInt(LLVMInt32Type(), 0, 0);
-
- last_level = bld->dynamic_state->last_level(bld->dynamic_state,
- bld->builder, unit);
-
- /* convert float lod to integer */
- level = lp_build_iround(float_bld, lod);
-
- /* clamp level to legal range of levels */
- *level_out = lp_build_clamp(int_bld, level, zero, last_level);
-}
-
-
-/**
- * For PIPE_TEX_MIPFILTER_LINEAR, convert float LOD to integer to
- * two (adjacent) mipmap level indexes. Later, we'll sample from those
- * two mipmap levels and interpolate between them.
- */
-static void
-lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
- unsigned unit,
- LLVMValueRef lod,
- LLVMValueRef *level0_out,
- LLVMValueRef *level1_out,
- LLVMValueRef *weight_out)
-{
- struct lp_build_context *float_bld = &bld->float_bld;
- struct lp_build_context *int_bld = &bld->int_bld;
- LLVMValueRef last_level, level;
-
- last_level = bld->dynamic_state->last_level(bld->dynamic_state,
- bld->builder, unit);
-
- /* convert float lod to integer */
- level = lp_build_ifloor(float_bld, lod);
-
- /* compute level 0 and clamp to legal range of levels */
- *level0_out = lp_build_clamp(int_bld, level,
- int_bld->zero,
- last_level);
- /* compute level 1 and clamp to legal range of levels */
- *level1_out = lp_build_add(int_bld, *level0_out, int_bld->one);
- *level1_out = lp_build_min(int_bld, *level1_out, last_level);
-
- *weight_out = lp_build_fract(float_bld, lod);
-}
-
-
-/**
* Generate code to sample a mipmap level with nearest filtering.
* If sampling a cube texture, r = cube face in [0,5].
*/
static void
lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
+ unsigned unit,
LLVMValueRef width_vec,
LLVMValueRef height_vec,
LLVMValueRef depth_vec,
@@ -1090,7 +565,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
LLVMValueRef r,
LLVMValueRef colors_out[4])
{
- const int dims = texture_dims(bld->static_state->target);
+ const unsigned dims = bld->dims;
LLVMValueRef x, y, z;
/*
@@ -1109,7 +584,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
if (dims == 3) {
z = lp_build_sample_wrap_nearest(bld, r, depth_vec,
- bld->static_state->pot_height,
+ bld->static_state->pot_depth,
bld->static_state->wrap_r);
lp_build_name(z, "tex.z.wrapped");
}
@@ -1127,7 +602,8 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
/*
* Get texture colors.
*/
- lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
+ lp_build_sample_texel_soa(bld, unit,
+ width_vec, height_vec, depth_vec,
x, y, z,
row_stride_vec, img_stride_vec,
data_ptr, colors_out);
@@ -1140,6 +616,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
*/
static void
lp_build_sample_image_linear(struct lp_build_sample_context *bld,
+ unsigned unit,
LLVMValueRef width_vec,
LLVMValueRef height_vec,
LLVMValueRef depth_vec,
@@ -1151,7 +628,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
LLVMValueRef r,
LLVMValueRef colors_out[4])
{
- const int dims = texture_dims(bld->static_state->target);
+ const unsigned dims = bld->dims;
LLVMValueRef x0, y0, z0, x1, y1, z1;
LLVMValueRef s_fpart, t_fpart, r_fpart;
LLVMValueRef neighbors[2][2][4];
@@ -1201,11 +678,13 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
* Get texture colors.
*/
/* get x0/x1 texels */
- lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
+ lp_build_sample_texel_soa(bld, unit,
+ width_vec, height_vec, depth_vec,
x0, y0, z0,
row_stride_vec, img_stride_vec,
data_ptr, neighbors[0][0]);
- lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
+ lp_build_sample_texel_soa(bld, unit,
+ width_vec, height_vec, depth_vec,
x1, y0, z0,
row_stride_vec, img_stride_vec,
data_ptr, neighbors[0][1]);
@@ -1223,11 +702,13 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
LLVMValueRef colors0[4];
/* get x0/x1 texels at y1 */
- lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
+ lp_build_sample_texel_soa(bld, unit,
+ width_vec, height_vec, depth_vec,
x0, y1, z0,
row_stride_vec, img_stride_vec,
data_ptr, neighbors[1][0]);
- lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
+ lp_build_sample_texel_soa(bld, unit,
+ width_vec, height_vec, depth_vec,
x1, y1, z0,
row_stride_vec, img_stride_vec,
data_ptr, neighbors[1][1]);
@@ -1247,19 +728,23 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
LLVMValueRef colors1[4];
/* get x0/x1/y0/y1 texels at z1 */
- lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
+ lp_build_sample_texel_soa(bld, unit,
+ width_vec, height_vec, depth_vec,
x0, y0, z1,
row_stride_vec, img_stride_vec,
data_ptr, neighbors1[0][0]);
- lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
+ lp_build_sample_texel_soa(bld, unit,
+ width_vec, height_vec, depth_vec,
x1, y0, z1,
row_stride_vec, img_stride_vec,
data_ptr, neighbors1[0][1]);
- lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
+ lp_build_sample_texel_soa(bld, unit,
+ width_vec, height_vec, depth_vec,
x0, y1, z1,
row_stride_vec, img_stride_vec,
data_ptr, neighbors1[1][0]);
- lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
+ lp_build_sample_texel_soa(bld, unit,
+ width_vec, height_vec, depth_vec,
x1, y1, z1,
row_stride_vec, img_stride_vec,
data_ptr, neighbors1[1][1]);
@@ -1291,209 +776,6 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
}
-/** Helper used by lp_build_cube_lookup() */
-static LLVMValueRef
-lp_build_cube_ima(struct lp_build_context *coord_bld, LLVMValueRef coord)
-{
- /* ima = -0.5 / abs(coord); */
- LLVMValueRef negHalf = lp_build_const_vec(coord_bld->type, -0.5);
- LLVMValueRef absCoord = lp_build_abs(coord_bld, coord);
- LLVMValueRef ima = lp_build_mul(coord_bld, negHalf,
- lp_build_rcp(coord_bld, absCoord));
- return ima;
-}
-
-
-/**
- * Helper used by lp_build_cube_lookup()
- * \param sign scalar +1 or -1
- * \param coord float vector
- * \param ima float vector
- */
-static LLVMValueRef
-lp_build_cube_coord(struct lp_build_context *coord_bld,
- LLVMValueRef sign, int negate_coord,
- LLVMValueRef coord, LLVMValueRef ima)
-{
- /* return negate(coord) * ima * sign + 0.5; */
- LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5);
- LLVMValueRef res;
-
- assert(negate_coord == +1 || negate_coord == -1);
-
- if (negate_coord == -1) {
- coord = lp_build_negate(coord_bld, coord);
- }
-
- res = lp_build_mul(coord_bld, coord, ima);
- if (sign) {
- sign = lp_build_broadcast_scalar(coord_bld, sign);
- res = lp_build_mul(coord_bld, res, sign);
- }
- res = lp_build_add(coord_bld, res, half);
-
- return res;
-}
-
-
-/** Helper used by lp_build_cube_lookup()
- * Return (major_coord >= 0) ? pos_face : neg_face;
- */
-static LLVMValueRef
-lp_build_cube_face(struct lp_build_sample_context *bld,
- LLVMValueRef major_coord,
- unsigned pos_face, unsigned neg_face)
-{
- LLVMValueRef cmp = LLVMBuildFCmp(bld->builder, LLVMRealUGE,
- major_coord,
- bld->float_bld.zero, "");
- LLVMValueRef pos = LLVMConstInt(LLVMInt32Type(), pos_face, 0);
- LLVMValueRef neg = LLVMConstInt(LLVMInt32Type(), neg_face, 0);
- LLVMValueRef res = LLVMBuildSelect(bld->builder, cmp, pos, neg, "");
- return res;
-}
-
-
-
-/**
- * Generate code to do cube face selection and per-face texcoords.
- */
-static void
-lp_build_cube_lookup(struct lp_build_sample_context *bld,
- LLVMValueRef s,
- LLVMValueRef t,
- LLVMValueRef r,
- LLVMValueRef *face,
- LLVMValueRef *face_s,
- LLVMValueRef *face_t)
-{
- struct lp_build_context *float_bld = &bld->float_bld;
- struct lp_build_context *coord_bld = &bld->coord_bld;
- LLVMValueRef rx, ry, rz;
- LLVMValueRef arx, ary, arz;
- LLVMValueRef c25 = LLVMConstReal(LLVMFloatType(), 0.25);
- LLVMValueRef arx_ge_ary, arx_ge_arz;
- LLVMValueRef ary_ge_arx, ary_ge_arz;
- LLVMValueRef arx_ge_ary_arz, ary_ge_arx_arz;
- LLVMValueRef rx_pos, ry_pos, rz_pos;
-
- assert(bld->coord_bld.type.length == 4);
-
- /*
- * Use the average of the four pixel's texcoords to choose the face.
- */
- rx = lp_build_mul(float_bld, c25,
- lp_build_sum_vector(&bld->coord_bld, s));
- ry = lp_build_mul(float_bld, c25,
- lp_build_sum_vector(&bld->coord_bld, t));
- rz = lp_build_mul(float_bld, c25,
- lp_build_sum_vector(&bld->coord_bld, r));
-
- arx = lp_build_abs(float_bld, rx);
- ary = lp_build_abs(float_bld, ry);
- arz = lp_build_abs(float_bld, rz);
-
- /*
- * Compare sign/magnitude of rx,ry,rz to determine face
- */
- arx_ge_ary = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, ary, "");
- arx_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, arz, "");
- ary_ge_arx = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arx, "");
- ary_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arz, "");
-
- arx_ge_ary_arz = LLVMBuildAnd(bld->builder, arx_ge_ary, arx_ge_arz, "");
- ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, "");
-
- rx_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rx, float_bld->zero, "");
- ry_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ry, float_bld->zero, "");
- rz_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rz, float_bld->zero, "");
-
- {
- struct lp_build_flow_context *flow_ctx;
- struct lp_build_if_state if_ctx;
-
- flow_ctx = lp_build_flow_create(bld->builder);
- lp_build_flow_scope_begin(flow_ctx);
-
- *face_s = bld->coord_bld.undef;
- *face_t = bld->coord_bld.undef;
- *face = bld->int_bld.undef;
-
- lp_build_name(*face_s, "face_s");
- lp_build_name(*face_t, "face_t");
- lp_build_name(*face, "face");
-
- lp_build_flow_scope_declare(flow_ctx, face_s);
- lp_build_flow_scope_declare(flow_ctx, face_t);
- lp_build_flow_scope_declare(flow_ctx, face);
-
- lp_build_if(&if_ctx, flow_ctx, bld->builder, arx_ge_ary_arz);
- {
- /* +/- X face */
- LLVMValueRef sign = lp_build_sgn(float_bld, rx);
- LLVMValueRef ima = lp_build_cube_ima(coord_bld, s);
- *face_s = lp_build_cube_coord(coord_bld, sign, +1, r, ima);
- *face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
- *face = lp_build_cube_face(bld, rx,
- PIPE_TEX_FACE_POS_X,
- PIPE_TEX_FACE_NEG_X);
- }
- lp_build_else(&if_ctx);
- {
- struct lp_build_flow_context *flow_ctx2;
- struct lp_build_if_state if_ctx2;
-
- LLVMValueRef face_s2 = bld->coord_bld.undef;
- LLVMValueRef face_t2 = bld->coord_bld.undef;
- LLVMValueRef face2 = bld->int_bld.undef;
-
- flow_ctx2 = lp_build_flow_create(bld->builder);
- lp_build_flow_scope_begin(flow_ctx2);
- lp_build_flow_scope_declare(flow_ctx2, &face_s2);
- lp_build_flow_scope_declare(flow_ctx2, &face_t2);
- lp_build_flow_scope_declare(flow_ctx2, &face2);
-
- ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, "");
-
- lp_build_if(&if_ctx2, flow_ctx2, bld->builder, ary_ge_arx_arz);
- {
- /* +/- Y face */
- LLVMValueRef sign = lp_build_sgn(float_bld, ry);
- LLVMValueRef ima = lp_build_cube_ima(coord_bld, t);
- face_s2 = lp_build_cube_coord(coord_bld, NULL, -1, s, ima);
- face_t2 = lp_build_cube_coord(coord_bld, sign, -1, r, ima);
- face2 = lp_build_cube_face(bld, ry,
- PIPE_TEX_FACE_POS_Y,
- PIPE_TEX_FACE_NEG_Y);
- }
- lp_build_else(&if_ctx2);
- {
- /* +/- Z face */
- LLVMValueRef sign = lp_build_sgn(float_bld, rz);
- LLVMValueRef ima = lp_build_cube_ima(coord_bld, r);
- face_s2 = lp_build_cube_coord(coord_bld, sign, -1, s, ima);
- face_t2 = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
- face2 = lp_build_cube_face(bld, rz,
- PIPE_TEX_FACE_POS_Z,
- PIPE_TEX_FACE_NEG_Z);
- }
- lp_build_endif(&if_ctx2);
- lp_build_flow_scope_end(flow_ctx2);
- lp_build_flow_destroy(flow_ctx2);
-
- *face_s = face_s2;
- *face_t = face_t2;
- *face = face2;
- }
-
- lp_build_endif(&if_ctx);
- lp_build_flow_scope_end(flow_ctx);
- lp_build_flow_destroy(flow_ctx);
- }
-}
-
-
-
/**
* Sample the texture/mipmap using given image filter and mip filter.
* data0_ptr and data1_ptr point to the two mipmap levels to sample
@@ -1502,72 +784,107 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
*/
static void
lp_build_sample_mipmap(struct lp_build_sample_context *bld,
+ unsigned unit,
unsigned img_filter,
unsigned mip_filter,
LLVMValueRef s,
LLVMValueRef t,
LLVMValueRef r,
+ LLVMValueRef ilevel0,
+ LLVMValueRef ilevel1,
LLVMValueRef lod_fpart,
- LLVMValueRef width0_vec,
- LLVMValueRef width1_vec,
- LLVMValueRef height0_vec,
- LLVMValueRef height1_vec,
- LLVMValueRef depth0_vec,
- LLVMValueRef depth1_vec,
- LLVMValueRef row_stride0_vec,
- LLVMValueRef row_stride1_vec,
- LLVMValueRef img_stride0_vec,
- LLVMValueRef img_stride1_vec,
- LLVMValueRef data_ptr0,
- LLVMValueRef data_ptr1,
LLVMValueRef *colors_out)
{
+ LLVMBuilderRef builder = bld->builder;
+ LLVMValueRef width0_vec;
+ LLVMValueRef width1_vec;
+ LLVMValueRef height0_vec;
+ LLVMValueRef height1_vec;
+ LLVMValueRef depth0_vec;
+ LLVMValueRef depth1_vec;
+ LLVMValueRef row_stride0_vec;
+ LLVMValueRef row_stride1_vec;
+ LLVMValueRef img_stride0_vec;
+ LLVMValueRef img_stride1_vec;
+ LLVMValueRef data_ptr0;
+ LLVMValueRef data_ptr1;
LLVMValueRef colors0[4], colors1[4];
- int chan;
+ unsigned chan;
+ /* sample the first mipmap level */
+ lp_build_mipmap_level_sizes(bld, ilevel0,
+ &width0_vec, &height0_vec, &depth0_vec,
+ &row_stride0_vec, &img_stride0_vec);
+ data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
if (img_filter == PIPE_TEX_FILTER_NEAREST) {
- lp_build_sample_image_nearest(bld,
+ lp_build_sample_image_nearest(bld, unit,
width0_vec, height0_vec, depth0_vec,
row_stride0_vec, img_stride0_vec,
- data_ptr0, s, t, r, colors0);
-
- if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
- /* sample the second mipmap level, and interp */
- lp_build_sample_image_nearest(bld,
- width1_vec, height1_vec, depth1_vec,
- row_stride1_vec, img_stride1_vec,
- data_ptr1, s, t, r, colors1);
- }
+ data_ptr0, s, t, r,
+ colors0);
}
else {
assert(img_filter == PIPE_TEX_FILTER_LINEAR);
-
- lp_build_sample_image_linear(bld,
+ lp_build_sample_image_linear(bld, unit,
width0_vec, height0_vec, depth0_vec,
row_stride0_vec, img_stride0_vec,
- data_ptr0, s, t, r, colors0);
-
- if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
- /* sample the second mipmap level, and interp */
- lp_build_sample_image_linear(bld,
- width1_vec, height1_vec, depth1_vec,
- row_stride1_vec, img_stride1_vec,
- data_ptr1, s, t, r, colors1);
- }
+ data_ptr0, s, t, r,
+ colors0);
+ }
+
+ /* Store the first level's colors in the output variables */
+ for (chan = 0; chan < 4; chan++) {
+ LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
}
if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
- /* interpolate samples from the two mipmap levels */
- for (chan = 0; chan < 4; chan++) {
- colors_out[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
+ struct lp_build_flow_context *flow_ctx;
+ struct lp_build_if_state if_ctx;
+ LLVMValueRef need_lerp;
+
+ flow_ctx = lp_build_flow_create(builder);
+
+ /* need_lerp = lod_fpart > 0 */
+ need_lerp = LLVMBuildFCmp(builder, LLVMRealUGT,
+ lod_fpart,
+ bld->float_bld.zero,
+ "need_lerp");
+
+ lp_build_if(&if_ctx, flow_ctx, builder, need_lerp);
+ {
+ /* sample the second mipmap level */
+ lp_build_mipmap_level_sizes(bld, ilevel1,
+ &width1_vec, &height1_vec, &depth1_vec,
+ &row_stride1_vec, &img_stride1_vec);
+ data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
+ if (img_filter == PIPE_TEX_FILTER_NEAREST) {
+ lp_build_sample_image_nearest(bld, unit,
+ width1_vec, height1_vec, depth1_vec,
+ row_stride1_vec, img_stride1_vec,
+ data_ptr1, s, t, r,
+ colors1);
+ }
+ else {
+ lp_build_sample_image_linear(bld, unit,
+ width1_vec, height1_vec, depth1_vec,
+ row_stride1_vec, img_stride1_vec,
+ data_ptr1, s, t, r,
+ colors1);
+ }
+
+ /* interpolate samples from the two mipmap levels */
+
+ lod_fpart = lp_build_broadcast_scalar(&bld->texel_bld, lod_fpart);
+
+ for (chan = 0; chan < 4; chan++) {
+ colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
colors0[chan], colors1[chan]);
+ LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
+ }
}
- }
- else {
- /* use first/only level's colors */
- for (chan = 0; chan < 4; chan++) {
- colors_out[chan] = colors0[chan];
- }
+ lp_build_endif(&if_ctx);
+
+ lp_build_flow_destroy(flow_ctx);
}
}
@@ -1584,30 +901,24 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
LLVMValueRef s,
LLVMValueRef t,
LLVMValueRef r,
- LLVMValueRef lodbias,
- LLVMValueRef width,
- LLVMValueRef height,
- LLVMValueRef depth,
- LLVMValueRef width_vec,
- LLVMValueRef height_vec,
- LLVMValueRef depth_vec,
- LLVMValueRef row_stride_array,
- LLVMValueRef img_stride_array,
- LLVMValueRef data_array,
+ const LLVMValueRef *ddx,
+ const LLVMValueRef *ddy,
+ LLVMValueRef lod_bias, /* optional */
+ LLVMValueRef explicit_lod, /* optional */
LLVMValueRef *colors_out)
{
- struct lp_build_context *float_bld = &bld->float_bld;
+ struct lp_build_context *int_bld = &bld->int_bld;
+ LLVMBuilderRef builder = bld->builder;
const unsigned mip_filter = bld->static_state->min_mip_filter;
const unsigned min_filter = bld->static_state->min_img_filter;
const unsigned mag_filter = bld->static_state->mag_img_filter;
- const int dims = texture_dims(bld->static_state->target);
- LLVMValueRef lod = NULL, lod_fpart = NULL;
- LLVMValueRef ilevel0, ilevel1 = NULL, ilevel0_vec, ilevel1_vec = NULL;
- LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL;
- LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL;
- LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL;
- LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL;
- LLVMValueRef data_ptr0, data_ptr1 = NULL;
+ LLVMValueRef lod_ipart = NULL, lod_fpart = NULL;
+ LLVMValueRef ilevel0, ilevel1 = NULL;
+ LLVMValueRef face_ddx[4], face_ddy[4];
+ LLVMValueRef texels[4];
+ LLVMTypeRef i32t = LLVMInt32Type();
+ LLVMValueRef i32t_zero = LLVMConstInt(i32t, 0, 0);
+ unsigned chan;
/*
printf("%s mip %d min %d mag %d\n", __FUNCTION__,
@@ -1615,6 +926,30 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
*/
/*
+ * Choose cube face, recompute texcoords and derivatives for the chosen face.
+ */
+ if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
+ LLVMValueRef face, face_s, face_t;
+ lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t);
+ s = face_s; /* vec */
+ t = face_t; /* vec */
+ /* use 'r' to indicate cube face */
+ r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */
+
+ /* recompute ddx, ddy using the new (s,t) face texcoords */
+ face_ddx[0] = lp_build_ddx(&bld->coord_bld, s);
+ face_ddx[1] = lp_build_ddx(&bld->coord_bld, t);
+ face_ddx[2] = NULL;
+ face_ddx[3] = NULL;
+ face_ddy[0] = lp_build_ddy(&bld->coord_bld, s);
+ face_ddy[1] = lp_build_ddy(&bld->coord_bld, t);
+ face_ddy[2] = NULL;
+ face_ddy[3] = NULL;
+ ddx = face_ddx;
+ ddy = face_ddy;
+ }
+
+ /*
* Compute the level of detail (float).
*/
if (min_filter != mag_filter ||
@@ -1622,432 +957,173 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
/* Need to compute lod either to choose mipmap levels or to
* distinguish between minification/magnification with one mipmap level.
*/
- lod = lp_build_lod_selector(bld, s, t, r, lodbias, width, height, depth);
+ lp_build_lod_selector(bld, unit, ddx, ddy,
+ lod_bias, explicit_lod,
+ mip_filter,
+ &lod_ipart, &lod_fpart);
+ } else {
+ lod_ipart = i32t_zero;
}
/*
- * Compute integer mipmap level(s) to fetch texels from.
+ * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
*/
- if (mip_filter == PIPE_TEX_MIPFILTER_NONE) {
+ switch (mip_filter) {
+ default:
+ assert(0 && "bad mip_filter value in lp_build_sample_soa()");
+ /* fall-through */
+ case PIPE_TEX_MIPFILTER_NONE:
/* always use mip level 0 */
- ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
- }
- else {
- if (mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
- lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
+ if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
+ /* XXX this is a work-around for an apparent bug in LLVM 2.7.
+ * We should be able to set ilevel0 = const(0) but that causes
+ * bad x86 code to be emitted.
+ */
+ assert(lod_ipart);
+ lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
}
else {
- assert(mip_filter == PIPE_TEX_MIPFILTER_LINEAR);
- lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1,
- &lod_fpart);
- lod_fpart = lp_build_broadcast_scalar(&bld->coord_bld, lod_fpart);
- }
- }
-
- /*
- * Convert scalar integer mipmap levels into vectors.
- */
- ilevel0_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel0);
- if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
- ilevel1_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel1);
-
- /*
- * Compute width, height at mipmap level 'ilevel0'
- */
- width0_vec = lp_build_minify(bld, width_vec, ilevel0_vec);
- if (dims >= 2) {
- height0_vec = lp_build_minify(bld, height_vec, ilevel0_vec);
- row_stride0_vec = lp_build_get_level_stride_vec(bld, row_stride_array,
- ilevel0);
- if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
- img_stride0_vec = lp_build_get_level_stride_vec(bld,
- img_stride_array,
- ilevel0);
- if (dims == 3) {
- depth0_vec = lp_build_minify(bld, depth_vec, ilevel0_vec);
- }
- }
- }
- if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
- /* compute width, height, depth for second mipmap level at 'ilevel1' */
- width1_vec = lp_build_minify(bld, width_vec, ilevel1_vec);
- if (dims >= 2) {
- height1_vec = lp_build_minify(bld, height_vec, ilevel1_vec);
- row_stride1_vec = lp_build_get_level_stride_vec(bld, row_stride_array,
- ilevel1);
- if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
- img_stride1_vec = lp_build_get_level_stride_vec(bld,
- img_stride_array,
- ilevel1);
- if (dims ==3) {
- depth1_vec = lp_build_minify(bld, depth_vec, ilevel1_vec);
- }
- }
+ ilevel0 = i32t_zero;
}
+ break;
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ assert(lod_ipart);
+ lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
+ break;
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ assert(lod_ipart);
+ assert(lod_fpart);
+ lp_build_linear_mip_levels(bld, unit,
+ lod_ipart, &lod_fpart,
+ &ilevel0, &ilevel1);
+ break;
}
/*
- * Choose cube face, recompute per-face texcoords.
+ * Get/interpolate texture colors.
*/
- if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
- LLVMValueRef face, face_s, face_t;
- lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t);
- s = face_s; /* vec */
- t = face_t; /* vec */
- /* use 'r' to indicate cube face */
- r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */
- }
- /*
- * Get pointer(s) to image data for mipmap level(s).
- */
- data_ptr0 = lp_build_get_mipmap_level(bld, data_array, ilevel0);
- if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
- data_ptr1 = lp_build_get_mipmap_level(bld, data_array, ilevel1);
+ for (chan = 0; chan < 4; ++chan) {
+ texels[chan] = lp_build_alloca(builder, bld->texel_bld.vec_type, "");
+ lp_build_name(texels[chan], "sampler%u_texel_%c_var", unit, "xyzw"[chan]);
}
- /*
- * Get/interpolate texture colors.
- */
if (min_filter == mag_filter) {
/* no need to distinquish between minification and magnification */
- lp_build_sample_mipmap(bld, min_filter, mip_filter, s, t, r, lod_fpart,
- width0_vec, width1_vec,
- height0_vec, height1_vec,
- depth0_vec, depth1_vec,
- row_stride0_vec, row_stride1_vec,
- img_stride0_vec, img_stride1_vec,
- data_ptr0, data_ptr1,
- colors_out);
+ lp_build_sample_mipmap(bld, unit,
+ min_filter, mip_filter,
+ s, t, r,
+ ilevel0, ilevel1, lod_fpart,
+ texels);
}
else {
/* Emit conditional to choose min image filter or mag image filter
- * depending on the lod being >0 or <= 0, respectively.
+ * depending on the lod being > 0 or <= 0, respectively.
*/
struct lp_build_flow_context *flow_ctx;
struct lp_build_if_state if_ctx;
LLVMValueRef minify;
- flow_ctx = lp_build_flow_create(bld->builder);
- lp_build_flow_scope_begin(flow_ctx);
-
- lp_build_flow_scope_declare(flow_ctx, &colors_out[0]);
- lp_build_flow_scope_declare(flow_ctx, &colors_out[1]);
- lp_build_flow_scope_declare(flow_ctx, &colors_out[2]);
- lp_build_flow_scope_declare(flow_ctx, &colors_out[3]);
+ flow_ctx = lp_build_flow_create(builder);
- /* minify = lod > 0.0 */
- minify = LLVMBuildFCmp(bld->builder, LLVMRealUGE,
- lod, float_bld->zero, "");
+ /* minify = lod >= 0.0 */
+ minify = LLVMBuildICmp(builder, LLVMIntSGE,
+ lod_ipart, int_bld->zero, "");
- lp_build_if(&if_ctx, flow_ctx, bld->builder, minify);
+ lp_build_if(&if_ctx, flow_ctx, builder, minify);
{
/* Use the minification filter */
- lp_build_sample_mipmap(bld, min_filter, mip_filter,
- s, t, r, lod_fpart,
- width0_vec, width1_vec,
- height0_vec, height1_vec,
- depth0_vec, depth1_vec,
- row_stride0_vec, row_stride1_vec,
- img_stride0_vec, img_stride1_vec,
- data_ptr0, data_ptr1,
- colors_out);
+ lp_build_sample_mipmap(bld, unit,
+ min_filter, mip_filter,
+ s, t, r,
+ ilevel0, ilevel1, lod_fpart,
+ texels);
}
lp_build_else(&if_ctx);
{
/* Use the magnification filter */
- lp_build_sample_mipmap(bld, mag_filter, mip_filter,
- s, t, r, lod_fpart,
- width0_vec, width1_vec,
- height0_vec, height1_vec,
- depth0_vec, depth1_vec,
- row_stride0_vec, row_stride1_vec,
- img_stride0_vec, img_stride1_vec,
- data_ptr0, data_ptr1,
- colors_out);
+ lp_build_sample_mipmap(bld, unit,
+ mag_filter, PIPE_TEX_MIPFILTER_NONE,
+ s, t, r,
+ i32t_zero, NULL, NULL,
+ texels);
}
lp_build_endif(&if_ctx);
- lp_build_flow_scope_end(flow_ctx);
lp_build_flow_destroy(flow_ctx);
}
-}
-
-
-static void
-lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder,
- struct lp_type dst_type,
- LLVMValueRef packed,
- LLVMValueRef *rgba)
-{
- LLVMValueRef mask = lp_build_const_int_vec(dst_type, 0xff);
- unsigned chan;
-
- /* Decode the input vector components */
for (chan = 0; chan < 4; ++chan) {
- unsigned start = chan*8;
- unsigned stop = start + 8;
- LLVMValueRef input;
-
- input = packed;
-
- if(start)
- input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(dst_type, start), "");
-
- if(stop < 32)
- input = LLVMBuildAnd(builder, input, mask, "");
-
- input = lp_build_unsigned_norm_to_float(builder, 8, dst_type, input);
-
- rgba[chan] = input;
+ colors_out[chan] = LLVMBuildLoad(builder, texels[chan], "");
+ lp_build_name(colors_out[chan], "sampler%u_texel_%c", unit, "xyzw"[chan]);
}
}
-static void
-lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
- LLVMValueRef s,
- LLVMValueRef t,
- LLVMValueRef width,
- LLVMValueRef height,
- LLVMValueRef stride_array,
- LLVMValueRef data_array,
- LLVMValueRef *texel)
-{
- LLVMBuilderRef builder = bld->builder;
- struct lp_build_context i32, h16, u8n;
- LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
- LLVMValueRef i32_c8, i32_c128, i32_c255;
- LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
- LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
- LLVMValueRef x0, x1;
- LLVMValueRef y0, y1;
- LLVMValueRef neighbors[2][2];
- LLVMValueRef neighbors_lo[2][2];
- LLVMValueRef neighbors_hi[2][2];
- LLVMValueRef packed, packed_lo, packed_hi;
- LLVMValueRef unswizzled[4];
- LLVMValueRef stride;
-
- lp_build_context_init(&i32, builder, lp_type_int_vec(32));
- lp_build_context_init(&h16, builder, lp_type_ufixed(16));
- lp_build_context_init(&u8n, builder, lp_type_unorm(8));
-
- i32_vec_type = lp_build_vec_type(i32.type);
- h16_vec_type = lp_build_vec_type(h16.type);
- u8n_vec_type = lp_build_vec_type(u8n.type);
-
- if (bld->static_state->normalized_coords) {
- LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type);
- LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width, coord_vec_type, "");
- LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height, coord_vec_type, "");
- s = lp_build_mul(&bld->coord_bld, s, fp_width);
- t = lp_build_mul(&bld->coord_bld, t, fp_height);
- }
-
- /* scale coords by 256 (8 fractional bits) */
- s = lp_build_mul_imm(&bld->coord_bld, s, 256);
- t = lp_build_mul_imm(&bld->coord_bld, t, 256);
-
- /* convert float to int */
- s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
- t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
-
- /* subtract 0.5 (add -128) */
- i32_c128 = lp_build_const_int_vec(i32.type, -128);
- s = LLVMBuildAdd(builder, s, i32_c128, "");
- t = LLVMBuildAdd(builder, t, i32_c128, "");
-
- /* compute floor (shift right 8) */
- i32_c8 = lp_build_const_int_vec(i32.type, 8);
- s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
- t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
-
- /* compute fractional part (AND with 0xff) */
- i32_c255 = lp_build_const_int_vec(i32.type, 255);
- s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
- t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
-
- x0 = s_ipart;
- y0 = t_ipart;
-
- x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one);
- y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one);
-
- x0 = lp_build_sample_wrap_int(bld, x0, width, bld->static_state->pot_width,
- bld->static_state->wrap_s);
- y0 = lp_build_sample_wrap_int(bld, y0, height, bld->static_state->pot_height,
- bld->static_state->wrap_t);
-
- x1 = lp_build_sample_wrap_int(bld, x1, width, bld->static_state->pot_width,
- bld->static_state->wrap_s);
- y1 = lp_build_sample_wrap_int(bld, y1, height, bld->static_state->pot_height,
- bld->static_state->wrap_t);
-
- /*
- * Transform 4 x i32 in
- *
- * s_fpart = {s0, s1, s2, s3}
- *
- * into 8 x i16
- *
- * s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
- *
- * into two 8 x i16
- *
- * s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
- * s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
- *
- * and likewise for t_fpart. There is no risk of loosing precision here
- * since the fractional parts only use the lower 8bits.
- */
-
- s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, "");
- t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, "");
-
- {
- LLVMTypeRef elem_type = LLVMInt32Type();
- LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH];
- LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
- LLVMValueRef shuffle_lo;
- LLVMValueRef shuffle_hi;
- unsigned i, j;
-
- for(j = 0; j < h16.type.length; j += 4) {
- unsigned subindex = util_cpu_caps.little_endian ? 0 : 1;
- LLVMValueRef index;
-
- index = LLVMConstInt(elem_type, j/2 + subindex, 0);
- for(i = 0; i < 4; ++i)
- shuffles_lo[j + i] = index;
-
- index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0);
- for(i = 0; i < 4; ++i)
- shuffles_hi[j + i] = index;
- }
-
- shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length);
- shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length);
-
- s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_lo, "");
- t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_lo, "");
- s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_hi, "");
- t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, "");
- }
-
- stride = lp_build_get_const_level_stride_vec(bld, stride_array, 0);
-
- /*
- * Fetch the pixels as 4 x 32bit (rgba order might differ):
- *
- * rgba0 rgba1 rgba2 rgba3
- *
- * bit cast them into 16 x u8
- *
- * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
- *
- * unpack them into two 8 x i16:
- *
- * r0 g0 b0 a0 r1 g1 b1 a1
- * r2 g2 b2 a2 r3 g3 b3 a3
- *
- * The higher 8 bits of the resulting elements will be zero.
- */
-
- neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_array);
- neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_array);
- neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_array);
- neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_array);
-
- neighbors[0][0] = LLVMBuildBitCast(builder, neighbors[0][0], u8n_vec_type, "");
- neighbors[0][1] = LLVMBuildBitCast(builder, neighbors[0][1], u8n_vec_type, "");
- neighbors[1][0] = LLVMBuildBitCast(builder, neighbors[1][0], u8n_vec_type, "");
- neighbors[1][1] = LLVMBuildBitCast(builder, neighbors[1][1], u8n_vec_type, "");
-
- lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][0], &neighbors_lo[0][0], &neighbors_hi[0][0]);
- lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][1], &neighbors_lo[0][1], &neighbors_hi[0][1]);
- lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][0], &neighbors_lo[1][0], &neighbors_hi[1][0]);
- lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][1], &neighbors_lo[1][1], &neighbors_hi[1][1]);
-
- /*
- * Linear interpolate with 8.8 fixed point.
- */
-
- packed_lo = lp_build_lerp_2d(&h16,
- s_fpart_lo, t_fpart_lo,
- neighbors_lo[0][0],
- neighbors_lo[0][1],
- neighbors_lo[1][0],
- neighbors_lo[1][1]);
-
- packed_hi = lp_build_lerp_2d(&h16,
- s_fpart_hi, t_fpart_hi,
- neighbors_hi[0][0],
- neighbors_hi[0][1],
- neighbors_hi[1][0],
- neighbors_hi[1][1]);
-
- packed = lp_build_pack2(builder, h16.type, u8n.type, packed_lo, packed_hi);
-
- /*
- * Convert to SoA and swizzle.
- */
-
- packed = LLVMBuildBitCast(builder, packed, i32_vec_type, "");
-
- lp_build_rgba8_to_f32_soa(bld->builder,
- bld->texel_type,
- packed, unswizzled);
-
- lp_build_format_swizzle_soa(bld->format_desc,
- bld->texel_type, unswizzled,
- texel);
-
- lp_build_swizzle_soa(bld, texel);
-}
-
-
+/**
+ * Do shadow test/comparison.
+ * \param p the texcoord Z (aka R, aka P) component
+ * \param texel the texel to compare against (use the X channel)
+ */
static void
lp_build_sample_compare(struct lp_build_sample_context *bld,
LLVMValueRef p,
- LLVMValueRef *texel)
+ LLVMValueRef texel[4])
{
struct lp_build_context *texel_bld = &bld->texel_bld;
LLVMValueRef res;
- unsigned chan;
+ const unsigned chan = 0;
- if(bld->static_state->compare_mode == PIPE_TEX_COMPARE_NONE)
+ if (bld->static_state->compare_mode == PIPE_TEX_COMPARE_NONE)
return;
- /* TODO: Compare before swizzling, to avoid redundant computations */
- res = NULL;
- for(chan = 0; chan < 4; ++chan) {
- LLVMValueRef cmp;
- cmp = lp_build_cmp(texel_bld, bld->static_state->compare_func, p, texel[chan]);
- cmp = lp_build_select(texel_bld, cmp, texel_bld->one, texel_bld->zero);
-
- if(res)
- res = lp_build_add(texel_bld, res, cmp);
- else
- res = cmp;
+ /* debug code */
+ if (0) {
+ LLVMValueRef indx = lp_build_const_int32(0);
+ LLVMValueRef coord = LLVMBuildExtractElement(bld->builder, p, indx, "");
+ LLVMValueRef tex = LLVMBuildExtractElement(bld->builder,
+ texel[chan], indx, "");
+ lp_build_printf(bld->builder, "shadow compare coord %f to texture %f\n",
+ coord, tex);
}
- assert(res);
- res = lp_build_mul(texel_bld, res, lp_build_const_vec(texel_bld->type, 0.25));
+ /* result = (p FUNC texel) ? 1 : 0 */
+ res = lp_build_cmp(texel_bld, bld->static_state->compare_func,
+ p, texel[chan]);
+ res = lp_build_select(texel_bld, res, texel_bld->one, texel_bld->zero);
/* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
- for(chan = 0; chan < 3; ++chan)
- texel[chan] = res;
+ texel[0] =
+ texel[1] =
+ texel[2] = res;
texel[3] = texel_bld->one;
}
/**
+ * Just set texels to white instead of actually sampling the texture.
+ * For debugging.
+ */
+void
+lp_build_sample_nop(struct lp_type type,
+ LLVMValueRef texel_out[4])
+{
+ LLVMValueRef one = lp_build_one(type);
+ unsigned chan;
+
+ for (chan = 0; chan < 4; chan++) {
+ texel_out[chan] = one;
+ }
+}
+
+
+/**
* Build texture sampling code.
* 'texel' will return a vector of four LLVMValueRefs corresponding to
* R, G, B, A.
* \param type vector float type to use for coords, etc.
+ * \param ddx partial derivatives of (s,t,r,q) with respect to x
+ * \param ddy partial derivatives of (s,t,r,q) with respect to y
*/
void
lp_build_sample_soa(LLVMBuilderRef builder,
@@ -2057,18 +1133,27 @@ lp_build_sample_soa(LLVMBuilderRef builder,
unsigned unit,
unsigned num_coords,
const LLVMValueRef *coords,
- LLVMValueRef lodbias,
- LLVMValueRef *texel)
+ const LLVMValueRef ddx[4],
+ const LLVMValueRef ddy[4],
+ LLVMValueRef lod_bias, /* optional */
+ LLVMValueRef explicit_lod, /* optional */
+ LLVMValueRef texel_out[4])
{
+ unsigned dims = texture_dims(static_state->target);
struct lp_build_sample_context bld;
- LLVMValueRef width, width_vec;
- LLVMValueRef height, height_vec;
- LLVMValueRef depth, depth_vec;
- LLVMValueRef row_stride_array, img_stride_array;
- LLVMValueRef data_array;
+ LLVMTypeRef i32t = LLVMInt32Type();
+
LLVMValueRef s;
LLVMValueRef t;
LLVMValueRef r;
+ struct lp_type float_vec_type;
+
+ if (0) {
+ enum pipe_format fmt = static_state->format;
+ debug_printf("Sample from %s\n", util_format_name(fmt));
+ }
+
+ assert(type.floating);
/* Setup our build context */
memset(&bld, 0, sizeof bld);
@@ -2076,57 +1161,90 @@ lp_build_sample_soa(LLVMBuilderRef builder,
bld.static_state = static_state;
bld.dynamic_state = dynamic_state;
bld.format_desc = util_format_description(static_state->format);
+ bld.dims = dims;
bld.float_type = lp_type_float(32);
bld.int_type = lp_type_int(32);
bld.coord_type = type;
bld.uint_coord_type = lp_uint_type(type);
bld.int_coord_type = lp_int_type(type);
+ bld.float_size_type = lp_type_float(32);
+ bld.float_size_type.length = dims > 1 ? 4 : 1;
+ bld.int_size_type = lp_int_type(bld.float_size_type);
bld.texel_type = type;
+ float_vec_type = lp_type_float_vec(32);
+
lp_build_context_init(&bld.float_bld, builder, bld.float_type);
+ lp_build_context_init(&bld.float_vec_bld, builder, float_vec_type);
lp_build_context_init(&bld.int_bld, builder, bld.int_type);
lp_build_context_init(&bld.coord_bld, builder, bld.coord_type);
lp_build_context_init(&bld.uint_coord_bld, builder, bld.uint_coord_type);
lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type);
+ lp_build_context_init(&bld.int_size_bld, builder, bld.int_size_type);
+ lp_build_context_init(&bld.float_size_bld, builder, bld.float_size_type);
lp_build_context_init(&bld.texel_bld, builder, bld.texel_type);
/* Get the dynamic state */
- width = dynamic_state->width(dynamic_state, builder, unit);
- height = dynamic_state->height(dynamic_state, builder, unit);
- depth = dynamic_state->depth(dynamic_state, builder, unit);
- row_stride_array = dynamic_state->row_stride(dynamic_state, builder, unit);
- img_stride_array = dynamic_state->img_stride(dynamic_state, builder, unit);
- data_array = dynamic_state->data_ptr(dynamic_state, builder, unit);
+ bld.width = dynamic_state->width(dynamic_state, builder, unit);
+ bld.height = dynamic_state->height(dynamic_state, builder, unit);
+ bld.depth = dynamic_state->depth(dynamic_state, builder, unit);
+ bld.row_stride_array = dynamic_state->row_stride(dynamic_state, builder, unit);
+ bld.img_stride_array = dynamic_state->img_stride(dynamic_state, builder, unit);
+ bld.data_array = dynamic_state->data_ptr(dynamic_state, builder, unit);
/* Note that data_array is an array[level] of pointers to texture images */
s = coords[0];
t = coords[1];
r = coords[2];
- width_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, width);
- height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height);
- depth_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, depth);
-
- if (util_format_is_rgba8_variant(bld.format_desc) &&
- static_state->target == PIPE_TEXTURE_2D &&
- static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR &&
- static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR &&
- static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE &&
- is_simple_wrap_mode(static_state->wrap_s) &&
- is_simple_wrap_mode(static_state->wrap_t)) {
- /* special case */
- lp_build_sample_2d_linear_aos(&bld, s, t, width_vec, height_vec,
- row_stride_array, data_array, texel);
+ /* width, height, depth as single int vector */
+ if (dims <= 1) {
+ bld.int_size = bld.width;
}
else {
- lp_build_sample_general(&bld, unit, s, t, r, lodbias,
- width, height, depth,
- width_vec, height_vec, depth_vec,
- row_stride_array, img_stride_array,
- data_array,
- texel);
+ bld.int_size = LLVMBuildInsertElement(builder, bld.int_size_bld.undef,
+ bld.width, LLVMConstInt(i32t, 0, 0), "");
+ if (dims >= 2) {
+ bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
+ bld.height, LLVMConstInt(i32t, 1, 0), "");
+ if (dims >= 3) {
+ bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
+ bld.depth, LLVMConstInt(i32t, 2, 0), "");
+ }
+ }
+ }
+
+ if (0) {
+ /* For debug: no-op texture sampling */
+ lp_build_sample_nop(bld.texel_type, texel_out);
+ }
+ else if (util_format_fits_8unorm(bld.format_desc) &&
+ lp_is_simple_wrap_mode(static_state->wrap_s) &&
+ lp_is_simple_wrap_mode(static_state->wrap_t)) {
+ /* do sampling/filtering with fixed pt arithmetic */
+ lp_build_sample_aos(&bld, unit, s, t, r, ddx, ddy,
+ lod_bias, explicit_lod,
+ texel_out);
+ }
+
+ else {
+ if ((gallivm_debug & GALLIVM_DEBUG_PERF) &&
+ util_format_fits_8unorm(bld.format_desc)) {
+ debug_printf("%s: using floating point linear filtering for %s\n",
+ __FUNCTION__, bld.format_desc->short_name);
+ debug_printf(" min_img %d mag_img %d mip %d wraps %d wrapt %d\n",
+ static_state->min_img_filter,
+ static_state->mag_img_filter,
+ static_state->min_mip_filter,
+ static_state->wrap_s,
+ static_state->wrap_t);
+ }
+
+ lp_build_sample_general(&bld, unit, s, t, r, ddx, ddy,
+ lod_bias, explicit_lod,
+ texel_out);
}
- lp_build_sample_compare(&bld, r, texel);
+ lp_build_sample_compare(&bld, r, texel_out);
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_struct.c b/src/gallium/auxiliary/gallivm/lp_bld_struct.c
index 3998ac374fe..4693c2de6f9 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_struct.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_struct.c
@@ -49,6 +49,8 @@ lp_build_struct_get_ptr(LLVMBuilderRef builder,
{
LLVMValueRef indices[2];
LLVMValueRef member_ptr;
+ assert(LLVMGetTypeKind(LLVMTypeOf(ptr)) == LLVMPointerTypeKind);
+ assert(LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(ptr))) == LLVMStructTypeKind);
indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
indices[1] = LLVMConstInt(LLVMInt32Type(), member, 0);
member_ptr = LLVMBuildGEP(builder, ptr, indices, Elements(indices), "");
@@ -65,8 +67,91 @@ lp_build_struct_get(LLVMBuilderRef builder,
{
LLVMValueRef member_ptr;
LLVMValueRef res;
+ assert(LLVMGetTypeKind(LLVMTypeOf(ptr)) == LLVMPointerTypeKind);
+ assert(LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(ptr))) == LLVMStructTypeKind);
member_ptr = lp_build_struct_get_ptr(builder, ptr, member, name);
res = LLVMBuildLoad(builder, member_ptr, "");
lp_build_name(res, "%s.%s", LLVMGetValueName(ptr), name);
return res;
}
+
+
+LLVMValueRef
+lp_build_array_get_ptr(LLVMBuilderRef builder,
+ LLVMValueRef ptr,
+ LLVMValueRef index)
+{
+ LLVMValueRef indices[2];
+ LLVMValueRef element_ptr;
+ assert(LLVMGetTypeKind(LLVMTypeOf(ptr)) == LLVMPointerTypeKind);
+ assert(LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(ptr))) == LLVMArrayTypeKind);
+ indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ indices[1] = index;
+ element_ptr = LLVMBuildGEP(builder, ptr, indices, Elements(indices), "");
+#ifdef DEBUG
+ lp_build_name(element_ptr, "&%s[%s]",
+ LLVMGetValueName(ptr), LLVMGetValueName(index));
+#endif
+ return element_ptr;
+}
+
+
+LLVMValueRef
+lp_build_array_get(LLVMBuilderRef builder,
+ LLVMValueRef ptr,
+ LLVMValueRef index)
+{
+ LLVMValueRef element_ptr;
+ LLVMValueRef res;
+ assert(LLVMGetTypeKind(LLVMTypeOf(ptr)) == LLVMPointerTypeKind);
+ assert(LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(ptr))) == LLVMArrayTypeKind);
+ element_ptr = lp_build_array_get_ptr(builder, ptr, index);
+ res = LLVMBuildLoad(builder, element_ptr, "");
+#ifdef DEBUG
+ lp_build_name(res, "%s[%s]", LLVMGetValueName(ptr), LLVMGetValueName(index));
+#endif
+ return res;
+}
+
+
+void
+lp_build_array_set(LLVMBuilderRef builder,
+ LLVMValueRef ptr,
+ LLVMValueRef index,
+ LLVMValueRef value)
+{
+ LLVMValueRef element_ptr;
+ assert(LLVMGetTypeKind(LLVMTypeOf(ptr)) == LLVMPointerTypeKind);
+ assert(LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(ptr))) == LLVMArrayTypeKind);
+ element_ptr = lp_build_array_get_ptr(builder, ptr, index);
+ LLVMBuildStore(builder, value, element_ptr);
+}
+
+
+LLVMValueRef
+lp_build_pointer_get(LLVMBuilderRef builder,
+ LLVMValueRef ptr,
+ LLVMValueRef index)
+{
+ LLVMValueRef element_ptr;
+ LLVMValueRef res;
+ assert(LLVMGetTypeKind(LLVMTypeOf(ptr)) == LLVMPointerTypeKind);
+ element_ptr = LLVMBuildGEP(builder, ptr, &index, 1, "");
+ res = LLVMBuildLoad(builder, element_ptr, "");
+#ifdef DEBUG
+ lp_build_name(res, "%s[%s]", LLVMGetValueName(ptr), LLVMGetValueName(index));
+#endif
+ return res;
+}
+
+
+void
+lp_build_pointer_set(LLVMBuilderRef builder,
+ LLVMValueRef ptr,
+ LLVMValueRef index,
+ LLVMValueRef value)
+{
+ LLVMValueRef element_ptr;
+ element_ptr = LLVMBuildGEP(builder, ptr, &index, 1, "");
+ LLVMBuildStore(builder, value, element_ptr);
+}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_struct.h b/src/gallium/auxiliary/gallivm/lp_bld_struct.h
index 147336edb4b..eb87a8eee9e 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_struct.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_struct.h
@@ -71,5 +71,46 @@ lp_build_struct_get(LLVMBuilderRef builder,
unsigned member,
const char *name);
+/**
+ * Get value pointer to an array element.
+ */
+LLVMValueRef
+lp_build_array_get_ptr(LLVMBuilderRef builder,
+ LLVMValueRef ptr,
+ LLVMValueRef index);
+
+/**
+ * Get the value of an array element.
+ */
+LLVMValueRef
+lp_build_array_get(LLVMBuilderRef builder,
+ LLVMValueRef ptr,
+ LLVMValueRef index);
+
+/**
+ * Set the value of an array element.
+ */
+void
+lp_build_array_set(LLVMBuilderRef builder,
+ LLVMValueRef ptr,
+ LLVMValueRef index,
+ LLVMValueRef value);
+
+/**
+ * Get the value of an array element.
+ */
+LLVMValueRef
+lp_build_pointer_get(LLVMBuilderRef builder,
+ LLVMValueRef ptr,
+ LLVMValueRef index);
+
+/**
+ * Set the value of an array element.
+ */
+void
+lp_build_pointer_set(LLVMBuilderRef builder,
+ LLVMValueRef ptr,
+ LLVMValueRef index,
+ LLVMValueRef value);
#endif /* !LP_BLD_STRUCT_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c
index 278c838eaca..4685a90e418 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c
@@ -60,28 +60,130 @@ lp_build_broadcast(LLVMBuilderRef builder,
}
+/**
+ * Broadcast
+ */
LLVMValueRef
lp_build_broadcast_scalar(struct lp_build_context *bld,
LLVMValueRef scalar)
{
const struct lp_type type = bld->type;
+
+ assert(lp_check_elem_type(type, LLVMTypeOf(scalar)));
+
+ if (type.length == 1) {
+ return scalar;
+ }
+ else {
+ LLVMValueRef res;
+
+#if HAVE_LLVM >= 0x207
+ /* The shuffle vector is always made of int32 elements */
+ struct lp_type i32_vec_type = lp_type_int_vec(32);
+ i32_vec_type.length = type.length;
+
+ res = LLVMBuildInsertElement(bld->builder, bld->undef, scalar,
+ LLVMConstInt(LLVMInt32Type(), 0, 0), "");
+ res = LLVMBuildShuffleVector(bld->builder, res, bld->undef,
+ lp_build_const_int_vec(i32_vec_type, 0), "");
+#else
+ /* XXX: The above path provokes a bug in LLVM 2.6 */
+ unsigned i;
+ res = bld->undef;
+ for(i = 0; i < type.length; ++i) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ res = LLVMBuildInsertElement(bld->builder, res, scalar, index, "");
+ }
+#endif
+ return res;
+ }
+}
+
+
+/**
+ * Combined extract and broadcast (or a mere shuffle when the two types match)
+ */
+LLVMValueRef
+lp_build_extract_broadcast(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ LLVMValueRef vector,
+ LLVMValueRef index)
+{
+ LLVMTypeRef i32t = LLVMInt32Type();
LLVMValueRef res;
- unsigned i;
- res = bld->undef;
- for(i = 0; i < type.length; ++i) {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
- res = LLVMBuildInsertElement(bld->builder, res, scalar, index, "");
+ assert(src_type.floating == dst_type.floating);
+ assert(src_type.width == dst_type.width);
+
+ assert(lp_check_value(src_type, vector));
+ assert(LLVMTypeOf(index) == i32t);
+
+ if (src_type.length == 1) {
+ if (dst_type.length == 1) {
+ /*
+ * Trivial scalar -> scalar.
+ */
+
+ res = vector;
+ }
+ else {
+ /*
+ * Broadcast scalar -> vector.
+ */
+
+ res = lp_build_broadcast(builder,
+ lp_build_vec_type(dst_type),
+ vector);
+ }
+ }
+ else {
+ if (dst_type.length == src_type.length) {
+ /*
+ * Special shuffle of the same size.
+ */
+
+ LLVMValueRef shuffle;
+ shuffle = lp_build_broadcast(builder,
+ LLVMVectorType(i32t, dst_type.length),
+ index);
+ res = LLVMBuildShuffleVector(builder, vector,
+ LLVMGetUndef(lp_build_vec_type(dst_type)),
+ shuffle, "");
+ }
+ else {
+ LLVMValueRef scalar;
+ scalar = LLVMBuildExtractElement(builder, vector, index, "");
+ if (dst_type.length == 1) {
+ /*
+ * Trivial extract scalar from vector.
+ */
+
+ res = scalar;
+ }
+ else {
+ /*
+ * General case of different sized vectors.
+ */
+
+ res = lp_build_broadcast(builder,
+ lp_build_vec_type(dst_type),
+ vector);
+ }
+ }
}
return res;
}
+/**
+ * Swizzle one channel into all other three channels.
+ */
LLVMValueRef
-lp_build_broadcast_aos(struct lp_build_context *bld,
- LLVMValueRef a,
- unsigned channel)
+lp_build_swizzle_scalar_aos(struct lp_build_context *bld,
+ LLVMValueRef a,
+ unsigned channel)
{
const struct lp_type type = bld->type;
const unsigned n = type.length;
@@ -93,7 +195,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
/* XXX: SSE3 has PSHUFB which should be better than bitmasks, but forcing
* using shuffles here actually causes worst results. More investigation is
* needed. */
- if (n <= 4) {
+ if (type.width >= 16) {
/*
* Shuffle.
*/
@@ -115,21 +217,25 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
* YY00 YY00 .... YY00
* YYYY YYYY .... YYYY <= output
*/
- struct lp_type type4 = type;
+ struct lp_type type4;
const char shifts[4][2] = {
{ 1, 2},
{-1, 2},
{ 1, -2},
{-1, -2}
};
- boolean cond[4];
unsigned i;
- memset(cond, 0, sizeof cond);
- cond[channel] = 1;
+ a = LLVMBuildAnd(bld->builder, a,
+ lp_build_const_mask_aos(type, 1 << channel), "");
- a = LLVMBuildAnd(bld->builder, a, lp_build_const_mask_aos(type, cond), "");
+ /*
+ * Build a type where each element is an integer that cover the four
+ * channels.
+ */
+ type4 = type;
+ type4.floating = FALSE;
type4.width *= 4;
type4.length /= 4;
@@ -159,81 +265,248 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
LLVMValueRef
-lp_build_swizzle1_aos(struct lp_build_context *bld,
- LLVMValueRef a,
- const unsigned char swizzle[4])
+lp_build_swizzle_aos(struct lp_build_context *bld,
+ LLVMValueRef a,
+ const unsigned char swizzles[4])
{
- const unsigned n = bld->type.length;
+ const struct lp_type type = bld->type;
+ const unsigned n = type.length;
unsigned i, j;
- if(a == bld->undef || a == bld->zero || a == bld->one)
+ if (swizzles[0] == PIPE_SWIZZLE_RED &&
+ swizzles[1] == PIPE_SWIZZLE_GREEN &&
+ swizzles[2] == PIPE_SWIZZLE_BLUE &&
+ swizzles[3] == PIPE_SWIZZLE_ALPHA) {
return a;
+ }
- if(swizzle[0] == swizzle[1] && swizzle[1] == swizzle[2] && swizzle[2] == swizzle[3])
- return lp_build_broadcast_aos(bld, a, swizzle[0]);
+ if (swizzles[0] == swizzles[1] &&
+ swizzles[1] == swizzles[2] &&
+ swizzles[2] == swizzles[3]) {
+ switch (swizzles[0]) {
+ case PIPE_SWIZZLE_RED:
+ case PIPE_SWIZZLE_GREEN:
+ case PIPE_SWIZZLE_BLUE:
+ case PIPE_SWIZZLE_ALPHA:
+ return lp_build_swizzle_scalar_aos(bld, a, swizzles[0]);
+ case PIPE_SWIZZLE_ZERO:
+ return bld->zero;
+ case PIPE_SWIZZLE_ONE:
+ return bld->one;
+ default:
+ assert(0);
+ return bld->undef;
+ }
+ }
- {
+ if (type.width >= 16) {
/*
* Shuffle.
*/
- LLVMTypeRef elem_type = LLVMInt32Type();
+ LLVMValueRef undef = LLVMGetUndef(lp_build_elem_type(type));
+ LLVMTypeRef i32t = LLVMInt32Type();
LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
+ LLVMValueRef aux[LP_MAX_VECTOR_LENGTH];
+
+ memset(aux, 0, sizeof aux);
+
+ for(j = 0; j < n; j += 4) {
+ for(i = 0; i < 4; ++i) {
+ unsigned shuffle;
+ switch (swizzles[i]) {
+ default:
+ assert(0);
+ /* fall through */
+ case PIPE_SWIZZLE_RED:
+ case PIPE_SWIZZLE_GREEN:
+ case PIPE_SWIZZLE_BLUE:
+ case PIPE_SWIZZLE_ALPHA:
+ shuffle = j + swizzles[i];
+ break;
+ case PIPE_SWIZZLE_ZERO:
+ shuffle = type.length + 0;
+ if (!aux[0]) {
+ aux[0] = lp_build_const_elem(type, 0.0);
+ }
+ break;
+ case PIPE_SWIZZLE_ONE:
+ shuffle = type.length + 1;
+ if (!aux[1]) {
+ aux[1] = lp_build_const_elem(type, 1.0);
+ }
+ break;
+ }
+ shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
+ }
+ }
- for(j = 0; j < n; j += 4)
- for(i = 0; i < 4; ++i)
- shuffles[j + i] = LLVMConstInt(elem_type, j + swizzle[i], 0);
+ for (i = 0; i < n; ++i) {
+ if (!aux[i]) {
+ aux[i] = undef;
+ }
+ }
- return LLVMBuildShuffleVector(bld->builder, a, bld->undef, LLVMConstVector(shuffles, n), "");
+ return LLVMBuildShuffleVector(bld->builder, a,
+ LLVMConstVector(aux, n),
+ LLVMConstVector(shuffles, n), "");
+ } else {
+ /*
+ * Bit mask and shifts.
+ *
+ * For example, this will convert BGRA to RGBA by doing
+ *
+ * rgba = (bgra & 0x00ff0000) >> 16
+ * | (bgra & 0xff00ff00)
+ * | (bgra & 0x000000ff) << 16
+ *
+ * This is necessary not only for faster cause, but because X86 backend
+ * will refuse shuffles of <4 x i8> vectors
+ */
+ LLVMValueRef res;
+ struct lp_type type4;
+ unsigned cond = 0;
+ unsigned chan;
+ int shift;
+
+ /*
+ * Start with a mixture of 1 and 0.
+ */
+ for (chan = 0; chan < 4; ++chan) {
+ if (swizzles[chan] == PIPE_SWIZZLE_ONE) {
+ cond |= 1 << chan;
+ }
+ }
+ res = lp_build_select_aos(bld, cond, bld->one, bld->zero);
+
+ /*
+ * Build a type where each element is an integer that cover the four
+ * channels.
+ */
+ type4 = type;
+ type4.floating = FALSE;
+ type4.width *= 4;
+ type4.length /= 4;
+
+ a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(type4), "");
+ res = LLVMBuildBitCast(bld->builder, res, lp_build_vec_type(type4), "");
+
+ /*
+ * Mask and shift the channels, trying to group as many channels in the
+ * same shift as possible
+ */
+ for (shift = -3; shift <= 3; ++shift) {
+ unsigned long long mask = 0;
+
+ assert(type4.width <= sizeof(mask)*8);
+
+ for (chan = 0; chan < 4; ++chan) {
+ /* FIXME: big endian */
+ if (swizzles[chan] < 4 &&
+ chan - swizzles[chan] == shift) {
+ mask |= ((1ULL << type.width) - 1) << (swizzles[chan] * type.width);
+ }
+ }
+
+ if (mask) {
+ LLVMValueRef masked;
+ LLVMValueRef shifted;
+
+ if (0)
+ debug_printf("shift = %i, mask = 0x%08llx\n", shift, mask);
+
+ masked = LLVMBuildAnd(bld->builder, a,
+ lp_build_const_int_vec(type4, mask), "");
+ if (shift > 0) {
+ shifted = LLVMBuildShl(bld->builder, masked,
+ lp_build_const_int_vec(type4, shift*type.width), "");
+ } else if (shift < 0) {
+ shifted = LLVMBuildLShr(bld->builder, masked,
+ lp_build_const_int_vec(type4, -shift*type.width), "");
+ } else {
+ shifted = masked;
+ }
+
+ res = LLVMBuildOr(bld->builder, res, shifted, "");
+ }
+ }
+
+ return LLVMBuildBitCast(bld->builder, res, lp_build_vec_type(type), "");
}
}
+/**
+ * Extended swizzle of a single channel of a SoA vector.
+ *
+ * @param bld building context
+ * @param unswizzled array with the 4 unswizzled values
+ * @param swizzle one of the PIPE_SWIZZLE_*
+ *
+ * @return the swizzled value.
+ */
LLVMValueRef
-lp_build_swizzle2_aos(struct lp_build_context *bld,
- LLVMValueRef a,
- LLVMValueRef b,
- const unsigned char swizzle[4])
+lp_build_swizzle_soa_channel(struct lp_build_context *bld,
+ const LLVMValueRef *unswizzled,
+ unsigned swizzle)
{
- const unsigned n = bld->type.length;
- unsigned i, j;
+ switch (swizzle) {
+ case PIPE_SWIZZLE_RED:
+ case PIPE_SWIZZLE_GREEN:
+ case PIPE_SWIZZLE_BLUE:
+ case PIPE_SWIZZLE_ALPHA:
+ return unswizzled[swizzle];
+ case PIPE_SWIZZLE_ZERO:
+ return bld->zero;
+ case PIPE_SWIZZLE_ONE:
+ return bld->one;
+ default:
+ assert(0);
+ return bld->undef;
+ }
+}
- if(swizzle[0] < 4 && swizzle[1] < 4 && swizzle[2] < 4 && swizzle[3] < 4)
- return lp_build_swizzle1_aos(bld, a, swizzle);
- if(a == b) {
- unsigned char swizzle1[4];
- swizzle1[0] = swizzle[0] % 4;
- swizzle1[1] = swizzle[1] % 4;
- swizzle1[2] = swizzle[2] % 4;
- swizzle1[3] = swizzle[3] % 4;
- return lp_build_swizzle1_aos(bld, a, swizzle1);
- }
+/**
+ * Extended swizzle of a SoA vector.
+ *
+ * @param bld building context
+ * @param unswizzled array with the 4 unswizzled values
+ * @param swizzles array of PIPE_SWIZZLE_*
+ * @param swizzled output swizzled values
+ */
+void
+lp_build_swizzle_soa(struct lp_build_context *bld,
+ const LLVMValueRef *unswizzled,
+ const unsigned char swizzles[4],
+ LLVMValueRef *swizzled)
+{
+ unsigned chan;
- if(swizzle[0] % 4 == 0 &&
- swizzle[1] % 4 == 1 &&
- swizzle[2] % 4 == 2 &&
- swizzle[3] % 4 == 3) {
- boolean cond[4];
- cond[0] = swizzle[0] / 4;
- cond[1] = swizzle[1] / 4;
- cond[2] = swizzle[2] / 4;
- cond[3] = swizzle[3] / 4;
- return lp_build_select_aos(bld, a, b, cond);
+ for (chan = 0; chan < 4; ++chan) {
+ swizzled[chan] = lp_build_swizzle_soa_channel(bld, unswizzled,
+ swizzles[chan]);
}
+}
- {
- /*
- * Shuffle.
- */
- LLVMTypeRef elem_type = LLVMInt32Type();
- LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
- for(j = 0; j < n; j += 4)
- for(i = 0; i < 4; ++i)
- shuffles[j + i] = LLVMConstInt(elem_type, j + (swizzle[i] % 4) + (swizzle[i] / 4 * n), 0);
+/**
+ * Do an extended swizzle of a SoA vector inplace.
+ *
+ * @param bld building context
+ * @param values intput/output array with the 4 values
+ * @param swizzles array of PIPE_SWIZZLE_*
+ */
+void
+lp_build_swizzle_soa_inplace(struct lp_build_context *bld,
+ LLVMValueRef *values,
+ const unsigned char swizzles[4])
+{
+ LLVMValueRef unswizzled[4];
+ unsigned chan;
- return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), "");
+ for (chan = 0; chan < 4; ++chan) {
+ unswizzled[chan] = values[chan];
}
-}
-
+ lp_build_swizzle_soa(bld, unswizzled, swizzles, values);
+}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h
index 138ca620e63..fdea8442aef 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h
@@ -55,12 +55,20 @@ lp_build_broadcast_scalar(struct lp_build_context *bld,
LLVMValueRef scalar);
+LLVMValueRef
+lp_build_extract_broadcast(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ LLVMValueRef vector,
+ LLVMValueRef index);
+
+
/**
* Broadcast one channel of a vector composed of arrays of XYZW structures into
* all four channel.
*/
LLVMValueRef
-lp_build_broadcast_aos(struct lp_build_context *bld,
+lp_build_swizzle_scalar_aos(struct lp_build_context *bld,
LLVMValueRef a,
unsigned channel);
@@ -68,24 +76,31 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
/**
* Swizzle a vector consisting of an array of XYZW structs.
*
- * @param swizzle is the in [0,4[ range.
+ * @param swizzles is the in [0,4[ range.
*/
LLVMValueRef
-lp_build_swizzle1_aos(struct lp_build_context *bld,
- LLVMValueRef a,
- const unsigned char swizzle[4]);
+lp_build_swizzle_aos(struct lp_build_context *bld,
+ LLVMValueRef a,
+ const unsigned char swizzles[4]);
-/**
- * Swizzle two vector consisting of an array of XYZW structs.
- *
- * @param swizzle is the in [0,8[ range. Values in [4,8[ range refer to b.
- */
LLVMValueRef
-lp_build_swizzle2_aos(struct lp_build_context *bld,
- LLVMValueRef a,
- LLVMValueRef b,
- const unsigned char swizzle[4]);
+lp_build_swizzle_soa_channel(struct lp_build_context *bld,
+ const LLVMValueRef *unswizzled,
+ unsigned swizzle);
+
+
+void
+lp_build_swizzle_soa(struct lp_build_context *bld,
+ const LLVMValueRef *unswizzled,
+ const unsigned char swizzles[4],
+ LLVMValueRef *swizzled);
+
+
+void
+lp_build_swizzle_soa_inplace(struct lp_build_context *bld,
+ LLVMValueRef *values,
+ const unsigned char swizzles[4]);
#endif /* !LP_BLD_SWIZZLE_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
index 2eac5da6c69..97318b3456c 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
@@ -45,6 +45,15 @@ struct lp_build_context;
struct lp_build_mask_context;
+enum lp_build_tex_modifier {
+ LP_BLD_TEX_MODIFIER_NONE = 0,
+ LP_BLD_TEX_MODIFIER_PROJECTED,
+ LP_BLD_TEX_MODIFIER_LOD_BIAS,
+ LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
+ LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV
+};
+
+
/**
* Sampler code generation interface.
*
@@ -59,17 +68,34 @@ struct lp_build_sampler_soa
(*destroy)( struct lp_build_sampler_soa *sampler );
void
- (*emit_fetch_texel)( struct lp_build_sampler_soa *sampler,
+ (*emit_fetch_texel)( const struct lp_build_sampler_soa *sampler,
LLVMBuilderRef builder,
struct lp_type type,
unsigned unit,
unsigned num_coords,
const LLVMValueRef *coords,
- LLVMValueRef lodbias,
+ const LLVMValueRef *ddx,
+ const LLVMValueRef *ddy,
+ LLVMValueRef lod_bias, /* optional */
+ LLVMValueRef explicit_lod, /* optional */
LLVMValueRef *texel);
};
+struct lp_build_sampler_aos
+{
+ LLVMValueRef
+ (*emit_fetch_texel)( struct lp_build_sampler_aos *sampler,
+ struct lp_build_context *bld,
+ unsigned target, /* TGSI_TEXTURE_* */
+ unsigned unit,
+ LLVMValueRef coords,
+ LLVMValueRef ddx,
+ LLVMValueRef ddy,
+ enum lp_build_tex_modifier modifier);
+};
+
+
void
lp_build_tgsi_soa(LLVMBuilderRef builder,
const struct tgsi_token *tokens,
@@ -80,7 +106,19 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
const LLVMValueRef (*inputs)[4],
LLVMValueRef (*outputs)[4],
struct lp_build_sampler_soa *sampler,
- struct tgsi_shader_info *info);
+ const struct tgsi_shader_info *info);
+
+
+void
+lp_build_tgsi_aos(LLVMBuilderRef builder,
+ const struct tgsi_token *tokens,
+ struct lp_type type,
+ const unsigned char swizzles[4],
+ LLVMValueRef consts_ptr,
+ const LLVMValueRef *inputs,
+ LLVMValueRef *outputs,
+ struct lp_build_sampler_aos *sampler,
+ const struct tgsi_shader_info *info);
#endif /* LP_BLD_TGSI_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
new file mode 100644
index 00000000000..d5f963be58d
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
@@ -0,0 +1,1176 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * TGSI to LLVM IR translation -- AoS.
+ *
+ * FIXME:
+ * - No control flow support: the existing control flow code should be factored
+ * out into from the SoA code into a common module and shared.
+ * - No derivatives. Derivate logic should be pluggable, just like the samplers.
+ *
+ * @author Jose Fonseca <[email protected]>
+ */
+
+#include "pipe/p_config.h"
+#include "pipe/p_shader_tokens.h"
+#include "util/u_debug.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_info.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+#include "tgsi/tgsi_scan.h"
+#include "lp_bld_type.h"
+#include "lp_bld_const.h"
+#include "lp_bld_arit.h"
+#include "lp_bld_logic.h"
+#include "lp_bld_swizzle.h"
+#include "lp_bld_flow.h"
+#include "lp_bld_quad.h"
+#include "lp_bld_tgsi.h"
+#include "lp_bld_limits.h"
+#include "lp_bld_debug.h"
+
+
+#define LP_MAX_INSTRUCTIONS 256
+
+
+struct lp_build_tgsi_aos_context
+{
+ struct lp_build_context base;
+
+ /* Builder for integer masks and indices */
+ struct lp_build_context int_bld;
+
+ /*
+ * AoS swizzle used:
+ * - swizzles[0] = red index
+ * - swizzles[1] = green index
+ * - swizzles[2] = blue index
+ * - swizzles[3] = alpha index
+ */
+ unsigned char swizzles[4];
+ unsigned char inv_swizzles[4];
+
+ LLVMValueRef consts_ptr;
+ const LLVMValueRef *inputs;
+ LLVMValueRef *outputs;
+
+ struct lp_build_sampler_aos *sampler;
+
+ LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES];
+ LLVMValueRef temps[LP_MAX_TGSI_TEMPS];
+ LLVMValueRef addr[LP_MAX_TGSI_ADDRS];
+ LLVMValueRef preds[LP_MAX_TGSI_PREDS];
+
+ /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
+ * set in the indirect_files field.
+ * The temps[] array above is unused then.
+ */
+ LLVMValueRef temps_array;
+
+ /** bitmask indicating which register files are accessed indirectly */
+ unsigned indirect_files;
+
+ struct tgsi_full_instruction *instructions;
+ uint max_instructions;
+};
+
+
+/**
+ * Wrapper around lp_build_swizzle_aos which translates swizzles to another
+ * ordering.
+ */
+static LLVMValueRef
+swizzle_aos(struct lp_build_tgsi_aos_context *bld,
+ LLVMValueRef a,
+ unsigned swizzle_x,
+ unsigned swizzle_y,
+ unsigned swizzle_z,
+ unsigned swizzle_w)
+{
+ unsigned char swizzles[4];
+
+ assert(swizzle_x < 4);
+ assert(swizzle_y < 4);
+ assert(swizzle_z < 4);
+ assert(swizzle_w < 4);
+
+ swizzles[bld->inv_swizzles[0]] = bld->swizzles[swizzle_x];
+ swizzles[bld->inv_swizzles[1]] = bld->swizzles[swizzle_y];
+ swizzles[bld->inv_swizzles[2]] = bld->swizzles[swizzle_z];
+ swizzles[bld->inv_swizzles[3]] = bld->swizzles[swizzle_w];
+
+ return lp_build_swizzle_aos(&bld->base, a, swizzles);
+}
+
+
+static LLVMValueRef
+swizzle_scalar_aos(struct lp_build_tgsi_aos_context *bld,
+ LLVMValueRef a,
+ unsigned chan)
+{
+ chan = bld->swizzles[chan];
+ return lp_build_swizzle_scalar_aos(&bld->base, a, chan);
+}
+
+
+/**
+ * Register fetch.
+ */
+static LLVMValueRef
+emit_fetch(
+ struct lp_build_tgsi_aos_context *bld,
+ const struct tgsi_full_instruction *inst,
+ unsigned src_op)
+{
+ struct lp_type type = bld->base.type;
+ const struct tgsi_full_src_register *reg = &inst->Src[src_op];
+ LLVMValueRef res;
+ unsigned chan;
+
+ assert(!reg->Register.Indirect);
+
+ /*
+ * Fetch the from the register file.
+ */
+
+ switch (reg->Register.File) {
+ case TGSI_FILE_CONSTANT:
+ /*
+ * Get the constants components
+ */
+
+ res = bld->base.undef;
+ for (chan = 0; chan < 4; ++chan) {
+ LLVMValueRef index;
+ LLVMValueRef scalar_ptr;
+ LLVMValueRef scalar;
+ LLVMValueRef swizzle;
+
+ index = LLVMConstInt(LLVMInt32Type(),
+ reg->Register.Index*4 + chan,
+ 0);
+
+ scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr,
+ &index, 1, "");
+
+ scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
+
+ lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]);
+
+ /*
+ * NOTE: constants array is always assumed to be RGBA
+ */
+
+ swizzle = LLVMConstInt(LLVMInt32Type(), chan, 0);
+
+ res = LLVMBuildInsertElement(bld->base.builder, res, scalar, swizzle, "");
+ }
+
+ /*
+ * Broadcast the first quaternion to all others.
+ *
+ * XXX: could be factored into a reusable function.
+ */
+
+ if (type.length > 4) {
+ LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
+ unsigned i;
+
+ for (chan = 0; chan < 4; ++chan) {
+ shuffles[chan] = LLVMConstInt(LLVMInt32Type(), chan, 0);
+ }
+
+ for (i = 4; i < type.length; ++i) {
+ shuffles[i] = shuffles[i % 4];
+ }
+
+ res = LLVMBuildShuffleVector(bld->base.builder,
+ res, bld->base.undef,
+ LLVMConstVector(shuffles, type.length),
+ "");
+ }
+ break;
+
+ case TGSI_FILE_IMMEDIATE:
+ res = bld->immediates[reg->Register.Index];
+ assert(res);
+ break;
+
+ case TGSI_FILE_INPUT:
+ res = bld->inputs[reg->Register.Index];
+ assert(res);
+ break;
+
+ case TGSI_FILE_TEMPORARY:
+ {
+ LLVMValueRef temp_ptr;
+ temp_ptr = bld->temps[reg->Register.Index];
+ res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
+ if (!res)
+ return bld->base.undef;
+ }
+ break;
+
+ default:
+ assert(0 && "invalid src register in emit_fetch()");
+ return bld->base.undef;
+ }
+
+ /*
+ * Apply sign modifier.
+ */
+
+ if (reg->Register.Absolute) {
+ res = lp_build_abs(&bld->base, res);
+ }
+
+ if(reg->Register.Negate) {
+ res = lp_build_negate(&bld->base, res);
+ }
+
+ /*
+ * Swizzle the argument
+ */
+
+ res = swizzle_aos(bld, res,
+ reg->Register.SwizzleX,
+ reg->Register.SwizzleY,
+ reg->Register.SwizzleZ,
+ reg->Register.SwizzleW);
+
+ return res;
+}
+
+
+/**
+ * Register store.
+ */
+static void
+emit_store(
+ struct lp_build_tgsi_aos_context *bld,
+ const struct tgsi_full_instruction *inst,
+ unsigned index,
+ LLVMValueRef value)
+{
+ const struct tgsi_full_dst_register *reg = &inst->Dst[index];
+ LLVMValueRef mask = NULL;
+ LLVMValueRef ptr;
+
+ /*
+ * Saturate the value
+ */
+
+ switch (inst->Instruction.Saturate) {
+ case TGSI_SAT_NONE:
+ break;
+
+ case TGSI_SAT_ZERO_ONE:
+ value = lp_build_max(&bld->base, value, bld->base.zero);
+ value = lp_build_min(&bld->base, value, bld->base.one);
+ break;
+
+ case TGSI_SAT_MINUS_PLUS_ONE:
+ value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0));
+ value = lp_build_min(&bld->base, value, bld->base.one);
+ break;
+
+ default:
+ assert(0);
+ }
+
+ /*
+ * Translate the register file
+ */
+
+ assert(!reg->Register.Indirect);
+
+ switch (reg->Register.File) {
+ case TGSI_FILE_OUTPUT:
+ ptr = bld->outputs[reg->Register.Index];
+ break;
+
+ case TGSI_FILE_TEMPORARY:
+ ptr = bld->temps[reg->Register.Index];
+ break;
+
+ case TGSI_FILE_ADDRESS:
+ ptr = bld->addr[reg->Indirect.Index];
+ break;
+
+ case TGSI_FILE_PREDICATE:
+ ptr = bld->preds[reg->Register.Index];
+ break;
+
+ default:
+ assert(0);
+ return;
+ }
+
+ /*
+ * Predicate
+ */
+
+ if (inst->Instruction.Predicate) {
+ LLVMValueRef pred;
+
+ assert(inst->Predicate.Index < LP_MAX_TGSI_PREDS);
+
+ pred = LLVMBuildLoad(bld->base.builder,
+ bld->preds[inst->Predicate.Index], "");
+
+ /*
+ * Convert the value to an integer mask.
+ */
+ pred = lp_build_compare(bld->base.builder,
+ bld->base.type,
+ PIPE_FUNC_NOTEQUAL,
+ pred,
+ bld->base.zero);
+
+ if (inst->Predicate.Negate) {
+ pred = LLVMBuildNot(bld->base.builder, pred, "");
+ }
+
+ pred = swizzle_aos(bld, pred,
+ inst->Predicate.SwizzleX,
+ inst->Predicate.SwizzleY,
+ inst->Predicate.SwizzleZ,
+ inst->Predicate.SwizzleW);
+
+ if (mask) {
+ mask = LLVMBuildAnd(bld->base.builder, mask, pred, "");
+ } else {
+ mask = pred;
+ }
+ }
+
+ /*
+ * Writemask
+ */
+
+ if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) {
+ LLVMValueRef writemask;
+
+ writemask = lp_build_const_mask_aos(bld->base.type, reg->Register.WriteMask);
+
+ if (mask) {
+ mask = LLVMBuildAnd(bld->base.builder, mask, writemask, "");
+ } else {
+ mask = writemask;
+ }
+ }
+
+ if (mask) {
+ LLVMValueRef orig_value;
+
+ orig_value = LLVMBuildLoad(bld->base.builder, ptr, "");
+ value = lp_build_select(&bld->base,
+ mask, value, orig_value);
+ }
+
+ LLVMBuildStore(bld->base.builder, value, ptr);
+}
+
+
+/**
+ * High-level instruction translators.
+ */
+
+static LLVMValueRef
+emit_tex(struct lp_build_tgsi_aos_context *bld,
+ const struct tgsi_full_instruction *inst,
+ enum lp_build_tex_modifier modifier)
+{
+ unsigned target;
+ unsigned unit;
+ LLVMValueRef coords;
+ LLVMValueRef ddx;
+ LLVMValueRef ddy;
+
+ if (!bld->sampler) {
+ _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
+ return bld->base.undef;
+ }
+
+ target = inst->Texture.Texture;
+
+ coords = emit_fetch( bld, inst, 0 );
+
+ if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
+ ddx = emit_fetch( bld, inst, 1 );
+ ddy = emit_fetch( bld, inst, 2 );
+ unit = inst->Src[3].Register.Index;
+ } else {
+#if 0
+ ddx = lp_build_ddx( &bld->base, coords );
+ ddy = lp_build_ddy( &bld->base, coords );
+#else
+ /* TODO */
+ ddx = bld->base.one;
+ ddy = bld->base.one;
+#endif
+ unit = inst->Src[1].Register.Index;
+ }
+
+ return bld->sampler->emit_fetch_texel(bld->sampler,
+ &bld->base,
+ target, unit,
+ coords, ddx, ddy,
+ modifier);
+}
+
+
+static void
+emit_declaration(
+ struct lp_build_tgsi_aos_context *bld,
+ const struct tgsi_full_declaration *decl)
+{
+ LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);
+
+ unsigned first = decl->Range.First;
+ unsigned last = decl->Range.Last;
+ unsigned idx;
+
+ for (idx = first; idx <= last; ++idx) {
+ switch (decl->Declaration.File) {
+ case TGSI_FILE_TEMPORARY:
+ assert(idx < LP_MAX_TGSI_TEMPS);
+ if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
+ LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(),
+ last + 1, 0);
+ bld->temps_array = lp_build_array_alloca(bld->base.builder,
+ vec_type, array_size, "");
+ } else {
+ bld->temps[idx] = lp_build_alloca(bld->base.builder,
+ vec_type, "");
+ }
+ break;
+
+ case TGSI_FILE_OUTPUT:
+ bld->outputs[idx] = lp_build_alloca(bld->base.builder,
+ vec_type, "");
+ break;
+
+ case TGSI_FILE_ADDRESS:
+ assert(idx < LP_MAX_TGSI_ADDRS);
+ bld->addr[idx] = lp_build_alloca(bld->base.builder,
+ vec_type, "");
+ break;
+
+ case TGSI_FILE_PREDICATE:
+ assert(idx < LP_MAX_TGSI_PREDS);
+ bld->preds[idx] = lp_build_alloca(bld->base.builder,
+ vec_type, "");
+ break;
+
+ default:
+ /* don't need to declare other vars */
+ break;
+ }
+ }
+}
+
+
+/**
+ * Emit LLVM for one TGSI instruction.
+ * \param return TRUE for success, FALSE otherwise
+ */
+static boolean
+emit_instruction(
+ struct lp_build_tgsi_aos_context *bld,
+ const struct tgsi_full_instruction *inst,
+ const struct tgsi_opcode_info *info,
+ int *pc)
+{
+ LLVMValueRef src0, src1, src2;
+ LLVMValueRef tmp0, tmp1;
+ LLVMValueRef dst0;
+
+ /*
+ * Stores and write masks are handled in a general fashion after the long
+ * instruction opcode switch statement.
+ *
+ * Although not stricitly necessary, we avoid generating instructions for
+ * channels which won't be stored, in cases where's that easy. For some
+ * complex instructions, like texture sampling, it is more convenient to
+ * assume a full writemask and then let LLVM optimization passes eliminate
+ * redundant code.
+ */
+
+ (*pc)++;
+
+ assert(info->num_dst <= 1);
+ if (info->num_dst) {
+ dst0 = bld->base.undef;
+ }
+
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_ARL:
+ src0 = emit_fetch(bld, inst, 0);
+ dst0 = lp_build_floor(&bld->base, src0);
+ break;
+
+ case TGSI_OPCODE_MOV:
+ dst0 = emit_fetch(bld, inst, 0);
+ break;
+
+ case TGSI_OPCODE_LIT:
+ return FALSE;
+
+ case TGSI_OPCODE_RCP:
+ /* TGSI_OPCODE_RECIP */
+ src0 = emit_fetch(bld, inst, 0);
+ dst0 = lp_build_rcp(&bld->base, src0);
+ break;
+
+ case TGSI_OPCODE_RSQ:
+ /* TGSI_OPCODE_RECIPSQRT */
+ src0 = emit_fetch(bld, inst, 0);
+ tmp0 = lp_build_abs(&bld->base, src0);
+ dst0 = lp_build_rsqrt(&bld->base, tmp0);
+ break;
+
+ case TGSI_OPCODE_EXP:
+ return FALSE;
+
+ case TGSI_OPCODE_LOG:
+ return FALSE;
+
+ case TGSI_OPCODE_MUL:
+ src0 = emit_fetch(bld, inst, 0);
+ src1 = emit_fetch(bld, inst, 1);
+ dst0 = lp_build_mul(&bld->base, src0, src1);
+ break;
+
+ case TGSI_OPCODE_ADD:
+ src0 = emit_fetch(bld, inst, 0);
+ src1 = emit_fetch(bld, inst, 1);
+ dst0 = lp_build_add(&bld->base, src0, src1);
+ break;
+
+ case TGSI_OPCODE_DP3:
+ /* TGSI_OPCODE_DOT3 */
+ return FALSE;
+
+ case TGSI_OPCODE_DP4:
+ /* TGSI_OPCODE_DOT4 */
+ return FALSE;
+
+ case TGSI_OPCODE_DST:
+ return FALSE;
+
+ case TGSI_OPCODE_MIN:
+ src0 = emit_fetch(bld, inst, 0);
+ src1 = emit_fetch(bld, inst, 1);
+ dst0 = lp_build_max(&bld->base, src0, src1);
+ break;
+
+ case TGSI_OPCODE_MAX:
+ src0 = emit_fetch(bld, inst, 0);
+ src1 = emit_fetch(bld, inst, 1);
+ dst0 = lp_build_max(&bld->base, src0, src1);
+ break;
+
+ case TGSI_OPCODE_SLT:
+ /* TGSI_OPCODE_SETLT */
+ src0 = emit_fetch(bld, inst, 0);
+ src1 = emit_fetch(bld, inst, 1);
+ tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LESS, src0, src1);
+ dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
+ break;
+
+ case TGSI_OPCODE_SGE:
+ /* TGSI_OPCODE_SETGE */
+ src0 = emit_fetch(bld, inst, 0);
+ src1 = emit_fetch(bld, inst, 1);
+ tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, src0, src1);
+ dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
+ break;
+
+ case TGSI_OPCODE_MAD:
+ /* TGSI_OPCODE_MADD */
+ src0 = emit_fetch(bld, inst, 0);
+ src1 = emit_fetch(bld, inst, 1);
+ src2 = emit_fetch(bld, inst, 2);
+ tmp0 = lp_build_mul(&bld->base, src0, src1);
+ dst0 = lp_build_add(&bld->base, tmp0, src2);
+ break;
+
+ case TGSI_OPCODE_SUB:
+ src0 = emit_fetch(bld, inst, 0);
+ src1 = emit_fetch(bld, inst, 1);
+ dst0 = lp_build_sub(&bld->base, src0, src1);
+ break;
+
+ case TGSI_OPCODE_LRP:
+ src0 = emit_fetch(bld, inst, 0);
+ src1 = emit_fetch(bld, inst, 1);
+ src2 = emit_fetch(bld, inst, 2);
+ tmp0 = lp_build_sub(&bld->base, src1, src2);
+ tmp0 = lp_build_mul(&bld->base, src0, tmp0);
+ dst0 = lp_build_add(&bld->base, tmp0, src2);
+ break;
+
+ case TGSI_OPCODE_CND:
+ src0 = emit_fetch(bld, inst, 0);
+ src1 = emit_fetch(bld, inst, 1);
+ src2 = emit_fetch(bld, inst, 2);
+ tmp1 = lp_build_const_vec(bld->base.type, 0.5);
+ tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, src2, tmp1);
+ dst0 = lp_build_select(&bld->base, tmp0, src0, src1);
+ break;
+
+ case TGSI_OPCODE_DP2A:
+ return FALSE;
+
+ case TGSI_OPCODE_FRC:
+ src0 = emit_fetch(bld, inst, 0);
+ tmp0 = lp_build_floor(&bld->base, src0);
+ dst0 = lp_build_sub(&bld->base, src0, tmp0);
+ break;
+
+ case TGSI_OPCODE_CLAMP:
+ src0 = emit_fetch(bld, inst, 0);
+ src1 = emit_fetch(bld, inst, 1);
+ src2 = emit_fetch(bld, inst, 2);
+ tmp0 = lp_build_max(&bld->base, src0, src1);
+ dst0 = lp_build_min(&bld->base, tmp0, src2);
+ break;
+
+ case TGSI_OPCODE_FLR:
+ src0 = emit_fetch(bld, inst, 0);
+ dst0 = lp_build_floor(&bld->base, src0);
+ break;
+
+ case TGSI_OPCODE_ROUND:
+ src0 = emit_fetch(bld, inst, 0);
+ dst0 = lp_build_round(&bld->base, src0);
+ break;
+
+ case TGSI_OPCODE_EX2:
+ src0 = emit_fetch(bld, inst, 0);
+ tmp0 = lp_build_swizzle_scalar_aos(&bld->base, src0, TGSI_SWIZZLE_X);
+ dst0 = lp_build_exp2(&bld->base, tmp0);
+ break;
+
+ case TGSI_OPCODE_LG2:
+ src0 = emit_fetch(bld, inst, 0);
+ tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
+ dst0 = lp_build_log2(&bld->base, tmp0);
+ break;
+
+ case TGSI_OPCODE_POW:
+ src0 = emit_fetch(bld, inst, 0);
+ src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
+ src1 = emit_fetch(bld, inst, 1);
+ src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X);
+ dst0 = lp_build_pow(&bld->base, src0, src1);
+ break;
+
+ case TGSI_OPCODE_XPD:
+ return FALSE;
+
+ case TGSI_OPCODE_ABS:
+ src0 = emit_fetch(bld, inst, 0);
+ dst0 = lp_build_abs(&bld->base, src0);
+ break;
+
+ case TGSI_OPCODE_RCC:
+ /* deprecated? */
+ assert(0);
+ return FALSE;
+
+ case TGSI_OPCODE_DPH:
+ return FALSE;
+
+ case TGSI_OPCODE_COS:
+ src0 = emit_fetch(bld, inst, 0);
+ tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
+ dst0 = lp_build_cos(&bld->base, tmp0);
+ break;
+
+ case TGSI_OPCODE_DDX:
+ return FALSE;
+
+ case TGSI_OPCODE_DDY:
+ return FALSE;
+
+ case TGSI_OPCODE_KILP:
+ /* predicated kill */
+ return FALSE;
+
+ case TGSI_OPCODE_KIL:
+ /* conditional kill */
+ return FALSE;
+
+ case TGSI_OPCODE_PK2H:
+ return FALSE;
+ break;
+
+ case TGSI_OPCODE_PK2US:
+ return FALSE;
+ break;
+
+ case TGSI_OPCODE_PK4B:
+ return FALSE;
+ break;
+
+ case TGSI_OPCODE_PK4UB:
+ return FALSE;
+
+ case TGSI_OPCODE_RFL:
+ return FALSE;
+
+ case TGSI_OPCODE_SEQ:
+ src0 = emit_fetch(bld, inst, 0);
+ src1 = emit_fetch(bld, inst, 1);
+ tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_EQUAL, src0, src1);
+ dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
+ break;
+
+ case TGSI_OPCODE_SFL:
+ dst0 = bld->base.zero;
+ break;
+
+ case TGSI_OPCODE_SGT:
+ src0 = emit_fetch(bld, inst, 0);
+ src1 = emit_fetch(bld, inst, 1);
+ tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, src0, src1);
+ dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
+ break;
+
+ case TGSI_OPCODE_SIN:
+ src0 = emit_fetch(bld, inst, 0);
+ tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
+ dst0 = lp_build_sin(&bld->base, tmp0);
+ break;
+
+ case TGSI_OPCODE_SLE:
+ src0 = emit_fetch(bld, inst, 0);
+ src1 = emit_fetch(bld, inst, 1);
+ tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LEQUAL, src0, src1);
+ dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
+ break;
+
+ case TGSI_OPCODE_SNE:
+ src0 = emit_fetch(bld, inst, 0);
+ src1 = emit_fetch(bld, inst, 1);
+ tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL, src0, src1);
+ dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
+ break;
+
+ case TGSI_OPCODE_STR:
+ dst0 = bld->base.one;
+ break;
+
+ case TGSI_OPCODE_TEX:
+ dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
+ break;
+
+ case TGSI_OPCODE_TXD:
+ dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV);
+ break;
+
+ case TGSI_OPCODE_UP2H:
+ /* deprecated */
+ assert (0);
+ return FALSE;
+ break;
+
+ case TGSI_OPCODE_UP2US:
+ /* deprecated */
+ assert(0);
+ return FALSE;
+ break;
+
+ case TGSI_OPCODE_UP4B:
+ /* deprecated */
+ assert(0);
+ return FALSE;
+ break;
+
+ case TGSI_OPCODE_UP4UB:
+ /* deprecated */
+ assert(0);
+ return FALSE;
+ break;
+
+ case TGSI_OPCODE_X2D:
+ /* deprecated? */
+ assert(0);
+ return FALSE;
+ break;
+
+ case TGSI_OPCODE_ARA:
+ /* deprecated */
+ assert(0);
+ return FALSE;
+ break;
+
+ case TGSI_OPCODE_ARR:
+ src0 = emit_fetch(bld, inst, 0);
+ dst0 = lp_build_round(&bld->base, src0);
+ break;
+
+ case TGSI_OPCODE_BRA:
+ /* deprecated */
+ assert(0);
+ return FALSE;
+ break;
+
+ case TGSI_OPCODE_CAL:
+ return FALSE;
+
+ case TGSI_OPCODE_RET:
+ return FALSE;
+
+ case TGSI_OPCODE_END:
+ *pc = -1;
+ break;
+
+ case TGSI_OPCODE_SSG:
+ /* TGSI_OPCODE_SGN */
+ tmp0 = emit_fetch(bld, inst, 0);
+ dst0 = lp_build_sgn(&bld->base, tmp0);
+ break;
+
+ case TGSI_OPCODE_CMP:
+ src0 = emit_fetch(bld, inst, 0);
+ src1 = emit_fetch(bld, inst, 1);
+ src2 = emit_fetch(bld, inst, 2);
+ tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LESS, src0, bld->base.zero);
+ dst0 = lp_build_select(&bld->base, tmp0, src1, src2);
+ break;
+
+ case TGSI_OPCODE_SCS:
+ return FALSE;
+
+ case TGSI_OPCODE_TXB:
+ dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
+ break;
+
+ case TGSI_OPCODE_NRM:
+ /* fall-through */
+ case TGSI_OPCODE_NRM4:
+ return FALSE;
+
+ case TGSI_OPCODE_DIV:
+ /* deprecated */
+ assert(0);
+ return FALSE;
+ break;
+
+ case TGSI_OPCODE_DP2:
+ return FALSE;
+
+ case TGSI_OPCODE_TXL:
+ dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD);
+ break;
+
+ case TGSI_OPCODE_TXP:
+ dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED);
+ break;
+
+ case TGSI_OPCODE_BRK:
+ return FALSE;
+
+ case TGSI_OPCODE_IF:
+ return FALSE;
+
+ case TGSI_OPCODE_BGNLOOP:
+ return FALSE;
+
+ case TGSI_OPCODE_BGNSUB:
+ return FALSE;
+
+ case TGSI_OPCODE_ELSE:
+ return FALSE;
+
+ case TGSI_OPCODE_ENDIF:
+ return FALSE;
+
+ case TGSI_OPCODE_ENDLOOP:
+ return FALSE;
+
+ case TGSI_OPCODE_ENDSUB:
+ return FALSE;
+
+ case TGSI_OPCODE_PUSHA:
+ /* deprecated? */
+ assert(0);
+ return FALSE;
+ break;
+
+ case TGSI_OPCODE_POPA:
+ /* deprecated? */
+ assert(0);
+ return FALSE;
+ break;
+
+ case TGSI_OPCODE_CEIL:
+ src0 = emit_fetch(bld, inst, 0);
+ dst0 = lp_build_ceil(&bld->base, src0);
+ break;
+
+ case TGSI_OPCODE_I2F:
+ /* deprecated? */
+ assert(0);
+ return FALSE;
+ break;
+
+ case TGSI_OPCODE_NOT:
+ /* deprecated? */
+ assert(0);
+ return FALSE;
+ break;
+
+ case TGSI_OPCODE_TRUNC:
+ src0 = emit_fetch(bld, inst, 0);
+ dst0 = lp_build_trunc(&bld->base, src0);
+ break;
+
+ case TGSI_OPCODE_SHL:
+ /* deprecated? */
+ assert(0);
+ return FALSE;
+ break;
+
+ case TGSI_OPCODE_ISHR:
+ /* deprecated? */
+ assert(0);
+ return FALSE;
+ break;
+
+ case TGSI_OPCODE_AND:
+ /* deprecated? */
+ assert(0);
+ return FALSE;
+ break;
+
+ case TGSI_OPCODE_OR:
+ /* deprecated? */
+ assert(0);
+ return FALSE;
+ break;
+
+ case TGSI_OPCODE_MOD:
+ /* deprecated? */
+ assert(0);
+ return FALSE;
+ break;
+
+ case TGSI_OPCODE_XOR:
+ /* deprecated? */
+ assert(0);
+ return FALSE;
+ break;
+
+ case TGSI_OPCODE_SAD:
+ /* deprecated? */
+ assert(0);
+ return FALSE;
+ break;
+
+ case TGSI_OPCODE_TXF:
+ /* deprecated? */
+ assert(0);
+ return FALSE;
+ break;
+
+ case TGSI_OPCODE_TXQ:
+ /* deprecated? */
+ assert(0);
+ return FALSE;
+ break;
+
+ case TGSI_OPCODE_CONT:
+ return FALSE;
+
+ case TGSI_OPCODE_EMIT:
+ return FALSE;
+ break;
+
+ case TGSI_OPCODE_ENDPRIM:
+ return FALSE;
+ break;
+
+ case TGSI_OPCODE_NOP:
+ break;
+
+ default:
+ return FALSE;
+ }
+
+ if (info->num_dst) {
+ emit_store(bld, inst, 0, dst0);
+ }
+
+ return TRUE;
+}
+
+
+void
+lp_build_tgsi_aos(LLVMBuilderRef builder,
+ const struct tgsi_token *tokens,
+ struct lp_type type,
+ const unsigned char swizzles[4],
+ LLVMValueRef consts_ptr,
+ const LLVMValueRef *inputs,
+ LLVMValueRef *outputs,
+ struct lp_build_sampler_aos *sampler,
+ const struct tgsi_shader_info *info)
+{
+ struct lp_build_tgsi_aos_context bld;
+ struct tgsi_parse_context parse;
+ uint num_immediates = 0;
+ uint num_instructions = 0;
+ unsigned chan;
+ int pc = 0;
+
+ /* Setup build context */
+ memset(&bld, 0, sizeof bld);
+ lp_build_context_init(&bld.base, builder, type);
+ lp_build_context_init(&bld.int_bld, builder, lp_int_type(type));
+
+ for (chan = 0; chan < 4; ++chan) {
+ bld.swizzles[chan] = swizzles[chan];
+ bld.inv_swizzles[swizzles[chan]] = chan;
+ }
+
+ bld.inputs = inputs;
+ bld.outputs = outputs;
+ bld.consts_ptr = consts_ptr;
+ bld.sampler = sampler;
+ bld.indirect_files = info->indirect_files;
+ bld.instructions = (struct tgsi_full_instruction *)
+ MALLOC(LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction));
+ bld.max_instructions = LP_MAX_INSTRUCTIONS;
+
+ if (!bld.instructions) {
+ return;
+ }
+
+ tgsi_parse_init(&parse, tokens);
+
+ while (!tgsi_parse_end_of_tokens(&parse)) {
+ tgsi_parse_token(&parse);
+
+ switch(parse.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ /* Inputs already interpolated */
+ emit_declaration(&bld, &parse.FullToken.FullDeclaration);
+ break;
+
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ {
+ /* save expanded instruction */
+ if (num_instructions == bld.max_instructions) {
+ struct tgsi_full_instruction *instructions;
+ instructions = REALLOC(bld.instructions,
+ bld.max_instructions
+ * sizeof(struct tgsi_full_instruction),
+ (bld.max_instructions + LP_MAX_INSTRUCTIONS)
+ * sizeof(struct tgsi_full_instruction));
+ if (!instructions) {
+ break;
+ }
+ bld.instructions = instructions;
+ bld.max_instructions += LP_MAX_INSTRUCTIONS;
+ }
+
+ memcpy(bld.instructions + num_instructions,
+ &parse.FullToken.FullInstruction,
+ sizeof(bld.instructions[0]));
+
+ num_instructions++;
+ }
+
+ break;
+
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ /* simply copy the immediate values into the next immediates[] slot */
+ {
+ const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
+ float imm[4];
+ assert(size <= 4);
+ assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
+ for (chan = 0; chan < 4; ++chan) {
+ imm[chan] = 0.0f;
+ }
+ for (chan = 0; chan < size; ++chan) {
+ unsigned swizzle = bld.swizzles[chan];
+ imm[swizzle] = parse.FullToken.FullImmediate.u[chan].Float;
+ }
+ bld.immediates[num_immediates] =
+ lp_build_const_aos(type,
+ imm[0], imm[1], imm[2], imm[3],
+ NULL);
+ num_immediates++;
+ }
+ break;
+
+ case TGSI_TOKEN_TYPE_PROPERTY:
+ break;
+
+ default:
+ assert(0);
+ }
+ }
+
+ while (pc != -1) {
+ struct tgsi_full_instruction *instr = bld.instructions + pc;
+ const struct tgsi_opcode_info *opcode_info =
+ tgsi_get_opcode_info(instr->Instruction.Opcode);
+ if (!emit_instruction(&bld, instr, opcode_info, &pc))
+ _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
+ opcode_info->mnemonic);
+ }
+
+ if (0) {
+ LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
+ LLVMValueRef function = LLVMGetBasicBlockParent(block);
+ debug_printf("11111111111111111111111111111 \n");
+ tgsi_dump(tokens, 0);
+ lp_debug_dump_value(function);
+ debug_printf("2222222222222222222222222222 \n");
+ }
+ tgsi_parse_free(&parse);
+
+ if (0) {
+ LLVMModuleRef module = LLVMGetGlobalParent(
+ LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld.base.builder)));
+ LLVMDumpModule(module);
+ }
+
+ FREE(bld.instructions);
+}
+
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index d3c769e28b8..441aebae298 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -45,22 +45,21 @@
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
-#include "tgsi/tgsi_exec.h"
#include "tgsi/tgsi_scan.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_arit.h"
+#include "lp_bld_bitarit.h"
+#include "lp_bld_gather.h"
#include "lp_bld_logic.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_flow.h"
+#include "lp_bld_quad.h"
#include "lp_bld_tgsi.h"
+#include "lp_bld_limits.h"
#include "lp_bld_debug.h"
-#define LP_MAX_TEMPS 256
-#define LP_MAX_IMMEDIATES 256
-
-
#define FOR_EACH_CHANNEL( CHAN )\
for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)
@@ -78,13 +77,10 @@
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3
+#define NUM_CHANNELS 4
-#define QUAD_TOP_LEFT 0
-#define QUAD_TOP_RIGHT 1
-#define QUAD_BOTTOM_LEFT 2
-#define QUAD_BOTTOM_RIGHT 3
+#define LP_MAX_INSTRUCTIONS 256
-#define LP_TGSI_MAX_NESTING 16
struct lp_exec_mask {
struct lp_build_context *bld;
@@ -93,22 +89,28 @@ struct lp_exec_mask {
LLVMTypeRef int_vec_type;
- LLVMValueRef cond_stack[LP_TGSI_MAX_NESTING];
+ LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
int cond_stack_size;
LLVMValueRef cond_mask;
- LLVMValueRef break_stack[LP_TGSI_MAX_NESTING];
- int break_stack_size;
- LLVMValueRef break_mask;
-
- LLVMValueRef cont_stack[LP_TGSI_MAX_NESTING];
- int cont_stack_size;
+ LLVMBasicBlockRef loop_block;
LLVMValueRef cont_mask;
-
- LLVMBasicBlockRef loop_stack[LP_TGSI_MAX_NESTING];
+ LLVMValueRef break_mask;
+ LLVMValueRef break_var;
+ struct {
+ LLVMBasicBlockRef loop_block;
+ LLVMValueRef cont_mask;
+ LLVMValueRef break_mask;
+ LLVMValueRef break_var;
+ } loop_stack[LP_MAX_TGSI_NESTING];
int loop_stack_size;
- LLVMBasicBlockRef loop_block;
+ LLVMValueRef ret_mask;
+ struct {
+ int pc;
+ LLVMValueRef ret_mask;
+ } call_stack[LP_MAX_TGSI_NESTING];
+ int call_stack_size;
LLVMValueRef exec_mask;
};
@@ -117,48 +119,36 @@ struct lp_build_tgsi_soa_context
{
struct lp_build_context base;
+ /* Builder for integer masks and indices */
+ struct lp_build_context uint_bld;
+
LLVMValueRef consts_ptr;
const LLVMValueRef *pos;
const LLVMValueRef (*inputs)[NUM_CHANNELS];
LLVMValueRef (*outputs)[NUM_CHANNELS];
- struct lp_build_sampler_soa *sampler;
+ const struct lp_build_sampler_soa *sampler;
- LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS];
- LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];
- LLVMValueRef addr[LP_MAX_TEMPS][NUM_CHANNELS];
+ LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS];
+ LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS];
+ LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];
+ LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS];
- /* we allocate an array of temps if we have indirect
- * addressing and then the temps above is unused */
+ /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
+ * set in the indirect_files field.
+ * The temps[] array above is unused then.
+ */
LLVMValueRef temps_array;
- boolean has_indirect_addressing;
+
+ const struct tgsi_shader_info *info;
+ /** bitmask indicating which register files are accessed indirectly */
+ unsigned indirect_files;
struct lp_build_mask_context *mask;
struct lp_exec_mask exec_mask;
-};
-
-static const unsigned char
-swizzle_left[4] = {
- QUAD_TOP_LEFT, QUAD_TOP_LEFT,
- QUAD_BOTTOM_LEFT, QUAD_BOTTOM_LEFT
-};
-
-static const unsigned char
-swizzle_right[4] = {
- QUAD_TOP_RIGHT, QUAD_TOP_RIGHT,
- QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT
-};
-static const unsigned char
-swizzle_top[4] = {
- QUAD_TOP_LEFT, QUAD_TOP_RIGHT,
- QUAD_TOP_LEFT, QUAD_TOP_RIGHT
-};
-
-static const unsigned char
-swizzle_bottom[4] = {
- QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT,
- QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT
+ struct tgsi_full_instruction *instructions;
+ uint max_instructions;
};
static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
@@ -167,10 +157,11 @@ static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context
mask->has_mask = FALSE;
mask->cond_stack_size = 0;
mask->loop_stack_size = 0;
- mask->break_stack_size = 0;
- mask->cont_stack_size = 0;
+ mask->call_stack_size = 0;
mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
+ mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask =
+ LLVMConstAllOnes(mask->int_vec_type);
}
static void lp_exec_mask_update(struct lp_exec_mask *mask)
@@ -190,33 +181,47 @@ static void lp_exec_mask_update(struct lp_exec_mask *mask)
} else
mask->exec_mask = mask->cond_mask;
+ if (mask->call_stack_size) {
+ mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
+ mask->exec_mask,
+ mask->ret_mask,
+ "callmask");
+ }
mask->has_mask = (mask->cond_stack_size > 0 ||
- mask->loop_stack_size > 0);
+ mask->loop_stack_size > 0 ||
+ mask->call_stack_size > 0);
}
static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
LLVMValueRef val)
{
+ assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
+ if (mask->cond_stack_size == 0) {
+ assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
+ }
mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
- mask->cond_mask = LLVMBuildBitCast(mask->bld->builder, val,
- mask->int_vec_type, "");
-
+ assert(LLVMTypeOf(val) == mask->int_vec_type);
+ mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
+ mask->cond_mask,
+ val,
+ "");
lp_exec_mask_update(mask);
}
static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
{
- LLVMValueRef prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
- LLVMValueRef inv_mask = LLVMBuildNot(mask->bld->builder,
- mask->cond_mask, "");
-
- /* means that we didn't have any mask before and that
- * we were fully enabled */
- if (mask->cond_stack_size <= 1) {
- prev_mask = LLVMConstAllOnes(mask->int_vec_type);
+ LLVMValueRef prev_mask;
+ LLVMValueRef inv_mask;
+
+ assert(mask->cond_stack_size);
+ prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
+ if (mask->cond_stack_size == 1) {
+ assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
}
+ inv_mask = LLVMBuildNot(mask->bld->builder, mask->cond_mask, "");
+
mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
inv_mask,
prev_mask, "");
@@ -225,27 +230,37 @@ static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
{
+ assert(mask->cond_stack_size);
mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
lp_exec_mask_update(mask);
}
static void lp_exec_bgnloop(struct lp_exec_mask *mask)
{
+ if (mask->loop_stack_size == 0) {
+ assert(mask->loop_block == NULL);
+ assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
+ assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
+ assert(mask->break_var == NULL);
+ }
- if (mask->cont_stack_size == 0)
- mask->cont_mask = LLVMConstAllOnes(mask->int_vec_type);
- if (mask->break_stack_size == 0)
- mask->break_mask = LLVMConstAllOnes(mask->int_vec_type);
- if (mask->cond_stack_size == 0)
- mask->cond_mask = LLVMConstAllOnes(mask->int_vec_type);
+ assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);
+
+ mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
+ mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
+ mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
+ mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
+ ++mask->loop_stack_size;
+
+ mask->break_var = lp_build_alloca(mask->bld->builder, mask->int_vec_type, "");
+ LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);
- mask->break_stack[mask->break_stack_size++] = mask->break_mask;
- mask->cont_stack[mask->cont_stack_size++] = mask->cont_mask;
- mask->loop_stack[mask->loop_stack_size++] = mask->loop_block;
mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop");
LLVMBuildBr(mask->bld->builder, mask->loop_block);
LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block);
+ mask->break_mask = LLVMBuildLoad(mask->bld->builder, mask->break_var, "");
+
lp_exec_mask_update(mask);
}
@@ -255,16 +270,9 @@ static void lp_exec_break(struct lp_exec_mask *mask)
mask->exec_mask,
"break");
- /* mask->break_stack_size > 1 implies that we encountered a break
- * statemant already and if that's the case we want to make sure
- * our mask is a combination of the previous break and the current
- * execution mask */
- if (mask->break_stack_size > 1) {
- mask->break_mask = LLVMBuildAnd(mask->bld->builder,
- mask->break_mask,
- exec_mask, "break_full");
- } else
- mask->break_mask = exec_mask;
+ mask->break_mask = LLVMBuildAnd(mask->bld->builder,
+ mask->break_mask,
+ exec_mask, "break_full");
lp_exec_mask_update(mask);
}
@@ -275,12 +283,9 @@ static void lp_exec_continue(struct lp_exec_mask *mask)
mask->exec_mask,
"");
- if (mask->cont_stack_size > 1) {
- mask->cont_mask = LLVMBuildAnd(mask->bld->builder,
- mask->cont_mask,
- exec_mask, "");
- } else
- mask->cont_mask = exec_mask;
+ mask->cont_mask = LLVMBuildAnd(mask->bld->builder,
+ mask->cont_mask,
+ exec_mask, "");
lp_exec_mask_update(mask);
}
@@ -295,11 +300,24 @@ static void lp_exec_endloop(struct lp_exec_mask *mask)
assert(mask->break_mask);
+ /*
+ * Restore the cont_mask, but don't pop
+ */
+ assert(mask->loop_stack_size);
+ mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
+ lp_exec_mask_update(mask);
+
+ /*
+ * Unlike the continue mask, the break_mask must be preserved across loop
+ * iterations
+ */
+ LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);
+
/* i1cond = (mask == 0) */
i1cond = LLVMBuildICmp(
mask->bld->builder,
LLVMIntNE,
- LLVMBuildBitCast(mask->bld->builder, mask->break_mask, reg_type, ""),
+ LLVMBuildBitCast(mask->bld->builder, mask->exec_mask, reg_type, ""),
LLVMConstNull(reg_type), "");
endloop = lp_build_insert_new_block(mask->bld->builder, "endloop");
@@ -309,15 +327,12 @@ static void lp_exec_endloop(struct lp_exec_mask *mask)
LLVMPositionBuilderAtEnd(mask->bld->builder, endloop);
- mask->loop_block = mask->loop_stack[--mask->loop_stack_size];
- /* pop the cont mask */
- if (mask->cont_stack_size) {
- mask->cont_mask = mask->cont_stack[--mask->cont_stack_size];
- }
- /* pop the break mask */
- if (mask->break_stack_size) {
- mask->break_mask = mask->break_stack[--mask->break_stack_size];
- }
+ assert(mask->loop_stack_size);
+ --mask->loop_stack_size;
+ mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
+ mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
+ mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
+ mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;
lp_exec_mask_update(mask);
}
@@ -328,15 +343,25 @@ static void lp_exec_endloop(struct lp_exec_mask *mask)
* (0 means don't store this bit, 1 means do store).
*/
static void lp_exec_mask_store(struct lp_exec_mask *mask,
+ LLVMValueRef pred,
LLVMValueRef val,
LLVMValueRef dst)
{
+ /* Mix the predicate and execution mask */
if (mask->has_mask) {
+ if (pred) {
+ pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, "");
+ } else {
+ pred = mask->exec_mask;
+ }
+ }
+
+ if (pred) {
LLVMValueRef real_val, dst_val;
dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
real_val = lp_build_select(mask->bld,
- mask->exec_mask,
+ pred,
val, dst_val);
LLVMBuildStore(mask->bld->builder, real_val, dst);
@@ -344,44 +369,149 @@ static void lp_exec_mask_store(struct lp_exec_mask *mask,
LLVMBuildStore(mask->bld->builder, val, dst);
}
+static void lp_exec_mask_call(struct lp_exec_mask *mask,
+ int func,
+ int *pc)
+{
+ assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
+ mask->call_stack[mask->call_stack_size].pc = *pc;
+ mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
+ mask->call_stack_size++;
+ *pc = func;
+}
-static LLVMValueRef
-emit_ddx(struct lp_build_tgsi_soa_context *bld,
- LLVMValueRef src)
+static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
{
- LLVMValueRef src_left = lp_build_swizzle1_aos(&bld->base, src, swizzle_left);
- LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right);
- return lp_build_sub(&bld->base, src_right, src_left);
+ LLVMValueRef exec_mask;
+
+ if (mask->call_stack_size == 0) {
+ /* returning from main() */
+ *pc = -1;
+ return;
+ }
+ exec_mask = LLVMBuildNot(mask->bld->builder,
+ mask->exec_mask,
+ "ret");
+
+ mask->ret_mask = LLVMBuildAnd(mask->bld->builder,
+ mask->ret_mask,
+ exec_mask, "ret_full");
+
+ lp_exec_mask_update(mask);
}
+static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
+{
+}
-static LLVMValueRef
-emit_ddy(struct lp_build_tgsi_soa_context *bld,
- LLVMValueRef src)
+static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
{
- LLVMValueRef src_top = lp_build_swizzle1_aos(&bld->base, src, swizzle_top);
- LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom);
- return lp_build_sub(&bld->base, src_top, src_bottom);
+ assert(mask->call_stack_size);
+ mask->call_stack_size--;
+ *pc = mask->call_stack[mask->call_stack_size].pc;
+ mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
+ lp_exec_mask_update(mask);
}
+
+/**
+ * Return pointer to a temporary register channel (src or dest).
+ * Note that indirect addressing cannot be handled here.
+ * \param index which temporary register
+ * \param chan which channel of the temp register.
+ */
static LLVMValueRef
get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
unsigned index,
- unsigned swizzle,
- boolean is_indirect,
- LLVMValueRef addr)
+ unsigned chan)
{
- if (!bld->has_indirect_addressing) {
- return bld->temps[index][swizzle];
- } else {
- LLVMValueRef lindex =
- LLVMConstInt(LLVMInt32Type(), index*4 + swizzle, 0);
- if (is_indirect)
- lindex = lp_build_add(&bld->base, lindex, addr);
+ assert(chan < 4);
+ if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
+ LLVMValueRef lindex = lp_build_const_int32(index * 4 + chan);
return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, "");
}
+ else {
+ return bld->temps[index][chan];
+ }
+}
+
+
+/**
+ * Gather vector.
+ * XXX the lp_build_gather() function should be capable of doing this
+ * with a little work.
+ */
+static LLVMValueRef
+build_gather(struct lp_build_tgsi_soa_context *bld,
+ LLVMValueRef base_ptr,
+ LLVMValueRef indexes)
+{
+ LLVMValueRef res = bld->base.undef;
+ unsigned i;
+
+ /*
+ * Loop over elements of index_vec, load scalar value, insert it into 'res'.
+ */
+ for (i = 0; i < bld->base.type.length; i++) {
+ LLVMValueRef ii = LLVMConstInt(LLVMInt32Type(), i, 0);
+ LLVMValueRef index = LLVMBuildExtractElement(bld->base.builder,
+ indexes, ii, "");
+ LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, base_ptr,
+ &index, 1, "");
+ LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
+
+ res = LLVMBuildInsertElement(bld->base.builder, res, scalar, ii, "");
+ }
+
+ return res;
}
+
+/**
+ * Read the current value of the ADDR register, convert the floats to
+ * ints, multiply by four and return the vector of offsets.
+ * The offsets will be used to index into the constant buffer or
+ * temporary register file.
+ */
+static LLVMValueRef
+get_indirect_index(struct lp_build_tgsi_soa_context *bld,
+ unsigned reg_file, unsigned reg_index,
+ const struct tgsi_src_register *indirect_reg)
+{
+ struct lp_build_context *uint_bld = &bld->uint_bld;
+ /* always use X component of address register */
+ unsigned swizzle = indirect_reg->SwizzleX;
+ LLVMValueRef base;
+ LLVMValueRef rel;
+ LLVMValueRef max_index;
+ LLVMValueRef index;
+
+ assert(bld->indirect_files & (1 << reg_file));
+
+ base = lp_build_const_int_vec(uint_bld->type, reg_index);
+
+ assert(swizzle < 4);
+ rel = LLVMBuildLoad(bld->base.builder,
+ bld->addr[indirect_reg->Index][swizzle],
+ "load addr reg");
+
+ /* for indexing we want integers */
+ rel = LLVMBuildFPToSI(bld->base.builder,
+ rel,
+ uint_bld->vec_type, "");
+
+ index = lp_build_add(uint_bld, base, rel);
+
+ max_index = lp_build_const_int_vec(uint_bld->type,
+ bld->info->file_max[reg_file]);
+
+ assert(!uint_bld->type.sign);
+ index = lp_build_min(uint_bld, index, max_index);
+
+ return index;
+}
+
+
/**
* Register fetch.
*/
@@ -389,81 +519,102 @@ static LLVMValueRef
emit_fetch(
struct lp_build_tgsi_soa_context *bld,
const struct tgsi_full_instruction *inst,
- unsigned index,
+ unsigned src_op,
const unsigned chan_index )
{
- const struct tgsi_full_src_register *reg = &inst->Src[index];
- unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
+ struct lp_build_context *uint_bld = &bld->uint_bld;
+ const struct tgsi_full_src_register *reg = &inst->Src[src_op];
+ const unsigned swizzle =
+ tgsi_util_get_full_src_register_swizzle(reg, chan_index);
LLVMValueRef res;
- LLVMValueRef addr;
+ LLVMValueRef indirect_index = NULL;
+
+ if (swizzle > 3) {
+ assert(0 && "invalid swizzle in emit_fetch()");
+ return bld->base.undef;
+ }
- switch (swizzle) {
- case TGSI_SWIZZLE_X:
- case TGSI_SWIZZLE_Y:
- case TGSI_SWIZZLE_Z:
- case TGSI_SWIZZLE_W:
+ if (reg->Register.Indirect) {
+ indirect_index = get_indirect_index(bld,
+ reg->Register.File,
+ reg->Register.Index,
+ &reg->Indirect);
+ } else {
+ assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]);
+ }
+ switch (reg->Register.File) {
+ case TGSI_FILE_CONSTANT:
if (reg->Register.Indirect) {
- LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
- unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
- addr = LLVMBuildLoad(bld->base.builder,
- bld->addr[reg->Indirect.Index][swizzle],
- "");
- /* for indexing we want integers */
- addr = LLVMBuildFPToSI(bld->base.builder, addr,
- int_vec_type, "");
- addr = LLVMBuildExtractElement(bld->base.builder,
- addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
- "");
- addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0));
- }
-
- switch (reg->Register.File) {
- case TGSI_FILE_CONSTANT: {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0);
+ LLVMValueRef swizzle_vec =
+ lp_build_const_int_vec(uint_bld->type, swizzle);
+ LLVMValueRef index_vec; /* index into the const buffer */
+
+ /* index_vec = indirect_index * 4 + swizzle */
+ index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
+ index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
+
+ /* Gather values from the constant buffer */
+ res = build_gather(bld, bld->consts_ptr, index_vec);
+ }
+ else {
+ LLVMValueRef index; /* index into the const buffer */
LLVMValueRef scalar, scalar_ptr;
- if (reg->Register.Indirect) {
- /*lp_build_printf(bld->base.builder,
- "\taddr = %d\n", addr);*/
- index = lp_build_add(&bld->base, index, addr);
- }
- scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
+ index = lp_build_const_int32(reg->Register.Index*4 + swizzle);
+
+ scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr,
+ &index, 1, "");
scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
res = lp_build_broadcast_scalar(&bld->base, scalar);
- break;
}
+ break;
- case TGSI_FILE_IMMEDIATE:
- res = bld->immediates[reg->Register.Index][swizzle];
- assert(res);
- break;
+ case TGSI_FILE_IMMEDIATE:
+ res = bld->immediates[reg->Register.Index][swizzle];
+ assert(res);
+ break;
- case TGSI_FILE_INPUT:
- res = bld->inputs[reg->Register.Index][swizzle];
- assert(res);
- break;
+ case TGSI_FILE_INPUT:
+ res = bld->inputs[reg->Register.Index][swizzle];
+ assert(res);
+ break;
- case TGSI_FILE_TEMPORARY: {
- LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
- swizzle,
- reg->Register.Indirect,
- addr);
+ case TGSI_FILE_TEMPORARY:
+ if (reg->Register.Indirect) {
+ LLVMValueRef swizzle_vec =
+ lp_build_const_int_vec(uint_bld->type, swizzle);
+ LLVMValueRef length_vec =
+ lp_build_const_int_vec(uint_bld->type, bld->base.type.length);
+ LLVMValueRef index_vec; /* index into the const buffer */
+ LLVMValueRef temps_array;
+ LLVMTypeRef float4_ptr_type;
+
+ /* index_vec = (indirect_index * 4 + swizzle) * length */
+ index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
+ index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
+ index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
+
+ /* cast temps_array pointer to float* */
+ float4_ptr_type = LLVMPointerType(LLVMFloatType(), 0);
+ temps_array = LLVMBuildBitCast(uint_bld->builder, bld->temps_array,
+ float4_ptr_type, "");
+
+ /* Gather values from the temporary register array */
+ res = build_gather(bld, temps_array, index_vec);
+ }
+ else {
+ LLVMValueRef temp_ptr;
+ temp_ptr = get_temp_ptr(bld, reg->Register.Index, swizzle);
res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
- if(!res)
+ if (!res)
return bld->base.undef;
- break;
- }
-
- default:
- assert( 0 );
- return bld->base.undef;
}
break;
default:
- assert( 0 );
+ assert(0 && "invalid src register in emit_fetch()");
return bld->base.undef;
}
@@ -473,13 +624,10 @@ emit_fetch(
break;
case TGSI_UTIL_SIGN_SET:
- /* TODO: Use bitwese OR for floating point */
res = lp_build_abs( &bld->base, res );
- res = LLVMBuildNeg( bld->base.builder, res, "" );
- break;
-
+ /* fall through */
case TGSI_UTIL_SIGN_TOGGLE:
- res = LLVMBuildNeg( bld->base.builder, res, "" );
+ res = lp_build_negate( &bld->base, res );
break;
case TGSI_UTIL_SIGN_KEEP:
@@ -513,10 +661,77 @@ emit_fetch_deriv(
/* TODO: use interpolation coeffs for inputs */
if(ddx)
- *ddx = emit_ddx(bld, src);
+ *ddx = lp_build_ddx(&bld->base, src);
if(ddy)
- *ddy = emit_ddy(bld, src);
+ *ddy = lp_build_ddy(&bld->base, src);
+}
+
+
+/**
+ * Predicate.
+ */
+static void
+emit_fetch_predicate(
+ struct lp_build_tgsi_soa_context *bld,
+ const struct tgsi_full_instruction *inst,
+ LLVMValueRef *pred)
+{
+ unsigned index;
+ unsigned char swizzles[4];
+ LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
+ LLVMValueRef value;
+ unsigned chan;
+
+ if (!inst->Instruction.Predicate) {
+ FOR_EACH_CHANNEL( chan ) {
+ pred[chan] = NULL;
+ }
+ return;
+ }
+
+ swizzles[0] = inst->Predicate.SwizzleX;
+ swizzles[1] = inst->Predicate.SwizzleY;
+ swizzles[2] = inst->Predicate.SwizzleZ;
+ swizzles[3] = inst->Predicate.SwizzleW;
+
+ index = inst->Predicate.Index;
+ assert(index < LP_MAX_TGSI_PREDS);
+
+ FOR_EACH_CHANNEL( chan ) {
+ unsigned swizzle = swizzles[chan];
+
+ /*
+ * Only fetch the predicate register channels that are actually listed
+ * in the swizzles
+ */
+ if (!unswizzled[swizzle]) {
+ value = LLVMBuildLoad(bld->base.builder,
+ bld->preds[index][swizzle], "");
+
+ /*
+ * Convert the value to an integer mask.
+ *
+ * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
+ * is needlessly causing two comparisons due to storing the intermediate
+ * result as float vector instead of an integer mask vector.
+ */
+ value = lp_build_compare(bld->base.builder,
+ bld->base.type,
+ PIPE_FUNC_NOTEQUAL,
+ value,
+ bld->base.zero);
+ if (inst->Predicate.Negate) {
+ value = LLVMBuildNot(bld->base.builder, value, "");
+ }
+
+ unswizzled[swizzle] = value;
+ } else {
+ value = unswizzled[swizzle];
+ }
+
+ pred[chan] = value;
+ }
}
@@ -529,10 +744,11 @@ emit_store(
const struct tgsi_full_instruction *inst,
unsigned index,
unsigned chan_index,
+ LLVMValueRef pred,
LLVMValueRef value)
{
const struct tgsi_full_dst_register *reg = &inst->Dst[index];
- LLVMValueRef addr;
+ LLVMValueRef indirect_index = NULL;
switch( inst->Instruction.Saturate ) {
case TGSI_SAT_NONE:
@@ -553,43 +769,41 @@ emit_store(
}
if (reg->Register.Indirect) {
- LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
- unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
- addr = LLVMBuildLoad(bld->base.builder,
- bld->addr[reg->Indirect.Index][swizzle],
- "");
- /* for indexing we want integers */
- addr = LLVMBuildFPToSI(bld->base.builder, addr,
- int_vec_type, "");
- addr = LLVMBuildExtractElement(bld->base.builder,
- addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
- "");
- addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0));
+ indirect_index = get_indirect_index(bld,
+ reg->Register.File,
+ reg->Register.Index,
+ &reg->Indirect);
+ } else {
+ assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]);
}
switch( reg->Register.File ) {
case TGSI_FILE_OUTPUT:
- lp_exec_mask_store(&bld->exec_mask, value,
+ lp_exec_mask_store(&bld->exec_mask, pred, value,
bld->outputs[reg->Register.Index][chan_index]);
break;
- case TGSI_FILE_TEMPORARY: {
- LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
- chan_index,
- reg->Register.Indirect,
- addr);
- lp_exec_mask_store(&bld->exec_mask, value, temp_ptr);
+ case TGSI_FILE_TEMPORARY:
+ if (reg->Register.Indirect) {
+ /* XXX not done yet */
+ debug_printf("WARNING: LLVM scatter store of temp regs"
+ " not implemented\n");
+ }
+ else {
+ LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
+ chan_index);
+ lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr);
+ }
break;
- }
case TGSI_FILE_ADDRESS:
- lp_exec_mask_store(&bld->exec_mask, value,
+ lp_exec_mask_store(&bld->exec_mask, pred, value,
bld->addr[reg->Indirect.Index][chan_index]);
break;
case TGSI_FILE_PREDICATE:
- /* FIXME */
- assert(0);
+ lp_exec_mask_store(&bld->exec_mask, pred, value,
+ bld->preds[reg->Register.Index][chan_index]);
break;
default:
@@ -602,21 +816,29 @@ emit_store(
* High-level instruction translators.
*/
-
static void
emit_tex( struct lp_build_tgsi_soa_context *bld,
const struct tgsi_full_instruction *inst,
- boolean apply_lodbias,
- boolean projected,
+ enum lp_build_tex_modifier modifier,
LLVMValueRef *texel)
{
- const uint unit = inst->Src[1].Register.Index;
- LLVMValueRef lodbias;
+ unsigned unit;
+ LLVMValueRef lod_bias, explicit_lod;
LLVMValueRef oow = NULL;
LLVMValueRef coords[3];
+ LLVMValueRef ddx[3];
+ LLVMValueRef ddy[3];
unsigned num_coords;
unsigned i;
+ if (!bld->sampler) {
+ _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
+ for (i = 0; i < 4; i++) {
+ texel[i] = bld->base.undef;
+ }
+ return;
+ }
+
switch (inst->Texture.Texture) {
case TGSI_TEXTURE_1D:
num_coords = 1;
@@ -637,29 +859,57 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
return;
}
- if(apply_lodbias)
- lodbias = emit_fetch( bld, inst, 0, 3 );
- else
- lodbias = bld->base.zero;
+ if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
+ lod_bias = emit_fetch( bld, inst, 0, 3 );
+ explicit_lod = NULL;
+ }
+ else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
+ lod_bias = NULL;
+ explicit_lod = emit_fetch( bld, inst, 0, 3 );
+ }
+ else {
+ lod_bias = NULL;
+ explicit_lod = NULL;
+ }
- if (projected) {
+ if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
oow = emit_fetch( bld, inst, 0, 3 );
oow = lp_build_rcp(&bld->base, oow);
}
for (i = 0; i < num_coords; i++) {
coords[i] = emit_fetch( bld, inst, 0, i );
- if (projected)
+ if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
coords[i] = lp_build_mul(&bld->base, coords[i], oow);
}
for (i = num_coords; i < 3; i++) {
coords[i] = bld->base.undef;
}
+ if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
+ for (i = 0; i < num_coords; i++) {
+ ddx[i] = emit_fetch( bld, inst, 1, i );
+ ddy[i] = emit_fetch( bld, inst, 2, i );
+ }
+ unit = inst->Src[3].Register.Index;
+ } else {
+ for (i = 0; i < num_coords; i++) {
+ ddx[i] = lp_build_ddx( &bld->base, coords[i] );
+ ddy[i] = lp_build_ddy( &bld->base, coords[i] );
+ }
+ unit = inst->Src[1].Register.Index;
+ }
+ for (i = num_coords; i < 3; i++) {
+ ddx[i] = bld->base.undef;
+ ddy[i] = bld->base.undef;
+ }
+
bld->sampler->emit_fetch_texel(bld->sampler,
bld->base.builder,
bld->base.type,
- unit, num_coords, coords, lodbias,
+ unit, num_coords, coords,
+ ddx, ddy,
+ lod_bias, explicit_lod,
texel);
}
@@ -739,25 +989,27 @@ emit_kilp(struct lp_build_tgsi_soa_context *bld,
lp_build_mask_update(bld->mask, mask);
}
-static int
+static void
emit_declaration(
struct lp_build_tgsi_soa_context *bld,
const struct tgsi_full_declaration *decl)
{
- LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);
+ LLVMTypeRef vec_type = bld->base.vec_type;
unsigned first = decl->Range.First;
unsigned last = decl->Range.Last;
unsigned idx, i;
for (idx = first; idx <= last; ++idx) {
+ assert(last <= bld->info->file_max[decl->Declaration.File]);
switch (decl->Declaration.File) {
case TGSI_FILE_TEMPORARY:
- if (bld->has_indirect_addressing) {
- LLVMValueRef val = LLVMConstInt(LLVMInt32Type(),
- last*4 + 4, 0);
+ assert(idx < LP_MAX_TGSI_TEMPS);
+ if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
+ LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(),
+ last*4 + 4, 0);
bld->temps_array = lp_build_array_alloca(bld->base.builder,
- vec_type, val, "");
+ vec_type, array_size, "");
} else {
for (i = 0; i < NUM_CHANNELS; i++)
bld->temps[idx][i] = lp_build_alloca(bld->base.builder,
@@ -772,18 +1024,24 @@ emit_declaration(
break;
case TGSI_FILE_ADDRESS:
+ assert(idx < LP_MAX_TGSI_ADDRS);
for (i = 0; i < NUM_CHANNELS; i++)
bld->addr[idx][i] = lp_build_alloca(bld->base.builder,
vec_type, "");
break;
+ case TGSI_FILE_PREDICATE:
+ assert(idx < LP_MAX_TGSI_PREDS);
+ for (i = 0; i < NUM_CHANNELS; i++)
+ bld->preds[idx][i] = lp_build_alloca(bld->base.builder,
+ vec_type, "");
+ break;
+
default:
/* don't need to declare other vars */
break;
}
}
-
- return TRUE;
}
@@ -795,7 +1053,8 @@ static boolean
emit_instruction(
struct lp_build_tgsi_soa_context *bld,
const struct tgsi_full_instruction *inst,
- const struct tgsi_opcode_info *info)
+ const struct tgsi_opcode_info *info,
+ int *pc)
{
unsigned chan_index;
LLVMValueRef src0, src1, src2;
@@ -819,8 +1078,10 @@ emit_instruction(
* redundant code.
*/
+ (*pc)++;
+
assert(info->num_dst <= 1);
- if(info->num_dst) {
+ if (info->num_dst) {
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
dst0[chan_index] = bld->base.undef;
}
@@ -1359,12 +1620,11 @@ emit_instruction(
break;
case TGSI_OPCODE_TEX:
- emit_tex( bld, inst, FALSE, FALSE, dst0 );
+ emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_NONE, dst0 );
break;
case TGSI_OPCODE_TXD:
- /* FIXME */
- return FALSE;
+ emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, dst0 );
break;
case TGSI_OPCODE_UP2H:
@@ -1418,16 +1678,18 @@ emit_instruction(
break;
case TGSI_OPCODE_CAL:
- /* FIXME */
- return FALSE;
+ lp_exec_mask_call(&bld->exec_mask,
+ inst->Label.Label,
+ pc);
+
break;
case TGSI_OPCODE_RET:
- /* FIXME */
- return FALSE;
+ lp_exec_mask_ret(&bld->exec_mask, pc);
break;
case TGSI_OPCODE_END:
+ *pc = -1;
break;
case TGSI_OPCODE_SSG:
@@ -1466,7 +1728,7 @@ emit_instruction(
break;
case TGSI_OPCODE_TXB:
- emit_tex( bld, inst, TRUE, FALSE, dst0 );
+ emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, dst0 );
break;
case TGSI_OPCODE_NRM:
@@ -1571,11 +1833,11 @@ emit_instruction(
break;
case TGSI_OPCODE_TXL:
- emit_tex( bld, inst, TRUE, FALSE, dst0 );
+ emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, dst0 );
break;
case TGSI_OPCODE_TXP:
- emit_tex( bld, inst, FALSE, TRUE, dst0 );
+ emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED, dst0 );
break;
case TGSI_OPCODE_BRK:
@@ -1593,6 +1855,10 @@ emit_instruction(
lp_exec_bgnloop(&bld->exec_mask);
break;
+ case TGSI_OPCODE_BGNSUB:
+ lp_exec_mask_bgnsub(&bld->exec_mask);
+ break;
+
case TGSI_OPCODE_ELSE:
lp_exec_mask_cond_invert(&bld->exec_mask);
break;
@@ -1605,6 +1871,10 @@ emit_instruction(
lp_exec_endloop(&bld->exec_mask);
break;
+ case TGSI_OPCODE_ENDSUB:
+ lp_exec_mask_endsub(&bld->exec_mask, pc);
+ break;
+
case TGSI_OPCODE_PUSHA:
/* deprecated? */
assert(0);
@@ -1717,8 +1987,12 @@ emit_instruction(
}
if(info->num_dst) {
+ LLVMValueRef pred[NUM_CHANNELS];
+
+ emit_fetch_predicate( bld, inst, pred );
+
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- emit_store( bld, inst, 0, chan_index, dst0[chan_index]);
+ emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]);
}
}
@@ -1736,24 +2010,42 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
const LLVMValueRef (*inputs)[NUM_CHANNELS],
LLVMValueRef (*outputs)[NUM_CHANNELS],
struct lp_build_sampler_soa *sampler,
- struct tgsi_shader_info *info)
+ const struct tgsi_shader_info *info)
{
struct lp_build_tgsi_soa_context bld;
struct tgsi_parse_context parse;
uint num_immediates = 0;
+ uint num_instructions = 0;
unsigned i;
+ int pc = 0;
+
+ struct lp_type res_type;
+
+ assert(type.length <= LP_MAX_VECTOR_LENGTH);
+ memset(&res_type, 0, sizeof res_type);
+ res_type.width = type.width;
+ res_type.length = type.length;
+ res_type.sign = 1;
/* Setup build context */
memset(&bld, 0, sizeof bld);
lp_build_context_init(&bld.base, builder, type);
+ lp_build_context_init(&bld.uint_bld, builder, lp_uint_type(type));
bld.mask = mask;
bld.pos = pos;
bld.inputs = inputs;
bld.outputs = outputs;
bld.consts_ptr = consts_ptr;
bld.sampler = sampler;
- bld.has_indirect_addressing = info->opcode_count[TGSI_OPCODE_ARR] > 0 ||
- info->opcode_count[TGSI_OPCODE_ARL] > 0;
+ bld.info = info;
+ bld.indirect_files = info->indirect_files;
+ bld.instructions = (struct tgsi_full_instruction *)
+ MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) );
+ bld.max_instructions = LP_MAX_INSTRUCTIONS;
+
+ if (!bld.instructions) {
+ return;
+ }
lp_exec_mask_init(&bld.exec_mask, &bld.base);
@@ -1765,19 +2057,31 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
switch( parse.FullToken.Token.Type ) {
case TGSI_TOKEN_TYPE_DECLARATION:
/* Inputs already interpolated */
- {
- if (!emit_declaration( &bld, &parse.FullToken.FullDeclaration ))
- _debug_printf("warning: failed to define LLVM variable\n");
- }
+ emit_declaration( &bld, &parse.FullToken.FullDeclaration );
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
{
- unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
- const struct tgsi_opcode_info *opcode_info = tgsi_get_opcode_info(opcode);
- if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, opcode_info ))
- _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
- opcode_info->mnemonic);
+ /* save expanded instruction */
+ if (num_instructions == bld.max_instructions) {
+ struct tgsi_full_instruction *instructions;
+ instructions = REALLOC(bld.instructions,
+ bld.max_instructions
+ * sizeof(struct tgsi_full_instruction),
+ (bld.max_instructions + LP_MAX_INSTRUCTIONS)
+ * sizeof(struct tgsi_full_instruction));
+ if (!instructions) {
+ break;
+ }
+ bld.instructions = instructions;
+ bld.max_instructions += LP_MAX_INSTRUCTIONS;
+ }
+
+ memcpy(bld.instructions + num_instructions,
+ &parse.FullToken.FullInstruction,
+ sizeof(bld.instructions[0]));
+
+ num_instructions++;
}
break;
@@ -1787,7 +2091,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
{
const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
assert(size <= 4);
- assert(num_immediates < LP_MAX_IMMEDIATES);
+ assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
for( i = 0; i < size; ++i )
bld.immediates[num_immediates][i] =
lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float);
@@ -1804,14 +2108,33 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
assert( 0 );
}
}
+
+ while (pc != -1) {
+ struct tgsi_full_instruction *instr = bld.instructions + pc;
+ const struct tgsi_opcode_info *opcode_info =
+ tgsi_get_opcode_info(instr->Instruction.Opcode);
+ if (!emit_instruction( &bld, instr, opcode_info, &pc ))
+ _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
+ opcode_info->mnemonic);
+ }
+
if (0) {
LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
LLVMValueRef function = LLVMGetBasicBlockParent(block);
debug_printf("11111111111111111111111111111 \n");
tgsi_dump(tokens, 0);
- LLVMDumpValue(function);
+ lp_debug_dump_value(function);
debug_printf("2222222222222222222222222222 \n");
}
tgsi_parse_free( &parse );
+
+ if (0) {
+ LLVMModuleRef module = LLVMGetGlobalParent(
+ LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld.base.builder)));
+ LLVMDumpModule(module);
+
+ }
+
+ FREE( bld.instructions );
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.c b/src/gallium/auxiliary/gallivm/lp_bld_type.c
index 796af88caad..06f1aae6dcc 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_type.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_type.c
@@ -195,6 +195,7 @@ lp_uint_type(struct lp_type type)
{
struct lp_type res_type;
+ assert(type.length <= LP_MAX_VECTOR_LENGTH);
memset(&res_type, 0, sizeof res_type);
res_type.width = type.width;
res_type.length = type.length;
@@ -211,6 +212,7 @@ lp_int_type(struct lp_type type)
{
struct lp_type res_type;
+ assert(type.length <= LP_MAX_VECTOR_LENGTH);
memset(&res_type, 0, sizeof res_type);
res_type.width = type.width;
res_type.length = type.length;
@@ -238,6 +240,131 @@ lp_wider_type(struct lp_type type)
}
+/**
+ * Return the size of the LLVMType in bits.
+ * XXX this function doesn't necessarily handle all LLVM types.
+ */
+unsigned
+lp_sizeof_llvm_type(LLVMTypeRef t)
+{
+ LLVMTypeKind k = LLVMGetTypeKind(t);
+
+ switch (k) {
+ case LLVMIntegerTypeKind:
+ return LLVMGetIntTypeWidth(t);
+ case LLVMFloatTypeKind:
+ return 8 * sizeof(float);
+ case LLVMDoubleTypeKind:
+ return 8 * sizeof(double);
+ case LLVMVectorTypeKind:
+ {
+ LLVMTypeRef elem = LLVMGetElementType(t);
+ unsigned len = LLVMGetVectorSize(t);
+ return len * lp_sizeof_llvm_type(elem);
+ }
+ break;
+ case LLVMArrayTypeKind:
+ {
+ LLVMTypeRef elem = LLVMGetElementType(t);
+ unsigned len = LLVMGetArrayLength(t);
+ return len * lp_sizeof_llvm_type(elem);
+ }
+ break;
+ default:
+ assert(0 && "Unexpected type in lp_get_llvm_type_size()");
+ return 0;
+ }
+}
+
+
+/**
+ * Return string name for a LLVMTypeKind. Useful for debugging.
+ */
+const char *
+lp_typekind_name(LLVMTypeKind t)
+{
+ switch (t) {
+ case LLVMVoidTypeKind:
+ return "LLVMVoidTypeKind";
+ case LLVMFloatTypeKind:
+ return "LLVMFloatTypeKind";
+ case LLVMDoubleTypeKind:
+ return "LLVMDoubleTypeKind";
+ case LLVMX86_FP80TypeKind:
+ return "LLVMX86_FP80TypeKind";
+ case LLVMFP128TypeKind:
+ return "LLVMFP128TypeKind";
+ case LLVMPPC_FP128TypeKind:
+ return "LLVMPPC_FP128TypeKind";
+ case LLVMLabelTypeKind:
+ return "LLVMLabelTypeKind";
+ case LLVMIntegerTypeKind:
+ return "LLVMIntegerTypeKind";
+ case LLVMFunctionTypeKind:
+ return "LLVMFunctionTypeKind";
+ case LLVMStructTypeKind:
+ return "LLVMStructTypeKind";
+ case LLVMArrayTypeKind:
+ return "LLVMArrayTypeKind";
+ case LLVMPointerTypeKind:
+ return "LLVMPointerTypeKind";
+ case LLVMOpaqueTypeKind:
+ return "LLVMOpaqueTypeKind";
+ case LLVMVectorTypeKind:
+ return "LLVMVectorTypeKind";
+ case LLVMMetadataTypeKind:
+ return "LLVMMetadataTypeKind";
+ /* Only in LLVM 2.7 and later???
+ case LLVMUnionTypeKind:
+ return "LLVMUnionTypeKind";
+ */
+ default:
+ return "unknown LLVMTypeKind";
+ }
+}
+
+
+/**
+ * Print an LLVMTypeRef. Like LLVMDumpValue(). For debugging.
+ */
+void
+lp_dump_llvmtype(LLVMTypeRef t)
+{
+ LLVMTypeKind k = LLVMGetTypeKind(t);
+
+ if (k == LLVMVectorTypeKind) {
+ LLVMTypeRef te = LLVMGetElementType(t);
+ LLVMTypeKind ke = LLVMGetTypeKind(te);
+ unsigned len = LLVMGetVectorSize(t);
+ if (ke == LLVMIntegerTypeKind) {
+ unsigned b = LLVMGetIntTypeWidth(te);
+ debug_printf("Vector [%u] of %u-bit Integer\n", len, b);
+ }
+ else {
+ debug_printf("Vector [%u] of %s\n", len, lp_typekind_name(ke));
+ }
+ }
+ else if (k == LLVMArrayTypeKind) {
+ LLVMTypeRef te = LLVMGetElementType(t);
+ LLVMTypeKind ke = LLVMGetTypeKind(te);
+ unsigned len = LLVMGetArrayLength(t);
+ debug_printf("Array [%u] of %s\n", len, lp_typekind_name(ke));
+ }
+ else if (k == LLVMIntegerTypeKind) {
+ unsigned b = LLVMGetIntTypeWidth(t);
+ debug_printf("%u-bit Integer\n", b);
+ }
+ else if (k == LLVMPointerTypeKind) {
+ LLVMTypeRef te = LLVMGetElementType(t);
+ debug_printf("Pointer to ");
+ lp_dump_llvmtype(te);
+ }
+ else {
+ debug_printf("%s\n", lp_typekind_name(k));
+ }
+}
+
+
void
lp_build_context_init(struct lp_build_context *bld,
LLVMBuilderRef builder,
@@ -245,7 +372,23 @@ lp_build_context_init(struct lp_build_context *bld,
{
bld->builder = builder;
bld->type = type;
- bld->undef = lp_build_undef(type);
- bld->zero = lp_build_zero(type);
+
+ bld->int_elem_type = lp_build_int_elem_type(type);
+ if (type.floating)
+ bld->elem_type = lp_build_elem_type(type);
+ else
+ bld->elem_type = bld->int_elem_type;
+
+ if (type.length == 1) {
+ bld->int_vec_type = bld->int_elem_type;
+ bld->vec_type = bld->elem_type;
+ }
+ else {
+ bld->int_vec_type = LLVMVectorType(bld->int_elem_type, type.length);
+ bld->vec_type = LLVMVectorType(bld->elem_type, type.length);
+ }
+
+ bld->undef = LLVMGetUndef(bld->vec_type);
+ bld->zero = LLVMConstNull(bld->vec_type);
bld->one = lp_build_one(type);
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.h b/src/gallium/auxiliary/gallivm/lp_bld_type.h
index cd59d2faa66..fec1d3dfbc6 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_type.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_type.h
@@ -128,6 +128,18 @@ struct lp_build_context
*/
struct lp_type type;
+ /** Same as lp_build_elem_type(type) */
+ LLVMTypeRef elem_type;
+
+ /** Same as lp_build_vec_type(type) */
+ LLVMTypeRef vec_type;
+
+ /** Same as lp_build_int_elem_type(type) */
+ LLVMTypeRef int_elem_type;
+
+ /** Same as lp_build_int_vec_type(type) */
+ LLVMTypeRef int_vec_type;
+
/** Same as lp_build_undef(type) */
LLVMValueRef undef;
@@ -304,6 +316,54 @@ LLVMTypeRef
lp_build_int32_vec4_type(void);
+static INLINE struct lp_type
+lp_float32_vec4_type(void)
+{
+ struct lp_type type;
+
+ memset(&type, 0, sizeof(type));
+ type.floating = TRUE;
+ type.sign = TRUE;
+ type.norm = FALSE;
+ type.width = 32;
+ type.length = 4;
+
+ return type;
+}
+
+
+static INLINE struct lp_type
+lp_int32_vec4_type(void)
+{
+ struct lp_type type;
+
+ memset(&type, 0, sizeof(type));
+ type.floating = FALSE;
+ type.sign = TRUE;
+ type.norm = FALSE;
+ type.width = 32;
+ type.length = 4;
+
+ return type;
+}
+
+
+static INLINE struct lp_type
+lp_unorm8_vec4_type(void)
+{
+ struct lp_type type;
+
+ memset(&type, 0, sizeof(type));
+ type.floating = FALSE;
+ type.sign = FALSE;
+ type.norm = TRUE;
+ type.width = 8;
+ type.length = 4;
+
+ return type;
+}
+
+
struct lp_type
lp_uint_type(struct lp_type type);
@@ -316,6 +376,18 @@ struct lp_type
lp_wider_type(struct lp_type type);
+unsigned
+lp_sizeof_llvm_type(LLVMTypeRef t);
+
+
+const char *
+lp_typekind_name(LLVMTypeKind t);
+
+
+void
+lp_dump_llvmtype(LLVMTypeRef t);
+
+
void
lp_build_context_init(struct lp_build_context *bld,
LLVMBuilderRef builder,
diff --git a/src/gallium/auxiliary/indices/.gitignore b/src/gallium/auxiliary/indices/.gitignore
new file mode 100644
index 00000000000..73740071451
--- /dev/null
+++ b/src/gallium/auxiliary/indices/.gitignore
@@ -0,0 +1,2 @@
+u_indices_gen.c
+u_unfilled_gen.c
diff --git a/src/gallium/auxiliary/os/os_stream.c b/src/gallium/auxiliary/os/os_stream.c
new file mode 100644
index 00000000000..3c55fc00d92
--- /dev/null
+++ b/src/gallium/auxiliary/os/os_stream.c
@@ -0,0 +1,58 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "pipe/p_config.h"
+
+#include "os_stream.h"
+#include "util/u_memory.h"
+#include "util/u_string.h"
+
+int
+os_default_stream_vprintf (struct os_stream* stream, const char *format, va_list ap)
+{
+ char buf[1024];
+ int retval;
+ va_list ap2;
+ va_copy(ap2, ap);
+ retval = util_vsnprintf(buf, sizeof(buf), format, ap2);
+ va_end(ap2);
+ if(retval <= 0)
+ {}
+ else if(retval < sizeof(buf))
+ stream->write(stream, buf, retval);
+ else
+ {
+ char* str = MALLOC(retval + 1);
+ if(!str)
+ return -1;
+ retval = util_vsnprintf(str, retval + 1, format, ap);
+ if(retval > 0)
+ stream->write(stream, str, retval);
+ FREE(str);
+ }
+
+ return retval;
+}
diff --git a/src/gallium/auxiliary/os/os_stream.h b/src/gallium/auxiliary/os/os_stream.h
index 693a0621e2d..6c6050bb028 100644
--- a/src/gallium/auxiliary/os/os_stream.h
+++ b/src/gallium/auxiliary/os/os_stream.h
@@ -50,6 +50,9 @@ struct os_stream
void
(*flush)(struct os_stream *stream);
+
+ int
+ (*vprintf)(struct os_stream *stream, const char* format, va_list ap);
};
@@ -90,6 +93,27 @@ os_stream_flush(struct os_stream *stream)
stream->flush(stream);
}
+int
+os_default_stream_vprintf (struct os_stream* stream, const char *format, va_list ap);
+
+static INLINE int
+os_stream_vprintf (struct os_stream* stream, const char *format, va_list ap)
+{
+ return stream->vprintf(stream, format, ap);
+}
+
+static INLINE int
+os_stream_printf (struct os_stream* stream, const char *format, ...)
+{
+ int retval;
+ va_list args;
+
+ va_start (args, format);
+ retval = stream->vprintf(stream, format, args);
+ va_end (args);
+
+ return retval;
+}
struct os_stream *
os_file_stream_create(const char *filename);
@@ -118,5 +142,4 @@ os_str_stream_get_and_close(struct os_stream *stream);
#define os_file_stream_create(_filename) os_null_stream_create()
#endif
-
#endif /* _OS_STREAM_H_ */
diff --git a/src/gallium/auxiliary/os/os_stream_log.c b/src/gallium/auxiliary/os/os_stream_log.c
index 7cc2028a22c..b01377c3468 100644
--- a/src/gallium/auxiliary/os/os_stream_log.c
+++ b/src/gallium/auxiliary/os/os_stream_log.c
@@ -73,7 +73,8 @@ static struct os_stream
os_log_stream_struct = {
&os_log_stream_close,
&os_log_stream_write,
- &os_log_stream_flush
+ &os_log_stream_flush,
+ &os_default_stream_vprintf,
};
diff --git a/src/gallium/auxiliary/os/os_stream_null.c b/src/gallium/auxiliary/os/os_stream_null.c
index 128c4e8f0e0..a549a789e62 100644
--- a/src/gallium/auxiliary/os/os_stream_null.c
+++ b/src/gallium/auxiliary/os/os_stream_null.c
@@ -56,12 +56,18 @@ os_null_stream_flush(struct os_stream *stream)
(void)stream;
}
+static int
+os_null_stream_vprintf (struct os_stream* stream, const char *format, va_list ap)
+{
+ return 0;
+}
static struct os_stream
os_null_stream = {
&os_null_stream_close,
&os_null_stream_write,
- &os_null_stream_flush
+ &os_null_stream_flush,
+ &os_null_stream_vprintf
};
diff --git a/src/gallium/auxiliary/os/os_stream_stdc.c b/src/gallium/auxiliary/os/os_stream_stdc.c
index 9e7ed711076..37e7d063e2b 100644
--- a/src/gallium/auxiliary/os/os_stream_stdc.c
+++ b/src/gallium/auxiliary/os/os_stream_stdc.c
@@ -83,6 +83,14 @@ os_stdc_stream_flush(struct os_stream *_stream)
fflush(stream->file);
}
+static int
+os_stdc_stream_vprintf (struct os_stream* _stream, const char *format, va_list ap)
+{
+ struct os_stdc_stream *stream = os_stdc_stream(_stream);
+
+ return vfprintf(stream->file, format, ap);
+}
+
struct os_stream *
os_file_stream_create(const char *filename)
@@ -96,6 +104,7 @@ os_file_stream_create(const char *filename)
stream->base.close = &os_stdc_stream_close;
stream->base.write = &os_stdc_stream_write;
stream->base.flush = &os_stdc_stream_flush;
+ stream->base.vprintf = &os_stdc_stream_vprintf;
stream->file = fopen(filename, "w");
if(!stream->file)
diff --git a/src/gallium/auxiliary/os/os_stream_str.c b/src/gallium/auxiliary/os/os_stream_str.c
index b5c7270d2ae..be9478b2a17 100644
--- a/src/gallium/auxiliary/os/os_stream_str.c
+++ b/src/gallium/auxiliary/os/os_stream_str.c
@@ -118,6 +118,7 @@ os_str_stream_create(size_t size)
stream->base.close = &os_str_stream_close;
stream->base.write = &os_str_stream_write;
stream->base.flush = &os_str_stream_flush;
+ stream->base.vprintf = &os_default_stream_vprintf;
stream->str = os_malloc(size);
if(!stream->str)
diff --git a/src/gallium/auxiliary/os/os_thread.h b/src/gallium/auxiliary/os/os_thread.h
index c09e8a7a76f..a084310d4ff 100644
--- a/src/gallium/auxiliary/os/os_thread.h
+++ b/src/gallium/auxiliary/os/os_thread.h
@@ -40,12 +40,11 @@
#include "util/u_debug.h" /* for assert */
-#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_EMBEDDED)
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_EMBEDDED) || defined(PIPE_OS_CYGWIN)
#include <pthread.h> /* POSIX threads headers */
#include <stdio.h> /* for perror() */
-#define PIPE_THREAD_HAVE_CONDVAR
/* pipe_thread
*/
@@ -168,19 +167,59 @@ typedef CRITICAL_SECTION pipe_mutex;
#define pipe_mutex_unlock(mutex) \
LeaveCriticalSection(&mutex)
+/* TODO: Need a macro to declare "I don't care about WinXP compatibilty" */
+#if 0 && defined (_WIN32_WINNT) && (_WIN32_WINNT >= 0x0600)
+/* CONDITION_VARIABLE is only available on newer versions of Windows
+ * (Server 2008/Vista or later).
+ * http://msdn.microsoft.com/en-us/library/ms682052(VS.85).aspx
+ *
+ * pipe_condvar
+ */
+typedef CONDITION_VARIABLE pipe_condvar;
+
+#define pipe_static_condvar(cond) \
+ /*static*/ pipe_condvar cond = CONDITION_VARIABLE_INIT
+
+#define pipe_condvar_init(cond) \
+ InitializeConditionVariable(&(cond))
+
+#define pipe_condvar_destroy(cond) \
+ (void) cond /* nothing to do */
+
+#define pipe_condvar_wait(cond, mutex) \
+ SleepConditionVariableCS(&(cond), &(mutex), INFINITE)
+
+#define pipe_condvar_signal(cond) \
+ WakeConditionVariable(&(cond))
+
+#define pipe_condvar_broadcast(cond) \
+ WakeAllConditionVariable(&(cond))
+
+#else /* need compatibility with pre-Vista Win32 */
/* pipe_condvar (XXX FIX THIS)
+ * See http://www.cs.wustl.edu/~schmidt/win32-cv-1.html
+ * for potential pitfalls in implementation.
*/
-typedef unsigned pipe_condvar;
+typedef DWORD pipe_condvar;
+
+#define pipe_static_condvar(cond) \
+ /*static*/ pipe_condvar cond = 1
#define pipe_condvar_init(cond) \
- (void) cond
+ (void) (cond = 1)
#define pipe_condvar_destroy(cond) \
(void) cond
+/* Poor man's pthread_cond_wait():
+ Just release the mutex and sleep for one millisecond.
+ The caller's while() loop does all the work. */
#define pipe_condvar_wait(cond, mutex) \
- (void) cond; (void) mutex
+ do { pipe_mutex_unlock(mutex); \
+ Sleep(cond); \
+ pipe_mutex_lock(mutex); \
+ } while (0)
#define pipe_condvar_signal(cond) \
(void) cond
@@ -188,9 +227,12 @@ typedef unsigned pipe_condvar;
#define pipe_condvar_broadcast(cond) \
(void) cond
+#endif /* pre-Vista win32 */
#else
+#include "os/os_time.h"
+
/** Dummy definitions */
typedef unsigned pipe_thread;
@@ -214,7 +256,6 @@ static INLINE int pipe_thread_destroy( pipe_thread thread )
}
typedef unsigned pipe_mutex;
-typedef unsigned pipe_condvar;
#define pipe_static_mutex(mutex) \
static pipe_mutex mutex = 0
@@ -231,17 +272,25 @@ typedef unsigned pipe_condvar;
#define pipe_mutex_unlock(mutex) \
(void) mutex
+typedef int64_t pipe_condvar;
+
#define pipe_static_condvar(condvar) \
- static unsigned condvar = 0
+ static pipe_condvar condvar = 1000
#define pipe_condvar_init(condvar) \
- (void) condvar
+ (void) (condvar = 1000)
#define pipe_condvar_destroy(condvar) \
(void) condvar
+/* Poor man's pthread_cond_wait():
+ Just release the mutex and sleep for one millisecond.
+ The caller's while() loop does all the work. */
#define pipe_condvar_wait(condvar, mutex) \
- (void) condvar
+ do { pipe_mutex_unlock(mutex); \
+ os_time_sleep(condvar); \
+ pipe_mutex_lock(mutex); \
+ } while (0)
#define pipe_condvar_signal(condvar) \
(void) condvar
@@ -277,27 +326,7 @@ static INLINE void pipe_barrier_wait(pipe_barrier *barrier)
}
-#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER)
-
-/* XXX FIX THIS */
-typedef unsigned pipe_barrier;
-
-static INLINE void pipe_barrier_init(pipe_barrier *barrier, unsigned count)
-{
- /* XXX we could implement barriers with a mutex and condition var */
-}
-
-static INLINE void pipe_barrier_destroy(pipe_barrier *barrier)
-{
-}
-
-static INLINE void pipe_barrier_wait(pipe_barrier *barrier)
-{
- assert(0);
-}
-
-
-#else
+#else /* If the OS doesn't have its own, implement barriers using a mutex and a condvar */
typedef struct {
unsigned count;
@@ -405,7 +434,7 @@ pipe_semaphore_wait(pipe_semaphore *sema)
*/
typedef struct {
-#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_EMBEDDED)
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_EMBEDDED) || defined(PIPE_OS_CYGWIN)
pthread_key_t key;
#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER)
DWORD key;
@@ -420,7 +449,7 @@ typedef struct {
static INLINE void
pipe_tsd_init(pipe_tsd *tsd)
{
-#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_EMBEDDED)
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_EMBEDDED) || defined(PIPE_OS_CYGWIN)
if (pthread_key_create(&tsd->key, NULL/*free*/) != 0) {
perror("pthread_key_create(): failed to allocate key for thread specific data");
exit(-1);
@@ -437,7 +466,7 @@ pipe_tsd_get(pipe_tsd *tsd)
if (tsd->initMagic != (int) PIPE_TSD_INIT_MAGIC) {
pipe_tsd_init(tsd);
}
-#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_EMBEDDED)
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_EMBEDDED) || defined(PIPE_OS_CYGWIN)
return pthread_getspecific(tsd->key);
#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER)
assert(0);
@@ -454,7 +483,7 @@ pipe_tsd_set(pipe_tsd *tsd, void *value)
if (tsd->initMagic != (int) PIPE_TSD_INIT_MAGIC) {
pipe_tsd_init(tsd);
}
-#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_EMBEDDED)
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_EMBEDDED) || defined(PIPE_OS_CYGWIN)
if (pthread_setspecific(tsd->key, value) != 0) {
perror("pthread_set_specific() failed");
exit(-1);
diff --git a/src/gallium/auxiliary/os/os_time.c b/src/gallium/auxiliary/os/os_time.c
index 6259142bec0..84907215fe6 100644
--- a/src/gallium/auxiliary/os/os_time.c
+++ b/src/gallium/auxiliary/os/os_time.c
@@ -37,7 +37,7 @@
#if !defined(PIPE_OS_EMBEDDED)
-#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU)
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_CYGWIN)
# include <sys/time.h> /* timeval */
#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY)
# include <windows.h>
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer.h b/src/gallium/auxiliary/pipebuffer/pb_buffer.h
index a6c50dcf0c1..5a13f39849f 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer.h
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer.h
@@ -130,7 +130,7 @@ struct pb_vtbl
* flags is bitmask of PB_USAGE_CPU_READ/WRITE.
*/
void *(*map)( struct pb_buffer *buf,
- unsigned flags );
+ unsigned flags, void *flush_ctx );
void (*unmap)( struct pb_buffer *buf );
@@ -164,13 +164,13 @@ struct pb_vtbl
*/
static INLINE void *
pb_map(struct pb_buffer *buf,
- unsigned flags)
+ unsigned flags, void *flush_ctx)
{
assert(buf);
if(!buf)
return NULL;
assert(pipe_is_referenced(&buf->base.reference));
- return buf->vtbl->map(buf, flags);
+ return buf->vtbl->map(buf, flags, flush_ctx);
}
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
index d6cf6405825..c310f28f51f 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
@@ -624,7 +624,7 @@ fenced_buffer_copy_storage_to_gpu_locked(struct fenced_buffer *fenced_buf)
assert(fenced_buf->data);
assert(fenced_buf->buffer);
- map = pb_map(fenced_buf->buffer, PB_USAGE_CPU_WRITE);
+ map = pb_map(fenced_buf->buffer, PB_USAGE_CPU_WRITE, NULL);
if(!map)
return PIPE_ERROR;
@@ -644,7 +644,7 @@ fenced_buffer_copy_storage_to_cpu_locked(struct fenced_buffer *fenced_buf)
assert(fenced_buf->data);
assert(fenced_buf->buffer);
- map = pb_map(fenced_buf->buffer, PB_USAGE_CPU_READ);
+ map = pb_map(fenced_buf->buffer, PB_USAGE_CPU_READ, NULL);
if(!map)
return PIPE_ERROR;
@@ -674,7 +674,7 @@ fenced_buffer_destroy(struct pb_buffer *buf)
static void *
fenced_buffer_map(struct pb_buffer *buf,
- unsigned flags)
+ unsigned flags, void *flush_ctx)
{
struct fenced_buffer *fenced_buf = fenced_buffer(buf);
struct fenced_manager *fenced_mgr = fenced_buf->mgr;
@@ -712,7 +712,7 @@ fenced_buffer_map(struct pb_buffer *buf,
}
if(fenced_buf->buffer) {
- map = pb_map(fenced_buf->buffer, flags);
+ map = pb_map(fenced_buf->buffer, flags, flush_ctx);
}
else {
assert(fenced_buf->data);
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c
index b706f429be5..c2322eed19b 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c
@@ -70,7 +70,8 @@ malloc_buffer_destroy(struct pb_buffer *buf)
static void *
malloc_buffer_map(struct pb_buffer *buf,
- unsigned flags)
+ unsigned flags,
+ void *flush_ctx)
{
return malloc_buffer(buf)->data;
}
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
index cec2524da2b..2ef02160f23 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
@@ -50,8 +50,7 @@
#define PB_BUFMGR_H_
-#include "pipe/p_compiler.h"
-#include "pipe/p_defines.h"
+#include "pb_buffer.h"
#ifdef __cplusplus
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
index 88501e8d72d..a6eb4039621 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
@@ -167,10 +167,10 @@ pb_cache_buffer_destroy(struct pb_buffer *_buf)
static void *
pb_cache_buffer_map(struct pb_buffer *_buf,
- unsigned flags)
+ unsigned flags, void *flush_ctx)
{
struct pb_cache_buffer *buf = pb_cache_buffer(_buf);
- return pb_map(buf->buffer, flags);
+ return pb_map(buf->buffer, flags, flush_ctx);
}
@@ -222,7 +222,7 @@ pb_cache_buffer_vtbl = {
};
-static INLINE boolean
+static INLINE int
pb_cache_is_buffer_compat(struct pb_cache_buffer *buf,
pb_size size,
const struct pb_desc *desc)
@@ -230,26 +230,26 @@ pb_cache_is_buffer_compat(struct pb_cache_buffer *buf,
void *map;
if(buf->base.base.size < size)
- return FALSE;
+ return 0;
/* be lenient with size */
if(buf->base.base.size >= 2*size)
- return FALSE;
+ return 0;
if(!pb_check_alignment(desc->alignment, buf->base.base.alignment))
- return FALSE;
+ return 0;
if(!pb_check_usage(desc->usage, buf->base.base.usage))
- return FALSE;
+ return 0;
- map = pb_map(buf->buffer, PB_USAGE_DONTBLOCK);
+ map = pb_map(buf->buffer, PB_USAGE_DONTBLOCK, NULL);
if (!map) {
- return FALSE;
+ return -1;
}
pb_unmap(buf->buffer);
- return TRUE;
+ return 1;
}
@@ -263,7 +263,8 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr,
struct pb_cache_buffer *curr_buf;
struct list_head *curr, *next;
int64_t now;
-
+ int ret = 0;
+
pipe_mutex_lock(mgr->mutex);
buf = NULL;
@@ -274,25 +275,30 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr,
now = os_time_get();
while(curr != &mgr->delayed) {
curr_buf = LIST_ENTRY(struct pb_cache_buffer, curr, head);
- if(!buf && pb_cache_is_buffer_compat(curr_buf, size, desc))
- buf = curr_buf;
+ if(!buf && (ret = pb_cache_is_buffer_compat(curr_buf, size, desc) > 0))
+ buf = curr_buf;
else if(os_time_timeout(curr_buf->start, curr_buf->end, now))
- _pb_cache_buffer_destroy(curr_buf);
+ _pb_cache_buffer_destroy(curr_buf);
else
/* This buffer (and all hereafter) are still hot in cache */
break;
+ if (ret == -1)
+ break;
curr = next;
next = curr->next;
}
/* keep searching in the hot buffers */
- if(!buf) {
+ if(!buf && ret != -1) {
while(curr != &mgr->delayed) {
curr_buf = LIST_ENTRY(struct pb_cache_buffer, curr, head);
- if(pb_cache_is_buffer_compat(curr_buf, size, desc)) {
+ ret = pb_cache_is_buffer_compat(curr_buf, size, desc);
+ if (ret > 0) {
buf = curr_buf;
break;
}
+ if (ret == -1)
+ break;
/* no need to check the timeout here */
curr = next;
next = curr->next;
@@ -301,6 +307,7 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr,
if(buf) {
LIST_DEL(&buf->head);
+ --mgr->numDelayed;
pipe_mutex_unlock(mgr->mutex);
/* Increase refcount */
pipe_reference_init(&buf->base.base.reference, 1);
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c
index 0dc5b31a754..7604e75af8d 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c
@@ -158,7 +158,7 @@ pb_debug_buffer_fill(struct pb_debug_buffer *buf)
{
uint8_t *map;
- map = pb_map(buf->buffer, PB_USAGE_CPU_WRITE);
+ map = pb_map(buf->buffer, PB_USAGE_CPU_WRITE, NULL);
assert(map);
if(map) {
fill_random_pattern(map, buf->underflow_size);
@@ -181,7 +181,7 @@ pb_debug_buffer_check(struct pb_debug_buffer *buf)
map = pb_map(buf->buffer,
PB_USAGE_CPU_READ |
- PB_USAGE_UNSYNCHRONIZED);
+ PB_USAGE_UNSYNCHRONIZED, NULL);
assert(map);
if(map) {
boolean underflow, overflow;
@@ -247,14 +247,14 @@ pb_debug_buffer_destroy(struct pb_buffer *_buf)
static void *
pb_debug_buffer_map(struct pb_buffer *_buf,
- unsigned flags)
+ unsigned flags, void *flush_ctx)
{
struct pb_debug_buffer *buf = pb_debug_buffer(_buf);
void *map;
pb_debug_buffer_check(buf);
- map = pb_map(buf->buffer, flags);
+ map = pb_map(buf->buffer, flags, flush_ctx);
if(!map)
return NULL;
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c
index faf7c352674..88da786216a 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c
@@ -108,11 +108,14 @@ mm_buffer_destroy(struct pb_buffer *buf)
static void *
mm_buffer_map(struct pb_buffer *buf,
- unsigned flags)
+ unsigned flags,
+ void *flush_ctx)
{
struct mm_buffer *mm_buf = mm_buffer(buf);
struct mm_pb_manager *mm = mm_buf->mgr;
+ /* XXX: it will be necessary to remap here to propagate flush_ctx */
+
return (unsigned char *) mm->map + mm_buf->block->ofs;
}
@@ -269,7 +272,7 @@ mm_bufmgr_create_from_buffer(struct pb_buffer *buffer,
mm->map = pb_map(mm->buffer,
PB_USAGE_CPU_READ |
- PB_USAGE_CPU_WRITE);
+ PB_USAGE_CPU_WRITE, NULL);
if(!mm->map)
goto failure;
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c
index 31f1ebbeb7c..694a092f3c2 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c
@@ -103,13 +103,13 @@ pb_ondemand_buffer_destroy(struct pb_buffer *_buf)
static void *
pb_ondemand_buffer_map(struct pb_buffer *_buf,
- unsigned flags)
+ unsigned flags, void *flush_ctx)
{
struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf);
if(buf->buffer) {
assert(!buf->data);
- return pb_map(buf->buffer, flags);
+ return pb_map(buf->buffer, flags, flush_ctx);
}
else {
assert(buf->data);
@@ -150,7 +150,7 @@ pb_ondemand_buffer_instantiate(struct pb_ondemand_buffer *buf)
if(!buf->buffer)
return PIPE_ERROR_OUT_OF_MEMORY;
- map = pb_map(buf->buffer, PB_USAGE_CPU_READ);
+ map = pb_map(buf->buffer, PB_USAGE_CPU_READ, NULL);
if(!map) {
pb_reference(&buf->buffer, NULL);
return PIPE_ERROR;
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c
index fdcce428784..2f7c7389ff4 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c
@@ -118,12 +118,14 @@ pool_buffer_destroy(struct pb_buffer *buf)
static void *
-pool_buffer_map(struct pb_buffer *buf, unsigned flags)
+pool_buffer_map(struct pb_buffer *buf, unsigned flags, void *flush_ctx)
{
struct pool_buffer *pool_buf = pool_buffer(buf);
struct pool_pb_manager *pool = pool_buf->mgr;
void *map;
+ /* XXX: it will be necessary to remap here to propagate flush_ctx */
+
pipe_mutex_lock(pool->mutex);
map = (unsigned char *) pool->map + pool_buf->start;
pipe_mutex_unlock(pool->mutex);
@@ -285,7 +287,7 @@ pool_bufmgr_create(struct pb_manager *provider,
pool->map = pb_map(pool->buffer,
PB_USAGE_CPU_READ |
- PB_USAGE_CPU_WRITE);
+ PB_USAGE_CPU_WRITE, NULL);
if(!pool->map)
goto failure;
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c
index 7a3305aaf37..176f9aa38aa 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c
@@ -227,10 +227,13 @@ pb_slab_buffer_destroy(struct pb_buffer *_buf)
static void *
pb_slab_buffer_map(struct pb_buffer *_buf,
- unsigned flags)
+ unsigned flags,
+ void *flush_ctx)
{
struct pb_slab_buffer *buf = pb_slab_buffer(_buf);
+ /* XXX: it will be necessary to remap here to propagate flush_ctx */
+
++buf->mapCount;
return (void *) ((uint8_t *) buf->slab->virtual + buf->start);
}
@@ -316,7 +319,7 @@ pb_slab_create(struct pb_slab_manager *mgr)
* through this address so it is required that the buffer is pinned. */
slab->virtual = pb_map(slab->bo,
PB_USAGE_CPU_READ |
- PB_USAGE_CPU_WRITE);
+ PB_USAGE_CPU_WRITE, NULL);
if(!slab->virtual) {
ret = PIPE_ERROR_OUT_OF_MEMORY;
goto out_err1;
diff --git a/src/gallium/auxiliary/rbug/README b/src/gallium/auxiliary/rbug/README
index d984067893c..c5156438a1b 100644
--- a/src/gallium/auxiliary/rbug/README
+++ b/src/gallium/auxiliary/rbug/README
@@ -10,7 +10,7 @@ The code currently uses tcp and ip4v for connections.
Information about driver integration can be found in:
-src/gallium/drivers/trace/README
+src/gallium/drivers/rbug/README
for information about applications look in:
diff --git a/src/gallium/auxiliary/rbug/rbug_context.c b/src/gallium/auxiliary/rbug/rbug_context.c
index 1832425658f..a3fd7e8430e 100644
--- a/src/gallium/auxiliary/rbug/rbug_context.c
+++ b/src/gallium/auxiliary/rbug/rbug_context.c
@@ -480,7 +480,7 @@ struct rbug_proto_context_list * rbug_demarshal_context_list(struct rbug_proto_h
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_CONTEXT_LIST)
+ if (header->opcode != (int32_t)RBUG_OP_CONTEXT_LIST)
return NULL;
pos = 0;
@@ -506,7 +506,7 @@ struct rbug_proto_context_info * rbug_demarshal_context_info(struct rbug_proto_h
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_CONTEXT_INFO)
+ if (header->opcode != (int32_t)RBUG_OP_CONTEXT_INFO)
return NULL;
pos = 0;
@@ -533,7 +533,7 @@ struct rbug_proto_context_draw_block * rbug_demarshal_context_draw_block(struct
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_CONTEXT_DRAW_BLOCK)
+ if (header->opcode != (int32_t)RBUG_OP_CONTEXT_DRAW_BLOCK)
return NULL;
pos = 0;
@@ -561,7 +561,7 @@ struct rbug_proto_context_draw_step * rbug_demarshal_context_draw_step(struct rb
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_CONTEXT_DRAW_STEP)
+ if (header->opcode != (int32_t)RBUG_OP_CONTEXT_DRAW_STEP)
return NULL;
pos = 0;
@@ -589,7 +589,7 @@ struct rbug_proto_context_draw_unblock * rbug_demarshal_context_draw_unblock(str
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_CONTEXT_DRAW_UNBLOCK)
+ if (header->opcode != (int32_t)RBUG_OP_CONTEXT_DRAW_UNBLOCK)
return NULL;
pos = 0;
@@ -617,7 +617,7 @@ struct rbug_proto_context_draw_rule * rbug_demarshal_context_draw_rule(struct rb
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_CONTEXT_DRAW_RULE)
+ if (header->opcode != (int32_t)RBUG_OP_CONTEXT_DRAW_RULE)
return NULL;
pos = 0;
@@ -649,7 +649,7 @@ struct rbug_proto_context_flush * rbug_demarshal_context_flush(struct rbug_proto
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_CONTEXT_FLUSH)
+ if (header->opcode != (int32_t)RBUG_OP_CONTEXT_FLUSH)
return NULL;
pos = 0;
@@ -677,7 +677,7 @@ struct rbug_proto_context_list_reply * rbug_demarshal_context_list_reply(struct
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_CONTEXT_LIST_REPLY)
+ if (header->opcode != (int32_t)RBUG_OP_CONTEXT_LIST_REPLY)
return NULL;
pos = 0;
@@ -705,7 +705,7 @@ struct rbug_proto_context_info_reply * rbug_demarshal_context_info_reply(struct
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_CONTEXT_INFO_REPLY)
+ if (header->opcode != (int32_t)RBUG_OP_CONTEXT_INFO_REPLY)
return NULL;
pos = 0;
@@ -739,7 +739,7 @@ struct rbug_proto_context_draw_blocked * rbug_demarshal_context_draw_blocked(str
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_CONTEXT_DRAW_BLOCKED)
+ if (header->opcode != (int32_t)RBUG_OP_CONTEXT_DRAW_BLOCKED)
return NULL;
pos = 0;
diff --git a/src/gallium/auxiliary/rbug/rbug_core.c b/src/gallium/auxiliary/rbug/rbug_core.c
index 876ae5a0ce6..1d47d13c9f3 100644
--- a/src/gallium/auxiliary/rbug/rbug_core.c
+++ b/src/gallium/auxiliary/rbug/rbug_core.c
@@ -233,7 +233,7 @@ struct rbug_proto_noop * rbug_demarshal_noop(struct rbug_proto_header *header)
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_NOOP)
+ if (header->opcode != (int32_t)RBUG_OP_NOOP)
return NULL;
pos = 0;
@@ -259,7 +259,7 @@ struct rbug_proto_ping * rbug_demarshal_ping(struct rbug_proto_header *header)
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_PING)
+ if (header->opcode != (int32_t)RBUG_OP_PING)
return NULL;
pos = 0;
@@ -285,7 +285,7 @@ struct rbug_proto_error * rbug_demarshal_error(struct rbug_proto_header *header)
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_ERROR)
+ if (header->opcode != (int32_t)RBUG_OP_ERROR)
return NULL;
pos = 0;
@@ -312,7 +312,7 @@ struct rbug_proto_ping_reply * rbug_demarshal_ping_reply(struct rbug_proto_heade
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_PING_REPLY)
+ if (header->opcode != (int32_t)RBUG_OP_PING_REPLY)
return NULL;
pos = 0;
@@ -339,7 +339,7 @@ struct rbug_proto_error_reply * rbug_demarshal_error_reply(struct rbug_proto_hea
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_ERROR_REPLY)
+ if (header->opcode != (int32_t)RBUG_OP_ERROR_REPLY)
return NULL;
pos = 0;
diff --git a/src/gallium/auxiliary/rbug/rbug_demarshal.c b/src/gallium/auxiliary/rbug/rbug_demarshal.c
index 47390fbcee7..06caa45469d 100644
--- a/src/gallium/auxiliary/rbug/rbug_demarshal.c
+++ b/src/gallium/auxiliary/rbug/rbug_demarshal.c
@@ -91,3 +91,67 @@ struct rbug_header * rbug_demarshal(struct rbug_proto_header *header)
return NULL;
}
}
+
+const char* rbug_proto_get_name(enum rbug_opcode opcode)
+{
+ switch(opcode) {
+ case RBUG_OP_NOOP:
+ return "RBUG_OP_NOOP";
+ case RBUG_OP_PING:
+ return "RBUG_OP_PING";
+ case RBUG_OP_ERROR:
+ return "RBUG_OP_ERROR";
+ case RBUG_OP_PING_REPLY:
+ return "RBUG_OP_PING_REPLY";
+ case RBUG_OP_ERROR_REPLY:
+ return "RBUG_OP_ERROR_REPLY";
+ case RBUG_OP_TEXTURE_LIST:
+ return "RBUG_OP_TEXTURE_LIST";
+ case RBUG_OP_TEXTURE_INFO:
+ return "RBUG_OP_TEXTURE_INFO";
+ case RBUG_OP_TEXTURE_WRITE:
+ return "RBUG_OP_TEXTURE_WRITE";
+ case RBUG_OP_TEXTURE_READ:
+ return "RBUG_OP_TEXTURE_READ";
+ case RBUG_OP_TEXTURE_LIST_REPLY:
+ return "RBUG_OP_TEXTURE_LIST_REPLY";
+ case RBUG_OP_TEXTURE_INFO_REPLY:
+ return "RBUG_OP_TEXTURE_INFO_REPLY";
+ case RBUG_OP_TEXTURE_READ_REPLY:
+ return "RBUG_OP_TEXTURE_READ_REPLY";
+ case RBUG_OP_CONTEXT_LIST:
+ return "RBUG_OP_CONTEXT_LIST";
+ case RBUG_OP_CONTEXT_INFO:
+ return "RBUG_OP_CONTEXT_INFO";
+ case RBUG_OP_CONTEXT_DRAW_BLOCK:
+ return "RBUG_OP_CONTEXT_DRAW_BLOCK";
+ case RBUG_OP_CONTEXT_DRAW_STEP:
+ return "RBUG_OP_CONTEXT_DRAW_STEP";
+ case RBUG_OP_CONTEXT_DRAW_UNBLOCK:
+ return "RBUG_OP_CONTEXT_DRAW_UNBLOCK";
+ case RBUG_OP_CONTEXT_DRAW_RULE:
+ return "RBUG_OP_CONTEXT_DRAW_RULE";
+ case RBUG_OP_CONTEXT_FLUSH:
+ return "RBUG_OP_CONTEXT_FLUSH";
+ case RBUG_OP_CONTEXT_LIST_REPLY:
+ return "RBUG_OP_CONTEXT_LIST_REPLY";
+ case RBUG_OP_CONTEXT_INFO_REPLY:
+ return "RBUG_OP_CONTEXT_INFO_REPLY";
+ case RBUG_OP_CONTEXT_DRAW_BLOCKED:
+ return "RBUG_OP_CONTEXT_DRAW_BLOCKED";
+ case RBUG_OP_SHADER_LIST:
+ return "RBUG_OP_SHADER_LIST";
+ case RBUG_OP_SHADER_INFO:
+ return "RBUG_OP_SHADER_INFO";
+ case RBUG_OP_SHADER_DISABLE:
+ return "RBUG_OP_SHADER_DISABLE";
+ case RBUG_OP_SHADER_REPLACE:
+ return "RBUG_OP_SHADER_REPLACE";
+ case RBUG_OP_SHADER_LIST_REPLY:
+ return "RBUG_OP_SHADER_LIST_REPLY";
+ case RBUG_OP_SHADER_INFO_REPLY:
+ return "RBUG_OP_SHADER_INFO_REPLY";
+ default:
+ return NULL;
+ }
+}
diff --git a/src/gallium/auxiliary/rbug/rbug_proto.h b/src/gallium/auxiliary/rbug/rbug_proto.h
index 4f3eb75dc4d..2fce725bc9e 100644
--- a/src/gallium/auxiliary/rbug/rbug_proto.h
+++ b/src/gallium/auxiliary/rbug/rbug_proto.h
@@ -91,4 +91,9 @@ struct rbug_proto_header
*/
struct rbug_connection;
+/**
+ * Get printable string for opcode.
+ */
+const char* rbug_proto_get_name(enum rbug_opcode opcode);
+
#endif
diff --git a/src/gallium/auxiliary/rbug/rbug_shader.c b/src/gallium/auxiliary/rbug/rbug_shader.c
index fccd2f55efd..1742941cc17 100644
--- a/src/gallium/auxiliary/rbug/rbug_shader.c
+++ b/src/gallium/auxiliary/rbug/rbug_shader.c
@@ -305,7 +305,7 @@ struct rbug_proto_shader_list * rbug_demarshal_shader_list(struct rbug_proto_hea
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_SHADER_LIST)
+ if (header->opcode != (int32_t)RBUG_OP_SHADER_LIST)
return NULL;
pos = 0;
@@ -332,7 +332,7 @@ struct rbug_proto_shader_info * rbug_demarshal_shader_info(struct rbug_proto_hea
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_SHADER_INFO)
+ if (header->opcode != (int32_t)RBUG_OP_SHADER_INFO)
return NULL;
pos = 0;
@@ -360,7 +360,7 @@ struct rbug_proto_shader_disable * rbug_demarshal_shader_disable(struct rbug_pro
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_SHADER_DISABLE)
+ if (header->opcode != (int32_t)RBUG_OP_SHADER_DISABLE)
return NULL;
pos = 0;
@@ -389,7 +389,7 @@ struct rbug_proto_shader_replace * rbug_demarshal_shader_replace(struct rbug_pro
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_SHADER_REPLACE)
+ if (header->opcode != (int32_t)RBUG_OP_SHADER_REPLACE)
return NULL;
pos = 0;
@@ -418,7 +418,7 @@ struct rbug_proto_shader_list_reply * rbug_demarshal_shader_list_reply(struct rb
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_SHADER_LIST_REPLY)
+ if (header->opcode != (int32_t)RBUG_OP_SHADER_LIST_REPLY)
return NULL;
pos = 0;
@@ -446,7 +446,7 @@ struct rbug_proto_shader_info_reply * rbug_demarshal_shader_info_reply(struct rb
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_SHADER_INFO_REPLY)
+ if (header->opcode != (int32_t)RBUG_OP_SHADER_INFO_REPLY)
return NULL;
pos = 0;
diff --git a/src/gallium/auxiliary/rbug/rbug_texture.c b/src/gallium/auxiliary/rbug/rbug_texture.c
index 5a918fe6bc0..2ad577915e8 100644
--- a/src/gallium/auxiliary/rbug/rbug_texture.c
+++ b/src/gallium/auxiliary/rbug/rbug_texture.c
@@ -417,7 +417,7 @@ struct rbug_proto_texture_list * rbug_demarshal_texture_list(struct rbug_proto_h
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_TEXTURE_LIST)
+ if (header->opcode != (int32_t)RBUG_OP_TEXTURE_LIST)
return NULL;
pos = 0;
@@ -443,7 +443,7 @@ struct rbug_proto_texture_info * rbug_demarshal_texture_info(struct rbug_proto_h
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_TEXTURE_INFO)
+ if (header->opcode != (int32_t)RBUG_OP_TEXTURE_INFO)
return NULL;
pos = 0;
@@ -470,7 +470,7 @@ struct rbug_proto_texture_write * rbug_demarshal_texture_write(struct rbug_proto
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_TEXTURE_WRITE)
+ if (header->opcode != (int32_t)RBUG_OP_TEXTURE_WRITE)
return NULL;
pos = 0;
@@ -506,7 +506,7 @@ struct rbug_proto_texture_read * rbug_demarshal_texture_read(struct rbug_proto_h
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_TEXTURE_READ)
+ if (header->opcode != (int32_t)RBUG_OP_TEXTURE_READ)
return NULL;
pos = 0;
@@ -540,7 +540,7 @@ struct rbug_proto_texture_list_reply * rbug_demarshal_texture_list_reply(struct
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_TEXTURE_LIST_REPLY)
+ if (header->opcode != (int32_t)RBUG_OP_TEXTURE_LIST_REPLY)
return NULL;
pos = 0;
@@ -568,7 +568,7 @@ struct rbug_proto_texture_info_reply * rbug_demarshal_texture_info_reply(struct
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_TEXTURE_INFO_REPLY)
+ if (header->opcode != (int32_t)RBUG_OP_TEXTURE_INFO_REPLY)
return NULL;
pos = 0;
@@ -606,7 +606,7 @@ struct rbug_proto_texture_read_reply * rbug_demarshal_texture_read_reply(struct
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_TEXTURE_READ_REPLY)
+ if (header->opcode != (int32_t)RBUG_OP_TEXTURE_READ_REPLY)
return NULL;
pos = 0;
diff --git a/src/gallium/auxiliary/rtasm/rtasm_cpu.c b/src/gallium/auxiliary/rtasm/rtasm_cpu.c
index 2e15751e508..0461c815504 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_cpu.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_cpu.c
@@ -30,7 +30,7 @@
#include "rtasm_cpu.h"
-#if defined(PIPE_ARCH_X86)
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
static boolean rtasm_sse_enabled(void)
{
static boolean firsttime = 1;
@@ -49,7 +49,7 @@ static boolean rtasm_sse_enabled(void)
int rtasm_cpu_has_sse(void)
{
/* FIXME: actually detect this at run-time */
-#if defined(PIPE_ARCH_X86)
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
return rtasm_sse_enabled();
#else
return 0;
@@ -59,7 +59,7 @@ int rtasm_cpu_has_sse(void)
int rtasm_cpu_has_sse2(void)
{
/* FIXME: actually detect this at run-time */
-#if defined(PIPE_ARCH_X86)
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
return rtasm_sse_enabled();
#else
return 0;
diff --git a/src/gallium/auxiliary/rtasm/rtasm_execmem.c b/src/gallium/auxiliary/rtasm/rtasm_execmem.c
index 65d5ce795be..fbde1d191a4 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_execmem.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_execmem.c
@@ -58,7 +58,6 @@
#include <unistd.h>
#include <sys/mman.h>
-#include "os/os_thread.h"
#include "util/u_mm.h"
#define EXEC_HEAP_SIZE (10*1024*1024)
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
index 7595214bdf2..75b0f6a68ea 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
@@ -22,8 +22,9 @@
**************************************************************************/
#include "pipe/p_config.h"
+#include "util/u_cpu_detect.h"
-#if defined(PIPE_ARCH_X86)
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
#include "pipe/p_compiler.h"
#include "util/u_debug.h"
@@ -231,6 +232,10 @@ static void emit_modrm( struct x86_function *p,
assert(reg.mod == mod_REG);
+ /* TODO: support extended x86-64 registers */
+ assert(reg.idx < 8);
+ assert(regmem.idx < 8);
+
val |= regmem.mod << 6; /* mod field */
val |= reg.idx << 3; /* reg field */
val |= regmem.idx; /* r/m field */
@@ -363,6 +368,12 @@ int x86_get_label( struct x86_function *p )
*/
+void x64_rexw(struct x86_function *p)
+{
+ if(x86_target(p) != X86_32)
+ emit_1ub(p, 0x48);
+}
+
void x86_jcc( struct x86_function *p,
enum x86_cc cc,
int label )
@@ -449,6 +460,52 @@ void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm )
emit_1i(p, imm);
}
+void x86_mov_imm( struct x86_function *p, struct x86_reg dst, int imm )
+{
+ DUMP_RI( dst, imm );
+ if(dst.mod == mod_REG)
+ x86_mov_reg_imm(p, dst, imm);
+ else
+ {
+ emit_1ub(p, 0xc7);
+ emit_modrm_noreg(p, 0, dst);
+ emit_1i(p, imm);
+ }
+}
+
+void x86_mov16_imm( struct x86_function *p, struct x86_reg dst, uint16_t imm )
+{
+ DUMP_RI( dst, imm );
+ emit_1ub(p, 0x66);
+ if(dst.mod == mod_REG)
+ {
+ emit_1ub(p, 0xb8 + dst.idx);
+ emit_2ub(p, imm & 0xff, imm >> 8);
+ }
+ else
+ {
+ emit_1ub(p, 0xc7);
+ emit_modrm_noreg(p, 0, dst);
+ emit_2ub(p, imm & 0xff, imm >> 8);
+ }
+}
+
+void x86_mov8_imm( struct x86_function *p, struct x86_reg dst, uint8_t imm )
+{
+ DUMP_RI( dst, imm );
+ if(dst.mod == mod_REG)
+ {
+ emit_1ub(p, 0xb0 + dst.idx);
+ emit_1ub(p, imm);
+ }
+ else
+ {
+ emit_1ub(p, 0xc6);
+ emit_modrm_noreg(p, 0, dst);
+ emit_1ub(p, imm);
+ }
+}
+
/**
* Immediate group 1 instructions.
*/
@@ -520,7 +577,7 @@ void x86_push( struct x86_function *p,
}
- p->stack_offset += 4;
+ p->stack_offset += sizeof(void*);
}
void x86_push_imm32( struct x86_function *p,
@@ -530,7 +587,7 @@ void x86_push_imm32( struct x86_function *p,
emit_1ub(p, 0x68);
emit_1i(p, imm32);
- p->stack_offset += 4;
+ p->stack_offset += sizeof(void*);
}
@@ -540,23 +597,33 @@ void x86_pop( struct x86_function *p,
DUMP_R( reg );
assert(reg.mod == mod_REG);
emit_1ub(p, 0x58 + reg.idx);
- p->stack_offset -= 4;
+ p->stack_offset -= sizeof(void*);
}
void x86_inc( struct x86_function *p,
struct x86_reg reg )
{
DUMP_R( reg );
- assert(reg.mod == mod_REG);
- emit_1ub(p, 0x40 + reg.idx);
+ if(x86_target(p) == X86_32 && reg.mod == mod_REG)
+ {
+ emit_1ub(p, 0x40 + reg.idx);
+ return;
+ }
+ emit_1ub(p, 0xff);
+ emit_modrm_noreg(p, 0, reg);
}
void x86_dec( struct x86_function *p,
struct x86_reg reg )
{
DUMP_R( reg );
- assert(reg.mod == mod_REG);
- emit_1ub(p, 0x48 + reg.idx);
+ if(x86_target(p) == X86_32 && reg.mod == mod_REG)
+ {
+ emit_1ub(p, 0x48 + reg.idx);
+ return;
+ }
+ emit_1ub(p, 0xff);
+ emit_modrm_noreg(p, 1, reg);
}
void x86_ret( struct x86_function *p )
@@ -583,9 +650,82 @@ void x86_mov( struct x86_function *p,
struct x86_reg src )
{
DUMP_RR( dst, src );
+ /* special hack for reading arguments until we support x86-64 registers everywhere */
+ if(src.mod == mod_REG && dst.mod == mod_REG && (src.idx >= 8 || dst.idx >= 8))
+ {
+ uint8_t rex = 0x40;
+ if(dst.idx >= 8)
+ {
+ rex |= 4;
+ dst.idx -= 8;
+ }
+ if(src.idx >= 8)
+ {
+ rex |= 1;
+ src.idx -= 8;
+ }
+ emit_1ub(p, rex);
+ }
+ emit_op_modrm( p, 0x8b, 0x89, dst, src );
+}
+
+void x86_mov16( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ DUMP_RR( dst, src );
+ emit_1ub(p, 0x66);
emit_op_modrm( p, 0x8b, 0x89, dst, src );
}
+void x86_mov8( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ DUMP_RR( dst, src );
+ emit_op_modrm( p, 0x8a, 0x88, dst, src );
+}
+
+void x64_mov64( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ uint8_t rex = 0x48;
+ DUMP_RR( dst, src );
+ assert(x86_target(p) != X86_32);
+
+ /* special hack for reading arguments until we support x86-64 registers everywhere */
+ if(src.mod == mod_REG && dst.mod == mod_REG && (src.idx >= 8 || dst.idx >= 8))
+ {
+ if(dst.idx >= 8)
+ {
+ rex |= 4;
+ dst.idx -= 8;
+ }
+ if(src.idx >= 8)
+ {
+ rex |= 1;
+ src.idx -= 8;
+ }
+ }
+ emit_1ub(p, rex);
+ emit_op_modrm( p, 0x8b, 0x89, dst, src );
+}
+
+void x86_movzx8(struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+ DUMP_RR( dst, src );
+ emit_2ub(p, 0x0f, 0xb6);
+ emit_modrm(p, dst, src);
+}
+
+void x86_movzx16(struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+ DUMP_RR( dst, src );
+ emit_2ub(p, 0x0f, 0xb7);
+ emit_modrm(p, dst, src);
+}
+
void x86_xor( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
@@ -680,6 +820,61 @@ void x86_div( struct x86_function *p,
emit_op_modrm(p, 0xf7, 0, x86_make_reg(file_REG32, 6), src);
}
+void x86_bswap( struct x86_function *p, struct x86_reg reg )
+{
+ DUMP_R(reg);
+ assert(reg.file == file_REG32);
+ assert(reg.mod == mod_REG);
+ emit_2ub(p, 0x0f, 0xc8 + reg.idx);
+}
+
+void x86_shr_imm( struct x86_function *p, struct x86_reg reg, unsigned imm )
+{
+ DUMP_RI(reg, imm);
+ if(imm == 1)
+ {
+ emit_1ub(p, 0xd1);
+ emit_modrm_noreg(p, 5, reg);
+ }
+ else
+ {
+ emit_1ub(p, 0xc1);
+ emit_modrm_noreg(p, 5, reg);
+ emit_1ub(p, imm);
+ }
+}
+
+void x86_sar_imm( struct x86_function *p, struct x86_reg reg, unsigned imm )
+{
+ DUMP_RI(reg, imm);
+ if(imm == 1)
+ {
+ emit_1ub(p, 0xd1);
+ emit_modrm_noreg(p, 7, reg);
+ }
+ else
+ {
+ emit_1ub(p, 0xc1);
+ emit_modrm_noreg(p, 7, reg);
+ emit_1ub(p, imm);
+ }
+}
+
+void x86_shl_imm( struct x86_function *p, struct x86_reg reg, unsigned imm )
+{
+ DUMP_RI(reg, imm);
+ if(imm == 1)
+ {
+ emit_1ub(p, 0xd1);
+ emit_modrm_noreg(p, 4, reg);
+ }
+ else
+ {
+ emit_1ub(p, 0xc1);
+ emit_modrm_noreg(p, 4, reg);
+ emit_1ub(p, imm);
+ }
+}
/***********************************************************************
@@ -1013,6 +1208,77 @@ void sse_movmskps( struct x86_function *p,
* SSE2 instructions
*/
+void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+ DUMP_RR(dst, src);
+ emit_2ub(p, 0x66, 0x0f);
+ if(dst.mod == mod_REG && dst.file == file_REG32)
+ {
+ emit_1ub(p, 0x7e);
+ emit_modrm(p, src, dst);
+ }
+ else
+ {
+ emit_op_modrm(p, 0x6e, 0x7e, dst, src);
+ }
+}
+
+void sse2_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+ DUMP_RR(dst, src);
+ switch (dst.mod) {
+ case mod_REG:
+ emit_3ub(p, 0xf3, 0x0f, 0x7e);
+ emit_modrm(p, dst, src);
+ break;
+ case mod_INDIRECT:
+ case mod_DISP32:
+ case mod_DISP8:
+ assert(src.mod == mod_REG);
+ emit_3ub(p, 0x66, 0x0f, 0xd6);
+ emit_modrm(p, src, dst);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+}
+
+void sse2_movdqu( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+ DUMP_RR(dst, src);
+ emit_2ub(p, 0xf3, 0x0f);
+ emit_op_modrm(p, 0x6f, 0x7f, dst, src);
+}
+
+void sse2_movdqa( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+ DUMP_RR(dst, src);
+ emit_2ub(p, 0x66, 0x0f);
+ emit_op_modrm(p, 0x6f, 0x7f, dst, src);
+}
+
+void sse2_movsd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+ DUMP_RR(dst, src);
+ emit_2ub(p, 0xf2, 0x0f);
+ emit_op_modrm(p, 0x10, 0x11, dst, src);
+}
+
+void sse2_movupd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+ DUMP_RR(dst, src);
+ emit_2ub(p, 0x66, 0x0f);
+ emit_op_modrm(p, 0x10, 0x11, dst, src);
+}
+
+void sse2_movapd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+ DUMP_RR(dst, src);
+ emit_2ub(p, 0x66, 0x0f);
+ emit_op_modrm(p, 0x28, 0x29, dst, src);
+}
+
/**
* Perform a reduced swizzle:
*/
@@ -1027,6 +1293,28 @@ void sse2_pshufd( struct x86_function *p,
emit_1ub(p, shuf);
}
+void sse2_pshuflw( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src,
+ unsigned char shuf)
+{
+ DUMP_RRI( dst, src, shuf );
+ emit_3ub(p, 0xf2, X86_TWOB, 0x70);
+ emit_modrm(p, dst, src);
+ emit_1ub(p, shuf);
+}
+
+void sse2_pshufhw( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src,
+ unsigned char shuf)
+{
+ DUMP_RRI( dst, src, shuf );
+ emit_3ub(p, 0xf3, X86_TWOB, 0x70);
+ emit_modrm(p, dst, src);
+ emit_1ub(p, shuf);
+}
+
void sse2_cvttps2dq( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
@@ -1045,6 +1333,24 @@ void sse2_cvtps2dq( struct x86_function *p,
emit_modrm( p, dst, src );
}
+void sse2_cvtsd2ss( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ DUMP_RR( dst, src );
+ emit_3ub(p, 0xf2, 0x0f, 0x5a);
+ emit_modrm( p, dst, src );
+}
+
+void sse2_cvtpd2ps( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ DUMP_RR( dst, src );
+ emit_3ub(p, 0x66, 0x0f, 0x5a);
+ emit_modrm( p, dst, src );
+}
+
void sse2_packssdw( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
@@ -1081,6 +1387,97 @@ void sse2_punpcklbw( struct x86_function *p,
emit_modrm( p, dst, src );
}
+void sse2_punpcklwd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+ DUMP_RR( dst, src );
+ emit_3ub(p, 0x66, 0x0f, 0x61);
+ emit_modrm( p, dst, src );
+}
+
+void sse2_punpckldq( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+ DUMP_RR( dst, src );
+ emit_3ub(p, 0x66, 0x0f, 0x62);
+ emit_modrm( p, dst, src );
+}
+
+void sse2_punpcklqdq( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+ DUMP_RR( dst, src );
+ emit_3ub(p, 0x66, 0x0f, 0x6c);
+ emit_modrm( p, dst, src );
+}
+
+void sse2_psllw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
+{
+ DUMP_RI(dst, imm);
+ emit_3ub(p, 0x66, 0x0f, 0x71);
+ emit_modrm_noreg(p, 6, dst);
+ emit_1ub(p, imm);
+}
+
+void sse2_pslld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
+{
+ DUMP_RI(dst, imm);
+ emit_3ub(p, 0x66, 0x0f, 0x72);
+ emit_modrm_noreg(p, 6, dst);
+ emit_1ub(p, imm);
+}
+
+void sse2_psllq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
+{
+ DUMP_RI(dst, imm);
+ emit_3ub(p, 0x66, 0x0f, 0x73);
+ emit_modrm_noreg(p, 6, dst);
+ emit_1ub(p, imm);
+}
+
+void sse2_psrlw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
+{
+ DUMP_RI(dst, imm);
+ emit_3ub(p, 0x66, 0x0f, 0x71);
+ emit_modrm_noreg(p, 2, dst);
+ emit_1ub(p, imm);
+}
+
+void sse2_psrld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
+{
+ DUMP_RI(dst, imm);
+ emit_3ub(p, 0x66, 0x0f, 0x72);
+ emit_modrm_noreg(p, 2, dst);
+ emit_1ub(p, imm);
+}
+
+void sse2_psrlq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
+{
+ DUMP_RI(dst, imm);
+ emit_3ub(p, 0x66, 0x0f, 0x73);
+ emit_modrm_noreg(p, 2, dst);
+ emit_1ub(p, imm);
+}
+
+void sse2_psraw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
+{
+ DUMP_RI(dst, imm);
+ emit_3ub(p, 0x66, 0x0f, 0x71);
+ emit_modrm_noreg(p, 4, dst);
+ emit_1ub(p, imm);
+}
+
+void sse2_psrad_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
+{
+ DUMP_RI(dst, imm);
+ emit_3ub(p, 0x66, 0x0f, 0x72);
+ emit_modrm_noreg(p, 4, dst);
+ emit_1ub(p, imm);
+}
+
+void sse2_por( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+ DUMP_RR(dst, src);
+ emit_3ub(p, 0x66, 0x0f, 0xeb);
+ emit_modrm(p, dst, src);
+}
void sse2_rcpps( struct x86_function *p,
struct x86_reg dst,
@@ -1100,18 +1497,6 @@ void sse2_rcpss( struct x86_function *p,
emit_modrm( p, dst, src );
}
-void sse2_movd( struct x86_function *p,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( dst, src );
- emit_2ub(p, 0x66, X86_TWOB);
- emit_op_modrm( p, 0x6e, 0x7e, dst, src );
-}
-
-
-
-
/***********************************************************************
* x87 instructions
*/
@@ -1702,23 +2087,80 @@ void x86_cdecl_caller_pop_regs( struct x86_function *p )
}
-/* Retreive a reference to one of the function arguments, taking into
- * account any push/pop activity:
- */
struct x86_reg x86_fn_arg( struct x86_function *p,
- unsigned arg )
+ unsigned arg )
{
- return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
+ switch(x86_target(p))
+ {
+ case X86_64_WIN64_ABI:
+ /* Microsoft uses a different calling convention than the rest of the world */
+ switch(arg)
+ {
+ case 1:
+ return x86_make_reg(file_REG32, reg_CX);
+ case 2:
+ return x86_make_reg(file_REG32, reg_DX);
+ case 3:
+ return x86_make_reg(file_REG32, reg_R8);
+ case 4:
+ return x86_make_reg(file_REG32, reg_R9);
+ default:
+ /* Win64 allocates stack slots as if it pushed the first 4 arguments too */
+ return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
+ p->stack_offset + arg * 8);
+ }
+ case X86_64_STD_ABI:
+ switch(arg)
+ {
+ case 1:
+ return x86_make_reg(file_REG32, reg_DI);
+ case 2:
+ return x86_make_reg(file_REG32, reg_SI);
+ case 3:
+ return x86_make_reg(file_REG32, reg_DX);
+ case 4:
+ return x86_make_reg(file_REG32, reg_CX);
+ case 5:
+ return x86_make_reg(file_REG32, reg_R8);
+ case 6:
+ return x86_make_reg(file_REG32, reg_R9);
+ default:
+ return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
+ p->stack_offset + (arg - 6) * 8); /* ??? */
+ }
+ case X86_32:
+ return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
p->stack_offset + arg * 4); /* ??? */
+ default:
+ abort();
+ }
}
+static void x86_init_func_common( struct x86_function *p )
+{
+ util_cpu_detect();
+ p->caps = 0;
+ if(util_cpu_caps.has_mmx)
+ p->caps |= X86_MMX;
+ if(util_cpu_caps.has_mmx2)
+ p->caps |= X86_MMX2;
+ if(util_cpu_caps.has_sse)
+ p->caps |= X86_SSE;
+ if(util_cpu_caps.has_sse2)
+ p->caps |= X86_SSE2;
+ if(util_cpu_caps.has_sse3)
+ p->caps |= X86_SSE3;
+ if(util_cpu_caps.has_sse4_1)
+ p->caps |= X86_SSE4_1;
+ p->csr = p->store;
+ DUMP_START();
+}
void x86_init_func( struct x86_function *p )
{
p->size = 0;
p->store = NULL;
- p->csr = p->store;
- DUMP_START();
+ x86_init_func_common(p);
}
void x86_init_func_size( struct x86_function *p, unsigned code_size )
@@ -1728,8 +2170,7 @@ void x86_init_func_size( struct x86_function *p, unsigned code_size )
if (p->store == NULL) {
p->store = p->error_overflow;
}
- p->csr = p->store;
- DUMP_START();
+ x86_init_func_common(p);
}
void x86_release_func( struct x86_function *p )
@@ -1743,20 +2184,35 @@ void x86_release_func( struct x86_function *p )
}
-void (*x86_get_func( struct x86_function *p ))(void)
+static INLINE x86_func
+voidptr_to_x86_func(void *v)
+{
+ union {
+ void *v;
+ x86_func f;
+ } u;
+ assert(sizeof(u.v) == sizeof(u.f));
+ u.v = v;
+ return u.f;
+}
+
+
+x86_func x86_get_func( struct x86_function *p )
{
DUMP_END();
if (DISASSEM && p->store)
debug_printf("disassemble %p %p\n", p->store, p->csr);
if (p->store == p->error_overflow)
- return (void (*)(void)) NULL;
+ return voidptr_to_x86_func(NULL);
else
- return (void (*)(void)) p->store;
+ return voidptr_to_x86_func(p->store);
}
#else
+void x86sse_dummy( void );
+
void x86sse_dummy( void )
{
}
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
index 319b836ffb1..2b9678b1765 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
@@ -24,22 +24,31 @@
#ifndef _RTASM_X86SSE_H_
#define _RTASM_X86SSE_H_
+#include "pipe/p_compiler.h"
#include "pipe/p_config.h"
-#if defined(PIPE_ARCH_X86)
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
/* It is up to the caller to ensure that instructions issued are
* suitable for the host cpu. There are no checks made in this module
* for mmx/sse/sse2 support on the cpu.
*/
struct x86_reg {
- unsigned file:3;
- unsigned idx:3;
+ unsigned file:2;
+ unsigned idx:4;
unsigned mod:2; /* mod_REG if this is just a register */
int disp:24; /* only +/- 23bits of offset - should be enough... */
};
+#define X86_MMX 1
+#define X86_MMX2 2
+#define X86_SSE 4
+#define X86_SSE2 8
+#define X86_SSE3 0x10
+#define X86_SSE4_1 0x20
+
struct x86_function {
+ unsigned caps;
unsigned size;
unsigned char *store;
unsigned char *csr;
@@ -75,7 +84,15 @@ enum x86_reg_name {
reg_SP,
reg_BP,
reg_SI,
- reg_DI
+ reg_DI,
+ reg_R8,
+ reg_R9,
+ reg_R10,
+ reg_R11,
+ reg_R12,
+ reg_R13,
+ reg_R14,
+ reg_R15
};
@@ -102,14 +119,42 @@ enum sse_cc {
#define cc_Z cc_E
#define cc_NZ cc_NE
+
+/** generic pointer to function */
+typedef void (*x86_func)(void);
+
+
/* Begin/end/retrieve function creation:
*/
+enum x86_target
+{
+ X86_32,
+ X86_64_STD_ABI,
+ X86_64_WIN64_ABI
+};
+
+/* make this read a member of x86_function if target != host is desired */
+static INLINE enum x86_target x86_target( struct x86_function* p )
+{
+#ifdef PIPE_ARCH_X86
+ return X86_32;
+#elif defined(_WIN64)
+ return X86_64_WIN64_ABI;
+#elif defined(PIPE_ARCH_X86_64)
+ return X86_64_STD_ABI;
+#endif
+}
+
+static INLINE unsigned x86_target_caps( struct x86_function* p )
+{
+ return p->caps;
+}
void x86_init_func( struct x86_function *p );
void x86_init_func_size( struct x86_function *p, unsigned code_size );
void x86_release_func( struct x86_function *p );
-void (*x86_get_func( struct x86_function *p ))( void );
+x86_func x86_get_func( struct x86_function *p );
/* Debugging:
*/
@@ -133,6 +178,8 @@ struct x86_reg x86_get_base_reg( struct x86_reg reg );
*/
int x86_get_label( struct x86_function *p );
+void x64_rexw(struct x86_function *p);
+
void x86_jcc( struct x86_function *p,
enum x86_cc cc,
int label );
@@ -173,18 +220,54 @@ void mmx_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void mmx_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_movdqu( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_movdqa( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_movsd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_movupd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_movapd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+
void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_cvtdq2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_cvtsd2ss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_cvtpd2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+
void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
unsigned char shuf );
+void sse2_pshuflw( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
+ unsigned char shuf );
+void sse2_pshufhw( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
+ unsigned char shuf );
void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_punpcklwd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_punpckldq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_punpcklqdq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+
+void sse2_psllw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
+void sse2_pslld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
+void sse2_psllq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
+
+void sse2_psrlw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
+void sse2_psrld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
+void sse2_psrlq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
+
+void sse2_psraw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
+void sse2_psrad_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
+
+void sse2_por( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+
+void sse2_pshuflw( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm );
+void sse2_pshufhw( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm );
+void sse2_pshufd( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm );
void sse_prefetchnta( struct x86_function *p, struct x86_reg ptr);
void sse_prefetch0( struct x86_function *p, struct x86_reg ptr);
@@ -222,7 +305,6 @@ void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg
void sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src );
-void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_movmskps( struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
@@ -232,6 +314,14 @@ void x86_dec( struct x86_function *p, struct x86_reg reg );
void x86_inc( struct x86_function *p, struct x86_reg reg );
void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x64_mov64( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x86_mov8( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x86_mov16( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x86_movzx8(struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x86_movzx16(struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x86_mov_imm(struct x86_function *p, struct x86_reg dst, int imm );
+void x86_mov8_imm(struct x86_function *p, struct x86_reg dst, uint8_t imm );
+void x86_mov16_imm(struct x86_function *p, struct x86_reg dst, uint16_t imm );
void x86_mul( struct x86_function *p, struct x86_reg src );
void x86_imul( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
@@ -245,7 +335,10 @@ void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_sahf( struct x86_function *p );
void x86_div( struct x86_function *p, struct x86_reg src );
-
+void x86_bswap( struct x86_function *p, struct x86_reg src );
+void x86_shr_imm( struct x86_function *p, struct x86_reg reg, unsigned imm );
+void x86_sar_imm( struct x86_function *p, struct x86_reg reg, unsigned imm );
+void x86_shl_imm( struct x86_function *p, struct x86_reg reg, unsigned imm );
void x86_cdecl_caller_push_regs( struct x86_function *p );
void x86_cdecl_caller_pop_regs( struct x86_function *p );
diff --git a/src/gallium/auxiliary/target-helpers/inline_debug_helper.h b/src/gallium/auxiliary/target-helpers/inline_debug_helper.h
new file mode 100644
index 00000000000..0433da6141d
--- /dev/null
+++ b/src/gallium/auxiliary/target-helpers/inline_debug_helper.h
@@ -0,0 +1,44 @@
+
+#ifndef INLINE_DEBUG_HELPER_H
+#define INLINE_DEBUG_HELPER_H
+
+#include "pipe/p_compiler.h"
+#include "util/u_debug.h"
+
+
+/* Helper function to wrap a screen with
+ * one or more debug driver: rbug, trace.
+ */
+
+#ifdef GALLIUM_TRACE
+#include "trace/tr_public.h"
+#endif
+
+#ifdef GALLIUM_RBUG
+#include "rbug/rbug_public.h"
+#endif
+
+#ifdef GALLIUM_GALAHAD
+#include "galahad/glhd_public.h"
+#endif
+
+static INLINE struct pipe_screen *
+debug_screen_wrap(struct pipe_screen *screen)
+{
+
+#if defined(GALLIUM_RBUG)
+ screen = rbug_screen_create(screen);
+#endif
+
+#if defined(GALLIUM_TRACE)
+ screen = trace_screen_create(screen);
+#endif
+
+#if defined(GALLIUM_GALAHAD)
+ screen = galahad_screen_create(screen);
+#endif
+
+ return screen;
+}
+
+#endif
diff --git a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h
new file mode 100644
index 00000000000..036c1ee48a8
--- /dev/null
+++ b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h
@@ -0,0 +1,63 @@
+
+#ifndef INLINE_SW_HELPER_H
+#define INLINE_SW_HELPER_H
+
+#include "pipe/p_compiler.h"
+#include "util/u_debug.h"
+#include "state_tracker/sw_winsys.h"
+
+
+/* Helper function to choose and instantiate one of the software rasterizers:
+ * cell, llvmpipe, softpipe.
+ */
+
+#ifdef GALLIUM_SOFTPIPE
+#include "softpipe/sp_public.h"
+#endif
+
+#ifdef GALLIUM_LLVMPIPE
+#include "llvmpipe/lp_public.h"
+#endif
+
+#ifdef GALLIUM_CELL
+#include "cell/ppu/cell_public.h"
+#endif
+
+static INLINE struct pipe_screen *
+sw_screen_create(struct sw_winsys *winsys)
+{
+ const char *default_driver;
+ const char *driver;
+ struct pipe_screen *screen = NULL;
+
+#if defined(GALLIUM_CELL)
+ default_driver = "cell";
+#elif defined(GALLIUM_LLVMPIPE)
+ default_driver = "llvmpipe";
+#elif defined(GALLIUM_SOFTPIPE)
+ default_driver = "softpipe";
+#else
+ default_driver = "";
+#endif
+
+ driver = debug_get_option("GALLIUM_DRIVER", default_driver);
+
+#if defined(GALLIUM_CELL)
+ if (screen == NULL && strcmp(driver, "cell") == 0)
+ screen = cell_create_screen(winsys);
+#endif
+
+#if defined(GALLIUM_LLVMPIPE)
+ if (screen == NULL && strcmp(driver, "llvmpipe") == 0)
+ screen = llvmpipe_create_screen(winsys);
+#endif
+
+#if defined(GALLIUM_SOFTPIPE)
+ if (screen == NULL)
+ screen = softpipe_create_screen(winsys);
+#endif
+
+ return screen;
+}
+
+#endif
diff --git a/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h b/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h
new file mode 100644
index 00000000000..0b4e7404034
--- /dev/null
+++ b/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h
@@ -0,0 +1,34 @@
+
+#ifndef INLINE_WRAPPER_SW_HELPER_H
+#define INLINE_WRAPPER_SW_HELPER_H
+
+#include "target-helpers/inline_sw_helper.h"
+#include "sw/wrapper/wrapper_sw_winsys.h"
+
+/**
+ * Try to wrap a hw screen with a software screen.
+ * On failure will return given screen.
+ */
+static INLINE struct pipe_screen *
+sw_screen_wrap(struct pipe_screen *screen)
+{
+ struct sw_winsys *sws;
+ struct pipe_screen *sw_screen;
+
+ sws = wrapper_sw_winsys_warp_pipe_screen(screen);
+ if (!sws)
+ goto err;
+
+ sw_screen = sw_screen_create(sws);
+ if (sw_screen == screen)
+ goto err_winsys;
+
+ return sw_screen;
+
+err_winsys:
+ sws->destroy(sws);
+err:
+ return screen;
+}
+
+#endif
diff --git a/src/gallium/auxiliary/target-helpers/wrap_screen.c b/src/gallium/auxiliary/target-helpers/wrap_screen.c
index eb475123198..df5d56a53c9 100644
--- a/src/gallium/auxiliary/target-helpers/wrap_screen.c
+++ b/src/gallium/auxiliary/target-helpers/wrap_screen.c
@@ -33,6 +33,7 @@
#include "target-helpers/wrap_screen.h"
#include "trace/tr_public.h"
+#include "rbug/rbug_public.h"
#include "identity/id_public.h"
#include "util/u_debug.h"
@@ -56,6 +57,9 @@ gallium_wrap_screen( struct pipe_screen *screen )
/* Trace does its own checking if it should run */
screen = trace_screen_create(screen);
+ /* Rbug does its own checking if it should run */
+ screen = rbug_screen_create(screen);
+
return screen;
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt
deleted file mode 100644
index 5d9eed92580..00000000000
--- a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt
+++ /dev/null
@@ -1,1127 +0,0 @@
-TGSI Instruction Specification
-==============================
-==============================
-
-
-1 Instruction Set Operations
-=============================
-
-
-1.1 GL_NV_vertex_program
--------------------------
-
-
-1.1.1 ARL - Address Register Load
-
- dst.x = floor(src.x)
- dst.y = floor(src.y)
- dst.z = floor(src.z)
- dst.w = floor(src.w)
-
-
-1.1.2 MOV - Move
-
- dst.x = src.x
- dst.y = src.y
- dst.z = src.z
- dst.w = src.w
-
-
-1.1.3 LIT - Light Coefficients
-
- dst.x = 1.0
- dst.y = max(src.x, 0.0)
- dst.z = (src.x > 0.0) ? pow(max(src.y, 0.0), clamp(src.w, -128.0, 128.0)) : 0.0
- dst.w = 1.0
-
-
-1.1.4 RCP - Reciprocal
-
- dst.x = 1.0 / src.x
- dst.y = 1.0 / src.x
- dst.z = 1.0 / src.x
- dst.w = 1.0 / src.x
-
-
-1.1.5 RSQ - Reciprocal Square Root
-
- dst.x = 1.0 / sqrt(abs(src.x))
- dst.y = 1.0 / sqrt(abs(src.x))
- dst.z = 1.0 / sqrt(abs(src.x))
- dst.w = 1.0 / sqrt(abs(src.x))
-
-
-1.1.6 EXP - Approximate Exponential Base 2
-
- dst.x = pow(2.0, floor(src.x))
- dst.y = src.x - floor(src.x)
- dst.z = pow(2.0, src.x)
- dst.w = 1.0
-
-
-1.1.7 LOG - Approximate Logarithm Base 2
-
- dst.x = floor(lg2(abs(src.x)))
- dst.y = abs(src.x) / pow(2.0, floor(lg2(abs(src.x))))
- dst.z = lg2(abs(src.x))
- dst.w = 1.0
-
-
-1.1.8 MUL - Multiply
-
- dst.x = src0.x * src1.x
- dst.y = src0.y * src1.y
- dst.z = src0.z * src1.z
- dst.w = src0.w * src1.w
-
-
-1.1.9 ADD - Add
-
- dst.x = src0.x + src1.x
- dst.y = src0.y + src1.y
- dst.z = src0.z + src1.z
- dst.w = src0.w + src1.w
-
-
-1.1.10 DP3 - 3-component Dot Product
-
- dst.x = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
- dst.y = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
- dst.z = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
- dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
-
-
-1.1.11 DP4 - 4-component Dot Product
-
- dst.x = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
- dst.y = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
- dst.z = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
- dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
-
-
-1.1.12 DST - Distance Vector
-
- dst.x = 1.0
- dst.y = src0.y * src1.y
- dst.z = src0.z
- dst.w = src1.w
-
-
-1.1.13 MIN - Minimum
-
- dst.x = min(src0.x, src1.x)
- dst.y = min(src0.y, src1.y)
- dst.z = min(src0.z, src1.z)
- dst.w = min(src0.w, src1.w)
-
-
-1.1.14 MAX - Maximum
-
- dst.x = max(src0.x, src1.x)
- dst.y = max(src0.y, src1.y)
- dst.z = max(src0.z, src1.z)
- dst.w = max(src0.w, src1.w)
-
-
-1.1.15 SLT - Set On Less Than
-
- dst.x = (src0.x < src1.x) ? 1.0 : 0.0
- dst.y = (src0.y < src1.y) ? 1.0 : 0.0
- dst.z = (src0.z < src1.z) ? 1.0 : 0.0
- dst.w = (src0.w < src1.w) ? 1.0 : 0.0
-
-
-1.1.16 SGE - Set On Greater Equal Than
-
- dst.x = (src0.x >= src1.x) ? 1.0 : 0.0
- dst.y = (src0.y >= src1.y) ? 1.0 : 0.0
- dst.z = (src0.z >= src1.z) ? 1.0 : 0.0
- dst.w = (src0.w >= src1.w) ? 1.0 : 0.0
-
-
-1.1.17 MAD - Multiply And Add
-
- dst.x = src0.x * src1.x + src2.x
- dst.y = src0.y * src1.y + src2.y
- dst.z = src0.z * src1.z + src2.z
- dst.w = src0.w * src1.w + src2.w
-
-
-1.2 GL_ATI_fragment_shader
----------------------------
-
-
-1.2.1 SUB - Subtract
-
- dst.x = src0.x - src1.x
- dst.y = src0.y - src1.y
- dst.z = src0.z - src1.z
- dst.w = src0.w - src1.w
-
-
-1.2.2 DOT3 - 3-component Dot Product
-
- Alias for DP3.
-
-
-1.2.3 DOT4 - 4-component Dot Product
-
- Alias for DP4.
-
-
-1.2.4 LERP - Linear Interpolate
-
- dst.x = src0.x * (src1.x - src2.x) + src2.x
- dst.y = src0.y * (src1.y - src2.y) + src2.y
- dst.z = src0.z * (src1.z - src2.z) + src2.z
- dst.w = src0.w * (src1.w - src2.w) + src2.w
-
-
-1.2.5 CND - Condition
-
- dst.x = (src2.x > 0.5) ? src0.x : src1.x
- dst.y = (src2.y > 0.5) ? src0.y : src1.y
- dst.z = (src2.z > 0.5) ? src0.z : src1.z
- dst.w = (src2.w > 0.5) ? src0.w : src1.w
-
-
-1.2.6 CND0 - Condition Zero
-
- Removed. Use (CMP src2, src1, src0) instead.
-
-1.2.7 DOT2ADD - 2-component Dot Product And Add
-
- dst.x = src0.x * src1.x + src0.y * src1.y + src2.x
- dst.y = src0.x * src1.x + src0.y * src1.y + src2.x
- dst.z = src0.x * src1.x + src0.y * src1.y + src2.x
- dst.w = src0.x * src1.x + src0.y * src1.y + src2.x
-
-
-1.3 GL_EXT_vertex_shader
--------------------------
-
-
-1.3.1 INDEX - Array Lookup
-
- Considered for removal from language.
-
-
-1.3.2 NEGATE - Negate
-
- Considered for removal from language.
-
-
-1.3.3 MADD - Multiply And Add
-
- Alias for MAD.
-
-
-1.3.4 FRAC - Fraction
-
- dst.x = src.x - floor(src.x)
- dst.y = src.y - floor(src.y)
- dst.z = src.z - floor(src.z)
- dst.w = src.w - floor(src.w)
-
-
-1.3.5 SETGE - Set On Greater Equal
-
- Alias for SGE.
-
-
-1.3.6 SETLT - Set On Less Than
-
- Alias for SLT.
-
-
-1.3.7 CLAMP - Clamp
-
- dst.x = clamp(src0.x, src1.x, src2.x)
- dst.y = clamp(src0.y, src1.y, src2.y)
- dst.z = clamp(src0.z, src1.z, src2.z)
- dst.w = clamp(src0.w, src1.w, src2.w)
-
-
-1.3.8 FLOOR - Floor
-
- dst.x = floor(src.x)
- dst.y = floor(src.y)
- dst.z = floor(src.z)
- dst.w = floor(src.w)
-
-
-1.3.9 ROUND - Round
-
- dst.x = round(src.x)
- dst.y = round(src.y)
- dst.z = round(src.z)
- dst.w = round(src.w)
-
-
-1.3.10 EXPBASE2 - Exponential Base 2
-
- dst.x = pow(2.0, src.x)
- dst.y = pow(2.0, src.x)
- dst.z = pow(2.0, src.x)
- dst.w = pow(2.0, src.x)
-
-
-1.3.11 LOGBASE2 - Logarithm Base 2
-
- dst.x = lg2(src.x)
- dst.y = lg2(src.x)
- dst.z = lg2(src.x)
- dst.w = lg2(src.x)
-
-
-1.3.12 POWER - Power
-
- dst.x = pow(src0.x, src1.x)
- dst.y = pow(src0.x, src1.x)
- dst.z = pow(src0.x, src1.x)
- dst.w = pow(src0.x, src1.x)
-
-
-1.3.13 RECIP - Reciprocal
-
- Alias for RCP.
-
-
-1.3.14 RECIPSQRT - Reciprocal Square Root
-
- Alias for RSQ.
-
-
-1.3.15 CROSSPRODUCT - Cross Product
-
- dst.x = src0.y * src1.z - src1.y * src0.z
- dst.y = src0.z * src1.x - src1.z * src0.x
- dst.z = src0.x * src1.y - src1.x * src0.y
- dst.w = 1.0
-
-
-1.3.16 MULTIPLYMATRIX - Multiply Matrix
-
- Considered for removal from language.
-
-
-1.4 GL_NV_vertex_program1_1
-----------------------------
-
-
-1.4.1 ABS - Absolute
-
- dst.x = abs(src.x)
- dst.y = abs(src.y)
- dst.z = abs(src.z)
- dst.w = abs(src.w)
-
-
-1.4.2 RCC - Reciprocal Clamped
-
- dst.x = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020)
- dst.y = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020)
- dst.z = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020)
- dst.w = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020)
-
-
-1.4.3 DPH - Homogeneous Dot Product
-
- dst.x = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
- dst.y = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
- dst.z = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
- dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
-
-
-1.5 GL_NV_fragment_program
----------------------------
-
-
-1.5.1 COS - Cosine
-
- dst.x = cos(src.x)
- dst.y = cos(src.x)
- dst.z = cos(src.x)
- dst.w = cos(src.w)
-
-
-1.5.2 DDX - Derivative Relative To X
-
- dst.x = partialx(src.x)
- dst.y = partialx(src.y)
- dst.z = partialx(src.z)
- dst.w = partialx(src.w)
-
-
-1.5.3 DDY - Derivative Relative To Y
-
- dst.x = partialy(src.x)
- dst.y = partialy(src.y)
- dst.z = partialy(src.z)
- dst.w = partialy(src.w)
-
-
-1.5.4 EX2 - Exponential Base 2
-
- Alias for EXPBASE2.
-
-
-1.5.5 FLR - Floor
-
- Alias for FLOOR.
-
-
-1.5.6 FRC - Fraction
-
- Alias for FRAC.
-
-
-1.5.7 KILP - Predicated Discard
-
- discard
-
-
-1.5.8 LG2 - Logarithm Base 2
-
- Alias for LOGBASE2.
-
-
-1.5.9 LRP - Linear Interpolate
-
- Alias for LERP.
-
-
-1.5.10 PK2H - Pack Two 16-bit Floats
-
- TBD
-
-
-1.5.11 PK2US - Pack Two Unsigned 16-bit Scalars
-
- TBD
-
-
-1.5.12 PK4B - Pack Four Signed 8-bit Scalars
-
- TBD
-
-
-1.5.13 PK4UB - Pack Four Unsigned 8-bit Scalars
-
- TBD
-
-
-1.5.14 POW - Power
-
- Alias for POWER.
-
-
-1.5.15 RFL - Reflection Vector
-
- dst.x = 2.0 * (src0.x * src1.x + src0.y * src1.y + src0.z * src1.z) / (src0.x * src0.x + src0.y * src0.y + src0.z * src0.z) * src0.x - src1.x
- dst.y = 2.0 * (src0.x * src1.x + src0.y * src1.y + src0.z * src1.z) / (src0.x * src0.x + src0.y * src0.y + src0.z * src0.z) * src0.y - src1.y
- dst.z = 2.0 * (src0.x * src1.x + src0.y * src1.y + src0.z * src1.z) / (src0.x * src0.x + src0.y * src0.y + src0.z * src0.z) * src0.z - src1.z
- dst.w = 1.0
-
-
-1.5.16 SEQ - Set On Equal
-
- dst.x = (src0.x == src1.x) ? 1.0 : 0.0
- dst.y = (src0.y == src1.y) ? 1.0 : 0.0
- dst.z = (src0.z == src1.z) ? 1.0 : 0.0
- dst.w = (src0.w == src1.w) ? 1.0 : 0.0
-
-
-1.5.17 SFL - Set On False
-
- dst.x = 0.0
- dst.y = 0.0
- dst.z = 0.0
- dst.w = 0.0
-
-
-1.5.18 SGT - Set On Greater Than
-
- dst.x = (src0.x > src1.x) ? 1.0 : 0.0
- dst.y = (src0.y > src1.y) ? 1.0 : 0.0
- dst.z = (src0.z > src1.z) ? 1.0 : 0.0
- dst.w = (src0.w > src1.w) ? 1.0 : 0.0
-
-
-1.5.19 SIN - Sine
-
- dst.x = sin(src.x)
- dst.y = sin(src.x)
- dst.z = sin(src.x)
- dst.w = sin(src.w)
-
-
-1.5.20 SLE - Set On Less Equal Than
-
- dst.x = (src0.x <= src1.x) ? 1.0 : 0.0
- dst.y = (src0.y <= src1.y) ? 1.0 : 0.0
- dst.z = (src0.z <= src1.z) ? 1.0 : 0.0
- dst.w = (src0.w <= src1.w) ? 1.0 : 0.0
-
-
-1.5.21 SNE - Set On Not Equal
-
- dst.x = (src0.x != src1.x) ? 1.0 : 0.0
- dst.y = (src0.y != src1.y) ? 1.0 : 0.0
- dst.z = (src0.z != src1.z) ? 1.0 : 0.0
- dst.w = (src0.w != src1.w) ? 1.0 : 0.0
-
-
-1.5.22 STR - Set On True
-
- dst.x = 1.0
- dst.y = 1.0
- dst.z = 1.0
- dst.w = 1.0
-
-
-1.5.23 TEX - Texture Lookup
-
- TBD
-
-
-1.5.24 TXD - Texture Lookup with Derivatives
-
- TBD
-
-
-1.5.25 TXP - Projective Texture Lookup
-
- TBD
-
-
-1.5.26 UP2H - Unpack Two 16-Bit Floats
-
- TBD
-
-
-1.5.27 UP2US - Unpack Two Unsigned 16-Bit Scalars
-
- TBD
-
-
-1.5.28 UP4B - Unpack Four Signed 8-Bit Values
-
- TBD
-
-
-1.5.29 UP4UB - Unpack Four Unsigned 8-Bit Scalars
-
- TBD
-
-
-1.5.30 X2D - 2D Coordinate Transformation
-
- dst.x = src0.x + src1.x * src2.x + src1.y * src2.y
- dst.y = src0.y + src1.x * src2.z + src1.y * src2.w
- dst.z = src0.x + src1.x * src2.x + src1.y * src2.y
- dst.w = src0.y + src1.x * src2.z + src1.y * src2.w
-
-
-1.6 GL_NV_vertex_program2
---------------------------
-
-
-1.6.1 ARA - Address Register Add
-
- TBD
-
-
-1.6.2 ARR - Address Register Load With Round
-
- dst.x = round(src.x)
- dst.y = round(src.y)
- dst.z = round(src.z)
- dst.w = round(src.w)
-
-
-1.6.3 BRA - Branch
-
- pc = target
-
-
-1.6.4 CAL - Subroutine Call
-
- push(pc)
- pc = target
-
-
-1.6.5 RET - Subroutine Call Return
-
- pc = pop()
-
-
-1.6.6 SSG - Set Sign
-
- dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0
- dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0
- dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0
- dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0
-
-
-1.7 GL_ARB_vertex_program
---------------------------
-
-
-1.7.1 SWZ - Extended Swizzle
-
- dst.x = src.x
- dst.y = src.y
- dst.z = src.z
- dst.w = src.w
-
-
-1.7.2 XPD - Cross Product
-
- Alias for CROSSPRODUCT.
-
-
-1.8 GL_ARB_fragment_program
-----------------------------
-
-
-1.8.1 CMP - Compare
-
- dst.x = (src0.x < 0.0) ? src1.x : src2.x
- dst.y = (src0.y < 0.0) ? src1.y : src2.y
- dst.z = (src0.z < 0.0) ? src1.z : src2.z
- dst.w = (src0.w < 0.0) ? src1.w : src2.w
-
-
-1.8.2 KIL - Conditional Discard
-
- if (src.x < 0.0 || src.y < 0.0 || src.z < 0.0 || src.w < 0.0)
- discard
- endif
-
-
-1.8.3 SCS - Sine Cosine
-
- dst.x = cos(src.x)
- dst.y = sin(src.x)
- dst.z = 0.0
- dst.y = 1.0
-
-
-1.8.4 TXB - Texture Lookup With Bias
-
- TBD
-
-
-1.9 GL_NV_fragment_program2
-----------------------------
-
-
-1.9.1 NRM - 3-component Vector Normalise
-
- dst.x = src.x / (src.x * src.x + src.y * src.y + src.z * src.z)
- dst.y = src.y / (src.x * src.x + src.y * src.y + src.z * src.z)
- dst.z = src.z / (src.x * src.x + src.y * src.y + src.z * src.z)
- dst.w = 1.0
-
-
-1.9.2 DIV - Divide
-
- dst.x = src0.x / src1.x
- dst.y = src0.y / src1.y
- dst.z = src0.z / src1.z
- dst.w = src0.w / src1.w
-
-
-1.9.3 DP2 - 2-component Dot Product
-
- dst.x = src0.x * src1.x + src0.y * src1.y
- dst.y = src0.x * src1.x + src0.y * src1.y
- dst.z = src0.x * src1.x + src0.y * src1.y
- dst.w = src0.x * src1.x + src0.y * src1.y
-
-
-1.9.4 DP2A - 2-component Dot Product And Add
-
- Alias for DOT2ADD.
-
-
-1.9.5 TXL - Texture Lookup With LOD
-
- TBD
-
-
-1.9.6 BRK - Break
-
- TBD
-
-
-1.9.7 IF - If
-
- TBD
-
-
-1.9.10 ELSE - Else
-
- TBD
-
-
-1.9.11 ENDIF - End If
-
- TBD
-
-
-1.10 GL_NV_vertex_program3
----------------------------
-
-
-1.10.1 PUSHA - Push Address Register On Stack
-
- push(src.x)
- push(src.y)
- push(src.z)
- push(src.w)
-
-
-1.10.2 POPA - Pop Address Register From Stack
-
- dst.w = pop()
- dst.z = pop()
- dst.y = pop()
- dst.x = pop()
-
-
-1.11 GL_NV_gpu_program4
-------------------------
-
-
-1.11.1 CEIL - Ceiling
-
- dst.x = ceil(src.x)
- dst.y = ceil(src.y)
- dst.z = ceil(src.z)
- dst.w = ceil(src.w)
-
-
-1.11.2 I2F - Integer To Float
-
- dst.x = (float) src.x
- dst.y = (float) src.y
- dst.z = (float) src.z
- dst.w = (float) src.w
-
-
-1.11.3 NOT - Bitwise Not
-
- dst.x = ~src.x
- dst.y = ~src.y
- dst.z = ~src.z
- dst.w = ~src.w
-
-
-1.11.4 TRUNC - Truncate
-
- dst.x = trunc(src.x)
- dst.y = trunc(src.y)
- dst.z = trunc(src.z)
- dst.w = trunc(src.w)
-
-
-1.11.5 SHL - Shift Left
-
- dst.x = src0.x << src1.x
- dst.y = src0.y << src1.x
- dst.z = src0.z << src1.x
- dst.w = src0.w << src1.x
-
-
-1.11.6 SHR - Shift Right
-
- dst.x = src0.x >> src1.x
- dst.y = src0.y >> src1.x
- dst.z = src0.z >> src1.x
- dst.w = src0.w >> src1.x
-
-
-1.11.7 AND - Bitwise And
-
- dst.x = src0.x & src1.x
- dst.y = src0.y & src1.y
- dst.z = src0.z & src1.z
- dst.w = src0.w & src1.w
-
-
-1.11.8 OR - Bitwise Or
-
- dst.x = src0.x | src1.x
- dst.y = src0.y | src1.y
- dst.z = src0.z | src1.z
- dst.w = src0.w | src1.w
-
-
-1.11.9 MOD - Modulus
-
- dst.x = src0.x % src1.x
- dst.y = src0.y % src1.y
- dst.z = src0.z % src1.z
- dst.w = src0.w % src1.w
-
-
-1.11.10 XOR - Bitwise Xor
-
- dst.x = src0.x ^ src1.x
- dst.y = src0.y ^ src1.y
- dst.z = src0.z ^ src1.z
- dst.w = src0.w ^ src1.w
-
-
-1.11.11 SAD - Sum Of Absolute Differences
-
- dst.x = abs(src0.x - src1.x) + src2.x
- dst.y = abs(src0.y - src1.y) + src2.y
- dst.z = abs(src0.z - src1.z) + src2.z
- dst.w = abs(src0.w - src1.w) + src2.w
-
-
-1.11.12 TXF - Texel Fetch
-
- TBD
-
-
-1.11.13 TXQ - Texture Size Query
-
- TBD
-
-
-1.11.14 CONT - Continue
-
- TBD
-
-
-1.12 GL_NV_geometry_program4
------------------------------
-
-
-1.12.1 EMIT - Emit
-
- TBD
-
-
-1.12.2 ENDPRIM - End Primitive
-
- TBD
-
-
-1.13 GLSL
-----------
-
-
-1.13.1 BGNLOOP - Begin a Loop
-
- TBD
-
-
-1.13.2 BGNSUB - Begin Subroutine
-
- TBD
-
-
-1.13.3 ENDLOOP - End a Loop
-
- TBD
-
-
-1.13.4 ENDSUB - End Subroutine
-
- TBD
-
-
-1.13.5 INT - Truncate
-
- Alias for TRUNC.
-
-
-1.13.6 NOISE1 - 1D Noise
-
- TBD
-
-
-1.13.7 NOISE2 - 2D Noise
-
- TBD
-
-
-1.13.8 NOISE3 - 3D Noise
-
- TBD
-
-
-1.13.9 NOISE4 - 4D Noise
-
- TBD
-
-
-1.13.10 NOP - No Operation
-
- Do nothing.
-
-
-1.14 ps_1_1
-------------
-
-
-1.14.1 TEXKILL - Conditional Discard
-
- Alias for KIL.
-
-
-1.15 ps_1_4
-------------
-
-
-1.15.1 TEXLD - Texture Lookup
-
- Alias for TEX.
-
-
-1.16 ps_2_0
-------------
-
-
-1.16.1 M4X4 - Multiply Matrix
-
- Alias for MULTIPLYMATRIX.
-
-
-1.16.2 M4X3 - Multiply Matrix
-
- Considered for removal from language.
-
-
-1.16.3 M3X4 - Multiply Matrix
-
- Considered for removal from language.
-
-
-1.16.4 M3X3 - Multiply Matrix
-
- Considered for removal from language.
-
-
-1.16.5 M3X2 - Multiply Matrix
-
- Considered for removal from language.
-
-
-1.16.6 CRS - Cross Product
-
- Alias for XPD.
-
-
-1.16.7 NRM4 - 4-component Vector Normalise
-
- dst.x = src.x / (src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w)
- dst.y = src.y / (src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w)
- dst.z = src.z / (src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w)
- dst.w = src.w / (src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w)
-
-
-1.16.8 SINCOS - Sine Cosine
-
- Alias for SCS.
-
-
-1.16.9 TEXLDB - Texture Lookup With Bias
-
- Alias for TXB.
-
-
-1.16.10 DP2ADD - 2-component Dot Product And Add
-
- Alias for DP2A.
-
-
-1.17 ps_2_x
-------------
-
-
-1.17.1 CALL - Subroutine Call
-
- Alias for CAL.
-
-
-1.17.2 CALLNZ - Subroutine Call If Not Zero
-
- TBD
-
-
-1.17.3 IFC - If
-
- TBD
-
-
-1.17.4 BREAK - Break
-
- Alias for BRK.
-
-
-1.17.5 BREAKC - Break Conditional
-
- TBD
-
-
-1.17.6 DSX - Derivative Relative To X
-
- Alias for DDX.
-
-
-1.17.7 DSY - Derivative Relative To Y
-
- Alias for DDY.
-
-
-1.17.8 TEXLDD - Texture Lookup with Derivatives
-
- Alias for TXD.
-
-
-1.18 vs_1_1
-------------
-
-
-1.18.1 EXPP - Approximate Exponential Base 2
-
- Use EXP. See also 1.19.3.
-
-
-1.18.2 LOGP - Logarithm Base 2
-
- Use LOG. See also 1.19.4.
-
-
-1.19 vs_2_0
-------------
-
-
-1.19.1 SGN - Set Sign
-
- Alias for SSG.
-
-
-1.19.2 MOVA - Move Address Register
-
- Alias for ARR.
-
-
-1.19.3 EXPP - Approximate Exponential Base 2
-
- Use EX2.
-
-
-1.19.4 LOGP - Logarithm Base 2
-
- Use LG2.
-
-
-2 Explanation of symbols used
-==============================
-
-
-2.1 Functions
---------------
-
-
- abs(x) Absolute value of x.
- |x|
- (x < 0.0) ? -x : x
-
- ceil(x) Ceiling of x.
-
- clamp(x,y,z) Clamp x between y and z.
- (x < y) ? y : (x > z) ? z : x
-
- cos(x) Cosine of x.
-
- floor(x) Floor of x.
-
- lg2(x) Logarithm base 2 of x.
-
- max(x,y) Maximum of x and y.
- (x > y) ? x : y
-
- min(x,y) Minimum of x and y.
- (x < y) ? x : y
-
- partialx(x) Derivative of x relative to fragment's X.
-
- partialy(x) Derivative of x relative to fragment's Y.
-
- pop() Pop from stack.
-
- pow(x,y) Raise x to power of y.
-
- push(x) Push x on stack.
-
- round(x) Round x.
-
- sin(x) Sine of x.
-
- sqrt(x) Square root of x.
-
- trunc(x) Truncate x.
-
-
-2.2 Keywords
--------------
-
-
- discard Discard fragment.
-
- dst First destination register.
-
- dst0 First destination register.
-
- pc Program counter.
-
- src First source register.
-
- src0 First source register.
-
- src1 Second source register.
-
- src2 Third source register.
-
- target Label of target instruction.
-
-
-3 Other tokens
-===============
-
-
-3.1 Declaration Semantic
--------------------------
-
-
- Follows Declaration token if Semantic bit is set.
-
- Since its purpose is to link a shader with other stages of the pipeline,
- it is valid to follow only those Declaration tokens that declare a register
- either in INPUT or OUTPUT file.
-
- SemanticName field contains the semantic name of the register being declared.
- There is no default value.
-
- SemanticIndex is an optional subscript that can be used to distinguish
- different register declarations with the same semantic name. The default value
- is 0.
-
- The meanings of the individual semantic names are explained in the following
- sections.
-
-
-3.1.1 FACE
-
- Valid only in a fragment shader INPUT declaration.
-
- FACE.x is negative when the primitive is back facing. FACE.x is positive
- when the primitive is front facing.
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c
index 0890078cd05..6dbedf15ca8 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.c
@@ -164,6 +164,7 @@ tgsi_default_full_declaration( void )
full_declaration.Declaration = tgsi_default_declaration();
full_declaration.Range = tgsi_default_declaration_range();
full_declaration.Semantic = tgsi_default_declaration_semantic();
+ full_declaration.ImmediateData.u = NULL;
return full_declaration;
}
@@ -180,7 +181,7 @@ tgsi_build_full_declaration(
struct tgsi_declaration_range *dr;
if( maxsize <= size )
- return 0;
+ return 0;
declaration = (struct tgsi_declaration *) &tokens[size];
size++;
@@ -235,6 +236,24 @@ tgsi_build_full_declaration(
header );
}
+ if (full_decl->Declaration.File == TGSI_FILE_IMMEDIATE_ARRAY) {
+ unsigned i, j;
+ union tgsi_immediate_data *data;
+
+ for (i = 0; i <= dr->Last; ++i) {
+ for (j = 0; j < 4; ++j) {
+ unsigned idx = i*4 + j;
+ if (maxsize <= size)
+ return 0;
+ data = (union tgsi_immediate_data *) &tokens[size];
+ ++size;
+
+ *data = full_decl->ImmediateData.u[idx];
+ declaration_grow( declaration, header );
+ }
+ }
+ }
+
return size;
}
@@ -613,6 +632,7 @@ tgsi_build_full_instruction(
reg->Register.File,
reg->Register.WriteMask,
reg->Register.Indirect,
+ reg->Register.Dimension,
reg->Register.Index,
instruction,
header );
@@ -640,6 +660,46 @@ tgsi_build_full_instruction(
instruction,
header );
}
+
+ if( reg->Register.Dimension ) {
+ struct tgsi_dimension *dim;
+
+ assert( !reg->Dimension.Dimension );
+
+ if( maxsize <= size )
+ return 0;
+ dim = (struct tgsi_dimension *) &tokens[size];
+ size++;
+
+ *dim = tgsi_build_dimension(
+ reg->Dimension.Indirect,
+ reg->Dimension.Index,
+ instruction,
+ header );
+
+ if( reg->Dimension.Indirect ) {
+ struct tgsi_src_register *ind;
+
+ if( maxsize <= size )
+ return 0;
+ ind = (struct tgsi_src_register *) &tokens[size];
+ size++;
+
+ *ind = tgsi_build_src_register(
+ reg->DimIndirect.File,
+ reg->DimIndirect.SwizzleX,
+ reg->DimIndirect.SwizzleY,
+ reg->DimIndirect.SwizzleZ,
+ reg->DimIndirect.SwizzleW,
+ reg->DimIndirect.Negate,
+ reg->DimIndirect.Absolute,
+ reg->DimIndirect.Indirect,
+ reg->DimIndirect.Dimension,
+ reg->DimIndirect.Index,
+ instruction,
+ header );
+ }
+ }
}
for( i = 0; i < full_inst->Instruction.NumSrcRegs; i++ ) {
@@ -959,6 +1019,7 @@ tgsi_build_dst_register(
unsigned file,
unsigned mask,
unsigned indirect,
+ unsigned dimension,
int index,
struct tgsi_instruction *instruction,
struct tgsi_header *header )
@@ -974,6 +1035,7 @@ tgsi_build_dst_register(
dst_register.WriteMask = mask;
dst_register.Index = index;
dst_register.Indirect = indirect;
+ dst_register.Dimension = dimension;
instruction_grow( instruction, header );
@@ -987,6 +1049,8 @@ tgsi_default_full_dst_register( void )
full_dst_register.Register = tgsi_default_dst_register();
full_dst_register.Indirect = tgsi_default_src_register();
+ full_dst_register.Dimension = tgsi_default_dimension();
+ full_dst_register.DimIndirect = tgsi_default_src_register();
return full_dst_register;
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.h b/src/gallium/auxiliary/tgsi/tgsi_build.h
index 13d7f5272d6..112107a0881 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.h
@@ -263,6 +263,7 @@ tgsi_build_dst_register(
unsigned file,
unsigned mask,
unsigned indirect,
+ unsigned dimension,
int index,
struct tgsi_instruction *instruction,
struct tgsi_header *header );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index 83000200189..f71ffb70308 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -56,7 +56,7 @@ dump_ctx_printf(struct dump_ctx *ctx, const char *format, ...)
va_list ap;
(void)ctx;
va_start(ap, format);
- debug_vprintf(format, ap);
+ _debug_vprintf(format, ap);
va_end(ap);
}
@@ -100,9 +100,10 @@ static const char *file_names[TGSI_FILE_COUNT] =
"SAMP",
"ADDR",
"IMM",
- "LOOP",
"PRED",
- "SV"
+ "SV",
+ "IMMX",
+ "TEMPX"
};
static const char *interpolate_names[] =
@@ -175,7 +176,11 @@ static const char *primitive_names[] =
"TRIANGLE_FAN",
"QUADS",
"QUAD_STRIP",
- "POLYGON"
+ "POLYGON",
+ "LINES_ADJACENCY",
+ "LINE_STRIP_ADJACENCY",
+ "TRIANGLES_ADJACENCY",
+ "TRIANGLE_STRIP_ADJACENCY"
};
static const char *fs_coord_origin_names[] =
@@ -192,29 +197,30 @@ static const char *fs_coord_pixel_center_names[] =
static void
-_dump_register_dst(
- struct dump_ctx *ctx,
- uint file,
- int index)
-{
- ENM( file, file_names );
-
- CHR( '[' );
- SID( index );
- CHR( ']' );
-}
-
-
-static void
_dump_register_src(
struct dump_ctx *ctx,
const struct tgsi_full_src_register *src )
{
ENM(src->Register.File, file_names);
if (src->Register.Dimension) {
- CHR('[');
- SID(src->Dimension.Index);
- CHR(']');
+ if (src->Dimension.Indirect) {
+ CHR( '[' );
+ ENM( src->DimIndirect.File, file_names );
+ CHR( '[' );
+ SID( src->DimIndirect.Index );
+ TXT( "]." );
+ ENM( src->DimIndirect.SwizzleX, swizzle_names );
+ if (src->Dimension.Index != 0) {
+ if (src->Dimension.Index > 0)
+ CHR( '+' );
+ SID( src->Dimension.Index );
+ }
+ CHR( ']' );
+ } else {
+ CHR('[');
+ SID(src->Dimension.Index);
+ CHR(']');
+ }
}
if (src->Register.Indirect) {
CHR( '[' );
@@ -236,30 +242,52 @@ _dump_register_src(
}
}
+
static void
-_dump_register_ind(
+_dump_register_dst(
struct dump_ctx *ctx,
- uint file,
- int index,
- uint ind_file,
- int ind_index,
- uint ind_swizzle )
+ const struct tgsi_full_dst_register *dst )
{
- ENM( file, file_names );
- CHR( '[' );
- ENM( ind_file, file_names );
- CHR( '[' );
- SID( ind_index );
- TXT( "]." );
- ENM( ind_swizzle, swizzle_names );
- if (index != 0) {
- if (index > 0)
- CHR( '+' );
- SID( index );
+ ENM(dst->Register.File, file_names);
+ if (dst->Register.Dimension) {
+ if (dst->Dimension.Indirect) {
+ CHR( '[' );
+ ENM( dst->DimIndirect.File, file_names );
+ CHR( '[' );
+ SID( dst->DimIndirect.Index );
+ TXT( "]." );
+ ENM( dst->DimIndirect.SwizzleX, swizzle_names );
+ if (dst->Dimension.Index != 0) {
+ if (dst->Dimension.Index > 0)
+ CHR( '+' );
+ SID( dst->Dimension.Index );
+ }
+ CHR( ']' );
+ } else {
+ CHR('[');
+ SID(dst->Dimension.Index);
+ CHR(']');
+ }
+ }
+ if (dst->Register.Indirect) {
+ CHR( '[' );
+ ENM( dst->Indirect.File, file_names );
+ CHR( '[' );
+ SID( dst->Indirect.Index );
+ TXT( "]." );
+ ENM( dst->Indirect.SwizzleX, swizzle_names );
+ if (dst->Register.Index != 0) {
+ if (dst->Register.Index > 0)
+ CHR( '+' );
+ SID( dst->Register.Index );
+ }
+ CHR( ']' );
+ } else {
+ CHR( '[' );
+ SID( dst->Register.Index );
+ CHR( ']' );
}
- CHR( ']' );
}
-
static void
_dump_writemask(
struct dump_ctx *ctx,
@@ -278,6 +306,39 @@ _dump_writemask(
}
}
+static void
+dump_imm_data(struct tgsi_iterate_context *iter,
+ union tgsi_immediate_data *data,
+ unsigned num_tokens,
+ unsigned data_type)
+{
+ struct dump_ctx *ctx = (struct dump_ctx *)iter;
+ unsigned i ;
+
+ TXT( " {" );
+
+ assert( num_tokens <= 4 );
+ for (i = 0; i < num_tokens; i++) {
+ switch (data_type) {
+ case TGSI_IMM_FLOAT32:
+ FLT( data[i].Float );
+ break;
+ case TGSI_IMM_UINT32:
+ UID(data[i].Uint);
+ break;
+ case TGSI_IMM_INT32:
+ SID(data[i].Int);
+ break;
+ default:
+ assert( 0 );
+ }
+
+ if (i < num_tokens - 1)
+ TXT( ", " );
+ }
+ TXT( "}" );
+}
+
static boolean
iter_declaration(
struct tgsi_iterate_context *iter,
@@ -358,6 +419,43 @@ iter_declaration(
}
}
+ if (decl->Declaration.File == TGSI_FILE_IMMEDIATE_ARRAY) {
+ unsigned i;
+ char range_indent[4];
+
+ TXT(" {");
+
+ if (decl->Range.Last < 10)
+ range_indent[0] = '\0';
+ else if (decl->Range.Last < 100) {
+ range_indent[0] = ' ';
+ range_indent[1] = '\0';
+ } else if (decl->Range.Last < 1000) {
+ range_indent[0] = ' ';
+ range_indent[1] = ' ';
+ range_indent[2] = '\0';
+ } else {
+ range_indent[0] = ' ';
+ range_indent[1] = ' ';
+ range_indent[2] = ' ';
+ range_indent[3] = '\0';
+ }
+
+ dump_imm_data(iter, decl->ImmediateData.u,
+ 4, TGSI_IMM_FLOAT32);
+ for(i = 1; i <= decl->Range.Last; ++i) {
+ /* indent by strlen of:
+ * "DCL IMMX[0..1] {" */
+ CHR('\n');
+ TXT( " " );
+ TXT( range_indent );
+ dump_imm_data(iter, decl->ImmediateData.u + i,
+ 4, TGSI_IMM_FLOAT32);
+ }
+
+ TXT(" }");
+ }
+
EOL();
return TRUE;
@@ -431,33 +529,11 @@ iter_immediate(
{
struct dump_ctx *ctx = (struct dump_ctx *) iter;
- uint i;
-
TXT( "IMM " );
ENM( imm->Immediate.DataType, immediate_type_names );
- TXT( " { " );
-
- assert( imm->Immediate.NrTokens <= 4 + 1 );
- for (i = 0; i < imm->Immediate.NrTokens - 1; i++) {
- switch (imm->Immediate.DataType) {
- case TGSI_IMM_FLOAT32:
- FLT( imm->u[i].Float );
- break;
- case TGSI_IMM_UINT32:
- UID(imm->u[i].Uint);
- break;
- case TGSI_IMM_INT32:
- SID(imm->u[i].Int);
- break;
- default:
- assert( 0 );
- }
-
- if (i < imm->Immediate.NrTokens - 2)
- TXT( ", " );
- }
- TXT( " }" );
+ dump_imm_data(iter, imm->u, imm->Immediate.NrTokens - 1,
+ imm->Immediate.DataType);
EOL();
@@ -488,12 +564,36 @@ iter_instruction(
INSTID( instno );
TXT( ": " );
-
+
ctx->indent -= info->pre_dedent;
for(i = 0; (int)i < ctx->indent; ++i)
TXT( " " );
ctx->indent += info->post_indent;
-
+
+ if (inst->Instruction.Predicate) {
+ CHR( '(' );
+
+ if (inst->Predicate.Negate)
+ CHR( '!' );
+
+ TXT( "PRED[" );
+ SID( inst->Predicate.Index );
+ CHR( ']' );
+
+ if (inst->Predicate.SwizzleX != TGSI_SWIZZLE_X ||
+ inst->Predicate.SwizzleY != TGSI_SWIZZLE_Y ||
+ inst->Predicate.SwizzleZ != TGSI_SWIZZLE_Z ||
+ inst->Predicate.SwizzleW != TGSI_SWIZZLE_W) {
+ CHR( '.' );
+ ENM( inst->Predicate.SwizzleX, swizzle_names );
+ ENM( inst->Predicate.SwizzleY, swizzle_names );
+ ENM( inst->Predicate.SwizzleZ, swizzle_names );
+ ENM( inst->Predicate.SwizzleW, swizzle_names );
+ }
+
+ TXT( ") " );
+ }
+
TXT( info->mnemonic );
switch (inst->Instruction.Saturate) {
@@ -516,21 +616,7 @@ iter_instruction(
CHR( ',' );
CHR( ' ' );
- if (dst->Register.Indirect) {
- _dump_register_ind(
- ctx,
- dst->Register.File,
- dst->Register.Index,
- dst->Indirect.File,
- dst->Indirect.Index,
- dst->Indirect.SwizzleX );
- }
- else {
- _dump_register_dst(
- ctx,
- dst->Register.File,
- dst->Register.Index );
- }
+ _dump_register_dst( ctx, dst );
_dump_writemask( ctx, dst->Register.WriteMask );
first_reg = FALSE;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.h b/src/gallium/auxiliary/tgsi/tgsi_dump.h
index 4cd27317b36..dd78b361007 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.h
@@ -28,6 +28,7 @@
#ifndef TGSI_DUMP_H
#define TGSI_DUMP_H
+#include "pipe/p_compiler.h"
#include "pipe/p_shader_tokens.h"
#if defined __cplusplus
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 82eac05dc4d..3a71540506d 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -557,6 +557,23 @@ print_temp(const struct tgsi_exec_machine *mach, uint index)
#endif
+void
+tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach,
+ unsigned num_bufs,
+ const void **bufs,
+ const unsigned *buf_sizes)
+{
+ unsigned i;
+
+ for (i = 0; i < num_bufs; i++) {
+ mach->Consts[i] = bufs[i];
+ mach->ConstsSize[i] = buf_sizes[i];
+ }
+}
+
+
+
+
/**
* Check if there's a potential src/dst register data dependency when
* using SOA execution.
@@ -588,8 +605,10 @@ tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst)
for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
if ((inst->Src[i].Register.File ==
inst->Dst[0].Register.File) &&
- (inst->Src[i].Register.Index ==
- inst->Dst[0].Register.Index)) {
+ ((inst->Src[i].Register.Index ==
+ inst->Dst[0].Register.Index) ||
+ inst->Src[i].Register.Indirect ||
+ inst->Dst[0].Register.Indirect)) {
/* loop over dest channels */
uint channelsWritten = 0x0;
FOR_EACH_ENABLED_CHANNEL(*inst, chan) {
@@ -621,12 +640,10 @@ tgsi_exec_machine_bind_shader(
{
uint k;
struct tgsi_parse_context parse;
- struct tgsi_exec_labels *labels = &mach->Labels;
struct tgsi_full_instruction *instructions;
struct tgsi_full_declaration *declarations;
uint maxInstructions = 10, numInstructions = 0;
uint maxDeclarations = 10, numDeclarations = 0;
- uint instno = 0;
#if 0
tgsi_dump(tokens, 0);
@@ -634,9 +651,30 @@ tgsi_exec_machine_bind_shader(
util_init_math();
+ if (numSamplers) {
+ assert(samplers);
+ }
+
mach->Tokens = tokens;
mach->Samplers = samplers;
+ if (!tokens) {
+ /* unbind and free all */
+ if (mach->Declarations) {
+ FREE( mach->Declarations );
+ }
+ mach->Declarations = NULL;
+ mach->NumDeclarations = 0;
+
+ if (mach->Instructions) {
+ FREE( mach->Instructions );
+ }
+ mach->Instructions = NULL;
+ mach->NumInstructions = 0;
+
+ return;
+ }
+
k = tgsi_parse_init (&parse, mach->Tokens);
if (k != TGSI_PARSE_OK) {
debug_printf( "Problem parsing!\n" );
@@ -645,7 +683,6 @@ tgsi_exec_machine_bind_shader(
mach->Processor = parse.FullHeader.Processor.Processor;
mach->ImmLimit = 0;
- labels->count = 0;
declarations = (struct tgsi_full_declaration *)
MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
@@ -663,7 +700,6 @@ tgsi_exec_machine_bind_shader(
}
while( !tgsi_parse_end_of_tokens( &parse ) ) {
- uint pointer = parse.Position;
uint i;
tgsi_parse_token( &parse );
@@ -686,6 +722,19 @@ tgsi_exec_machine_bind_shader(
++mach->NumOutputs;
}
}
+ if (parse.FullToken.FullDeclaration.Declaration.File ==
+ TGSI_FILE_IMMEDIATE_ARRAY) {
+ unsigned reg;
+ struct tgsi_full_declaration *decl =
+ &parse.FullToken.FullDeclaration;
+ debug_assert(decl->Range.Last < TGSI_EXEC_NUM_IMMEDIATES);
+ for (reg = decl->Range.First; reg <= decl->Range.Last; ++reg) {
+ for( i = 0; i < 4; i++ ) {
+ int idx = reg * 4 + i;
+ mach->ImmArray[reg][i] = decl->ImmediateData.u[idx].Float;
+ }
+ }
+ }
memcpy(declarations + numDeclarations,
&parse.FullToken.FullDeclaration,
sizeof(declarations[0]));
@@ -707,11 +756,6 @@ tgsi_exec_machine_bind_shader(
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
- assert( labels->count < MAX_LABELS );
-
- labels->labels[labels->count][0] = instno;
- labels->labels[labels->count][1] = pointer;
- labels->count++;
/* save expanded instruction */
if (numInstructions == maxInstructions) {
@@ -801,7 +845,9 @@ void
tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach)
{
if (mach) {
- FREE(mach->Instructions);
+ if (mach->Instructions)
+ FREE(mach->Instructions);
+ if (mach->Declarations)
FREE(mach->Declarations);
}
@@ -1017,6 +1063,8 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach,
{
uint i;
+ assert(swizzle < 4);
+
switch (file) {
case TGSI_FILE_CONSTANT:
for (i = 0; i < QUAD_SIZE; i++) {
@@ -1026,9 +1074,23 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach,
if (index->i[i] < 0) {
chan->u[i] = 0;
} else {
- const uint *p = (const uint *)mach->Consts[index2D->i[i]];
-
- chan->u[i] = p[index->i[i] * 4 + swizzle];
+ /* NOTE: copying the const value as a uint instead of float */
+ const uint constbuf = index2D->i[i];
+ const uint *buf = (const uint *)mach->Consts[constbuf];
+ const int pos = index->i[i] * 4 + swizzle;
+ /* const buffer bounds check */
+ if (pos < 0 || pos >= mach->ConstsSize[constbuf]) {
+ if (0) {
+ /* Debug: print warning */
+ static int count = 0;
+ if (count++ < 100)
+ debug_printf("TGSI Exec: const buffer index %d"
+ " out of bounds\n", pos);
+ }
+ chan->u[i] = 0;
+ }
+ else
+ chan->u[i] = buf[pos];
}
}
break;
@@ -1036,8 +1098,16 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach,
case TGSI_FILE_INPUT:
case TGSI_FILE_SYSTEM_VALUE:
for (i = 0; i < QUAD_SIZE; i++) {
- /* XXX: 2D indexing */
- chan->u[i] = mach->Inputs[index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i]].xyzw[swizzle].u[i];
+ /*
+ if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) {
+ debug_printf("Fetching Input[%d] (2d=%d, 1d=%d)\n",
+ index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i],
+ index2D->i[i], index->i[i]);
+ }*/
+ int pos = index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i];
+ assert(pos >= 0);
+ assert(pos < Elements(mach->Inputs));
+ chan->u[i] = mach->Inputs[pos].xyzw[swizzle].u[i];
}
break;
@@ -1050,6 +1120,16 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach,
}
break;
+ case TGSI_FILE_TEMPORARY_ARRAY:
+ for (i = 0; i < QUAD_SIZE; i++) {
+ assert(index->i[i] < TGSI_EXEC_NUM_TEMPS);
+ assert(index2D->i[i] < TGSI_EXEC_NUM_TEMP_ARRAYS);
+
+ chan->u[i] =
+ mach->TempArray[index2D->i[i]][index->i[i]].xyzw[swizzle].u[i];
+ }
+ break;
+
case TGSI_FILE_IMMEDIATE:
for (i = 0; i < QUAD_SIZE; i++) {
assert(index->i[i] >= 0 && index->i[i] < (int)mach->ImmLimit);
@@ -1059,6 +1139,14 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach,
}
break;
+ case TGSI_FILE_IMMEDIATE_ARRAY:
+ for (i = 0; i < QUAD_SIZE; i++) {
+ assert(index2D->i[i] == 0);
+
+ chan->f[i] = mach->ImmArray[index->i[i]][swizzle];
+ }
+ break;
+
case TGSI_FILE_ADDRESS:
for (i = 0; i < QUAD_SIZE; i++) {
assert(index->i[i] >= 0);
@@ -1139,7 +1227,7 @@ fetch_source(const struct tgsi_exec_machine *mach,
index2.i[1] =
index2.i[2] =
index2.i[3] = reg->Indirect.Index;
-
+ assert(reg->Indirect.File == TGSI_FILE_ADDRESS);
/* get current value of address register[swizzle] */
swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, CHAN_X );
fetch_src_file_channel(mach,
@@ -1270,6 +1358,7 @@ store_dest(struct tgsi_exec_machine *mach,
uint i;
union tgsi_exec_channel null;
union tgsi_exec_channel *dst;
+ union tgsi_exec_channel index2D;
uint execmask = mach->ExecMask;
int offset = 0; /* indirection offset */
int index;
@@ -1315,6 +1404,77 @@ store_dest(struct tgsi_exec_machine *mach,
offset = indir_index.i[0];
}
+ /* There is an extra source register that is a second
+ * subscript to a register file. Effectively it means that
+ * the register file is actually a 2D array of registers.
+ *
+ * file[3][1],
+ * where:
+ * [3] = Dimension.Index
+ */
+ if (reg->Register.Dimension) {
+ index2D.i[0] =
+ index2D.i[1] =
+ index2D.i[2] =
+ index2D.i[3] = reg->Dimension.Index;
+
+ /* Again, the second subscript index can be addressed indirectly
+ * identically to the first one.
+ * Nothing stops us from indirectly addressing the indirect register,
+ * but there is no need for that, so we won't exercise it.
+ *
+ * file[ind[4].y+3][1],
+ * where:
+ * ind = DimIndirect.File
+ * [4] = DimIndirect.Index
+ * .y = DimIndirect.SwizzleX
+ */
+ if (reg->Dimension.Indirect) {
+ union tgsi_exec_channel index2;
+ union tgsi_exec_channel indir_index;
+ const uint execmask = mach->ExecMask;
+ unsigned swizzle;
+ uint i;
+
+ index2.i[0] =
+ index2.i[1] =
+ index2.i[2] =
+ index2.i[3] = reg->DimIndirect.Index;
+
+ swizzle = tgsi_util_get_src_register_swizzle( &reg->DimIndirect, CHAN_X );
+ fetch_src_file_channel(mach,
+ reg->DimIndirect.File,
+ swizzle,
+ &index2,
+ &ZeroVec,
+ &indir_index);
+
+ index2D.i[0] += indir_index.i[0];
+ index2D.i[1] += indir_index.i[1];
+ index2D.i[2] += indir_index.i[2];
+ index2D.i[3] += indir_index.i[3];
+
+ /* for disabled execution channels, zero-out the index to
+ * avoid using a potential garbage value.
+ */
+ for (i = 0; i < QUAD_SIZE; i++) {
+ if ((execmask & (1 << i)) == 0) {
+ index2D.i[i] = 0;
+ }
+ }
+ }
+
+ /* If by any chance there was a need for a 3D array of register
+ * files, we would have to check whether Dimension is followed
+ * by a dimension register and continue the saga.
+ */
+ } else {
+ index2D.i[0] =
+ index2D.i[1] =
+ index2D.i[2] =
+ index2D.i[3] = 0;
+ }
+
switch (reg->Register.File) {
case TGSI_FILE_NULL:
dst = &null;
@@ -1341,16 +1501,19 @@ store_dest(struct tgsi_exec_machine *mach,
dst = &mach->Temps[offset + index].xyzw[chan_index];
break;
- case TGSI_FILE_ADDRESS:
+ case TGSI_FILE_TEMPORARY_ARRAY:
index = reg->Register.Index;
- dst = &mach->Addrs[index].xyzw[chan_index];
+ assert( index < TGSI_EXEC_NUM_TEMPS );
+ assert( index2D.i[0] < TGSI_EXEC_NUM_TEMP_ARRAYS );
+ /* XXX we use index2D.i[0] here but somehow we might
+ * end up with someone trying to store indirectly in
+ * different buffers */
+ dst = &mach->TempArray[index2D.i[0]][offset + index].xyzw[chan_index];
break;
- case TGSI_FILE_LOOP:
- assert(reg->Register.Index == 0);
- assert(mach->LoopCounterStackTop > 0);
- assert(chan_index == CHAN_X);
- dst = &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[chan_index];
+ case TGSI_FILE_ADDRESS:
+ index = reg->Register.Index;
+ dst = &mach->Addrs[index].xyzw[chan_index];
break;
case TGSI_FILE_PREDICATE:
@@ -1533,6 +1696,19 @@ emit_primitive(struct tgsi_exec_machine *mach)
}
}
+static void
+conditional_emit_primitive(struct tgsi_exec_machine *mach)
+{
+ if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) {
+ int emitted_verts =
+ mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]];
+ if (emitted_verts) {
+ emit_primitive(mach);
+ }
+ }
+}
+
+
/*
* Fetch four texture samples using STR texture coordinates.
*/
@@ -3065,6 +3241,8 @@ exec_instruction(
if (mach->CallStackTop == 0) {
/* returning from main() */
+ mach->CondStackTop = 0;
+ mach->LoopStackTop = 0;
*pc = -1;
return;
}
@@ -3133,7 +3311,7 @@ exec_instruction(
break;
case TGSI_OPCODE_DIV:
- assert( 0 );
+ exec_vector_binary(mach, inst, micro_div, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_DP2:
@@ -3182,6 +3360,9 @@ exec_instruction(
break;
case TGSI_OPCODE_END:
+ /* make sure we end primitives which haven't
+ * been explicitly emitted */
+ conditional_emit_primitive(mach);
/* halt execution */
*pc = -1;
break;
@@ -3590,6 +3771,9 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
}
#endif
+ /* Strictly speaking, these assertions aren't really needed but they
+ * can potentially catch some bugs in the control flow code.
+ */
assert(mach->CondStackTop == 0);
assert(mach->LoopStackTop == 0);
assert(mach->ContStackTop == 0);
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index a22873e4c2b..9d62c1d7e7e 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -37,8 +37,6 @@ extern "C" {
#endif
-#define MAX_LABELS (4 * 1024) /**< basically, max instructions */
-
#define NUM_CHANNELS 4 /* R,G,B,A */
#define QUAD_SIZE 4 /* 4 pixel/quad */
@@ -93,18 +91,9 @@ struct tgsi_sampler
float rgba[NUM_CHANNELS][QUAD_SIZE]);
};
-/**
- * For branching/calling subroutines.
- */
-struct tgsi_exec_labels
-{
- unsigned labels[MAX_LABELS][2];
- unsigned count;
-};
-
-
#define TGSI_EXEC_NUM_TEMPS 128
#define TGSI_EXEC_NUM_IMMEDIATES 256
+#define TGSI_EXEC_NUM_TEMP_ARRAYS 8
/*
* Locations of various utility registers (_I = Index, _C = Channel)
@@ -186,10 +175,11 @@ struct tgsi_exec_labels
-#define TGSI_EXEC_MAX_COND_NESTING 32
-#define TGSI_EXEC_MAX_LOOP_NESTING 32
-#define TGSI_EXEC_MAX_SWITCH_NESTING 32
-#define TGSI_EXEC_MAX_CALL_NESTING 32
+#define TGSI_EXEC_MAX_NESTING 32
+#define TGSI_EXEC_MAX_COND_NESTING TGSI_EXEC_MAX_NESTING
+#define TGSI_EXEC_MAX_LOOP_NESTING TGSI_EXEC_MAX_NESTING
+#define TGSI_EXEC_MAX_SWITCH_NESTING TGSI_EXEC_MAX_NESTING
+#define TGSI_EXEC_MAX_CALL_NESTING TGSI_EXEC_MAX_NESTING
/* The maximum number of input attributes per vertex. For 2D
* input register files, this is the stride between two 1D
@@ -248,9 +238,12 @@ struct tgsi_exec_machine
*/
struct tgsi_exec_vector Temps[TGSI_EXEC_NUM_TEMPS +
TGSI_EXEC_NUM_TEMP_EXTRAS];
+ struct tgsi_exec_vector TempArray[TGSI_EXEC_NUM_TEMP_ARRAYS][TGSI_EXEC_NUM_TEMPS];
float Imms[TGSI_EXEC_NUM_IMMEDIATES][4];
+ float ImmArray[TGSI_EXEC_NUM_IMMEDIATES][4];
+
struct tgsi_exec_vector Inputs[TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS];
struct tgsi_exec_vector Outputs[TGSI_MAX_TOTAL_VERTICES];
@@ -260,7 +253,10 @@ struct tgsi_exec_machine
struct tgsi_sampler **Samplers;
unsigned ImmLimit;
+
const void *Consts[PIPE_MAX_CONSTANT_BUFFERS];
+ unsigned ConstsSize[PIPE_MAX_CONSTANT_BUFFERS];
+
const struct tgsi_token *Tokens; /**< Declarations, instructions */
unsigned Processor; /**< TGSI_PROCESSOR_x */
@@ -299,10 +295,6 @@ struct tgsi_exec_machine
uint LoopLabelStack[TGSI_EXEC_MAX_LOOP_NESTING];
int LoopLabelStackTop;
- /** Loop counter stack (x = index, y = counter, z = step) */
- struct tgsi_exec_vector LoopCounterStack[TGSI_EXEC_MAX_LOOP_NESTING];
- int LoopCounterStackTop;
-
/** Loop continue mask stack (see comments in tgsi_exec.c) */
uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING];
int ContStackTop;
@@ -328,7 +320,6 @@ struct tgsi_exec_machine
struct tgsi_full_declaration *Declarations;
uint NumDeclarations;
- struct tgsi_exec_labels Labels;
};
struct tgsi_exec_machine *
@@ -379,6 +370,43 @@ tgsi_set_exec_mask(struct tgsi_exec_machine *mach,
}
+extern void
+tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach,
+ unsigned num_bufs,
+ const void **bufs,
+ const unsigned *buf_sizes);
+
+
+static INLINE int
+tgsi_exec_get_shader_param(enum pipe_shader_cap param)
+{
+ switch(param) {
+ case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
+ case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
+ case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
+ case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
+ return INT_MAX;
+ case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
+ return TGSI_EXEC_MAX_NESTING;
+ case PIPE_SHADER_CAP_MAX_INPUTS:
+ return TGSI_EXEC_MAX_INPUT_ATTRIBS;
+ case PIPE_SHADER_CAP_MAX_CONSTS:
+ return TGSI_EXEC_MAX_CONST_BUFFER;
+ case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
+ return PIPE_MAX_CONSTANT_BUFFERS;
+ case PIPE_SHADER_CAP_MAX_TEMPS:
+ return TGSI_EXEC_NUM_TEMPS;
+ case PIPE_SHADER_CAP_MAX_ADDRS:
+ return TGSI_EXEC_NUM_ADDRS;
+ case PIPE_SHADER_CAP_MAX_PREDS:
+ return TGSI_EXEC_NUM_PREDS;
+ case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
#if defined __cplusplus
} /* extern "C" */
#endif
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
index cfa2f631bd8..e59e964ffa7 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -205,3 +205,18 @@ tgsi_get_opcode_name( uint opcode )
return info->mnemonic;
}
+
+const char *
+tgsi_get_processor_name( uint processor )
+{
+ switch (processor) {
+ case TGSI_PROCESSOR_VERTEX:
+ return "vertex shader";
+ case TGSI_PROCESSOR_FRAGMENT:
+ return "fragment shader";
+ case TGSI_PROCESSOR_GEOMETRY:
+ return "geometry shader";
+ default:
+ return "unknown shader type!";
+ }
+}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.h b/src/gallium/auxiliary/tgsi/tgsi_info.h
index 74713c3b98a..1992d11bbe8 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.h
@@ -28,6 +28,7 @@
#ifndef TGSI_INFO_H
#define TGSI_INFO_H
+#include "pipe/p_compiler.h"
#include "pipe/p_shader_tokens.h"
#if defined __cplusplus
@@ -52,6 +53,9 @@ tgsi_get_opcode_info( uint opcode );
const char *
tgsi_get_opcode_name( uint opcode );
+const char *
+tgsi_get_processor_name( uint processor );
+
#if defined __cplusplus
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
index e472947507d..b3123ed016d 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
@@ -163,6 +163,10 @@ OP12(USGE)
OP12(USHR)
OP12(USLT)
OP12(USNE)
+OP01(SWITCH)
+OP01(CASE)
+OP00(DEFAULT)
+OP00(ENDSWITCH)
#undef OP00
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c
index 7e19e1fe36f..1891203abe1 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c
@@ -117,6 +117,17 @@ tgsi_parse_token(
next_token( ctx, &decl->Semantic );
}
+ if (decl->Declaration.File == TGSI_FILE_IMMEDIATE_ARRAY) {
+ unsigned i, j;
+ decl->ImmediateData.u = (union tgsi_immediate_data*)
+ &ctx->Tokens[ctx->Position];
+ for (i = 0; i <= decl->Range.Last; ++i) {
+ for (j = 0; j < 4; ++j) {
+ ctx->Position++;
+ }
+ }
+ }
+
break;
}
@@ -181,11 +192,6 @@ tgsi_parse_token(
next_token( ctx, &inst->Dst[i].Register );
- /*
- * No support for indirect or multi-dimensional addressing.
- */
- assert( !inst->Dst[i].Register.Dimension );
-
if( inst->Dst[i].Register.Indirect ) {
next_token( ctx, &inst->Dst[i].Indirect );
@@ -195,6 +201,24 @@ tgsi_parse_token(
assert( !inst->Dst[i].Indirect.Dimension );
assert( !inst->Dst[i].Indirect.Indirect );
}
+ if( inst->Dst[i].Register.Dimension ) {
+ next_token( ctx, &inst->Dst[i].Dimension );
+
+ /*
+ * No support for multi-dimensional addressing.
+ */
+ assert( !inst->Dst[i].Dimension.Dimension );
+
+ if( inst->Dst[i].Dimension.Indirect ) {
+ next_token( ctx, &inst->Dst[i].DimIndirect );
+
+ /*
+ * No support for indirect or multi-dimensional addressing.
+ */
+ assert( !inst->Dst[i].Indirect.Indirect );
+ assert( !inst->Dst[i].Indirect.Dimension );
+ }
+ }
}
assert( inst->Instruction.NumSrcRegs <= TGSI_FULL_MAX_SRC_REGISTERS );
@@ -258,17 +282,6 @@ tgsi_parse_token(
}
-unsigned
-tgsi_num_tokens(const struct tgsi_token *tokens)
-{
- struct tgsi_parse_context ctx;
- if (tgsi_parse_init(&ctx, tokens) == TGSI_PARSE_OK) {
- unsigned len = (ctx.FullHeader.Header.HeaderSize +
- ctx.FullHeader.Header.BodySize);
- return len;
- }
- return 0;
-}
/**
@@ -295,3 +308,19 @@ tgsi_alloc_tokens(unsigned num_tokens)
unsigned bytes = num_tokens * sizeof(struct tgsi_token);
return (struct tgsi_token *) MALLOC(bytes);
}
+
+
+void
+tgsi_dump_tokens(const struct tgsi_token *tokens)
+{
+ const unsigned *dwords = (const unsigned *)tokens;
+ int nr = tgsi_num_tokens(tokens);
+ int i;
+
+ assert(sizeof(*tokens) == sizeof(unsigned));
+
+ debug_printf("const unsigned tokens[%d] = {\n", nr);
+ for (i = 0; i < nr; i++)
+ debug_printf("0x%08x,\n", dwords[i]);
+ debug_printf("};\n");
+}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h
index b45ccee2f63..d4df5851764 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h
@@ -28,6 +28,7 @@
#ifndef TGSI_PARSE_H
#define TGSI_PARSE_H
+#include "pipe/p_compiler.h"
#include "pipe/p_shader_tokens.h"
#if defined __cplusplus
@@ -44,6 +45,8 @@ struct tgsi_full_dst_register
{
struct tgsi_dst_register Register;
struct tgsi_src_register Indirect;
+ struct tgsi_dimension Dimension;
+ struct tgsi_src_register DimIndirect;
};
struct tgsi_full_src_register
@@ -54,12 +57,18 @@ struct tgsi_full_src_register
struct tgsi_src_register DimIndirect;
};
+struct tgsi_immediate_array_data
+{
+ union tgsi_immediate_data *u;
+};
+
struct tgsi_full_declaration
{
struct tgsi_declaration Declaration;
struct tgsi_declaration_range Range;
struct tgsi_declaration_dimension Dim;
struct tgsi_declaration_semantic Semantic;
+ struct tgsi_immediate_array_data ImmediateData;
};
struct tgsi_full_immediate
@@ -124,8 +133,15 @@ void
tgsi_parse_token(
struct tgsi_parse_context *ctx );
-unsigned
-tgsi_num_tokens(const struct tgsi_token *tokens);
+static INLINE unsigned
+tgsi_num_tokens(const struct tgsi_token *tokens)
+{
+ struct tgsi_header header = *(const struct tgsi_header *) tokens;
+ return header.HeaderSize + header.BodySize;
+}
+
+void
+tgsi_dump_tokens(const struct tgsi_token *tokens);
struct tgsi_token *
tgsi_dup_tokens(const struct tgsi_token *tokens);
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
index ad553c71a57..3521847b619 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
@@ -1366,4 +1366,12 @@ tgsi_emit_ppc(const struct tgsi_token *tokens,
return ok;
}
+#else
+
+void ppc_dummy_func(void);
+
+void ppc_dummy_func(void)
+{
+}
+
#endif /* PIPE_ARCH_PPC */
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
index 76b7564cc36..acbff103efe 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
@@ -33,6 +33,10 @@
#include "tgsi_info.h"
#include "tgsi_iterate.h"
+
+DEBUG_GET_ONCE_BOOL_OPTION(print_sanity, "TGSI_PRINT_SANITY", FALSE)
+
+
typedef struct {
uint file : 28;
/* max 2 dimensions */
@@ -54,6 +58,8 @@ struct sanity_check_ctx
uint errors;
uint warnings;
uint implied_array_size;
+
+ boolean print;
};
static INLINE unsigned
@@ -90,9 +96,18 @@ static void
scan_register_dst(scan_register *reg,
struct tgsi_full_dst_register *dst)
{
- fill_scan_register1d(reg,
- dst->Register.File,
- dst->Register.Index);
+ if (dst->Register.Dimension) {
+ /*FIXME: right now we don't support indirect
+ * multidimensional addressing */
+ fill_scan_register2d(reg,
+ dst->Register.File,
+ dst->Register.Index,
+ dst->Dimension.Index);
+ } else {
+ fill_scan_register1d(reg,
+ dst->Register.File,
+ dst->Register.Index);
+ }
}
static void
@@ -102,7 +117,6 @@ scan_register_src(scan_register *reg,
if (src->Register.Dimension) {
/*FIXME: right now we don't support indirect
* multidimensional addressing */
- debug_assert(!src->Dimension.Indirect);
fill_scan_register2d(reg,
src->Register.File,
src->Register.Index,
@@ -140,6 +154,9 @@ report_error(
{
va_list args;
+ if (!ctx->print)
+ return;
+
debug_printf( "Error : " );
va_start( args, format );
_debug_vprintf( format, args );
@@ -156,6 +173,9 @@ report_warning(
{
va_list args;
+ if (!ctx->print)
+ return;
+
debug_printf( "Warning: " );
va_start( args, format );
_debug_vprintf( format, args );
@@ -235,8 +255,10 @@ static const char *file_names[TGSI_FILE_COUNT] =
"SAMP",
"ADDR",
"IMM",
- "LOOP",
- "PRED"
+ "PRED",
+ "SV",
+ "IMMX",
+ "TEMPX"
};
static boolean
@@ -529,6 +551,7 @@ tgsi_sanity_check(
ctx.errors = 0;
ctx.warnings = 0;
ctx.implied_array_size = 0;
+ ctx.print = debug_get_option_print_sanity();
if (!tgsi_iterate_shader( tokens, &ctx.iter ))
return FALSE;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.h b/src/gallium/auxiliary/tgsi/tgsi_sanity.h
index 52263ff8832..73f0f414e3f 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sanity.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.h
@@ -35,7 +35,8 @@ extern "C" {
#endif
/* Check the given token stream for errors and common mistakes.
- * Diagnostic messages are printed out to the debug output.
+ * Diagnostic messages are printed out to the debug output, and is
+ * controlled by the debug option TGSI_PRINT_SANITY (default false).
* Returns TRUE if there are no errors, even though there could be some warnings.
*/
boolean
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index 232fc537c1d..90198a4f604 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -36,6 +36,7 @@
#include "util/u_math.h"
#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_scan.h"
@@ -84,31 +85,46 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
{
const struct tgsi_full_instruction *fullinst
= &parse.FullToken.FullInstruction;
+ uint i;
assert(fullinst->Instruction.Opcode < TGSI_OPCODE_LAST);
info->opcode_count[fullinst->Instruction.Opcode]++;
- /* special case: scan fragment shaders for use of the fog
- * input/attribute. The X component is fog, the Y component
- * is the front/back-face flag.
- */
- if (procType == TGSI_PROCESSOR_FRAGMENT) {
- uint i;
- for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) {
- const struct tgsi_full_src_register *src =
- &fullinst->Src[i];
- if (src->Register.File == TGSI_FILE_INPUT ||
- src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
- const int ind = src->Register.Index;
- if (info->input_semantic_name[ind] == TGSI_SEMANTIC_FOG) {
- info->uses_fogcoord = TRUE;
- }
- else if (info->input_semantic_name[ind] == TGSI_SEMANTIC_FACE) {
- info->uses_frontfacing = TRUE;
+ for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) {
+ const struct tgsi_full_src_register *src =
+ &fullinst->Src[i];
+ int ind = src->Register.Index;
+
+ /* Mark which inputs are effectively used */
+ if (src->Register.File == TGSI_FILE_INPUT) {
+ unsigned usage_mask;
+ usage_mask = tgsi_util_get_inst_usage_mask(fullinst, i);
+ if (src->Register.Indirect) {
+ for (ind = 0; ind < info->num_inputs; ++ind) {
+ info->input_usage_mask[ind] |= usage_mask;
}
+ } else {
+ assert(ind >= 0);
+ assert(ind < PIPE_MAX_SHADER_INPUTS);
+ info->input_usage_mask[ind] |= usage_mask;
}
}
+
+ /* check for indirect register reads */
+ if (src->Register.Indirect) {
+ info->indirect_files |= (1 << src->Register.File);
+ }
}
+
+ /* check for indirect register writes */
+ for (i = 0; i < fullinst->Instruction.NumDstRegs; i++) {
+ const struct tgsi_full_dst_register *dst = &fullinst->Dst[i];
+ if (dst->Register.Indirect) {
+ info->indirect_files |= (1 << dst->Register.File);
+ }
+ }
+
+ info->num_instructions++;
}
break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h
index 741aa7d5c42..f8aa90cf065 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h
@@ -45,6 +45,7 @@ struct tgsi_shader_info
ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; /**< TGSI_SEMANTIC_x */
ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
ubyte input_interpolate[PIPE_MAX_SHADER_INPUTS];
+ ubyte input_usage_mask[PIPE_MAX_SHADER_INPUTS];
ubyte input_cylindrical_wrap[PIPE_MAX_SHADER_INPUTS];
ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; /**< TGSI_SEMANTIC_x */
ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
@@ -54,14 +55,19 @@ struct tgsi_shader_info
int file_max[TGSI_FILE_COUNT]; /**< highest index of declared registers */
uint immediate_count; /**< number of immediates declared */
+ uint num_instructions;
uint opcode_count[TGSI_OPCODE_LAST]; /**< opcode histogram */
boolean writes_z; /**< does fragment shader write Z value? */
boolean writes_edgeflag; /**< vertex shader outputs edgeflag */
boolean uses_kill; /**< KIL or KILP instruction used? */
- boolean uses_fogcoord; /**< fragment shader uses fog coord? */
- boolean uses_frontfacing; /**< fragment shader uses front/back-face flag? */
+
+ /**
+ * Bitmask indicating which register files are accessed with
+ * indirect addressing. The bits are (1 << TGSI_FILE_x), etc.
+ */
+ unsigned indirect_files;
struct {
unsigned name;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index 1071298b497..086d983a73a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -1244,16 +1244,9 @@ emit_sub(
make_xmm( xmm_src ) );
}
-
-
-
-
-
-
/**
* Register fetch.
*/
-
static void
emit_fetch(
struct x86_function *func,
@@ -1338,7 +1331,6 @@ emit_fetch(
/**
* Register store.
*/
-
static void
emit_store(
struct x86_function *func,
@@ -1455,7 +1447,6 @@ fetch_texel( struct tgsi_sampler **sampler,
/**
* High-level instruction translators.
*/
-
static void
emit_tex( struct x86_function *func,
const struct tgsi_full_instruction *inst,
@@ -1507,7 +1498,6 @@ emit_tex( struct x86_function *func,
get_temp( TEMP_R0, 3 ),
make_xmm( 3 ) );
-
if (projected) {
FETCH( func, *inst, 3, 0, 3 );
@@ -1535,7 +1525,6 @@ emit_tex( struct x86_function *func,
args[0] = get_temp( TEMP_R0, 0 );
args[1] = get_sampler_ptr( unit );
-
emit_func_call( func,
0,
args,
@@ -1569,7 +1558,8 @@ emit_kil(
/* This mask stores component bits that were already tested. Note that
* we test if the value is less than zero, so 1.0 and 0.0 need not to be
- * tested. */
+ * tested.
+ */
uniquemask = 0;
FOR_EACH_CHANNEL( chan_index ) {
@@ -1715,22 +1705,26 @@ emit_cmp(
/**
- * Check if inst src/dest regs use indirect addressing into temporary
- * register file.
+ * Check if inst src/dest regs use indirect addressing into temporary,
+ * input or output register files.
*/
static boolean
-indirect_temp_reference(const struct tgsi_full_instruction *inst)
+indirect_reg_reference(const struct tgsi_full_instruction *inst)
{
uint i;
for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
const struct tgsi_full_src_register *reg = &inst->Src[i];
- if (reg->Register.File == TGSI_FILE_TEMPORARY &&
+ if ((reg->Register.File == TGSI_FILE_TEMPORARY ||
+ reg->Register.File == TGSI_FILE_INPUT ||
+ reg->Register.File == TGSI_FILE_OUTPUT) &&
reg->Register.Indirect)
return TRUE;
}
for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
const struct tgsi_full_dst_register *reg = &inst->Dst[i];
- if (reg->Register.File == TGSI_FILE_TEMPORARY &&
+ if ((reg->Register.File == TGSI_FILE_TEMPORARY ||
+ reg->Register.File == TGSI_FILE_INPUT ||
+ reg->Register.File == TGSI_FILE_OUTPUT) &&
reg->Register.Indirect)
return TRUE;
}
@@ -1746,7 +1740,7 @@ emit_instruction(
unsigned chan_index;
/* we can't handle indirect addressing into temp register file yet */
- if (indirect_temp_reference(inst))
+ if (indirect_reg_reference(inst))
return FALSE;
switch (inst->Instruction.Opcode) {
@@ -1929,20 +1923,32 @@ emit_instruction(
break;
case TGSI_OPCODE_MUL:
+ /* do all fetches and adds, storing results in temp regs */
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( func, *inst, 0, 0, chan_index );
- FETCH( func, *inst, 1, 1, chan_index );
- emit_mul( func, 0, 1 );
- STORE( func, *inst, 0, 0, chan_index );
+ int r = chan_index + 1;
+ FETCH( func, *inst, 0, 0, chan_index ); /* load xmm[0] */
+ FETCH( func, *inst, r, 1, chan_index ); /* load xmm[r] */
+ emit_mul( func, r, 0 ); /* xmm[r] = xmm[r] * xmm[0] */
+ }
+ /* do all stores of the temp regs */
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ int r = chan_index + 1;
+ STORE( func, *inst, r, 0, chan_index ); /* store xmm[r] */
}
break;
case TGSI_OPCODE_ADD:
+ /* do all fetches and adds, storing results in temp regs */
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( func, *inst, 0, 0, chan_index );
- FETCH( func, *inst, 1, 1, chan_index );
- emit_add( func, 0, 1 );
- STORE( func, *inst, 0, 0, chan_index );
+ int r = chan_index + 1;
+ FETCH( func, *inst, 0, 0, chan_index ); /* load xmm[0] */
+ FETCH( func, *inst, r, 1, chan_index ); /* load xmm[r] */
+ emit_add( func, r, 0 ); /* xmm[r] = xmm[r] + xmm[0] */
+ }
+ /* do all stores of the temp regs */
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ int r = chan_index + 1;
+ STORE( func, *inst, r, 0, chan_index ); /* store xmm[r] */
}
break;
@@ -2697,8 +2703,7 @@ static void aos_to_soa( struct x86_function *func,
struct x86_reg aos_input = x86_make_reg( file_REG32, reg_BX );
struct x86_reg num_inputs = x86_make_reg( file_REG32, reg_CX );
struct x86_reg stride = x86_make_reg( file_REG32, reg_DX );
- int inner_loop;
-
+ int loop_top, loop_exit_fixup;
/* Save EBX */
x86_push( func, x86_make_reg( file_REG32, reg_BX ) );
@@ -2711,8 +2716,11 @@ static void aos_to_soa( struct x86_function *func,
x86_mov( func, num_inputs, x86_fn_arg( func, arg_num ) );
x86_mov( func, stride, x86_fn_arg( func, arg_stride ) );
- /* do */
- inner_loop = x86_get_label( func );
+ /* while (num_inputs != 0) */
+ loop_top = x86_get_label( func );
+ x86_cmp_imm( func, num_inputs, 0 );
+ loop_exit_fixup = x86_jcc_forward( func, cc_E );
+
{
x86_push( func, aos_input );
sse_movlps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) );
@@ -2744,9 +2752,10 @@ static void aos_to_soa( struct x86_function *func,
x86_lea( func, aos_input, x86_make_disp(aos_input, 16) );
x86_lea( func, soa_input, x86_make_disp(soa_input, 64) );
}
- /* while --num_inputs */
+ /* --num_inputs */
x86_dec( func, num_inputs );
- x86_jcc( func, cc_NE, inner_loop );
+ x86_jmp( func, loop_top );
+ x86_fixup_fwd_jump( func, loop_exit_fixup );
/* Restore EBX */
x86_pop( func, x86_make_reg( file_REG32, reg_BX ) );
@@ -2816,6 +2825,61 @@ static void soa_to_aos( struct x86_function *func,
x86_pop( func, x86_make_reg( file_REG32, reg_BX ) );
}
+
+/**
+ * Check if the instructions dst register is the same as any src
+ * register and warn if there's a posible SOA dependency.
+ */
+static boolean
+check_soa_dependencies(const struct tgsi_full_instruction *inst)
+{
+ uint opcode = inst->Instruction.Opcode;
+
+ /* XXX: we only handle src/dst aliasing in a few opcodes currently.
+ * Need to use an additional temporay to hold the result in the
+ * cases where the code is too opaque to fix.
+ */
+
+ switch (opcode) {
+ case TGSI_OPCODE_ADD:
+ case TGSI_OPCODE_MOV:
+ case TGSI_OPCODE_MUL:
+ case TGSI_OPCODE_RCP:
+ case TGSI_OPCODE_RSQ:
+ case TGSI_OPCODE_EXP:
+ case TGSI_OPCODE_LOG:
+ case TGSI_OPCODE_DP3:
+ case TGSI_OPCODE_DP4:
+ case TGSI_OPCODE_DP2A:
+ case TGSI_OPCODE_EX2:
+ case TGSI_OPCODE_LG2:
+ case TGSI_OPCODE_POW:
+ case TGSI_OPCODE_XPD:
+ case TGSI_OPCODE_DPH:
+ case TGSI_OPCODE_COS:
+ case TGSI_OPCODE_SIN:
+ case TGSI_OPCODE_TEX:
+ case TGSI_OPCODE_TXB:
+ case TGSI_OPCODE_TXP:
+ case TGSI_OPCODE_NRM:
+ case TGSI_OPCODE_NRM4:
+ case TGSI_OPCODE_DP2:
+ /* OK - these opcodes correctly handle SOA dependencies */
+ return TRUE;
+ default:
+ if (!tgsi_check_soa_dependencies(inst))
+ return TRUE;
+
+ debug_printf("Warning: src/dst aliasing in instruction"
+ " is not handled:\n");
+ debug_printf("Warning: ");
+ tgsi_dump_instruction(inst, 1);
+
+ return FALSE;
+ }
+}
+
+
/**
* Translate a TGSI vertex/fragment shader to SSE2 code.
* Slightly different things are done for vertex vs. fragment shaders.
@@ -2885,7 +2949,6 @@ tgsi_emit_sse2(
x86_make_disp( get_machine_base(),
Offset( struct tgsi_exec_machine, Samplers ) ) );
-
while( !tgsi_parse_end_of_tokens( &parse ) && ok ) {
tgsi_parse_token( &parse );
@@ -2905,27 +2968,15 @@ tgsi_emit_sse2(
if (!ok) {
uint opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
+ uint proc = parse.FullHeader.Processor.Processor;
debug_printf("failed to translate tgsi opcode %d (%s) to SSE (%s)\n",
opcode,
tgsi_get_opcode_name(opcode),
- parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ?
- "vertex shader" : "fragment shader");
+ tgsi_get_processor_name(proc));
}
- if (tgsi_check_soa_dependencies(&parse.FullToken.FullInstruction)) {
- uint opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
-
- /* XXX: we only handle src/dst aliasing in a few opcodes
- * currently. Need to use an additional temporay to hold
- * the result in the cases where the code is too opaque to
- * fix.
- */
- if (opcode != TGSI_OPCODE_MOV) {
- debug_printf("Warning: src/dst aliasing in instruction"
- " is not handled:\n");
- tgsi_dump_instruction(&parse.FullToken.FullInstruction, 1);
- }
- }
+ if (ok)
+ ok = check_soa_dependencies(&parse.FullToken.FullInstruction);
break;
case TGSI_TOKEN_TYPE_IMMEDIATE:
@@ -2982,4 +3033,3 @@ tgsi_emit_sse2(
}
#endif /* PIPE_ARCH_X86 */
-
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.h b/src/gallium/auxiliary/tgsi/tgsi_sse2.h
index d81ee3d00ec..00aa8b84fe9 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.h
@@ -32,9 +32,12 @@
extern "C" {
#endif
+#include "pipe/p_compiler.h"
+
+struct tgsi_exec_machine;
+struct tgsi_interp_coef;
struct tgsi_token;
struct x86_function;
-struct tgsi_interp_coef;
unsigned
tgsi_emit_sse2(
diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c
index 0b468a9184e..b01d2ff4689 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_text.c
@@ -58,7 +58,7 @@ static boolean is_digit_alpha_underscore( const char *cur )
static char uprcase( char c )
{
if (c >= 'a' && c <= 'z')
- return c += 'A' - 'a';
+ return c + 'A' - 'a';
return c;
}
@@ -138,6 +138,7 @@ static boolean parse_identifier( const char **pcur, char *ret )
ret[i++] = *cur++;
while (is_alpha_underscore( cur ))
ret[i++] = *cur++;
+ ret[i++] = '\0';
*pcur = cur;
return TRUE;
}
@@ -278,9 +279,10 @@ static const char *file_names[TGSI_FILE_COUNT] =
"SAMP",
"ADDR",
"IMM",
- "LOOP",
"PRED",
- "SV"
+ "SV",
+ "IMMX",
+ "TEMPX"
};
static boolean
@@ -345,12 +347,68 @@ parse_opt_writemask(
return TRUE;
}
+
+/* <register_file_bracket> ::= <file> `['
+ */
+static boolean
+parse_register_file_bracket(
+ struct translate_ctx *ctx,
+ uint *file )
+{
+ if (!parse_file( &ctx->cur, file )) {
+ report_error( ctx, "Unknown register file" );
+ return FALSE;
+ }
+ eat_opt_white( &ctx->cur );
+ if (*ctx->cur != '[') {
+ report_error( ctx, "Expected `['" );
+ return FALSE;
+ }
+ ctx->cur++;
+ return TRUE;
+}
+
+/* <register_file_bracket_index> ::= <register_file_bracket> <uint>
+ */
+static boolean
+parse_register_file_bracket_index(
+ struct translate_ctx *ctx,
+ uint *file,
+ int *index )
+{
+ uint uindex;
+
+ if (!parse_register_file_bracket( ctx, file ))
+ return FALSE;
+ eat_opt_white( &ctx->cur );
+ if (!parse_uint( &ctx->cur, &uindex )) {
+ report_error( ctx, "Expected literal unsigned integer" );
+ return FALSE;
+ }
+ *index = (int) uindex;
+ return TRUE;
+}
+
+/* Parse simple 1d register operand.
+ * <register_dst> ::= <register_file_bracket_index> `]'
+ */
static boolean
-parse_register_dst( struct translate_ctx *ctx,
- uint *file,
- int *index );
+parse_register_1d(struct translate_ctx *ctx,
+ uint *file,
+ int *index )
+{
+ if (!parse_register_file_bracket_index( ctx, file, index ))
+ return FALSE;
+ eat_opt_white( &ctx->cur );
+ if (*ctx->cur != ']') {
+ report_error( ctx, "Expected `]'" );
+ return FALSE;
+ }
+ ctx->cur++;
+ return TRUE;
+}
-struct parsed_src_bracket {
+struct parsed_bracket {
int index;
uint ind_file;
@@ -360,21 +418,21 @@ struct parsed_src_bracket {
static boolean
-parse_register_src_bracket(
+parse_register_bracket(
struct translate_ctx *ctx,
- struct parsed_src_bracket *brackets)
+ struct parsed_bracket *brackets)
{
const char *cur;
uint uindex;
- memset(brackets, 0, sizeof(struct parsed_src_bracket));
+ memset(brackets, 0, sizeof(struct parsed_bracket));
eat_opt_white( &ctx->cur );
cur = ctx->cur;
if (parse_file( &cur, &brackets->ind_file )) {
- if (!parse_register_dst( ctx, &brackets->ind_file,
- &brackets->ind_index ))
+ if (!parse_register_1d( ctx, &brackets->ind_file,
+ &brackets->ind_index ))
return FALSE;
eat_opt_white( &ctx->cur );
@@ -443,7 +501,7 @@ parse_register_src_bracket(
static boolean
parse_opt_register_src_bracket(
struct translate_ctx *ctx,
- struct parsed_src_bracket *brackets,
+ struct parsed_bracket *brackets,
int *parsed_brackets)
{
const char *cur = ctx->cur;
@@ -455,7 +513,7 @@ parse_opt_register_src_bracket(
++cur;
ctx->cur = cur;
- if (!parse_register_src_bracket(ctx, brackets))
+ if (!parse_register_bracket(ctx, brackets))
return FALSE;
*parsed_brackets = 1;
@@ -464,46 +522,6 @@ parse_opt_register_src_bracket(
return TRUE;
}
-/* <register_file_bracket> ::= <file> `['
- */
-static boolean
-parse_register_file_bracket(
- struct translate_ctx *ctx,
- uint *file )
-{
- if (!parse_file( &ctx->cur, file )) {
- report_error( ctx, "Unknown register file" );
- return FALSE;
- }
- eat_opt_white( &ctx->cur );
- if (*ctx->cur != '[') {
- report_error( ctx, "Expected `['" );
- return FALSE;
- }
- ctx->cur++;
- return TRUE;
-}
-
-/* <register_file_bracket_index> ::= <register_file_bracket> <uint>
- */
-static boolean
-parse_register_file_bracket_index(
- struct translate_ctx *ctx,
- uint *file,
- int *index )
-{
- uint uindex;
-
- if (!parse_register_file_bracket( ctx, file ))
- return FALSE;
- eat_opt_white( &ctx->cur );
- if (!parse_uint( &ctx->cur, &uindex )) {
- report_error( ctx, "Expected literal unsigned integer" );
- return FALSE;
- }
- *index = (int) uindex;
- return TRUE;
-}
/* Parse source register operand.
* <register_src> ::= <register_file_bracket_index> `]' |
@@ -515,13 +533,12 @@ static boolean
parse_register_src(
struct translate_ctx *ctx,
uint *file,
- struct parsed_src_bracket *brackets)
+ struct parsed_bracket *brackets)
{
-
brackets->ind_comp = TGSI_SWIZZLE_X;
if (!parse_register_file_bracket( ctx, file ))
return FALSE;
- if (!parse_register_src_bracket( ctx, brackets ))
+ if (!parse_register_bracket( ctx, brackets ))
return FALSE;
return TRUE;
@@ -629,23 +646,19 @@ parse_register_dcl(
}
-/* Parse destination register operand.
- * <register_dst> ::= <register_file_bracket_index> `]'
- */
+/* Parse destination register operand.*/
static boolean
parse_register_dst(
struct translate_ctx *ctx,
uint *file,
- int *index )
+ struct parsed_bracket *brackets)
{
- if (!parse_register_file_bracket_index( ctx, file, index ))
- return FALSE;
- eat_opt_white( &ctx->cur );
- if (*ctx->cur != ']') {
- report_error( ctx, "Expected `]'" );
+ brackets->ind_comp = TGSI_SWIZZLE_X;
+ if (!parse_register_file_bracket( ctx, file ))
return FALSE;
- }
- ctx->cur++;
+ if (!parse_register_bracket( ctx, brackets ))
+ return FALSE;
+
return TRUE;
}
@@ -655,11 +668,14 @@ parse_dst_operand(
struct tgsi_full_dst_register *dst )
{
uint file;
- int index;
uint writemask;
const char *cur;
+ struct parsed_bracket bracket[2];
+ int parsed_opt_brackets;
- if (!parse_register_dst( ctx, &file, &index ))
+ if (!parse_register_dst( ctx, &file, &bracket[0] ))
+ return FALSE;
+ if (!parse_opt_register_src_bracket(ctx, &bracket[1], &parsed_opt_brackets))
return FALSE;
cur = ctx->cur;
@@ -669,8 +685,24 @@ parse_dst_operand(
return FALSE;
dst->Register.File = file;
- dst->Register.Index = index;
+ if (parsed_opt_brackets) {
+ dst->Register.Dimension = 1;
+ dst->Dimension.Indirect = 0;
+ dst->Dimension.Dimension = 0;
+ dst->Dimension.Index = bracket[0].index;
+ bracket[0] = bracket[1];
+ }
+ dst->Register.Index = bracket[0].index;
dst->Register.WriteMask = writemask;
+ if (bracket[0].ind_file != TGSI_FILE_NULL) {
+ dst->Register.Indirect = 1;
+ dst->Indirect.File = bracket[0].ind_file;
+ dst->Indirect.Index = bracket[0].ind_index;
+ dst->Indirect.SwizzleX = bracket[0].ind_comp;
+ dst->Indirect.SwizzleY = bracket[0].ind_comp;
+ dst->Indirect.SwizzleZ = bracket[0].ind_comp;
+ dst->Indirect.SwizzleW = bracket[0].ind_comp;
+ }
return TRUE;
}
@@ -700,7 +732,7 @@ parse_optional_swizzle(
else if (uprcase( *cur ) == 'W')
swizzle[i] = TGSI_SWIZZLE_W;
else {
- report_error( ctx, "Expected register swizzle component `x', `y', `z', `w', `0' or `1'" );
+ report_error( ctx, "Expected register swizzle component `x', `y', `z' or `w'" );
return FALSE;
}
cur++;
@@ -719,7 +751,7 @@ parse_src_operand(
uint file;
uint swizzle[4];
boolean parsed_swizzle;
- struct parsed_src_bracket bracket[2];
+ struct parsed_bracket bracket[2];
int parsed_opt_brackets;
if (*ctx->cur == '-') {
@@ -816,6 +848,45 @@ parse_instruction(
struct tgsi_full_instruction inst;
uint advance;
+ inst = tgsi_default_full_instruction();
+
+ /* Parse predicate.
+ */
+ eat_opt_white( &ctx->cur );
+ if (*ctx->cur == '(') {
+ uint file;
+ int index;
+ uint swizzle[4];
+ boolean parsed_swizzle;
+
+ inst.Instruction.Predicate = 1;
+
+ ctx->cur++;
+ if (*ctx->cur == '!') {
+ ctx->cur++;
+ inst.Predicate.Negate = 1;
+ }
+
+ if (!parse_register_1d( ctx, &file, &index ))
+ return FALSE;
+
+ if (parse_optional_swizzle( ctx, swizzle, &parsed_swizzle )) {
+ if (parsed_swizzle) {
+ inst.Predicate.SwizzleX = swizzle[0];
+ inst.Predicate.SwizzleY = swizzle[1];
+ inst.Predicate.SwizzleZ = swizzle[2];
+ inst.Predicate.SwizzleW = swizzle[3];
+ }
+ }
+
+ if (*ctx->cur != ')') {
+ report_error( ctx, "Expected `)'" );
+ return FALSE;
+ }
+
+ ctx->cur++;
+ }
+
/* Parse instruction name.
*/
eat_opt_white( &ctx->cur );
@@ -849,7 +920,6 @@ parse_instruction(
return FALSE;
}
- inst = tgsi_default_full_instruction();
inst.Instruction.Opcode = i;
inst.Instruction.Saturate = saturate;
inst.Instruction.NumDstRegs = info->num_dst;
@@ -947,6 +1017,45 @@ static const char *interpolate_names[TGSI_INTERPOLATE_COUNT] =
"PERSPECTIVE"
};
+
+/* parses a 4-touple of the form {x, y, z, w}
+ * where x, y, z, w are numbers */
+static boolean parse_immediate_data(struct translate_ctx *ctx,
+ float *values)
+{
+ unsigned i;
+
+ eat_opt_white( &ctx->cur );
+ if (*ctx->cur != '{') {
+ report_error( ctx, "Expected `{'" );
+ return FALSE;
+ }
+ ctx->cur++;
+ for (i = 0; i < 4; i++) {
+ eat_opt_white( &ctx->cur );
+ if (i > 0) {
+ if (*ctx->cur != ',') {
+ report_error( ctx, "Expected `,'" );
+ return FALSE;
+ }
+ ctx->cur++;
+ eat_opt_white( &ctx->cur );
+ }
+ if (!parse_float( &ctx->cur, &values[i] )) {
+ report_error( ctx, "Expected literal floating point" );
+ return FALSE;
+ }
+ }
+ eat_opt_white( &ctx->cur );
+ if (*ctx->cur != '}') {
+ report_error( ctx, "Expected `}'" );
+ return FALSE;
+ }
+ ctx->cur++;
+
+ return TRUE;
+}
+
static boolean parse_declaration( struct translate_ctx *ctx )
{
struct tgsi_full_declaration decl;
@@ -956,6 +1065,8 @@ static boolean parse_declaration( struct translate_ctx *ctx )
uint writemask;
const char *cur;
uint advance;
+ boolean is_vs_input;
+ boolean is_imm_array;
assert(Elements(semantic_names) == TGSI_SEMANTIC_COUNT);
assert(Elements(interpolate_names) == TGSI_INTERPOLATE_COUNT);
@@ -984,9 +1095,13 @@ static boolean parse_declaration( struct translate_ctx *ctx )
decl.Dim.Index2D = brackets[0].first;
}
+ is_vs_input = (file == TGSI_FILE_INPUT &&
+ ctx->processor == TGSI_PROCESSOR_VERTEX);
+ is_imm_array = (file == TGSI_FILE_IMMEDIATE_ARRAY);
+
cur = ctx->cur;
eat_opt_white( &cur );
- if (*cur == ',') {
+ if (*cur == ',' && !is_vs_input) {
uint i;
cur++;
@@ -1025,11 +1140,49 @@ static boolean parse_declaration( struct translate_ctx *ctx )
break;
}
}
+ } else if (is_imm_array) {
+ unsigned i;
+ float *vals_itr;
+ /* we have our immediate data */
+ if (*cur != '{') {
+ report_error( ctx, "Immediate array without data" );
+ return FALSE;
+ }
+ ++cur;
+ ctx->cur = cur;
+
+ decl.ImmediateData.u =
+ MALLOC(sizeof(union tgsi_immediate_data) * 4 *
+ (decl.Range.Last + 1));
+ vals_itr = (float*)decl.ImmediateData.u;
+ for (i = 0; i <= decl.Range.Last; ++i) {
+ if (!parse_immediate_data(ctx, vals_itr)) {
+ FREE(decl.ImmediateData.u);
+ return FALSE;
+ }
+ vals_itr += 4;
+ eat_opt_white( &ctx->cur );
+ if (*ctx->cur != ',') {
+ if (i != decl.Range.Last) {
+ report_error( ctx, "Not enough data in immediate array!" );
+ FREE(decl.ImmediateData.u);
+ return FALSE;
+ }
+ } else
+ ++ctx->cur;
+ }
+ eat_opt_white( &ctx->cur );
+ if (*ctx->cur != '}') {
+ FREE(decl.ImmediateData.u);
+ report_error( ctx, "Immediate array data missing closing '}'" );
+ return FALSE;
+ }
+ ++ctx->cur;
}
cur = ctx->cur;
eat_opt_white( &cur );
- if (*cur == ',') {
+ if (*cur == ',' && !is_vs_input) {
uint i;
cur++;
@@ -1055,6 +1208,10 @@ static boolean parse_declaration( struct translate_ctx *ctx )
ctx->tokens_cur,
ctx->header,
(uint) (ctx->tokens_end - ctx->tokens_cur) );
+
+ if (is_imm_array)
+ FREE(decl.ImmediateData.u);
+
if (advance == 0)
return FALSE;
ctx->tokens_cur += advance;
@@ -1065,7 +1222,6 @@ static boolean parse_declaration( struct translate_ctx *ctx )
static boolean parse_immediate( struct translate_ctx *ctx )
{
struct tgsi_full_immediate imm;
- uint i;
float values[4];
uint advance;
@@ -1073,37 +1229,13 @@ static boolean parse_immediate( struct translate_ctx *ctx )
report_error( ctx, "Syntax error" );
return FALSE;
}
- if (!str_match_no_case( &ctx->cur, "FLT32" ) || is_digit_alpha_underscore( ctx->cur )) {
+ if (!str_match_no_case( &ctx->cur, "FLT32" ) ||
+ is_digit_alpha_underscore( ctx->cur )) {
report_error( ctx, "Expected `FLT32'" );
return FALSE;
}
- eat_opt_white( &ctx->cur );
- if (*ctx->cur != '{') {
- report_error( ctx, "Expected `{'" );
- return FALSE;
- }
- ctx->cur++;
- for (i = 0; i < 4; i++) {
- eat_opt_white( &ctx->cur );
- if (i > 0) {
- if (*ctx->cur != ',') {
- report_error( ctx, "Expected `,'" );
- return FALSE;
- }
- ctx->cur++;
- eat_opt_white( &ctx->cur );
- }
- if (!parse_float( &ctx->cur, &values[i] )) {
- report_error( ctx, "Expected literal floating point" );
- return FALSE;
- }
- }
- eat_opt_white( &ctx->cur );
- if (*ctx->cur != '}') {
- report_error( ctx, "Expected `}'" );
- return FALSE;
- }
- ctx->cur++;
+
+ parse_immediate_data(ctx, values);
imm = tgsi_default_full_immediate();
imm.Immediate.NrTokens += 4;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
index f725405ade1..7d13a17bdbc 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
@@ -74,7 +74,6 @@ struct ureg_tokens {
#define UREG_MAX_IMMEDIATE 32
#define UREG_MAX_TEMP 256
#define UREG_MAX_ADDR 2
-#define UREG_MAX_LOOP 1
#define UREG_MAX_PRED 1
struct const_decl {
@@ -97,7 +96,8 @@ struct ureg_program
unsigned semantic_name;
unsigned semantic_index;
unsigned interp;
- unsigned cylindrical_wrap;
+ unsigned char cylindrical_wrap;
+ unsigned char centroid;
} fs_input[UREG_MAX_INPUT];
unsigned nr_fs_inputs;
@@ -151,7 +151,6 @@ struct ureg_program
unsigned nr_addrs;
unsigned nr_preds;
- unsigned nr_loops;
unsigned nr_instructions;
struct ureg_tokens domain[2];
@@ -288,11 +287,12 @@ ureg_property_fs_coord_pixel_center(struct ureg_program *ureg,
struct ureg_src
-ureg_DECL_fs_input_cyl(struct ureg_program *ureg,
+ureg_DECL_fs_input_cyl_centroid(struct ureg_program *ureg,
unsigned semantic_name,
unsigned semantic_index,
unsigned interp_mode,
- unsigned cylindrical_wrap)
+ unsigned cylindrical_wrap,
+ unsigned centroid)
{
unsigned i;
@@ -308,6 +308,7 @@ ureg_DECL_fs_input_cyl(struct ureg_program *ureg,
ureg->fs_input[i].semantic_index = semantic_index;
ureg->fs_input[i].interp = interp_mode;
ureg->fs_input[i].cylindrical_wrap = cylindrical_wrap;
+ ureg->fs_input[i].centroid = centroid;
ureg->nr_fs_inputs++;
} else {
set_bad(ureg);
@@ -537,19 +538,6 @@ struct ureg_dst ureg_DECL_address( struct ureg_program *ureg )
return ureg_dst_register( TGSI_FILE_ADDRESS, 0 );
}
-/* Allocate a new loop register.
- */
-struct ureg_dst
-ureg_DECL_loop(struct ureg_program *ureg)
-{
- if (ureg->nr_loops < UREG_MAX_LOOP) {
- return ureg_dst_register(TGSI_FILE_LOOP, ureg->nr_loops++);
- }
-
- assert(0);
- return ureg_dst_register(TGSI_FILE_LOOP, 0);
-}
-
/* Allocate a new predicate register.
*/
struct ureg_dst
@@ -747,11 +735,12 @@ ureg_DECL_immediate_int( struct ureg_program *ureg,
}
-void
+void
ureg_emit_src( struct ureg_program *ureg,
struct ureg_src src )
{
- unsigned size = 1 + (src.Indirect ? 1 : 0) + (src.Dimension ? 1 : 0);
+ unsigned size = 1 + (src.Indirect ? 1 : 0) +
+ (src.Dimension ? (src.DimIndirect ? 2 : 1) : 0);
union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size );
unsigned n = 0;
@@ -784,11 +773,27 @@ ureg_emit_src( struct ureg_program *ureg,
}
if (src.Dimension) {
- out[0].src.Dimension = 1;
- out[n].dim.Indirect = 0;
- out[n].dim.Dimension = 0;
- out[n].dim.Padding = 0;
- out[n].dim.Index = src.DimensionIndex;
+ if (src.DimIndirect) {
+ out[0].src.Dimension = 1;
+ out[n].dim.Indirect = 1;
+ out[n].dim.Dimension = 0;
+ out[n].dim.Padding = 0;
+ out[n].dim.Index = src.DimensionIndex;
+ n++;
+ out[n].value = 0;
+ out[n].src.File = src.DimIndFile;
+ out[n].src.SwizzleX = src.DimIndSwizzle;
+ out[n].src.SwizzleY = src.DimIndSwizzle;
+ out[n].src.SwizzleZ = src.DimIndSwizzle;
+ out[n].src.SwizzleW = src.DimIndSwizzle;
+ out[n].src.Index = src.DimIndIndex;
+ } else {
+ out[0].src.Dimension = 1;
+ out[n].dim.Indirect = 0;
+ out[n].dim.Dimension = 0;
+ out[n].dim.Padding = 0;
+ out[n].dim.Index = src.DimensionIndex;
+ }
n++;
}
@@ -1124,7 +1129,8 @@ emit_decl_fs(struct ureg_program *ureg,
unsigned semantic_name,
unsigned semantic_index,
unsigned interpolate,
- unsigned cylindrical_wrap)
+ unsigned cylindrical_wrap,
+ unsigned centroid)
{
union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3);
@@ -1136,6 +1142,7 @@ emit_decl_fs(struct ureg_program *ureg,
out[0].decl.Interpolate = interpolate;
out[0].decl.Semantic = 1;
out[0].decl.CylindricalWrap = cylindrical_wrap;
+ out[0].decl.Centroid = centroid;
out[1].value = 0;
out[1].decl_range.First = index;
@@ -1251,7 +1258,7 @@ static void emit_decls( struct ureg_program *ureg )
assert(ureg->processor == TGSI_PROCESSOR_GEOMETRY);
emit_property(ureg,
- TGSI_PROPERTY_GS_MAX_VERTICES,
+ TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES,
ureg->property_gs_max_vertices);
}
@@ -1285,7 +1292,8 @@ static void emit_decls( struct ureg_program *ureg )
ureg->fs_input[i].semantic_name,
ureg->fs_input[i].semantic_index,
ureg->fs_input[i].interp,
- ureg->fs_input[i].cylindrical_wrap);
+ ureg->fs_input[i].cylindrical_wrap,
+ ureg->fs_input[i].centroid);
}
} else {
for (i = 0; i < ureg->nr_gs_inputs; i++) {
@@ -1356,13 +1364,6 @@ static void emit_decls( struct ureg_program *ureg )
0, ureg->nr_addrs );
}
- if (ureg->nr_loops) {
- emit_decl_range(ureg,
- TGSI_FILE_LOOP,
- 0,
- ureg->nr_loops);
- }
-
if (ureg->nr_preds) {
emit_decl_range(ureg,
TGSI_FILE_PREDICATE,
@@ -1489,6 +1490,12 @@ const struct tgsi_token *ureg_get_tokens( struct ureg_program *ureg,
}
+void ureg_free_tokens( const struct tgsi_token *tokens )
+{
+ FREE((struct tgsi_token *)tokens);
+}
+
+
struct ureg_program *ureg_create( unsigned processor )
{
struct ureg_program *ureg = CALLOC_STRUCT( ureg_program );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
index 0130a77aadb..acc463200a6 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
@@ -49,14 +49,18 @@ struct ureg_src
unsigned SwizzleZ : 2; /* TGSI_SWIZZLE_ */
unsigned SwizzleW : 2; /* TGSI_SWIZZLE_ */
unsigned Indirect : 1; /* BOOL */
+ unsigned DimIndirect : 1; /* BOOL */
unsigned Dimension : 1; /* BOOL */
unsigned Absolute : 1; /* BOOL */
unsigned Negate : 1; /* BOOL */
int Index : 16; /* SINT */
- unsigned IndirectFile : 4; /* TGSI_FILE_ */
- int IndirectIndex : 16; /* SINT */
- unsigned IndirectSwizzle : 2; /* TGSI_SWIZZLE_ */
- int DimensionIndex : 16; /* SINT */
+ unsigned IndirectFile : 4; /* TGSI_FILE_ */
+ int IndirectIndex : 16; /* SINT */
+ unsigned IndirectSwizzle : 2; /* TGSI_SWIZZLE_ */
+ int DimensionIndex : 16; /* SINT */
+ unsigned DimIndFile : 4; /* TGSI_FILE_ */
+ int DimIndIndex : 16; /* SINT */
+ unsigned DimIndSwizzle : 2; /* TGSI_SWIZZLE_ */
};
/* Very similar to a tgsi_dst_register, removing unsupported fields
@@ -104,6 +108,10 @@ ureg_get_tokens( struct ureg_program *ureg,
unsigned *nr_tokens );
+/* Free the tokens created by ureg_get_tokens() */
+void ureg_free_tokens( const struct tgsi_token *tokens );
+
+
void
ureg_destroy( struct ureg_program * );
@@ -150,11 +158,27 @@ ureg_property_fs_coord_pixel_center(struct ureg_program *ureg,
*/
struct ureg_src
-ureg_DECL_fs_input_cyl(struct ureg_program *,
+ureg_DECL_fs_input_cyl_centroid(struct ureg_program *,
unsigned semantic_name,
unsigned semantic_index,
unsigned interp_mode,
- unsigned cylindrical_wrap);
+ unsigned cylindrical_wrap,
+ unsigned centroid);
+
+static INLINE struct ureg_src
+ureg_DECL_fs_input_cyl(struct ureg_program *ureg,
+ unsigned semantic_name,
+ unsigned semantic_index,
+ unsigned interp_mode,
+ unsigned cylindrical_wrap)
+{
+ return ureg_DECL_fs_input_cyl_centroid(ureg,
+ semantic_name,
+ semantic_index,
+ interp_mode,
+ cylindrical_wrap,
+ 0);
+}
static INLINE struct ureg_src
ureg_DECL_fs_input(struct ureg_program *ureg,
@@ -162,11 +186,11 @@ ureg_DECL_fs_input(struct ureg_program *ureg,
unsigned semantic_index,
unsigned interp_mode)
{
- return ureg_DECL_fs_input_cyl(ureg,
+ return ureg_DECL_fs_input_cyl_centroid(ureg,
semantic_name,
semantic_index,
interp_mode,
- 0);
+ 0, 0);
}
struct ureg_src
@@ -231,9 +255,6 @@ struct ureg_dst
ureg_DECL_address( struct ureg_program * );
struct ureg_dst
-ureg_DECL_loop( struct ureg_program * );
-
-struct ureg_dst
ureg_DECL_predicate(struct ureg_program *);
/* Supply an index to the sampler declaration as this is the hook to
@@ -820,15 +841,31 @@ ureg_src_indirect( struct ureg_src reg, struct ureg_src addr )
return reg;
}
-static INLINE struct ureg_src
+static INLINE struct ureg_src
ureg_src_dimension( struct ureg_src reg, int index )
{
assert(reg.File != TGSI_FILE_NULL);
reg.Dimension = 1;
+ reg.DimIndirect = 0;
reg.DimensionIndex = index;
return reg;
}
+
+static INLINE struct ureg_src
+ureg_src_dimension_indirect( struct ureg_src reg, struct ureg_src addr,
+ int index )
+{
+ assert(reg.File != TGSI_FILE_NULL);
+ reg.Dimension = 1;
+ reg.DimIndirect = 1;
+ reg.DimensionIndex = index;
+ reg.DimIndFile = addr.File;
+ reg.DimIndIndex = addr.Index;
+ reg.DimIndSwizzle = addr.SwizzleX;
+ return reg;
+}
+
static INLINE struct ureg_dst
ureg_dst( struct ureg_src src )
{
@@ -873,6 +910,10 @@ ureg_src_register(unsigned file,
src.Negate = 0;
src.Dimension = 0;
src.DimensionIndex = 0;
+ src.DimIndirect = 0;
+ src.DimIndFile = TGSI_FILE_NULL;
+ src.DimIndIndex = 0;
+ src.DimIndSwizzle = 0;
return src;
}
@@ -896,6 +937,10 @@ ureg_src( struct ureg_dst dst )
src.Negate = 0;
src.Dimension = 0;
src.DimensionIndex = 0;
+ src.DimIndirect = 0;
+ src.DimIndFile = TGSI_FILE_NULL;
+ src.DimIndIndex = 0;
+ src.DimIndSwizzle = 0;
return src;
}
@@ -943,7 +988,11 @@ ureg_src_undef( void )
src.Negate = 0;
src.Dimension = 0;
src.DimensionIndex = 0;
-
+ src.DimIndirect = 0;
+ src.DimIndFile = TGSI_FILE_NULL;
+ src.DimIndIndex = 0;
+ src.DimIndSwizzle = 0;
+
return src;
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c
index 0a7e4105a80..08e7e89bd67 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_util.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_util.c
@@ -163,3 +163,150 @@ tgsi_util_set_full_src_register_sign_mode(
assert( 0 );
}
}
+
+/**
+ * Determine which channels of the specificed src register are effectively
+ * used by this instruction.
+ */
+unsigned
+tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst,
+ unsigned src_idx)
+{
+ const struct tgsi_full_src_register *src = &inst->Src[src_idx];
+ unsigned write_mask = inst->Dst[0].Register.WriteMask;
+ unsigned read_mask;
+ unsigned usage_mask;
+ unsigned chan;
+
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_MOV:
+ case TGSI_OPCODE_ARL:
+ case TGSI_OPCODE_ARR:
+ case TGSI_OPCODE_RCP:
+ case TGSI_OPCODE_MUL:
+ case TGSI_OPCODE_DIV:
+ case TGSI_OPCODE_ADD:
+ case TGSI_OPCODE_MIN:
+ case TGSI_OPCODE_MAX:
+ case TGSI_OPCODE_SLT:
+ case TGSI_OPCODE_SGE:
+ case TGSI_OPCODE_MAD:
+ case TGSI_OPCODE_SUB:
+ case TGSI_OPCODE_LRP:
+ case TGSI_OPCODE_CND:
+ case TGSI_OPCODE_FRC:
+ case TGSI_OPCODE_CEIL:
+ case TGSI_OPCODE_CLAMP:
+ case TGSI_OPCODE_FLR:
+ case TGSI_OPCODE_ROUND:
+ case TGSI_OPCODE_POW:
+ case TGSI_OPCODE_ABS:
+ case TGSI_OPCODE_COS:
+ case TGSI_OPCODE_SIN:
+ case TGSI_OPCODE_DDX:
+ case TGSI_OPCODE_DDY:
+ case TGSI_OPCODE_SEQ:
+ case TGSI_OPCODE_SGT:
+ case TGSI_OPCODE_SLE:
+ case TGSI_OPCODE_SNE:
+ case TGSI_OPCODE_SSG:
+ case TGSI_OPCODE_CMP:
+ case TGSI_OPCODE_TRUNC:
+ case TGSI_OPCODE_NOT:
+ case TGSI_OPCODE_AND:
+ case TGSI_OPCODE_OR:
+ case TGSI_OPCODE_XOR:
+ case TGSI_OPCODE_SAD:
+ /* Channel-wise operations */
+ read_mask = write_mask;
+ break;
+
+ case TGSI_OPCODE_EX2:
+ case TGSI_OPCODE_LG2:
+ case TGSI_OPCODE_RCC:
+ read_mask = TGSI_WRITEMASK_X;
+ break;
+
+ case TGSI_OPCODE_SCS:
+ read_mask = write_mask & TGSI_WRITEMASK_XY ? TGSI_WRITEMASK_X : 0;
+ break;
+
+ case TGSI_OPCODE_EXP:
+ case TGSI_OPCODE_LOG:
+ read_mask = write_mask & TGSI_WRITEMASK_XYZ ? TGSI_WRITEMASK_X : 0;
+ break;
+
+ case TGSI_OPCODE_DP2A:
+ read_mask = src_idx == 2 ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_XY;
+ break;
+
+ case TGSI_OPCODE_DP2:
+ read_mask = TGSI_WRITEMASK_XY;
+ break;
+
+ case TGSI_OPCODE_DP3:
+ read_mask = TGSI_WRITEMASK_XYZ;
+ break;
+
+ case TGSI_OPCODE_DP4:
+ read_mask = TGSI_WRITEMASK_XYZW;
+ break;
+
+ case TGSI_OPCODE_DPH:
+ read_mask = src_idx == 0 ? TGSI_WRITEMASK_XYZ : TGSI_WRITEMASK_XYZW;
+ break;
+
+ case TGSI_OPCODE_TEX:
+ case TGSI_OPCODE_TXD:
+ case TGSI_OPCODE_TXB:
+ case TGSI_OPCODE_TXL:
+ case TGSI_OPCODE_TXP:
+ if (src_idx == 0) {
+ /* Note that the SHADOW variants use the Z component too */
+ switch (inst->Texture.Texture) {
+ case TGSI_TEXTURE_1D:
+ read_mask = TGSI_WRITEMASK_X;
+ break;
+ case TGSI_TEXTURE_SHADOW1D:
+ read_mask = TGSI_WRITEMASK_XZ;
+ break;
+ case TGSI_TEXTURE_2D:
+ case TGSI_TEXTURE_RECT:
+ read_mask = TGSI_WRITEMASK_XY;
+ break;
+ case TGSI_TEXTURE_SHADOW2D:
+ case TGSI_TEXTURE_SHADOWRECT:
+ case TGSI_TEXTURE_3D:
+ case TGSI_TEXTURE_CUBE:
+ read_mask = TGSI_WRITEMASK_XYZ;
+ break;
+
+ default:
+ assert(0);
+ read_mask = 0;
+ }
+
+ if (inst->Instruction.Opcode != TGSI_OPCODE_TEX) {
+ read_mask |= TGSI_WRITEMASK_W;
+ }
+ } else {
+ /* A safe approximation */
+ read_mask = TGSI_WRITEMASK_XYZW;
+ }
+ break;
+
+ default:
+ /* Assume all channels are read */
+ read_mask = TGSI_WRITEMASK_XYZW;
+ break;
+ }
+
+ usage_mask = 0;
+ for (chan = 0; chan < 4; ++chan) {
+ if (read_mask & (1 << chan)) {
+ usage_mask |= 1 << tgsi_util_get_full_src_register_swizzle(src, chan);
+ }
+ }
+
+ return usage_mask;
+}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.h b/src/gallium/auxiliary/tgsi/tgsi_util.h
index 19ee2e7cf2a..04702ba9826 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_util.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_util.h
@@ -34,6 +34,7 @@ extern "C" {
struct tgsi_src_register;
struct tgsi_full_src_register;
+struct tgsi_full_instruction;
void *
tgsi_align_128bit(
@@ -71,6 +72,10 @@ tgsi_util_set_full_src_register_sign_mode(
struct tgsi_full_src_register *reg,
unsigned sign_mode );
+unsigned
+tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst,
+ unsigned src_idx);
+
#if defined __cplusplus
}
#endif
diff --git a/src/gallium/auxiliary/translate/translate.c b/src/gallium/auxiliary/translate/translate.c
index a9b7253bf44..73287b667db 100644
--- a/src/gallium/auxiliary/translate/translate.c
+++ b/src/gallium/auxiliary/translate/translate.c
@@ -38,7 +38,7 @@ struct translate *translate_create( const struct translate_key *key )
{
struct translate *translate = NULL;
-#if defined(PIPE_ARCH_X86)
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
translate = translate_sse2_create( key );
if (translate)
return translate;
@@ -48,3 +48,8 @@ struct translate *translate_create( const struct translate_key *key )
return translate_generic_create( key );
}
+
+boolean translate_is_output_format_supported(enum pipe_format format)
+{
+ return translate_generic_is_output_format_supported(format);
+}
diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h
index edd95e07882..a75380228b1 100644
--- a/src/gallium/auxiliary/translate/translate.h
+++ b/src/gallium/auxiliary/translate/translate.h
@@ -85,6 +85,18 @@ struct translate {
unsigned instance_id,
void *output_buffer);
+ void (PIPE_CDECL *run_elts16)( struct translate *,
+ const uint16_t *elts,
+ unsigned count,
+ unsigned instance_id,
+ void *output_buffer);
+
+ void (PIPE_CDECL *run_elts8)( struct translate *,
+ const uint8_t *elts,
+ unsigned count,
+ unsigned instance_id,
+ void *output_buffer);
+
void (PIPE_CDECL *run)( struct translate *,
unsigned start,
unsigned count,
@@ -105,6 +117,8 @@ struct translate *translate_lookup_or_create( struct translate_context *tctx,
struct translate *translate_create( const struct translate_key *key );
+boolean translate_is_output_format_supported(enum pipe_format format);
+
static INLINE int translate_keysize( const struct translate_key *key )
{
return 2 * sizeof(int) + key->nr_elements * sizeof(struct translate_element);
@@ -138,5 +152,6 @@ struct translate *translate_sse2_create( const struct translate_key *key );
struct translate *translate_generic_create( const struct translate_key *key );
+boolean translate_generic_is_output_format_supported(enum pipe_format format);
#endif
diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c
index a9272fbb491..ad809db720d 100644
--- a/src/gallium/auxiliary/translate/translate_generic.c
+++ b/src/gallium/auxiliary/translate/translate_generic.c
@@ -31,6 +31,7 @@
*/
#include "util/u_memory.h"
+#include "util/u_format.h"
#include "util/u_math.h"
#include "pipe/p_state.h"
#include "translate.h"
@@ -38,7 +39,9 @@
#define DRAW_DBG 0
-typedef void (*fetch_func)(const void *ptr, float *attrib);
+typedef void (*fetch_func)(float *dst,
+ const uint8_t *src,
+ unsigned i, unsigned j);
typedef void (*emit_func)(const float *attrib, void *ptr);
@@ -57,10 +60,18 @@ struct translate_generic {
emit_func emit;
unsigned output_offset;
- char *input_ptr;
+ const uint8_t *input_ptr;
unsigned input_stride;
unsigned max_index;
+ /* this value is set to -1 if this is a normal element with output_format != input_format:
+ * in this case, u_format is used to do a full conversion
+ *
+ * this value is set to the format size in bytes if output_format == input_format or for 32-bit instance ids:
+ * in this case, memcpy is used to copy this amount of bytes
+ */
+ int copy_size;
+
} attrib[PIPE_MAX_ATTRIBS];
unsigned nr_attrib;
@@ -79,22 +90,7 @@ static struct translate_generic *translate_generic( struct translate *translate
* This is probably needed/dupliocated elsewhere, eg format
* conversion, texture sampling etc.
*/
-#define ATTRIB( NAME, SZ, TYPE, FROM, TO ) \
-static void \
-fetch_##NAME(const void *ptr, float *attrib) \
-{ \
- const float defaults[4] = { 0.0f,0.0f,0.0f,1.0f }; \
- unsigned i; \
- \
- for (i = 0; i < SZ; i++) { \
- attrib[i] = FROM(i); \
- } \
- \
- for (; i < 4; i++) { \
- attrib[i] = defaults[i]; \
- } \
-} \
- \
+#define ATTRIB( NAME, SZ, TYPE, TO ) \
static void \
emit_##NAME(const float *attrib, void *ptr) \
{ \
@@ -107,27 +103,6 @@ emit_##NAME(const float *attrib, void *ptr) \
}
-#define FROM_64_FLOAT(i) ((float) ((double *) ptr)[i])
-#define FROM_32_FLOAT(i) (((float *) ptr)[i])
-
-#define FROM_8_USCALED(i) ((float) ((unsigned char *) ptr)[i])
-#define FROM_16_USCALED(i) ((float) ((unsigned short *) ptr)[i])
-#define FROM_32_USCALED(i) ((float) ((unsigned int *) ptr)[i])
-
-#define FROM_8_SSCALED(i) ((float) ((char *) ptr)[i])
-#define FROM_16_SSCALED(i) ((float) ((short *) ptr)[i])
-#define FROM_32_SSCALED(i) ((float) ((int *) ptr)[i])
-
-#define FROM_8_UNORM(i) ((float) ((unsigned char *) ptr)[i] / 255.0f)
-#define FROM_16_UNORM(i) ((float) ((unsigned short *) ptr)[i] / 65535.0f)
-#define FROM_32_UNORM(i) ((float) ((unsigned int *) ptr)[i] / 4294967295.0f)
-
-#define FROM_8_SNORM(i) ((float) ((char *) ptr)[i] / 127.0f)
-#define FROM_16_SNORM(i) ((float) ((short *) ptr)[i] / 32767.0f)
-#define FROM_32_SNORM(i) ((float) ((int *) ptr)[i] / 2147483647.0f)
-
-#define FROM_32_FIXED(i) (((int *) ptr)[i] / 65536.0f)
-
#define TO_64_FLOAT(x) ((double) x)
#define TO_32_FLOAT(x) (x)
@@ -150,94 +125,84 @@ emit_##NAME(const float *attrib, void *ptr) \
#define TO_32_FIXED(x) ((int) (x * 65536.0f))
-
-ATTRIB( R64G64B64A64_FLOAT, 4, double, FROM_64_FLOAT, TO_64_FLOAT )
-ATTRIB( R64G64B64_FLOAT, 3, double, FROM_64_FLOAT, TO_64_FLOAT )
-ATTRIB( R64G64_FLOAT, 2, double, FROM_64_FLOAT, TO_64_FLOAT )
-ATTRIB( R64_FLOAT, 1, double, FROM_64_FLOAT, TO_64_FLOAT )
-
-ATTRIB( R32G32B32A32_FLOAT, 4, float, FROM_32_FLOAT, TO_32_FLOAT )
-ATTRIB( R32G32B32_FLOAT, 3, float, FROM_32_FLOAT, TO_32_FLOAT )
-ATTRIB( R32G32_FLOAT, 2, float, FROM_32_FLOAT, TO_32_FLOAT )
-ATTRIB( R32_FLOAT, 1, float, FROM_32_FLOAT, TO_32_FLOAT )
-
-ATTRIB( R32G32B32A32_USCALED, 4, unsigned, FROM_32_USCALED, TO_32_USCALED )
-ATTRIB( R32G32B32_USCALED, 3, unsigned, FROM_32_USCALED, TO_32_USCALED )
-ATTRIB( R32G32_USCALED, 2, unsigned, FROM_32_USCALED, TO_32_USCALED )
-ATTRIB( R32_USCALED, 1, unsigned, FROM_32_USCALED, TO_32_USCALED )
-
-ATTRIB( R32G32B32A32_SSCALED, 4, int, FROM_32_SSCALED, TO_32_SSCALED )
-ATTRIB( R32G32B32_SSCALED, 3, int, FROM_32_SSCALED, TO_32_SSCALED )
-ATTRIB( R32G32_SSCALED, 2, int, FROM_32_SSCALED, TO_32_SSCALED )
-ATTRIB( R32_SSCALED, 1, int, FROM_32_SSCALED, TO_32_SSCALED )
-
-ATTRIB( R32G32B32A32_UNORM, 4, unsigned, FROM_32_UNORM, TO_32_UNORM )
-ATTRIB( R32G32B32_UNORM, 3, unsigned, FROM_32_UNORM, TO_32_UNORM )
-ATTRIB( R32G32_UNORM, 2, unsigned, FROM_32_UNORM, TO_32_UNORM )
-ATTRIB( R32_UNORM, 1, unsigned, FROM_32_UNORM, TO_32_UNORM )
-
-ATTRIB( R32G32B32A32_SNORM, 4, int, FROM_32_SNORM, TO_32_SNORM )
-ATTRIB( R32G32B32_SNORM, 3, int, FROM_32_SNORM, TO_32_SNORM )
-ATTRIB( R32G32_SNORM, 2, int, FROM_32_SNORM, TO_32_SNORM )
-ATTRIB( R32_SNORM, 1, int, FROM_32_SNORM, TO_32_SNORM )
-
-ATTRIB( R16G16B16A16_USCALED, 4, ushort, FROM_16_USCALED, TO_16_USCALED )
-ATTRIB( R16G16B16_USCALED, 3, ushort, FROM_16_USCALED, TO_16_USCALED )
-ATTRIB( R16G16_USCALED, 2, ushort, FROM_16_USCALED, TO_16_USCALED )
-ATTRIB( R16_USCALED, 1, ushort, FROM_16_USCALED, TO_16_USCALED )
-
-ATTRIB( R16G16B16A16_SSCALED, 4, short, FROM_16_SSCALED, TO_16_SSCALED )
-ATTRIB( R16G16B16_SSCALED, 3, short, FROM_16_SSCALED, TO_16_SSCALED )
-ATTRIB( R16G16_SSCALED, 2, short, FROM_16_SSCALED, TO_16_SSCALED )
-ATTRIB( R16_SSCALED, 1, short, FROM_16_SSCALED, TO_16_SSCALED )
-
-ATTRIB( R16G16B16A16_UNORM, 4, ushort, FROM_16_UNORM, TO_16_UNORM )
-ATTRIB( R16G16B16_UNORM, 3, ushort, FROM_16_UNORM, TO_16_UNORM )
-ATTRIB( R16G16_UNORM, 2, ushort, FROM_16_UNORM, TO_16_UNORM )
-ATTRIB( R16_UNORM, 1, ushort, FROM_16_UNORM, TO_16_UNORM )
-
-ATTRIB( R16G16B16A16_SNORM, 4, short, FROM_16_SNORM, TO_16_SNORM )
-ATTRIB( R16G16B16_SNORM, 3, short, FROM_16_SNORM, TO_16_SNORM )
-ATTRIB( R16G16_SNORM, 2, short, FROM_16_SNORM, TO_16_SNORM )
-ATTRIB( R16_SNORM, 1, short, FROM_16_SNORM, TO_16_SNORM )
-
-ATTRIB( R8G8B8A8_USCALED, 4, ubyte, FROM_8_USCALED, TO_8_USCALED )
-ATTRIB( R8G8B8_USCALED, 3, ubyte, FROM_8_USCALED, TO_8_USCALED )
-ATTRIB( R8G8_USCALED, 2, ubyte, FROM_8_USCALED, TO_8_USCALED )
-ATTRIB( R8_USCALED, 1, ubyte, FROM_8_USCALED, TO_8_USCALED )
-
-ATTRIB( R8G8B8A8_SSCALED, 4, char, FROM_8_SSCALED, TO_8_SSCALED )
-ATTRIB( R8G8B8_SSCALED, 3, char, FROM_8_SSCALED, TO_8_SSCALED )
-ATTRIB( R8G8_SSCALED, 2, char, FROM_8_SSCALED, TO_8_SSCALED )
-ATTRIB( R8_SSCALED, 1, char, FROM_8_SSCALED, TO_8_SSCALED )
-
-ATTRIB( R8G8B8A8_UNORM, 4, ubyte, FROM_8_UNORM, TO_8_UNORM )
-ATTRIB( R8G8B8_UNORM, 3, ubyte, FROM_8_UNORM, TO_8_UNORM )
-ATTRIB( R8G8_UNORM, 2, ubyte, FROM_8_UNORM, TO_8_UNORM )
-ATTRIB( R8_UNORM, 1, ubyte, FROM_8_UNORM, TO_8_UNORM )
-
-ATTRIB( R8G8B8A8_SNORM, 4, char, FROM_8_SNORM, TO_8_SNORM )
-ATTRIB( R8G8B8_SNORM, 3, char, FROM_8_SNORM, TO_8_SNORM )
-ATTRIB( R8G8_SNORM, 2, char, FROM_8_SNORM, TO_8_SNORM )
-ATTRIB( R8_SNORM, 1, char, FROM_8_SNORM, TO_8_SNORM )
-
-ATTRIB( A8R8G8B8_UNORM, 4, ubyte, FROM_8_UNORM, TO_8_UNORM )
-/*ATTRIB( R8G8B8A8_UNORM, 4, ubyte, FROM_8_UNORM, TO_8_UNORM )*/
-
-ATTRIB( R32G32B32A32_FIXED, 4, int, FROM_32_FIXED, TO_32_FIXED )
-ATTRIB( R32G32B32_FIXED, 3, int, FROM_32_FIXED, TO_32_FIXED )
-ATTRIB( R32G32_FIXED, 2, int, FROM_32_FIXED, TO_32_FIXED )
-ATTRIB( R32_FIXED, 1, int, FROM_32_FIXED, TO_32_FIXED )
-
-
+ATTRIB( R64G64B64A64_FLOAT, 4, double, TO_64_FLOAT )
+ATTRIB( R64G64B64_FLOAT, 3, double, TO_64_FLOAT )
+ATTRIB( R64G64_FLOAT, 2, double, TO_64_FLOAT )
+ATTRIB( R64_FLOAT, 1, double, TO_64_FLOAT )
+
+ATTRIB( R32G32B32A32_FLOAT, 4, float, TO_32_FLOAT )
+ATTRIB( R32G32B32_FLOAT, 3, float, TO_32_FLOAT )
+ATTRIB( R32G32_FLOAT, 2, float, TO_32_FLOAT )
+ATTRIB( R32_FLOAT, 1, float, TO_32_FLOAT )
+
+ATTRIB( R32G32B32A32_USCALED, 4, unsigned, TO_32_USCALED )
+ATTRIB( R32G32B32_USCALED, 3, unsigned, TO_32_USCALED )
+ATTRIB( R32G32_USCALED, 2, unsigned, TO_32_USCALED )
+ATTRIB( R32_USCALED, 1, unsigned, TO_32_USCALED )
+
+ATTRIB( R32G32B32A32_SSCALED, 4, int, TO_32_SSCALED )
+ATTRIB( R32G32B32_SSCALED, 3, int, TO_32_SSCALED )
+ATTRIB( R32G32_SSCALED, 2, int, TO_32_SSCALED )
+ATTRIB( R32_SSCALED, 1, int, TO_32_SSCALED )
+
+ATTRIB( R32G32B32A32_UNORM, 4, unsigned, TO_32_UNORM )
+ATTRIB( R32G32B32_UNORM, 3, unsigned, TO_32_UNORM )
+ATTRIB( R32G32_UNORM, 2, unsigned, TO_32_UNORM )
+ATTRIB( R32_UNORM, 1, unsigned, TO_32_UNORM )
+
+ATTRIB( R32G32B32A32_SNORM, 4, int, TO_32_SNORM )
+ATTRIB( R32G32B32_SNORM, 3, int, TO_32_SNORM )
+ATTRIB( R32G32_SNORM, 2, int, TO_32_SNORM )
+ATTRIB( R32_SNORM, 1, int, TO_32_SNORM )
+
+ATTRIB( R16G16B16A16_USCALED, 4, ushort, TO_16_USCALED )
+ATTRIB( R16G16B16_USCALED, 3, ushort, TO_16_USCALED )
+ATTRIB( R16G16_USCALED, 2, ushort, TO_16_USCALED )
+ATTRIB( R16_USCALED, 1, ushort, TO_16_USCALED )
+
+ATTRIB( R16G16B16A16_SSCALED, 4, short, TO_16_SSCALED )
+ATTRIB( R16G16B16_SSCALED, 3, short, TO_16_SSCALED )
+ATTRIB( R16G16_SSCALED, 2, short, TO_16_SSCALED )
+ATTRIB( R16_SSCALED, 1, short, TO_16_SSCALED )
+
+ATTRIB( R16G16B16A16_UNORM, 4, ushort, TO_16_UNORM )
+ATTRIB( R16G16B16_UNORM, 3, ushort, TO_16_UNORM )
+ATTRIB( R16G16_UNORM, 2, ushort, TO_16_UNORM )
+ATTRIB( R16_UNORM, 1, ushort, TO_16_UNORM )
+
+ATTRIB( R16G16B16A16_SNORM, 4, short, TO_16_SNORM )
+ATTRIB( R16G16B16_SNORM, 3, short, TO_16_SNORM )
+ATTRIB( R16G16_SNORM, 2, short, TO_16_SNORM )
+ATTRIB( R16_SNORM, 1, short, TO_16_SNORM )
+
+ATTRIB( R8G8B8A8_USCALED, 4, ubyte, TO_8_USCALED )
+ATTRIB( R8G8B8_USCALED, 3, ubyte, TO_8_USCALED )
+ATTRIB( R8G8_USCALED, 2, ubyte, TO_8_USCALED )
+ATTRIB( R8_USCALED, 1, ubyte, TO_8_USCALED )
+
+ATTRIB( R8G8B8A8_SSCALED, 4, char, TO_8_SSCALED )
+ATTRIB( R8G8B8_SSCALED, 3, char, TO_8_SSCALED )
+ATTRIB( R8G8_SSCALED, 2, char, TO_8_SSCALED )
+ATTRIB( R8_SSCALED, 1, char, TO_8_SSCALED )
+
+ATTRIB( R8G8B8A8_UNORM, 4, ubyte, TO_8_UNORM )
+ATTRIB( R8G8B8_UNORM, 3, ubyte, TO_8_UNORM )
+ATTRIB( R8G8_UNORM, 2, ubyte, TO_8_UNORM )
+ATTRIB( R8_UNORM, 1, ubyte, TO_8_UNORM )
+
+ATTRIB( R8G8B8A8_SNORM, 4, char, TO_8_SNORM )
+ATTRIB( R8G8B8_SNORM, 3, char, TO_8_SNORM )
+ATTRIB( R8G8_SNORM, 2, char, TO_8_SNORM )
+ATTRIB( R8_SNORM, 1, char, TO_8_SNORM )
static void
-fetch_B8G8R8A8_UNORM(const void *ptr, float *attrib)
+emit_A8R8G8B8_UNORM( const float *attrib, void *ptr)
{
- attrib[2] = FROM_8_UNORM(0);
- attrib[1] = FROM_8_UNORM(1);
- attrib[0] = FROM_8_UNORM(2);
- attrib[3] = FROM_8_UNORM(3);
+ ubyte *out = (ubyte *)ptr;
+ out[0] = TO_8_UNORM(attrib[3]);
+ out[1] = TO_8_UNORM(attrib[0]);
+ out[2] = TO_8_UNORM(attrib[1]);
+ out[3] = TO_8_UNORM(attrib[2]);
}
static void
@@ -251,181 +216,13 @@ emit_B8G8R8A8_UNORM( const float *attrib, void *ptr)
}
static void
-fetch_NULL( const void *ptr, float *attrib )
-{
- attrib[0] = 0;
- attrib[1] = 0;
- attrib[2] = 0;
- attrib[3] = 1;
-}
-
-static void
emit_NULL( const float *attrib, void *ptr )
{
/* do nothing is the only sensible option */
}
-static fetch_func get_fetch_func( enum pipe_format format )
-{
- switch (format) {
- case PIPE_FORMAT_R64_FLOAT:
- return &fetch_R64_FLOAT;
- case PIPE_FORMAT_R64G64_FLOAT:
- return &fetch_R64G64_FLOAT;
- case PIPE_FORMAT_R64G64B64_FLOAT:
- return &fetch_R64G64B64_FLOAT;
- case PIPE_FORMAT_R64G64B64A64_FLOAT:
- return &fetch_R64G64B64A64_FLOAT;
-
- case PIPE_FORMAT_R32_FLOAT:
- return &fetch_R32_FLOAT;
- case PIPE_FORMAT_R32G32_FLOAT:
- return &fetch_R32G32_FLOAT;
- case PIPE_FORMAT_R32G32B32_FLOAT:
- return &fetch_R32G32B32_FLOAT;
- case PIPE_FORMAT_R32G32B32A32_FLOAT:
- return &fetch_R32G32B32A32_FLOAT;
-
- case PIPE_FORMAT_R32_UNORM:
- return &fetch_R32_UNORM;
- case PIPE_FORMAT_R32G32_UNORM:
- return &fetch_R32G32_UNORM;
- case PIPE_FORMAT_R32G32B32_UNORM:
- return &fetch_R32G32B32_UNORM;
- case PIPE_FORMAT_R32G32B32A32_UNORM:
- return &fetch_R32G32B32A32_UNORM;
-
- case PIPE_FORMAT_R32_USCALED:
- return &fetch_R32_USCALED;
- case PIPE_FORMAT_R32G32_USCALED:
- return &fetch_R32G32_USCALED;
- case PIPE_FORMAT_R32G32B32_USCALED:
- return &fetch_R32G32B32_USCALED;
- case PIPE_FORMAT_R32G32B32A32_USCALED:
- return &fetch_R32G32B32A32_USCALED;
-
- case PIPE_FORMAT_R32_SNORM:
- return &fetch_R32_SNORM;
- case PIPE_FORMAT_R32G32_SNORM:
- return &fetch_R32G32_SNORM;
- case PIPE_FORMAT_R32G32B32_SNORM:
- return &fetch_R32G32B32_SNORM;
- case PIPE_FORMAT_R32G32B32A32_SNORM:
- return &fetch_R32G32B32A32_SNORM;
-
- case PIPE_FORMAT_R32_SSCALED:
- return &fetch_R32_SSCALED;
- case PIPE_FORMAT_R32G32_SSCALED:
- return &fetch_R32G32_SSCALED;
- case PIPE_FORMAT_R32G32B32_SSCALED:
- return &fetch_R32G32B32_SSCALED;
- case PIPE_FORMAT_R32G32B32A32_SSCALED:
- return &fetch_R32G32B32A32_SSCALED;
-
- case PIPE_FORMAT_R16_UNORM:
- return &fetch_R16_UNORM;
- case PIPE_FORMAT_R16G16_UNORM:
- return &fetch_R16G16_UNORM;
- case PIPE_FORMAT_R16G16B16_UNORM:
- return &fetch_R16G16B16_UNORM;
- case PIPE_FORMAT_R16G16B16A16_UNORM:
- return &fetch_R16G16B16A16_UNORM;
-
- case PIPE_FORMAT_R16_USCALED:
- return &fetch_R16_USCALED;
- case PIPE_FORMAT_R16G16_USCALED:
- return &fetch_R16G16_USCALED;
- case PIPE_FORMAT_R16G16B16_USCALED:
- return &fetch_R16G16B16_USCALED;
- case PIPE_FORMAT_R16G16B16A16_USCALED:
- return &fetch_R16G16B16A16_USCALED;
-
- case PIPE_FORMAT_R16_SNORM:
- return &fetch_R16_SNORM;
- case PIPE_FORMAT_R16G16_SNORM:
- return &fetch_R16G16_SNORM;
- case PIPE_FORMAT_R16G16B16_SNORM:
- return &fetch_R16G16B16_SNORM;
- case PIPE_FORMAT_R16G16B16A16_SNORM:
- return &fetch_R16G16B16A16_SNORM;
-
- case PIPE_FORMAT_R16_SSCALED:
- return &fetch_R16_SSCALED;
- case PIPE_FORMAT_R16G16_SSCALED:
- return &fetch_R16G16_SSCALED;
- case PIPE_FORMAT_R16G16B16_SSCALED:
- return &fetch_R16G16B16_SSCALED;
- case PIPE_FORMAT_R16G16B16A16_SSCALED:
- return &fetch_R16G16B16A16_SSCALED;
-
- case PIPE_FORMAT_R8_UNORM:
- return &fetch_R8_UNORM;
- case PIPE_FORMAT_R8G8_UNORM:
- return &fetch_R8G8_UNORM;
- case PIPE_FORMAT_R8G8B8_UNORM:
- return &fetch_R8G8B8_UNORM;
- case PIPE_FORMAT_R8G8B8A8_UNORM:
- return &fetch_R8G8B8A8_UNORM;
-
- case PIPE_FORMAT_R8_USCALED:
- return &fetch_R8_USCALED;
- case PIPE_FORMAT_R8G8_USCALED:
- return &fetch_R8G8_USCALED;
- case PIPE_FORMAT_R8G8B8_USCALED:
- return &fetch_R8G8B8_USCALED;
- case PIPE_FORMAT_R8G8B8A8_USCALED:
- return &fetch_R8G8B8A8_USCALED;
-
- case PIPE_FORMAT_R8_SNORM:
- return &fetch_R8_SNORM;
- case PIPE_FORMAT_R8G8_SNORM:
- return &fetch_R8G8_SNORM;
- case PIPE_FORMAT_R8G8B8_SNORM:
- return &fetch_R8G8B8_SNORM;
- case PIPE_FORMAT_R8G8B8A8_SNORM:
- return &fetch_R8G8B8A8_SNORM;
-
- case PIPE_FORMAT_R8_SSCALED:
- return &fetch_R8_SSCALED;
- case PIPE_FORMAT_R8G8_SSCALED:
- return &fetch_R8G8_SSCALED;
- case PIPE_FORMAT_R8G8B8_SSCALED:
- return &fetch_R8G8B8_SSCALED;
- case PIPE_FORMAT_R8G8B8A8_SSCALED:
- return &fetch_R8G8B8A8_SSCALED;
-
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- return &fetch_B8G8R8A8_UNORM;
-
- case PIPE_FORMAT_A8R8G8B8_UNORM:
- return &fetch_A8R8G8B8_UNORM;
-
- case PIPE_FORMAT_R32_FIXED:
- return &fetch_R32_FIXED;
- case PIPE_FORMAT_R32G32_FIXED:
- return &fetch_R32G32_FIXED;
- case PIPE_FORMAT_R32G32B32_FIXED:
- return &fetch_R32G32B32_FIXED;
- case PIPE_FORMAT_R32G32B32A32_FIXED:
- return &fetch_R32G32B32A32_FIXED;
-
- default:
- assert(0);
- return &fetch_NULL;
- }
-}
-
-
-
-
static emit_func get_emit_func( enum pipe_format format )
{
- /* silence warnings */
- (void) emit_R32G32B32A32_FIXED;
- (void) emit_R32G32B32_FIXED;
- (void) emit_R32G32_FIXED;
- (void) emit_R32_FIXED;
-
switch (format) {
case PIPE_FORMAT_R64_FLOAT:
return &emit_R64_FLOAT;
@@ -565,60 +362,116 @@ static emit_func get_emit_func( enum pipe_format format )
}
}
-
-
-/**
- * Fetch vertex attributes for 'count' vertices.
- */
-static void PIPE_CDECL generic_run_elts( struct translate *translate,
- const unsigned *elts,
- unsigned count,
+static ALWAYS_INLINE void PIPE_CDECL generic_run_one( struct translate_generic *tg,
+ unsigned elt,
unsigned instance_id,
- void *output_buffer )
+ void *vert )
{
- struct translate_generic *tg = translate_generic(translate);
- char *vert = output_buffer;
unsigned nr_attrs = tg->nr_attrib;
unsigned attr;
- unsigned i;
- /* loop over vertex attributes (vertex shader inputs)
- */
- for (i = 0; i < count; i++) {
- unsigned elt = *elts++;
+ for (attr = 0; attr < nr_attrs; attr++) {
+ float data[4];
+ uint8_t *dst = (uint8_t *)vert + tg->attrib[attr].output_offset;
- for (attr = 0; attr < nr_attrs; attr++) {
- float data[4];
- const char *src;
+ if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) {
+ const uint8_t *src;
unsigned index;
-
- char *dst = (vert +
- tg->attrib[attr].output_offset);
+ int copy_size;
if (tg->attrib[attr].instance_divisor) {
index = instance_id / tg->attrib[attr].instance_divisor;
- } else {
+ }
+ else {
index = elt;
}
+ /* clamp to void going out of bounds */
index = MIN2(index, tg->attrib[attr].max_index);
src = tg->attrib[attr].input_ptr +
tg->attrib[attr].input_stride * index;
- tg->attrib[attr].fetch( src, data );
+ copy_size = tg->attrib[attr].copy_size;
+ if(likely(copy_size >= 0))
+ memcpy(dst, src, copy_size);
+ else
+ {
+ tg->attrib[attr].fetch( data, src, 0, 0 );
+
+ if (0)
+ debug_printf("Fetch linear attr %d from %p stride %d index %d: "
+ " %f, %f, %f, %f \n",
+ attr,
+ tg->attrib[attr].input_ptr,
+ tg->attrib[attr].input_stride,
+ index,
+ data[0], data[1],data[2], data[3]);
+
+ tg->attrib[attr].emit( data, dst );
+ }
+ } else {
+ if(likely(tg->attrib[attr].copy_size >= 0))
+ memcpy(data, &instance_id, 4);
+ else
+ {
+ data[0] = (float)instance_id;
+ tg->attrib[attr].emit( data, dst );
+ }
+ }
+ }
+}
+
+/**
+ * Fetch vertex attributes for 'count' vertices.
+ */
+static void PIPE_CDECL generic_run_elts( struct translate *translate,
+ const unsigned *elts,
+ unsigned count,
+ unsigned instance_id,
+ void *output_buffer )
+{
+ struct translate_generic *tg = translate_generic(translate);
+ char *vert = output_buffer;
+ unsigned i;
+
+ for (i = 0; i < count; i++) {
+ generic_run_one(tg, *elts++, instance_id, vert);
+ vert += tg->translate.key.output_stride;
+ }
+}
- if (0) debug_printf("vert %d/%d attr %d: %f %f %f %f\n",
- i, elt, attr, data[0], data[1], data[2], data[3]);
+static void PIPE_CDECL generic_run_elts16( struct translate *translate,
+ const uint16_t *elts,
+ unsigned count,
+ unsigned instance_id,
+ void *output_buffer )
+{
+ struct translate_generic *tg = translate_generic(translate);
+ char *vert = output_buffer;
+ unsigned i;
- tg->attrib[attr].emit( data, dst );
- }
-
+ for (i = 0; i < count; i++) {
+ generic_run_one(tg, *elts++, instance_id, vert);
vert += tg->translate.key.output_stride;
}
}
+static void PIPE_CDECL generic_run_elts8( struct translate *translate,
+ const uint8_t *elts,
+ unsigned count,
+ unsigned instance_id,
+ void *output_buffer )
+{
+ struct translate_generic *tg = translate_generic(translate);
+ char *vert = output_buffer;
+ unsigned i;
+ for (i = 0; i < count; i++) {
+ generic_run_one(tg, *elts++, instance_id, vert);
+ vert += tg->translate.key.output_stride;
+ }
+}
static void PIPE_CDECL generic_run( struct translate *translate,
unsigned start,
@@ -628,44 +481,10 @@ static void PIPE_CDECL generic_run( struct translate *translate,
{
struct translate_generic *tg = translate_generic(translate);
char *vert = output_buffer;
- unsigned nr_attrs = tg->nr_attrib;
- unsigned attr;
unsigned i;
- /* loop over vertex attributes (vertex shader inputs)
- */
for (i = 0; i < count; i++) {
- unsigned elt = start + i;
-
- for (attr = 0; attr < nr_attrs; attr++) {
- float data[4];
-
- char *dst = (vert +
- tg->attrib[attr].output_offset);
-
- if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) {
- const char *src;
-
- if (tg->attrib[attr].instance_divisor) {
- src = tg->attrib[attr].input_ptr +
- tg->attrib[attr].input_stride *
- (instance_id / tg->attrib[attr].instance_divisor);
- } else {
- src = tg->attrib[attr].input_ptr +
- tg->attrib[attr].input_stride * elt;
- }
-
- tg->attrib[attr].fetch( src, data );
- } else {
- data[0] = (float)instance_id;
- }
-
- if (0) debug_printf("vert %d attr %d: %f %f %f %f\n",
- i, attr, data[0], data[1], data[2], data[3]);
-
- tg->attrib[attr].emit( data, dst );
- }
-
+ generic_run_one(tg, start + i, instance_id, vert);
vert += tg->translate.key.output_stride;
}
}
@@ -683,7 +502,7 @@ static void generic_set_buffer( struct translate *translate,
for (i = 0; i < tg->nr_attrib; i++) {
if (tg->attrib[i].buffer == buf) {
- tg->attrib[i].input_ptr = ((char *)ptr +
+ tg->attrib[i].input_ptr = ((const uint8_t *)ptr +
tg->attrib[i].input_offset);
tg->attrib[i].input_stride = stride;
tg->attrib[i].max_index = max_index;
@@ -711,19 +530,46 @@ struct translate *translate_generic_create( const struct translate_key *key )
tg->translate.release = generic_release;
tg->translate.set_buffer = generic_set_buffer;
tg->translate.run_elts = generic_run_elts;
+ tg->translate.run_elts16 = generic_run_elts16;
+ tg->translate.run_elts8 = generic_run_elts8;
tg->translate.run = generic_run;
for (i = 0; i < key->nr_elements; i++) {
+ const struct util_format_description *format_desc =
+ util_format_description(key->element[i].input_format);
+
+ assert(format_desc);
+ assert(format_desc->fetch_rgba_float);
+
tg->attrib[i].type = key->element[i].type;
- tg->attrib[i].fetch = get_fetch_func(key->element[i].input_format);
+ tg->attrib[i].fetch = format_desc->fetch_rgba_float;
tg->attrib[i].buffer = key->element[i].input_buffer;
tg->attrib[i].input_offset = key->element[i].input_offset;
tg->attrib[i].instance_divisor = key->element[i].instance_divisor;
- tg->attrib[i].emit = get_emit_func(key->element[i].output_format);
tg->attrib[i].output_offset = key->element[i].output_offset;
+ tg->attrib[i].copy_size = -1;
+ if (tg->attrib[i].type == TRANSLATE_ELEMENT_INSTANCE_ID)
+ {
+ if(key->element[i].output_format == PIPE_FORMAT_R32_USCALED
+ || key->element[i].output_format == PIPE_FORMAT_R32_SSCALED)
+ tg->attrib[i].copy_size = 4;
+ }
+ else
+ {
+ if(key->element[i].input_format == key->element[i].output_format
+ && format_desc->block.width == 1
+ && format_desc->block.height == 1
+ && !(format_desc->block.bits & 7))
+ tg->attrib[i].copy_size = format_desc->block.bits >> 3;
+ }
+
+ if(tg->attrib[i].copy_size < 0)
+ tg->attrib[i].emit = get_emit_func(key->element[i].output_format);
+ else
+ tg->attrib[i].emit = NULL;
}
tg->nr_attrib = key->nr_elements;
@@ -731,3 +577,83 @@ struct translate *translate_generic_create( const struct translate_key *key )
return &tg->translate;
}
+
+boolean translate_generic_is_output_format_supported(enum pipe_format format)
+{
+ switch(format)
+ {
+ case PIPE_FORMAT_R64G64B64A64_FLOAT: return TRUE;
+ case PIPE_FORMAT_R64G64B64_FLOAT: return TRUE;
+ case PIPE_FORMAT_R64G64_FLOAT: return TRUE;
+ case PIPE_FORMAT_R64_FLOAT: return TRUE;
+
+ case PIPE_FORMAT_R32G32B32A32_FLOAT: return TRUE;
+ case PIPE_FORMAT_R32G32B32_FLOAT: return TRUE;
+ case PIPE_FORMAT_R32G32_FLOAT: return TRUE;
+ case PIPE_FORMAT_R32_FLOAT: return TRUE;
+
+ case PIPE_FORMAT_R32G32B32A32_USCALED: return TRUE;
+ case PIPE_FORMAT_R32G32B32_USCALED: return TRUE;
+ case PIPE_FORMAT_R32G32_USCALED: return TRUE;
+ case PIPE_FORMAT_R32_USCALED: return TRUE;
+
+ case PIPE_FORMAT_R32G32B32A32_SSCALED: return TRUE;
+ case PIPE_FORMAT_R32G32B32_SSCALED: return TRUE;
+ case PIPE_FORMAT_R32G32_SSCALED: return TRUE;
+ case PIPE_FORMAT_R32_SSCALED: return TRUE;
+
+ case PIPE_FORMAT_R32G32B32A32_UNORM: return TRUE;
+ case PIPE_FORMAT_R32G32B32_UNORM: return TRUE;
+ case PIPE_FORMAT_R32G32_UNORM: return TRUE;
+ case PIPE_FORMAT_R32_UNORM: return TRUE;
+
+ case PIPE_FORMAT_R32G32B32A32_SNORM: return TRUE;
+ case PIPE_FORMAT_R32G32B32_SNORM: return TRUE;
+ case PIPE_FORMAT_R32G32_SNORM: return TRUE;
+ case PIPE_FORMAT_R32_SNORM: return TRUE;
+
+ case PIPE_FORMAT_R16G16B16A16_USCALED: return TRUE;
+ case PIPE_FORMAT_R16G16B16_USCALED: return TRUE;
+ case PIPE_FORMAT_R16G16_USCALED: return TRUE;
+ case PIPE_FORMAT_R16_USCALED: return TRUE;
+
+ case PIPE_FORMAT_R16G16B16A16_SSCALED: return TRUE;
+ case PIPE_FORMAT_R16G16B16_SSCALED: return TRUE;
+ case PIPE_FORMAT_R16G16_SSCALED: return TRUE;
+ case PIPE_FORMAT_R16_SSCALED: return TRUE;
+
+ case PIPE_FORMAT_R16G16B16A16_UNORM: return TRUE;
+ case PIPE_FORMAT_R16G16B16_UNORM: return TRUE;
+ case PIPE_FORMAT_R16G16_UNORM: return TRUE;
+ case PIPE_FORMAT_R16_UNORM: return TRUE;
+
+ case PIPE_FORMAT_R16G16B16A16_SNORM: return TRUE;
+ case PIPE_FORMAT_R16G16B16_SNORM: return TRUE;
+ case PIPE_FORMAT_R16G16_SNORM: return TRUE;
+ case PIPE_FORMAT_R16_SNORM: return TRUE;
+
+ case PIPE_FORMAT_R8G8B8A8_USCALED: return TRUE;
+ case PIPE_FORMAT_R8G8B8_USCALED: return TRUE;
+ case PIPE_FORMAT_R8G8_USCALED: return TRUE;
+ case PIPE_FORMAT_R8_USCALED: return TRUE;
+
+ case PIPE_FORMAT_R8G8B8A8_SSCALED: return TRUE;
+ case PIPE_FORMAT_R8G8B8_SSCALED: return TRUE;
+ case PIPE_FORMAT_R8G8_SSCALED: return TRUE;
+ case PIPE_FORMAT_R8_SSCALED: return TRUE;
+
+ case PIPE_FORMAT_R8G8B8A8_UNORM: return TRUE;
+ case PIPE_FORMAT_R8G8B8_UNORM: return TRUE;
+ case PIPE_FORMAT_R8G8_UNORM: return TRUE;
+ case PIPE_FORMAT_R8_UNORM: return TRUE;
+
+ case PIPE_FORMAT_R8G8B8A8_SNORM: return TRUE;
+ case PIPE_FORMAT_R8G8B8_SNORM: return TRUE;
+ case PIPE_FORMAT_R8G8_SNORM: return TRUE;
+ case PIPE_FORMAT_R8_SNORM: return TRUE;
+
+ case PIPE_FORMAT_A8R8G8B8_UNORM: return TRUE;
+ case PIPE_FORMAT_B8G8R8A8_UNORM: return TRUE;
+ default: return FALSE;
+ }
+}
diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c
index ef3aa674a34..f8bf5b46692 100644
--- a/src/gallium/auxiliary/translate/translate_sse.c
+++ b/src/gallium/auxiliary/translate/translate_sse.c
@@ -30,11 +30,12 @@
#include "pipe/p_compiler.h"
#include "util/u_memory.h"
#include "util/u_math.h"
+#include "util/u_format.h"
#include "translate.h"
-#if defined(PIPE_ARCH_X86)
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
#include "rtasm/rtasm_cpu.h"
#include "rtasm/rtasm_x86sse.h"
@@ -46,21 +47,9 @@
#define W 3
-typedef void (PIPE_CDECL *run_func)( struct translate *translate,
- unsigned start,
- unsigned count,
- unsigned instance_id,
- void *output_buffer);
-
-typedef void (PIPE_CDECL *run_elts_func)( struct translate *translate,
- const unsigned *elts,
- unsigned count,
- unsigned instance_id,
- void *output_buffer);
-
struct translate_buffer {
const void *base_ptr;
- unsigned stride;
+ uintptr_t stride;
unsigned max_index;
};
@@ -73,21 +62,43 @@ struct translate_buffer_varient {
#define ELEMENT_BUFFER_INSTANCE_ID 1001
+#define NUM_CONSTS 7
+
+enum
+{
+ CONST_IDENTITY,
+ CONST_INV_127,
+ CONST_INV_255,
+ CONST_INV_32767,
+ CONST_INV_65535,
+ CONST_INV_2147483647,
+ CONST_255
+};
+
+#define C(v) {(float)(v), (float)(v), (float)(v), (float)(v)}
+static float consts[NUM_CONSTS][4] = {
+ {0, 0, 0, 1},
+ C(1.0 / 127.0),
+ C(1.0 / 255.0),
+ C(1.0 / 32767.0),
+ C(1.0 / 65535.0),
+ C(1.0 / 2147483647.0),
+ C(255.0)
+};
+#undef C
struct translate_sse {
struct translate translate;
struct x86_function linear_func;
struct x86_function elt_func;
+ struct x86_function elt16_func;
+ struct x86_function elt8_func;
struct x86_function *func;
- boolean loaded_identity;
- boolean loaded_255;
- boolean loaded_inv_255;
-
- float identity[4];
- float float_255[4];
- float inv_255[4];
+ PIPE_ALIGN_VAR(16) float consts[NUM_CONSTS][4];
+ int8_t reg_to_const[16];
+ int8_t const_to_reg[NUM_CONSTS];
struct translate_buffer buffer[PIPE_MAX_ATTRIBS];
unsigned nr_buffers;
@@ -102,17 +113,16 @@ struct translate_sse {
boolean use_instancing;
unsigned instance_id;
- run_func gen_run;
- run_elts_func gen_run_elts;
-
/* these are actually known values, but putting them in a struct
* like this is helpful to keep them in sync across the file.
*/
struct x86_reg tmp_EAX;
- struct x86_reg idx_EBX; /* either start+i or &elt[i] */
- struct x86_reg outbuf_ECX;
- struct x86_reg machine_EDX;
- struct x86_reg count_ESI; /* decrements to zero */
+ struct x86_reg tmp2_EDX;
+ struct x86_reg src_ECX;
+ struct x86_reg idx_ESI; /* either start+i or &elt[i] */
+ struct x86_reg machine_EDI;
+ struct x86_reg outbuf_EBX;
+ struct x86_reg count_EBP; /* decrements to zero */
};
static int get_offset( const void *a, const void *b )
@@ -120,281 +130,950 @@ static int get_offset( const void *a, const void *b )
return (const char *)b - (const char *)a;
}
+static struct x86_reg get_const( struct translate_sse *p, unsigned id)
+{
+ struct x86_reg reg;
+ unsigned i;
+ if(p->const_to_reg[id] >= 0)
+ return x86_make_reg(file_XMM, p->const_to_reg[id]);
-static struct x86_reg get_identity( struct translate_sse *p )
-{
- struct x86_reg reg = x86_make_reg(file_XMM, 6);
-
- if (!p->loaded_identity) {
- p->loaded_identity = TRUE;
- p->identity[0] = 0;
- p->identity[1] = 0;
- p->identity[2] = 0;
- p->identity[3] = 1;
-
- sse_movups(p->func, reg,
- x86_make_disp(p->machine_EDX,
- get_offset(p, &p->identity[0])));
+ for(i = 2; i < 8; ++i)
+ {
+ if(p->reg_to_const[i] < 0)
+ break;
}
+ /* TODO: be smarter here */
+ if(i == 8)
+ --i;
+
+ reg = x86_make_reg(file_XMM, i);
+
+ if(p->reg_to_const[i] >= 0)
+ p->const_to_reg[p->reg_to_const[i]] = -1;
+
+ p->reg_to_const[i] = id;
+ p->const_to_reg[id] = i;
+
+ /* TODO: this should happen outside the loop, if possible */
+ sse_movaps(p->func, reg,
+ x86_make_disp(p->machine_EDI,
+ get_offset(p, &p->consts[id][0])));
+
return reg;
}
-static struct x86_reg get_255( struct translate_sse *p )
+/* load the data in a SSE2 register, padding with zeros */
+static boolean emit_load_sse2( struct translate_sse *p,
+ struct x86_reg data,
+ struct x86_reg src,
+ unsigned size)
{
- struct x86_reg reg = x86_make_reg(file_XMM, 7);
-
- if (!p->loaded_255) {
- p->loaded_255 = TRUE;
- p->float_255[0] =
- p->float_255[1] =
- p->float_255[2] =
- p->float_255[3] = 255.0f;
-
- sse_movups(p->func, reg,
- x86_make_disp(p->machine_EDX,
- get_offset(p, &p->float_255[0])));
+ struct x86_reg tmpXMM = x86_make_reg(file_XMM, 1);
+ struct x86_reg tmp = p->tmp_EAX;
+ switch(size)
+ {
+ case 1:
+ x86_movzx8(p->func, tmp, src);
+ sse2_movd(p->func, data, tmp);
+ break;
+ case 2:
+ x86_movzx16(p->func, tmp, src);
+ sse2_movd(p->func, data, tmp);
+ break;
+ case 3:
+ x86_movzx8(p->func, tmp, x86_make_disp(src, 2));
+ x86_shl_imm(p->func, tmp, 16);
+ x86_mov16(p->func, tmp, src);
+ sse2_movd(p->func, data, tmp);
+ break;
+ case 4:
+ sse2_movd(p->func, data, src);
+ break;
+ case 6:
+ sse2_movd(p->func, data, src);
+ x86_movzx16(p->func, tmp, x86_make_disp(src, 4));
+ sse2_movd(p->func, tmpXMM, tmp);
+ sse2_punpckldq(p->func, data, tmpXMM);
+ break;
+ case 8:
+ sse2_movq(p->func, data, src);
+ break;
+ case 12:
+ sse2_movq(p->func, data, src);
+ sse2_movd(p->func, tmpXMM, x86_make_disp(src, 8));
+ sse2_punpcklqdq(p->func, data, tmpXMM);
+ break;
+ case 16:
+ sse2_movdqu(p->func, data, src);
+ break;
+ default:
+ return FALSE;
}
-
- return reg;
+ return TRUE;
}
-static struct x86_reg get_inv_255( struct translate_sse *p )
+/* this value can be passed for the out_chans argument */
+#define CHANNELS_0001 5
+
+/* this function will load #chans float values, and will
+ * pad the register with zeroes at least up to out_chans.
+ *
+ * If out_chans is set to CHANNELS_0001, then the fourth
+ * value will be padded with 1. Only pass this value if
+ * chans < 4 or results are undefined.
+ */
+static void emit_load_float32( struct translate_sse *p,
+ struct x86_reg data,
+ struct x86_reg arg0,
+ unsigned out_chans,
+ unsigned chans)
{
- struct x86_reg reg = x86_make_reg(file_XMM, 5);
-
- if (!p->loaded_inv_255) {
- p->loaded_inv_255 = TRUE;
- p->inv_255[0] =
- p->inv_255[1] =
- p->inv_255[2] =
- p->inv_255[3] = 1.0f / 255.0f;
-
- sse_movups(p->func, reg,
- x86_make_disp(p->machine_EDX,
- get_offset(p, &p->inv_255[0])));
+ switch(chans)
+ {
+ case 1:
+ /* a 0 0 0
+ * a 0 0 1
+ */
+ sse_movss(p->func, data, arg0);
+ if(out_chans == CHANNELS_0001)
+ sse_orps(p->func, data, get_const(p, CONST_IDENTITY) );
+ break;
+ case 2:
+ /* 0 0 0 1
+ * a b 0 1
+ */
+ if(out_chans == CHANNELS_0001)
+ sse_shufps(p->func, data, get_const(p, CONST_IDENTITY), SHUF(X, Y, Z, W) );
+ else if(out_chans > 2)
+ sse_movlhps(p->func, data, get_const(p, CONST_IDENTITY) );
+ sse_movlps(p->func, data, arg0);
+ break;
+ case 3:
+ /* Have to jump through some hoops:
+ *
+ * c 0 0 0
+ * c 0 0 1 if out_chans == CHANNELS_0001
+ * 0 0 c 0/1
+ * a b c 0/1
+ */
+ sse_movss(p->func, data, x86_make_disp(arg0, 8));
+ if(out_chans == CHANNELS_0001)
+ sse_shufps(p->func, data, get_const(p, CONST_IDENTITY), SHUF(X,Y,Z,W) );
+ sse_shufps(p->func, data, data, SHUF(Y,Z,X,W) );
+ sse_movlps(p->func, data, arg0);
+ break;
+ case 4:
+ sse_movups(p->func, data, arg0);
+ break;
}
-
- return reg;
}
+/* this function behaves like emit_load_float32, but loads
+ 64-bit floating point numbers, converting them to 32-bit
+ ones */
+static void emit_load_float64to32( struct translate_sse *p,
+ struct x86_reg data,
+ struct x86_reg arg0,
+ unsigned out_chans,
+ unsigned chans)
+{
+ struct x86_reg tmpXMM = x86_make_reg(file_XMM, 1);
+ switch(chans)
+ {
+ case 1:
+ sse2_movsd(p->func, data, arg0);
+ if(out_chans > 1)
+ sse2_cvtpd2ps(p->func, data, data);
+ else
+ sse2_cvtsd2ss(p->func, data, data);
+ if(out_chans == CHANNELS_0001)
+ sse_shufps(p->func, data, get_const(p, CONST_IDENTITY), SHUF(X, Y, Z, W) );
+ break;
+ case 2:
+ sse2_movupd(p->func, data, arg0);
+ sse2_cvtpd2ps(p->func, data, data);
+ if(out_chans == CHANNELS_0001)
+ sse_shufps(p->func, data, get_const(p, CONST_IDENTITY), SHUF(X, Y, Z, W) );
+ else if(out_chans > 2)
+ sse_movlhps(p->func, data, get_const(p, CONST_IDENTITY) );
+ break;
+ case 3:
+ sse2_movupd(p->func, data, arg0);
+ sse2_cvtpd2ps(p->func, data, data);
+ sse2_movsd(p->func, tmpXMM, x86_make_disp(arg0, 16));
+ if(out_chans > 3)
+ sse2_cvtpd2ps(p->func, tmpXMM, tmpXMM);
+ else
+ sse2_cvtsd2ss(p->func, tmpXMM, tmpXMM);
+ sse_movlhps(p->func, data, tmpXMM);
+ if(out_chans == CHANNELS_0001)
+ sse_orps(p->func, data, get_const(p, CONST_IDENTITY) );
+ break;
+ case 4:
+ sse2_movupd(p->func, data, arg0);
+ sse2_cvtpd2ps(p->func, data, data);
+ sse2_movupd(p->func, tmpXMM, x86_make_disp(arg0, 16));
+ sse2_cvtpd2ps(p->func, tmpXMM, tmpXMM);
+ sse_movlhps(p->func, data, tmpXMM);
+ break;
+ }
+}
-static void emit_load_R32G32B32A32( struct translate_sse *p,
- struct x86_reg data,
- struct x86_reg arg0 )
+static void emit_mov64(struct translate_sse *p, struct x86_reg dst_gpr, struct x86_reg dst_xmm, struct x86_reg src_gpr, struct x86_reg src_xmm)
{
- sse_movups(p->func, data, arg0);
+ if(x86_target(p->func) != X86_32)
+ x64_mov64(p->func, dst_gpr, src_gpr);
+ else
+ {
+ /* TODO: when/on which CPUs is SSE2 actually better than SSE? */
+ if(x86_target_caps(p->func) & X86_SSE2)
+ sse2_movq(p->func, dst_xmm, src_xmm);
+ else
+ sse_movlps(p->func, dst_xmm, src_xmm);
+ }
}
-static void emit_load_R32G32B32( struct translate_sse *p,
- struct x86_reg data,
- struct x86_reg arg0 )
+static void emit_load64(struct translate_sse *p, struct x86_reg dst_gpr, struct x86_reg dst_xmm, struct x86_reg src)
{
- /* Have to jump through some hoops:
- *
- * c 0 0 0
- * c 0 0 1
- * 0 0 c 1
- * a b c 1
- */
- sse_movss(p->func, data, x86_make_disp(arg0, 8));
- sse_shufps(p->func, data, get_identity(p), SHUF(X,Y,Z,W) );
- sse_shufps(p->func, data, data, SHUF(Y,Z,X,W) );
- sse_movlps(p->func, data, arg0);
+ emit_mov64(p, dst_gpr, dst_xmm, src, src);
}
-static void emit_load_R32G32( struct translate_sse *p,
- struct x86_reg data,
- struct x86_reg arg0 )
+static void emit_store64(struct translate_sse *p, struct x86_reg dst, struct x86_reg src_gpr, struct x86_reg src_xmm)
{
- /* 0 0 0 1
- * a b 0 1
- */
- sse_movups(p->func, data, get_identity(p) );
- sse_movlps(p->func, data, arg0);
+ emit_mov64(p, dst, dst, src_gpr, src_xmm);
}
+static void emit_mov128(struct translate_sse *p, struct x86_reg dst, struct x86_reg src)
+{
+ if(x86_target_caps(p->func) & X86_SSE2)
+ sse2_movdqu(p->func, dst, src);
+ else
+ sse_movups(p->func, dst, src);
+}
-static void emit_load_R32( struct translate_sse *p,
- struct x86_reg data,
- struct x86_reg arg0 )
+/* TODO: this uses unaligned accesses liberally, which is great on Nehalem,
+ * but may or may not be good on older processors
+ * TODO: may perhaps want to use non-temporal stores here if possible
+ */
+static void emit_memcpy(struct translate_sse *p, struct x86_reg dst, struct x86_reg src, unsigned size)
{
- /* a 0 0 0
- * a 0 0 1
- */
- sse_movss(p->func, data, arg0);
- sse_orps(p->func, data, get_identity(p) );
+ struct x86_reg dataXMM = x86_make_reg(file_XMM, 0);
+ struct x86_reg dataXMM2 = x86_make_reg(file_XMM, 1);
+ struct x86_reg dataGPR = p->tmp_EAX;
+ struct x86_reg dataGPR2 = p->tmp2_EDX;
+
+ if(size < 8)
+ {
+ switch (size)
+ {
+ case 1:
+ x86_mov8(p->func, dataGPR, src);
+ x86_mov8(p->func, dst, dataGPR);
+ break;
+ case 2:
+ x86_mov16(p->func, dataGPR, src);
+ x86_mov16(p->func, dst, dataGPR);
+ break;
+ case 3:
+ x86_mov16(p->func, dataGPR, src);
+ x86_mov8(p->func, dataGPR2, x86_make_disp(src, 2));
+ x86_mov16(p->func, dst, dataGPR);
+ x86_mov8(p->func, x86_make_disp(dst, 2), dataGPR2);
+ break;
+ case 4:
+ x86_mov(p->func, dataGPR, src);
+ x86_mov(p->func, dst, dataGPR);
+ break;
+ case 6:
+ x86_mov(p->func, dataGPR, src);
+ x86_mov16(p->func, dataGPR2, x86_make_disp(src, 4));
+ x86_mov(p->func, dst, dataGPR);
+ x86_mov16(p->func, x86_make_disp(dst, 4), dataGPR2);
+ break;
+ }
+ }
+ else if(!(x86_target_caps(p->func) & X86_SSE))
+ {
+ unsigned i = 0;
+ assert((size & 3) == 0);
+ for(i = 0; i < size; i += 4)
+ {
+ x86_mov(p->func, dataGPR, x86_make_disp(src, i));
+ x86_mov(p->func, x86_make_disp(dst, i), dataGPR);
+ }
+ }
+ else
+ {
+ switch(size)
+ {
+ case 8:
+ emit_load64(p, dataGPR, dataXMM, src);
+ emit_store64(p, dst, dataGPR, dataXMM);
+ break;
+ case 12:
+ emit_load64(p, dataGPR2, dataXMM, src);
+ x86_mov(p->func, dataGPR, x86_make_disp(src, 8));
+ emit_store64(p, dst, dataGPR2, dataXMM);
+ x86_mov(p->func, x86_make_disp(dst, 8), dataGPR);
+ break;
+ case 16:
+ emit_mov128(p, dataXMM, src);
+ emit_mov128(p, dst, dataXMM);
+ break;
+ case 24:
+ emit_mov128(p, dataXMM, src);
+ emit_load64(p, dataGPR, dataXMM2, x86_make_disp(src, 16));
+ emit_mov128(p, dst, dataXMM);
+ emit_store64(p, x86_make_disp(dst, 16), dataGPR, dataXMM2);
+ break;
+ case 32:
+ emit_mov128(p, dataXMM, src);
+ emit_mov128(p, dataXMM2, x86_make_disp(src, 16));
+ emit_mov128(p, dst, dataXMM);
+ emit_mov128(p, x86_make_disp(dst, 16), dataXMM2);
+ break;
+ default:
+ assert(0);
+ }
+ }
}
+static boolean translate_attr_convert( struct translate_sse *p,
+ const struct translate_element *a,
+ struct x86_reg src,
+ struct x86_reg dst)
-static void emit_load_R8G8B8A8_UNORM( struct translate_sse *p,
- struct x86_reg data,
- struct x86_reg src )
{
+ const struct util_format_description* input_desc = util_format_description(a->input_format);
+ const struct util_format_description* output_desc = util_format_description(a->output_format);
+ unsigned i;
+ boolean id_swizzle = TRUE;
+ unsigned swizzle[4] = {UTIL_FORMAT_SWIZZLE_NONE, UTIL_FORMAT_SWIZZLE_NONE, UTIL_FORMAT_SWIZZLE_NONE, UTIL_FORMAT_SWIZZLE_NONE};
+ unsigned needed_chans = 0;
+ unsigned imms[2] = {0, 0x3f800000};
- /* Load and unpack twice:
- */
- sse_movss(p->func, data, src);
- sse2_punpcklbw(p->func, data, get_identity(p));
- sse2_punpcklbw(p->func, data, get_identity(p));
+ if(a->output_format == PIPE_FORMAT_NONE || a->input_format == PIPE_FORMAT_NONE)
+ return FALSE;
- /* Convert to float:
- */
- sse2_cvtdq2ps(p->func, data, data);
+ if(input_desc->channel[0].size & 7)
+ return FALSE;
+ if(input_desc->colorspace != output_desc->colorspace)
+ return FALSE;
- /* Scale by 1/255.0
- */
- sse_mulps(p->func, data, get_inv_255(p));
-}
+ for(i = 1; i < input_desc->nr_channels; ++i)
+ {
+ if(memcmp(&input_desc->channel[i], &input_desc->channel[0], sizeof(input_desc->channel[0])))
+ return FALSE;
+ }
+ for(i = 1; i < output_desc->nr_channels; ++i)
+ {
+ if(memcmp(&output_desc->channel[i], &output_desc->channel[0], sizeof(output_desc->channel[0])))
+ return FALSE;
+ }
+ for(i = 0; i < output_desc->nr_channels; ++i)
+ {
+ if(output_desc->swizzle[i] < 4)
+ swizzle[output_desc->swizzle[i]] = input_desc->swizzle[i];
+ }
+ if((x86_target_caps(p->func) & X86_SSE) && (0
+ || a->output_format == PIPE_FORMAT_R32_FLOAT
+ || a->output_format == PIPE_FORMAT_R32G32_FLOAT
+ || a->output_format == PIPE_FORMAT_R32G32B32_FLOAT
+ || a->output_format == PIPE_FORMAT_R32G32B32A32_FLOAT))
+ {
+ struct x86_reg dataXMM = x86_make_reg(file_XMM, 0);
-static void emit_store_R32G32B32A32( struct translate_sse *p,
- struct x86_reg dest,
- struct x86_reg dataXMM )
-{
- sse_movups(p->func, dest, dataXMM);
-}
+ for(i = 0; i < output_desc->nr_channels; ++i)
+ {
+ if(swizzle[i] == UTIL_FORMAT_SWIZZLE_0 && i >= input_desc->nr_channels)
+ swizzle[i] = i;
+ }
-static void emit_store_R32G32B32( struct translate_sse *p,
- struct x86_reg dest,
- struct x86_reg dataXMM )
-{
- /* Emit two, shuffle, emit one.
- */
- sse_movlps(p->func, dest, dataXMM);
- sse_shufps(p->func, dataXMM, dataXMM, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */
- sse_movss(p->func, x86_make_disp(dest,8), dataXMM);
-}
+ for(i = 0; i < output_desc->nr_channels; ++i)
+ {
+ if(swizzle[i] < 4)
+ needed_chans = MAX2(needed_chans, swizzle[i] + 1);
+ if(swizzle[i] < UTIL_FORMAT_SWIZZLE_0 && swizzle[i] != i)
+ id_swizzle = FALSE;
+ }
-static void emit_store_R32G32( struct translate_sse *p,
- struct x86_reg dest,
- struct x86_reg dataXMM )
-{
- sse_movlps(p->func, dest, dataXMM);
-}
+ if(needed_chans > 0)
+ {
+ switch(input_desc->channel[0].type)
+ {
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ if(!(x86_target_caps(p->func) & X86_SSE2))
+ return FALSE;
+ emit_load_sse2(p, dataXMM, src, input_desc->channel[0].size * input_desc->nr_channels >> 3);
+
+ /* TODO: add support for SSE4.1 pmovzx */
+ switch(input_desc->channel[0].size)
+ {
+ case 8:
+ /* TODO: this may be inefficient due to get_identity() being used both as a float and integer register */
+ sse2_punpcklbw(p->func, dataXMM, get_const(p, CONST_IDENTITY));
+ sse2_punpcklbw(p->func, dataXMM, get_const(p, CONST_IDENTITY));
+ break;
+ case 16:
+ sse2_punpcklwd(p->func, dataXMM, get_const(p, CONST_IDENTITY));
+ break;
+ case 32: /* we lose precision here */
+ sse2_psrld_imm(p->func, dataXMM, 1);
+ break;
+ default:
+ return FALSE;
+ }
+ sse2_cvtdq2ps(p->func, dataXMM, dataXMM);
+ if(input_desc->channel[0].normalized)
+ {
+ struct x86_reg factor;
+ switch(input_desc->channel[0].size)
+ {
+ case 8:
+ factor = get_const(p, CONST_INV_255);
+ break;
+ case 16:
+ factor = get_const(p, CONST_INV_65535);
+ break;
+ case 32:
+ factor = get_const(p, CONST_INV_2147483647);
+ break;
+ default:
+ assert(0);
+ factor.disp = 0;
+ factor.file = 0;
+ factor.idx = 0;
+ factor.mod = 0;
+ break;
+ }
+ sse_mulps(p->func, dataXMM, factor);
+ }
+ else if(input_desc->channel[0].size == 32)
+ sse_addps(p->func, dataXMM, dataXMM); /* compensate for the bit we threw away to fit u32 into s32 */
+ break;
+ case UTIL_FORMAT_TYPE_SIGNED:
+ if(!(x86_target_caps(p->func) & X86_SSE2))
+ return FALSE;
+ emit_load_sse2(p, dataXMM, src, input_desc->channel[0].size * input_desc->nr_channels >> 3);
+
+ /* TODO: add support for SSE4.1 pmovsx */
+ switch(input_desc->channel[0].size)
+ {
+ case 8:
+ sse2_punpcklbw(p->func, dataXMM, dataXMM);
+ sse2_punpcklbw(p->func, dataXMM, dataXMM);
+ sse2_psrad_imm(p->func, dataXMM, 24);
+ break;
+ case 16:
+ sse2_punpcklwd(p->func, dataXMM, dataXMM);
+ sse2_psrad_imm(p->func, dataXMM, 16);
+ break;
+ case 32: /* we lose precision here */
+ break;
+ default:
+ return FALSE;
+ }
+ sse2_cvtdq2ps(p->func, dataXMM, dataXMM);
+ if(input_desc->channel[0].normalized)
+ {
+ struct x86_reg factor;
+ switch(input_desc->channel[0].size)
+ {
+ case 8:
+ factor = get_const(p, CONST_INV_127);
+ break;
+ case 16:
+ factor = get_const(p, CONST_INV_32767);
+ break;
+ case 32:
+ factor = get_const(p, CONST_INV_2147483647);
+ break;
+ default:
+ assert(0);
+ factor.disp = 0;
+ factor.file = 0;
+ factor.idx = 0;
+ factor.mod = 0;
+ break;
+ }
+ sse_mulps(p->func, dataXMM, factor);
+ }
+ break;
+
+ break;
+ case UTIL_FORMAT_TYPE_FLOAT:
+ if(input_desc->channel[0].size != 32 && input_desc->channel[0].size != 64)
+ return FALSE;
+ if(swizzle[3] == UTIL_FORMAT_SWIZZLE_1 && input_desc->nr_channels <= 3)
+ {
+ swizzle[3] = UTIL_FORMAT_SWIZZLE_W;
+ needed_chans = CHANNELS_0001;
+ }
+ switch(input_desc->channel[0].size)
+ {
+ case 32:
+ emit_load_float32(p, dataXMM, src, needed_chans, input_desc->nr_channels);
+ break;
+ case 64: /* we lose precision here */
+ if(!(x86_target_caps(p->func) & X86_SSE2))
+ return FALSE;
+ emit_load_float64to32(p, dataXMM, src, needed_chans, input_desc->nr_channels);
+ break;
+ default:
+ return FALSE;
+ }
+ break;
+ default:
+ return FALSE;
+ }
-static void emit_store_R32( struct translate_sse *p,
- struct x86_reg dest,
- struct x86_reg dataXMM )
-{
- sse_movss(p->func, dest, dataXMM);
-}
+ if(!id_swizzle)
+ sse_shufps(p->func, dataXMM, dataXMM, SHUF(swizzle[0], swizzle[1], swizzle[2], swizzle[3]) );
+ }
+ if(output_desc->nr_channels >= 4
+ && swizzle[0] < UTIL_FORMAT_SWIZZLE_0
+ && swizzle[1] < UTIL_FORMAT_SWIZZLE_0
+ && swizzle[2] < UTIL_FORMAT_SWIZZLE_0
+ && swizzle[3] < UTIL_FORMAT_SWIZZLE_0
+ )
+ sse_movups(p->func, dst, dataXMM);
+ else
+ {
+ if(output_desc->nr_channels >= 2
+ && swizzle[0] < UTIL_FORMAT_SWIZZLE_0
+ && swizzle[1] < UTIL_FORMAT_SWIZZLE_0)
+ sse_movlps(p->func, dst, dataXMM);
+ else
+ {
+ if(swizzle[0] < UTIL_FORMAT_SWIZZLE_0)
+ sse_movss(p->func, dst, dataXMM);
+ else
+ x86_mov_imm(p->func, dst, imms[swizzle[0] - UTIL_FORMAT_SWIZZLE_0]);
+
+ if(output_desc->nr_channels >= 2)
+ {
+ if(swizzle[1] < UTIL_FORMAT_SWIZZLE_0)
+ {
+ sse_shufps(p->func, dataXMM, dataXMM, SHUF(1, 1, 2, 3));
+ sse_movss(p->func, x86_make_disp(dst, 4), dataXMM);
+ }
+ else
+ x86_mov_imm(p->func, x86_make_disp(dst, 4), imms[swizzle[1] - UTIL_FORMAT_SWIZZLE_0]);
+ }
+ }
+ if(output_desc->nr_channels >= 3)
+ {
+ if(output_desc->nr_channels >= 4
+ && swizzle[2] < UTIL_FORMAT_SWIZZLE_0
+ && swizzle[3] < UTIL_FORMAT_SWIZZLE_0)
+ sse_movhps(p->func, x86_make_disp(dst, 8), dataXMM);
+ else
+ {
+ if(swizzle[2] < UTIL_FORMAT_SWIZZLE_0)
+ {
+ sse_shufps(p->func, dataXMM, dataXMM, SHUF(2, 2, 2, 3));
+ sse_movss(p->func, x86_make_disp(dst, 8), dataXMM);
+ }
+ else
+ x86_mov_imm(p->func, x86_make_disp(dst, 8), imms[swizzle[2] - UTIL_FORMAT_SWIZZLE_0]);
+
+ if(output_desc->nr_channels >= 4)
+ {
+ if(swizzle[3] < UTIL_FORMAT_SWIZZLE_0)
+ {
+ sse_shufps(p->func, dataXMM, dataXMM, SHUF(3, 3, 3, 3));
+ sse_movss(p->func, x86_make_disp(dst, 12), dataXMM);
+ }
+ else
+ x86_mov_imm(p->func, x86_make_disp(dst, 12), imms[swizzle[3] - UTIL_FORMAT_SWIZZLE_0]);
+ }
+ }
+ }
+ }
+ return TRUE;
+ }
+ else if((x86_target_caps(p->func) & X86_SSE2) && input_desc->channel[0].size == 8 && output_desc->channel[0].size == 16
+ && output_desc->channel[0].normalized == input_desc->channel[0].normalized
+ && (0
+ || (input_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED && output_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED)
+ || (input_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED && output_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED)
+ || (input_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED && output_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED)
+ ))
+ {
+ struct x86_reg dataXMM = x86_make_reg(file_XMM, 0);
+ struct x86_reg tmpXMM = x86_make_reg(file_XMM, 1);
+ struct x86_reg tmp = p->tmp_EAX;
+ unsigned imms[2] = {0, 1};
+
+ for(i = 0; i < output_desc->nr_channels; ++i)
+ {
+ if(swizzle[i] == UTIL_FORMAT_SWIZZLE_0 && i >= input_desc->nr_channels)
+ swizzle[i] = i;
+ }
-static void emit_store_R8G8B8A8_UNORM( struct translate_sse *p,
- struct x86_reg dest,
- struct x86_reg dataXMM )
-{
- /* Scale by 255.0
- */
- sse_mulps(p->func, dataXMM, get_255(p));
+ for(i = 0; i < output_desc->nr_channels; ++i)
+ {
+ if(swizzle[i] < 4)
+ needed_chans = MAX2(needed_chans, swizzle[i] + 1);
+ if(swizzle[i] < UTIL_FORMAT_SWIZZLE_0 && swizzle[i] != i)
+ id_swizzle = FALSE;
+ }
- /* Pack and emit:
- */
- sse2_cvtps2dq(p->func, dataXMM, dataXMM);
- sse2_packssdw(p->func, dataXMM, dataXMM);
- sse2_packuswb(p->func, dataXMM, dataXMM);
- sse_movss(p->func, dest, dataXMM);
-}
+ if(needed_chans > 0)
+ {
+ emit_load_sse2(p, dataXMM, src, input_desc->channel[0].size * input_desc->nr_channels >> 3);
+
+ switch(input_desc->channel[0].type)
+ {
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ if(input_desc->channel[0].normalized)
+ {
+ sse2_punpcklbw(p->func, dataXMM, dataXMM);
+ if(output_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED)
+ sse2_psrlw_imm(p->func, dataXMM, 1);
+ }
+ else
+ sse2_punpcklbw(p->func, dataXMM, get_const(p, CONST_IDENTITY));
+ break;
+ case UTIL_FORMAT_TYPE_SIGNED:
+ if(input_desc->channel[0].normalized)
+ {
+ sse2_movq(p->func, tmpXMM, get_const(p, CONST_IDENTITY));
+ sse2_punpcklbw(p->func, tmpXMM, dataXMM);
+ sse2_psllw_imm(p->func, dataXMM, 9);
+ sse2_psrlw_imm(p->func, dataXMM, 8);
+ sse2_por(p->func, tmpXMM, dataXMM);
+ sse2_psrlw_imm(p->func, dataXMM, 7);
+ sse2_por(p->func, tmpXMM, dataXMM);
+ {
+ struct x86_reg t = dataXMM;
+ dataXMM = tmpXMM;
+ tmpXMM = t;
+ }
+ }
+ else
+ {
+ sse2_punpcklbw(p->func, dataXMM, dataXMM);
+ sse2_psraw_imm(p->func, dataXMM, 8);
+ }
+ break;
+ default:
+ assert(0);
+ }
+ if(output_desc->channel[0].normalized)
+ imms[1] = (output_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) ? 0xffff : 0x7ffff;
+ if(!id_swizzle)
+ sse2_pshuflw(p->func, dataXMM, dataXMM, (swizzle[0] & 3) | ((swizzle[1] & 3) << 2) | ((swizzle[2] & 3) << 4) | ((swizzle[3] & 3) << 6));
+ }
+ if(output_desc->nr_channels >= 4
+ && swizzle[0] < UTIL_FORMAT_SWIZZLE_0
+ && swizzle[1] < UTIL_FORMAT_SWIZZLE_0
+ && swizzle[2] < UTIL_FORMAT_SWIZZLE_0
+ && swizzle[3] < UTIL_FORMAT_SWIZZLE_0
+ )
+ sse2_movq(p->func, dst, dataXMM);
+ else
+ {
+ if(swizzle[0] < UTIL_FORMAT_SWIZZLE_0)
+ {
+ if(output_desc->nr_channels >= 2 && swizzle[1] < UTIL_FORMAT_SWIZZLE_0)
+ sse2_movd(p->func, dst, dataXMM);
+ else
+ {
+ sse2_movd(p->func, tmp, dataXMM);
+ x86_mov16(p->func, dst, tmp);
+ if(output_desc->nr_channels >= 2)
+ x86_mov16_imm(p->func, x86_make_disp(dst, 2), imms[swizzle[1] - UTIL_FORMAT_SWIZZLE_0]);
+ }
+ }
+ else
+ {
+ if(output_desc->nr_channels >= 2 && swizzle[1] >= UTIL_FORMAT_SWIZZLE_0)
+ x86_mov_imm(p->func, dst, (imms[swizzle[1] - UTIL_FORMAT_SWIZZLE_0] << 16) | imms[swizzle[0] - UTIL_FORMAT_SWIZZLE_0]);
+ else
+ {
+ x86_mov16_imm(p->func, dst, imms[swizzle[0] - UTIL_FORMAT_SWIZZLE_0]);
+ if(output_desc->nr_channels >= 2)
+ {
+ sse2_movd(p->func, tmp, dataXMM);
+ x86_shr_imm(p->func, tmp, 16);
+ x86_mov16(p->func, x86_make_disp(dst, 2), tmp);
+ }
+ }
+ }
+ if(output_desc->nr_channels >= 3)
+ {
+ if(swizzle[2] < UTIL_FORMAT_SWIZZLE_0)
+ {
+ if(output_desc->nr_channels >= 4 && swizzle[3] < UTIL_FORMAT_SWIZZLE_0)
+ {
+ sse2_psrlq_imm(p->func, dataXMM, 32);
+ sse2_movd(p->func, x86_make_disp(dst, 4), dataXMM);
+ }
+ else
+ {
+ sse2_psrlq_imm(p->func, dataXMM, 32);
+ sse2_movd(p->func, tmp, dataXMM);
+ x86_mov16(p->func, x86_make_disp(dst, 4), tmp);
+ if(output_desc->nr_channels >= 4)
+ {
+ x86_mov16_imm(p->func, x86_make_disp(dst, 6), imms[swizzle[3] - UTIL_FORMAT_SWIZZLE_0]);
+ }
+ }
+ }
+ else
+ {
+ if(output_desc->nr_channels >= 4 && swizzle[3] >= UTIL_FORMAT_SWIZZLE_0)
+ x86_mov_imm(p->func, x86_make_disp(dst, 4), (imms[swizzle[3] - UTIL_FORMAT_SWIZZLE_0] << 16) | imms[swizzle[2] - UTIL_FORMAT_SWIZZLE_0]);
+ else
+ {
+ x86_mov16_imm(p->func, x86_make_disp(dst, 4), imms[swizzle[2] - UTIL_FORMAT_SWIZZLE_0]);
+
+ if(output_desc->nr_channels >= 4)
+ {
+ sse2_psrlq_imm(p->func, dataXMM, 48);
+ sse2_movd(p->func, tmp, dataXMM);
+ x86_mov16(p->func, x86_make_disp(dst, 6), tmp);
+ }
+ }
+ }
+ }
+ }
+ return TRUE;
+ }
+ else if(!memcmp(&output_desc->channel[0], &input_desc->channel[0], sizeof(output_desc->channel[0])))
+ {
+ struct x86_reg tmp = p->tmp_EAX;
+ unsigned i;
+ if(input_desc->channel[0].size == 8 && input_desc->nr_channels == 4 && output_desc->nr_channels == 4
+ && swizzle[0] == UTIL_FORMAT_SWIZZLE_W
+ && swizzle[1] == UTIL_FORMAT_SWIZZLE_Z
+ && swizzle[2] == UTIL_FORMAT_SWIZZLE_Y
+ && swizzle[3] == UTIL_FORMAT_SWIZZLE_X)
+ {
+ /* TODO: support movbe */
+ x86_mov(p->func, tmp, src);
+ x86_bswap(p->func, tmp);
+ x86_mov(p->func, dst, tmp);
+ return TRUE;
+ }
-/* Extended swizzles? Maybe later.
- */
-static void emit_swizzle( struct translate_sse *p,
- struct x86_reg dest,
- struct x86_reg src,
- unsigned char shuffle )
-{
- sse_shufps(p->func, dest, src, shuffle);
-}
+ for(i = 0; i < output_desc->nr_channels; ++i)
+ {
+ switch(output_desc->channel[0].size)
+ {
+ case 8:
+ if(swizzle[i] >= UTIL_FORMAT_SWIZZLE_0)
+ {
+ unsigned v = 0;
+ if(swizzle[i] == UTIL_FORMAT_SWIZZLE_1)
+ {
+ switch(output_desc->channel[0].type)
+ {
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ v = output_desc->channel[0].normalized ? 0xff : 1;
+ break;
+ case UTIL_FORMAT_TYPE_SIGNED:
+ v = output_desc->channel[0].normalized ? 0x7f : 1;
+ break;
+ default:
+ return FALSE;
+ }
+ }
+ x86_mov8_imm(p->func, x86_make_disp(dst, i * 1), v);
+ }
+ else
+ {
+ x86_mov8(p->func, tmp, x86_make_disp(src, swizzle[i] * 1));
+ x86_mov8(p->func, x86_make_disp(dst, i * 1), tmp);
+ }
+ break;
+ case 16:
+ if(swizzle[i] >= UTIL_FORMAT_SWIZZLE_0)
+ {
+ unsigned v = 0;
+ if(swizzle[i] == UTIL_FORMAT_SWIZZLE_1)
+ {
+ switch(output_desc->channel[1].type)
+ {
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ v = output_desc->channel[1].normalized ? 0xffff : 1;
+ break;
+ case UTIL_FORMAT_TYPE_SIGNED:
+ v = output_desc->channel[1].normalized ? 0x7fff : 1;
+ break;
+ case UTIL_FORMAT_TYPE_FLOAT:
+ v = 0x3c00;
+ break;
+ default:
+ return FALSE;
+ }
+ }
+ x86_mov16_imm(p->func, x86_make_disp(dst, i * 2), v);
+ }
+ else if(swizzle[i] == UTIL_FORMAT_SWIZZLE_0)
+ x86_mov16_imm(p->func, x86_make_disp(dst, i * 2), 0);
+ else
+ {
+ x86_mov16(p->func, tmp, x86_make_disp(src, swizzle[i] * 2));
+ x86_mov16(p->func, x86_make_disp(dst, i * 2), tmp);
+ }
+ break;
+ case 32:
+ if(swizzle[i] >= UTIL_FORMAT_SWIZZLE_0)
+ {
+ unsigned v = 0;
+ if(swizzle[i] == UTIL_FORMAT_SWIZZLE_1)
+ {
+ switch(output_desc->channel[1].type)
+ {
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ v = output_desc->channel[1].normalized ? 0xffffffff : 1;
+ break;
+ case UTIL_FORMAT_TYPE_SIGNED:
+ v = output_desc->channel[1].normalized ? 0x7fffffff : 1;
+ break;
+ case UTIL_FORMAT_TYPE_FLOAT:
+ v = 0x3f800000;
+ break;
+ default:
+ return FALSE;
+ }
+ }
+ x86_mov_imm(p->func, x86_make_disp(dst, i * 4), v);
+ }
+ else
+ {
+ x86_mov(p->func, tmp, x86_make_disp(src, swizzle[i] * 4));
+ x86_mov(p->func, x86_make_disp(dst, i * 4), tmp);
+ }
+ break;
+ case 64:
+ if(swizzle[i] >= UTIL_FORMAT_SWIZZLE_0)
+ {
+ unsigned l = 0;
+ unsigned h = 0;
+ if(swizzle[i] == UTIL_FORMAT_SWIZZLE_1)
+ {
+ switch(output_desc->channel[1].type)
+ {
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ h = output_desc->channel[1].normalized ? 0xffffffff : 0;
+ l = output_desc->channel[1].normalized ? 0xffffffff : 1;
+ break;
+ case UTIL_FORMAT_TYPE_SIGNED:
+ h = output_desc->channel[1].normalized ? 0x7fffffff : 0;
+ l = output_desc->channel[1].normalized ? 0xffffffff : 1;
+ break;
+ case UTIL_FORMAT_TYPE_FLOAT:
+ h = 0x3ff00000;
+ l = 0;
+ break;
+ default:
+ return FALSE;
+ }
+ }
+ x86_mov_imm(p->func, x86_make_disp(dst, i * 8), l);
+ x86_mov_imm(p->func, x86_make_disp(dst, i * 8 + 4), h);
+ }
+ else
+ {
+ if(x86_target_caps(p->func) & X86_SSE)
+ {
+ struct x86_reg tmpXMM = x86_make_reg(file_XMM, 0);
+ emit_load64(p, tmp, tmpXMM, x86_make_disp(src, swizzle[i] * 8));
+ emit_store64(p, x86_make_disp(dst, i * 8), tmp, tmpXMM);
+ }
+ else
+ {
+ x86_mov(p->func, tmp, x86_make_disp(src, swizzle[i] * 8));
+ x86_mov(p->func, x86_make_disp(dst, i * 8), tmp);
+ x86_mov(p->func, tmp, x86_make_disp(src, swizzle[i] * 8 + 4));
+ x86_mov(p->func, x86_make_disp(dst, i * 8 + 4), tmp);
+ }
+ }
+ break;
+ default:
+ return FALSE;
+ }
+ }
+ return TRUE;
+ }
+ /* special case for draw's EMIT_4UB (RGBA) and EMIT_4UB_BGRA */
+ else if((x86_target_caps(p->func) & X86_SSE2) &&
+ a->input_format == PIPE_FORMAT_R32G32B32A32_FLOAT && (0
+ || a->output_format == PIPE_FORMAT_B8G8R8A8_UNORM
+ || a->output_format == PIPE_FORMAT_R8G8B8A8_UNORM
+ ))
+ {
+ struct x86_reg dataXMM = x86_make_reg(file_XMM, 0);
+ /* load */
+ sse_movups(p->func, dataXMM, src);
-static boolean translate_attr( struct translate_sse *p,
- const struct translate_element *a,
- struct x86_reg srcECX,
- struct x86_reg dstEAX)
-{
- struct x86_reg dataXMM = x86_make_reg(file_XMM, 0);
+ if (a->output_format == PIPE_FORMAT_B8G8R8A8_UNORM)
+ sse_shufps(p->func, dataXMM, dataXMM, SHUF(2,1,0,3));
- switch (a->input_format) {
- case PIPE_FORMAT_R32_FLOAT:
- emit_load_R32(p, dataXMM, srcECX);
- break;
- case PIPE_FORMAT_R32G32_FLOAT:
- emit_load_R32G32(p, dataXMM, srcECX);
- break;
- case PIPE_FORMAT_R32G32B32_FLOAT:
- emit_load_R32G32B32(p, dataXMM, srcECX);
- break;
- case PIPE_FORMAT_R32G32B32A32_FLOAT:
- emit_load_R32G32B32A32(p, dataXMM, srcECX);
- break;
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- emit_load_R8G8B8A8_UNORM(p, dataXMM, srcECX);
- emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W));
- break;
- case PIPE_FORMAT_R8G8B8A8_UNORM:
- emit_load_R8G8B8A8_UNORM(p, dataXMM, srcECX);
- break;
- default:
- return FALSE;
- }
+ /* scale by 255.0 */
+ sse_mulps(p->func, dataXMM, get_const(p, CONST_255));
- switch (a->output_format) {
- case PIPE_FORMAT_R32_FLOAT:
- emit_store_R32(p, dstEAX, dataXMM);
- break;
- case PIPE_FORMAT_R32G32_FLOAT:
- emit_store_R32G32(p, dstEAX, dataXMM);
- break;
- case PIPE_FORMAT_R32G32B32_FLOAT:
- emit_store_R32G32B32(p, dstEAX, dataXMM);
- break;
- case PIPE_FORMAT_R32G32B32A32_FLOAT:
- emit_store_R32G32B32A32(p, dstEAX, dataXMM);
- break;
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W));
- emit_store_R8G8B8A8_UNORM(p, dstEAX, dataXMM);
- break;
- case PIPE_FORMAT_R8G8B8A8_UNORM:
- emit_store_R8G8B8A8_UNORM(p, dstEAX, dataXMM);
- break;
- default:
- return FALSE;
+ /* pack and emit */
+ sse2_cvtps2dq(p->func, dataXMM, dataXMM);
+ sse2_packssdw(p->func, dataXMM, dataXMM);
+ sse2_packuswb(p->func, dataXMM, dataXMM);
+ sse2_movd(p->func, dst, dataXMM);
+
+ return TRUE;
}
- return TRUE;
+ return FALSE;
}
+static boolean translate_attr( struct translate_sse *p,
+ const struct translate_element *a,
+ struct x86_reg src,
+ struct x86_reg dst)
+{
+ if(a->input_format == a->output_format)
+ {
+ emit_memcpy(p, dst, src, util_format_get_stride(a->input_format, 1));
+ return TRUE;
+ }
+
+ return translate_attr_convert(p, a, src, dst);
+}
static boolean init_inputs( struct translate_sse *p,
- boolean linear )
+ unsigned index_size )
{
unsigned i;
- struct x86_reg instance_id = x86_make_disp(p->machine_EDX,
+ struct x86_reg instance_id = x86_make_disp(p->machine_EDI,
get_offset(p, &p->instance_id));
for (i = 0; i < p->nr_buffer_varients; i++) {
struct translate_buffer_varient *varient = &p->buffer_varient[i];
struct translate_buffer *buffer = &p->buffer[varient->buffer_index];
- if (linear || varient->instance_divisor) {
- struct x86_reg buf_stride = x86_make_disp(p->machine_EDX,
+ if (!index_size || varient->instance_divisor) {
+ struct x86_reg buf_stride = x86_make_disp(p->machine_EDI,
get_offset(p, &buffer->stride));
- struct x86_reg buf_ptr = x86_make_disp(p->machine_EDX,
+ struct x86_reg buf_ptr = x86_make_disp(p->machine_EDI,
get_offset(p, &varient->ptr));
- struct x86_reg buf_base_ptr = x86_make_disp(p->machine_EDX,
+ struct x86_reg buf_base_ptr = x86_make_disp(p->machine_EDI,
get_offset(p, &buffer->base_ptr));
- struct x86_reg elt = p->idx_EBX;
+ struct x86_reg elt = p->idx_ESI;
struct x86_reg tmp_EAX = p->tmp_EAX;
/* Calculate pointer to first attrib:
@@ -406,20 +1085,16 @@ static boolean init_inputs( struct translate_sse *p,
x86_mov(p->func, tmp_EAX, instance_id);
if (varient->instance_divisor != 1) {
- struct x86_reg tmp_EDX = p->machine_EDX;
- struct x86_reg tmp_ECX = p->outbuf_ECX;
+ struct x86_reg tmp_EDX = p->tmp2_EDX;
+ struct x86_reg tmp_ECX = p->src_ECX;
/* TODO: Add x86_shr() to rtasm and use it whenever
* instance divisor is power of two.
*/
- x86_push(p->func, tmp_EDX);
- x86_push(p->func, tmp_ECX);
x86_xor(p->func, tmp_EDX, tmp_EDX);
x86_mov_reg_imm(p->func, tmp_ECX, varient->instance_divisor);
x86_div(p->func, tmp_ECX); /* EAX = EDX:EAX / ECX */
- x86_pop(p->func, tmp_ECX);
- x86_pop(p->func, tmp_EDX);
}
} else {
x86_mov(p->func, tmp_EAX, elt);
@@ -430,16 +1105,23 @@ static boolean init_inputs( struct translate_sse *p,
*/
x86_imul(p->func, tmp_EAX, buf_stride);
+ x64_rexw(p->func);
x86_add(p->func, tmp_EAX, buf_base_ptr);
/* In the linear case, keep the buffer pointer instead of the
* index number.
*/
- if (linear && p->nr_buffer_varients == 1)
+ if (!index_size && p->nr_buffer_varients == 1)
+ {
+ x64_rexw(p->func);
x86_mov(p->func, elt, tmp_EAX);
+ }
else
+ {
+ x64_rexw(p->func);
x86_mov(p->func, buf_ptr, tmp_EAX);
+ }
}
}
@@ -448,44 +1130,57 @@ static boolean init_inputs( struct translate_sse *p,
static struct x86_reg get_buffer_ptr( struct translate_sse *p,
- boolean linear,
+ unsigned index_size,
unsigned var_idx,
struct x86_reg elt )
{
if (var_idx == ELEMENT_BUFFER_INSTANCE_ID) {
- return x86_make_disp(p->machine_EDX,
+ return x86_make_disp(p->machine_EDI,
get_offset(p, &p->instance_id));
}
- if (linear && p->nr_buffer_varients == 1) {
- return p->idx_EBX;
+ if (!index_size && p->nr_buffer_varients == 1) {
+ return p->idx_ESI;
}
- else if (linear || p->buffer_varient[var_idx].instance_divisor) {
- struct x86_reg ptr = p->tmp_EAX;
+ else if (!index_size || p->buffer_varient[var_idx].instance_divisor) {
+ struct x86_reg ptr = p->src_ECX;
struct x86_reg buf_ptr =
- x86_make_disp(p->machine_EDX,
+ x86_make_disp(p->machine_EDI,
get_offset(p, &p->buffer_varient[var_idx].ptr));
+ x64_rexw(p->func);
x86_mov(p->func, ptr, buf_ptr);
return ptr;
}
else {
- struct x86_reg ptr = p->tmp_EAX;
+ struct x86_reg ptr = p->src_ECX;
const struct translate_buffer_varient *varient = &p->buffer_varient[var_idx];
struct x86_reg buf_stride =
- x86_make_disp(p->machine_EDX,
+ x86_make_disp(p->machine_EDI,
get_offset(p, &p->buffer[varient->buffer_index].stride));
struct x86_reg buf_base_ptr =
- x86_make_disp(p->machine_EDX,
+ x86_make_disp(p->machine_EDI,
get_offset(p, &p->buffer[varient->buffer_index].base_ptr));
/* Calculate pointer to current attrib:
*/
- x86_mov(p->func, ptr, buf_stride);
- x86_imul(p->func, ptr, elt);
+ switch(index_size)
+ {
+ case 1:
+ x86_movzx8(p->func, ptr, elt);
+ break;
+ case 2:
+ x86_movzx16(p->func, ptr, elt);
+ break;
+ case 4:
+ x86_mov(p->func, ptr, elt);
+ break;
+ }
+ x86_imul(p->func, ptr, buf_stride);
+ x64_rexw(p->func);
x86_add(p->func, ptr, buf_base_ptr);
return ptr;
}
@@ -494,39 +1189,43 @@ static struct x86_reg get_buffer_ptr( struct translate_sse *p,
static boolean incr_inputs( struct translate_sse *p,
- boolean linear )
+ unsigned index_size )
{
- if (linear && p->nr_buffer_varients == 1) {
- struct x86_reg stride = x86_make_disp(p->machine_EDX,
+ if (!index_size && p->nr_buffer_varients == 1) {
+ struct x86_reg stride = x86_make_disp(p->machine_EDI,
get_offset(p, &p->buffer[0].stride));
if (p->buffer_varient[0].instance_divisor == 0) {
- x86_add(p->func, p->idx_EBX, stride);
- sse_prefetchnta(p->func, x86_make_disp(p->idx_EBX, 192));
+ x64_rexw(p->func);
+ x86_add(p->func, p->idx_ESI, stride);
+ sse_prefetchnta(p->func, x86_make_disp(p->idx_ESI, 192));
}
}
- else if (linear) {
+ else if (!index_size) {
unsigned i;
/* Is this worthwhile??
*/
for (i = 0; i < p->nr_buffer_varients; i++) {
struct translate_buffer_varient *varient = &p->buffer_varient[i];
- struct x86_reg buf_ptr = x86_make_disp(p->machine_EDX,
+ struct x86_reg buf_ptr = x86_make_disp(p->machine_EDI,
get_offset(p, &varient->ptr));
- struct x86_reg buf_stride = x86_make_disp(p->machine_EDX,
+ struct x86_reg buf_stride = x86_make_disp(p->machine_EDI,
get_offset(p, &p->buffer[varient->buffer_index].stride));
if (varient->instance_divisor == 0) {
- x86_mov(p->func, p->tmp_EAX, buf_ptr);
- x86_add(p->func, p->tmp_EAX, buf_stride);
+ x86_mov(p->func, p->tmp_EAX, buf_stride);
+ x64_rexw(p->func);
+ x86_add(p->func, p->tmp_EAX, buf_ptr);
if (i == 0) sse_prefetchnta(p->func, x86_make_disp(p->tmp_EAX, 192));
+ x64_rexw(p->func);
x86_mov(p->func, buf_ptr, p->tmp_EAX);
}
}
}
else {
- x86_lea(p->func, p->idx_EBX, x86_make_disp(p->idx_EBX, 4));
+ x64_rexw(p->func);
+ x86_lea(p->func, p->idx_ESI, x86_make_disp(p->idx_ESI, index_size));
}
return TRUE;
@@ -551,35 +1250,52 @@ static boolean incr_inputs( struct translate_sse *p,
*/
static boolean build_vertex_emit( struct translate_sse *p,
struct x86_function *func,
- boolean linear )
+ unsigned index_size )
{
int fixup, label;
unsigned j;
+ memset(p->reg_to_const, 0xff, sizeof(p->reg_to_const));
+ memset(p->const_to_reg, 0xff, sizeof(p->const_to_reg));
+
p->tmp_EAX = x86_make_reg(file_REG32, reg_AX);
- p->idx_EBX = x86_make_reg(file_REG32, reg_BX);
- p->outbuf_ECX = x86_make_reg(file_REG32, reg_CX);
- p->machine_EDX = x86_make_reg(file_REG32, reg_DX);
- p->count_ESI = x86_make_reg(file_REG32, reg_SI);
+ p->idx_ESI = x86_make_reg(file_REG32, reg_SI);
+ p->outbuf_EBX = x86_make_reg(file_REG32, reg_BX);
+ p->machine_EDI = x86_make_reg(file_REG32, reg_DI);
+ p->count_EBP = x86_make_reg(file_REG32, reg_BP);
+ p->tmp2_EDX = x86_make_reg(file_REG32, reg_DX);
+ p->src_ECX = x86_make_reg(file_REG32, reg_CX);
p->func = func;
- p->loaded_inv_255 = FALSE;
- p->loaded_255 = FALSE;
- p->loaded_identity = FALSE;
x86_init_func(p->func);
- /* Push a few regs?
- */
- x86_push(p->func, p->idx_EBX);
- x86_push(p->func, p->count_ESI);
+ if(x86_target(p->func) == X86_64_WIN64_ABI)
+ {
+ /* the ABI guarantees a 16-byte aligned 32-byte "shadow space" above the return address */
+ sse2_movdqa(p->func, x86_make_disp(x86_make_reg(file_REG32, reg_SP), 8), x86_make_reg(file_XMM, 6));
+ sse2_movdqa(p->func, x86_make_disp(x86_make_reg(file_REG32, reg_SP), 24), x86_make_reg(file_XMM, 7));
+ }
- /* Load arguments into regs:
- */
- x86_mov(p->func, p->machine_EDX, x86_fn_arg(p->func, 1));
- x86_mov(p->func, p->idx_EBX, x86_fn_arg(p->func, 2));
- x86_mov(p->func, p->count_ESI, x86_fn_arg(p->func, 3));
- x86_mov(p->func, p->outbuf_ECX, x86_fn_arg(p->func, 5));
+ x86_push(p->func, p->outbuf_EBX);
+ x86_push(p->func, p->count_EBP);
+
+/* on non-Win64 x86-64, these are already in the right registers */
+ if(x86_target(p->func) != X86_64_STD_ABI)
+ {
+ x86_push(p->func, p->machine_EDI);
+ x86_push(p->func, p->idx_ESI);
+
+ x86_mov(p->func, p->machine_EDI, x86_fn_arg(p->func, 1));
+ x86_mov(p->func, p->idx_ESI, x86_fn_arg(p->func, 2));
+ }
+
+ x86_mov(p->func, p->count_EBP, x86_fn_arg(p->func, 3));
+
+ if(x86_target(p->func) != X86_32)
+ x64_mov64(p->func, p->outbuf_EBX, x86_fn_arg(p->func, 5));
+ else
+ x86_mov(p->func, p->outbuf_EBX, x86_fn_arg(p->func, 5));
/* Load instance ID.
*/
@@ -588,25 +1304,25 @@ static boolean build_vertex_emit( struct translate_sse *p,
p->tmp_EAX,
x86_fn_arg(p->func, 4));
x86_mov(p->func,
- x86_make_disp(p->machine_EDX, get_offset(p, &p->instance_id)),
+ x86_make_disp(p->machine_EDI, get_offset(p, &p->instance_id)),
p->tmp_EAX);
}
/* Get vertex count, compare to zero
*/
x86_xor(p->func, p->tmp_EAX, p->tmp_EAX);
- x86_cmp(p->func, p->count_ESI, p->tmp_EAX);
+ x86_cmp(p->func, p->count_EBP, p->tmp_EAX);
fixup = x86_jcc_forward(p->func, cc_E);
/* always load, needed or not:
*/
- init_inputs(p, linear);
+ init_inputs(p, index_size);
/* Note address for loop jump
*/
label = x86_get_label(p->func);
{
- struct x86_reg elt = linear ? p->idx_EBX : x86_deref(p->idx_EBX);
+ struct x86_reg elt = !index_size ? p->idx_ESI : x86_deref(p->idx_ESI);
int last_varient = -1;
struct x86_reg vb;
@@ -618,30 +1334,31 @@ static boolean build_vertex_emit( struct translate_sse *p,
*/
if (varient != last_varient) {
last_varient = varient;
- vb = get_buffer_ptr(p, linear, varient, elt);
+ vb = get_buffer_ptr(p, index_size, varient, elt);
}
if (!translate_attr( p, a,
x86_make_disp(vb, a->input_offset),
- x86_make_disp(p->outbuf_ECX, a->output_offset)))
+ x86_make_disp(p->outbuf_EBX, a->output_offset)))
return FALSE;
}
/* Next output vertex:
*/
+ x64_rexw(p->func);
x86_lea(p->func,
- p->outbuf_ECX,
- x86_make_disp(p->outbuf_ECX,
+ p->outbuf_EBX,
+ x86_make_disp(p->outbuf_EBX,
p->translate.key.output_stride));
/* Incr index
*/
- incr_inputs( p, linear );
+ incr_inputs( p, index_size );
}
/* decr count, loop if not zero
*/
- x86_dec(p->func, p->count_ESI);
+ x86_dec(p->func, p->count_EBP);
x86_jcc(p->func, cc_NZ, label);
/* Exit mmx state?
@@ -656,8 +1373,20 @@ static boolean build_vertex_emit( struct translate_sse *p,
/* Pop regs and return
*/
- x86_pop(p->func, p->count_ESI);
- x86_pop(p->func, p->idx_EBX);
+ if(x86_target(p->func) != X86_64_STD_ABI)
+ {
+ x86_pop(p->func, p->idx_ESI);
+ x86_pop(p->func, p->machine_EDI);
+ }
+
+ x86_pop(p->func, p->count_EBP);
+ x86_pop(p->func, p->outbuf_EBX);
+
+ if(x86_target(p->func) == X86_64_WIN64_ABI)
+ {
+ sse2_movdqa(p->func, x86_make_reg(file_XMM, 6), x86_make_disp(x86_make_reg(file_REG32, reg_SP), 8));
+ sse2_movdqa(p->func, x86_make_reg(file_XMM, 7), x86_make_disp(x86_make_reg(file_REG32, reg_SP), 24));
+ }
x86_ret(p->func);
return TRUE;
@@ -697,37 +1426,7 @@ static void translate_sse_release( struct translate *translate )
x86_release_func( &p->linear_func );
x86_release_func( &p->elt_func );
- FREE(p);
-}
-
-static void PIPE_CDECL translate_sse_run_elts( struct translate *translate,
- const unsigned *elts,
- unsigned count,
- unsigned instance_id,
- void *output_buffer )
-{
- struct translate_sse *p = (struct translate_sse *)translate;
-
- p->gen_run_elts( translate,
- elts,
- count,
- instance_id,
- output_buffer);
-}
-
-static void PIPE_CDECL translate_sse_run( struct translate *translate,
- unsigned start,
- unsigned count,
- unsigned instance_id,
- void *output_buffer )
-{
- struct translate_sse *p = (struct translate_sse *)translate;
-
- p->gen_run( translate,
- start,
- count,
- instance_id,
- output_buffer);
+ os_free_aligned(p);
}
@@ -736,18 +1435,19 @@ struct translate *translate_sse2_create( const struct translate_key *key )
struct translate_sse *p = NULL;
unsigned i;
- if (!rtasm_cpu_has_sse() || !rtasm_cpu_has_sse2())
+ /* this is misnamed, it actually refers to whether rtasm is enabled or not */
+ if (!rtasm_cpu_has_sse())
goto fail;
- p = CALLOC_STRUCT( translate_sse );
+ p = os_malloc_aligned(sizeof(struct translate_sse), 16);
if (p == NULL)
goto fail;
+ memset(p, 0, sizeof(*p));
+ memcpy(p->consts, consts, sizeof(consts));
p->translate.key = *key;
p->translate.release = translate_sse_release;
p->translate.set_buffer = translate_sse_set_buffer;
- p->translate.run_elts = translate_sse_run_elts;
- p->translate.run = translate_sse_run;
for (i = 0; i < key->nr_elements; i++) {
if (key->element[i].type == TRANSLATE_ELEMENT_NORMAL) {
@@ -783,18 +1483,32 @@ struct translate *translate_sse2_create( const struct translate_key *key )
if (0) debug_printf("nr_buffers: %d\n", p->nr_buffers);
- if (!build_vertex_emit(p, &p->linear_func, TRUE))
+ if (!build_vertex_emit(p, &p->linear_func, 0))
+ goto fail;
+
+ if (!build_vertex_emit(p, &p->elt_func, 4))
+ goto fail;
+
+ if (!build_vertex_emit(p, &p->elt16_func, 2))
+ goto fail;
+
+ if (!build_vertex_emit(p, &p->elt8_func, 1))
+ goto fail;
+
+ p->translate.run = (void*)x86_get_func(&p->linear_func);
+ if (p->translate.run == NULL)
goto fail;
- if (!build_vertex_emit(p, &p->elt_func, FALSE))
+ p->translate.run_elts = (void*)x86_get_func(&p->elt_func);
+ if (p->translate.run_elts == NULL)
goto fail;
- p->gen_run = (run_func)x86_get_func(&p->linear_func);
- if (p->gen_run == NULL)
+ p->translate.run_elts16 = (void*)x86_get_func(&p->elt16_func);
+ if (p->translate.run_elts16 == NULL)
goto fail;
- p->gen_run_elts = (run_elts_func)x86_get_func(&p->elt_func);
- if (p->gen_run_elts == NULL)
+ p->translate.run_elts8 = (void*)x86_get_func(&p->elt8_func);
+ if (p->translate.run_elts8 == NULL)
goto fail;
return &p->translate;
diff --git a/src/gallium/auxiliary/util/u_atomic.h b/src/gallium/auxiliary/util/u_atomic.h
index a1568233906..8434491a421 100644
--- a/src/gallium/auxiliary/util/u_atomic.h
+++ b/src/gallium/auxiliary/util/u_atomic.h
@@ -29,6 +29,8 @@
#define PIPE_ATOMIC_ASM_MSVC_X86
#elif (defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86))
#define PIPE_ATOMIC_ASM_GCC_X86
+#elif (defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86_64))
+#define PIPE_ATOMIC_ASM_GCC_X86_64
#elif defined(PIPE_CC_GCC) && (PIPE_CC_GCC_VERSION >= 401)
#define PIPE_ATOMIC_GCC_INTRINSIC
#else
@@ -36,6 +38,51 @@
#endif
+#if defined(PIPE_ATOMIC_ASM_GCC_X86_64)
+#define PIPE_ATOMIC "GCC x86_64 assembly"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define p_atomic_set(_v, _i) (*(_v) = (_i))
+#define p_atomic_read(_v) (*(_v))
+
+static INLINE boolean
+p_atomic_dec_zero(int32_t *v)
+{
+ unsigned char c;
+
+ __asm__ __volatile__("lock; decl %0; sete %1":"+m"(*v), "=qm"(c)
+ ::"memory");
+
+ return c != 0;
+}
+
+static INLINE void
+p_atomic_inc(int32_t *v)
+{
+ __asm__ __volatile__("lock; incl %0":"+m"(*v));
+}
+
+static INLINE void
+p_atomic_dec(int32_t *v)
+{
+ __asm__ __volatile__("lock; decl %0":"+m"(*v));
+}
+
+static INLINE int32_t
+p_atomic_cmpxchg(int32_t *v, int32_t old, int32_t _new)
+{
+ return __sync_val_compare_and_swap(v, old, _new);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* PIPE_ATOMIC_ASM_GCC_X86_64 */
+
#if defined(PIPE_ATOMIC_ASM_GCC_X86)
diff --git a/src/gallium/auxiliary/util/u_bitmask.h b/src/gallium/auxiliary/util/u_bitmask.h
index 87f1110296a..98b85ddecd5 100644
--- a/src/gallium/auxiliary/util/u_bitmask.h
+++ b/src/gallium/auxiliary/util/u_bitmask.h
@@ -36,6 +36,9 @@
#define U_HANDLE_BITMASK_H_
+#include "pipe/p_compiler.h"
+
+
#ifdef __cplusplus
extern "C" {
#endif
diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c
index e45310b9bb7..dfb142b9e1c 100644
--- a/src/gallium/auxiliary/util/u_blit.c
+++ b/src/gallium/auxiliary/util/u_blit.c
@@ -47,8 +47,6 @@
#include "util/u_memory.h"
#include "util/u_sampler.h"
#include "util/u_simple_shaders.h"
-#include "util/u_surface.h"
-#include "util/u_rect.h"
#include "cso_cache/cso_context.h"
@@ -59,15 +57,18 @@ struct blit_state
struct cso_context *cso;
struct pipe_blend_state blend;
- struct pipe_depth_stencil_alpha_state depthstencil;
+ struct pipe_depth_stencil_alpha_state depthstencil_keep;
+ struct pipe_depth_stencil_alpha_state depthstencil_write;
struct pipe_rasterizer_state rasterizer;
struct pipe_sampler_state sampler;
struct pipe_viewport_state viewport;
struct pipe_clip_state clip;
struct pipe_vertex_element velem[2];
+ enum pipe_texture_target internal_target;
void *vs;
void *fs[TGSI_WRITEMASK_XYZW + 1];
+ void *fs_depth;
struct pipe_resource *vbuf; /**< quad vertices */
unsigned vbuf_slot;
@@ -98,12 +99,15 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso)
ctx->blend.rt[0].colormask = PIPE_MASK_RGBA;
/* no-op depth/stencil/alpha */
- memset(&ctx->depthstencil, 0, sizeof(ctx->depthstencil));
+ memset(&ctx->depthstencil_keep, 0, sizeof(ctx->depthstencil_keep));
+ memset(&ctx->depthstencil_write, 0, sizeof(ctx->depthstencil_write));
+ ctx->depthstencil_write.depth.enabled = 1;
+ ctx->depthstencil_write.depth.writemask = 1;
+ ctx->depthstencil_write.depth.func = PIPE_FUNC_ALWAYS;
/* rasterizer */
memset(&ctx->rasterizer, 0, sizeof(ctx->rasterizer));
- ctx->rasterizer.front_winding = PIPE_WINDING_CW;
- ctx->rasterizer.cull_mode = PIPE_WINDING_NONE;
+ ctx->rasterizer.cull_face = PIPE_FACE_NONE;
ctx->rasterizer.gl_rasterization_rules = 1;
/* samplers */
@@ -114,7 +118,6 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso)
ctx->sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
ctx->sampler.min_img_filter = 0; /* set later */
ctx->sampler.mag_img_filter = 0; /* set later */
- ctx->sampler.normalized_coords = 1;
/* vertex elements state */
memset(&ctx->velem[0], 0, sizeof(ctx->velem[0]) * 2);
@@ -138,7 +141,8 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso)
/* fragment shader */
ctx->fs[TGSI_WRITEMASK_XYZW] =
- util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_2D);
+ util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_2D,
+ TGSI_INTERPOLATE_LINEAR);
ctx->vbuf = NULL;
/* init vertex data that doesn't change */
@@ -148,6 +152,11 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso)
ctx->vertices[i][1][3] = 1.0f; /* q */
}
+ if(pipe->screen->get_param(pipe->screen, PIPE_CAP_NPOT_TEXTURES))
+ ctx->internal_target = PIPE_TEXTURE_2D;
+ else
+ ctx->internal_target = PIPE_TEXTURE_RECT;
+
return ctx;
}
@@ -167,6 +176,9 @@ util_destroy_blit(struct blit_state *ctx)
if (ctx->fs[i])
pipe->delete_fs_state(pipe, ctx->fs[i]);
+ if (ctx->fs_depth)
+ pipe->delete_fs_state(pipe, ctx->fs_depth);
+
pipe_resource_reference(&ctx->vbuf, NULL);
FREE(ctx);
@@ -192,7 +204,6 @@ get_next_slot( struct blit_state *ctx )
return ctx->vbuf_slot++ * sizeof ctx->vertices;
}
-
@@ -275,14 +286,15 @@ regions_overlap(int srcX0, int srcY0,
* \param writemask controls which channels in the dest surface are sourced
* from the src surface. Disabled channels are sourced
* from (0,0,0,1).
- * XXX need some control over blitting Z and/or stencil.
+ * XXX need some control over blitting stencil.
*/
void
util_blit_pixels_writemask(struct blit_state *ctx,
- struct pipe_surface *src,
- struct pipe_sampler_view *src_sampler_view,
+ struct pipe_resource *src_tex,
+ struct pipe_subresource srcsub,
int srcX0, int srcY0,
int srcX1, int srcY1,
+ int srcZ0,
struct pipe_surface *dst,
int dstX0, int dstY0,
int dstX1, int dstY1,
@@ -292,23 +304,25 @@ util_blit_pixels_writemask(struct blit_state *ctx,
struct pipe_context *pipe = ctx->pipe;
struct pipe_screen *screen = pipe->screen;
struct pipe_sampler_view *sampler_view = NULL;
+ struct pipe_sampler_view sv_templ;
struct pipe_framebuffer_state fb;
const int srcW = abs(srcX1 - srcX0);
const int srcH = abs(srcY1 - srcY0);
unsigned offset;
- boolean overlap;
+ boolean overlap, dst_is_depth;
float s0, t0, s1, t1;
+ boolean normalized;
assert(filter == PIPE_TEX_MIPFILTER_NEAREST ||
filter == PIPE_TEX_MIPFILTER_LINEAR);
- assert(screen->is_format_supported(screen, src->format, PIPE_TEXTURE_2D,
- PIPE_BIND_SAMPLER_VIEW, 0));
- assert(screen->is_format_supported(screen, dst->format, PIPE_TEXTURE_2D,
- PIPE_BIND_RENDER_TARGET, 0));
+ assert(srcsub.level <= src_tex->last_level);
/* do the regions overlap? */
- overlap = util_same_surface(src, dst) &&
+ overlap = src_tex == dst->texture &&
+ dst->face == srcsub.face &&
+ dst->level == srcsub.level &&
+ dst->zslice == srcZ0 &&
regions_overlap(srcX0, srcY0, srcX1, srcY1,
dstX0, dstY0, dstX1, dstY1);
@@ -317,8 +331,7 @@ util_blit_pixels_writemask(struct blit_state *ctx,
* no overlapping.
* Filter mode should not matter since there's no stretching.
*/
- if (pipe->surface_copy &&
- dst->format == src->format &&
+ if (dst->format == src_tex->format &&
srcX0 < srcX1 &&
dstX0 < dstX1 &&
srcY0 < srcY1 &&
@@ -326,29 +339,36 @@ util_blit_pixels_writemask(struct blit_state *ctx,
(dstX1 - dstX0) == (srcX1 - srcX0) &&
(dstY1 - dstY0) == (srcY1 - srcY0) &&
!overlap) {
- pipe->surface_copy(pipe,
- dst, dstX0, dstY0, /* dest */
- src, srcX0, srcY0, /* src */
- srcW, srcH); /* size */
+ struct pipe_subresource subdst;
+ subdst.face = dst->face;
+ subdst.level = dst->level;
+ pipe->resource_copy_region(pipe,
+ dst->texture, subdst,
+ dstX0, dstY0, dst->zslice,/* dest */
+ src_tex, srcsub,
+ srcX0, srcY0, srcZ0,/* src */
+ srcW, srcH); /* size */
return;
}
-
- assert(screen->is_format_supported(screen, dst->format, PIPE_TEXTURE_2D,
- PIPE_BIND_RENDER_TARGET, 0));
/* Create a temporary texture when src and dest alias or when src
- * is anything other than a single-level 2d texture.
+ * is anything other than a 2d texture.
+ * XXX should just use appropriate shader to access 1d / 3d slice / cube face,
+ * much like the u_blitter code does (should be pretty trivial).
*
* This can still be improved upon.
*/
- if (util_same_surface(src, dst) ||
- src->texture->target != PIPE_TEXTURE_2D ||
- src->texture->last_level != 0)
+ if ((src_tex == dst->texture &&
+ dst->face == srcsub.face &&
+ dst->level == srcsub.level &&
+ dst->zslice == srcZ0) ||
+ (src_tex->target != PIPE_TEXTURE_2D &&
+ src_tex->target != PIPE_TEXTURE_RECT))
{
struct pipe_resource texTemp;
struct pipe_resource *tex;
struct pipe_sampler_view sv_templ;
- struct pipe_surface *texSurf;
+ struct pipe_subresource texsub;
const int srcLeft = MIN2(srcX0, srcX1);
const int srcTop = MIN2(srcY0, srcY1);
@@ -368,8 +388,8 @@ util_blit_pixels_writemask(struct blit_state *ctx,
/* create temp texture */
memset(&texTemp, 0, sizeof(texTemp));
- texTemp.target = PIPE_TEXTURE_2D;
- texTemp.format = src->format;
+ texTemp.target = ctx->internal_target;
+ texTemp.format = src_tex->format;
texTemp.last_level = 0;
texTemp.width0 = srcW;
texTemp.height0 = srcH;
@@ -380,50 +400,69 @@ util_blit_pixels_writemask(struct blit_state *ctx,
if (!tex)
return;
+ texsub.face = 0;
+ texsub.level = 0;
+ /* load temp texture */
+ pipe->resource_copy_region(pipe,
+ tex, texsub, 0, 0, 0, /* dest */
+ src_tex, srcsub, srcLeft, srcTop, srcZ0, /* src */
+ srcW, srcH); /* size */
+
+ normalized = tex->target != PIPE_TEXTURE_RECT;
+ if(normalized) {
+ s0 = 0.0f;
+ s1 = 1.0f;
+ t0 = 0.0f;
+ t1 = 1.0f;
+ }
+ else {
+ s0 = 0;
+ s1 = srcW;
+ t0 = 0;
+ t1 = srcH;
+ }
+
u_sampler_view_default_template(&sv_templ, tex, tex->format);
+ sampler_view = pipe->create_sampler_view(pipe, tex, &sv_templ);
- sampler_view = ctx->pipe->create_sampler_view(ctx->pipe, tex, &sv_templ);
if (!sampler_view) {
pipe_resource_reference(&tex, NULL);
return;
}
-
- texSurf = screen->get_tex_surface(screen, tex, 0, 0, 0,
- PIPE_BIND_BLIT_DESTINATION);
-
- /* load temp texture */
- if (pipe->surface_copy) {
- pipe->surface_copy(pipe,
- texSurf, 0, 0, /* dest */
- src, srcLeft, srcTop, /* src */
- srcW, srcH); /* size */
- } else {
- util_surface_copy(pipe, FALSE,
- texSurf, 0, 0, /* dest */
- src, srcLeft, srcTop, /* src */
- srcW, srcH); /* size */
- }
-
- /* free the surface, update the texture if necessary.
- */
- pipe_surface_reference(&texSurf, NULL);
- s0 = 0.0f;
- s1 = 1.0f;
- t0 = 0.0f;
- t1 = 1.0f;
-
pipe_resource_reference(&tex, NULL);
}
else {
- pipe_sampler_view_reference(&sampler_view, src_sampler_view);
- s0 = srcX0 / (float)src->texture->width0;
- s1 = srcX1 / (float)src->texture->width0;
- t0 = srcY0 / (float)src->texture->height0;
- t1 = srcY1 / (float)src->texture->height0;
+ u_sampler_view_default_template(&sv_templ, src_tex, src_tex->format);
+ sv_templ.first_level = sv_templ.last_level = srcsub.level;
+ sampler_view = pipe->create_sampler_view(pipe, src_tex, &sv_templ);
+
+ if (!sampler_view) {
+ return;
+ }
+
+ s0 = srcX0;
+ s1 = srcX1;
+ t0 = srcY0;
+ t1 = srcY1;
+ normalized = sampler_view->texture->target != PIPE_TEXTURE_RECT;
+ if(normalized)
+ {
+ s0 /= (float)(u_minify(sampler_view->texture->width0, srcsub.level));
+ s1 /= (float)(u_minify(sampler_view->texture->width0, srcsub.level));
+ t0 /= (float)(u_minify(sampler_view->texture->height0, srcsub.level));
+ t1 /= (float)(u_minify(sampler_view->texture->height0, srcsub.level));
+ }
}
-
+ dst_is_depth = util_format_is_depth_or_stencil(dst->format);
+ assert(screen->is_format_supported(screen, sampler_view->format, ctx->internal_target,
+ sampler_view->texture->nr_samples,
+ PIPE_BIND_SAMPLER_VIEW, 0));
+ assert(screen->is_format_supported(screen, dst->format, ctx->internal_target,
+ dst->texture->nr_samples,
+ dst_is_depth ? PIPE_BIND_DEPTH_STENCIL :
+ PIPE_BIND_RENDER_TARGET, 0));
/* save state (restored below) */
cso_save_blend(ctx->cso);
cso_save_depth_stencil_alpha(ctx->cso);
@@ -439,14 +478,20 @@ util_blit_pixels_writemask(struct blit_state *ctx,
/* set misc state we care about */
cso_set_blend(ctx->cso, &ctx->blend);
- cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil);
+ cso_set_depth_stencil_alpha(ctx->cso,
+ dst_is_depth ? &ctx->depthstencil_write :
+ &ctx->depthstencil_keep);
cso_set_rasterizer(ctx->cso, &ctx->rasterizer);
cso_set_clip(ctx->cso, &ctx->clip);
cso_set_vertex_elements(ctx->cso, 2, ctx->velem);
/* sampler */
+ ctx->sampler.normalized_coords = normalized;
ctx->sampler.min_img_filter = filter;
ctx->sampler.mag_img_filter = filter;
+ /* we've limited this already with the sampler view but you never know... */
+ ctx->sampler.min_lod = srcsub.level;
+ ctx->sampler.max_lod = srcsub.level;
cso_single_sampler(ctx->cso, 0, &ctx->sampler);
cso_single_sampler_done(ctx->cso);
@@ -464,21 +509,35 @@ util_blit_pixels_writemask(struct blit_state *ctx,
/* texture */
cso_set_fragment_sampler_views(ctx->cso, 1, &sampler_view);
- if (ctx->fs[writemask] == NULL)
- ctx->fs[writemask] =
- util_make_fragment_tex_shader_writemask(pipe, TGSI_TEXTURE_2D,
- writemask);
-
/* shaders */
- cso_set_fragment_shader_handle(ctx->cso, ctx->fs[writemask]);
+ if (dst_is_depth) {
+ if (ctx->fs_depth == NULL)
+ ctx->fs_depth =
+ util_make_fragment_tex_shader_writedepth(pipe, TGSI_TEXTURE_2D,
+ TGSI_INTERPOLATE_LINEAR);
+
+ cso_set_fragment_shader_handle(ctx->cso, ctx->fs_depth);
+ } else {
+ if (ctx->fs[writemask] == NULL)
+ ctx->fs[writemask] =
+ util_make_fragment_tex_shader_writemask(pipe, TGSI_TEXTURE_2D,
+ TGSI_INTERPOLATE_LINEAR,
+ writemask);
+
+ cso_set_fragment_shader_handle(ctx->cso, ctx->fs[writemask]);
+ }
cso_set_vertex_shader_handle(ctx->cso, ctx->vs);
/* drawing dest */
memset(&fb, 0, sizeof(fb));
fb.width = dst->width;
fb.height = dst->height;
- fb.nr_cbufs = 1;
- fb.cbufs[0] = dst;
+ if (dst_is_depth) {
+ fb.zsbuf = dst;
+ } else {
+ fb.nr_cbufs = 1;
+ fb.cbufs[0] = dst;
+ }
cso_set_framebuffer(ctx->cso, &fb);
/* draw quad */
@@ -515,18 +574,21 @@ util_blit_pixels_writemask(struct blit_state *ctx,
void
util_blit_pixels(struct blit_state *ctx,
- struct pipe_surface *src,
- struct pipe_sampler_view *src_sampler_view,
+ struct pipe_resource *src_tex,
+ struct pipe_subresource srcsub,
int srcX0, int srcY0,
int srcX1, int srcY1,
+ int srcZ,
struct pipe_surface *dst,
int dstX0, int dstY0,
int dstX1, int dstY1,
float z, uint filter )
{
- util_blit_pixels_writemask( ctx, src, src_sampler_view,
+ util_blit_pixels_writemask( ctx, src_tex,
+ srcsub,
srcX0, srcY0,
srcX1, srcY1,
+ srcZ,
dst,
dstX0, dstY0,
dstX1, dstY1,
@@ -548,7 +610,6 @@ void util_blit_flush( struct blit_state *ctx )
/**
* Copy pixel block from src texture to dst surface.
- * Overlapping regions are acceptable.
*
* XXX Should support selection of level.
* XXX need some control over blitting Z and/or stencil.
@@ -563,6 +624,7 @@ util_blit_pixels_tex(struct blit_state *ctx,
int dstX1, int dstY1,
float z, uint filter)
{
+ boolean normalized = src_sampler_view->texture->target != PIPE_TEXTURE_RECT;
struct pipe_framebuffer_state fb;
float s0, t0, s1, t1;
unsigned offset;
@@ -575,13 +637,22 @@ util_blit_pixels_tex(struct blit_state *ctx,
assert(tex->width0 != 0);
assert(tex->height0 != 0);
- s0 = srcX0 / (float)tex->width0;
- s1 = srcX1 / (float)tex->width0;
- t0 = srcY0 / (float)tex->height0;
- t1 = srcY1 / (float)tex->height0;
+ s0 = srcX0;
+ s1 = srcX1;
+ t0 = srcY0;
+ t1 = srcY1;
+
+ if(normalized)
+ {
+ s0 /= (float)tex->width0;
+ s1 /= (float)tex->width0;
+ t0 /= (float)tex->height0;
+ t1 /= (float)tex->height0;
+ }
assert(ctx->pipe->screen->is_format_supported(ctx->pipe->screen, dst->format,
PIPE_TEXTURE_2D,
+ dst->texture->nr_samples,
PIPE_BIND_RENDER_TARGET,
0));
@@ -599,12 +670,13 @@ util_blit_pixels_tex(struct blit_state *ctx,
/* set misc state we care about */
cso_set_blend(ctx->cso, &ctx->blend);
- cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil);
+ cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil_keep);
cso_set_rasterizer(ctx->cso, &ctx->rasterizer);
cso_set_clip(ctx->cso, &ctx->clip);
cso_set_vertex_elements(ctx->cso, 2, ctx->velem);
/* sampler */
+ ctx->sampler.normalized_coords = normalized;
ctx->sampler.min_img_filter = filter;
ctx->sampler.mag_img_filter = filter;
cso_single_sampler(ctx->cso, 0, &ctx->sampler);
diff --git a/src/gallium/auxiliary/util/u_blit.h b/src/gallium/auxiliary/util/u_blit.h
index 464ff9aaced..b8a0dfce13f 100644
--- a/src/gallium/auxiliary/util/u_blit.h
+++ b/src/gallium/auxiliary/util/u_blit.h
@@ -30,18 +30,20 @@
#define U_BLIT_H
+#include "pipe/p_compiler.h"
+
+
#ifdef __cplusplus
extern "C" {
#endif
+struct cso_context;
struct pipe_context;
-struct pipe_surface;
struct pipe_resource;
-struct cso_context;
-
-
-struct blit_state;
+struct pipe_sampler_view;
+struct pipe_subresource;
+struct pipe_surface;
extern struct blit_state *
@@ -52,10 +54,11 @@ util_destroy_blit(struct blit_state *ctx);
extern void
util_blit_pixels(struct blit_state *ctx,
- struct pipe_surface *src,
- struct pipe_sampler_view *src_sampler_view,
+ struct pipe_resource *src_tex,
+ struct pipe_subresource srcsub,
int srcX0, int srcY0,
int srcX1, int srcY1,
+ int srcZ0,
struct pipe_surface *dst,
int dstX0, int dstY0,
int dstX1, int dstY1,
@@ -63,10 +66,11 @@ util_blit_pixels(struct blit_state *ctx,
void
util_blit_pixels_writemask(struct blit_state *ctx,
- struct pipe_surface *src,
- struct pipe_sampler_view *src_sampler_view,
+ struct pipe_resource *src_tex,
+ struct pipe_subresource srcsub,
int srcX0, int srcY0,
int srcX1, int srcY1,
+ int srcZ0,
struct pipe_surface *dst,
int dstX0, int dstY0,
int dstX1, int dstY1,
diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c
index 956aedc8a15..a163f93cb82 100644
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -26,8 +26,8 @@
/**
* @file
- * Blitter utility to facilitate acceleration of the clear, surface_copy,
- * and surface_fill functions.
+ * Blitter utility to facilitate acceleration of the clear, clear_render_target, clear_depth_stencil
+ * resource_copy_region functions.
*
* @author Marek Olšák
*/
@@ -43,19 +43,17 @@
#include "util/u_math.h"
#include "util/u_blitter.h"
#include "util/u_draw_quad.h"
-#include "util/u_pack_color.h"
-#include "util/u_rect.h"
#include "util/u_sampler.h"
#include "util/u_simple_shaders.h"
+#include "util/u_surface.h"
#include "util/u_texture.h"
#define INVALID_PTR ((void*)~0)
struct blitter_context_priv
{
- struct blitter_context blitter;
+ struct blitter_context base;
- struct pipe_context *pipe; /**< pipe context */
struct pipe_resource *vbuf; /**< quad */
float vertices[4][2][4]; /**< {pos, color} or {pos, texcoord} */
@@ -69,8 +67,8 @@ struct blitter_context_priv
void *vs_tex; /**< Vertex shader which passes {pos, texcoord} to the output.*/
/* Fragment shaders. */
- /* FS which outputs a color to multiple color buffers. */
- void *fs_col[PIPE_MAX_COLOR_BUFS];
+ /* The shader at index i outputs color to color buffers 0,1,...,i-1. */
+ void *fs_col[PIPE_MAX_COLOR_BUFS+1];
/* FS which outputs a color from a texture,
where the index is PIPE_TEXTURE_* to be sampled. */
@@ -88,24 +86,36 @@ struct blitter_context_priv
void *dsa_write_depth_stencil;
void *dsa_write_depth_keep_stencil;
void *dsa_keep_depth_stencil;
+ void *dsa_keep_depth_write_stencil;
+ void *dsa_flush_depth_stencil;
void *velem_state;
/* Sampler state for clamping to a miplevel. */
- void *sampler_state[PIPE_MAX_TEXTURE_LEVELS];
+ void *sampler_state[PIPE_MAX_TEXTURE_LEVELS * 2];
/* Rasterizer state. */
void *rs_state;
- struct pipe_sampler_view *sampler_view;
-
/* Viewport state. */
struct pipe_viewport_state viewport;
/* Clip state. */
struct pipe_clip_state clip;
+
+ /* Destination surface dimensions. */
+ unsigned dst_width;
+ unsigned dst_height;
};
+static void blitter_draw_rectangle(struct blitter_context *blitter,
+ unsigned x, unsigned y,
+ unsigned width, unsigned height,
+ float depth,
+ enum blitter_attrib_type type,
+ const float attrib[4]);
+
+
struct blitter_context *util_blitter_create(struct pipe_context *pipe)
{
struct blitter_context_priv *ctx;
@@ -120,19 +130,20 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
if (!ctx)
return NULL;
- ctx->pipe = pipe;
+ ctx->base.pipe = pipe;
+ ctx->base.draw_rectangle = blitter_draw_rectangle;
/* init state objects for them to be considered invalid */
- ctx->blitter.saved_blend_state = INVALID_PTR;
- ctx->blitter.saved_dsa_state = INVALID_PTR;
- ctx->blitter.saved_rs_state = INVALID_PTR;
- ctx->blitter.saved_fs = INVALID_PTR;
- ctx->blitter.saved_vs = INVALID_PTR;
- ctx->blitter.saved_velem_state = INVALID_PTR;
- ctx->blitter.saved_fb_state.nr_cbufs = ~0;
- ctx->blitter.saved_num_sampler_views = ~0;
- ctx->blitter.saved_num_sampler_states = ~0;
- ctx->blitter.saved_num_vertex_buffers = ~0;
+ ctx->base.saved_blend_state = INVALID_PTR;
+ ctx->base.saved_dsa_state = INVALID_PTR;
+ ctx->base.saved_rs_state = INVALID_PTR;
+ ctx->base.saved_fs = INVALID_PTR;
+ ctx->base.saved_vs = INVALID_PTR;
+ ctx->base.saved_velem_state = INVALID_PTR;
+ ctx->base.saved_fb_state.nr_cbufs = ~0;
+ ctx->base.saved_num_sampler_views = ~0;
+ ctx->base.saved_num_sampler_states = ~0;
+ ctx->base.saved_num_vertex_buffers = ~0;
/* blend state objects */
memset(&blend, 0, sizeof(blend));
@@ -146,6 +157,10 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
ctx->dsa_keep_depth_stencil =
pipe->create_depth_stencil_alpha_state(pipe, &dsa);
+ dsa.depth.writemask = 1;
+ ctx->dsa_flush_depth_stencil =
+ pipe->create_depth_stencil_alpha_state(pipe, &dsa);
+
dsa.depth.enabled = 1;
dsa.depth.writemask = 1;
dsa.depth.func = PIPE_FUNC_ALWAYS;
@@ -161,8 +176,12 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
dsa.stencil[0].writemask = 0xff;
ctx->dsa_write_depth_stencil =
pipe->create_depth_stencil_alpha_state(pipe, &dsa);
- /* The DSA state objects which write depth and stencil are created
- * on-demand. */
+
+
+ dsa.depth.enabled = 0;
+ dsa.depth.writemask = 0;
+ ctx->dsa_keep_depth_write_stencil =
+ pipe->create_depth_stencil_alpha_state(pipe, &dsa);
/* sampler state */
sampler_state = &ctx->template_sampler_state;
@@ -175,8 +194,7 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
/* rasterizer state */
memset(&rs_state, 0, sizeof(rs_state));
- rs_state.front_winding = PIPE_WINDING_CW;
- rs_state.cull_mode = PIPE_WINDING_NONE;
+ rs_state.cull_face = PIPE_FACE_NONE;
rs_state.gl_rasterization_rules = 1;
rs_state.flatshade = 1;
ctx->rs_state = pipe->create_rasterizer_state(pipe, &rs_state);
@@ -216,17 +234,17 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
ctx->vertices[i][0][3] = 1; /*v.w*/
/* create the vertex buffer */
- ctx->vbuf = pipe_buffer_create(ctx->pipe->screen,
+ ctx->vbuf = pipe_buffer_create(ctx->base.pipe->screen,
PIPE_BIND_VERTEX_BUFFER,
sizeof(ctx->vertices));
- return &ctx->blitter;
+ return &ctx->base;
}
void util_blitter_destroy(struct blitter_context *blitter)
{
struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
- struct pipe_context *pipe = ctx->pipe;
+ struct pipe_context *pipe = blitter->pipe;
int i;
pipe->delete_blend_state(pipe, ctx->blend_write_color);
@@ -235,6 +253,8 @@ void util_blitter_destroy(struct blitter_context *blitter)
pipe->delete_depth_stencil_alpha_state(pipe,
ctx->dsa_write_depth_keep_stencil);
pipe->delete_depth_stencil_alpha_state(pipe, ctx->dsa_write_depth_stencil);
+ pipe->delete_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_write_stencil);
+ pipe->delete_depth_stencil_alpha_state(pipe, ctx->dsa_flush_depth_stencil);
pipe->delete_rasterizer_state(pipe, ctx->rs_state);
pipe->delete_vs_state(pipe, ctx->vs_col);
@@ -248,18 +268,14 @@ void util_blitter_destroy(struct blitter_context *blitter)
pipe->delete_fs_state(pipe, ctx->fs_texfetch_depth[i]);
}
- for (i = 0; i < PIPE_MAX_COLOR_BUFS && ctx->fs_col[i]; i++)
+ for (i = 0; i <= PIPE_MAX_COLOR_BUFS; i++)
if (ctx->fs_col[i])
pipe->delete_fs_state(pipe, ctx->fs_col[i]);
- for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++)
+ for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS * 2; i++)
if (ctx->sampler_state[i])
pipe->delete_sampler_state(pipe, ctx->sampler_state[i]);
- if (ctx->sampler_view) {
- pipe_sampler_view_reference(&ctx->sampler_view, NULL);
- }
-
pipe_resource_reference(&ctx->vbuf, NULL);
FREE(ctx);
}
@@ -267,104 +283,117 @@ void util_blitter_destroy(struct blitter_context *blitter)
static void blitter_check_saved_CSOs(struct blitter_context_priv *ctx)
{
/* make sure these CSOs have been saved */
- assert(ctx->blitter.saved_blend_state != INVALID_PTR &&
- ctx->blitter.saved_dsa_state != INVALID_PTR &&
- ctx->blitter.saved_rs_state != INVALID_PTR &&
- ctx->blitter.saved_fs != INVALID_PTR &&
- ctx->blitter.saved_vs != INVALID_PTR &&
- ctx->blitter.saved_velem_state != INVALID_PTR);
+ assert(ctx->base.saved_blend_state != INVALID_PTR &&
+ ctx->base.saved_dsa_state != INVALID_PTR &&
+ ctx->base.saved_rs_state != INVALID_PTR &&
+ ctx->base.saved_fs != INVALID_PTR &&
+ ctx->base.saved_vs != INVALID_PTR &&
+ ctx->base.saved_velem_state != INVALID_PTR);
}
static void blitter_restore_CSOs(struct blitter_context_priv *ctx)
{
- struct pipe_context *pipe = ctx->pipe;
+ struct pipe_context *pipe = ctx->base.pipe;
+ unsigned i;
/* restore the state objects which are always required to be saved */
- pipe->bind_blend_state(pipe, ctx->blitter.saved_blend_state);
- pipe->bind_depth_stencil_alpha_state(pipe, ctx->blitter.saved_dsa_state);
- pipe->bind_rasterizer_state(pipe, ctx->blitter.saved_rs_state);
- pipe->bind_fs_state(pipe, ctx->blitter.saved_fs);
- pipe->bind_vs_state(pipe, ctx->blitter.saved_vs);
- pipe->bind_vertex_elements_state(pipe, ctx->blitter.saved_velem_state);
+ pipe->bind_blend_state(pipe, ctx->base.saved_blend_state);
+ pipe->bind_depth_stencil_alpha_state(pipe, ctx->base.saved_dsa_state);
+ pipe->bind_rasterizer_state(pipe, ctx->base.saved_rs_state);
+ pipe->bind_fs_state(pipe, ctx->base.saved_fs);
+ pipe->bind_vs_state(pipe, ctx->base.saved_vs);
+ pipe->bind_vertex_elements_state(pipe, ctx->base.saved_velem_state);
- ctx->blitter.saved_blend_state = INVALID_PTR;
- ctx->blitter.saved_dsa_state = INVALID_PTR;
- ctx->blitter.saved_rs_state = INVALID_PTR;
- ctx->blitter.saved_fs = INVALID_PTR;
- ctx->blitter.saved_vs = INVALID_PTR;
- ctx->blitter.saved_velem_state = INVALID_PTR;
+ ctx->base.saved_blend_state = INVALID_PTR;
+ ctx->base.saved_dsa_state = INVALID_PTR;
+ ctx->base.saved_rs_state = INVALID_PTR;
+ ctx->base.saved_fs = INVALID_PTR;
+ ctx->base.saved_vs = INVALID_PTR;
+ ctx->base.saved_velem_state = INVALID_PTR;
- pipe->set_stencil_ref(pipe, &ctx->blitter.saved_stencil_ref);
+ pipe->set_stencil_ref(pipe, &ctx->base.saved_stencil_ref);
- pipe->set_viewport_state(pipe, &ctx->blitter.saved_viewport);
- pipe->set_clip_state(pipe, &ctx->blitter.saved_clip);
+ pipe->set_viewport_state(pipe, &ctx->base.saved_viewport);
+ pipe->set_clip_state(pipe, &ctx->base.saved_clip);
/* restore the state objects which are required to be saved before copy/fill
*/
- if (ctx->blitter.saved_fb_state.nr_cbufs != ~0) {
- pipe->set_framebuffer_state(pipe, &ctx->blitter.saved_fb_state);
- ctx->blitter.saved_fb_state.nr_cbufs = ~0;
+ if (ctx->base.saved_fb_state.nr_cbufs != ~0) {
+ pipe->set_framebuffer_state(pipe, &ctx->base.saved_fb_state);
+ util_unreference_framebuffer_state(&ctx->base.saved_fb_state);
+ ctx->base.saved_fb_state.nr_cbufs = ~0;
}
- if (ctx->blitter.saved_num_sampler_states != ~0) {
+ if (ctx->base.saved_num_sampler_states != ~0) {
pipe->bind_fragment_sampler_states(pipe,
- ctx->blitter.saved_num_sampler_states,
- ctx->blitter.saved_sampler_states);
- ctx->blitter.saved_num_sampler_states = ~0;
+ ctx->base.saved_num_sampler_states,
+ ctx->base.saved_sampler_states);
+ ctx->base.saved_num_sampler_states = ~0;
}
- if (ctx->blitter.saved_num_sampler_views != ~0) {
+ if (ctx->base.saved_num_sampler_views != ~0) {
pipe->set_fragment_sampler_views(pipe,
- ctx->blitter.saved_num_sampler_views,
- ctx->blitter.saved_sampler_views);
- ctx->blitter.saved_num_sampler_views = ~0;
+ ctx->base.saved_num_sampler_views,
+ ctx->base.saved_sampler_views);
+
+ for (i = 0; i < ctx->base.saved_num_sampler_views; i++)
+ pipe_sampler_view_reference(&ctx->base.saved_sampler_views[i],
+ NULL);
+
+ ctx->base.saved_num_sampler_views = ~0;
}
- if (ctx->blitter.saved_num_vertex_buffers != ~0) {
+ if (ctx->base.saved_num_vertex_buffers != ~0) {
pipe->set_vertex_buffers(pipe,
- ctx->blitter.saved_num_vertex_buffers,
- ctx->blitter.saved_vertex_buffers);
- ctx->blitter.saved_num_vertex_buffers = ~0;
+ ctx->base.saved_num_vertex_buffers,
+ ctx->base.saved_vertex_buffers);
+
+ for (i = 0; i < ctx->base.saved_num_vertex_buffers; i++) {
+ if (ctx->base.saved_vertex_buffers[i].buffer) {
+ pipe_resource_reference(&ctx->base.saved_vertex_buffers[i].buffer,
+ NULL);
+ }
+ }
+ ctx->base.saved_num_vertex_buffers = ~0;
}
}
static void blitter_set_rectangle(struct blitter_context_priv *ctx,
unsigned x1, unsigned y1,
unsigned x2, unsigned y2,
- unsigned width, unsigned height,
float depth)
{
int i;
/* set vertex positions */
- ctx->vertices[0][0][0] = (float)x1 / width * 2.0f - 1.0f; /*v0.x*/
- ctx->vertices[0][0][1] = (float)y1 / height * 2.0f - 1.0f; /*v0.y*/
+ ctx->vertices[0][0][0] = (float)x1 / ctx->dst_width * 2.0f - 1.0f; /*v0.x*/
+ ctx->vertices[0][0][1] = (float)y1 / ctx->dst_height * 2.0f - 1.0f; /*v0.y*/
- ctx->vertices[1][0][0] = (float)x2 / width * 2.0f - 1.0f; /*v1.x*/
- ctx->vertices[1][0][1] = (float)y1 / height * 2.0f - 1.0f; /*v1.y*/
+ ctx->vertices[1][0][0] = (float)x2 / ctx->dst_width * 2.0f - 1.0f; /*v1.x*/
+ ctx->vertices[1][0][1] = (float)y1 / ctx->dst_height * 2.0f - 1.0f; /*v1.y*/
- ctx->vertices[2][0][0] = (float)x2 / width * 2.0f - 1.0f; /*v2.x*/
- ctx->vertices[2][0][1] = (float)y2 / height * 2.0f - 1.0f; /*v2.y*/
+ ctx->vertices[2][0][0] = (float)x2 / ctx->dst_width * 2.0f - 1.0f; /*v2.x*/
+ ctx->vertices[2][0][1] = (float)y2 / ctx->dst_height * 2.0f - 1.0f; /*v2.y*/
- ctx->vertices[3][0][0] = (float)x1 / width * 2.0f - 1.0f; /*v3.x*/
- ctx->vertices[3][0][1] = (float)y2 / height * 2.0f - 1.0f; /*v3.y*/
+ ctx->vertices[3][0][0] = (float)x1 / ctx->dst_width * 2.0f - 1.0f; /*v3.x*/
+ ctx->vertices[3][0][1] = (float)y2 / ctx->dst_height * 2.0f - 1.0f; /*v3.y*/
for (i = 0; i < 4; i++)
ctx->vertices[i][0][2] = depth; /*z*/
/* viewport */
- ctx->viewport.scale[0] = 0.5f * width;
- ctx->viewport.scale[1] = 0.5f * height;
+ ctx->viewport.scale[0] = 0.5f * ctx->dst_width;
+ ctx->viewport.scale[1] = 0.5f * ctx->dst_height;
ctx->viewport.scale[2] = 1.0f;
ctx->viewport.scale[3] = 1.0f;
- ctx->viewport.translate[0] = 0.5f * width;
- ctx->viewport.translate[1] = 0.5f * height;
+ ctx->viewport.translate[0] = 0.5f * ctx->dst_width;
+ ctx->viewport.translate[1] = 0.5f * ctx->dst_height;
ctx->viewport.translate[2] = 0.0f;
ctx->viewport.translate[3] = 0.0f;
- ctx->pipe->set_viewport_state(ctx->pipe, &ctx->viewport);
+ ctx->base.pipe->set_viewport_state(ctx->base.pipe, &ctx->viewport);
/* clip */
- ctx->pipe->set_clip_state(ctx->pipe, &ctx->clip);
+ ctx->base.pipe->set_clip_state(ctx->base.pipe, &ctx->clip);
}
static void blitter_set_clear_color(struct blitter_context_priv *ctx,
@@ -372,36 +401,72 @@ static void blitter_set_clear_color(struct blitter_context_priv *ctx,
{
int i;
- for (i = 0; i < 4; i++) {
- ctx->vertices[i][1][0] = rgba[0];
- ctx->vertices[i][1][1] = rgba[1];
- ctx->vertices[i][1][2] = rgba[2];
- ctx->vertices[i][1][3] = rgba[3];
+ if (rgba) {
+ for (i = 0; i < 4; i++) {
+ ctx->vertices[i][1][0] = rgba[0];
+ ctx->vertices[i][1][1] = rgba[1];
+ ctx->vertices[i][1][2] = rgba[2];
+ ctx->vertices[i][1][3] = rgba[3];
+ }
+ } else {
+ for (i = 0; i < 4; i++) {
+ ctx->vertices[i][1][0] = 0;
+ ctx->vertices[i][1][1] = 0;
+ ctx->vertices[i][1][2] = 0;
+ ctx->vertices[i][1][3] = 0;
+ }
}
}
-static void blitter_set_texcoords_2d(struct blitter_context_priv *ctx,
- struct pipe_surface *surf,
+static void get_texcoords(struct pipe_resource *src,
+ struct pipe_subresource subsrc,
unsigned x1, unsigned y1,
- unsigned x2, unsigned y2)
+ unsigned x2, unsigned y2,
+ boolean normalized, float out[4])
{
- int i;
- float s1 = x1 / (float)surf->width;
- float t1 = y1 / (float)surf->height;
- float s2 = x2 / (float)surf->width;
- float t2 = y2 / (float)surf->height;
-
- ctx->vertices[0][1][0] = s1; /*t0.s*/
- ctx->vertices[0][1][1] = t1; /*t0.t*/
+ if(normalized)
+ {
+ out[0] = x1 / (float)u_minify(src->width0, subsrc.level);
+ out[1] = y1 / (float)u_minify(src->height0, subsrc.level);
+ out[2] = x2 / (float)u_minify(src->width0, subsrc.level);
+ out[3] = y2 / (float)u_minify(src->height0, subsrc.level);
+ }
+ else
+ {
+ out[0] = x1;
+ out[1] = y1;
+ out[2] = x2;
+ out[3] = y2;
+ }
+}
- ctx->vertices[1][1][0] = s2; /*t1.s*/
- ctx->vertices[1][1][1] = t1; /*t1.t*/
+static void set_texcoords_in_vertices(const float coord[4],
+ float *out, unsigned stride)
+{
+ out[0] = coord[0]; /*t0.s*/
+ out[1] = coord[1]; /*t0.t*/
+ out += stride;
+ out[0] = coord[2]; /*t1.s*/
+ out[1] = coord[1]; /*t1.t*/
+ out += stride;
+ out[0] = coord[2]; /*t2.s*/
+ out[1] = coord[3]; /*t2.t*/
+ out += stride;
+ out[0] = coord[0]; /*t3.s*/
+ out[1] = coord[3]; /*t3.t*/
+}
- ctx->vertices[2][1][0] = s2; /*t2.s*/
- ctx->vertices[2][1][1] = t2; /*t2.t*/
+static void blitter_set_texcoords_2d(struct blitter_context_priv *ctx,
+ struct pipe_resource *src,
+ struct pipe_subresource subsrc,
+ unsigned x1, unsigned y1,
+ unsigned x2, unsigned y2)
+{
+ unsigned i;
+ float coord[4];
- ctx->vertices[3][1][0] = s1; /*t3.s*/
- ctx->vertices[3][1][1] = t2; /*t3.t*/
+ get_texcoords(src, subsrc, x1, y1, x2, y2, TRUE, coord);
+ set_texcoords_in_vertices(coord, &ctx->vertices[0][1][0], 8);
for (i = 0; i < 4; i++) {
ctx->vertices[i][1][2] = 0; /*r*/
@@ -410,42 +475,35 @@ static void blitter_set_texcoords_2d(struct blitter_context_priv *ctx,
}
static void blitter_set_texcoords_3d(struct blitter_context_priv *ctx,
- struct pipe_surface *surf,
+ struct pipe_resource *src,
+ struct pipe_subresource subsrc,
+ unsigned zslice,
unsigned x1, unsigned y1,
unsigned x2, unsigned y2)
{
int i;
- float depth = u_minify(surf->texture->depth0, surf->level);
- float r = surf->zslice / depth;
+ float r = zslice / (float)u_minify(src->depth0, subsrc.level);
- blitter_set_texcoords_2d(ctx, surf, x1, y1, x2, y2);
+ blitter_set_texcoords_2d(ctx, src, subsrc, x1, y1, x2, y2);
for (i = 0; i < 4; i++)
ctx->vertices[i][1][2] = r; /*r*/
}
static void blitter_set_texcoords_cube(struct blitter_context_priv *ctx,
- struct pipe_surface *surf,
+ struct pipe_resource *src,
+ struct pipe_subresource subsrc,
unsigned x1, unsigned y1,
unsigned x2, unsigned y2)
{
int i;
- float s1 = x1 / (float)surf->width;
- float t1 = y1 / (float)surf->height;
- float s2 = x2 / (float)surf->width;
- float t2 = y2 / (float)surf->height;
+ float coord[4];
float st[4][2];
- st[0][0] = s1;
- st[0][1] = t1;
- st[1][0] = s2;
- st[1][1] = t1;
- st[2][0] = s2;
- st[2][1] = t2;
- st[3][0] = s1;
- st[3][1] = t2;
+ get_texcoords(src, subsrc, x1, y1, x2, y2, TRUE, coord);
+ set_texcoords_in_vertices(coord, &st[0][0], 2);
- util_map_texcoords2d_onto_cubemap(surf->face,
+ util_map_texcoords2d_onto_cubemap(subsrc.face,
/* pointer, stride in floats */
&st[0][0], 2,
&ctx->vertices[0][1][0], 8);
@@ -454,9 +512,16 @@ static void blitter_set_texcoords_cube(struct blitter_context_priv *ctx,
ctx->vertices[i][1][3] = 1; /*q*/
}
+static void blitter_set_dst_dimensions(struct blitter_context_priv *ctx,
+ unsigned width, unsigned height)
+{
+ ctx->dst_width = width;
+ ctx->dst_height = height;
+}
+
static void blitter_draw_quad(struct blitter_context_priv *ctx)
{
- struct pipe_context *pipe = ctx->pipe;
+ struct pipe_context *pipe = ctx->base.pipe;
/* write vertices and draw them */
pipe_buffer_write(pipe, ctx->vbuf,
@@ -469,72 +534,79 @@ static void blitter_draw_quad(struct blitter_context_priv *ctx)
static INLINE
void **blitter_get_sampler_state(struct blitter_context_priv *ctx,
- int miplevel)
+ int miplevel, boolean normalized)
{
- struct pipe_context *pipe = ctx->pipe;
+ struct pipe_context *pipe = ctx->base.pipe;
struct pipe_sampler_state *sampler_state = &ctx->template_sampler_state;
assert(miplevel < PIPE_MAX_TEXTURE_LEVELS);
/* Create the sampler state on-demand. */
- if (!ctx->sampler_state[miplevel]) {
+ if (!ctx->sampler_state[miplevel * 2 + normalized]) {
sampler_state->lod_bias = miplevel;
sampler_state->min_lod = miplevel;
sampler_state->max_lod = miplevel;
+ sampler_state->normalized_coords = normalized;
- ctx->sampler_state[miplevel] = pipe->create_sampler_state(pipe,
+ ctx->sampler_state[miplevel * 2 + normalized] = pipe->create_sampler_state(pipe,
sampler_state);
}
/* Return void** so that it can be passed to bind_fragment_sampler_states
* directly. */
- return &ctx->sampler_state[miplevel];
+ return &ctx->sampler_state[miplevel * 2 + normalized];
}
static INLINE
void *blitter_get_fs_col(struct blitter_context_priv *ctx, unsigned num_cbufs)
{
- struct pipe_context *pipe = ctx->pipe;
- unsigned index = num_cbufs ? num_cbufs - 1 : 0;
+ struct pipe_context *pipe = ctx->base.pipe;
assert(num_cbufs <= PIPE_MAX_COLOR_BUFS);
- if (!ctx->fs_col[index])
- ctx->fs_col[index] =
+ if (!ctx->fs_col[num_cbufs])
+ ctx->fs_col[num_cbufs] =
util_make_fragment_clonecolor_shader(pipe, num_cbufs);
- return ctx->fs_col[index];
+ return ctx->fs_col[num_cbufs];
}
+/** Convert PIPE_TEXTURE_x to TGSI_TEXTURE_x */
+static unsigned
+pipe_tex_to_tgsi_tex(enum pipe_texture_target pipe_tex_target)
+{
+ switch (pipe_tex_target) {
+ case PIPE_TEXTURE_1D:
+ return TGSI_TEXTURE_1D;
+ case PIPE_TEXTURE_2D:
+ return TGSI_TEXTURE_2D;
+ case PIPE_TEXTURE_RECT:
+ return TGSI_TEXTURE_RECT;
+ case PIPE_TEXTURE_3D:
+ return TGSI_TEXTURE_3D;
+ case PIPE_TEXTURE_CUBE:
+ return TGSI_TEXTURE_CUBE;
+ default:
+ assert(0 && "unexpected texture target");
+ return TGSI_TEXTURE_UNKNOWN;
+ }
+}
+
+
static INLINE
void *blitter_get_fs_texfetch_col(struct blitter_context_priv *ctx,
unsigned tex_target)
{
- struct pipe_context *pipe = ctx->pipe;
+ struct pipe_context *pipe = ctx->base.pipe;
assert(tex_target < PIPE_MAX_TEXTURE_TYPES);
/* Create the fragment shader on-demand. */
if (!ctx->fs_texfetch_col[tex_target]) {
- switch (tex_target) {
- case PIPE_TEXTURE_1D:
- ctx->fs_texfetch_col[PIPE_TEXTURE_1D] =
- util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_1D);
- break;
- case PIPE_TEXTURE_2D:
- ctx->fs_texfetch_col[PIPE_TEXTURE_2D] =
- util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_2D);
- break;
- case PIPE_TEXTURE_3D:
- ctx->fs_texfetch_col[PIPE_TEXTURE_3D] =
- util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_3D);
- break;
- case PIPE_TEXTURE_CUBE:
- ctx->fs_texfetch_col[PIPE_TEXTURE_CUBE] =
- util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_CUBE);
- break;
- default:;
- }
+ unsigned tgsi_tex = pipe_tex_to_tgsi_tex(tex_target);
+
+ ctx->fs_texfetch_col[tex_target] =
+ util_make_fragment_tex_shader(pipe, tgsi_tex, TGSI_INTERPOLATE_LINEAR);
}
return ctx->fs_texfetch_col[tex_target];
@@ -544,36 +616,47 @@ static INLINE
void *blitter_get_fs_texfetch_depth(struct blitter_context_priv *ctx,
unsigned tex_target)
{
- struct pipe_context *pipe = ctx->pipe;
+ struct pipe_context *pipe = ctx->base.pipe;
assert(tex_target < PIPE_MAX_TEXTURE_TYPES);
/* Create the fragment shader on-demand. */
if (!ctx->fs_texfetch_depth[tex_target]) {
- switch (tex_target) {
- case PIPE_TEXTURE_1D:
- ctx->fs_texfetch_depth[PIPE_TEXTURE_1D] =
- util_make_fragment_tex_shader_writedepth(pipe, TGSI_TEXTURE_1D);
- break;
- case PIPE_TEXTURE_2D:
- ctx->fs_texfetch_depth[PIPE_TEXTURE_2D] =
- util_make_fragment_tex_shader_writedepth(pipe, TGSI_TEXTURE_2D);
- break;
- case PIPE_TEXTURE_3D:
- ctx->fs_texfetch_depth[PIPE_TEXTURE_3D] =
- util_make_fragment_tex_shader_writedepth(pipe, TGSI_TEXTURE_3D);
- break;
- case PIPE_TEXTURE_CUBE:
- ctx->fs_texfetch_depth[PIPE_TEXTURE_CUBE] =
- util_make_fragment_tex_shader_writedepth(pipe,TGSI_TEXTURE_CUBE);
- break;
- default:;
- }
+ unsigned tgsi_tex = pipe_tex_to_tgsi_tex(tex_target);
+
+ ctx->fs_texfetch_depth[tex_target] =
+ util_make_fragment_tex_shader_writedepth(pipe, tgsi_tex,
+ TGSI_INTERPOLATE_LINEAR);
}
return ctx->fs_texfetch_depth[tex_target];
}
+static void blitter_draw_rectangle(struct blitter_context *blitter,
+ unsigned x1, unsigned y1,
+ unsigned x2, unsigned y2,
+ float depth,
+ enum blitter_attrib_type type,
+ const float attrib[4])
+{
+ struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
+
+ switch (type) {
+ case UTIL_BLITTER_ATTRIB_COLOR:
+ blitter_set_clear_color(ctx, attrib);
+ break;
+
+ case UTIL_BLITTER_ATTRIB_TEXCOORD:
+ set_texcoords_in_vertices(attrib, &ctx->vertices[0][1][0], 8);
+ break;
+
+ default:;
+ }
+
+ blitter_set_rectangle(ctx, x1, y1, x2, y2, depth);
+ blitter_draw_quad(ctx);
+}
+
void util_blitter_clear(struct blitter_context *blitter,
unsigned width, unsigned height,
unsigned num_cbufs,
@@ -582,7 +665,7 @@ void util_blitter_clear(struct blitter_context *blitter,
double depth, unsigned stencil)
{
struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
- struct pipe_context *pipe = ctx->pipe;
+ struct pipe_context *pipe = ctx->base.pipe;
struct pipe_stencil_ref sr = { { 0 } };
assert(num_cbufs <= PIPE_MAX_COLOR_BUFS);
@@ -595,11 +678,19 @@ void util_blitter_clear(struct blitter_context *blitter,
else
pipe->bind_blend_state(pipe, ctx->blend_keep_color);
- if (clear_buffers & PIPE_CLEAR_DEPTHSTENCIL) {
+ if ((clear_buffers & PIPE_CLEAR_DEPTHSTENCIL) == PIPE_CLEAR_DEPTHSTENCIL) {
sr.ref_value[0] = stencil & 0xff;
pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_write_depth_stencil);
pipe->set_stencil_ref(pipe, &sr);
}
+ else if (clear_buffers & PIPE_CLEAR_DEPTH) {
+ pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_write_depth_keep_stencil);
+ }
+ else if (clear_buffers & PIPE_CLEAR_STENCIL) {
+ sr.ref_value[0] = stencil & 0xff;
+ pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_write_stencil);
+ pipe->set_stencil_ref(pipe, &sr);
+ }
else
pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
@@ -608,244 +699,284 @@ void util_blitter_clear(struct blitter_context *blitter,
pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, num_cbufs));
pipe->bind_vs_state(pipe, ctx->vs_col);
- blitter_set_clear_color(ctx, rgba);
- blitter_set_rectangle(ctx, 0, 0, width, height, width, height, depth);
- blitter_draw_quad(ctx);
+ blitter_set_dst_dimensions(ctx, width, height);
+ blitter->draw_rectangle(blitter, 0, 0, width, height, depth,
+ UTIL_BLITTER_ATTRIB_COLOR, rgba);
blitter_restore_CSOs(ctx);
}
-static boolean
-is_overlap(unsigned sx1, unsigned sx2, unsigned sy1, unsigned sy2,
- unsigned dx1, unsigned dx2, unsigned dy1, unsigned dy2)
+static
+boolean is_overlap(unsigned sx1, unsigned sx2, unsigned sy1, unsigned sy2,
+ unsigned dx1, unsigned dx2, unsigned dy1, unsigned dy2)
{
- if (sx1 >= dx2 || sx2 <= dx1 || sy1 >= dy2 || sy2 <= dy1) {
- return FALSE;
- } else {
- return TRUE;
- }
+ return sx1 < dx2 && sx2 > dx1 && sy1 < dy2 && sy2 > dy1;
}
-static void util_blitter_do_copy(struct blitter_context *blitter,
- struct pipe_surface *dst,
- unsigned dstx, unsigned dsty,
- struct pipe_surface *src,
- unsigned srcx, unsigned srcy,
- unsigned width, unsigned height,
- boolean is_depth)
+void util_blitter_copy_region(struct blitter_context *blitter,
+ struct pipe_resource *dst,
+ struct pipe_subresource subdst,
+ unsigned dstx, unsigned dsty, unsigned dstz,
+ struct pipe_resource *src,
+ struct pipe_subresource subsrc,
+ unsigned srcx, unsigned srcy, unsigned srcz,
+ unsigned width, unsigned height,
+ boolean ignore_stencil)
{
struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
- struct pipe_context *pipe = ctx->pipe;
+ struct pipe_context *pipe = ctx->base.pipe;
+ struct pipe_screen *screen = pipe->screen;
+ struct pipe_surface *dstsurf;
struct pipe_framebuffer_state fb_state;
struct pipe_sampler_view viewTempl, *view;
+ unsigned bind;
+ boolean is_stencil, is_depth;
+ boolean normalized;
+
+ /* Give up if textures are not set. */
+ assert(dst && src);
+ if (!dst || !src)
+ return;
+ /* Sanity checks. */
+ if (dst == src) {
+ assert(!is_overlap(srcx, srcx + width, srcy, srcy + height,
+ dstx, dstx + width, dsty, dsty + height));
+ } else {
+ assert(dst->format == src->format);
+ }
+ assert(src->target < PIPE_MAX_TEXTURE_TYPES);
+
+ /* Is this a ZS format? */
+ is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0;
+ is_stencil = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 1) != 0;
+
+ if (is_depth || is_stencil)
+ bind = PIPE_BIND_DEPTH_STENCIL;
+ else
+ bind = PIPE_BIND_RENDER_TARGET;
+
+ /* Check if we can sample from and render to the surfaces. */
+ /* (assuming copying a stencil buffer is not possible) */
+ if ((!ignore_stencil && is_stencil) ||
+ !screen->is_format_supported(screen, dst->format, dst->target,
+ dst->nr_samples, bind, 0) ||
+ !screen->is_format_supported(screen, src->format, src->target,
+ src->nr_samples, PIPE_BIND_SAMPLER_VIEW, 0)) {
+ util_resource_copy_region(pipe, dst, subdst, dstx, dsty, dstz,
+ src, subsrc, srcx, srcy, srcz, width, height);
+ return;
+ }
+
+ /* Get surfaces. */
+ dstsurf = screen->get_tex_surface(screen, dst,
+ subdst.face, subdst.level, dstz,
+ bind);
+
+ /* Check whether the states are properly saved. */
+ blitter_check_saved_CSOs(ctx);
assert(blitter->saved_fb_state.nr_cbufs != ~0);
assert(blitter->saved_num_sampler_views != ~0);
assert(blitter->saved_num_sampler_states != ~0);
- assert(src->texture->target < PIPE_MAX_TEXTURE_TYPES);
- /* bind CSOs */
- fb_state.width = dst->width;
- fb_state.height = dst->height;
+ /* Initialize framebuffer state. */
+ fb_state.width = dstsurf->width;
+ fb_state.height = dstsurf->height;
if (is_depth) {
pipe->bind_blend_state(pipe, ctx->blend_keep_color);
pipe->bind_depth_stencil_alpha_state(pipe,
ctx->dsa_write_depth_keep_stencil);
pipe->bind_fs_state(pipe,
- blitter_get_fs_texfetch_depth(ctx, src->texture->target));
+ blitter_get_fs_texfetch_depth(ctx, src->target));
fb_state.nr_cbufs = 0;
- fb_state.zsbuf = dst;
+ fb_state.zsbuf = dstsurf;
} else {
pipe->bind_blend_state(pipe, ctx->blend_write_color);
pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
pipe->bind_fs_state(pipe,
- blitter_get_fs_texfetch_col(ctx, src->texture->target));
+ blitter_get_fs_texfetch_col(ctx, src->target));
fb_state.nr_cbufs = 1;
- fb_state.cbufs[0] = dst;
+ fb_state.cbufs[0] = dstsurf;
fb_state.zsbuf = 0;
}
- u_sampler_view_default_template(&viewTempl,
- src->texture,
- src->texture->format);
- view = pipe->create_sampler_view(pipe,
- src->texture,
- &viewTempl);
+ normalized = src->target != PIPE_TEXTURE_RECT;
- if (ctx->sampler_view) {
- pipe_sampler_view_reference(&ctx->sampler_view, NULL);
- }
- ctx->sampler_view = view;
+ /* Initialize sampler view. */
+ u_sampler_view_default_template(&viewTempl, src, src->format);
+ view = pipe->create_sampler_view(pipe, src, &viewTempl);
+ /* Set rasterizer state, shaders, and textures. */
pipe->bind_rasterizer_state(pipe, ctx->rs_state);
pipe->bind_vs_state(pipe, ctx->vs_tex);
pipe->bind_fragment_sampler_states(pipe, 1,
- blitter_get_sampler_state(ctx, src->level));
+ blitter_get_sampler_state(ctx, subsrc.level, normalized));
pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
pipe->set_fragment_sampler_views(pipe, 1, &view);
pipe->set_framebuffer_state(pipe, &fb_state);
- /* set texture coordinates */
- switch (src->texture->target) {
+ blitter_set_dst_dimensions(ctx, dstsurf->width, dstsurf->height);
+
+ switch (src->target) {
+ /* Draw the quad with the draw_rectangle callback. */
case PIPE_TEXTURE_1D:
case PIPE_TEXTURE_2D:
- blitter_set_texcoords_2d(ctx, src, srcx, srcy,
- srcx+width, srcy+height);
+ case PIPE_TEXTURE_RECT:
+ {
+ /* Set texture coordinates. */
+ float coord[4];
+ get_texcoords(src, subsrc, srcx, srcy,
+ srcx+width, srcy+height, normalized, coord);
+
+ /* Draw. */
+ blitter->draw_rectangle(blitter, dstx, dsty, dstx+width, dsty+height, 0,
+ UTIL_BLITTER_ATTRIB_TEXCOORD, coord);
+ }
break;
+
+ /* Draw the quad with the generic codepath. */
case PIPE_TEXTURE_3D:
- blitter_set_texcoords_3d(ctx, src, srcx, srcy,
- srcx+width, srcy+height);
- break;
case PIPE_TEXTURE_CUBE:
- blitter_set_texcoords_cube(ctx, src, srcx, srcy,
- srcx+width, srcy+height);
+ /* Set texture coordinates. */
+ if (src->target == PIPE_TEXTURE_3D)
+ blitter_set_texcoords_3d(ctx, src, subsrc, srcz,
+ srcx, srcy, srcx+width, srcy+height);
+ else
+ blitter_set_texcoords_cube(ctx, src, subsrc,
+ srcx, srcy, srcx+width, srcy+height);
+
+ /* Draw. */
+ blitter_set_rectangle(ctx, dstx, dsty, dstx+width, dsty+height, 0);
+ blitter_draw_quad(ctx);
break;
+
default:
assert(0);
+ return;
}
- blitter_set_rectangle(ctx, dstx, dsty, dstx+width, dsty+height, dst->width, dst->height, 0);
- blitter_draw_quad(ctx);
+ blitter_restore_CSOs(ctx);
+ pipe_surface_reference(&dstsurf, NULL);
+ pipe_sampler_view_reference(&view, NULL);
}
-static void util_blitter_overlap_copy(struct blitter_context *blitter,
- struct pipe_surface *dst,
- unsigned dstx, unsigned dsty,
- struct pipe_surface *src,
- unsigned srcx, unsigned srcy,
- unsigned width, unsigned height)
+/* Clear a region of a color surface to a constant value. */
+void util_blitter_clear_render_target(struct blitter_context *blitter,
+ struct pipe_surface *dstsurf,
+ const float *rgba,
+ unsigned dstx, unsigned dsty,
+ unsigned width, unsigned height)
{
struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
- struct pipe_context *pipe = ctx->pipe;
- struct pipe_screen *screen = pipe->screen;
+ struct pipe_context *pipe = ctx->base.pipe;
+ struct pipe_framebuffer_state fb_state;
- struct pipe_resource texTemp;
- struct pipe_resource *texture;
- struct pipe_surface *tex_surf;
+ assert(dstsurf->texture);
+ if (!dstsurf->texture)
+ return;
- /* check whether the states are properly saved */
+ /* check the saved state */
blitter_check_saved_CSOs(ctx);
+ assert(blitter->saved_fb_state.nr_cbufs != ~0);
- memset(&texTemp, 0, sizeof(texTemp));
- texTemp.target = PIPE_TEXTURE_2D;
- texTemp.format = dst->texture->format; /* XXX verify supported by driver! */
- texTemp.last_level = 0;
- texTemp.width0 = width;
- texTemp.height0 = height;
- texTemp.depth0 = 1;
+ /* bind CSOs */
+ pipe->bind_blend_state(pipe, ctx->blend_write_color);
+ pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
+ pipe->bind_rasterizer_state(pipe, ctx->rs_state);
+ pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, 1));
+ pipe->bind_vs_state(pipe, ctx->vs_col);
+ pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
- texture = screen->resource_create(screen, &texTemp);
- if (!texture)
- return;
+ /* set a framebuffer state */
+ fb_state.width = dstsurf->width;
+ fb_state.height = dstsurf->height;
+ fb_state.nr_cbufs = 1;
+ fb_state.cbufs[0] = dstsurf;
+ fb_state.zsbuf = 0;
+ pipe->set_framebuffer_state(pipe, &fb_state);
- tex_surf = screen->get_tex_surface(screen, texture, 0, 0, 0,
- PIPE_BIND_BLIT_SOURCE |
- PIPE_BIND_BLIT_DESTINATION);
-
- /* blit from the src to the temp */
- util_blitter_do_copy(blitter, tex_surf, 0, 0,
- src, srcx, srcy,
- width, height,
- FALSE);
- util_blitter_do_copy(blitter, dst, dstx, dsty,
- tex_surf, 0, 0,
- width, height,
- FALSE);
- pipe_surface_reference(&tex_surf, NULL);
- pipe_resource_reference(&texture, NULL);
+ blitter_set_dst_dimensions(ctx, dstsurf->width, dstsurf->height);
+ blitter->draw_rectangle(blitter, dstx, dsty, dstx+width, dsty+height, 0,
+ UTIL_BLITTER_ATTRIB_COLOR, rgba);
blitter_restore_CSOs(ctx);
}
-void util_blitter_copy(struct blitter_context *blitter,
- struct pipe_surface *dst,
- unsigned dstx, unsigned dsty,
- struct pipe_surface *src,
- unsigned srcx, unsigned srcy,
- unsigned width, unsigned height,
- boolean ignore_stencil)
+/* Clear a region of a depth stencil surface. */
+void util_blitter_clear_depth_stencil(struct blitter_context *blitter,
+ struct pipe_surface *dstsurf,
+ unsigned clear_flags,
+ double depth,
+ unsigned stencil,
+ unsigned dstx, unsigned dsty,
+ unsigned width, unsigned height)
{
struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
- struct pipe_context *pipe = ctx->pipe;
- struct pipe_screen *screen = pipe->screen;
- boolean is_stencil, is_depth;
- unsigned dst_tex_usage;
+ struct pipe_context *pipe = ctx->base.pipe;
+ struct pipe_framebuffer_state fb_state;
+ struct pipe_stencil_ref sr = { { 0 } };
- /* give up if textures are not set */
- assert(dst->texture && src->texture);
- if (!dst->texture || !src->texture)
+ assert(dstsurf->texture);
+ if (!dstsurf->texture)
return;
- if (dst->texture == src->texture) {
- if (is_overlap(srcx, srcx + width, srcy, srcy + height,
- dstx, dstx + width, dsty, dsty + height)) {
- util_blitter_overlap_copy(blitter, dst, dstx, dsty, src, srcx, srcy,
- width, height);
- return;
- }
- }
-
- is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0;
- is_stencil = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 1) != 0;
- dst_tex_usage = is_depth || is_stencil ? PIPE_BIND_DEPTH_STENCIL :
- PIPE_BIND_RENDER_TARGET;
+ /* check the saved state */
+ blitter_check_saved_CSOs(ctx);
+ assert(blitter->saved_fb_state.nr_cbufs != ~0);
- /* check if we can sample from and render to the surfaces */
- /* (assuming copying a stencil buffer is not possible) */
- if ((!ignore_stencil && is_stencil) ||
- !screen->is_format_supported(screen, dst->format, dst->texture->target,
- dst_tex_usage, 0) ||
- !screen->is_format_supported(screen, src->format, src->texture->target,
- PIPE_BIND_SAMPLER_VIEW, 0)) {
- util_surface_copy(pipe, FALSE, dst, dstx, dsty, src, srcx, srcy,
- width, height);
- return;
+ /* bind CSOs */
+ pipe->bind_blend_state(pipe, ctx->blend_keep_color);
+ if ((clear_flags & PIPE_CLEAR_DEPTHSTENCIL) == PIPE_CLEAR_DEPTHSTENCIL) {
+ sr.ref_value[0] = stencil & 0xff;
+ pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_write_depth_stencil);
+ pipe->set_stencil_ref(pipe, &sr);
+ }
+ else if (clear_flags & PIPE_CLEAR_DEPTH) {
+ pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_write_depth_keep_stencil);
+ }
+ else if (clear_flags & PIPE_CLEAR_STENCIL) {
+ sr.ref_value[0] = stencil & 0xff;
+ pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_write_stencil);
+ pipe->set_stencil_ref(pipe, &sr);
}
+ else
+ /* hmm that should be illegal probably, or make it a no-op somewhere */
+ pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
- /* check whether the states are properly saved */
- blitter_check_saved_CSOs(ctx);
- util_blitter_do_copy(blitter,
- dst, dstx, dsty,
- src, srcx, srcy,
- width, height, is_depth);
+ pipe->bind_rasterizer_state(pipe, ctx->rs_state);
+ pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, 0));
+ pipe->bind_vs_state(pipe, ctx->vs_col);
+ pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
+
+ /* set a framebuffer state */
+ fb_state.width = dstsurf->width;
+ fb_state.height = dstsurf->height;
+ fb_state.nr_cbufs = 0;
+ fb_state.cbufs[0] = 0;
+ fb_state.zsbuf = dstsurf;
+ pipe->set_framebuffer_state(pipe, &fb_state);
+
+ blitter_set_dst_dimensions(ctx, dstsurf->width, dstsurf->height);
+ blitter->draw_rectangle(blitter, dstx, dsty, dstx+width, dsty+height, depth,
+ UTIL_BLITTER_ATTRIB_NONE, NULL);
blitter_restore_CSOs(ctx);
}
-void util_blitter_fill(struct blitter_context *blitter,
- struct pipe_surface *dst,
- unsigned dstx, unsigned dsty,
- unsigned width, unsigned height,
- unsigned value)
+/* draw a rectangle across a region using a custom dsa stage - for r600g */
+void util_blitter_custom_depth_stencil(struct blitter_context *blitter,
+ struct pipe_surface *zsurf,
+ struct pipe_surface *cbsurf,
+ void *dsa_stage, float depth)
{
struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
- struct pipe_context *pipe = ctx->pipe;
- struct pipe_screen *screen = pipe->screen;
+ struct pipe_context *pipe = ctx->base.pipe;
struct pipe_framebuffer_state fb_state;
- float rgba[4];
- ubyte ub_rgba[4] = {0};
- union util_color color;
- int i;
-
- assert(dst->texture);
- if (!dst->texture)
- return;
- /* check if we can render to the surface */
- if (util_format_is_depth_or_stencil(dst->format) || /* unlikely, but you never know */
- !screen->is_format_supported(screen, dst->format, dst->texture->target,
- PIPE_BIND_RENDER_TARGET, 0)) {
- util_surface_fill(pipe, dst, dstx, dsty, width, height, value);
+ assert(zsurf->texture);
+ if (!zsurf->texture)
return;
- }
-
- /* unpack the color */
- color.ui = value;
- util_unpack_color_ub(dst->format, &color,
- ub_rgba, ub_rgba+1, ub_rgba+2, ub_rgba+3);
- for (i = 0; i < 4; i++)
- rgba[i] = ubyte_to_float(ub_rgba[i]);
/* check the saved state */
blitter_check_saved_CSOs(ctx);
@@ -853,22 +984,38 @@ void util_blitter_fill(struct blitter_context *blitter,
/* bind CSOs */
pipe->bind_blend_state(pipe, ctx->blend_write_color);
- pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
+ pipe->bind_depth_stencil_alpha_state(pipe, dsa_stage);
+
pipe->bind_rasterizer_state(pipe, ctx->rs_state);
- pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, 1));
+ pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, 0));
pipe->bind_vs_state(pipe, ctx->vs_col);
pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
/* set a framebuffer state */
- fb_state.width = dst->width;
- fb_state.height = dst->height;
+ fb_state.width = zsurf->width;
+ fb_state.height = zsurf->height;
fb_state.nr_cbufs = 1;
- fb_state.cbufs[0] = dst;
- fb_state.zsbuf = 0;
+ if (cbsurf) {
+ fb_state.cbufs[0] = cbsurf;
+ fb_state.nr_cbufs = 1;
+ } else {
+ fb_state.cbufs[0] = NULL;
+ fb_state.nr_cbufs = 0;
+ }
+ fb_state.zsbuf = zsurf;
pipe->set_framebuffer_state(pipe, &fb_state);
- blitter_set_clear_color(ctx, rgba);
- blitter_set_rectangle(ctx, 0, 0, width, height, dst->width, dst->height, 0);
- blitter_draw_quad(ctx);
+ blitter_set_dst_dimensions(ctx, zsurf->width, zsurf->height);
+ blitter->draw_rectangle(blitter, 0, 0, zsurf->width, zsurf->height, depth,
+ UTIL_BLITTER_ATTRIB_NONE, NULL);
blitter_restore_CSOs(ctx);
}
+
+/* flush a region of a depth stencil surface for r300g */
+void util_blitter_flush_depth_stencil(struct blitter_context *blitter,
+ struct pipe_surface *dstsurf)
+{
+ struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
+ util_blitter_custom_depth_stencil(blitter, dstsurf, NULL,
+ ctx->dsa_flush_depth_stencil, 0.0f);
+}
diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h
index f6e3ce4874e..f9f96f25c77 100644
--- a/src/gallium/auxiliary/util/u_blitter.h
+++ b/src/gallium/auxiliary/util/u_blitter.h
@@ -27,6 +27,8 @@
#ifndef U_BLITTER_H
#define U_BLITTER_H
+#include "util/u_framebuffer.h"
+#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "pipe/p_state.h"
@@ -38,9 +40,48 @@ extern "C" {
struct pipe_context;
+enum blitter_attrib_type {
+ UTIL_BLITTER_ATTRIB_NONE,
+ UTIL_BLITTER_ATTRIB_COLOR,
+ UTIL_BLITTER_ATTRIB_TEXCOORD
+};
+
struct blitter_context
{
+ /**
+ * Draw a rectangle.
+ *
+ * \param x1 An X coordinate of the top-left corner.
+ * \param y1 A Y coordinate of the top-left corner.
+ * \param x2 An X coordinate of the bottom-right corner.
+ * \param y2 A Y coordinate of the bottom-right corner.
+ * \param depth A depth which the rectangle is rendered at.
+ *
+ * \param type Semantics of the attributes "attrib".
+ * If type is UTIL_BLITTER_ATTRIB_NONE, ignore them.
+ * If type is UTIL_BLITTER_ATTRIB_COLOR, the attributes
+ * make up a constant RGBA color, and should go to the COLOR0
+ * varying slot of a fragment shader.
+ * If type is UTIL_BLITTER_ATTRIB_TEXCOORD, {a1, a2} and
+ * {a3, a4} specify top-left and bottom-right texture
+ * coordinates of the rectangle, respectively, and should go
+ * to the GENERIC0 varying slot of a fragment shader.
+ *
+ * \param attrib See type.
+ *
+ * \note A driver may optionally override this callback to implement
+ * a specialized hardware path for drawing a rectangle, e.g. using
+ * a rectangular point sprite.
+ */
+ void (*draw_rectangle)(struct blitter_context *blitter,
+ unsigned x1, unsigned y1, unsigned x2, unsigned y2,
+ float depth,
+ enum blitter_attrib_type type,
+ const float attrib[4]);
+
/* Private members, really. */
+ struct pipe_context *pipe; /**< pipe context */
+
void *saved_blend_state; /**< blend state */
void *saved_dsa_state; /**< depth stencil alpha state */
void *saved_velem_state; /**< vertex elements state */
@@ -72,6 +113,15 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe);
*/
void util_blitter_destroy(struct blitter_context *blitter);
+/**
+ * Return the pipe context associated with a blitter context.
+ */
+static INLINE
+struct pipe_context *util_blitter_get_pipe(struct blitter_context *blitter)
+{
+ return blitter->pipe;
+}
+
/*
* These CSOs must be saved before any of the following functions is called:
* - blend state
@@ -112,52 +162,56 @@ void util_blitter_clear(struct blitter_context *blitter,
* - fragment sampler states
* - fragment sampler textures
*/
-void util_blitter_copy(struct blitter_context *blitter,
- struct pipe_surface *dst,
- unsigned dstx, unsigned dsty,
- struct pipe_surface *src,
- unsigned srcx, unsigned srcy,
- unsigned width, unsigned height,
- boolean ignore_stencil);
+void util_blitter_copy_region(struct blitter_context *blitter,
+ struct pipe_resource *dst,
+ struct pipe_subresource subdst,
+ unsigned dstx, unsigned dsty, unsigned dstz,
+ struct pipe_resource *src,
+ struct pipe_subresource subsrc,
+ unsigned srcx, unsigned srcy, unsigned srcz,
+ unsigned width, unsigned height,
+ boolean ignore_stencil);
/**
- * Fill a region of a surface with a constant value.
- *
- * If the surface cannot be rendered to or it's a depth-stencil format,
- * a software fallback path is taken.
+ * Clear a region of a (color) surface to a constant value.
*
* These states must be saved in the blitter in addition to the state objects
* already required to be saved:
* - framebuffer state
*/
-void util_blitter_fill(struct blitter_context *blitter,
- struct pipe_surface *dst,
- unsigned dstx, unsigned dsty,
- unsigned width, unsigned height,
- unsigned value);
+void util_blitter_clear_render_target(struct blitter_context *blitter,
+ struct pipe_surface *dst,
+ const float *rgba,
+ unsigned dstx, unsigned dsty,
+ unsigned width, unsigned height);
/**
- * Copy all pixels from one surface to another.
+ * Clear a region of a depth-stencil surface, both stencil and depth
+ * or only one of them if this is a combined depth-stencil surface.
*
- * The rules are the same as in util_blitter_copy with the addition that
- * surfaces must have the same size.
+ * These states must be saved in the blitter in addition to the state objects
+ * already required to be saved:
+ * - framebuffer state
*/
-static INLINE
-void util_blitter_copy_surface(struct blitter_context *blitter,
- struct pipe_surface *dst,
- struct pipe_surface *src,
- boolean ignore_stencil)
-{
- assert(dst->width == src->width && dst->height == src->height);
-
- util_blitter_copy(blitter, dst, 0, 0, src, 0, 0, src->width, src->height,
- ignore_stencil);
-}
-
+void util_blitter_clear_depth_stencil(struct blitter_context *blitter,
+ struct pipe_surface *dst,
+ unsigned clear_flags,
+ double depth,
+ unsigned stencil,
+ unsigned dstx, unsigned dsty,
+ unsigned width, unsigned height);
+
+void util_blitter_flush_depth_stencil(struct blitter_context *blitter,
+ struct pipe_surface *dstsurf);
+
+void util_blitter_custom_depth_stencil(struct blitter_context *blitter,
+ struct pipe_surface *zsurf,
+ struct pipe_surface *cbsurf,
+ void *dsa_stage, float depth);
/* The functions below should be used to save currently bound constant state
* objects inside a driver. The objects are automatically restored at the end
- * of the util_blitter_{clear, fill, copy, copy_surface} functions and then
+ * of the util_blitter_{clear, copy_region, fill_region} functions and then
* forgotten.
*
* CSOs not listed here are not affected by util_blitter. */
@@ -213,9 +267,10 @@ void util_blitter_save_vertex_shader(struct blitter_context *blitter,
static INLINE
void util_blitter_save_framebuffer(struct blitter_context *blitter,
- struct pipe_framebuffer_state *state)
+ const struct pipe_framebuffer_state *state)
{
- blitter->saved_fb_state = *state;
+ blitter->saved_fb_state.nr_cbufs = 0; /* It's ~0 now, meaning it's unsaved. */
+ util_copy_framebuffer_state(&blitter->saved_fb_state, state);
}
static INLINE
@@ -250,12 +305,13 @@ util_blitter_save_fragment_sampler_views(struct blitter_context *blitter,
int num_views,
struct pipe_sampler_view **views)
{
+ unsigned i;
assert(num_views <= Elements(blitter->saved_sampler_views));
blitter->saved_num_sampler_views = num_views;
- memcpy(blitter->saved_sampler_views,
- views,
- num_views * sizeof(struct pipe_sampler_view *));
+ for (i = 0; i < num_views; i++)
+ pipe_sampler_view_reference(&blitter->saved_sampler_views[i],
+ views[i]);
}
static INLINE void
@@ -263,9 +319,18 @@ util_blitter_save_vertex_buffers(struct blitter_context *blitter,
int num_vertex_buffers,
struct pipe_vertex_buffer *vertex_buffers)
{
+ unsigned i;
assert(num_vertex_buffers <= Elements(blitter->saved_vertex_buffers));
blitter->saved_num_vertex_buffers = num_vertex_buffers;
+
+ for (i = 0; i < num_vertex_buffers; i++) {
+ if (vertex_buffers[i].buffer) {
+ pipe_resource_reference(&blitter->saved_vertex_buffers[i].buffer,
+ vertex_buffers[i].buffer);
+ }
+ }
+
memcpy(blitter->saved_vertex_buffers,
vertex_buffers,
num_vertex_buffers * sizeof(struct pipe_vertex_buffer));
diff --git a/src/gallium/auxiliary/util/u_box.h b/src/gallium/auxiliary/util/u_box.h
index 919967b55a7..e9c71743fc8 100644
--- a/src/gallium/auxiliary/util/u_box.h
+++ b/src/gallium/auxiliary/util/u_box.h
@@ -60,6 +60,25 @@ void u_box_2d_zslice( unsigned x,
box->depth = 1;
}
+
+static INLINE
+void u_box_3d( unsigned x,
+ unsigned y,
+ unsigned z,
+ unsigned w,
+ unsigned h,
+ unsigned d,
+ struct pipe_box *box )
+{
+ box->x = x;
+ box->y = y;
+ box->z = z;
+ box->width = w;
+ box->height = h;
+ box->depth = d;
+}
+
+
static INLINE
struct pipe_subresource u_subresource( unsigned face,
unsigned level )
diff --git a/src/gallium/auxiliary/util/u_caps.c b/src/gallium/auxiliary/util/u_caps.c
new file mode 100644
index 00000000000..e209a98b706
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_caps.c
@@ -0,0 +1,271 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Vmware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "pipe/p_screen.h"
+#include "util/u_format.h"
+#include "util/u_debug.h"
+#include "u_caps.h"
+
+/**
+ * Iterates over a list of caps checks as defined in u_caps.h. Should
+ * all checks pass returns TRUE and out is set to the last element of
+ * the list (TERMINATE). Should any check fail returns FALSE and set
+ * out to the index of the start of the first failing check.
+ */
+boolean
+util_check_caps_out(struct pipe_screen *screen, const unsigned *list, int *out)
+{
+ int i, tmpi;
+ float tmpf;
+
+ for (i = 0; list[i];) {
+ switch(list[i++]) {
+ case UTIL_CAPS_CHECK_CAP:
+ if (!screen->get_param(screen, list[i++])) {
+ *out = i - 2;
+ return FALSE;
+ }
+ break;
+ case UTIL_CAPS_CHECK_INT:
+ tmpi = screen->get_param(screen, list[i++]);
+ if (tmpi < (int)list[i++]) {
+ *out = i - 3;
+ return FALSE;
+ }
+ break;
+ case UTIL_CAPS_CHECK_FLOAT:
+ tmpf = screen->get_paramf(screen, list[i++]);
+ if (tmpf < (float)list[i++]) {
+ *out = i - 3;
+ return FALSE;
+ }
+ break;
+ case UTIL_CAPS_CHECK_FORMAT:
+ if (!screen->is_format_supported(screen,
+ list[i++],
+ PIPE_TEXTURE_2D,
+ 0,
+ PIPE_BIND_SAMPLER_VIEW,
+ 0)) {
+ *out = i - 2;
+ return FALSE;
+ }
+ break;
+ case UTIL_CAPS_CHECK_SHADER:
+ tmpi = screen->get_shader_param(screen, list[i] >> 24, list[i] & ((1 << 24) - 1));
+ ++i;
+ if (tmpi < (int)list[i++]) {
+ *out = i - 3;
+ return FALSE;
+ }
+ break;
+ case UTIL_CAPS_CHECK_UNIMPLEMENTED:
+ *out = i - 1;
+ return FALSE;
+ default:
+ assert(!"Unsupported check");
+ return FALSE;
+ }
+ }
+
+ *out = i;
+ return TRUE;
+}
+
+/**
+ * Iterates over a list of caps checks as defined in u_caps.h.
+ * Returns TRUE if all caps checks pass returns FALSE otherwise.
+ */
+boolean
+util_check_caps(struct pipe_screen *screen, const unsigned *list)
+{
+ int out;
+ return util_check_caps_out(screen, list, &out);
+}
+
+
+/*
+ * Below follows some demo lists.
+ *
+ * None of these lists are exhausting lists of what is
+ * actually needed to support said API and more here for
+ * as example on how to uses the above functions. Especially
+ * for DX10 and DX11 where Gallium is missing features.
+ */
+
+/* DX 9_1 */
+static unsigned caps_dx_9_1[] = {
+ UTIL_CHECK_INT(MAX_RENDER_TARGETS, 1),
+ UTIL_CHECK_INT(MAX_TEXTURE_2D_LEVELS, 12), /* 2048 */
+ UTIL_CHECK_INT(MAX_TEXTURE_3D_LEVELS, 9), /* 256 */
+ UTIL_CHECK_INT(MAX_TEXTURE_CUBE_LEVELS, 10), /* 512 */
+ UTIL_CHECK_FLOAT(MAX_TEXTURE_ANISOTROPY, 2),
+ UTIL_CHECK_TERMINATE
+};
+
+/* DX 9_2 */
+static unsigned caps_dx_9_2[] = {
+ UTIL_CHECK_CAP(OCCLUSION_QUERY),
+ UTIL_CHECK_CAP(BLEND_EQUATION_SEPARATE),
+ UTIL_CHECK_INT(MAX_RENDER_TARGETS, 1),
+ UTIL_CHECK_INT(MAX_TEXTURE_2D_LEVELS, 12), /* 2048 */
+ UTIL_CHECK_INT(MAX_TEXTURE_3D_LEVELS, 9), /* 256 */
+ UTIL_CHECK_INT(MAX_TEXTURE_CUBE_LEVELS, 10), /* 512 */
+ UTIL_CHECK_FLOAT(MAX_TEXTURE_ANISOTROPY, 16),
+ UTIL_CHECK_TERMINATE
+};
+
+/* DX 9_3 */
+static unsigned caps_dx_9_3[] = {
+ UTIL_CHECK_CAP(SM3),
+ //UTIL_CHECK_CAP(INSTANCING),
+ UTIL_CHECK_CAP(OCCLUSION_QUERY),
+ UTIL_CHECK_INT(MAX_RENDER_TARGETS, 4),
+ UTIL_CHECK_INT(MAX_TEXTURE_2D_LEVELS, 13), /* 4096 */
+ UTIL_CHECK_INT(MAX_TEXTURE_3D_LEVELS, 9), /* 256 */
+ UTIL_CHECK_INT(MAX_TEXTURE_CUBE_LEVELS, 10), /* 512 */
+ UTIL_CHECK_FLOAT(MAX_TEXTURE_ANISOTROPY, 16),
+ UTIL_CHECK_TERMINATE
+};
+
+/* DX 10 */
+static unsigned caps_dx_10[] = {
+ UTIL_CHECK_CAP(SM3),
+ //UTIL_CHECK_CAP(INSTANCING),
+ UTIL_CHECK_CAP(OCCLUSION_QUERY),
+ UTIL_CHECK_INT(MAX_RENDER_TARGETS, 8),
+ UTIL_CHECK_INT(MAX_TEXTURE_2D_LEVELS, 14), /* 8192 */
+ UTIL_CHECK_INT(MAX_TEXTURE_3D_LEVELS, 12), /* 2048 */
+ UTIL_CHECK_INT(MAX_TEXTURE_CUBE_LEVELS, 14), /* 8192 */
+ UTIL_CHECK_FLOAT(MAX_TEXTURE_ANISOTROPY, 16),
+ UTIL_CHECK_UNIMPLEMENTED, /* XXX Unimplemented features in Gallium */
+ UTIL_CHECK_TERMINATE
+};
+
+/* DX11 */
+static unsigned caps_dx_11[] = {
+ UTIL_CHECK_CAP(SM3),
+ //UTIL_CHECK_CAP(INSTANCING),
+ UTIL_CHECK_CAP(OCCLUSION_QUERY),
+ UTIL_CHECK_INT(MAX_RENDER_TARGETS, 8),
+ UTIL_CHECK_INT(MAX_TEXTURE_2D_LEVELS, 14), /* 16384 */
+ UTIL_CHECK_INT(MAX_TEXTURE_3D_LEVELS, 12), /* 2048 */
+ UTIL_CHECK_INT(MAX_TEXTURE_CUBE_LEVELS, 14), /* 16384 */
+ UTIL_CHECK_FLOAT(MAX_TEXTURE_ANISOTROPY, 16),
+ UTIL_CHECK_FORMAT(B8G8R8A8_UNORM),
+ UTIL_CHECK_UNIMPLEMENTED, /* XXX Unimplemented features in Gallium */
+ UTIL_CHECK_TERMINATE
+};
+
+/* OpenGL 2.1 */
+static unsigned caps_opengl_2_1[] = {
+ UTIL_CHECK_CAP(GLSL),
+ UTIL_CHECK_CAP(OCCLUSION_QUERY),
+ UTIL_CHECK_CAP(TWO_SIDED_STENCIL),
+ UTIL_CHECK_CAP(BLEND_EQUATION_SEPARATE),
+ UTIL_CHECK_INT(MAX_RENDER_TARGETS, 2),
+ UTIL_CHECK_TERMINATE
+};
+
+/* OpenGL 3.0 */
+/* UTIL_CHECK_INT(MAX_RENDER_TARGETS, 8), */
+
+/* Shader Model 3 */
+static unsigned caps_sm3[] = {
+ UTIL_CHECK_SHADER(FRAGMENT, MAX_INSTRUCTIONS, 512),
+ UTIL_CHECK_SHADER(FRAGMENT, MAX_INPUTS, 10),
+ UTIL_CHECK_SHADER(FRAGMENT, MAX_TEMPS, 32),
+ UTIL_CHECK_SHADER(FRAGMENT, MAX_ADDRS, 1),
+ UTIL_CHECK_SHADER(FRAGMENT, MAX_CONSTS, 224),
+
+ UTIL_CHECK_SHADER(VERTEX, MAX_INSTRUCTIONS, 512),
+ UTIL_CHECK_SHADER(VERTEX, MAX_INPUTS, 16),
+ UTIL_CHECK_SHADER(VERTEX, MAX_TEMPS, 32),
+ UTIL_CHECK_SHADER(VERTEX, MAX_ADDRS, 2),
+ UTIL_CHECK_SHADER(VERTEX, MAX_CONSTS, 256),
+
+ UTIL_CHECK_TERMINATE
+};
+
+/**
+ * Demo function which checks against theoretical caps needed for different APIs.
+ */
+void util_caps_demo_print(struct pipe_screen *screen)
+{
+ struct {
+ char* name;
+ unsigned *list;
+ } list[] = {
+ {"DX 9.1", caps_dx_9_1},
+ {"DX 9.2", caps_dx_9_2},
+ {"DX 9.3", caps_dx_9_3},
+ {"DX 10", caps_dx_10},
+ {"DX 11", caps_dx_11},
+ {"OpenGL 2.1", caps_opengl_2_1},
+/* {"OpenGL 3.0", caps_opengl_3_0},*/
+ {"SM3", caps_sm3},
+ {NULL, NULL}
+ };
+ int i, out = 0;
+
+ for (i = 0; list[i].name; i++) {
+ if (util_check_caps_out(screen, list[i].list, &out)) {
+ debug_printf("%s: %s yes\n", __FUNCTION__, list[i].name);
+ continue;
+ }
+ switch (list[i].list[out]) {
+ case UTIL_CAPS_CHECK_CAP:
+ debug_printf("%s: %s no (cap %u not supported)\n", __FUNCTION__,
+ list[i].name,
+ list[i].list[out + 1]);
+ break;
+ case UTIL_CAPS_CHECK_INT:
+ debug_printf("%s: %s no (cap %u less then %u)\n", __FUNCTION__,
+ list[i].name,
+ list[i].list[out + 1],
+ list[i].list[out + 2]);
+ break;
+ case UTIL_CAPS_CHECK_FLOAT:
+ debug_printf("%s: %s no (cap %u less then %f)\n", __FUNCTION__,
+ list[i].name,
+ list[i].list[out + 1],
+ (double)(int)list[i].list[out + 2]);
+ break;
+ case UTIL_CAPS_CHECK_FORMAT:
+ debug_printf("%s: %s no (format %s not supported)\n", __FUNCTION__,
+ list[i].name,
+ util_format_name(list[i].list[out + 1]) + 12);
+ break;
+ case UTIL_CAPS_CHECK_UNIMPLEMENTED:
+ debug_printf("%s: %s no (not implemented in gallium or state tracker)\n",
+ __FUNCTION__, list[i].name);
+ break;
+ default:
+ assert(!"Unsupported check");
+ }
+ }
+}
diff --git a/src/gallium/auxiliary/util/u_caps.h b/src/gallium/auxiliary/util/u_caps.h
new file mode 100644
index 00000000000..7bd23800414
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_caps.h
@@ -0,0 +1,71 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Vmware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef U_CAPS_H
+#define U_CAPS_H
+
+#include "pipe/p_compiler.h"
+
+struct pipe_screen;
+
+enum u_caps_check_enum {
+ UTIL_CAPS_CHECK_TERMINATE = 0,
+ UTIL_CAPS_CHECK_CAP,
+ UTIL_CAPS_CHECK_INT,
+ UTIL_CAPS_CHECK_FLOAT,
+ UTIL_CAPS_CHECK_FORMAT,
+ UTIL_CAPS_CHECK_SHADER,
+ UTIL_CAPS_CHECK_UNIMPLEMENTED,
+};
+
+#define UTIL_CHECK_CAP(cap) \
+ UTIL_CAPS_CHECK_CAP, PIPE_CAP_##cap
+
+#define UTIL_CHECK_INT(cap, higher) \
+ UTIL_CAPS_CHECK_INT, PIPE_CAP_##cap, (unsigned)(higher)
+
+/* Floats currently lose precision */
+#define UTIL_CHECK_FLOAT(cap, higher) \
+ UTIL_CAPS_CHECK_FLOAT, PIPE_CAP_##cap, (unsigned)(int)(higher)
+
+#define UTIL_CHECK_FORMAT(format) \
+ UTIL_CAPS_CHECK_FORMAT, PIPE_FORMAT_##format
+
+#define UTIL_CHECK_SHADER(shader, cap, higher) \
+ UTIL_CAPS_CHECK_SHADER, (PIPE_SHADER_##shader << 24) | PIPE_SHADER_CAP_##cap, (unsigned)(higher)
+
+#define UTIL_CHECK_UNIMPLEMENTED \
+ UTIL_CAPS_CHECK_UNIMPLEMENTED
+
+#define UTIL_CHECK_TERMINATE \
+ UTIL_CAPS_CHECK_TERMINATE
+
+boolean util_check_caps(struct pipe_screen *screen, const unsigned *list);
+boolean util_check_caps_out(struct pipe_screen *screen, const unsigned *list, int *out);
+void util_caps_demo_print(struct pipe_screen *screen);
+
+#endif
diff --git a/src/gallium/auxiliary/util/u_clear.h b/src/gallium/auxiliary/util/u_clear.h
index 2c32db61756..ad69df3f898 100644
--- a/src/gallium/auxiliary/util/u_clear.h
+++ b/src/gallium/auxiliary/util/u_clear.h
@@ -31,8 +31,6 @@
#include "pipe/p_context.h"
#include "pipe/p_state.h"
-#include "util/u_pack_color.h"
-#include "util/u_rect.h"
/**
@@ -45,26 +43,17 @@ util_clear(struct pipe_context *pipe,
const float *rgba, double depth, unsigned stencil)
{
if (buffers & PIPE_CLEAR_COLOR) {
- struct pipe_surface *ps = framebuffer->cbufs[0];
- union util_color uc;
-
- util_pack_color(rgba, ps->format, &uc);
- if (pipe->surface_fill) {
- pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, uc.ui);
- } else {
- util_surface_fill(pipe, ps, 0, 0, ps->width, ps->height, uc.ui);
+ unsigned i;
+ for (i = 0; i < framebuffer->nr_cbufs; i++) {
+ struct pipe_surface *ps = framebuffer->cbufs[i];
+ pipe->clear_render_target(pipe, ps, rgba, 0, 0, ps->width, ps->height);
}
}
if (buffers & PIPE_CLEAR_DEPTHSTENCIL) {
struct pipe_surface *ps = framebuffer->zsbuf;
-
- if (pipe->surface_fill) {
- pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height,
- util_pack_z_stencil(ps->format, depth, stencil));
- } else {
- util_surface_fill(pipe, ps, 0, 0, ps->width, ps->height,
- util_pack_z_stencil(ps->format, depth, stencil));
- }
+ pipe->clear_depth_stencil(pipe, ps, buffers & PIPE_CLEAR_DEPTHSTENCIL,
+ depth, stencil,
+ 0, 0, ps->width, ps->height);
}
}
diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c
index a08241971ca..32519b148b6 100644
--- a/src/gallium/auxiliary/util/u_cpu_detect.c
+++ b/src/gallium/auxiliary/util/u_cpu_detect.c
@@ -38,7 +38,7 @@
#include "u_cpu_detect.h"
#if defined(PIPE_ARCH_PPC)
-#if defined(PIPE_OS_DARWIN)
+#if defined(PIPE_OS_APPLE)
#include <sys/sysctl.h>
#else
#include <signal.h>
@@ -73,66 +73,19 @@
#endif
-struct util_cpu_caps util_cpu_caps;
-
-static int has_cpuid(void);
-
-#if defined(PIPE_ARCH_X86)
-
-/* The sigill handlers */
-#if defined(PIPE_OS_LINUX) /*&& defined(_POSIX_SOURCE) && defined(X86_FXSR_MAGIC)*/
-static void
-sigill_handler_sse(int signal, struct sigcontext sc)
-{
- /* Both the "xorps %%xmm0,%%xmm0" and "divps %xmm0,%%xmm1"
- * instructions are 3 bytes long. We must increment the instruction
- * pointer manually to avoid repeated execution of the offending
- * instruction.
- *
- * If the SIGILL is caused by a divide-by-zero when unmasked
- * exceptions aren't supported, the SIMD FPU status and control
- * word will be restored at the end of the test, so we don't need
- * to worry about doing it here. Besides, we may not be able to...
- */
- sc.eip += 3;
-
- util_cpu_caps.has_sse=0;
-}
+#ifdef DEBUG
+DEBUG_GET_ONCE_BOOL_OPTION(dump_cpu, "GALLIUM_DUMP_CPU", FALSE)
+#endif
-static void
-sigfpe_handler_sse(int signal, struct sigcontext sc)
-{
- if (sc.fpstate->magic != 0xffff) {
- /* Our signal context has the extended FPU state, so reset the
- * divide-by-zero exception mask and clear the divide-by-zero
- * exception bit.
- */
- sc.fpstate->mxcsr |= 0x00000200;
- sc.fpstate->mxcsr &= 0xfffffffb;
- } else {
- /* If we ever get here, we're completely hosed.
- */
- }
-}
-#endif /* PIPE_OS_LINUX && _POSIX_SOURCE && X86_FXSR_MAGIC */
-#if defined(PIPE_OS_WINDOWS)
-static LONG CALLBACK
-win32_sig_handler_sse(EXCEPTION_POINTERS* ep)
-{
- if(ep->ExceptionRecord->ExceptionCode==EXCEPTION_ILLEGAL_INSTRUCTION){
- ep->ContextRecord->Eip +=3;
- util_cpu_caps.has_sse=0;
- return EXCEPTION_CONTINUE_EXECUTION;
- }
- return EXCEPTION_CONTINUE_SEARCH;
-}
-#endif /* PIPE_OS_WINDOWS */
+struct util_cpu_caps util_cpu_caps;
-#endif /* PIPE_ARCH_X86 */
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+static int has_cpuid(void);
+#endif
-#if defined(PIPE_ARCH_PPC) && !defined(PIPE_OS_DARWIN)
+#if defined(PIPE_ARCH_PPC) && !defined(PIPE_OS_APPLE)
static jmp_buf __lv_powerpc_jmpbuf;
static volatile sig_atomic_t __lv_powerpc_canjump = 0;
@@ -153,7 +106,7 @@ sigill_handler(int sig)
static void
check_os_altivec_support(void)
{
-#if defined(PIPE_OS_DARWIN)
+#if defined(PIPE_OS_APPLE)
int sels[2] = {CTL_HW, HW_VECTORUNIT};
int has_vu = 0;
int len = sizeof (has_vu);
@@ -166,8 +119,8 @@ check_os_altivec_support(void)
util_cpu_caps.has_altivec = 1;
}
}
-#else /* !PIPE_OS_DARWIN */
- /* no Darwin, do it the brute-force way */
+#else /* !PIPE_OS_APPLE */
+ /* not on Apple/Darwin, do it the brute-force way */
/* this is borrowed from the libmpeg2 library */
signal(SIGILL, sigill_handler);
if (setjmp(__lv_powerpc_jmpbuf)) {
@@ -184,127 +137,12 @@ check_os_altivec_support(void)
signal(SIGILL, SIG_DFL);
util_cpu_caps.has_altivec = 1;
}
-#endif /* PIPE_OS_DARWIN */
+#endif /* !PIPE_OS_APPLE */
}
#endif /* PIPE_ARCH_PPC */
-/* If we're running on a processor that can do SSE, let's see if we
- * are allowed to or not. This will catch 2.4.0 or later kernels that
- * haven't been configured for a Pentium III but are running on one,
- * and RedHat patched 2.2 kernels that have broken exception handling
- * support for user space apps that do SSE.
- */
-#if defined(PIPE_ARCH_X86) || defined (PIPE_ARCH_X86_64)
-static void
-check_os_katmai_support(void)
-{
-#if defined(PIPE_ARCH_X86)
-#if defined(PIPE_OS_FREEBSD)
- int has_sse=0, ret;
- int len = sizeof (has_sse);
-
- ret = sysctlbyname("hw.instruction_sse", &has_sse, &len, NULL, 0);
- if (ret || !has_sse)
- util_cpu_caps.has_sse=0;
-
-#elif defined(PIPE_OS_NETBSD) || defined(PIPE_OS_OPENBSD)
- int has_sse, has_sse2, ret, mib[2];
- int varlen;
-
- mib[0] = CTL_MACHDEP;
- mib[1] = CPU_SSE;
- varlen = sizeof (has_sse);
-
- ret = sysctl(mib, 2, &has_sse, &varlen, NULL, 0);
- if (ret < 0 || !has_sse) {
- util_cpu_caps.has_sse = 0;
- } else {
- util_cpu_caps.has_sse = 1;
- }
-
- mib[1] = CPU_SSE2;
- varlen = sizeof (has_sse2);
- ret = sysctl(mib, 2, &has_sse2, &varlen, NULL, 0);
- if (ret < 0 || !has_sse2) {
- util_cpu_caps.has_sse2 = 0;
- } else {
- util_cpu_caps.has_sse2 = 1;
- }
- util_cpu_caps.has_sse = 0; /* FIXME ?!?!? */
-
-#elif defined(PIPE_OS_WINDOWS)
- LPTOP_LEVEL_EXCEPTION_FILTER exc_fil;
- if (util_cpu_caps.has_sse) {
- exc_fil = SetUnhandledExceptionFilter(win32_sig_handler_sse);
-#if defined(PIPE_CC_GCC)
- __asm __volatile ("xorps %xmm0, %xmm0");
-#elif defined(PIPE_CC_MSVC)
- __asm {
- xorps xmm0, xmm0 /* executing SSE instruction */
- }
-#else
-#error Unsupported compiler
-#endif
- SetUnhandledExceptionFilter(exc_fil);
- }
-#elif defined(PIPE_OS_LINUX)
- struct sigaction saved_sigill;
- struct sigaction saved_sigfpe;
-
- /* Save the original signal handlers.
- */
- sigaction(SIGILL, NULL, &saved_sigill);
- sigaction(SIGFPE, NULL, &saved_sigfpe);
-
- signal(SIGILL, (void (*)(int))sigill_handler_sse);
- signal(SIGFPE, (void (*)(int))sigfpe_handler_sse);
-
- /* Emulate test for OSFXSR in CR4. The OS will set this bit if it
- * supports the extended FPU save and restore required for SSE. If
- * we execute an SSE instruction on a PIII and get a SIGILL, the OS
- * doesn't support Streaming SIMD Exceptions, even if the processor
- * does.
- */
- if (util_cpu_caps.has_sse) {
- __asm __volatile ("xorps %xmm1, %xmm0");
- }
-
- /* Emulate test for OSXMMEXCPT in CR4. The OS will set this bit if
- * it supports unmasked SIMD FPU exceptions. If we unmask the
- * exceptions, do a SIMD divide-by-zero and get a SIGILL, the OS
- * doesn't support unmasked SIMD FPU exceptions. If we get a SIGFPE
- * as expected, we're okay but we need to clean up after it.
- *
- * Are we being too stringent in our requirement that the OS support
- * unmasked exceptions? Certain RedHat 2.2 kernels enable SSE by
- * setting CR4.OSFXSR but don't support unmasked exceptions. Win98
- * doesn't even support them. We at least know the user-space SSE
- * support is good in kernels that do support unmasked exceptions,
- * and therefore to be safe I'm going to leave this test in here.
- */
- if (util_cpu_caps.has_sse) {
- /* test_os_katmai_exception_support(); */
- }
-
- /* Restore the original signal handlers.
- */
- sigaction(SIGILL, &saved_sigill, NULL);
- sigaction(SIGFPE, &saved_sigfpe, NULL);
-
-#else
- /* We can't use POSIX signal handling to test the availability of
- * SSE, so we disable it by default.
- */
- util_cpu_caps.has_sse = 0;
-#endif /* __linux__ */
-#endif
-
-#if defined(PIPE_ARCH_X86_64)
- util_cpu_caps.has_sse = 1;
-#endif
-}
-
+#if defined(PIPE_ARCH_X86) || defined (PIPE_ARCH_X86_64)
static int has_cpuid(void)
{
#if defined(PIPE_ARCH_X86)
@@ -385,23 +223,6 @@ util_cpu_detect(void)
memset(&util_cpu_caps, 0, sizeof util_cpu_caps);
- /* Check for arch type */
-#if defined(PIPE_ARCH_MIPS)
- util_cpu_caps.arch = UTIL_CPU_ARCH_MIPS;
-#elif defined(PIPE_ARCH_ALPHA)
- util_cpu_caps.arch = UTIL_CPU_ARCH_ALPHA;
-#elif defined(PIPE_ARCH_SPARC)
- util_cpu_caps.arch = UTIL_CPU_ARCH_SPARC;
-#elif defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
- util_cpu_caps.arch = UTIL_CPU_ARCH_X86;
- util_cpu_caps.little_endian = 1;
-#elif defined(PIPE_ARCH_PPC)
- util_cpu_caps.arch = UTIL_CPU_ARCH_POWERPC;
- util_cpu_caps.little_endian = 0;
-#else
- util_cpu_caps.arch = UTIL_CPU_ARCH_UNKNOWN;
-#endif
-
/* Count the number of CPUs in system */
#if defined(PIPE_OS_WINDOWS)
{
@@ -480,9 +301,6 @@ util_cpu_detect(void)
util_cpu_caps.cacheline = regs2[2] & 0xFF;
}
- if (util_cpu_caps.has_sse)
- check_os_katmai_support();
-
if (!util_cpu_caps.has_sse) {
util_cpu_caps.has_sse2 = 0;
util_cpu_caps.has_sse3 = 0;
@@ -497,23 +315,24 @@ util_cpu_detect(void)
#endif /* PIPE_ARCH_PPC */
#ifdef DEBUG
- debug_printf("util_cpu_caps.arch = %i\n", util_cpu_caps.arch);
- debug_printf("util_cpu_caps.nr_cpus = %u\n", util_cpu_caps.nr_cpus);
-
- debug_printf("util_cpu_caps.x86_cpu_type = %u\n", util_cpu_caps.x86_cpu_type);
- debug_printf("util_cpu_caps.cacheline = %u\n", util_cpu_caps.cacheline);
-
- debug_printf("util_cpu_caps.has_tsc = %u\n", util_cpu_caps.has_tsc);
- debug_printf("util_cpu_caps.has_mmx = %u\n", util_cpu_caps.has_mmx);
- debug_printf("util_cpu_caps.has_mmx2 = %u\n", util_cpu_caps.has_mmx2);
- debug_printf("util_cpu_caps.has_sse = %u\n", util_cpu_caps.has_sse);
- debug_printf("util_cpu_caps.has_sse2 = %u\n", util_cpu_caps.has_sse2);
- debug_printf("util_cpu_caps.has_sse3 = %u\n", util_cpu_caps.has_sse3);
- debug_printf("util_cpu_caps.has_ssse3 = %u\n", util_cpu_caps.has_ssse3);
- debug_printf("util_cpu_caps.has_sse4_1 = %u\n", util_cpu_caps.has_sse4_1);
- debug_printf("util_cpu_caps.has_3dnow = %u\n", util_cpu_caps.has_3dnow);
- debug_printf("util_cpu_caps.has_3dnow_ext = %u\n", util_cpu_caps.has_3dnow_ext);
- debug_printf("util_cpu_caps.has_altivec = %u\n", util_cpu_caps.has_altivec);
+ if (debug_get_option_dump_cpu()) {
+ debug_printf("util_cpu_caps.nr_cpus = %u\n", util_cpu_caps.nr_cpus);
+
+ debug_printf("util_cpu_caps.x86_cpu_type = %u\n", util_cpu_caps.x86_cpu_type);
+ debug_printf("util_cpu_caps.cacheline = %u\n", util_cpu_caps.cacheline);
+
+ debug_printf("util_cpu_caps.has_tsc = %u\n", util_cpu_caps.has_tsc);
+ debug_printf("util_cpu_caps.has_mmx = %u\n", util_cpu_caps.has_mmx);
+ debug_printf("util_cpu_caps.has_mmx2 = %u\n", util_cpu_caps.has_mmx2);
+ debug_printf("util_cpu_caps.has_sse = %u\n", util_cpu_caps.has_sse);
+ debug_printf("util_cpu_caps.has_sse2 = %u\n", util_cpu_caps.has_sse2);
+ debug_printf("util_cpu_caps.has_sse3 = %u\n", util_cpu_caps.has_sse3);
+ debug_printf("util_cpu_caps.has_ssse3 = %u\n", util_cpu_caps.has_ssse3);
+ debug_printf("util_cpu_caps.has_sse4_1 = %u\n", util_cpu_caps.has_sse4_1);
+ debug_printf("util_cpu_caps.has_3dnow = %u\n", util_cpu_caps.has_3dnow);
+ debug_printf("util_cpu_caps.has_3dnow_ext = %u\n", util_cpu_caps.has_3dnow_ext);
+ debug_printf("util_cpu_caps.has_altivec = %u\n", util_cpu_caps.has_altivec);
+ }
#endif
util_cpu_detect_initialized = TRUE;
diff --git a/src/gallium/auxiliary/util/u_cpu_detect.h b/src/gallium/auxiliary/util/u_cpu_detect.h
index 4b3dc39c342..f3bef0993c7 100644
--- a/src/gallium/auxiliary/util/u_cpu_detect.h
+++ b/src/gallium/auxiliary/util/u_cpu_detect.h
@@ -36,26 +36,15 @@
#define _UTIL_CPU_DETECT_H
#include "pipe/p_compiler.h"
-
-enum util_cpu_arch {
- UTIL_CPU_ARCH_UNKNOWN = 0,
- UTIL_CPU_ARCH_MIPS,
- UTIL_CPU_ARCH_ALPHA,
- UTIL_CPU_ARCH_SPARC,
- UTIL_CPU_ARCH_X86,
- UTIL_CPU_ARCH_POWERPC
-};
+#include "pipe/p_config.h"
struct util_cpu_caps {
- enum util_cpu_arch arch;
unsigned nr_cpus;
/* Feature flags */
int x86_cpu_type;
unsigned cacheline;
- unsigned little_endian:1;
-
unsigned has_tsc:1;
unsigned has_mmx:1;
unsigned has_mmx2:1;
diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c
index 0de38e791d6..504e6d2a18f 100644
--- a/src/gallium/auxiliary/util/u_debug.c
+++ b/src/gallium/auxiliary/util/u_debug.c
@@ -42,6 +42,7 @@
#include "util/u_tile.h"
#include "util/u_prim.h"
+#include <limits.h> /* CHAR_BIT */
void _debug_vprintf(const char *format, va_list ap)
{
@@ -87,7 +88,7 @@ debug_get_option_should_print(void)
* but its cool since we set first to false
*/
first = FALSE;
- value = debug_get_bool_option("GALLIUM_PRINT_OPTIONS", TRUE);
+ value = debug_get_bool_option("GALLIUM_PRINT_OPTIONS", FALSE);
/* XXX should we print this option? Currently it wont */
return value;
}
@@ -123,8 +124,12 @@ debug_get_bool_option(const char *name, boolean dfault)
result = FALSE;
else if(!util_strcmp(str, "f"))
result = FALSE;
+ else if(!util_strcmp(str, "F"))
+ result = FALSE;
else if(!util_strcmp(str, "false"))
result = FALSE;
+ else if(!util_strcmp(str, "FALSE"))
+ result = FALSE;
else
result = TRUE;
@@ -177,16 +182,21 @@ debug_get_flags_option(const char *name,
{
unsigned long result;
const char *str;
+ const struct debug_named_value *orig = flags;
+ int namealign = 0;
str = os_get_option(name);
if(!str)
result = dfault;
else if (!util_strcmp(str, "help")) {
result = dfault;
- while (flags->name) {
- debug_printf("%s: help for %s: %s [0x%lx]\n", __FUNCTION__, name, flags->name, flags->value);
- flags++;
- }
+ _debug_printf("%s: help for %s:\n", __FUNCTION__, name);
+ for (; flags->name; ++flags)
+ namealign = MAX2(namealign, strlen(flags->name));
+ for (flags = orig; flags->name; ++flags)
+ _debug_printf("| %*s [0x%0*lx]%s%s\n", namealign, flags->name,
+ (int)sizeof(unsigned long)*CHAR_BIT/4, flags->value,
+ flags->desc ? " " : "", flags->desc ? flags->desc : "");
}
else {
result = 0;
diff --git a/src/gallium/auxiliary/util/u_debug.h b/src/gallium/auxiliary/util/u_debug.h
index e8ff2773e69..1c9624ea3ed 100644
--- a/src/gallium/auxiliary/util/u_debug.h
+++ b/src/gallium/auxiliary/util/u_debug.h
@@ -230,6 +230,7 @@ struct debug_named_value
{
const char *name;
unsigned long value;
+ const char *desc;
};
@@ -252,8 +253,9 @@ struct debug_named_value
* ...
* @endcode
*/
-#define DEBUG_NAMED_VALUE(__symbol) {#__symbol, (unsigned long)__symbol}
-#define DEBUG_NAMED_VALUE_END {NULL, 0}
+#define DEBUG_NAMED_VALUE(__symbol) DEBUG_NAMED_VALUE_WITH_DESCRIPTION(__symbol, NULL)
+#define DEBUG_NAMED_VALUE_WITH_DESCRIPTION(__symbol, __desc) {#__symbol, (unsigned long)__symbol, __desc}
+#define DEBUG_NAMED_VALUE_END {NULL, 0, NULL}
/**
diff --git a/src/gallium/auxiliary/util/u_debug_describe.c b/src/gallium/auxiliary/util/u_debug_describe.c
new file mode 100644
index 00000000000..1c90ff31069
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_debug_describe.c
@@ -0,0 +1,81 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <pipe/p_state.h>
+#include <util/u_format.h>
+#include <util/u_debug_describe.h>
+#include <util/u_string.h>
+
+void
+debug_describe_reference(char* buf, const struct pipe_reference*ptr)
+{
+ strcpy(buf, "pipe_object");
+}
+
+void
+debug_describe_resource(char* buf, const struct pipe_resource *ptr)
+{
+ switch(ptr->target)
+ {
+ case PIPE_BUFFER:
+ util_sprintf(buf, "pipe_buffer<%u>", (unsigned)util_format_get_stride(ptr->format, ptr->width0));
+ break;
+ case PIPE_TEXTURE_1D:
+ util_sprintf(buf, "pipe_texture1d<%u,%s,%u>", ptr->width0, util_format_short_name(ptr->format), ptr->last_level);
+ break;
+ case PIPE_TEXTURE_2D:
+ util_sprintf(buf, "pipe_texture2d<%u,%u,%s,%u>", ptr->width0, ptr->height0, util_format_short_name(ptr->format), ptr->last_level);
+ break;
+ case PIPE_TEXTURE_RECT:
+ util_sprintf(buf, "pipe_texture_rect<%u,%u,%s>", ptr->width0, ptr->height0, util_format_short_name(ptr->format));
+ break;
+ case PIPE_TEXTURE_CUBE:
+ util_sprintf(buf, "pipe_texture_cube<%u,%u,%s,%u>", ptr->width0, ptr->height0, util_format_short_name(ptr->format), ptr->last_level);
+ break;
+ case PIPE_TEXTURE_3D:
+ util_sprintf(buf, "pipe_texture3d<%u,%u,%u,%s,%u>", ptr->width0, ptr->height0, ptr->depth0, util_format_short_name(ptr->format), ptr->last_level);
+ break;
+ default:
+ util_sprintf(buf, "pipe_martian_resource<%u>", ptr->target);
+ break;
+ }
+}
+
+void
+debug_describe_surface(char* buf, const struct pipe_surface *ptr)
+{
+ char res[128];
+ debug_describe_resource(res, ptr->texture);
+ util_sprintf(buf, "pipe_surface<%s,%u,%u,%u>", res, ptr->face, ptr->level, ptr->zslice);
+}
+
+void
+debug_describe_sampler_view(char* buf, const struct pipe_sampler_view *ptr)
+{
+ char res[128];
+ debug_describe_resource(res, ptr->texture);
+ util_sprintf(buf, "pipe_sampler_view<%s,%s>", res, util_format_short_name(ptr->format));
+}
diff --git a/src/gallium/auxiliary/util/u_debug_describe.h b/src/gallium/auxiliary/util/u_debug_describe.h
new file mode 100644
index 00000000000..26d1f803bf0
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_debug_describe.h
@@ -0,0 +1,49 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef U_DEBUG_DESCRIBE_H_
+#define U_DEBUG_DESCRIBE_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct pipe_reference;
+struct pipe_resource;
+struct pipe_surface;
+struct pipe_sampler_view;
+
+/* a 256-byte buffer is necessary and sufficient */
+void debug_describe_reference(char* buf, const struct pipe_reference*ptr);
+void debug_describe_resource(char* buf, const struct pipe_resource *ptr);
+void debug_describe_surface(char* buf, const struct pipe_surface *ptr);
+void debug_describe_sampler_view(char* buf, const struct pipe_sampler_view *ptr);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* U_DEBUG_DESCRIBE_H_ */
diff --git a/src/gallium/auxiliary/util/u_debug_refcnt.c b/src/gallium/auxiliary/util/u_debug_refcnt.c
new file mode 100644
index 00000000000..40a26c9c697
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_debug_refcnt.c
@@ -0,0 +1,181 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#if defined(DEBUG) && (!defined(PIPE_OS_WINDOWS) || defined(PIPE_SUBSYSTEM_WINDOWS_USER))
+
+/* see http://www.mozilla.org/performance/refcnt-balancer.html for what do with the output
+ * on Linux, use tools/addr2line.sh to postprocess it before anything else
+ **/
+#include <util/u_debug.h>
+#include <util/u_debug_refcnt.h>
+#include <util/u_debug_stack.h>
+#include <util/u_debug_symbol.h>
+#include <util/u_string.h>
+#include <util/u_hash_table.h>
+#include <os/os_thread.h>
+#include <os/os_stream.h>
+
+int debug_refcnt_state;
+
+struct os_stream* stream;
+
+/* TODO: maybe move this serial machinery to a stand-alone module and expose it? */
+static pipe_mutex serials_mutex;
+static struct util_hash_table* serials_hash;
+static unsigned serials_last;
+
+static unsigned hash_ptr(void* p)
+{
+ return (unsigned)(uintptr_t)p;
+}
+
+static int compare_ptr(void* a, void* b)
+{
+ if(a == b)
+ return 0;
+ else if(a < b)
+ return -1;
+ else
+ return 1;
+}
+
+static boolean debug_serial(void* p, unsigned* pserial)
+{
+ unsigned serial;
+ boolean found = TRUE;
+ pipe_mutex_lock(serials_mutex);
+ if(!serials_hash)
+ serials_hash = util_hash_table_create(hash_ptr, compare_ptr);
+ serial = (unsigned)(uintptr_t)util_hash_table_get(serials_hash, p);
+ if(!serial)
+ {
+ /* time to stop logging... (you'll have a 100 GB logfile at least at this point)
+ * TODO: avoid this
+ */
+ serial = ++serials_last;
+ if(!serial)
+ {
+ debug_error("More than 2^32 objects detected, aborting.\n");
+ os_abort();
+ }
+
+ util_hash_table_set(serials_hash, p, (void*)(uintptr_t)serial);
+ found = FALSE;
+ }
+ pipe_mutex_unlock(serials_mutex);
+ *pserial = serial;
+ return found;
+}
+
+static void debug_serial_delete(void* p)
+{
+ pipe_mutex_lock(serials_mutex);
+ util_hash_table_remove(serials_hash, p);
+ pipe_mutex_unlock(serials_mutex);
+}
+
+#define STACK_LEN 64
+
+static void dump_stack(const char* symbols[STACK_LEN])
+{
+ unsigned i;
+ for(i = 0; i < STACK_LEN; ++i)
+ {
+ if(symbols[i])
+ os_stream_printf(stream, "%s\n", symbols[i]);
+ }
+ os_stream_write(stream, "\n", 1);
+}
+
+void debug_reference_slowpath(const struct pipe_reference* p, debug_reference_descriptor get_desc, int change)
+{
+ if(debug_refcnt_state < 0)
+ return;
+
+ if(!debug_refcnt_state)
+ {
+ const char* filename = debug_get_option("GALLIUM_REFCNT_LOG", NULL);
+ if(filename && filename[0])
+ stream = os_file_stream_create(filename);
+
+ if(stream)
+ debug_refcnt_state = 1;
+ else
+ debug_refcnt_state = -1;
+ }
+
+ if(debug_refcnt_state > 0)
+ {
+ struct debug_stack_frame frames[STACK_LEN];
+ const char* symbols[STACK_LEN];
+ char buf[1024];
+
+ unsigned i;
+ unsigned refcnt = p->count;
+ unsigned serial;
+ boolean existing = debug_serial((void*)p, &serial);
+
+ debug_backtrace_capture(frames, 1, STACK_LEN);
+ for(i = 0; i < STACK_LEN; ++i)
+ {
+ if(frames[i].function)
+ symbols[i] = debug_symbol_name_cached(frames[i].function);
+ else
+ symbols[i] = 0;
+ }
+
+ get_desc(buf, p);
+
+ if(!existing)
+ {
+ os_stream_printf(stream, "<%s> %p %u Create\n", buf, p, serial);
+ dump_stack(symbols);
+
+ /* this is there to provide a gradual change even if we don't see the initialization */
+ for(i = 1; i <= refcnt - change; ++i)
+ {
+ os_stream_printf(stream, "<%s> %p %u AddRef %u\n", buf, p, serial, i);
+ dump_stack(symbols);
+ }
+ }
+
+ if(change)
+ {
+ os_stream_printf(stream, "<%s> %p %u %s %u\n", buf, p, serial, change > 0 ? "AddRef" : "Release", refcnt);
+ dump_stack(symbols);
+ }
+
+ if(!refcnt)
+ {
+ debug_serial_delete((void*)p);
+ os_stream_printf(stream, "<%s> %p %u Destroy\n", buf, p, serial);
+ dump_stack(symbols);
+ }
+
+ os_stream_flush(stream);
+ }
+}
+#endif
diff --git a/src/gallium/auxiliary/util/u_debug_refcnt.h b/src/gallium/auxiliary/util/u_debug_refcnt.h
new file mode 100644
index 00000000000..bea2d1c478a
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_debug_refcnt.h
@@ -0,0 +1,63 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef U_DEBUG_REFCNT_H_
+#define U_DEBUG_REFCNT_H_
+
+#include <pipe/p_config.h>
+#include <pipe/p_state.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef void (*debug_reference_descriptor)(char*, const struct pipe_reference*);
+
+#if defined(DEBUG) && (!defined(PIPE_OS_WINDOWS) || defined(PIPE_SUBSYSTEM_WINDOWS_USER))
+
+extern int debug_refcnt_state;
+
+void debug_reference_slowpath(const struct pipe_reference* p, debug_reference_descriptor get_desc, int change);
+
+static INLINE void debug_reference(const struct pipe_reference* p, debug_reference_descriptor get_desc, int change)
+{
+ if (debug_refcnt_state >= 0)
+ debug_reference_slowpath(p, get_desc, change);
+}
+
+#else
+
+static INLINE void debug_reference(const struct pipe_reference* p, debug_reference_descriptor get_desc, int change)
+{
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* U_DEBUG_REFCNT_H_ */
diff --git a/src/gallium/auxiliary/util/u_debug_symbol.c b/src/gallium/auxiliary/util/u_debug_symbol.c
index 417d0cf04c9..332952af88b 100644
--- a/src/gallium/auxiliary/util/u_debug_symbol.c
+++ b/src/gallium/auxiliary/util/u_debug_symbol.c
@@ -33,9 +33,12 @@
*/
#include "pipe/p_compiler.h"
+#include "os/os_thread.h"
+#include "u_string.h"
#include "u_debug.h"
#include "u_debug_symbol.h"
+#include "u_hash_table.h"
#if defined(PIPE_SUBSYSTEM_WINDOWS_USER) && defined(PIPE_ARCH_X86)
@@ -67,21 +70,6 @@ BOOL WINAPI j_SymInitialize(HANDLE hProcess, PSTR UserSearchPath, BOOL fInvadePr
return FALSE;
}
-typedef BOOL (WINAPI *PFNSYMCLEANUP)(HANDLE);
-static PFNSYMCLEANUP pfnSymCleanup = NULL;
-
-static
-BOOL WINAPI j_SymCleanup(HANDLE hProcess)
-{
- if(
- (hModule_Imagehlp || (hModule_Imagehlp = LoadLibraryA("IMAGEHLP.DLL"))) &&
- (pfnSymCleanup || (pfnSymCleanup = (PFNSYMCLEANUP) GetProcAddress(hModule_Imagehlp, "SymCleanup")))
- )
- return pfnSymCleanup(hProcess);
- else
- return FALSE;
-}
-
typedef DWORD (WINAPI *PFNSYMSETOPTIONS)(DWORD);
static PFNSYMSETOPTIONS pfnSymSetOptions = NULL;
@@ -97,36 +85,6 @@ DWORD WINAPI j_SymSetOptions(DWORD SymOptions)
return FALSE;
}
-typedef BOOL (WINAPI *PFNSYMUNDNAME)(PIMAGEHLP_SYMBOL, PSTR, DWORD);
-static PFNSYMUNDNAME pfnSymUnDName = NULL;
-
-static
-BOOL WINAPI j_SymUnDName(PIMAGEHLP_SYMBOL Symbol, PSTR UnDecName, DWORD UnDecNameLength)
-{
- if(
- (hModule_Imagehlp || (hModule_Imagehlp = LoadLibraryA("IMAGEHLP.DLL"))) &&
- (pfnSymUnDName || (pfnSymUnDName = (PFNSYMUNDNAME) GetProcAddress(hModule_Imagehlp, "SymUnDName")))
- )
- return pfnSymUnDName(Symbol, UnDecName, UnDecNameLength);
- else
- return FALSE;
-}
-
-typedef PFUNCTION_TABLE_ACCESS_ROUTINE PFNSYMFUNCTIONTABLEACCESS;
-static PFNSYMFUNCTIONTABLEACCESS pfnSymFunctionTableAccess = NULL;
-
-static
-PVOID WINAPI j_SymFunctionTableAccess(HANDLE hProcess, DWORD AddrBase)
-{
- if(
- (hModule_Imagehlp || (hModule_Imagehlp = LoadLibraryA("IMAGEHLP.DLL"))) &&
- (pfnSymFunctionTableAccess || (pfnSymFunctionTableAccess = (PFNSYMFUNCTIONTABLEACCESS) GetProcAddress(hModule_Imagehlp, "SymFunctionTableAccess")))
- )
- return pfnSymFunctionTableAccess(hProcess, AddrBase);
- else
- return NULL;
-}
-
typedef PGET_MODULE_BASE_ROUTINE PFNSYMGETMODULEBASE;
static PFNSYMGETMODULEBASE pfnSymGetModuleBase = NULL;
@@ -142,41 +100,6 @@ DWORD WINAPI j_SymGetModuleBase(HANDLE hProcess, DWORD dwAddr)
return 0;
}
-typedef BOOL (WINAPI *PFNSTACKWALK)(DWORD, HANDLE, HANDLE, LPSTACKFRAME, LPVOID, PREAD_PROCESS_MEMORY_ROUTINE, PFUNCTION_TABLE_ACCESS_ROUTINE, PGET_MODULE_BASE_ROUTINE, PTRANSLATE_ADDRESS_ROUTINE);
-static PFNSTACKWALK pfnStackWalk = NULL;
-
-static
-BOOL WINAPI j_StackWalk(
- DWORD MachineType,
- HANDLE hProcess,
- HANDLE hThread,
- LPSTACKFRAME StackFrame,
- PVOID ContextRecord,
- PREAD_PROCESS_MEMORY_ROUTINE ReadMemoryRoutine,
- PFUNCTION_TABLE_ACCESS_ROUTINE FunctionTableAccessRoutine,
- PGET_MODULE_BASE_ROUTINE GetModuleBaseRoutine,
- PTRANSLATE_ADDRESS_ROUTINE TranslateAddress
-)
-{
- if(
- (hModule_Imagehlp || (hModule_Imagehlp = LoadLibraryA("IMAGEHLP.DLL"))) &&
- (pfnStackWalk || (pfnStackWalk = (PFNSTACKWALK) GetProcAddress(hModule_Imagehlp, "StackWalk")))
- )
- return pfnStackWalk(
- MachineType,
- hProcess,
- hThread,
- StackFrame,
- ContextRecord,
- ReadMemoryRoutine,
- FunctionTableAccessRoutine,
- GetModuleBaseRoutine,
- TranslateAddress
- );
- else
- return FALSE;
-}
-
typedef BOOL (WINAPI *PFNSYMGETSYMFROMADDR)(HANDLE, DWORD, LPDWORD, PIMAGEHLP_SYMBOL);
static PFNSYMGETSYMFROMADDR pfnSymGetSymFromAddr = NULL;
@@ -192,24 +115,9 @@ BOOL WINAPI j_SymGetSymFromAddr(HANDLE hProcess, DWORD Address, PDWORD Displacem
return FALSE;
}
-typedef BOOL (WINAPI *PFNSYMGETLINEFROMADDR)(HANDLE, DWORD, LPDWORD, PIMAGEHLP_LINE);
-static PFNSYMGETLINEFROMADDR pfnSymGetLineFromAddr = NULL;
-static
-BOOL WINAPI j_SymGetLineFromAddr(HANDLE hProcess, DWORD dwAddr, PDWORD pdwDisplacement, PIMAGEHLP_LINE Line)
-{
- if(
- (hModule_Imagehlp || (hModule_Imagehlp = LoadLibraryA("IMAGEHLP.DLL"))) &&
- (pfnSymGetLineFromAddr || (pfnSymGetLineFromAddr = (PFNSYMGETLINEFROMADDR) GetProcAddress(hModule_Imagehlp, "SymGetLineFromAddr")))
- )
- return pfnSymGetLineFromAddr(hProcess, dwAddr, pdwDisplacement, Line);
- else
- return FALSE;
-}
-
-
-static INLINE boolean
-debug_symbol_print_imagehlp(const void *addr)
+static INLINE void
+debug_symbol_name_imagehlp(const void *addr, char* buf, unsigned size)
{
HANDLE hProcess;
BYTE symbolBuffer[1024];
@@ -226,25 +134,95 @@ debug_symbol_print_imagehlp(const void *addr)
if(j_SymInitialize(hProcess, NULL, TRUE))
bSymInitialized = TRUE;
}
-
+
if(!j_SymGetSymFromAddr(hProcess, (DWORD)addr, &dwDisplacement, pSymbol))
- return FALSE;
+ buf[0] = 0;
+ else
+ {
+ strncpy(buf, pSymbol->Name, size);
+ buf[size - 1] = 0;
+ }
+}
+#endif
- debug_printf("\t%s\n", pSymbol->Name);
+#ifdef __GLIBC__
+#include <execinfo.h>
- return TRUE;
-
+/* This can only provide dynamic symbols, or binary offsets into a file.
+ *
+ * To fix this, post-process the output with tools/addr2line.sh
+ */
+static INLINE void
+debug_symbol_name_glibc(const void *addr, char* buf, unsigned size)
+{
+ char** syms = backtrace_symbols((void**)&addr, 1);
+ strncpy(buf, syms[0], size);
+ buf[size - 1] = 0;
+ free(syms);
}
#endif
-
void
-debug_symbol_print(const void *addr)
+debug_symbol_name(const void *addr, char* buf, unsigned size)
{
#if defined(PIPE_SUBSYSTEM_WINDOWS_USER) && defined(PIPE_ARCH_X86)
- if(debug_symbol_print_imagehlp(addr))
+ debug_symbol_name_imagehlp(addr, buf, size);
+ if(buf[0])
return;
#endif
-
- debug_printf("\t%p\n", addr);
+
+#ifdef __GLIBC__
+ debug_symbol_name_glibc(addr, buf, size);
+ if(buf[0])
+ return;
+#endif
+
+ util_snprintf(buf, size, "%p", addr);
+ buf[size - 1] = 0;
+}
+
+void
+debug_symbol_print(const void *addr)
+{
+ char buf[1024];
+ debug_symbol_name(addr, buf, sizeof(buf));
+ debug_printf("\t%s\n", buf);
+}
+
+struct util_hash_table* symbols_hash;
+pipe_mutex symbols_mutex;
+
+static unsigned hash_ptr(void* p)
+{
+ return (unsigned)(uintptr_t)p;
+}
+
+static int compare_ptr(void* a, void* b)
+{
+ if(a == b)
+ return 0;
+ else if(a < b)
+ return -1;
+ else
+ return 1;
+}
+
+const char*
+debug_symbol_name_cached(const void *addr)
+{
+ const char* name;
+ pipe_mutex_lock(symbols_mutex);
+ if(!symbols_hash)
+ symbols_hash = util_hash_table_create(hash_ptr, compare_ptr);
+ name = util_hash_table_get(symbols_hash, (void*)addr);
+ if(!name)
+ {
+ char buf[1024];
+ debug_symbol_name(addr, buf, sizeof(buf));
+ name = strdup(buf);
+
+ util_hash_table_set(symbols_hash, (void*)addr, (void*)name);
+ }
+ pipe_mutex_unlock(symbols_mutex);
+ return name;
}
diff --git a/src/gallium/auxiliary/util/u_debug_symbol.h b/src/gallium/auxiliary/util/u_debug_symbol.h
index 021586987b6..b247706c2a0 100644
--- a/src/gallium/auxiliary/util/u_debug_symbol.h
+++ b/src/gallium/auxiliary/util/u_debug_symbol.h
@@ -43,8 +43,13 @@ extern "C" {
void
-debug_symbol_print(const void *addr);
+debug_symbol_name(const void *addr, char* buf, unsigned size);
+
+const char*
+debug_symbol_name_cached(const void *addr);
+void
+debug_symbol_print(const void *addr);
#ifdef __cplusplus
}
diff --git a/src/gallium/auxiliary/util/u_dirty_flags.h b/src/gallium/auxiliary/util/u_dirty_flags.h
new file mode 100644
index 00000000000..7e1be45ad5a
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_dirty_flags.h
@@ -0,0 +1,28 @@
+#ifndef U_DIRTY_FLAGS_H
+#define U_DIRTY_FLAGS_H
+
+/* Here's a convenient list of dirty flags to use in a driver. Either
+ * include it directly or use it as a starting point for your own
+ * list.
+ */
+#define U_NEW_VIEWPORT 0x1
+#define U_NEW_RASTERIZER 0x2
+#define U_NEW_FS 0x4
+#define U_NEW_FS_CONSTANTS 0x8
+#define U_NEW_FS_SAMPLER_VIEW 0x10
+#define U_NEW_FS_SAMPLER_STATES 0x20
+#define U_NEW_VS 0x40
+#define U_NEW_VS_CONSTANTS 0x80
+#define U_NEW_VS_SAMPLER_VIEW 0x100
+#define U_NEW_VS_SAMPLER_STATES 0x200
+#define U_NEW_BLEND 0x400
+#define U_NEW_CLIP 0x800
+#define U_NEW_SCISSOR 0x1000
+#define U_NEW_POLYGON_STIPPLE 0x2000
+#define U_NEW_FRAMEBUFFER 0x4000
+#define U_NEW_VERTEX_ELEMENTS 0x8000
+#define U_NEW_VERTEX_BUFFER 0x10000
+#define U_NEW_QUERY 0x20000
+#define U_NEW_DEPTH_STENCIL 0x40000
+
+#endif
diff --git a/src/gallium/auxiliary/util/u_dirty_surfaces.h b/src/gallium/auxiliary/util/u_dirty_surfaces.h
index 99f260bf967..fd1bbe5ffdf 100644
--- a/src/gallium/auxiliary/util/u_dirty_surfaces.h
+++ b/src/gallium/auxiliary/util/u_dirty_surfaces.h
@@ -1,9 +1,39 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
#ifndef U_DIRTY_SURFACES_H_
#define U_DIRTY_SURFACES_H_
+#include "pipe/p_state.h"
+
#include "util/u_double_list.h"
#include "util/u_math.h"
+struct pipe_context;
+
typedef void (*util_dirty_surface_flush_t) (struct pipe_context *, struct pipe_surface *);
struct util_dirty_surfaces
diff --git a/src/gallium/auxiliary/util/u_dl.h b/src/gallium/auxiliary/util/u_dl.h
index 2853b447c61..80a00ed6796 100644
--- a/src/gallium/auxiliary/util/u_dl.h
+++ b/src/gallium/auxiliary/util/u_dl.h
@@ -35,10 +35,13 @@
#if defined(PIPE_OS_WINDOWS)
# define UTIL_DL_EXT ".dll"
+# define UTIL_DL_PREFIX ""
#elif defined(PIPE_OS_APPLE)
# define UTIL_DL_EXT ".dylib"
+# define UTIL_DL_PREFIX "lib"
#else
# define UTIL_DL_EXT ".so"
+# define UTIL_DL_PREFIX "lib"
#endif
diff --git a/src/gallium/auxiliary/util/u_double_list.h b/src/gallium/auxiliary/util/u_double_list.h
index 53bb1342ddc..42adb1f0699 100644
--- a/src/gallium/auxiliary/util/u_double_list.h
+++ b/src/gallium/auxiliary/util/u_double_list.h
@@ -98,5 +98,20 @@ struct list_head
#define LIST_IS_EMPTY(__list) \
((__list)->next == (__list))
-
+#ifndef container_of
+#define container_of(ptr, sample, member) \
+ (void *)((char *)(ptr) \
+ - ((char *)&(sample)->member - (char *)(sample)))
+#endif
+
+#define LIST_FOR_EACH_ENTRY(pos, head, member) \
+ for (pos = container_of((head)->next, pos, member); \
+ &pos->member != (head); \
+ pos = container_of(pos->member.next, pos, member))
+
+#define LIST_FOR_EACH_ENTRY_SAFE(pos, storage, head, member) \
+ for (pos = container_of((head)->next, pos, member), \
+ storage = container_of(pos->member.next, pos, member); \
+ &pos->member != (head); \
+ pos = storage, storage = container_of(storage->member.next, storage, member))
#endif /*_U_DOUBLE_LIST_H_*/
diff --git a/src/gallium/auxiliary/util/u_draw.h b/src/gallium/auxiliary/util/u_draw.h
new file mode 100644
index 00000000000..f06d09ef91d
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_draw.h
@@ -0,0 +1,139 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef U_DRAW_H
+#define U_DRAW_H
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+
+
+static INLINE void
+util_draw_init_info(struct pipe_draw_info *info)
+{
+ memset(info, 0, sizeof(*info));
+ info->instance_count = 1;
+ info->max_index = 0xffffffff;
+}
+
+
+static INLINE void
+util_draw_arrays(struct pipe_context *pipe, uint mode, uint start, uint count)
+{
+ struct pipe_draw_info info;
+
+ util_draw_init_info(&info);
+ info.mode = mode;
+ info.start = start;
+ info.count = count;
+ info.min_index = start;
+ info.max_index = start + count - 1;
+
+ pipe->draw_vbo(pipe, &info);
+}
+
+static INLINE void
+util_draw_elements(struct pipe_context *pipe, int index_bias,
+ uint mode, uint start, uint count)
+{
+ struct pipe_draw_info info;
+
+ util_draw_init_info(&info);
+ info.indexed = TRUE;
+ info.mode = mode;
+ info.start = start;
+ info.count = count;
+ info.index_bias = index_bias;
+
+ pipe->draw_vbo(pipe, &info);
+}
+
+static INLINE void
+util_draw_arrays_instanced(struct pipe_context *pipe,
+ uint mode, uint start, uint count,
+ uint start_instance,
+ uint instance_count)
+{
+ struct pipe_draw_info info;
+
+ util_draw_init_info(&info);
+ info.mode = mode;
+ info.start = start;
+ info.count = count;
+ info.start_instance = start_instance;
+ info.instance_count = instance_count;
+ info.min_index = start;
+ info.max_index = start + count - 1;
+
+ pipe->draw_vbo(pipe, &info);
+}
+
+static INLINE void
+util_draw_elements_instanced(struct pipe_context *pipe,
+ int index_bias,
+ uint mode, uint start, uint count,
+ uint start_instance,
+ uint instance_count)
+{
+ struct pipe_draw_info info;
+
+ util_draw_init_info(&info);
+ info.indexed = TRUE;
+ info.mode = mode;
+ info.start = start;
+ info.count = count;
+ info.index_bias = index_bias;
+ info.start_instance = start_instance;
+ info.instance_count = instance_count;
+
+ pipe->draw_vbo(pipe, &info);
+}
+
+static INLINE void
+util_draw_range_elements(struct pipe_context *pipe,
+ int index_bias,
+ uint min_index,
+ uint max_index,
+ uint mode, uint start, uint count)
+{
+ struct pipe_draw_info info;
+
+ util_draw_init_info(&info);
+ info.indexed = TRUE;
+ info.mode = mode;
+ info.start = start;
+ info.count = count;
+ info.index_bias = index_bias;
+ info.min_index = min_index;
+ info.max_index = max_index;
+
+ pipe->draw_vbo(pipe, &info);
+}
+
+#endif
diff --git a/src/gallium/auxiliary/util/u_draw_quad.c b/src/gallium/auxiliary/util/u_draw_quad.c
index b37b48b5aef..0b6dc5880f3 100644
--- a/src/gallium/auxiliary/util/u_draw_quad.c
+++ b/src/gallium/auxiliary/util/u_draw_quad.c
@@ -60,7 +60,7 @@ util_draw_vertex_buffer(struct pipe_context *pipe,
/* note: vertex elements already set by caller */
/* draw */
- pipe->draw_arrays(pipe, prim_type, 0, num_verts);
+ util_draw_arrays(pipe, prim_type, 0, num_verts);
}
diff --git a/src/gallium/auxiliary/util/u_draw_quad.h b/src/gallium/auxiliary/util/u_draw_quad.h
index 42eb1844289..52994fe05c3 100644
--- a/src/gallium/auxiliary/util/u_draw_quad.h
+++ b/src/gallium/auxiliary/util/u_draw_quad.h
@@ -29,12 +29,18 @@
#define U_DRAWQUAD_H
+#include "pipe/p_compiler.h"
+#include "pipe/p_context.h"
+
+
#ifdef __cplusplus
extern "C" {
#endif
struct pipe_resource;
+#include "util/u_draw.h"
+
extern void
util_draw_vertex_buffer(struct pipe_context *pipe,
struct pipe_resource *vbuf, uint offset,
diff --git a/src/gallium/auxiliary/util/u_dump.h b/src/gallium/auxiliary/util/u_dump.h
index bdc73ac47d2..49536c0d593 100644
--- a/src/gallium/auxiliary/util/u_dump.h
+++ b/src/gallium/auxiliary/util/u_dump.h
@@ -71,9 +71,15 @@ const char *
util_dump_blend_func(unsigned value, boolean shortened);
const char *
+util_dump_logicop(unsigned value, boolean shortened);
+
+const char *
util_dump_func(unsigned value, boolean shortened);
const char *
+util_dump_stencil_op(unsigned value, boolean shortened);
+
+const char *
util_dump_tex_target(unsigned value, boolean shortened);
const char *
diff --git a/src/gallium/auxiliary/util/u_dump_defines.c b/src/gallium/auxiliary/util/u_dump_defines.c
index 96a22563473..692d4447c66 100644
--- a/src/gallium/auxiliary/util/u_dump_defines.c
+++ b/src/gallium/auxiliary/util/u_dump_defines.c
@@ -160,6 +160,49 @@ DEFINE_UTIL_DUMP_CONTINUOUS(blend_func)
static const char *
+util_dump_logicop_names[] = {
+ "PIPE_LOGICOP_CLEAR",
+ "PIPE_LOGICOP_NOR",
+ "PIPE_LOGICOP_AND_INVERTED",
+ "PIPE_LOGICOP_COPY_INVERTED",
+ "PIPE_LOGICOP_AND_REVERSE",
+ "PIPE_LOGICOP_INVERT",
+ "PIPE_LOGICOP_XOR",
+ "PIPE_LOGICOP_NAND",
+ "PIPE_LOGICOP_AND",
+ "PIPE_LOGICOP_EQUIV",
+ "PIPE_LOGICOP_NOOP",
+ "PIPE_LOGICOP_OR_INVERTED",
+ "PIPE_LOGICOP_COPY",
+ "PIPE_LOGICOP_OR_REVERSE",
+ "PIPE_LOGICOP_OR",
+ "PIPE_LOGICOP_SET"
+};
+
+static const char *
+util_dump_logicop_short_names[] = {
+ "clear",
+ "nor",
+ "and_inverted",
+ "copy_inverted",
+ "and_reverse",
+ "invert",
+ "xor",
+ "nand",
+ "and",
+ "equiv",
+ "noop",
+ "or_inverted",
+ "copy",
+ "or_reverse",
+ "or",
+ "set"
+};
+
+DEFINE_UTIL_DUMP_CONTINUOUS(logicop)
+
+
+static const char *
util_dump_func_names[] = {
"PIPE_FUNC_NEVER",
"PIPE_FUNC_LESS",
@@ -187,7 +230,35 @@ DEFINE_UTIL_DUMP_CONTINUOUS(func)
static const char *
+util_dump_stencil_op_names[] = {
+ "PIPE_STENCIL_OP_KEEP",
+ "PIPE_STENCIL_OP_ZERO",
+ "PIPE_STENCIL_OP_REPLACE",
+ "PIPE_STENCIL_OP_INCR",
+ "PIPE_STENCIL_OP_DECR",
+ "PIPE_STENCIL_OP_INCR_WRAP",
+ "PIPE_STENCIL_OP_DECR_WRAP",
+ "PIPE_STENCIL_OP_INVERT"
+};
+
+static const char *
+util_dump_stencil_op_short_names[] = {
+ "keep",
+ "zero",
+ "replace",
+ "incr",
+ "decr",
+ "incr_wrap",
+ "decr_wrap",
+ "invert"
+};
+
+DEFINE_UTIL_DUMP_CONTINUOUS(stencil_op)
+
+
+static const char *
util_dump_tex_target_names[] = {
+ "PIPE_BUFFER",
"PIPE_TEXTURE_1D",
"PIPE_TEXTURE_2D",
"PIPE_TEXTURE_3D",
@@ -196,6 +267,7 @@ util_dump_tex_target_names[] = {
static const char *
util_dump_tex_target_short_names[] = {
+ "buffer",
"1d",
"2d",
"3d",
diff --git a/src/gallium/auxiliary/util/u_dump_state.c b/src/gallium/auxiliary/util/u_dump_state.c
index 2ce643e90cd..cda5b8ba512 100644
--- a/src/gallium/auxiliary/util/u_dump_state.c
+++ b/src/gallium/auxiliary/util/u_dump_state.c
@@ -300,12 +300,13 @@ util_dump_rasterizer_state(struct os_stream *stream, const struct pipe_rasterize
util_dump_member(stream, bool, state, flatshade);
util_dump_member(stream, bool, state, light_twoside);
- util_dump_member(stream, uint, state, front_winding);
- util_dump_member(stream, uint, state, cull_mode);
- util_dump_member(stream, uint, state, fill_cw);
- util_dump_member(stream, uint, state, fill_ccw);
- util_dump_member(stream, bool, state, offset_cw);
- util_dump_member(stream, bool, state, offset_ccw);
+ util_dump_member(stream, uint, state, front_ccw);
+ util_dump_member(stream, uint, state, cull_face);
+ util_dump_member(stream, uint, state, fill_front);
+ util_dump_member(stream, uint, state, fill_back);
+ util_dump_member(stream, bool, state, offset_point);
+ util_dump_member(stream, bool, state, offset_line);
+ util_dump_member(stream, bool, state, offset_tri);
util_dump_member(stream, bool, state, scissor);
util_dump_member(stream, bool, state, poly_smooth);
util_dump_member(stream, bool, state, poly_stipple_enable);
diff --git a/src/gallium/auxiliary/util/u_dynarray.h b/src/gallium/auxiliary/util/u_dynarray.h
index 9d1c1713a7c..980cadf22d1 100644
--- a/src/gallium/auxiliary/util/u_dynarray.h
+++ b/src/gallium/auxiliary/util/u_dynarray.h
@@ -106,6 +106,9 @@ util_dynarray_trim(struct util_dynarray *buf)
#define util_dynarray_pop_ptr(buf, type) (type*)((char*)(buf)->data + ((buf)->size -= sizeof(type)))
#define util_dynarray_pop(buf, type) *util_dynarray_pop_ptr(buf, type)
#define util_dynarray_contains(buf, type) ((buf)->size >= sizeof(type))
+#define util_dynarray_element(buf, type, idx) ((type*)(buf)->data + (idx))
+#define util_dynarray_begin(buf) ((buf)->data)
+#define util_dynarray_end(buf) ((void*)util_dynarray_element((buf), char, (buf)->size))
#endif /* U_DYNARRAY_H */
diff --git a/src/gallium/auxiliary/util/u_format.c b/src/gallium/auxiliary/util/u_format.c
index c50c807eb89..4896faa12bf 100644
--- a/src/gallium/auxiliary/util/u_format.c
+++ b/src/gallium/auxiliary/util/u_format.c
@@ -120,11 +120,67 @@ util_format_write_4ub(enum pipe_format format, const uint8_t *src, unsigned src_
}
-static INLINE boolean
+boolean
+util_is_format_compatible(const struct util_format_description *src_desc,
+ const struct util_format_description *dst_desc)
+{
+ unsigned chan;
+
+ if (src_desc->format == dst_desc->format) {
+ return TRUE;
+ }
+
+ if (src_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN ||
+ dst_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
+ return FALSE;
+ }
+
+ if (src_desc->block.bits != dst_desc->block.bits ||
+ src_desc->nr_channels != dst_desc->nr_channels ||
+ src_desc->colorspace != dst_desc->colorspace) {
+ return FALSE;
+ }
+
+ for (chan = 0; chan < 4; ++chan) {
+ if (src_desc->channel[chan].size !=
+ dst_desc->channel[chan].size) {
+ return FALSE;
+ }
+ }
+
+ for (chan = 0; chan < 4; ++chan) {
+ enum util_format_swizzle swizzle = dst_desc->swizzle[chan];
+
+ if (swizzle < 4) {
+ if (src_desc->swizzle[chan] != swizzle) {
+ return FALSE;
+ }
+ if ((src_desc->channel[swizzle].type !=
+ dst_desc->channel[swizzle].type) ||
+ (src_desc->channel[swizzle].normalized !=
+ dst_desc->channel[swizzle].normalized)) {
+ return FALSE;
+ }
+ }
+ }
+
+ return TRUE;
+}
+
+
+boolean
util_format_fits_8unorm(const struct util_format_description *format_desc)
{
unsigned chan;
+ /*
+ * After linearized sRGB values require more than 8bits.
+ */
+
+ if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
+ return FALSE;
+ }
+
switch (format_desc->layout) {
case UTIL_FORMAT_LAYOUT_S3TC:
@@ -189,11 +245,14 @@ util_format_translate(enum pipe_format dst_format,
const struct util_format_description *src_format_desc;
uint8_t *dst_row;
const uint8_t *src_row;
- unsigned y_step;
+ unsigned x_step, y_step;
unsigned dst_step;
unsigned src_step;
- if (dst_format == src_format) {
+ dst_format_desc = util_format_description(dst_format);
+ src_format_desc = util_format_description(src_format);
+
+ if (util_is_format_compatible(src_format_desc, dst_format_desc)) {
/*
* Trivial case.
*/
@@ -204,9 +263,6 @@ util_format_translate(enum pipe_format dst_format,
return;
}
- dst_format_desc = util_format_description(dst_format);
- src_format_desc = util_format_description(src_format);
-
assert(dst_x % dst_format_desc->block.width == 0);
assert(dst_y % dst_format_desc->block.height == 0);
assert(src_x % src_format_desc->block.width == 0);
@@ -221,6 +277,7 @@ util_format_translate(enum pipe_format dst_format,
*/
y_step = MAX2(dst_format_desc->block.height, src_format_desc->block.height);
+ x_step = MAX2(dst_format_desc->block.width, src_format_desc->block.width);
assert(y_step % dst_format_desc->block.height == 0);
assert(y_step % src_format_desc->block.height == 0);
@@ -237,7 +294,7 @@ util_format_translate(enum pipe_format dst_format,
unsigned tmp_stride;
uint8_t *tmp_row;
- tmp_stride = width * 4 * sizeof *tmp_row;
+ tmp_stride = MAX2(width, x_step) * 4 * sizeof *tmp_row;
tmp_row = MALLOC(y_step * tmp_stride);
if (!tmp_row)
return;
@@ -262,7 +319,7 @@ util_format_translate(enum pipe_format dst_format,
unsigned tmp_stride;
float *tmp_row;
- tmp_stride = width * 4 * sizeof *tmp_row;
+ tmp_stride = MAX2(width, x_step) * 4 * sizeof *tmp_row;
tmp_row = MALLOC(y_step * tmp_stride);
if (!tmp_row)
return;
diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h
index 605b13bd114..03b73c0e98f 100644
--- a/src/gallium/auxiliary/util/u_format.h
+++ b/src/gallium/auxiliary/util/u_format.h
@@ -213,6 +213,16 @@ struct util_format_description
unsigned width, unsigned height);
/**
+ * Fetch a single pixel (i, j) from a block.
+ *
+ * XXX: Only defined for a very few select formats.
+ */
+ void
+ (*fetch_rgba_8unorm)(uint8_t *dst,
+ const uint8_t *src,
+ unsigned i, unsigned j);
+
+ /**
* Unpack pixel blocks to R32G32B32A32_FLOAT.
* Note: strides are in bytes.
*
@@ -332,12 +342,60 @@ util_format_name(enum pipe_format format)
assert(desc);
if (!desc) {
- return "???";
+ return "PIPE_FORMAT_???";
}
return desc->name;
}
+static INLINE const char *
+util_format_short_name(enum pipe_format format)
+{
+ const struct util_format_description *desc = util_format_description(format);
+
+ assert(desc);
+ if (!desc) {
+ return "???";
+ }
+
+ return desc->short_name;
+}
+
+/**
+ * Whether this format is plain, see UTIL_FORMAT_LAYOUT_PLAIN for more info.
+ */
+static INLINE boolean
+util_format_is_plain(enum pipe_format format)
+{
+ const struct util_format_description *desc = util_format_description(format);
+
+ if (!format) {
+ return FALSE;
+ }
+
+ return desc->layout == UTIL_FORMAT_LAYOUT_PLAIN ? TRUE : FALSE;
+}
+
+static INLINE boolean
+util_format_is_compressed(enum pipe_format format)
+{
+ const struct util_format_description *desc = util_format_description(format);
+
+ assert(desc);
+ if (!desc) {
+ return FALSE;
+ }
+
+ switch (desc->layout) {
+ case UTIL_FORMAT_LAYOUT_S3TC:
+ case UTIL_FORMAT_LAYOUT_RGTC:
+ /* XXX add other formats in the future */
+ return TRUE;
+ default:
+ return FALSE;
+ }
+}
+
static INLINE boolean
util_format_is_s3tc(enum pipe_format format)
{
@@ -382,6 +440,48 @@ util_format_is_depth_and_stencil(enum pipe_format format)
desc->swizzle[1] != UTIL_FORMAT_SWIZZLE_NONE) ? TRUE : FALSE;
}
+
+/**
+ * Give the RGBA colormask of the channels that can be represented in this
+ * format.
+ *
+ * That is, the channels whose values are preserved.
+ */
+static INLINE unsigned
+util_format_colormask(const struct util_format_description *desc)
+{
+ unsigned colormask;
+ unsigned chan;
+
+ switch (desc->colorspace) {
+ case UTIL_FORMAT_COLORSPACE_RGB:
+ case UTIL_FORMAT_COLORSPACE_SRGB:
+ case UTIL_FORMAT_COLORSPACE_YUV:
+ colormask = 0;
+ for (chan = 0; chan < 4; ++chan) {
+ if (desc->swizzle[chan] < 4) {
+ colormask |= (1 << chan);
+ }
+ }
+ return colormask;
+ case UTIL_FORMAT_COLORSPACE_ZS:
+ return 0;
+ default:
+ assert(0);
+ return 0;
+ }
+}
+
+
+/**
+ * Whether the src format can be blitted to destation format with a simple
+ * memcpy.
+ */
+boolean
+util_is_format_compatible(const struct util_format_description *src_desc,
+ const struct util_format_description *dst_desc);
+
+
/**
* Whether this format is a rgab8 variant.
*
@@ -573,6 +673,44 @@ util_format_has_alpha(enum pipe_format format)
}
/**
+ * Return the matching SRGB format, or PIPE_FORMAT_NONE if none.
+ */
+static INLINE enum pipe_format
+util_format_srgb(enum pipe_format format)
+{
+ switch (format) {
+ case PIPE_FORMAT_L8_UNORM:
+ return PIPE_FORMAT_L8_SRGB;
+ case PIPE_FORMAT_L8A8_UNORM:
+ return PIPE_FORMAT_L8A8_SRGB;
+ case PIPE_FORMAT_R8G8B8_UNORM:
+ return PIPE_FORMAT_R8G8B8_SRGB;
+ case PIPE_FORMAT_A8B8G8R8_UNORM:
+ return PIPE_FORMAT_A8B8G8R8_SRGB;
+ case PIPE_FORMAT_X8B8G8R8_UNORM:
+ return PIPE_FORMAT_X8B8G8R8_SRGB;
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ return PIPE_FORMAT_B8G8R8A8_SRGB;
+ case PIPE_FORMAT_B8G8R8X8_UNORM:
+ return PIPE_FORMAT_B8G8R8X8_SRGB;
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ return PIPE_FORMAT_A8R8G8B8_SRGB;
+ case PIPE_FORMAT_X8R8G8B8_UNORM:
+ return PIPE_FORMAT_X8R8G8B8_SRGB;
+ case PIPE_FORMAT_DXT1_RGB:
+ return PIPE_FORMAT_DXT1_SRGB;
+ case PIPE_FORMAT_DXT1_RGBA:
+ return PIPE_FORMAT_DXT1_SRGBA;
+ case PIPE_FORMAT_DXT3_RGBA:
+ return PIPE_FORMAT_DXT3_SRGBA;
+ case PIPE_FORMAT_DXT5_RGBA:
+ return PIPE_FORMAT_DXT5_SRGBA;
+ default:
+ return PIPE_FORMAT_NONE;
+ }
+}
+
+/**
* Return the number of components stored.
* Formats with block size != 1x1 will always have 1 component (the block).
*/
@@ -615,6 +753,9 @@ util_format_write_4ub(enum pipe_format format,
* Generic format conversion;
*/
+boolean
+util_format_fits_8unorm(const struct util_format_description *format_desc);
+
void
util_format_translate(enum pipe_format dst_format,
void *dst, unsigned dst_stride,
diff --git a/src/gallium/auxiliary/util/u_format_other.c b/src/gallium/auxiliary/util/u_format_other.c
index 723fa8c3bf9..fa42ec37138 100644
--- a/src/gallium/auxiliary/util/u_format_other.c
+++ b/src/gallium/auxiliary/util/u_format_other.c
@@ -121,6 +121,15 @@ util_format_r1_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
* A.k.a. D3DFMT_CxV8U8
*/
+static uint8_t
+r8g8bx_derive(int16_t r, int16_t g)
+{
+ /* Derive blue from red and green components.
+ * Apparently, we must always use integers to perform calculations,
+ * otherwise the results won't match D3D's CxV8U8 definition.
+ */
+ return (uint8_t)sqrtf(0x7f * 0x7f - r * r - g * g) * 0xff / 0x7f;
+}
void
util_format_r8g8bx_snorm_unpack_rgba_float(float *dst_row, unsigned dst_stride,
@@ -145,7 +154,7 @@ util_format_r8g8bx_snorm_unpack_rgba_float(float *dst_row, unsigned dst_stride,
dst[0] = (float)(r * (1.0f/0x7f)); /* r */
dst[1] = (float)(g * (1.0f/0x7f)); /* g */
- dst[2] = sqrtf(1.0f - dst[0] * dst[0] - dst[1] * dst[1]); /* b */
+ dst[2] = r8g8bx_derive(r, g) * (1.0f/0xff); /* b */
dst[3] = 1.0f; /* a */
dst += 4;
}
@@ -177,7 +186,7 @@ util_format_r8g8bx_snorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_strid
dst[0] = (uint8_t)(((uint16_t)MAX2(r, 0)) * 0xff / 0x7f); /* r */
dst[1] = (uint8_t)(((uint16_t)MAX2(g, 0)) * 0xff / 0x7f); /* g */
- dst[2] = (uint8_t)sqrtf(0x7f*0x7f - r * r - g * g) * 0xff / 0x7f; /* b */
+ dst[2] = r8g8bx_derive(r, g); /* b */
dst[3] = 255; /* a */
dst += 4;
}
@@ -262,6 +271,6 @@ util_format_r8g8bx_snorm_fetch_rgba_float(float *dst, const uint8_t *src,
dst[0] = r * (1.0f/0x7f); /* r */
dst[1] = g * (1.0f/0x7f); /* g */
- dst[2] = sqrtf(1.0f - dst[0] * dst[0] - dst[1] * dst[1]); /* b */
+ dst[2] = r8g8bx_derive(r, g) * (1.0f/0xff); /* b */
dst[3] = 1.0f; /* a */
}
diff --git a/src/gallium/auxiliary/util/u_format_pack.py b/src/gallium/auxiliary/util/u_format_pack.py
index 0c1bbc84c17..6d0016c0ad8 100644
--- a/src/gallium/auxiliary/util/u_format_pack.py
+++ b/src/gallium/auxiliary/util/u_format_pack.py
@@ -37,9 +37,6 @@
'''
-import sys
-import math
-
from u_format_parse import *
diff --git a/src/gallium/auxiliary/util/u_format_parse.py b/src/gallium/auxiliary/util/u_format_parse.py
index 7076c676aaf..ddb9f2443d9 100755
--- a/src/gallium/auxiliary/util/u_format_parse.py
+++ b/src/gallium/auxiliary/util/u_format_parse.py
@@ -43,7 +43,7 @@ ZS = 'zs'
def is_pot(x):
- return (x & (x - 1)) == 0;
+ return (x & (x - 1)) == 0
VERY_LARGE = 99999999999999999999999
diff --git a/src/gallium/auxiliary/util/u_format_s3tc.c b/src/gallium/auxiliary/util/u_format_s3tc.c
index 5b279b8fe26..bb989c29d81 100644
--- a/src/gallium/auxiliary/util/u_format_s3tc.c
+++ b/src/gallium/auxiliary/util/u_format_s3tc.c
@@ -120,7 +120,7 @@ util_format_s3tc_init(void)
library = util_dl_open(DXTN_LIBNAME);
if (!library) {
debug_printf("couldn't open " DXTN_LIBNAME ", software DXTn "
- "compression/decompression unavailable");
+ "compression/decompression unavailable\n");
return;
}
@@ -142,7 +142,7 @@ util_format_s3tc_init(void)
!util_format_dxtn_pack) {
debug_printf("couldn't reference all symbols in " DXTN_LIBNAME
", software DXTn compression/decompression "
- "unavailable");
+ "unavailable\n");
util_dl_close(library);
return;
}
diff --git a/src/gallium/auxiliary/util/u_format_srgb.py b/src/gallium/auxiliary/util/u_format_srgb.py
index a4c76dc00b3..3e8000f3687 100644
--- a/src/gallium/auxiliary/util/u_format_srgb.py
+++ b/src/gallium/auxiliary/util/u_format_srgb.py
@@ -39,7 +39,6 @@
'''
-import sys
import math
diff --git a/src/gallium/auxiliary/util/u_format_table.py b/src/gallium/auxiliary/util/u_format_table.py
index ae9a5981973..f0b407b8b8e 100755
--- a/src/gallium/auxiliary/util/u_format_table.py
+++ b/src/gallium/auxiliary/util/u_format_table.py
@@ -132,12 +132,17 @@ def write_format_table(formats):
if format.colorspace != ZS:
print " &util_format_%s_unpack_rgba_8unorm," % format.short_name()
print " &util_format_%s_pack_rgba_8unorm," % format.short_name()
+ if format.layout == 's3tc':
+ print " &util_format_%s_fetch_rgba_8unorm," % format.short_name()
+ else:
+ print " NULL, /* fetch_rgba_8unorm */"
print " &util_format_%s_unpack_rgba_float," % format.short_name()
print " &util_format_%s_pack_rgba_float," % format.short_name()
print " &util_format_%s_fetch_rgba_float," % format.short_name()
else:
print " NULL, /* unpack_rgba_8unorm */"
print " NULL, /* pack_rgba_8unorm */"
+ print " NULL, /* fetch_rgba_8unorm */"
print " NULL, /* unpack_rgba_float */"
print " NULL, /* pack_rgba_float */"
print " NULL, /* fetch_rgba_float */"
diff --git a/src/gallium/auxiliary/util/u_framebuffer.c b/src/gallium/auxiliary/util/u_framebuffer.c
new file mode 100644
index 00000000000..7803ec6a8b5
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_framebuffer.c
@@ -0,0 +1,148 @@
+/**************************************************************************
+ *
+ * Copyright 2009-2010 VMware, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Framebuffer utility functions.
+ *
+ * @author Brian Paul
+ */
+
+
+#include "pipe/p_screen.h"
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "util/u_inlines.h"
+
+#include "util/u_memory.h"
+#include "util/u_framebuffer.h"
+
+
+/**
+ * Compare pipe_framebuffer_state objects.
+ * \return TRUE if same, FALSE if different
+ */
+boolean
+util_framebuffer_state_equal(const struct pipe_framebuffer_state *dst,
+ const struct pipe_framebuffer_state *src)
+{
+ unsigned i;
+
+ if (dst->width != src->width ||
+ dst->height != src->height)
+ return FALSE;
+
+ for (i = 0; i < Elements(src->cbufs); i++) {
+ if (dst->cbufs[i] != src->cbufs[i]) {
+ return FALSE;
+ }
+ }
+
+ if (dst->nr_cbufs != src->nr_cbufs) {
+ return FALSE;
+ }
+
+ if (dst->zsbuf != src->zsbuf) {
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+
+/**
+ * Copy framebuffer state from src to dst, updating refcounts.
+ */
+void
+util_copy_framebuffer_state(struct pipe_framebuffer_state *dst,
+ const struct pipe_framebuffer_state *src)
+{
+ unsigned i;
+
+ dst->width = src->width;
+ dst->height = src->height;
+
+ for (i = 0; i < src->nr_cbufs; i++)
+ pipe_surface_reference(&dst->cbufs[i], src->cbufs[i]);
+
+ for (i = src->nr_cbufs; i < dst->nr_cbufs; i++)
+ pipe_surface_reference(&dst->cbufs[i], NULL);
+
+ dst->nr_cbufs = src->nr_cbufs;
+
+ pipe_surface_reference(&dst->zsbuf, src->zsbuf);
+}
+
+
+void
+util_unreference_framebuffer_state(struct pipe_framebuffer_state *fb)
+{
+ unsigned i;
+
+ for (i = 0; i < fb->nr_cbufs; i++) {
+ pipe_surface_reference(&fb->cbufs[i], NULL);
+ }
+
+ pipe_surface_reference(&fb->zsbuf, NULL);
+
+ fb->width = fb->height = 0;
+ fb->nr_cbufs = 0;
+}
+
+
+/* Where multiple sizes are allowed for framebuffer surfaces, find the
+ * minimum width and height of all bound surfaces.
+ */
+boolean
+util_framebuffer_min_size(const struct pipe_framebuffer_state *fb,
+ unsigned *width,
+ unsigned *height)
+{
+ unsigned w = ~0;
+ unsigned h = ~0;
+ unsigned i;
+
+ for (i = 0; i < fb->nr_cbufs; i++) {
+ w = MIN2(w, fb->cbufs[i]->width);
+ h = MIN2(h, fb->cbufs[i]->height);
+ }
+
+ if (fb->zsbuf) {
+ w = MIN2(w, fb->zsbuf->width);
+ h = MIN2(h, fb->zsbuf->height);
+ }
+
+ if (w == ~0) {
+ *width = 0;
+ *height = 0;
+ return FALSE;
+ }
+ else {
+ *width = w;
+ *height = h;
+ return TRUE;
+ }
+}
diff --git a/src/gallium/auxiliary/util/u_framebuffer.h b/src/gallium/auxiliary/util/u_framebuffer.h
new file mode 100644
index 00000000000..e7dc1e9e41d
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_framebuffer.h
@@ -0,0 +1,54 @@
+/**************************************************************************
+ *
+ * Copyright 2009-2010 VMware, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef U_FRAMEBUFFER_H
+#define U_FRAMEBUFFER_H
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_state.h"
+
+
+extern boolean
+util_framebuffer_state_equal(const struct pipe_framebuffer_state *dst,
+ const struct pipe_framebuffer_state *src);
+
+extern void
+util_copy_framebuffer_state(struct pipe_framebuffer_state *dst,
+ const struct pipe_framebuffer_state *src);
+
+
+extern void
+util_unreference_framebuffer_state(struct pipe_framebuffer_state *fb);
+
+
+extern boolean
+util_framebuffer_min_size(const struct pipe_framebuffer_state *fb,
+ unsigned *width,
+ unsigned *height);
+
+#endif /* U_FRAMEBUFFER_H */
diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c
index eee6030ddcc..6a931a95819 100644
--- a/src/gallium/auxiliary/util/u_gen_mipmap.c
+++ b/src/gallium/auxiliary/util/u_gen_mipmap.c
@@ -1255,6 +1255,7 @@ fallback_gen_mipmap(struct gen_mipmap_state *ctx,
make_1d_mipmap(ctx, pt, face, baseLevel, lastLevel);
break;
case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_RECT:
case PIPE_TEXTURE_CUBE:
make_2d_mipmap(ctx, pt, face, baseLevel, lastLevel);
break;
@@ -1295,8 +1296,7 @@ util_create_gen_mipmap(struct pipe_context *pipe,
/* rasterizer */
memset(&ctx->rasterizer, 0, sizeof(ctx->rasterizer));
- ctx->rasterizer.front_winding = PIPE_WINDING_CW;
- ctx->rasterizer.cull_mode = PIPE_WINDING_NONE;
+ ctx->rasterizer.cull_face = PIPE_FACE_NONE;
ctx->rasterizer.gl_rasterization_rules = 1;
/* sampler state */
@@ -1328,8 +1328,10 @@ util_create_gen_mipmap(struct pipe_context *pipe,
}
/* fragment shader */
- ctx->fs2d = util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_2D);
- ctx->fsCube = util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_CUBE);
+ ctx->fs2d = util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_2D,
+ TGSI_INTERPOLATE_LINEAR);
+ ctx->fsCube = util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_CUBE,
+ TGSI_INTERPOLATE_LINEAR);
/* vertex data that doesn't change */
for (i = 0; i < 4; i++) {
@@ -1494,7 +1496,7 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
/* check if we can render in the texture's format */
if (!screen->is_format_supported(screen, psv->format, PIPE_TEXTURE_2D,
- PIPE_BIND_RENDER_TARGET, 0)) {
+ pt->nr_samples, PIPE_BIND_RENDER_TARGET, 0)) {
fallback_gen_mipmap(ctx, pt, face, baseLevel, lastLevel);
return;
}
diff --git a/src/gallium/auxiliary/util/u_half.py b/src/gallium/auxiliary/util/u_half.py
index 8007482e971..915cf3b9273 100644
--- a/src/gallium/auxiliary/util/u_half.py
+++ b/src/gallium/auxiliary/util/u_half.py
@@ -83,11 +83,11 @@ for i in xrange(1, 1024):
# normalize number
while (m & 0x00800000) == 0:
- e -= 0x00800000;
- m <<= 1;
+ e -= 0x00800000
+ m <<= 1
- m &= ~0x00800000;
- e += 0x38800000;
+ m &= ~0x00800000
+ e += 0x38800000
value(m | e)
# normals
diff --git a/src/gallium/auxiliary/util/u_index_modify.c b/src/gallium/auxiliary/util/u_index_modify.c
new file mode 100644
index 00000000000..65b079ed537
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_index_modify.c
@@ -0,0 +1,127 @@
+/*
+ * Copyright 2010 Marek Olšák <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "pipe/p_context.h"
+#include "util/u_index_modify.h"
+#include "util/u_inlines.h"
+
+void util_shorten_ubyte_elts(struct pipe_context *context,
+ struct pipe_resource **elts,
+ int index_bias,
+ unsigned start,
+ unsigned count)
+{
+ struct pipe_screen* screen = context->screen;
+ struct pipe_resource* new_elts;
+ unsigned char *in_map;
+ unsigned short *out_map;
+ struct pipe_transfer *src_transfer, *dst_transfer;
+ unsigned i;
+
+ new_elts = pipe_buffer_create(screen,
+ PIPE_BIND_INDEX_BUFFER,
+ 2 * count);
+
+ in_map = pipe_buffer_map(context, *elts, PIPE_TRANSFER_READ, &src_transfer);
+ out_map = pipe_buffer_map(context, new_elts, PIPE_TRANSFER_WRITE, &dst_transfer);
+
+ in_map += start;
+
+ for (i = 0; i < count; i++) {
+ *out_map = (unsigned short)(*in_map + index_bias);
+ in_map++;
+ out_map++;
+ }
+
+ pipe_buffer_unmap(context, *elts, src_transfer);
+ pipe_buffer_unmap(context, new_elts, dst_transfer);
+
+ *elts = new_elts;
+}
+
+void util_rebuild_ushort_elts(struct pipe_context *context,
+ struct pipe_resource **elts,
+ int index_bias,
+ unsigned start, unsigned count)
+{
+ struct pipe_transfer *in_transfer = NULL;
+ struct pipe_transfer *out_transfer = NULL;
+ struct pipe_resource *new_elts;
+ unsigned short *in_map;
+ unsigned short *out_map;
+ unsigned i;
+
+ new_elts = pipe_buffer_create(context->screen,
+ PIPE_BIND_INDEX_BUFFER,
+ 2 * count);
+
+ in_map = pipe_buffer_map(context, *elts,
+ PIPE_TRANSFER_READ, &in_transfer);
+ out_map = pipe_buffer_map(context, new_elts,
+ PIPE_TRANSFER_WRITE, &out_transfer);
+
+ in_map += start;
+ for (i = 0; i < count; i++) {
+ *out_map = (unsigned short)(*in_map + index_bias);
+ in_map++;
+ out_map++;
+ }
+
+ pipe_buffer_unmap(context, *elts, in_transfer);
+ pipe_buffer_unmap(context, new_elts, out_transfer);
+
+ *elts = new_elts;
+}
+
+void util_rebuild_uint_elts(struct pipe_context *context,
+ struct pipe_resource **elts,
+ int index_bias,
+ unsigned start, unsigned count)
+{
+ struct pipe_transfer *in_transfer = NULL;
+ struct pipe_transfer *out_transfer = NULL;
+ struct pipe_resource *new_elts;
+ unsigned int *in_map;
+ unsigned int *out_map;
+ unsigned i;
+
+ new_elts = pipe_buffer_create(context->screen,
+ PIPE_BIND_INDEX_BUFFER,
+ 2 * count);
+
+ in_map = pipe_buffer_map(context, *elts,
+ PIPE_TRANSFER_READ, &in_transfer);
+ out_map = pipe_buffer_map(context, new_elts,
+ PIPE_TRANSFER_WRITE, &out_transfer);
+
+ in_map += start;
+ for (i = 0; i < count; i++) {
+ *out_map = (unsigned int)(*in_map + index_bias);
+ in_map++;
+ out_map++;
+ }
+
+ pipe_buffer_unmap(context, *elts, in_transfer);
+ pipe_buffer_unmap(context, new_elts, out_transfer);
+
+ *elts = new_elts;
+}
diff --git a/src/gallium/auxiliary/util/u_index_modify.h b/src/gallium/auxiliary/util/u_index_modify.h
new file mode 100644
index 00000000000..01a6cae94fc
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_index_modify.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2010 Marek Olšák <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef UTIL_INDEX_MODIFY_H
+#define UTIL_INDEX_MODIFY_H
+
+void util_shorten_ubyte_elts(struct pipe_context *context,
+ struct pipe_resource **elts,
+ int index_bias,
+ unsigned start,
+ unsigned count);
+
+void util_rebuild_ushort_elts(struct pipe_context *context,
+ struct pipe_resource **elts,
+ int index_bias,
+ unsigned start, unsigned count);
+
+void util_rebuild_uint_elts(struct pipe_context *context,
+ struct pipe_resource **elts,
+ int index_bias,
+ unsigned start, unsigned count);
+#endif
diff --git a/src/gallium/auxiliary/util/u_inlines.h b/src/gallium/auxiliary/util/u_inlines.h
index a48689ee8be..6ed39561fbe 100644
--- a/src/gallium/auxiliary/util/u_inlines.h
+++ b/src/gallium/auxiliary/util/u_inlines.h
@@ -33,6 +33,8 @@
#include "pipe/p_state.h"
#include "pipe/p_screen.h"
#include "util/u_debug.h"
+#include "util/u_debug_describe.h"
+#include "util/u_debug_refcnt.h"
#include "util/u_atomic.h"
#include "util/u_box.h"
#include "util/u_math.h"
@@ -67,7 +69,9 @@ pipe_is_referenced(struct pipe_reference *reference)
* \return TRUE if the object's refcount hits zero and should be destroyed.
*/
static INLINE boolean
-pipe_reference(struct pipe_reference *ptr, struct pipe_reference *reference)
+pipe_reference_described(struct pipe_reference *ptr,
+ struct pipe_reference *reference,
+ debug_reference_descriptor get_desc)
{
boolean destroy = FALSE;
@@ -76,6 +80,7 @@ pipe_reference(struct pipe_reference *ptr, struct pipe_reference *reference)
if (reference) {
assert(pipe_is_referenced(reference));
p_atomic_inc(&reference->count);
+ debug_reference(reference, get_desc, 1);
}
if (ptr) {
@@ -83,41 +88,49 @@ pipe_reference(struct pipe_reference *ptr, struct pipe_reference *reference)
if (p_atomic_dec_zero(&ptr->count)) {
destroy = TRUE;
}
+ debug_reference(ptr, get_desc, -1);
}
}
return destroy;
}
+static INLINE boolean
+pipe_reference(struct pipe_reference *ptr, struct pipe_reference *reference)
+{
+ return pipe_reference_described(ptr, reference,
+ (debug_reference_descriptor)debug_describe_reference);
+}
static INLINE void
pipe_surface_reference(struct pipe_surface **ptr, struct pipe_surface *surf)
{
struct pipe_surface *old_surf = *ptr;
- if (pipe_reference(&(*ptr)->reference, &surf->reference))
+ if (pipe_reference_described(&(*ptr)->reference, &surf->reference,
+ (debug_reference_descriptor)debug_describe_surface))
old_surf->texture->screen->tex_surface_destroy(old_surf);
*ptr = surf;
}
-
static INLINE void
pipe_resource_reference(struct pipe_resource **ptr, struct pipe_resource *tex)
{
struct pipe_resource *old_tex = *ptr;
- if (pipe_reference(&(*ptr)->reference, &tex->reference))
+ if (pipe_reference_described(&(*ptr)->reference, &tex->reference,
+ (debug_reference_descriptor)debug_describe_resource))
old_tex->screen->resource_destroy(old_tex->screen, old_tex);
*ptr = tex;
}
-
static INLINE void
pipe_sampler_view_reference(struct pipe_sampler_view **ptr, struct pipe_sampler_view *view)
{
struct pipe_sampler_view *old_view = *ptr;
- if (pipe_reference(&(*ptr)->reference, &view->reference))
+ if (pipe_reference_described(&(*ptr)->reference, &view->reference,
+ (debug_reference_descriptor)debug_describe_sampler_view))
old_view->context->sampler_view_destroy(old_view->context, old_view);
*ptr = view;
}
@@ -369,6 +382,23 @@ pipe_transfer_destroy( struct pipe_context *context,
}
+static INLINE boolean util_get_offset(
+ const struct pipe_rasterizer_state *templ,
+ unsigned fill_mode)
+{
+ switch(fill_mode) {
+ case PIPE_POLYGON_MODE_POINT:
+ return templ->offset_point;
+ case PIPE_POLYGON_MODE_LINE:
+ return templ->offset_line;
+ case PIPE_POLYGON_MODE_FILL:
+ return templ->offset_tri;
+ default:
+ assert(0);
+ return FALSE;
+ }
+}
+
#ifdef __cplusplus
}
#endif
diff --git a/src/gallium/auxiliary/util/u_linear.h b/src/gallium/auxiliary/util/u_linear.h
index 42c40b2aa75..81ffc9fb27d 100644
--- a/src/gallium/auxiliary/util/u_linear.h
+++ b/src/gallium/auxiliary/util/u_linear.h
@@ -33,6 +33,7 @@
#ifndef U_LINEAR_H
#define U_LINEAR_H
+#include "pipe/p_compiler.h"
#include "pipe/p_format.h"
struct u_linear_format_block
diff --git a/src/gallium/auxiliary/util/u_linkage.c b/src/gallium/auxiliary/util/u_linkage.c
new file mode 100644
index 00000000000..2f6f41ba843
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_linkage.c
@@ -0,0 +1,149 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_debug.h"
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_scan.h"
+#include "util/u_linkage.h"
+
+/* we must only record the registers that are actually used, not just declared */
+static INLINE boolean
+util_semantic_set_test_and_set(struct util_semantic_set *set, unsigned value)
+{
+ unsigned mask = 1 << (value % (sizeof(long) * 8));
+ unsigned long *p = &set->masks[value / (sizeof(long) * 8)];
+ unsigned long v = *p & mask;
+ *p |= mask;
+ return !!v;
+}
+
+unsigned
+util_semantic_set_from_program_file(struct util_semantic_set *set, const struct tgsi_token *tokens, enum tgsi_file_type file)
+{
+ struct tgsi_shader_info info;
+ struct tgsi_parse_context parse;
+ unsigned count = 0;
+ ubyte *semantic_name;
+ ubyte *semantic_index;
+
+ tgsi_scan_shader(tokens, &info);
+
+ if(file == TGSI_FILE_INPUT)
+ {
+ semantic_name = info.input_semantic_name;
+ semantic_index = info.input_semantic_index;
+ }
+ else if(file == TGSI_FILE_OUTPUT)
+ {
+ semantic_name = info.output_semantic_name;
+ semantic_index = info.output_semantic_index;
+ }
+ else
+ {
+ assert(0);
+ semantic_name = NULL;
+ semantic_index = NULL;
+ }
+
+ tgsi_parse_init(&parse, tokens);
+
+ memset(set->masks, 0, sizeof(set->masks));
+ while(!tgsi_parse_end_of_tokens(&parse))
+ {
+ tgsi_parse_token(&parse);
+
+ if(parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION)
+ {
+ const struct tgsi_full_instruction *finst = &parse.FullToken.FullInstruction;
+ unsigned i;
+ for(i = 0; i < finst->Instruction.NumDstRegs; ++i)
+ {
+ if(finst->Dst[i].Register.File == file)
+ {
+ unsigned idx = finst->Dst[i].Register.Index;
+ if(semantic_name[idx] == TGSI_SEMANTIC_GENERIC)
+ {
+ if(!util_semantic_set_test_and_set(set, semantic_index[idx]))
+ ++count;
+ }
+ }
+ }
+
+ for(i = 0; i < finst->Instruction.NumSrcRegs; ++i)
+ {
+ if(finst->Src[i].Register.File == file)
+ {
+ unsigned idx = finst->Src[i].Register.Index;
+ if(semantic_name[idx] == TGSI_SEMANTIC_GENERIC)
+ {
+ if(!util_semantic_set_test_and_set(set, semantic_index[idx]))
+ ++count;
+ }
+ }
+ }
+ }
+ }
+ tgsi_parse_free(&parse);
+
+ return count;
+}
+
+#define UTIL_SEMANTIC_SET_FOR_EACH(i, set) for(i = 0; i < 256; ++i) if(set->masks[i / (sizeof(long) * 8)] & (1 << (i % (sizeof(long) * 8))))
+
+void
+util_semantic_layout_from_set(unsigned char *layout, const struct util_semantic_set *set, unsigned efficient_slots, unsigned num_slots)
+{
+ int first = -1;
+ int last = -1;
+ unsigned i;
+
+ memset(layout, 0xff, num_slots);
+
+ UTIL_SEMANTIC_SET_FOR_EACH(i, set)
+ {
+ if(first < 0)
+ first = i;
+ last = i;
+ }
+
+ if(last < efficient_slots)
+ {
+ UTIL_SEMANTIC_SET_FOR_EACH(i, set)
+ layout[i] = i;
+ }
+ else if((last - first) < efficient_slots)
+ {
+ UTIL_SEMANTIC_SET_FOR_EACH(i, set)
+ layout[i - first] = i;
+ }
+ else
+ {
+ unsigned idx = 0;
+ UTIL_SEMANTIC_SET_FOR_EACH(i, set)
+ layout[idx++] = i;
+ }
+}
diff --git a/src/gallium/auxiliary/util/u_linkage.h b/src/gallium/auxiliary/util/u_linkage.h
new file mode 100644
index 00000000000..4720e0ee603
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_linkage.h
@@ -0,0 +1,66 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef U_LINKAGE_H_
+#define U_LINKAGE_H_
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_shader_tokens.h"
+
+struct util_semantic_set
+{
+ unsigned long masks[256 / 8 / sizeof(unsigned long)];
+};
+
+static INLINE bool
+util_semantic_set_contains(struct util_semantic_set *set, unsigned char value)
+{
+ return !!(set->masks[value / (sizeof(long) * 8)] & (1 << (value / (sizeof(long) * 8))));
+}
+
+unsigned util_semantic_set_from_program_file(struct util_semantic_set *set, const struct tgsi_token *tokens, enum tgsi_file_type file);
+
+/* efficient_slots is the number of slots such that hardware performance is
+ * the same for using that amount, with holes, or less slots but with less
+ * holes.
+ *
+ * num_slots is the size of the layout array and hardware limit instead.
+ *
+ * efficient_slots == 0 or efficient_solts == num_slots are typical settings.
+ */
+void util_semantic_layout_from_set(unsigned char *layout, const struct util_semantic_set *set, unsigned efficient_slots, unsigned num_slots);
+
+static INLINE void
+util_semantic_table_from_layout(unsigned char *table, unsigned char *layout, unsigned char first_slot_value, unsigned char num_slots)
+{
+ int i;
+ memset(table, 0xff, sizeof(table));
+
+ for(i = 0; i < num_slots; ++i)
+ table[layout[i]] = first_slot_value + i;
+}
+
+#endif /* U_LINKAGE_H_ */
diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h
index d1ec13def30..69a76814945 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -168,6 +168,9 @@ static INLINE float logf( float f )
#undef logf
#define logf(x) ((float)log((double)(x)))
#endif /* logf */
+
+#define isfinite(x) _finite((double)(x))
+#define isnan(x) _isnan((double)(x))
#endif
static INLINE double log2( double x )
@@ -335,26 +338,25 @@ util_iround(float f)
}
-
/**
- * Test if x is NaN or +/- infinity.
+ * Approximate floating point comparison
*/
static INLINE boolean
-util_is_inf_or_nan(float x)
+util_is_approx(float a, float b, float tol)
{
- union fi tmp;
- tmp.f = x;
- return !(int)((unsigned int)((tmp.i & 0x7fffffff)-0x7f800000) >> 31);
+ return fabs(b - a) <= tol;
}
/**
- * Test whether x is a power of two.
+ * Test if x is NaN or +/- infinity.
*/
static INLINE boolean
-util_is_pot(unsigned x)
+util_is_inf_or_nan(float x)
{
- return (x & (x - 1)) == 0;
+ union fi tmp;
+ tmp.f = x;
+ return !(int)((unsigned int)((tmp.i & 0x7fffffff)-0x7f800000) >> 31);
}
@@ -554,13 +556,30 @@ util_bswap16(uint16_t n)
#define MIN3( A, B, C ) MIN2( MIN2( A, B ), C )
#define MAX3( A, B, C ) MAX2( MAX2( A, B ), C )
+#define MIN4( A, B, C, D ) MIN2( MIN2( A, B ), MIN2(C, D) )
+#define MAX4( A, B, C, D ) MAX2( MAX2( A, B ), MAX2(C, D) )
+
+/**
+ * Align a value, only works pot alignemnts.
+ */
static INLINE int
align(int value, int alignment)
{
return (value + alignment - 1) & ~(alignment - 1);
}
+/**
+ * Works like align but on npot alignments.
+ */
+static INLINE size_t
+util_align_npot(size_t value, size_t alignment)
+{
+ if (value % alignment)
+ return value + (alignment - (value % alignment));
+ return value;
+}
+
static INLINE unsigned
u_minify(unsigned value, unsigned levels)
{
diff --git a/src/gallium/auxiliary/util/u_mempool.c b/src/gallium/auxiliary/util/u_mempool.c
new file mode 100644
index 00000000000..1f336b39a1a
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_mempool.c
@@ -0,0 +1,169 @@
+/*
+ * Copyright 2010 Marek Olšák <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "util/u_mempool.h"
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_simple_list.h"
+
+#include <stdio.h>
+
+#define UTIL_MEMPOOL_MAGIC 0xcafe4321
+
+/* The block is either allocated memory or free space. */
+struct util_mempool_block {
+ /* The header. */
+ /* The first next free block. */
+ struct util_mempool_block *next_free;
+
+ intptr_t magic;
+
+ /* Memory after the last member is dedicated to the block itself.
+ * The allocated size is always larger than this structure. */
+};
+
+static struct util_mempool_block *
+util_mempool_get_block(struct util_mempool *pool,
+ struct util_mempool_page *page, unsigned index)
+{
+ return (struct util_mempool_block*)
+ ((uint8_t*)page + sizeof(struct util_mempool_page) +
+ (pool->block_size * index));
+}
+
+static void util_mempool_add_new_page(struct util_mempool *pool)
+{
+ struct util_mempool_page *page;
+ struct util_mempool_block *block;
+ int i;
+
+ page = MALLOC(pool->page_size);
+ insert_at_tail(&pool->list, page);
+
+ /* Mark all blocks as free. */
+ for (i = 0; i < pool->num_blocks-1; i++) {
+ block = util_mempool_get_block(pool, page, i);
+ block->next_free = util_mempool_get_block(pool, page, i+1);
+ block->magic = UTIL_MEMPOOL_MAGIC;
+ }
+
+ block = util_mempool_get_block(pool, page, pool->num_blocks-1);
+ block->next_free = pool->first_free;
+ block->magic = UTIL_MEMPOOL_MAGIC;
+ pool->first_free = util_mempool_get_block(pool, page, 0);
+ pool->num_pages++;
+
+#if 0
+ fprintf(stderr, "New page! Num of pages: %i\n", pool->num_pages);
+#endif
+}
+
+static void *util_mempool_malloc_st(struct util_mempool *pool)
+{
+ struct util_mempool_block *block;
+
+ if (!pool->first_free)
+ util_mempool_add_new_page(pool);
+
+ block = pool->first_free;
+ assert(block->magic == UTIL_MEMPOOL_MAGIC);
+ pool->first_free = block->next_free;
+
+ return (uint8_t*)block + sizeof(struct util_mempool_block);
+}
+
+static void util_mempool_free_st(struct util_mempool *pool, void *ptr)
+{
+ struct util_mempool_block *block =
+ (struct util_mempool_block*)
+ ((uint8_t*)ptr - sizeof(struct util_mempool_block));
+
+ assert(block->magic == UTIL_MEMPOOL_MAGIC);
+ block->next_free = pool->first_free;
+ pool->first_free = block;
+}
+
+static void *util_mempool_malloc_mt(struct util_mempool *pool)
+{
+ void *mem;
+
+ pipe_mutex_lock(pool->mutex);
+ mem = util_mempool_malloc_st(pool);
+ pipe_mutex_unlock(pool->mutex);
+ return mem;
+}
+
+static void util_mempool_free_mt(struct util_mempool *pool, void *ptr)
+{
+ pipe_mutex_lock(pool->mutex);
+ util_mempool_free_st(pool, ptr);
+ pipe_mutex_unlock(pool->mutex);
+}
+
+void util_mempool_set_thread_safety(struct util_mempool *pool,
+ enum util_mempool_threading threading)
+{
+ pool->threading = threading;
+
+ if (threading) {
+ pool->malloc = util_mempool_malloc_mt;
+ pool->free = util_mempool_free_mt;
+ } else {
+ pool->malloc = util_mempool_malloc_st;
+ pool->free = util_mempool_free_st;
+ }
+}
+
+void util_mempool_create(struct util_mempool *pool,
+ unsigned item_size,
+ unsigned num_blocks,
+ enum util_mempool_threading threading)
+{
+ item_size = align(item_size, sizeof(intptr_t));
+
+ pool->num_pages = 0;
+ pool->num_blocks = num_blocks;
+ pool->block_size = sizeof(struct util_mempool_block) + item_size;
+ pool->block_size = align(pool->block_size, sizeof(intptr_t));
+ pool->page_size = sizeof(struct util_mempool_page) +
+ num_blocks * pool->block_size;
+ pool->first_free = NULL;
+
+ make_empty_list(&pool->list);
+
+ pipe_mutex_init(pool->mutex);
+
+ util_mempool_set_thread_safety(pool, threading);
+}
+
+void util_mempool_destroy(struct util_mempool *pool)
+{
+ struct util_mempool_page *page, *temp;
+
+ foreach_s(page, temp, &pool->list) {
+ remove_from_list(page);
+ FREE(page);
+ }
+
+ pipe_mutex_destroy(pool->mutex);
+}
diff --git a/src/gallium/auxiliary/util/u_mempool.h b/src/gallium/auxiliary/util/u_mempool.h
new file mode 100644
index 00000000000..a5b5d6a9b7c
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_mempool.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright 2010 Marek Olšák <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+/**
+ * @file
+ * Simple memory pool for equally sized memory allocations.
+ * util_mempool_malloc and util_mempool_free are in O(1).
+ *
+ * Good for allocations which have very low lifetime and are allocated
+ * and freed very often. Use a profiler first!
+ *
+ * Candidates: get_transfer, user_buffer_create
+ *
+ * @author Marek Olšák
+ */
+
+#ifndef U_MEMPOOL_H
+#define U_MEMPOOL_H
+
+#include "os/os_thread.h"
+
+enum util_mempool_threading {
+ UTIL_MEMPOOL_SINGLETHREADED = FALSE,
+ UTIL_MEMPOOL_MULTITHREADED = TRUE
+};
+
+/* The page is an array of blocks (allocations). */
+struct util_mempool_page {
+ /* The header (linked-list pointers). */
+ struct util_mempool_page *prev, *next;
+
+ /* Memory after the last member is dedicated to the page itself.
+ * The allocated size is always larger than this structure. */
+};
+
+struct util_mempool {
+ /* Public members. */
+ void *(*malloc)(struct util_mempool *pool);
+ void (*free)(struct util_mempool *pool, void *ptr);
+
+ /* Private members. */
+ struct util_mempool_block *first_free;
+
+ struct util_mempool_page list;
+
+ unsigned block_size;
+ unsigned page_size;
+ unsigned num_blocks;
+ unsigned num_pages;
+ enum util_mempool_threading threading;
+
+ pipe_mutex mutex;
+};
+
+void util_mempool_create(struct util_mempool *pool,
+ unsigned item_size,
+ unsigned num_blocks,
+ enum util_mempool_threading threading);
+
+void util_mempool_destroy(struct util_mempool *pool);
+
+void util_mempool_set_thread_safety(struct util_mempool *pool,
+ enum util_mempool_threading threading);
+
+#define util_mempool_malloc(pool) (pool)->malloc(pool)
+#define util_mempool_free(pool, ptr) (pool)->free(pool, ptr)
+
+#endif
diff --git a/src/gallium/auxiliary/util/u_network.c b/src/gallium/auxiliary/util/u_network.c
index 87ee0e47685..77f2c5fc7de 100644
--- a/src/gallium/auxiliary/util/u_network.c
+++ b/src/gallium/auxiliary/util/u_network.c
@@ -6,7 +6,7 @@
#if defined(PIPE_SUBSYSTEM_WINDOWS_USER)
# include <winsock2.h>
# include <windows.h>
-#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_APPLE)
+#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_CYGWIN)
# include <sys/socket.h>
# include <netinet/in.h>
# include <unistd.h>
diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h
index 3ebef9fb749..5378f2d782f 100644
--- a/src/gallium/auxiliary/util/u_pack_color.h
+++ b/src/gallium/auxiliary/util/u_pack_color.h
@@ -42,12 +42,18 @@
#include "util/u_math.h"
-
+/**
+ * Helper union for packing pixel values.
+ * Will often contain values in formats which are too complex to be described
+ * in simple terms, hence might just effectively contain a number of bytes.
+ * Must be big enough to hold data for all formats (currently 256 bits).
+ */
union util_color {
ubyte ub;
ushort us;
uint ui;
float f[4];
+ double d[4];
};
/**
@@ -388,7 +394,7 @@ util_pack_color(const float rgba[4], enum pipe_format format, union util_color *
return;
case PIPE_FORMAT_B4G4R4A4_UNORM:
{
- uc->ub = ((a & 0xf0) << 8) | ((r & 0xf0) << 4) | ((g & 0xf0) << 0) | (b >> 4);
+ uc->us = ((a & 0xf0) << 8) | ((r & 0xf0) << 4) | ((g & 0xf0) << 0) | (b >> 4);
}
return;
case PIPE_FORMAT_A8_UNORM:
@@ -425,13 +431,65 @@ util_pack_color(const float rgba[4], enum pipe_format format, union util_color *
}
}
+/* Integer versions of util_pack_z and util_pack_z_stencil - useful for
+ * constructing clear masks.
+ */
+static INLINE uint32_t
+util_pack_mask_z(enum pipe_format format, uint32_t z)
+{
+ switch (format) {
+ case PIPE_FORMAT_Z16_UNORM:
+ return z & 0xffff;
+ case PIPE_FORMAT_Z32_UNORM:
+ case PIPE_FORMAT_Z32_FLOAT:
+ return z;
+ case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
+ case PIPE_FORMAT_Z24X8_UNORM:
+ return z & 0xffffff;
+ case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
+ case PIPE_FORMAT_X8Z24_UNORM:
+ return (z & 0xffffff) << 8;
+ case PIPE_FORMAT_S8_USCALED:
+ return 0;
+ default:
+ debug_print_format("gallium: unhandled format in util_pack_mask_z()", format);
+ assert(0);
+ return 0;
+ }
+}
+
+static INLINE uint32_t
+util_pack_mask_z_stencil(enum pipe_format format, uint32_t z, uint8_t s)
+{
+ uint32_t packed = util_pack_mask_z(format, z);
+
+ switch (format) {
+ case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
+ packed |= (uint32_t)s << 24;
+ break;
+ case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
+ packed |= s;
+ break;
+ case PIPE_FORMAT_S8_USCALED:
+ packed |= s;
+ break;
+ default:
+ break;
+ }
+
+ return packed;
+}
+
+
/**
* Note: it's assumed that z is in [0,1]
*/
-static INLINE uint
+static INLINE uint32_t
util_pack_z(enum pipe_format format, double z)
{
+ union fi fui;
+
if (z == 0.0)
return 0;
@@ -439,24 +497,25 @@ util_pack_z(enum pipe_format format, double z)
case PIPE_FORMAT_Z16_UNORM:
if (z == 1.0)
return 0xffff;
- return (uint) (z * 0xffff);
+ return (uint32_t) (z * 0xffff);
case PIPE_FORMAT_Z32_UNORM:
/* special-case to avoid overflow */
if (z == 1.0)
return 0xffffffff;
- return (uint) (z * 0xffffffff);
+ return (uint32_t) (z * 0xffffffff);
case PIPE_FORMAT_Z32_FLOAT:
- return (uint)z;
+ fui.f = (float)z;
+ return fui.ui;
case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
case PIPE_FORMAT_Z24X8_UNORM:
if (z == 1.0)
return 0xffffff;
- return (uint) (z * 0xffffff);
+ return (uint32_t) (z * 0xffffff);
case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
case PIPE_FORMAT_X8Z24_UNORM:
if (z == 1.0)
return 0xffffff00;
- return ((uint) (z * 0xffffff)) << 8;
+ return ((uint32_t) (z * 0xffffff)) << 8;
case PIPE_FORMAT_S8_USCALED:
/* this case can get it via util_pack_z_stencil() */
return 0;
@@ -472,14 +531,14 @@ util_pack_z(enum pipe_format format, double z)
* Pack Z and/or stencil values into a 32-bit value described by format.
* Note: it's assumed that z is in [0,1] and s in [0,255]
*/
-static INLINE uint
-util_pack_z_stencil(enum pipe_format format, double z, uint s)
+static INLINE uint32_t
+util_pack_z_stencil(enum pipe_format format, double z, uint8_t s)
{
- unsigned packed = util_pack_z(format, z);
+ uint32_t packed = util_pack_z(format, z);
switch (format) {
case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
- packed |= s << 24;
+ packed |= (uint32_t)s << 24;
break;
case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
packed |= s;
diff --git a/src/gallium/auxiliary/util/u_pointer.h b/src/gallium/auxiliary/util/u_pointer.h
index e1af9f11cb9..cce0c7430e7 100644
--- a/src/gallium/auxiliary/util/u_pointer.h
+++ b/src/gallium/auxiliary/util/u_pointer.h
@@ -98,6 +98,29 @@ align16( void *unaligned )
return align_pointer( unaligned, 16 );
}
+typedef void (*func_pointer)(void);
+
+static INLINE func_pointer
+pointer_to_func( void *p )
+{
+ union {
+ void *p;
+ func_pointer f;
+ } pf;
+ pf.p = p;
+ return pf.f;
+}
+
+static INLINE void *
+func_to_pointer( func_pointer f )
+{
+ union {
+ void *p;
+ func_pointer f;
+ } pf;
+ pf.f = f;
+ return pf.p;
+}
#ifdef __cplusplus
diff --git a/src/gallium/auxiliary/util/u_prim.h b/src/gallium/auxiliary/util/u_prim.h
index 64390e13851..3c851f73401 100644
--- a/src/gallium/auxiliary/util/u_prim.h
+++ b/src/gallium/auxiliary/util/u_prim.h
@@ -108,6 +108,20 @@ static INLINE boolean u_trim_pipe_prim( unsigned pipe_prim, unsigned *nr )
ok = (*nr >= 4);
*nr -= (*nr % 2);
break;
+ case PIPE_PRIM_LINES_ADJACENCY:
+ ok = (*nr >= 4);
+ *nr -= (*nr % 4);
+ break;
+ case PIPE_PRIM_LINE_STRIP_ADJACENCY:
+ ok = (*nr >= 4);
+ break;
+ case PIPE_PRIM_TRIANGLES_ADJACENCY:
+ ok = (*nr >= 6);
+ *nr -= (*nr % 5);
+ break;
+ case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
+ ok = (*nr >= 4);
+ break;
default:
ok = 0;
break;
@@ -169,6 +183,52 @@ u_vertices_per_prim(int primitive)
}
}
+/**
+ * Returns the number of decomposed primitives for the given
+ * vertex count.
+ * Geometry shader is invoked once for each triangle in
+ * triangle strip, triangle fans and triangles and once
+ * for each line in line strip, line loop, lines.
+ */
+static INLINE unsigned
+u_gs_prims_for_vertices(int primitive, int vertices)
+{
+ switch(primitive) {
+ case PIPE_PRIM_POINTS:
+ return vertices;
+ case PIPE_PRIM_LINES:
+ return vertices / 2;
+ case PIPE_PRIM_LINE_LOOP:
+ return vertices;
+ case PIPE_PRIM_LINE_STRIP:
+ return vertices - 1;
+ case PIPE_PRIM_TRIANGLES:
+ return vertices / 3;
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ return vertices - 2;
+ case PIPE_PRIM_TRIANGLE_FAN:
+ return vertices - 2;
+ case PIPE_PRIM_LINES_ADJACENCY:
+ return vertices / 2;
+ case PIPE_PRIM_LINE_STRIP_ADJACENCY:
+ return vertices - 1;
+ case PIPE_PRIM_TRIANGLES_ADJACENCY:
+ return vertices / 3;
+ case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
+ return vertices - 2;
+
+ /* following primitives should never be used
+ * with geometry shaders abd their size is
+ * undefined */
+ case PIPE_PRIM_POLYGON:
+ case PIPE_PRIM_QUADS:
+ case PIPE_PRIM_QUAD_STRIP:
+ default:
+ debug_printf("Unrecognized geometry shader primitive");
+ return 3;
+ }
+}
+
const char *u_prim_name( unsigned pipe_prim );
#endif
diff --git a/src/gallium/auxiliary/util/u_rect.c b/src/gallium/auxiliary/util/u_rect.c
index 098cdfd58b1..56fcfac0693 100644
--- a/src/gallium/auxiliary/util/u_rect.c
+++ b/src/gallium/auxiliary/util/u_rect.c
@@ -30,13 +30,9 @@
*/
-#include "pipe/p_defines.h"
-#include "pipe/p_format.h"
-#include "pipe/p_context.h"
-#include "pipe/p_screen.h"
#include "util/u_format.h"
-#include "util/u_inlines.h"
#include "util/u_rect.h"
+#include "util/u_pack_color.h"
/**
@@ -99,7 +95,7 @@ util_fill_rect(ubyte * dst,
unsigned dst_y,
unsigned width,
unsigned height,
- uint32_t value)
+ union util_color *uc)
{
unsigned i, j;
unsigned width_size;
@@ -115,193 +111,54 @@ util_fill_rect(ubyte * dst,
dst_y /= blockheight;
width = (width + blockwidth - 1)/blockwidth;
height = (height + blockheight - 1)/blockheight;
-
+
dst += dst_x * blocksize;
dst += dst_y * dst_stride;
width_size = width * blocksize;
-
+
switch (blocksize) {
case 1:
if(dst_stride == width_size)
- memset(dst, (ubyte) value, height * width_size);
+ memset(dst, uc->ub, height * width_size);
else {
- for (i = 0; i < height; i++) {
- memset(dst, (ubyte) value, width_size);
- dst += dst_stride;
- }
+ for (i = 0; i < height; i++) {
+ memset(dst, uc->ub, width_size);
+ dst += dst_stride;
+ }
}
break;
case 2:
for (i = 0; i < height; i++) {
- uint16_t *row = (uint16_t *)dst;
- for (j = 0; j < width; j++)
- *row++ = (uint16_t) value;
- dst += dst_stride;
+ uint16_t *row = (uint16_t *)dst;
+ for (j = 0; j < width; j++)
+ *row++ = uc->us;
+ dst += dst_stride;
}
break;
case 4:
for (i = 0; i < height; i++) {
- uint32_t *row = (uint32_t *)dst;
- for (j = 0; j < width; j++)
- *row++ = value;
- dst += dst_stride;
+ uint32_t *row = (uint32_t *)dst;
+ for (j = 0; j < width; j++)
+ *row++ = uc->ui;
+ dst += dst_stride;
}
break;
- default:
- assert(0);
- break;
- }
-}
-
-
-
-/**
- * Fallback function for pipe->surface_copy().
- * Note: (X,Y)=(0,0) is always the upper-left corner.
- * if do_flip, flip the image vertically on its way from src rect to dst rect.
- * XXX should probably put this in new u_surface.c file...
- */
-void
-util_surface_copy(struct pipe_context *pipe,
- boolean do_flip,
- struct pipe_surface *dst,
- unsigned dst_x, unsigned dst_y,
- struct pipe_surface *src,
- unsigned src_x, unsigned src_y,
- unsigned w, unsigned h)
-{
- struct pipe_transfer *src_trans, *dst_trans;
- void *dst_map;
- const void *src_map;
- enum pipe_format src_format, dst_format;
-
- assert(src->texture && dst->texture);
- if (!src->texture || !dst->texture)
- return;
-
- src_format = src->texture->format;
- dst_format = dst->texture->format;
-
- src_trans = pipe_get_transfer(pipe,
- src->texture,
- src->face,
- src->level,
- src->zslice,
- PIPE_TRANSFER_READ,
- src_x, src_y, w, h);
-
- dst_trans = pipe_get_transfer(pipe,
- dst->texture,
- dst->face,
- dst->level,
- dst->zslice,
- PIPE_TRANSFER_WRITE,
- dst_x, dst_y, w, h);
-
- assert(util_format_get_blocksize(dst_format) == util_format_get_blocksize(src_format));
- assert(util_format_get_blockwidth(dst_format) == util_format_get_blockwidth(src_format));
- assert(util_format_get_blockheight(dst_format) == util_format_get_blockheight(src_format));
-
- src_map = pipe->transfer_map(pipe, src_trans);
- dst_map = pipe->transfer_map(pipe, dst_trans);
-
- assert(src_map);
- assert(dst_map);
-
- if (src_map && dst_map) {
- /* If do_flip, invert src_y position and pass negative src stride */
- util_copy_rect(dst_map,
- dst_format,
- dst_trans->stride,
- 0, 0,
- w, h,
- src_map,
- do_flip ? -(int) src_trans->stride : src_trans->stride,
- 0,
- do_flip ? h - 1 : 0);
- }
-
- pipe->transfer_unmap(pipe, src_trans);
- pipe->transfer_unmap(pipe, dst_trans);
-
- pipe->transfer_destroy(pipe, src_trans);
- pipe->transfer_destroy(pipe, dst_trans);
-}
-
-
-
-#define UBYTE_TO_USHORT(B) ((B) | ((B) << 8))
-
-
-/**
- * Fallback for pipe->surface_fill() function.
- * XXX should probably put this in new u_surface.c file...
- */
-void
-util_surface_fill(struct pipe_context *pipe,
- struct pipe_surface *dst,
- unsigned dstx, unsigned dsty,
- unsigned width, unsigned height, unsigned value)
-{
- struct pipe_transfer *dst_trans;
- void *dst_map;
-
- assert(dst->texture);
- if (!dst->texture)
- return;
- dst_trans = pipe_get_transfer(pipe,
- dst->texture,
- dst->face,
- dst->level,
- dst->zslice,
- PIPE_TRANSFER_WRITE,
- dstx, dsty, width, height);
-
- dst_map = pipe->transfer_map(pipe, dst_trans);
-
- assert(dst_map);
-
- if (dst_map) {
- assert(dst_trans->stride > 0);
-
- switch (util_format_get_blocksize(dst->texture->format)) {
- case 1:
- case 2:
- case 4:
- util_fill_rect(dst_map, dst->texture->format,
- dst_trans->stride,
- 0, 0, width, height, value);
- break;
- case 8:
- {
- /* expand the 4-byte clear value to an 8-byte value */
- ushort *row = (ushort *) dst_map;
- ushort val0 = UBYTE_TO_USHORT((value >> 0) & 0xff);
- ushort val1 = UBYTE_TO_USHORT((value >> 8) & 0xff);
- ushort val2 = UBYTE_TO_USHORT((value >> 16) & 0xff);
- ushort val3 = UBYTE_TO_USHORT((value >> 24) & 0xff);
- unsigned i, j;
- val0 = (val0 << 8) | val0;
- val1 = (val1 << 8) | val1;
- val2 = (val2 << 8) | val2;
- val3 = (val3 << 8) | val3;
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++) {
- row[j*4+0] = val0;
- row[j*4+1] = val1;
- row[j*4+2] = val2;
- row[j*4+3] = val3;
- }
- row += dst_trans->stride/2;
- }
+ case 8:
+ case 12:
+ case 16:
+ case 24:
+ case 32:
+ for (i = 0; i < height; i++) {
+ ubyte *row = dst;
+ for (j = 0; j < width; j++) {
+ memcpy(row, uc, blocksize);
+ row += blocksize;
+ }
+ dst += dst_stride;
}
break;
- default:
- assert(0);
- break;
- }
+ default:
+ assert(0);
+ break;
}
-
- pipe->transfer_unmap(pipe, dst_trans);
- pipe->transfer_destroy(pipe, dst_trans);
}
diff --git a/src/gallium/auxiliary/util/u_rect.h b/src/gallium/auxiliary/util/u_rect.h
index b44d821904b..4cb90d3c316 100644
--- a/src/gallium/auxiliary/util/u_rect.h
+++ b/src/gallium/auxiliary/util/u_rect.h
@@ -26,20 +26,67 @@
**************************************************************************/
-/**
- * Pipe copy/fill rect helpers.
+#ifndef U_RECT_H
+#define U_RECT_H
+
+#include "pipe/p_compiler.h"
+
+struct u_rect {
+ int x0, x1;
+ int y0, y1;
+};
+
+/* Do two rectangles intersect?
*/
+static INLINE boolean
+u_rect_test_intersection(const struct u_rect *a,
+ const struct u_rect *b)
+{
+ return (!(a->x1 < b->x0 ||
+ b->x1 < a->x0 ||
+ a->y1 < b->y0 ||
+ b->y1 < a->y0));
+}
+/* Find the intersection of two rectangles known to intersect.
+ */
+static INLINE void
+u_rect_find_intersection(const struct u_rect *a,
+ struct u_rect *b)
+{
+ /* Caller should verify intersection exists before calling.
+ */
+ if (b->x0 < a->x0) b->x0 = a->x0;
+ if (b->x1 > a->x1) b->x1 = a->x1;
+ if (b->y0 < a->y0) b->y0 = a->y0;
+ if (b->y1 > a->y1) b->y1 = a->y1;
+}
-#ifndef U_RECT_H
-#define U_RECT_H
+static INLINE void
+u_rect_possible_intersection(const struct u_rect *a,
+ struct u_rect *b)
+{
+ if (u_rect_test_intersection(a,b)) {
+ u_rect_find_intersection(a,b);
+ }
+ else {
+ b->x0 = b->x1 = b->y0 = b->y1 = 0;
+ }
+}
#include "pipe/p_format.h"
+#include "util/u_pack_color.h"
+
-struct pipe_context;
-struct pipe_surface;
+/**********************************************************************
+ * Pipe copy/fill rect helpers.
+ */
+
+/* These really should move to a different file:
+ */
+#include "pipe/p_format.h"
extern void
util_copy_rect(ubyte * dst, enum pipe_format format,
@@ -50,23 +97,7 @@ util_copy_rect(ubyte * dst, enum pipe_format format,
extern void
util_fill_rect(ubyte * dst, enum pipe_format format,
unsigned dst_stride, unsigned dst_x, unsigned dst_y,
- unsigned width, unsigned height, uint32_t value);
-
-
-extern void
-util_surface_copy(struct pipe_context *pipe,
- boolean do_flip,
- struct pipe_surface *dst,
- unsigned dst_x, unsigned dst_y,
- struct pipe_surface *src,
- unsigned src_x, unsigned src_y,
- unsigned w, unsigned h);
-
-extern void
-util_surface_fill(struct pipe_context *pipe,
- struct pipe_surface *dst,
- unsigned dstx, unsigned dsty,
- unsigned width, unsigned height, unsigned value);
+ unsigned width, unsigned height, union util_color *uc);
#endif /* U_RECT_H */
diff --git a/src/gallium/auxiliary/util/u_simple_list.h b/src/gallium/auxiliary/util/u_simple_list.h
index f5f43b0faa2..fe59771371b 100644
--- a/src/gallium/auxiliary/util/u_simple_list.h
+++ b/src/gallium/auxiliary/util/u_simple_list.h
@@ -46,6 +46,8 @@
do { \
(elem)->next->prev = (elem)->prev; \
(elem)->prev->next = (elem)->next; \
+ (elem)->next = elem; \
+ (elem)->prev = elem; \
} while (0)
/**
diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c
index 019dda767d0..58ef68377fc 100644
--- a/src/gallium/auxiliary/util/u_simple_shaders.c
+++ b/src/gallium/auxiliary/util/u_simple_shaders.c
@@ -37,6 +37,7 @@
#include "pipe/p_context.h"
#include "pipe/p_shader_tokens.h"
+#include "pipe/p_state.h"
#include "util/u_simple_shaders.h"
#include "util/u_debug.h"
#include "tgsi/tgsi_ureg.h"
@@ -87,10 +88,15 @@ util_make_vertex_passthrough_shader(struct pipe_context *pipe,
* MOV OUT[0], IMM[0] // (if writemask != 0xf)
* TEX OUT[0].writemask, IN[0], SAMP[0], 2D;
* END;
+ *
+ * \param tex_target one of PIPE_TEXTURE_x
+ * \parma interp_mode either TGSI_INTERPOLATE_LINEAR or PERSPECTIVE
+ * \param writemask mask of TGSI_WRITEMASK_x
*/
void *
util_make_fragment_tex_shader_writemask(struct pipe_context *pipe,
unsigned tex_target,
+ unsigned interp_mode,
unsigned writemask )
{
struct ureg_program *ureg;
@@ -98,6 +104,9 @@ util_make_fragment_tex_shader_writemask(struct pipe_context *pipe,
struct ureg_src tex;
struct ureg_dst out;
+ assert(interp_mode == TGSI_INTERPOLATE_LINEAR ||
+ interp_mode == TGSI_INTERPOLATE_PERSPECTIVE);
+
ureg = ureg_create( TGSI_PROCESSOR_FRAGMENT );
if (ureg == NULL)
return NULL;
@@ -106,7 +115,7 @@ util_make_fragment_tex_shader_writemask(struct pipe_context *pipe,
tex = ureg_DECL_fs_input( ureg,
TGSI_SEMANTIC_GENERIC, 0,
- TGSI_INTERPOLATE_PERSPECTIVE );
+ interp_mode );
out = ureg_DECL_output( ureg,
TGSI_SEMANTIC_COLOR,
@@ -133,10 +142,12 @@ util_make_fragment_tex_shader_writemask(struct pipe_context *pipe,
* \param tex_target one of PIPE_TEXTURE_x
*/
void *
-util_make_fragment_tex_shader(struct pipe_context *pipe, unsigned tex_target )
+util_make_fragment_tex_shader(struct pipe_context *pipe, unsigned tex_target,
+ unsigned interp_mode)
{
return util_make_fragment_tex_shader_writemask( pipe,
tex_target,
+ interp_mode,
TGSI_WRITEMASK_XYZW );
}
@@ -147,7 +158,8 @@ util_make_fragment_tex_shader(struct pipe_context *pipe, unsigned tex_target )
*/
void *
util_make_fragment_tex_shader_writedepth(struct pipe_context *pipe,
- unsigned tex_target)
+ unsigned tex_target,
+ unsigned interp_mode)
{
struct ureg_program *ureg;
struct ureg_src sampler;
@@ -163,7 +175,7 @@ util_make_fragment_tex_shader_writedepth(struct pipe_context *pipe,
tex = ureg_DECL_fs_input( ureg,
TGSI_SEMANTIC_GENERIC, 0,
- TGSI_INTERPOLATE_PERSPECTIVE );
+ interp_mode );
out = ureg_DECL_output( ureg,
TGSI_SEMANTIC_COLOR,
diff --git a/src/gallium/auxiliary/util/u_simple_shaders.h b/src/gallium/auxiliary/util/u_simple_shaders.h
index 6e760942e25..4aa34bc4757 100644
--- a/src/gallium/auxiliary/util/u_simple_shaders.h
+++ b/src/gallium/auxiliary/util/u_simple_shaders.h
@@ -52,15 +52,18 @@ util_make_vertex_passthrough_shader(struct pipe_context *pipe,
extern void *
util_make_fragment_tex_shader_writemask(struct pipe_context *pipe,
unsigned tex_target,
+ unsigned interp_mode,
unsigned writemask);
extern void *
-util_make_fragment_tex_shader(struct pipe_context *pipe, unsigned tex_target);
+util_make_fragment_tex_shader(struct pipe_context *pipe, unsigned tex_target,
+ unsigned interp_mode);
extern void *
util_make_fragment_tex_shader_writedepth(struct pipe_context *pipe,
- unsigned tex_target);
+ unsigned tex_target,
+ unsigned interp_mode);
extern void *
diff --git a/src/gallium/auxiliary/util/u_split_prim.h b/src/gallium/auxiliary/util/u_split_prim.h
new file mode 100644
index 00000000000..7f80fc12700
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_split_prim.h
@@ -0,0 +1,114 @@
+/* Originally written by Ben Skeggs for the nv50 driver*/
+
+#ifndef U_SPLIT_PRIM_H
+#define U_SPLIT_PRIM_H
+
+#include "pipe/p_defines.h"
+#include "pipe/p_compiler.h"
+
+#include "util/u_debug.h"
+
+struct util_split_prim {
+ void *priv;
+ void (*emit)(void *priv, unsigned start, unsigned count);
+ void (*edge)(void *priv, boolean enabled);
+
+ unsigned mode;
+ unsigned start;
+ unsigned p_start;
+ unsigned p_end;
+
+ uint repeat_first:1;
+ uint close_first:1;
+ uint edgeflag_off:1;
+};
+
+static INLINE void
+util_split_prim_init(struct util_split_prim *s,
+ unsigned mode, unsigned start, unsigned count)
+{
+ if (mode == PIPE_PRIM_LINE_LOOP) {
+ s->mode = PIPE_PRIM_LINE_STRIP;
+ s->close_first = 1;
+ } else {
+ s->mode = mode;
+ s->close_first = 0;
+ }
+ s->start = start;
+ s->p_start = start;
+ s->p_end = start + count;
+ s->edgeflag_off = 0;
+ s->repeat_first = 0;
+}
+
+static INLINE boolean
+util_split_prim_next(struct util_split_prim *s, unsigned max_verts)
+{
+ int repeat = 0;
+
+ if (s->repeat_first) {
+ s->emit(s->priv, s->start, 1);
+ max_verts--;
+ if (s->edgeflag_off) {
+ s->edge(s->priv, TRUE);
+ s->edgeflag_off = FALSE;
+ }
+ }
+
+ if ((s->p_end - s->p_start) + s->close_first <= max_verts) {
+ s->emit(s->priv, s->p_start, s->p_end - s->p_start);
+ if (s->close_first)
+ s->emit(s->priv, s->start, 1);
+ return TRUE;
+ }
+
+ switch (s->mode) {
+ case PIPE_PRIM_LINES:
+ max_verts &= ~1;
+ break;
+ case PIPE_PRIM_LINE_STRIP:
+ repeat = 1;
+ break;
+ case PIPE_PRIM_POLYGON:
+ max_verts--;
+ s->emit(s->priv, s->p_start, max_verts);
+ s->edge(s->priv, FALSE);
+ s->emit(s->priv, s->p_start + max_verts, 1);
+ s->p_start += max_verts;
+ s->repeat_first = TRUE;
+ s->edgeflag_off = TRUE;
+ return FALSE;
+ case PIPE_PRIM_TRIANGLES:
+ max_verts = max_verts - (max_verts % 3);
+ break;
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ /* to ensure winding stays correct, always split
+ * on an even number of generated triangles
+ */
+ max_verts = max_verts & ~1;
+ repeat = 2;
+ break;
+ case PIPE_PRIM_TRIANGLE_FAN:
+ s->repeat_first = TRUE;
+ repeat = 1;
+ break;
+ case PIPE_PRIM_QUADS:
+ max_verts &= ~3;
+ break;
+ case PIPE_PRIM_QUAD_STRIP:
+ max_verts &= ~1;
+ repeat = 2;
+ break;
+ case PIPE_PRIM_POINTS:
+ break;
+ default:
+ /* TODO: implement adjacency primitives */
+ assert(0);
+ }
+
+ s->emit (s->priv, s->p_start, max_verts);
+ s->p_start += (max_verts - repeat);
+ return FALSE;
+}
+
+#endif /* U_SPLIT_PRIM_H */
diff --git a/src/gallium/auxiliary/util/u_sse.h b/src/gallium/auxiliary/util/u_sse.h
index e2a8491e62c..03198c91da4 100644
--- a/src/gallium/auxiliary/util/u_sse.h
+++ b/src/gallium/auxiliary/util/u_sse.h
@@ -41,7 +41,6 @@
#if defined(PIPE_ARCH_SSE)
-#include <xmmintrin.h>
#include <emmintrin.h>
@@ -72,6 +71,33 @@ _mm_castps_si128(__m128 a)
#endif /* defined(_MSC_VER) && _MSC_VER < 1500 */
+
+#if defined(PIPE_ARCH_SSSE3)
+
+#include <tmmintrin.h>
+
+#else /* !PIPE_ARCH_SSSE3 */
+
+/**
+ * Describe _mm_shuffle_epi8() with gcc extended inline assembly, for cases
+ * where -mssse3 is not supported/enabled.
+ *
+ * MSVC will never get in here as its intrinsics support do not rely on
+ * compiler command line options.
+ */
+static __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shuffle_epi8(__m128i a, __m128i mask)
+{
+ __m128i result;
+ __asm__("pshufb %1, %0"
+ : "=x" (result)
+ : "xm" (mask), "0" (a));
+ return result;
+}
+
+#endif /* !PIPE_ARCH_SSSE3 */
+
+
#endif /* PIPE_ARCH_X86 || PIPE_ARCH_X86_64 */
#endif /* U_SSE_H_ */
diff --git a/src/gallium/auxiliary/util/u_staging.c b/src/gallium/auxiliary/util/u_staging.c
new file mode 100644
index 00000000000..c5d68f8df86
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_staging.c
@@ -0,0 +1,117 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_staging.h"
+#include "pipe/p_context.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+
+static void
+util_staging_resource_template(struct pipe_resource *pt, unsigned width, unsigned height, unsigned depth, struct pipe_resource *template)
+{
+ memset(template, 0, sizeof(struct pipe_resource));
+ if(pt->target != PIPE_BUFFER && depth <= 1)
+ template->target = PIPE_TEXTURE_RECT;
+ else
+ template->target = pt->target;
+ template->format = pt->format;
+ template->width0 = width;
+ template->height0 = height;
+ template->depth0 = depth;
+ template->last_level = 0;
+ template->nr_samples = pt->nr_samples;
+ template->bind = 0;
+ template->usage = PIPE_USAGE_STAGING;
+ template->flags = 0;
+}
+
+struct util_staging_transfer *
+util_staging_transfer_init(struct pipe_context *pipe,
+ struct pipe_resource *pt,
+ struct pipe_subresource sr,
+ unsigned usage,
+ const struct pipe_box *box,
+ bool direct, struct util_staging_transfer *tx)
+{
+ struct pipe_screen *pscreen = pipe->screen;
+
+ struct pipe_resource staging_resource_template;
+
+ pipe_resource_reference(&tx->base.resource, pt);
+ tx->base.sr = sr;
+ tx->base.usage = usage;
+ tx->base.box = *box;
+
+ if (direct)
+ {
+ tx->staging_resource = pt;
+ return tx;
+ }
+
+ util_staging_resource_template(pt, box->width, box->height, box->depth, &staging_resource_template);
+ tx->staging_resource = pscreen->resource_create(pscreen, &staging_resource_template);
+ if (!tx->staging_resource)
+ {
+ pipe_resource_reference(&tx->base.resource, NULL);
+ FREE(tx);
+ return NULL;
+ }
+
+ if (usage & PIPE_TRANSFER_READ)
+ {
+ struct pipe_subresource dstsr;
+ unsigned zi;
+ dstsr.face = 0;
+ dstsr.level = 0;
+ for(zi = 0; zi < box->depth; ++zi)
+ pipe->resource_copy_region(pipe, tx->staging_resource, dstsr, 0, 0, 0, tx->base.resource, sr, box->x, box->y, box->z + zi, box->width, box->height);
+ }
+
+ return tx;
+}
+
+void
+util_staging_transfer_destroy(struct pipe_context *pipe, struct pipe_transfer *ptx)
+{
+ struct util_staging_transfer *tx = (struct util_staging_transfer *)ptx;
+
+ if (tx->staging_resource != tx->base.resource)
+ {
+ if(tx->base.usage & PIPE_TRANSFER_WRITE) {
+ struct pipe_subresource srcsr;
+ unsigned zi;
+ srcsr.face = 0;
+ srcsr.level = 0;
+ for(zi = 0; zi < tx->base.box.depth; ++zi)
+ pipe->resource_copy_region(pipe, tx->base.resource, tx->base.sr, tx->base.box.x, tx->base.box.y, tx->base.box.z + zi, tx->staging_resource, srcsr, 0, 0, 0, tx->base.box.width, tx->base.box.height);
+ }
+
+ pipe_resource_reference(&tx->staging_resource, NULL);
+ }
+
+ pipe_resource_reference(&ptx->resource, NULL);
+ FREE(ptx);
+}
diff --git a/src/gallium/auxiliary/util/u_staging.h b/src/gallium/auxiliary/util/u_staging.h
new file mode 100644
index 00000000000..1aab78cc881
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_staging.h
@@ -0,0 +1,63 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Direct3D 10/11 has no concept of transfers. Applications instead
+ * create resources with a STAGING or DYNAMIC usage, copy between them
+ * and the real resource and use Map to map the STAGING/DYNAMIC resource.
+ *
+ * This util module allows to implement Gallium drivers as a Direct3D
+ * driver would be implemented: transfers allocate a resource with
+ * PIPE_USAGE_STAGING, and copy the data between it and the real resource
+ * with resource_copy_region.
+ */
+
+#ifndef U_STAGING_H
+#define U_STAGING_H
+
+#include "pipe/p_state.h"
+
+struct util_staging_transfer {
+ struct pipe_transfer base;
+
+ /* if direct, same as base.resource, otherwise the temporary staging resource */
+ struct pipe_resource *staging_resource;
+};
+
+/* user must be stride, slice_stride and offset */
+/* pt->usage == PIPE_USAGE_DYNAMIC || pt->usage == PIPE_USAGE_STAGING should be a good value to pass for direct */
+/* staging resource is currently created with PIPE_USAGE_STAGING */
+struct util_staging_transfer *
+util_staging_transfer_init(struct pipe_context *pipe,
+ struct pipe_resource *pt,
+ struct pipe_subresource sr,
+ unsigned usage,
+ const struct pipe_box *box,
+ bool direct, struct util_staging_transfer *tx);
+
+void
+util_staging_transfer_destroy(struct pipe_context *pipe, struct pipe_transfer *ptx);
+
+#endif
diff --git a/src/gallium/auxiliary/util/u_surface.c b/src/gallium/auxiliary/util/u_surface.c
index 42440d0d673..f78b6838a72 100644
--- a/src/gallium/auxiliary/util/u_surface.c
+++ b/src/gallium/auxiliary/util/u_surface.c
@@ -32,13 +32,15 @@
*/
+#include "pipe/p_defines.h"
#include "pipe/p_screen.h"
#include "pipe/p_state.h"
-#include "pipe/p_defines.h"
-#include "util/u_inlines.h"
-#include "util/u_memory.h"
+#include "util/u_format.h"
+#include "util/u_inlines.h"
+#include "util/u_rect.h"
#include "util/u_surface.h"
+#include "util/u_pack_color.h"
/**
@@ -68,7 +70,7 @@ util_create_rgba_surface(struct pipe_screen *screen,
/* Choose surface format */
for (i = 0; rgbaFormats[i]; i++) {
if (screen->is_format_supported(screen, rgbaFormats[i],
- target, bind, 0)) {
+ target, 0, bind, 0)) {
format = rgbaFormats[i];
break;
}
@@ -118,70 +120,231 @@ util_destroy_rgba_surface(struct pipe_resource *texture,
/**
- * Compare pipe_framebuffer_state objects.
- * \return TRUE if same, FALSE if different
+ * Fallback function for pipe->resource_copy_region().
+ * Note: (X,Y)=(0,0) is always the upper-left corner.
*/
-boolean
-util_framebuffer_state_equal(const struct pipe_framebuffer_state *dst,
- const struct pipe_framebuffer_state *src)
+void
+util_resource_copy_region(struct pipe_context *pipe,
+ struct pipe_resource *dst,
+ struct pipe_subresource subdst,
+ unsigned dst_x, unsigned dst_y, unsigned dst_z,
+ struct pipe_resource *src,
+ struct pipe_subresource subsrc,
+ unsigned src_x, unsigned src_y, unsigned src_z,
+ unsigned w, unsigned h)
{
- unsigned i;
+ struct pipe_transfer *src_trans, *dst_trans;
+ void *dst_map;
+ const void *src_map;
+ enum pipe_format src_format, dst_format;
+
+ assert(src && dst);
+ if (!src || !dst)
+ return;
+
+ src_format = src->format;
+ dst_format = dst->format;
+
+ src_trans = pipe_get_transfer(pipe,
+ src,
+ subsrc.face,
+ subsrc.level,
+ src_z,
+ PIPE_TRANSFER_READ,
+ src_x, src_y, w, h);
+
+ dst_trans = pipe_get_transfer(pipe,
+ dst,
+ subdst.face,
+ subdst.level,
+ src_z,
+ PIPE_TRANSFER_WRITE,
+ dst_x, dst_y, w, h);
+
+ assert(util_format_get_blocksize(dst_format) == util_format_get_blocksize(src_format));
+ assert(util_format_get_blockwidth(dst_format) == util_format_get_blockwidth(src_format));
+ assert(util_format_get_blockheight(dst_format) == util_format_get_blockheight(src_format));
+
+ src_map = pipe->transfer_map(pipe, src_trans);
+ dst_map = pipe->transfer_map(pipe, dst_trans);
+
+ assert(src_map);
+ assert(dst_map);
+
+ if (src_map && dst_map) {
+ util_copy_rect(dst_map,
+ dst_format,
+ dst_trans->stride,
+ 0, 0,
+ w, h,
+ src_map,
+ src_trans->stride,
+ 0,
+ 0);
+ }
- if (dst->width != src->width ||
- dst->height != src->height)
- return FALSE;
+ pipe->transfer_unmap(pipe, src_trans);
+ pipe->transfer_unmap(pipe, dst_trans);
- for (i = 0; i < Elements(src->cbufs); i++) {
- if (dst->cbufs[i] != src->cbufs[i]) {
- return FALSE;
- }
- }
+ pipe->transfer_destroy(pipe, src_trans);
+ pipe->transfer_destroy(pipe, dst_trans);
+}
- if (dst->nr_cbufs != src->nr_cbufs) {
- return FALSE;
- }
- if (dst->zsbuf != src->zsbuf) {
- return FALSE;
- }
- return TRUE;
-}
+#define UBYTE_TO_USHORT(B) ((B) | ((B) << 8))
/**
- * Copy framebuffer state from src to dst, updating refcounts.
+ * Fallback for pipe->clear_render_target() function.
+ * XXX this looks too hackish to be really useful.
+ * cpp > 4 looks like a gross hack at best...
+ * Plus can't use these transfer fallbacks when clearing
+ * multisampled surfaces for instance.
*/
void
-util_copy_framebuffer_state(struct pipe_framebuffer_state *dst,
- const struct pipe_framebuffer_state *src)
+util_clear_render_target(struct pipe_context *pipe,
+ struct pipe_surface *dst,
+ const float *rgba,
+ unsigned dstx, unsigned dsty,
+ unsigned width, unsigned height)
{
- unsigned i;
+ struct pipe_transfer *dst_trans;
+ void *dst_map;
+ union util_color uc;
- dst->width = src->width;
- dst->height = src->height;
+ assert(dst->texture);
+ if (!dst->texture)
+ return;
- for (i = 0; i < Elements(src->cbufs); i++) {
- pipe_surface_reference(&dst->cbufs[i], src->cbufs[i]);
- }
+ dst_trans = pipe_get_transfer(pipe,
+ dst->texture,
+ dst->face,
+ dst->level,
+ dst->zslice,
+ PIPE_TRANSFER_WRITE,
+ dstx, dsty, width, height);
- dst->nr_cbufs = src->nr_cbufs;
+ dst_map = pipe->transfer_map(pipe, dst_trans);
- pipe_surface_reference(&dst->zsbuf, src->zsbuf);
-}
+ assert(dst_map);
+
+ if (dst_map) {
+ assert(dst_trans->stride > 0);
+ util_pack_color(rgba, dst->texture->format, &uc);
+ util_fill_rect(dst_map, dst->texture->format,
+ dst_trans->stride,
+ 0, 0, width, height, &uc);
+ }
+
+ pipe->transfer_unmap(pipe, dst_trans);
+ pipe->transfer_destroy(pipe, dst_trans);
+}
+/**
+ * Fallback for pipe->clear_stencil() function.
+ * sw fallback doesn't look terribly useful here.
+ * Plus can't use these transfer fallbacks when clearing
+ * multisampled surfaces for instance.
+ */
void
-util_unreference_framebuffer_state(struct pipe_framebuffer_state *fb)
+util_clear_depth_stencil(struct pipe_context *pipe,
+ struct pipe_surface *dst,
+ unsigned clear_flags,
+ double depth,
+ unsigned stencil,
+ unsigned dstx, unsigned dsty,
+ unsigned width, unsigned height)
{
- unsigned i;
-
- for (i = 0; i < fb->nr_cbufs; i++) {
- pipe_surface_reference(&fb->cbufs[i], NULL);
+ struct pipe_transfer *dst_trans;
+ ubyte *dst_map;
+ boolean need_rmw = FALSE;
+
+ if ((clear_flags & PIPE_CLEAR_DEPTHSTENCIL) &&
+ ((clear_flags & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL) &&
+ util_format_is_depth_and_stencil(dst->format))
+ need_rmw = TRUE;
+
+ assert(dst->texture);
+ if (!dst->texture)
+ return;
+ dst_trans = pipe_get_transfer(pipe,
+ dst->texture,
+ dst->face,
+ dst->level,
+ dst->zslice,
+ (need_rmw ? PIPE_TRANSFER_READ_WRITE :
+ PIPE_TRANSFER_WRITE),
+ dstx, dsty, width, height);
+
+ dst_map = pipe->transfer_map(pipe, dst_trans);
+
+ assert(dst_map);
+
+ if (dst_map) {
+ unsigned dst_stride = dst_trans->stride;
+ unsigned zstencil = util_pack_z_stencil(dst->texture->format, depth, stencil);
+ unsigned i, j;
+ assert(dst_trans->stride > 0);
+
+ switch (util_format_get_blocksize(dst->format)) {
+ case 1:
+ assert(dst->format == PIPE_FORMAT_S8_USCALED);
+ if(dst_stride == width)
+ memset(dst_map, (ubyte) zstencil, height * width);
+ else {
+ for (i = 0; i < height; i++) {
+ memset(dst_map, (ubyte) zstencil, width);
+ dst_map += dst_stride;
+ }
+ }
+ break;
+ case 2:
+ assert(dst->format == PIPE_FORMAT_Z16_UNORM);
+ for (i = 0; i < height; i++) {
+ uint16_t *row = (uint16_t *)dst_map;
+ for (j = 0; j < width; j++)
+ *row++ = (uint16_t) zstencil;
+ dst_map += dst_stride;
+ }
+ break;
+ case 4:
+ if (!need_rmw) {
+ for (i = 0; i < height; i++) {
+ uint32_t *row = (uint32_t *)dst_map;
+ for (j = 0; j < width; j++)
+ *row++ = zstencil;
+ dst_map += dst_stride;
+ }
+ }
+ else {
+ uint32_t dst_mask;
+ if (dst->format == PIPE_FORMAT_Z24_UNORM_S8_USCALED)
+ dst_mask = 0xffffff00;
+ else {
+ assert(dst->format == PIPE_FORMAT_S8_USCALED_Z24_UNORM);
+ dst_mask = 0xffffff;
+ }
+ if (clear_flags & PIPE_CLEAR_DEPTH)
+ dst_mask = ~dst_mask;
+ for (i = 0; i < height; i++) {
+ uint32_t *row = (uint32_t *)dst_map;
+ for (j = 0; j < width; j++) {
+ uint32_t tmp = *row & dst_mask;
+ *row++ = tmp | (zstencil & ~dst_mask);
+ }
+ dst_map += dst_stride;
+ }
+ }
+ break;
+ case 8:
+ default:
+ assert(0);
+ break;
+ }
}
- pipe_surface_reference(&fb->zsbuf, NULL);
-
- fb->width = fb->height = 0;
- fb->nr_cbufs = 0;
+ pipe->transfer_unmap(pipe, dst_trans);
+ pipe->transfer_destroy(pipe, dst_trans);
}
diff --git a/src/gallium/auxiliary/util/u_surface.h b/src/gallium/auxiliary/util/u_surface.h
index 119fcd4ce8e..6cd12af3a8b 100644
--- a/src/gallium/auxiliary/util/u_surface.h
+++ b/src/gallium/auxiliary/util/u_surface.h
@@ -33,23 +33,6 @@
#include "pipe/p_state.h"
-/**
- * Are s1 and s2 the same surface?
- * Surfaces are basically views into textures so check if the two surfaces
- * name the same part of the same texture.
- */
-static INLINE boolean
-util_same_surface(const struct pipe_surface *s1, const struct pipe_surface *s2)
-{
- return (s1->texture == s2->texture &&
- s1->face == s2->face &&
- s1->level == s2->level &&
- s1->zslice == s2->zslice);
-}
-
-
-
-
extern boolean
util_create_rgba_surface(struct pipe_screen *screen,
uint width, uint height, uint bind,
@@ -62,17 +45,32 @@ util_destroy_rgba_surface(struct pipe_resource *texture,
struct pipe_surface *surface);
-extern boolean
-util_framebuffer_state_equal(const struct pipe_framebuffer_state *dst,
- const struct pipe_framebuffer_state *src);
extern void
-util_copy_framebuffer_state(struct pipe_framebuffer_state *dst,
- const struct pipe_framebuffer_state *src);
+util_resource_copy_region(struct pipe_context *pipe,
+ struct pipe_resource *dst,
+ struct pipe_subresource subdst,
+ unsigned dst_x, unsigned dst_y, unsigned dst_z,
+ struct pipe_resource *src,
+ struct pipe_subresource subsrc,
+ unsigned src_x, unsigned src_y, unsigned src_z,
+ unsigned w, unsigned h);
+extern void
+util_clear_render_target(struct pipe_context *pipe,
+ struct pipe_surface *dst,
+ const float *rgba,
+ unsigned dstx, unsigned dsty,
+ unsigned width, unsigned height);
extern void
-util_unreference_framebuffer_state(struct pipe_framebuffer_state *fb);
+util_clear_depth_stencil(struct pipe_context *pipe,
+ struct pipe_surface *dst,
+ unsigned clear_flags,
+ double depth,
+ unsigned stencil,
+ unsigned dstx, unsigned dsty,
+ unsigned width, unsigned height);
#endif /* U_SURFACE_H */
diff --git a/src/gallium/auxiliary/util/u_surfaces.c b/src/gallium/auxiliary/util/u_surfaces.c
index 668da8c5c27..404e1219952 100644
--- a/src/gallium/auxiliary/util/u_surfaces.c
+++ b/src/gallium/auxiliary/util/u_surfaces.c
@@ -1,42 +1,50 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
#include "u_surfaces.h"
#include "util/u_hash_table.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
-/* TODO: ouch, util_hash_table should do these by default when passed a null function pointer
- * this indirect function call is quite bad
- */
-static unsigned
-hash(void *key)
-{
- return (unsigned)(uintptr_t)key;
-}
-
-static int
-compare(void *key1, void *key2)
-{
- return (unsigned)(uintptr_t)key1 - (unsigned)(uintptr_t)key2;
-}
-
struct pipe_surface *
util_surfaces_do_get(struct util_surfaces *us, unsigned surface_struct_size, struct pipe_screen *pscreen, struct pipe_resource *pt, unsigned face, unsigned level, unsigned zslice, unsigned flags)
{
struct pipe_surface *ps;
- void *key = NULL;
if(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE)
- { /* or 2D array */
- if(!us->u.table)
- us->u.table = util_hash_table_create(hash, compare);
- key = (void *)(((zslice + face) << 8) | level);
- /* TODO: ouch, should have a get-reference function...
- * also, shouldn't allocate a two-pointer structure for each item... */
- ps = util_hash_table_get(us->u.table, key);
+ { /* or 2D array */
+ if(!us->u.hash)
+ us->u.hash = cso_hash_create();
+
+ ps = cso_hash_iter_data(cso_hash_find(us->u.hash, ((zslice + face) << 8) | level));
}
else
{
if(!us->u.array)
- us->u.array = CALLOC(pt->last_level + 1, sizeof(struct pipe_surface *));
+ us->u.array = CALLOC(pt->last_level + 1, sizeof(struct pipe_surface *));
ps = us->u.array[level];
}
@@ -54,7 +62,7 @@ util_surfaces_do_get(struct util_surfaces *us, unsigned surface_struct_size, str
ps->offset = ~0;
if(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE)
- util_hash_table_set(us->u.table, key, ps);
+ cso_hash_insert(us->u.hash, ((zslice + face) << 8) | level, ps);
else
us->u.array[level] = ps;
@@ -66,47 +74,44 @@ util_surfaces_do_detach(struct util_surfaces *us, struct pipe_surface *ps)
{
struct pipe_resource *pt = ps->texture;
if(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE)
- { /* or 2D array */
- void* key = (void*)(uintptr_t)(((ps->zslice + ps->face) << 8) | ps->level);
- util_hash_table_remove(us->u.table, key);
+ { /* or 2D array */
+ cso_hash_erase(us->u.hash, cso_hash_find(us->u.hash, ((ps->zslice + ps->face) << 8) | ps->level));
}
else
us->u.array[ps->level] = 0;
}
-static enum pipe_error
-util_surfaces_destroy_callback(void *key, void *value, void *data)
-{
- void (*destroy_surface) (struct pipe_surface * ps) = data;
- destroy_surface((struct pipe_surface *)value);
- return PIPE_OK;
-}
-
void
util_surfaces_destroy(struct util_surfaces *us, struct pipe_resource *pt, void (*destroy_surface) (struct pipe_surface *))
{
if(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE)
- { /* or 2D array */
- if(us->u.table)
+ { /* or 2D array */
+ if(us->u.hash)
{
- util_hash_table_foreach(us->u.table, util_surfaces_destroy_callback, destroy_surface);
- util_hash_table_destroy(us->u.table);
- us->u.table = NULL;
+ struct cso_hash_iter iter;
+ iter = cso_hash_first_node(us->u.hash);
+ while (!cso_hash_iter_is_null(iter)) {
+ destroy_surface(cso_hash_iter_data(iter));
+ iter = cso_hash_iter_next(iter);
+ }
+
+ cso_hash_delete(us->u.hash);
+ us->u.hash = NULL;
}
}
else
{
if(us->u.array)
{
- unsigned i;
- for(i = 0; i < pt->last_level; ++i)
- {
- struct pipe_surface *ps = us->u.array[i];
- if(ps)
- destroy_surface(ps);
- }
- FREE(us->u.array);
- us->u.array = NULL;
+ unsigned i;
+ for(i = 0; i <= pt->last_level; ++i)
+ {
+ struct pipe_surface *ps = us->u.array[i];
+ if(ps)
+ destroy_surface(ps);
+ }
+ FREE(us->u.array);
+ us->u.array = NULL;
}
}
}
diff --git a/src/gallium/auxiliary/util/u_surfaces.h b/src/gallium/auxiliary/util/u_surfaces.h
index 0195bf5afba..17d8a5d3a5b 100644
--- a/src/gallium/auxiliary/util/u_surfaces.h
+++ b/src/gallium/auxiliary/util/u_surfaces.h
@@ -1,18 +1,44 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
#ifndef U_SURFACES_H_
#define U_SURFACES_H_
#include "pipe/p_compiler.h"
#include "pipe/p_state.h"
#include "util/u_atomic.h"
-
-struct util_hash_table;
+#include "cso_cache/cso_hash.h"
struct util_surfaces
{
union
{
- struct util_hash_table *table;
+ struct cso_hash *hash;
struct pipe_surface **array;
+ void* pv;
} u;
};
@@ -22,7 +48,7 @@ struct pipe_surface *util_surfaces_do_get(struct util_surfaces *us, unsigned sur
static INLINE struct pipe_surface *
util_surfaces_get(struct util_surfaces *us, unsigned surface_struct_size, struct pipe_screen *pscreen, struct pipe_resource *pt, unsigned face, unsigned level, unsigned zslice, unsigned flags)
{
- if(likely(pt->target == PIPE_TEXTURE_2D && us->u.array))
+ if(likely((pt->target == PIPE_TEXTURE_2D || pt->target == PIPE_TEXTURE_RECT) && us->u.array))
{
struct pipe_surface *ps = us->u.array[level];
if(ps)
@@ -35,12 +61,24 @@ util_surfaces_get(struct util_surfaces *us, unsigned surface_struct_size, struct
return util_surfaces_do_get(us, surface_struct_size, pscreen, pt, face, level, zslice, flags);
}
+static INLINE struct pipe_surface *
+util_surfaces_peek(struct util_surfaces *us, struct pipe_resource *pt, unsigned face, unsigned level, unsigned zslice)
+{
+ if(!us->u.pv)
+ return 0;
+
+ if(unlikely(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE))
+ return cso_hash_iter_data(cso_hash_find(us->u.hash, ((zslice + face) << 8) | level));
+ else
+ return us->u.array[level];
+}
+
void util_surfaces_do_detach(struct util_surfaces *us, struct pipe_surface *ps);
static INLINE void
util_surfaces_detach(struct util_surfaces *us, struct pipe_surface *ps)
{
- if(likely(ps->texture->target == PIPE_TEXTURE_2D))
+ if(likely(ps->texture->target == PIPE_TEXTURE_2D || ps->texture->target == PIPE_TEXTURE_RECT))
{
us->u.array[ps->level] = 0;
return;
diff --git a/src/gallium/auxiliary/util/u_tile.h b/src/gallium/auxiliary/util/u_tile.h
index 986eee07435..558351d0ce5 100644
--- a/src/gallium/auxiliary/util/u_tile.h
+++ b/src/gallium/auxiliary/util/u_tile.h
@@ -29,7 +29,10 @@
#define P_TILE_H
#include "pipe/p_compiler.h"
+#include "pipe/p_format.h"
+#include "pipe/p_state.h"
+struct pipe_context;
struct pipe_transfer;
/**
diff --git a/src/gallium/auxiliary/util/u_transfer.c b/src/gallium/auxiliary/util/u_transfer.c
index bedace3b1dc..69f6fab9504 100644
--- a/src/gallium/auxiliary/util/u_transfer.c
+++ b/src/gallium/auxiliary/util/u_transfer.c
@@ -35,12 +35,12 @@ void u_default_transfer_inline_write( struct pipe_context *pipe,
util_copy_rect(map,
resource->format,
- transfer->stride, /* bytes? */
+ transfer->stride, /* bytes */
0, 0,
box->width,
box->height,
data,
- box->width, /* bytes? texels? */
+ stride, /* bytes */
0, 0);
out:
diff --git a/src/gallium/auxiliary/util/u_transfer.h b/src/gallium/auxiliary/util/u_transfer.h
index eb07945d15f..e3a38730f21 100644
--- a/src/gallium/auxiliary/util/u_transfer.h
+++ b/src/gallium/auxiliary/util/u_transfer.h
@@ -8,6 +8,7 @@
#include "pipe/p_state.h"
struct pipe_context;
+struct winsys_handle;
boolean u_default_resource_get_handle(struct pipe_screen *screen,
struct pipe_resource *resource,
diff --git a/src/gallium/auxiliary/util/u_upload_mgr.c b/src/gallium/auxiliary/util/u_upload_mgr.c
index 75d44432d9e..af229e61a00 100644
--- a/src/gallium/auxiliary/util/u_upload_mgr.c
+++ b/src/gallium/auxiliary/util/u_upload_mgr.c
@@ -59,6 +59,8 @@ struct u_upload_mgr *u_upload_create( struct pipe_context *pipe,
unsigned usage )
{
struct u_upload_mgr *upload = CALLOC_STRUCT( u_upload_mgr );
+ if (!upload)
+ return NULL;
upload->pipe = pipe;
upload->default_size = default_size;
diff --git a/src/gallium/auxiliary/util/u_upload_mgr.h b/src/gallium/auxiliary/util/u_upload_mgr.h
index a124924fc80..de016df02e0 100644
--- a/src/gallium/auxiliary/util/u_upload_mgr.h
+++ b/src/gallium/auxiliary/util/u_upload_mgr.h
@@ -32,11 +32,8 @@
#ifndef U_UPLOAD_MGR_H
#define U_UPLOAD_MGR_H
-#include "pipe/p_defines.h"
-
-struct pipe_screen;
+struct pipe_context;
struct pipe_resource;
-struct u_upload_mgr;
struct u_upload_mgr *u_upload_create( struct pipe_context *pipe,