summaryrefslogtreecommitdiffstats
path: root/src/gallium/auxiliary
diff options
context:
space:
mode:
authorYounes Manton <[email protected]>2010-04-30 20:42:30 -0400
committerYounes Manton <[email protected]>2010-04-30 20:42:30 -0400
commita8ea1dacc63ac567498049e5756c247b9fec6cd9 (patch)
tree4031e2e2b6166bd926b43fa4bbb3aab773a30ee5 /src/gallium/auxiliary
parent404fb63b4649f58fce443615e49337d42b8ddece (diff)
parent35d960cc744c374ccaad48c3d80559b59c74e28a (diff)
Merge branch 'master' of ssh://git.freedesktop.org/git/mesa/mesa into pipe-video
Conflicts: src/gallium/auxiliary/Makefile src/gallium/auxiliary/SConscript src/gallium/auxiliary/util/u_format.csv src/gallium/auxiliary/vl/vl_compositor.c src/gallium/auxiliary/vl/vl_compositor.h src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h src/gallium/drivers/identity/id_objects.c src/gallium/drivers/identity/id_objects.h src/gallium/drivers/identity/id_screen.c src/gallium/drivers/nv40/Makefile src/gallium/drivers/nv40/nv40_screen.c src/gallium/drivers/softpipe/sp_texture.c src/gallium/drivers/softpipe/sp_texture.h src/gallium/drivers/softpipe/sp_video_context.c src/gallium/drivers/softpipe/sp_video_context.h src/gallium/include/pipe/p_format.h src/gallium/include/pipe/p_screen.h src/gallium/include/pipe/p_video_context.h src/gallium/include/pipe/p_video_state.h src/gallium/include/state_tracker/dri1_api.h src/gallium/include/state_tracker/drm_api.h src/gallium/state_trackers/dri/common/dri_context.c src/gallium/state_trackers/xorg/xvmc/attributes.c src/gallium/state_trackers/xorg/xvmc/block.c src/gallium/state_trackers/xorg/xvmc/context.c src/gallium/state_trackers/xorg/xvmc/subpicture.c src/gallium/state_trackers/xorg/xvmc/surface.c src/gallium/state_trackers/xorg/xvmc/tests/.gitignore src/gallium/state_trackers/xorg/xvmc/tests/Makefile src/gallium/state_trackers/xorg/xvmc/xvmc_private.h src/gallium/winsys/drm/radeon/core/radeon_drm.c src/gallium/winsys/g3dvl/vl_winsys.h src/gallium/winsys/g3dvl/xlib/xsp_winsys.c src/gallium/winsys/sw/Makefile
Diffstat (limited to 'src/gallium/auxiliary')
-rw-r--r--src/gallium/auxiliary/Makefile53
-rw-r--r--src/gallium/auxiliary/SConscript59
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_cache.c21
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_cache.h16
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_context.c336
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_context.h52
-rw-r--r--src/gallium/auxiliary/draw/draw_context.c121
-rw-r--r--src/gallium/auxiliary/draw/draw_context.h12
-rw-r--r--src/gallium/auxiliary/draw/draw_gs.c12
-rw-r--r--src/gallium/auxiliary/draw/draw_llvm.c855
-rw-r--r--src/gallium/auxiliary/draw/draw_llvm.h159
-rw-r--r--src/gallium/auxiliary/draw/draw_llvm_translate.c497
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe.c3
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_aaline.c130
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_aapoint.c39
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_pstipple.c81
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_validate.c42
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_vbuf.c43
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_wide_line.c35
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_wide_point.c44
-rw-r--r--src/gallium/auxiliary/draw/draw_private.h30
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.c48
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.h2
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_decompose.h40
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_emit.c47
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch.c9
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_emit.c45
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c32
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c416
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_post_vs.c5
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h3
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_vcache.c12
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h99
-rw-r--r--src/gallium/auxiliary/draw/draw_vertex.c37
-rw-r--r--src/gallium/auxiliary/draw/draw_vertex.h35
-rw-r--r--src/gallium/auxiliary/draw/draw_vs.c13
-rw-r--r--src/gallium/auxiliary/draw/draw_vs.h17
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_aos.c25
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_aos_io.c12
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_exec.c2
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_llvm.c131
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_sse.c4
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_varient.c23
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld.h (renamed from src/gallium/auxiliary/gallivm/lp_bld_alpha.h)29
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_arit.c299
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_arit.h6
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_blend.h107
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_blend_aos.c360
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_blend_logicop.c109
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_blend_soa.c298
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_const.c28
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_const.h10
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_conv.c34
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_conv.h2
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_debug.h2
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_depth.c213
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_flow.c132
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_flow.h23
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_format.h51
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_format_aos.c264
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_format_query.c72
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_format_soa.c230
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_init.c (renamed from src/gallium/auxiliary/gallivm/lp_bld_init.cpp)57
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_init.h16
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_interp.c408
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_interp.h96
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_intr.h2
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_logic.c167
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_logic.h6
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_pack.c21
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_pack.h2
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_printf.c121
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_printf.h (renamed from src/gallium/auxiliary/gallivm/lp_bld_alpha.c)40
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample.c100
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample.h48
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c1385
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_struct.h2
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_swizzle.c4
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_swizzle.h2
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_tgsi.h6
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c464
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_type.c13
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_type.h54
-rw-r--r--src/gallium/auxiliary/os/os_thread.h50
-rw-r--r--src/gallium/auxiliary/os/os_time.h2
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_buffer.h44
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c34
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h1
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c2
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr.h1
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c9
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c10
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c4
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c2
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c4
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c4
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_validate.c6
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_x86sse.c2
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_x86sse.h6
-rw-r--r--src/gallium/auxiliary/target-helpers/wrap_screen.c (renamed from src/gallium/auxiliary/util/u_timed_winsys.h)37
-rw-r--r--src/gallium/auxiliary/target-helpers/wrap_screen.h16
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt36
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_dump.c1
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.c87
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_info.c8
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h4
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sanity.c19
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sse2.c16
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_text.c4
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ureg.c4
-rw-r--r--src/gallium/auxiliary/translate/translate.h3
-rw-r--r--src/gallium/auxiliary/translate/translate_generic.c27
-rw-r--r--src/gallium/auxiliary/translate/translate_sse.c14
-rw-r--r--src/gallium/auxiliary/util/.gitignore4
-rw-r--r--src/gallium/auxiliary/util/u_atomic.h2
-rw-r--r--src/gallium/auxiliary/util/u_blit.c102
-rw-r--r--src/gallium/auxiliary/util/u_blit.h6
-rw-r--r--src/gallium/auxiliary/util/u_blitter.c105
-rw-r--r--src/gallium/auxiliary/util/u_blitter.h48
-rw-r--r--src/gallium/auxiliary/util/u_box.h73
-rw-r--r--src/gallium/auxiliary/util/u_cache.c2
-rw-r--r--src/gallium/auxiliary/util/u_debug.c116
-rw-r--r--src/gallium/auxiliary/util/u_debug.h60
-rw-r--r--src/gallium/auxiliary/util/u_dirty_surfaces.h88
-rw-r--r--src/gallium/auxiliary/util/u_dl.c13
-rw-r--r--src/gallium/auxiliary/util/u_dl.h7
-rw-r--r--src/gallium/auxiliary/util/u_draw_quad.c118
-rw-r--r--src/gallium/auxiliary/util/u_draw_quad.h4
-rw-r--r--src/gallium/auxiliary/util/u_dump.h2
-rw-r--r--src/gallium/auxiliary/util/u_dump_state.c20
-rw-r--r--src/gallium/auxiliary/util/u_dynarray.h111
-rw-r--r--src/gallium/auxiliary/util/u_format.c286
-rw-r--r--src/gallium/auxiliary/util/u_format.csv109
-rw-r--r--src/gallium/auxiliary/util/u_format.h235
-rw-r--r--src/gallium/auxiliary/util/u_format_access.py341
-rw-r--r--src/gallium/auxiliary/util/u_format_other.c267
-rw-r--r--src/gallium/auxiliary/util/u_format_other.h108
-rw-r--r--src/gallium/auxiliary/util/u_format_pack.py647
-rwxr-xr-xsrc/gallium/auxiliary/util/u_format_parse.py24
-rw-r--r--src/gallium/auxiliary/util/u_format_s3tc.c728
-rw-r--r--src/gallium/auxiliary/util/u_format_s3tc.h218
-rw-r--r--src/gallium/auxiliary/util/u_format_srgb.h106
-rw-r--r--src/gallium/auxiliary/util/u_format_srgb.py101
-rwxr-xr-xsrc/gallium/auxiliary/util/u_format_table.py55
-rw-r--r--src/gallium/auxiliary/util/u_format_tests.c970
-rw-r--r--src/gallium/auxiliary/util/u_format_tests.h71
-rw-r--r--src/gallium/auxiliary/util/u_format_yuv.c1182
-rw-r--r--src/gallium/auxiliary/util/u_format_yuv.h358
-rw-r--r--src/gallium/auxiliary/util/u_format_zs.c920
-rw-r--r--src/gallium/auxiliary/util/u_format_zs.h196
-rw-r--r--src/gallium/auxiliary/util/u_gen_mipmap.c122
-rw-r--r--src/gallium/auxiliary/util/u_gen_mipmap.h4
-rw-r--r--src/gallium/auxiliary/util/u_half.h90
-rw-r--r--src/gallium/auxiliary/util/u_half.py179
-rw-r--r--src/gallium/auxiliary/util/u_init.h52
-rw-r--r--src/gallium/auxiliary/util/u_inlines.h329
-rw-r--r--src/gallium/auxiliary/util/u_memory.h2
-rw-r--r--src/gallium/auxiliary/util/u_pack_color.h32
-rw-r--r--src/gallium/auxiliary/util/u_rect.c116
-rw-r--r--src/gallium/auxiliary/util/u_resource.c96
-rw-r--r--src/gallium/auxiliary/util/u_sampler.c100
-rw-r--r--src/gallium/auxiliary/util/u_sampler.h (renamed from src/gallium/auxiliary/gallivm/lp_bld_depth.h)48
-rw-r--r--src/gallium/auxiliary/util/u_simple_screen.c153
-rw-r--r--src/gallium/auxiliary/util/u_simple_screen.h28
-rw-r--r--src/gallium/auxiliary/util/u_surface.c23
-rw-r--r--src/gallium/auxiliary/util/u_surface.h6
-rw-r--r--src/gallium/auxiliary/util/u_surfaces.c112
-rw-r--r--src/gallium/auxiliary/util/u_surfaces.h54
-rw-r--r--src/gallium/auxiliary/util/u_tile.c1180
-rw-r--r--src/gallium/auxiliary/util/u_tile.h50
-rw-r--r--src/gallium/auxiliary/util/u_timed_winsys.c312
-rw-r--r--src/gallium/auxiliary/util/u_transfer.c110
-rw-r--r--src/gallium/auxiliary/util/u_transfer.h145
-rw-r--r--src/gallium/auxiliary/util/u_upload_mgr.c75
-rw-r--r--src/gallium/auxiliary/util/u_upload_mgr.h10
-rw-r--r--src/gallium/auxiliary/vl/vl_compositor.c55
-rw-r--r--src/gallium/auxiliary/vl/vl_compositor.h5
-rw-r--r--src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c210
-rw-r--r--src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h18
179 files changed, 14645 insertions, 6311 deletions
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index ac9d50af7a9..16bb50fe168 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -43,7 +43,6 @@ C_SOURCES = \
draw/draw_vs_aos_io.c \
draw/draw_vs_aos_machine.c \
draw/draw_vs_exec.c \
- draw/draw_vs_llvm.c \
draw/draw_vs_ppc.c \
draw/draw_vs_sse.c \
indices/u_indices_gen.c \
@@ -105,9 +104,16 @@ C_SOURCES = \
util/u_cpu_detect.c \
util/u_dl.c \
util/u_draw_quad.c \
- util/u_format_access.c \
+ util/u_format.c \
+ util/u_format_other.c \
+ util/u_format_s3tc.c \
+ util/u_format_srgb.c \
util/u_format_table.c \
+ util/u_format_tests.c \
+ util/u_format_yuv.c \
+ util/u_format_zs.c \
util/u_gen_mipmap.c \
+ util/u_half.c \
util/u_handle_table.c \
util/u_hash_table.c \
util/u_hash.c \
@@ -118,53 +124,53 @@ C_SOURCES = \
util/u_mm.c \
util/u_rect.c \
util/u_ringbuffer.c \
+ util/u_sampler.c \
util/u_simple_shaders.c \
util/u_snprintf.c \
util/u_surface.c \
+ util/u_surfaces.c \
util/u_texture.c \
util/u_tile.c \
- util/u_timed_winsys.c \
+ util/u_transfer.c \
+ util/u_resource.c \
util/u_upload_mgr.c \
- util/u_simple_screen.c \
+ target-helpers/wrap_screen.c \
vl/vl_bitstream_parser.c \
vl/vl_mpeg12_mc_renderer.c \
vl/vl_compositor.c \
vl/vl_csc.c
GALLIVM_SOURCES = \
- gallivm/lp_bld_alpha.c \
gallivm/lp_bld_arit.c \
- gallivm/lp_bld_blend_aos.c \
- gallivm/lp_bld_blend_logicop.c \
- gallivm/lp_bld_blend_soa.c \
gallivm/lp_bld_const.c \
gallivm/lp_bld_conv.c \
gallivm/lp_bld_debug.c \
- gallivm/lp_bld_depth.c \
gallivm/lp_bld_flow.c \
gallivm/lp_bld_format_aos.c \
- gallivm/lp_bld_format_query.c \
gallivm/lp_bld_format_soa.c \
- gallivm/lp_bld_interp.c \
+ gallivm/lp_bld_init.c \
gallivm/lp_bld_intr.c \
gallivm/lp_bld_logic.c \
gallivm/lp_bld_pack.c \
+ gallivm/lp_bld_printf.c \
gallivm/lp_bld_sample.c \
gallivm/lp_bld_sample_soa.c \
gallivm/lp_bld_struct.c \
gallivm/lp_bld_swizzle.c \
gallivm/lp_bld_tgsi_soa.c \
- gallivm/lp_bld_type.c
+ gallivm/lp_bld_type.c \
+ draw/draw_llvm.c \
+ draw/draw_pt_fetch_shade_pipeline_llvm.c \
+ draw/draw_llvm_translate.c
-GALLIVM_CPP_SOURCES = \
- gallivm/lp_bld_init.cpp
+GALLIVM_CPP_SOURCES =
GENERATED_SOURCES = \
indices/u_indices_gen.c \
indices/u_unfilled_gen.c \
- util/u_format_access.c \
- util/u_format_pack.h \
- util/u_format_table.c
+ util/u_format_srgb.c \
+ util/u_format_table.c \
+ util/u_half.c
ifeq ($(MESA_LLVM),1)
@@ -187,12 +193,11 @@ indices/u_indices_gen.c: indices/u_indices_gen.py
indices/u_unfilled_gen.c: indices/u_unfilled_gen.py
python $< > $@
-util/u_format_table.c: util/u_format_table.py util/u_format_parse.py util/u_format.csv
- python util/u_format_table.py util/u_format.csv > $@
-
-util/u_format_pack.h: util/u_format_pack.py util/u_format_parse.py util/u_format.csv
- python util/u_format_pack.py util/u_format.csv > $@
+util/u_format_srgb.c: util/u_format_srgb.py
+ python $< > $@
-util/u_format_access.c: util/u_format_access.py util/u_format_parse.py util/u_format.csv
- python util/u_format_access.py util/u_format.csv > $@
+util/u_format_table.c: util/u_format_table.py util/u_format_pack.py util/u_format_parse.py util/u_format.csv
+ python util/u_format_table.py util/u_format.csv > $@
+util/u_half.c: util/u_half.py
+ python util/u_half.py > $@
diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript
index f0f3e783a3b..6cea83060a9 100644
--- a/src/gallium/auxiliary/SConscript
+++ b/src/gallium/auxiliary/SConscript
@@ -7,6 +7,8 @@ env.Append(CPPPATH = [
'util',
])
+env.Tool('udis86')
+
env.CodeGenerate(
target = 'indices/u_indices_gen.c',
script = 'indices/u_indices_gen.py',
@@ -22,26 +24,31 @@ env.CodeGenerate(
)
env.CodeGenerate(
- target = 'util/u_format_table.c',
- script = 'util/u_format_table.py',
- source = ['util/u_format.csv'],
- command = 'python $SCRIPT $SOURCE > $TARGET'
+ target = 'util/u_format_srgb.c',
+ script = 'util/u_format_srgb.py',
+ source = [],
+ command = python_cmd + ' $SCRIPT > $TARGET'
)
env.CodeGenerate(
- target = File('util/u_format_pack.h').srcnode(),
- script = 'util/u_format_pack.py',
+ target = 'util/u_format_table.c',
+ script = 'util/u_format_table.py',
source = ['util/u_format.csv'],
command = 'python $SCRIPT $SOURCE > $TARGET'
)
env.CodeGenerate(
- target = 'util/u_format_access.c',
- script = 'util/u_format_access.py',
- source = ['util/u_format.csv'],
- command = 'python $SCRIPT $SOURCE > $TARGET'
+ target = 'util/u_half.c',
+ script = 'util/u_half.py',
+ source = [],
+ command = 'python $SCRIPT > $TARGET'
)
+env.Depends('util/u_format_table.c', [
+ 'util/u_format_parse.py',
+ 'util/u_format_pack.py',
+])
+
source = [
'cso_cache/cso_context.c',
'cso_cache/cso_cache.c',
@@ -80,7 +87,6 @@ source = [
'draw/draw_vs_aos_io.c',
'draw/draw_vs_aos_machine.c',
'draw/draw_vs_exec.c',
- 'draw/draw_vs_llvm.c',
'draw/draw_vs_ppc.c',
'draw/draw_vs_sse.c',
'draw/draw_vs_varient.c',
@@ -147,9 +153,16 @@ source = [
'util/u_dump_state.c',
'util/u_dl.c',
'util/u_draw_quad.c',
- 'util/u_format_access.c',
+ 'util/u_format.c',
+ 'util/u_format_other.c',
+ 'util/u_format_s3tc.c',
+ 'util/u_format_srgb.c',
'util/u_format_table.c',
+ 'util/u_format_tests.c',
+ 'util/u_format_yuv.c',
+ 'util/u_format_zs.c',
'util/u_gen_mipmap.c',
+ 'util/u_half.c',
'util/u_handle_table.c',
'util/u_hash.c',
'util/u_hash_table.c',
@@ -158,47 +171,47 @@ source = [
'util/u_math.c',
'util/u_mm.c',
'util/u_rect.c',
+ 'util/u_resource.c',
'util/u_ringbuffer.c',
+ 'util/u_sampler.c',
'util/u_simple_shaders.c',
'util/u_snprintf.c',
'util/u_surface.c',
+ 'util/u_surfaces.c',
'util/u_texture.c',
'util/u_tile.c',
- 'util/u_timed_winsys.c',
+ 'util/u_transfer.c',
'util/u_upload_mgr.c',
- 'util/u_simple_screen.c',
'vl/vl_bitstream_parser.c',
'vl/vl_mpeg12_mc_renderer.c',
'vl/vl_compositor.c',
'vl/vl_csc.c',
+ 'target-helpers/wrap_screen.c',
]
-if drawllvm:
+if env['llvm']:
source += [
- 'gallivm/lp_bld_alpha.c',
'gallivm/lp_bld_arit.c',
- 'gallivm/lp_bld_blend_aos.c',
- 'gallivm/lp_bld_blend_logicop.c',
- 'gallivm/lp_bld_blend_soa.c',
'gallivm/lp_bld_const.c',
'gallivm/lp_bld_conv.c',
'gallivm/lp_bld_debug.c',
- 'gallivm/lp_bld_depth.c',
'gallivm/lp_bld_flow.c',
'gallivm/lp_bld_format_aos.c',
- 'gallivm/lp_bld_format_query.c',
'gallivm/lp_bld_format_soa.c',
- 'gallivm/lp_bld_interp.c',
'gallivm/lp_bld_intr.c',
'gallivm/lp_bld_logic.c',
- 'gallivm/lp_bld_init.cpp',
+ 'gallivm/lp_bld_init.c',
'gallivm/lp_bld_pack.c',
+ 'gallivm/lp_bld_printf.c',
'gallivm/lp_bld_sample.c',
'gallivm/lp_bld_sample_soa.c',
'gallivm/lp_bld_struct.c',
'gallivm/lp_bld_swizzle.c',
'gallivm/lp_bld_tgsi_soa.c',
'gallivm/lp_bld_type.c',
+ 'draw/draw_llvm.c',
+ 'draw/draw_pt_fetch_shade_pipeline_llvm.c',
+ 'draw/draw_llvm_translate.c'
]
gallium = env.ConvenienceLibrary(
diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c
index a6a07e72c2f..900c64df4b9 100644
--- a/src/gallium/auxiliary/cso_cache/cso_cache.c
+++ b/src/gallium/auxiliary/cso_cache/cso_cache.c
@@ -43,6 +43,7 @@ struct cso_cache {
struct cso_hash *vs_hash;
struct cso_hash *rasterizer_hash;
struct cso_hash *sampler_hash;
+ struct cso_hash *velements_hash;
int max_size;
cso_sanitize_callback sanitize_cb;
@@ -108,6 +109,9 @@ static struct cso_hash *_cso_hash_for_type(struct cso_cache *sc, enum cso_cache_
case CSO_VERTEX_SHADER:
hash = sc->vs_hash;
break;
+ case CSO_VELEMENTS:
+ hash = sc->velements_hash;
+ break;
}
return hash;
@@ -161,6 +165,13 @@ static void delete_vs_state(void *state, void *data)
FREE(state);
}
+static void delete_velements(void *state, void *data)
+{
+ struct cso_velements *cso = (struct cso_velements *)state;
+ if (cso->delete_state)
+ cso->delete_state(cso->context, cso->data);
+ FREE(state);
+}
static INLINE void delete_cso(void *state, enum cso_cache_type type)
{
@@ -183,6 +194,9 @@ static INLINE void delete_cso(void *state, enum cso_cache_type type)
case CSO_VERTEX_SHADER:
delete_vs_state(state, 0);
break;
+ case CSO_VELEMENTS:
+ delete_velements(state, 0);
+ break;
default:
assert(0);
FREE(state);
@@ -294,6 +308,7 @@ struct cso_cache *cso_cache_create(void)
sc->rasterizer_hash = cso_hash_create();
sc->fs_hash = cso_hash_create();
sc->vs_hash = cso_hash_create();
+ sc->velements_hash = cso_hash_create();
sc->sanitize_cb = sanitize_cb;
sc->sanitize_data = 0;
@@ -325,6 +340,9 @@ void cso_for_each_state(struct cso_cache *sc, enum cso_cache_type type,
case CSO_VERTEX_SHADER:
hash = sc->vs_hash;
break;
+ case CSO_VELEMENTS:
+ hash = sc->velements_hash;
+ break;
}
iter = cso_hash_first_node(hash);
@@ -351,6 +369,7 @@ void cso_cache_delete(struct cso_cache *sc)
cso_for_each_state(sc, CSO_VERTEX_SHADER, delete_vs_state, 0);
cso_for_each_state(sc, CSO_RASTERIZER, delete_rasterizer_state, 0);
cso_for_each_state(sc, CSO_SAMPLER, delete_sampler_state, 0);
+ cso_for_each_state(sc, CSO_VELEMENTS, delete_velements, 0);
cso_hash_delete(sc->blend_hash);
cso_hash_delete(sc->sampler_hash);
@@ -358,6 +377,7 @@ void cso_cache_delete(struct cso_cache *sc)
cso_hash_delete(sc->rasterizer_hash);
cso_hash_delete(sc->fs_hash);
cso_hash_delete(sc->vs_hash);
+ cso_hash_delete(sc->velements_hash);
FREE(sc);
}
@@ -372,6 +392,7 @@ void cso_set_maximum_cache_size(struct cso_cache *sc, int number)
sanitize_hash(sc, sc->vs_hash, CSO_VERTEX_SHADER, sc->max_size);
sanitize_hash(sc, sc->rasterizer_hash, CSO_RASTERIZER, sc->max_size);
sanitize_hash(sc, sc->sampler_hash, CSO_SAMPLER, sc->max_size);
+ sanitize_hash(sc, sc->velements_hash, CSO_VELEMENTS, sc->max_size);
}
int cso_maximum_cache_size(const struct cso_cache *sc)
diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.h b/src/gallium/auxiliary/cso_cache/cso_cache.h
index eea60b940bb..fb09b83c623 100644
--- a/src/gallium/auxiliary/cso_cache/cso_cache.h
+++ b/src/gallium/auxiliary/cso_cache/cso_cache.h
@@ -53,6 +53,7 @@
* - rasterizer (old setup)
* - sampler
* - vertex shader
+ * - vertex elements
*
* Things that are not constant state objects include:
* - blend_color
@@ -90,7 +91,8 @@ enum cso_cache_type {
CSO_DEPTH_STENCIL_ALPHA,
CSO_RASTERIZER,
CSO_FRAGMENT_SHADER,
- CSO_VERTEX_SHADER
+ CSO_VERTEX_SHADER,
+ CSO_VELEMENTS
};
typedef void (*cso_state_callback)(void *ctx, void *obj);
@@ -144,6 +146,18 @@ struct cso_sampler {
struct pipe_context *context;
};
+struct cso_velems_state {
+ unsigned count;
+ struct pipe_vertex_element velems[PIPE_MAX_ATTRIBS];
+};
+
+struct cso_velements {
+ struct cso_velems_state state;
+ void *data;
+ cso_state_callback delete_state;
+ struct pipe_context *context;
+};
+
unsigned cso_construct_key(void *item, int item_size);
struct cso_cache *cso_cache_create(void);
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c
index a7335c340ca..6d0b4207986 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.c
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -37,6 +37,7 @@
#include "pipe/p_state.h"
#include "util/u_inlines.h"
+#include "util/u_math.h"
#include "util/u_memory.h"
#include "tgsi/tgsi_parse.h"
@@ -69,17 +70,17 @@ struct cso_context {
unsigned nr_vertex_samplers_saved;
void *vertex_samplers_saved[PIPE_MAX_VERTEX_SAMPLERS];
- struct pipe_texture *textures[PIPE_MAX_SAMPLERS];
- uint nr_textures;
+ uint nr_fragment_sampler_views;
+ struct pipe_sampler_view *fragment_sampler_views[PIPE_MAX_SAMPLERS];
- struct pipe_texture *vertex_textures[PIPE_MAX_VERTEX_SAMPLERS];
- uint nr_vertex_textures;
+ uint nr_vertex_sampler_views;
+ struct pipe_sampler_view *vertex_sampler_views[PIPE_MAX_VERTEX_SAMPLERS];
- uint nr_textures_saved;
- struct pipe_texture *textures_saved[PIPE_MAX_SAMPLERS];
+ uint nr_fragment_sampler_views_saved;
+ struct pipe_sampler_view *fragment_sampler_views_saved[PIPE_MAX_SAMPLERS];
- uint nr_vertex_textures_saved;
- struct pipe_texture *vertex_textures_saved[PIPE_MAX_SAMPLERS];
+ uint nr_vertex_sampler_views_saved;
+ struct pipe_sampler_view *vertex_sampler_views_saved[PIPE_MAX_VERTEX_SAMPLERS];
/** Current and saved state.
* The saved state is used as a 1-deep stack.
@@ -89,6 +90,7 @@ struct cso_context {
void *rasterizer, *rasterizer_saved;
void *fragment_shader, *fragment_shader_saved, *geometry_shader;
void *vertex_shader, *vertex_shader_saved, *geometry_shader_saved;
+ void *velements, *velements_saved;
struct pipe_clip_state clip;
struct pipe_clip_state clip_saved;
@@ -174,6 +176,20 @@ static boolean delete_vs_state(struct cso_context *ctx, void *state)
return FALSE;
}
+static boolean delete_vertex_elements(struct cso_context *ctx,
+ void *state)
+{
+ struct cso_velements *cso = (struct cso_velements *)state;
+
+ if (ctx->velements == cso->data)
+ return FALSE;
+
+ if (cso->delete_state)
+ cso->delete_state(cso->context, cso->data);
+ FREE(state);
+ return TRUE;
+}
+
static INLINE boolean delete_cso(struct cso_context *ctx,
void *state, enum cso_cache_type type)
@@ -197,6 +213,9 @@ static INLINE boolean delete_cso(struct cso_context *ctx,
case CSO_VERTEX_SHADER:
return delete_vs_state(ctx, state);
break;
+ case CSO_VELEMENTS:
+ return delete_vertex_elements(ctx, state);
+ break;
default:
assert(0);
FREE(state);
@@ -271,16 +290,17 @@ void cso_release_all( struct cso_context *ctx )
ctx->pipe->bind_depth_stencil_alpha_state( ctx->pipe, NULL );
ctx->pipe->bind_fs_state( ctx->pipe, NULL );
ctx->pipe->bind_vs_state( ctx->pipe, NULL );
+ ctx->pipe->bind_vertex_elements_state( ctx->pipe, NULL );
}
for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
- pipe_texture_reference(&ctx->textures[i], NULL);
- pipe_texture_reference(&ctx->textures_saved[i], NULL);
+ pipe_sampler_view_reference(&ctx->fragment_sampler_views[i], NULL);
+ pipe_sampler_view_reference(&ctx->fragment_sampler_views_saved[i], NULL);
}
for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
- pipe_texture_reference(&ctx->vertex_textures[i], NULL);
- pipe_texture_reference(&ctx->vertex_textures_saved[i], NULL);
+ pipe_sampler_view_reference(&ctx->vertex_sampler_views[i], NULL);
+ pipe_sampler_view_reference(&ctx->vertex_sampler_views_saved[i], NULL);
}
free_framebuffer_state(&ctx->fb);
@@ -329,6 +349,7 @@ enum pipe_error cso_set_blend(struct cso_context *ctx,
if (!cso)
return PIPE_ERROR_OUT_OF_MEMORY;
+ memset(&cso->state, 0, sizeof cso->state);
memcpy(&cso->state, templ, key_size);
cso->data = ctx->pipe->create_blend_state(ctx->pipe, &cso->state);
cso->delete_state = (cso_state_callback)ctx->pipe->delete_blend_state;
@@ -597,114 +618,6 @@ cso_restore_vertex_samplers(struct cso_context *ctx)
}
-enum pipe_error cso_set_sampler_textures( struct cso_context *ctx,
- uint count,
- struct pipe_texture **textures )
-{
- uint i;
-
- ctx->nr_textures = count;
-
- for (i = 0; i < count; i++)
- pipe_texture_reference(&ctx->textures[i], textures[i]);
- for ( ; i < PIPE_MAX_SAMPLERS; i++)
- pipe_texture_reference(&ctx->textures[i], NULL);
-
- ctx->pipe->set_fragment_sampler_textures(ctx->pipe, count, textures);
-
- return PIPE_OK;
-}
-
-void cso_save_sampler_textures( struct cso_context *ctx )
-{
- uint i;
-
- ctx->nr_textures_saved = ctx->nr_textures;
- for (i = 0; i < ctx->nr_textures; i++) {
- assert(!ctx->textures_saved[i]);
- pipe_texture_reference(&ctx->textures_saved[i], ctx->textures[i]);
- }
-}
-
-void cso_restore_sampler_textures( struct cso_context *ctx )
-{
- uint i;
-
- ctx->nr_textures = ctx->nr_textures_saved;
-
- for (i = 0; i < ctx->nr_textures; i++) {
- pipe_texture_reference(&ctx->textures[i], NULL);
- ctx->textures[i] = ctx->textures_saved[i];
- ctx->textures_saved[i] = NULL;
- }
- for ( ; i < PIPE_MAX_SAMPLERS; i++)
- pipe_texture_reference(&ctx->textures[i], NULL);
-
- ctx->pipe->set_fragment_sampler_textures(ctx->pipe, ctx->nr_textures, ctx->textures);
-
- ctx->nr_textures_saved = 0;
-}
-
-
-
-enum pipe_error
-cso_set_vertex_sampler_textures(struct cso_context *ctx,
- uint count,
- struct pipe_texture **textures)
-{
- uint i;
-
- ctx->nr_vertex_textures = count;
-
- for (i = 0; i < count; i++) {
- pipe_texture_reference(&ctx->vertex_textures[i], textures[i]);
- }
- for ( ; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
- pipe_texture_reference(&ctx->vertex_textures[i], NULL);
- }
-
- ctx->pipe->set_vertex_sampler_textures(ctx->pipe, count, textures);
-
- return PIPE_OK;
-}
-
-void
-cso_save_vertex_sampler_textures(struct cso_context *ctx)
-{
- uint i;
-
- ctx->nr_vertex_textures_saved = ctx->nr_vertex_textures;
- for (i = 0; i < ctx->nr_vertex_textures; i++) {
- assert(!ctx->vertex_textures_saved[i]);
- pipe_texture_reference(&ctx->vertex_textures_saved[i], ctx->vertex_textures[i]);
- }
-}
-
-void
-cso_restore_vertex_sampler_textures(struct cso_context *ctx)
-{
- uint i;
-
- ctx->nr_vertex_textures = ctx->nr_vertex_textures_saved;
-
- for (i = 0; i < ctx->nr_vertex_textures; i++) {
- pipe_texture_reference(&ctx->vertex_textures[i], NULL);
- ctx->vertex_textures[i] = ctx->vertex_textures_saved[i];
- ctx->vertex_textures_saved[i] = NULL;
- }
- for ( ; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
- pipe_texture_reference(&ctx->vertex_textures[i], NULL);
- }
-
- ctx->pipe->set_vertex_sampler_textures(ctx->pipe,
- ctx->nr_vertex_textures,
- ctx->vertex_textures);
-
- ctx->nr_vertex_textures_saved = 0;
-}
-
-
-
enum pipe_error cso_set_depth_stencil_alpha(struct cso_context *ctx,
const struct pipe_depth_stencil_alpha_state *templ)
{
@@ -1130,7 +1043,6 @@ void cso_restore_geometry_shader(struct cso_context *ctx)
ctx->geometry_shader_saved = NULL;
}
-
/* clip state */
static INLINE void
@@ -1180,3 +1092,185 @@ cso_restore_clip(struct cso_context *ctx)
ctx->pipe->set_clip_state(ctx->pipe, &ctx->clip_saved);
}
}
+
+enum pipe_error cso_set_vertex_elements(struct cso_context *ctx,
+ unsigned count,
+ const struct pipe_vertex_element *states)
+{
+ unsigned key_size, hash_key;
+ struct cso_hash_iter iter;
+ void *handle;
+ struct cso_velems_state velems_state;
+
+ /* need to include the count into the stored state data too.
+ Otherwise first few count pipe_vertex_elements could be identical even if count
+ is different, and there's no guarantee the hash would be different in that
+ case neither */
+ key_size = sizeof(struct pipe_vertex_element) * count + sizeof(unsigned);
+ velems_state.count = count;
+ memcpy(velems_state.velems, states, sizeof(struct pipe_vertex_element) * count);
+ hash_key = cso_construct_key((void*)&velems_state, key_size);
+ iter = cso_find_state_template(ctx->cache, hash_key, CSO_VELEMENTS, (void*)&velems_state, key_size);
+
+ if (cso_hash_iter_is_null(iter)) {
+ struct cso_velements *cso = MALLOC(sizeof(struct cso_velements));
+ if (!cso)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ memcpy(&cso->state, &velems_state, key_size);
+ cso->data = ctx->pipe->create_vertex_elements_state(ctx->pipe, count, &cso->state.velems[0]);
+ cso->delete_state = (cso_state_callback)ctx->pipe->delete_vertex_elements_state;
+ cso->context = ctx->pipe;
+
+ iter = cso_insert_state(ctx->cache, hash_key, CSO_VELEMENTS, cso);
+ if (cso_hash_iter_is_null(iter)) {
+ FREE(cso);
+ return PIPE_ERROR_OUT_OF_MEMORY;
+ }
+
+ handle = cso->data;
+ }
+ else {
+ handle = ((struct cso_velements *)cso_hash_iter_data(iter))->data;
+ }
+
+ if (ctx->velements != handle) {
+ ctx->velements = handle;
+ ctx->pipe->bind_vertex_elements_state(ctx->pipe, handle);
+ }
+ return PIPE_OK;
+}
+
+void cso_save_vertex_elements(struct cso_context *ctx)
+{
+ assert(!ctx->velements_saved);
+ ctx->velements_saved = ctx->velements;
+}
+
+void cso_restore_vertex_elements(struct cso_context *ctx)
+{
+ if (ctx->velements != ctx->velements_saved) {
+ ctx->velements = ctx->velements_saved;
+ ctx->pipe->bind_vertex_elements_state(ctx->pipe, ctx->velements_saved);
+ }
+ ctx->velements_saved = NULL;
+}
+
+/* fragment sampler view state */
+
+void
+cso_set_fragment_sampler_views(struct cso_context *cso,
+ uint count,
+ struct pipe_sampler_view **views)
+{
+ uint i;
+
+ for (i = 0; i < count; i++) {
+ pipe_sampler_view_reference(&cso->fragment_sampler_views[i], views[i]);
+ }
+ for (; i < cso->nr_fragment_sampler_views; i++) {
+ pipe_sampler_view_reference(&cso->fragment_sampler_views[i], NULL);
+ }
+
+ cso->pipe->set_fragment_sampler_views(cso->pipe,
+ MAX2(count, cso->nr_fragment_sampler_views),
+ cso->fragment_sampler_views);
+
+ cso->nr_fragment_sampler_views = count;
+}
+
+void
+cso_save_fragment_sampler_views(struct cso_context *cso)
+{
+ uint i;
+
+ cso->nr_fragment_sampler_views_saved = cso->nr_fragment_sampler_views;
+
+ for (i = 0; i < cso->nr_fragment_sampler_views; i++) {
+ assert(!cso->fragment_sampler_views_saved[i]);
+
+ pipe_sampler_view_reference(&cso->fragment_sampler_views_saved[i],
+ cso->fragment_sampler_views[i]);
+ }
+}
+
+void
+cso_restore_fragment_sampler_views(struct cso_context *cso)
+{
+ uint i;
+
+ for (i = 0; i < cso->nr_fragment_sampler_views_saved; i++) {
+ pipe_sampler_view_reference(&cso->fragment_sampler_views[i], cso->fragment_sampler_views_saved[i]);
+ pipe_sampler_view_reference(&cso->fragment_sampler_views_saved[i], NULL);
+ }
+ for (; i < cso->nr_fragment_sampler_views; i++) {
+ pipe_sampler_view_reference(&cso->fragment_sampler_views[i], NULL);
+ }
+
+ cso->pipe->set_fragment_sampler_views(cso->pipe,
+ MAX2(cso->nr_fragment_sampler_views, cso->nr_fragment_sampler_views_saved),
+ cso->fragment_sampler_views);
+
+ cso->nr_fragment_sampler_views = cso->nr_fragment_sampler_views_saved;
+ cso->nr_fragment_sampler_views_saved = 0;
+}
+
+
+/* vertex sampler view state */
+
+void
+cso_set_vertex_sampler_views(struct cso_context *cso,
+ uint count,
+ struct pipe_sampler_view **views)
+{
+ uint i;
+
+ for (i = 0; i < count; i++) {
+ pipe_sampler_view_reference(&cso->vertex_sampler_views[i], views[i]);
+ }
+ for (; i < cso->nr_vertex_sampler_views; i++) {
+ pipe_sampler_view_reference(&cso->vertex_sampler_views[i], NULL);
+ }
+
+ cso->pipe->set_vertex_sampler_views(cso->pipe,
+ MAX2(count, cso->nr_vertex_sampler_views),
+ cso->vertex_sampler_views);
+
+ cso->nr_vertex_sampler_views = count;
+}
+
+void
+cso_save_vertex_sampler_views(struct cso_context *cso)
+{
+ uint i;
+
+ cso->nr_vertex_sampler_views_saved = cso->nr_vertex_sampler_views;
+
+ for (i = 0; i < cso->nr_vertex_sampler_views; i++) {
+ assert(!cso->vertex_sampler_views_saved[i]);
+
+ pipe_sampler_view_reference(&cso->vertex_sampler_views_saved[i],
+ cso->vertex_sampler_views[i]);
+ }
+}
+
+void
+cso_restore_vertex_sampler_views(struct cso_context *cso)
+{
+ uint i;
+
+ for (i = 0; i < cso->nr_vertex_sampler_views_saved; i++) {
+ pipe_sampler_view_reference(&cso->vertex_sampler_views[i], cso->vertex_sampler_views_saved[i]);
+ pipe_sampler_view_reference(&cso->vertex_sampler_views_saved[i], NULL);
+ }
+ for (; i < cso->nr_vertex_sampler_views; i++) {
+ pipe_sampler_view_reference(&cso->vertex_sampler_views[i], NULL);
+ }
+
+ cso->pipe->set_vertex_sampler_views(cso->pipe,
+ MAX2(cso->nr_vertex_sampler_views, cso->nr_vertex_sampler_views_saved),
+ cso->vertex_sampler_views);
+
+ cso->nr_vertex_sampler_views = cso->nr_vertex_sampler_views_saved;
+ cso->nr_vertex_sampler_views_saved = 0;
+}
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h
index 251a9a644f8..d6bcb1fe8f7 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.h
+++ b/src/gallium/auxiliary/cso_cache/cso_context.h
@@ -103,24 +103,11 @@ void
cso_single_vertex_sampler_done(struct cso_context *cso);
-
-enum pipe_error cso_set_sampler_textures( struct cso_context *cso,
- uint count,
- struct pipe_texture **textures );
-void cso_save_sampler_textures( struct cso_context *cso );
-void cso_restore_sampler_textures( struct cso_context *cso );
-
-
-
-enum pipe_error
-cso_set_vertex_sampler_textures(struct cso_context *cso,
- uint count,
- struct pipe_texture **textures);
-void
-cso_save_vertex_sampler_textures(struct cso_context *cso);
-void
-cso_restore_vertex_sampler_textures(struct cso_context *cso);
-
+enum pipe_error cso_set_vertex_elements(struct cso_context *ctx,
+ unsigned count,
+ const struct pipe_vertex_element *states);
+void cso_save_vertex_elements(struct cso_context *ctx);
+void cso_restore_vertex_elements(struct cso_context *ctx);
/* These aren't really sensible -- most of the time the api provides
@@ -157,7 +144,6 @@ void cso_save_geometry_shader(struct cso_context *cso);
void cso_restore_geometry_shader(struct cso_context *cso);
-
enum pipe_error cso_set_framebuffer(struct cso_context *cso,
const struct pipe_framebuffer_state *fb);
void cso_save_framebuffer(struct cso_context *cso);
@@ -193,6 +179,34 @@ void
cso_restore_clip(struct cso_context *cso);
+/* fragment sampler view state */
+
+void
+cso_set_fragment_sampler_views(struct cso_context *cso,
+ uint count,
+ struct pipe_sampler_view **views);
+
+void
+cso_save_fragment_sampler_views(struct cso_context *cso);
+
+void
+cso_restore_fragment_sampler_views(struct cso_context *cso);
+
+
+/* vertex sampler view state */
+
+void
+cso_set_vertex_sampler_views(struct cso_context *cso,
+ uint count,
+ struct pipe_sampler_view **views);
+
+void
+cso_save_vertex_sampler_views(struct cso_context *cso);
+
+void
+cso_restore_vertex_sampler_views(struct cso_context *cso);
+
+
#ifdef __cplusplus
}
#endif
diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c
index bb0988543f5..02abddf1491 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -31,19 +31,49 @@
*/
+#include "pipe/p_context.h"
#include "util/u_memory.h"
#include "util/u_math.h"
#include "draw_context.h"
#include "draw_vs.h"
#include "draw_gs.h"
+#if HAVE_LLVM
+#include "gallivm/lp_bld_init.h"
+#endif
-struct draw_context *draw_create( void )
+struct draw_context *draw_create( struct pipe_context *pipe )
{
struct draw_context *draw = CALLOC_STRUCT( draw_context );
if (draw == NULL)
goto fail;
+#if HAVE_LLVM
+ lp_build_init();
+ assert(lp_build_engine);
+ draw->engine = lp_build_engine;
+#endif
+
+ if (!draw_init(draw))
+ goto fail;
+
+ draw->pipe = pipe;
+
+ return draw;
+
+fail:
+ draw_destroy( draw );
+ return NULL;
+}
+
+boolean draw_init(struct draw_context *draw)
+{
+ /*
+ * Note that several functions compute the clipmask of the predefined
+ * formats with hardcoded formulas instead of using these. So modifications
+ * here must be reflected there too.
+ */
+
ASSIGN_4V( draw->plane[0], -1, 0, 0, 1 );
ASSIGN_4V( draw->plane[1], 1, 0, 0, 1 );
ASSIGN_4V( draw->plane[2], 0, -1, 0, 1 );
@@ -57,31 +87,40 @@ struct draw_context *draw_create( void )
if (!draw_pipeline_init( draw ))
- goto fail;
+ return FALSE;
if (!draw_pt_init( draw ))
- goto fail;
+ return FALSE;
if (!draw_vs_init( draw ))
- goto fail;
+ return FALSE;
if (!draw_gs_init( draw ))
- goto fail;
+ return FALSE;
- return draw;
-
-fail:
- draw_destroy( draw );
- return NULL;
+ return TRUE;
}
void draw_destroy( struct draw_context *draw )
{
+ struct pipe_context *pipe;
+ int i, j;
+
if (!draw)
return;
+ pipe = draw->pipe;
+ /* free any rasterizer CSOs that we may have created.
+ */
+ for (i = 0; i < 2; i++) {
+ for (j = 0; j < 2; j++) {
+ if (draw->rasterizer_no_cull[i][j]) {
+ pipe->delete_rasterizer_state(pipe, draw->rasterizer_no_cull[i][j]);
+ }
+ }
+ }
/* Not so fast -- we're just borrowing this at the moment.
*
@@ -123,12 +162,17 @@ void draw_set_mrd(struct draw_context *draw, double mrd)
* This causes the drawing pipeline to be rebuilt.
*/
void draw_set_rasterizer_state( struct draw_context *draw,
- const struct pipe_rasterizer_state *raster )
+ const struct pipe_rasterizer_state *raster,
+ void *rast_handle )
{
- draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
+ if (!draw->suspend_flushing) {
+ draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
- draw->rasterizer = raster;
- draw->bypass_clipping = draw->driver.bypass_clipping;
+ draw->rasterizer = raster;
+ draw->rast_handle = rast_handle;
+
+ draw->bypass_clipping = draw->driver.bypass_clipping;
+ }
}
@@ -261,6 +305,17 @@ draw_wide_point_threshold(struct draw_context *draw, float threshold)
/**
+ * Should the draw module handle point->quad conversion for drawing sprites?
+ */
+void
+draw_wide_point_sprites(struct draw_context *draw, boolean draw_sprite)
+{
+ draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
+ draw->pipeline.wide_point_sprites = draw_sprite;
+}
+
+
+/**
* Tells the draw module to draw lines with triangles if their width
* is greater than this threshold.
*/
@@ -413,12 +468,14 @@ void draw_set_render( struct draw_context *draw,
void
draw_set_mapped_element_buffer_range( struct draw_context *draw,
unsigned eltSize,
+ int eltBias,
unsigned min_index,
unsigned max_index,
const void *elements )
{
draw->pt.user.elts = elements;
draw->pt.user.eltSize = eltSize;
+ draw->pt.user.eltBias = eltBias;
draw->pt.user.min_index = min_index;
draw->pt.user.max_index = max_index;
}
@@ -427,10 +484,12 @@ draw_set_mapped_element_buffer_range( struct draw_context *draw,
void
draw_set_mapped_element_buffer( struct draw_context *draw,
unsigned eltSize,
+ int eltBias,
const void *elements )
{
draw->pt.user.elts = elements;
draw->pt.user.eltSize = eltSize;
+ draw->pt.user.eltBias = eltBias;
draw->pt.user.min_index = 0;
draw->pt.user.max_index = 0xffffffff;
}
@@ -481,3 +540,37 @@ draw_current_shader_position_output(const struct draw_context *draw)
return draw->gs.position_output;
return draw->vs.position_output;
}
+
+
+/**
+ * Return a pointer/handle for a driver/CSO rasterizer object which
+ * disabled culling, stippling, unfilled tris, etc.
+ * This is used by some pipeline stages (such as wide_point, aa_line
+ * and aa_point) which convert points/lines into triangles. In those
+ * cases we don't want to accidentally cull the triangles.
+ *
+ * \param scissor should the rasterizer state enable scissoring?
+ * \param flatshade should the rasterizer state use flat shading?
+ * \return rasterizer CSO handle
+ */
+void *
+draw_get_rasterizer_no_cull( struct draw_context *draw,
+ boolean scissor,
+ boolean flatshade )
+{
+ if (!draw->rasterizer_no_cull[scissor][flatshade]) {
+ /* create now */
+ struct pipe_context *pipe = draw->pipe;
+ struct pipe_rasterizer_state rast;
+
+ memset(&rast, 0, sizeof(rast));
+ rast.scissor = scissor;
+ rast.flatshade = flatshade;
+ rast.front_winding = PIPE_WINDING_CCW;
+ rast.gl_rasterization_rules = draw->rasterizer->gl_rasterization_rules;
+
+ draw->rasterizer_no_cull[scissor][flatshade] =
+ pipe->create_rasterizer_state(pipe, &rast);
+ }
+ return draw->rasterizer_no_cull[scissor][flatshade];
+}
diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h
index acd81b9712d..b905c2f2da6 100644
--- a/src/gallium/auxiliary/draw/draw_context.h
+++ b/src/gallium/auxiliary/draw/draw_context.h
@@ -40,7 +40,6 @@
#include "pipe/p_state.h"
-
struct pipe_context;
struct draw_context;
struct draw_stage;
@@ -49,7 +48,7 @@ struct draw_geometry_shader;
struct tgsi_sampler;
-struct draw_context *draw_create( void );
+struct draw_context *draw_create( struct pipe_context *pipe );
void draw_destroy( struct draw_context *draw );
@@ -60,13 +59,16 @@ void draw_set_clip_state( struct draw_context *pipe,
const struct pipe_clip_state *clip );
void draw_set_rasterizer_state( struct draw_context *draw,
- const struct pipe_rasterizer_state *raster );
+ const struct pipe_rasterizer_state *raster,
+ void *rast_handle );
void draw_set_rasterize_stage( struct draw_context *draw,
struct draw_stage *stage );
void draw_wide_point_threshold(struct draw_context *draw, float threshold);
+void draw_wide_point_sprites(struct draw_context *draw, boolean draw_sprite);
+
void draw_wide_line_threshold(struct draw_context *draw, float threshold);
void draw_enable_line_stipple(struct draw_context *draw, boolean enable);
@@ -140,12 +142,14 @@ void draw_set_vertex_elements(struct draw_context *draw,
void
draw_set_mapped_element_buffer_range( struct draw_context *draw,
unsigned eltSize,
+ int eltBias,
unsigned min_index,
unsigned max_index,
const void *elements );
void draw_set_mapped_element_buffer( struct draw_context *draw,
unsigned eltSize,
+ int eltBias,
const void *elements );
void draw_set_mapped_vertex_buffer(struct draw_context *draw,
@@ -197,6 +201,4 @@ boolean draw_need_pipeline(const struct draw_context *draw,
const struct pipe_rasterizer_state *rasterizer,
unsigned prim );
-
-
#endif /* DRAW_CONTEXT_H */
diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c
index 7069aa6b181..131deed43e4 100644
--- a/src/gallium/auxiliary/draw/draw_gs.c
+++ b/src/gallium/auxiliary/draw/draw_gs.c
@@ -342,10 +342,10 @@ void draw_geometry_shader_delete(struct draw_geometry_shader *shader)
void draw_geometry_shader_prepare(struct draw_geometry_shader *shader,
struct draw_context *draw)
{
- if (shader->machine->Tokens != shader->state.tokens) {
- tgsi_exec_machine_bind_shader(shader->machine,
- shader->state.tokens,
- draw->gs.num_samplers,
- draw->gs.samplers);
- }
+ if (shader && shader->machine->Tokens != shader->state.tokens) {
+ tgsi_exec_machine_bind_shader(shader->machine,
+ shader->state.tokens,
+ draw->gs.num_samplers,
+ draw->gs.samplers);
+ }
}
diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c
new file mode 100644
index 00000000000..27383221b9b
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -0,0 +1,855 @@
+#include "draw_llvm.h"
+
+#include "draw_context.h"
+#include "draw_vs.h"
+
+#include "gallivm/lp_bld_arit.h"
+#include "gallivm/lp_bld_struct.h"
+#include "gallivm/lp_bld_type.h"
+#include "gallivm/lp_bld_flow.h"
+#include "gallivm/lp_bld_debug.h"
+#include "gallivm/lp_bld_tgsi.h"
+#include "gallivm/lp_bld_printf.h"
+
+#include "tgsi/tgsi_exec.h"
+
+#include "util/u_cpu_detect.h"
+#include "util/u_string.h"
+
+#include <llvm-c/Transforms/Scalar.h>
+
+#define DEBUG_STORE 0
+
+
+/* generates the draw jit function */
+static void
+draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var);
+static void
+draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *var);
+
+static void
+init_globals(struct draw_llvm *llvm)
+{
+ LLVMTypeRef texture_type;
+
+ /* struct draw_jit_texture */
+ {
+ LLVMTypeRef elem_types[4];
+
+ elem_types[DRAW_JIT_TEXTURE_WIDTH] = LLVMInt32Type();
+ elem_types[DRAW_JIT_TEXTURE_HEIGHT] = LLVMInt32Type();
+ elem_types[DRAW_JIT_TEXTURE_STRIDE] = LLVMInt32Type();
+ elem_types[DRAW_JIT_TEXTURE_DATA] = LLVMPointerType(LLVMInt8Type(), 0);
+
+ texture_type = LLVMStructType(elem_types, Elements(elem_types), 0);
+
+ LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, width,
+ llvm->target, texture_type,
+ DRAW_JIT_TEXTURE_WIDTH);
+ LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, height,
+ llvm->target, texture_type,
+ DRAW_JIT_TEXTURE_HEIGHT);
+ LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, stride,
+ llvm->target, texture_type,
+ DRAW_JIT_TEXTURE_STRIDE);
+ LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, data,
+ llvm->target, texture_type,
+ DRAW_JIT_TEXTURE_DATA);
+ LP_CHECK_STRUCT_SIZE(struct draw_jit_texture,
+ llvm->target, texture_type);
+
+ LLVMAddTypeName(llvm->module, "texture", texture_type);
+ }
+
+
+ /* struct draw_jit_context */
+ {
+ LLVMTypeRef elem_types[3];
+ LLVMTypeRef context_type;
+
+ elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */
+ elem_types[1] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */
+ elem_types[2] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */
+
+ context_type = LLVMStructType(elem_types, Elements(elem_types), 0);
+
+ LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_constants,
+ llvm->target, context_type, 0);
+ LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, gs_constants,
+ llvm->target, context_type, 1);
+ LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, textures,
+ llvm->target, context_type,
+ DRAW_JIT_CONTEXT_TEXTURES_INDEX);
+ LP_CHECK_STRUCT_SIZE(struct draw_jit_context,
+ llvm->target, context_type);
+
+ LLVMAddTypeName(llvm->module, "draw_jit_context", context_type);
+
+ llvm->context_ptr_type = LLVMPointerType(context_type, 0);
+ }
+ {
+ LLVMTypeRef buffer_ptr = LLVMPointerType(LLVMIntType(8), 0);
+ llvm->buffer_ptr_type = LLVMPointerType(buffer_ptr, 0);
+ }
+ /* struct pipe_vertex_buffer */
+ {
+ LLVMTypeRef elem_types[4];
+ LLVMTypeRef vb_type;
+
+ elem_types[0] = LLVMInt32Type();
+ elem_types[1] = LLVMInt32Type();
+ elem_types[2] = LLVMInt32Type();
+ elem_types[3] = LLVMPointerType(LLVMOpaqueType(), 0); /* vs_constants */
+
+ vb_type = LLVMStructType(elem_types, Elements(elem_types), 0);
+
+ LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, stride,
+ llvm->target, vb_type, 0);
+ LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer_offset,
+ llvm->target, vb_type, 2);
+ LP_CHECK_STRUCT_SIZE(struct pipe_vertex_buffer,
+ llvm->target, vb_type);
+
+ LLVMAddTypeName(llvm->module, "pipe_vertex_buffer", vb_type);
+
+ llvm->vb_ptr_type = LLVMPointerType(vb_type, 0);
+ }
+}
+
+static LLVMTypeRef
+create_vertex_header(struct draw_llvm *llvm, int data_elems)
+{
+ /* struct vertex_header */
+ LLVMTypeRef elem_types[3];
+ LLVMTypeRef vertex_header;
+ char struct_name[24];
+
+ util_snprintf(struct_name, 23, "vertex_header%d", data_elems);
+
+ elem_types[0] = LLVMIntType(32);
+ elem_types[1] = LLVMArrayType(LLVMFloatType(), 4);
+ elem_types[2] = LLVMArrayType(elem_types[1], data_elems);
+
+ vertex_header = LLVMStructType(elem_types, Elements(elem_types), 0);
+
+ /* these are bit-fields and we can't take address of them
+ LP_CHECK_MEMBER_OFFSET(struct vertex_header, clipmask,
+ llvm->target, vertex_header,
+ DRAW_JIT_VERTEX_CLIPMASK);
+ LP_CHECK_MEMBER_OFFSET(struct vertex_header, edgeflag,
+ llvm->target, vertex_header,
+ DRAW_JIT_VERTEX_EDGEFLAG);
+ LP_CHECK_MEMBER_OFFSET(struct vertex_header, pad,
+ llvm->target, vertex_header,
+ DRAW_JIT_VERTEX_PAD);
+ LP_CHECK_MEMBER_OFFSET(struct vertex_header, vertex_id,
+ llvm->target, vertex_header,
+ DRAW_JIT_VERTEX_VERTEX_ID);
+ */
+ LP_CHECK_MEMBER_OFFSET(struct vertex_header, clip,
+ llvm->target, vertex_header,
+ DRAW_JIT_VERTEX_CLIP);
+ LP_CHECK_MEMBER_OFFSET(struct vertex_header, data,
+ llvm->target, vertex_header,
+ DRAW_JIT_VERTEX_DATA);
+
+ LLVMAddTypeName(llvm->module, struct_name, vertex_header);
+
+ return LLVMPointerType(vertex_header, 0);
+}
+
+struct draw_llvm *
+draw_llvm_create(struct draw_context *draw)
+{
+ struct draw_llvm *llvm = CALLOC_STRUCT( draw_llvm );
+
+ util_cpu_detect();
+
+ llvm->draw = draw;
+ llvm->engine = draw->engine;
+
+ debug_assert(llvm->engine);
+
+ llvm->module = LLVMModuleCreateWithName("draw_llvm");
+ llvm->provider = LLVMCreateModuleProviderForExistingModule(llvm->module);
+
+ LLVMAddModuleProvider(llvm->engine, llvm->provider);
+
+ llvm->target = LLVMGetExecutionEngineTargetData(llvm->engine);
+
+ llvm->pass = LLVMCreateFunctionPassManager(llvm->provider);
+ LLVMAddTargetData(llvm->target, llvm->pass);
+ /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
+ * but there are more on SVN. */
+ /* TODO: Add more passes */
+ LLVMAddConstantPropagationPass(llvm->pass);
+ if(util_cpu_caps.has_sse4_1) {
+ /* FIXME: There is a bug in this pass, whereby the combination of fptosi
+ * and sitofp (necessary for trunc/floor/ceil/round implementation)
+ * somehow becomes invalid code.
+ */
+ LLVMAddInstructionCombiningPass(llvm->pass);
+ }
+ LLVMAddPromoteMemoryToRegisterPass(llvm->pass);
+ LLVMAddGVNPass(llvm->pass);
+ LLVMAddCFGSimplificationPass(llvm->pass);
+
+ init_globals(llvm);
+
+
+#if 0
+ LLVMDumpModule(llvm->module);
+#endif
+
+ return llvm;
+}
+
+void
+draw_llvm_destroy(struct draw_llvm *llvm)
+{
+ FREE(llvm);
+}
+
+struct draw_llvm_variant *
+draw_llvm_prepare(struct draw_llvm *llvm, int num_inputs)
+{
+ struct draw_llvm_variant *variant = MALLOC(sizeof(struct draw_llvm_variant));
+
+ draw_llvm_make_variant_key(llvm, &variant->key);
+
+ llvm->vertex_header_ptr_type = create_vertex_header(llvm, num_inputs);
+
+ draw_llvm_generate(llvm, variant);
+ draw_llvm_generate_elts(llvm, variant);
+
+ return variant;
+}
+
+static void
+generate_vs(struct draw_llvm *llvm,
+ LLVMBuilderRef builder,
+ LLVMValueRef (*outputs)[NUM_CHANNELS],
+ const LLVMValueRef (*inputs)[NUM_CHANNELS],
+ LLVMValueRef context_ptr)
+{
+ const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens;
+ struct lp_type vs_type;
+ LLVMValueRef consts_ptr = draw_jit_context_vs_constants(builder, context_ptr);
+
+ memset(&vs_type, 0, sizeof vs_type);
+ vs_type.floating = TRUE; /* floating point values */
+ vs_type.sign = TRUE; /* values are signed */
+ vs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */
+ vs_type.width = 32; /* 32-bit float */
+ vs_type.length = 4; /* 4 elements per vector */
+#if 0
+ num_vs = 4; /* number of vertices per block */
+#endif
+
+ /*tgsi_dump(tokens, 0);*/
+ lp_build_tgsi_soa(builder,
+ tokens,
+ vs_type,
+ NULL /*struct lp_build_mask_context *mask*/,
+ consts_ptr,
+ NULL /*pos*/,
+ inputs,
+ outputs,
+ NULL/*sampler*/,
+ &llvm->draw->vs.vertex_shader->info);
+}
+
+#if DEBUG_STORE
+static void print_vectorf(LLVMBuilderRef builder,
+ LLVMValueRef vec)
+{
+ LLVMValueRef val[4];
+ val[0] = LLVMBuildExtractElement(builder, vec,
+ LLVMConstInt(LLVMInt32Type(), 0, 0), "");
+ val[1] = LLVMBuildExtractElement(builder, vec,
+ LLVMConstInt(LLVMInt32Type(), 1, 0), "");
+ val[2] = LLVMBuildExtractElement(builder, vec,
+ LLVMConstInt(LLVMInt32Type(), 2, 0), "");
+ val[3] = LLVMBuildExtractElement(builder, vec,
+ LLVMConstInt(LLVMInt32Type(), 3, 0), "");
+ lp_build_printf(builder, "vector = [%f, %f, %f, %f]\n",
+ val[0], val[1], val[2], val[3]);
+}
+#endif
+
+static void
+generate_fetch(LLVMBuilderRef builder,
+ LLVMValueRef vbuffers_ptr,
+ LLVMValueRef *res,
+ struct pipe_vertex_element *velem,
+ LLVMValueRef vbuf,
+ LLVMValueRef index)
+{
+ LLVMValueRef indices = LLVMConstInt(LLVMInt64Type(), velem->vertex_buffer_index, 0);
+ LLVMValueRef vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr,
+ &indices, 1, "");
+ LLVMValueRef vb_stride = draw_jit_vbuffer_stride(builder, vbuf);
+ LLVMValueRef vb_max_index = draw_jit_vbuffer_max_index(builder, vbuf);
+ LLVMValueRef vb_buffer_offset = draw_jit_vbuffer_offset(builder, vbuf);
+ LLVMValueRef cond;
+ LLVMValueRef stride;
+
+ cond = LLVMBuildICmp(builder, LLVMIntULE, index, vb_max_index, "");
+
+ index = LLVMBuildSelect(builder, cond, index, vb_max_index, "");
+
+ stride = LLVMBuildMul(builder, vb_stride, index, "");
+
+ vbuffer_ptr = LLVMBuildLoad(builder, vbuffer_ptr, "vbuffer");
+
+ stride = LLVMBuildAdd(builder, stride,
+ vb_buffer_offset,
+ "");
+ stride = LLVMBuildAdd(builder, stride,
+ LLVMConstInt(LLVMInt32Type(), velem->src_offset, 0),
+ "");
+
+ /*lp_build_printf(builder, "vbuf index = %d, stride is %d\n", indices, stride);*/
+ vbuffer_ptr = LLVMBuildGEP(builder, vbuffer_ptr, &stride, 1, "");
+
+ *res = draw_llvm_translate_from(builder, vbuffer_ptr, velem->src_format);
+}
+
+static LLVMValueRef
+aos_to_soa(LLVMBuilderRef builder,
+ LLVMValueRef val0,
+ LLVMValueRef val1,
+ LLVMValueRef val2,
+ LLVMValueRef val3,
+ LLVMValueRef channel)
+{
+ LLVMValueRef ex, res;
+
+ ex = LLVMBuildExtractElement(builder, val0,
+ channel, "");
+ res = LLVMBuildInsertElement(builder,
+ LLVMConstNull(LLVMTypeOf(val0)),
+ ex,
+ LLVMConstInt(LLVMInt32Type(), 0, 0),
+ "");
+
+ ex = LLVMBuildExtractElement(builder, val1,
+ channel, "");
+ res = LLVMBuildInsertElement(builder,
+ res, ex,
+ LLVMConstInt(LLVMInt32Type(), 1, 0),
+ "");
+
+ ex = LLVMBuildExtractElement(builder, val2,
+ channel, "");
+ res = LLVMBuildInsertElement(builder,
+ res, ex,
+ LLVMConstInt(LLVMInt32Type(), 2, 0),
+ "");
+
+ ex = LLVMBuildExtractElement(builder, val3,
+ channel, "");
+ res = LLVMBuildInsertElement(builder,
+ res, ex,
+ LLVMConstInt(LLVMInt32Type(), 3, 0),
+ "");
+
+ return res;
+}
+
+static void
+soa_to_aos(LLVMBuilderRef builder,
+ LLVMValueRef soa[NUM_CHANNELS],
+ LLVMValueRef aos[NUM_CHANNELS])
+{
+ LLVMValueRef comp;
+ int i = 0;
+
+ debug_assert(NUM_CHANNELS == 4);
+
+ aos[0] = LLVMConstNull(LLVMTypeOf(soa[0]));
+ aos[1] = aos[2] = aos[3] = aos[0];
+
+ for (i = 0; i < NUM_CHANNELS; ++i) {
+ LLVMValueRef channel = LLVMConstInt(LLVMInt32Type(), i, 0);
+
+ comp = LLVMBuildExtractElement(builder, soa[i],
+ LLVMConstInt(LLVMInt32Type(), 0, 0), "");
+ aos[0] = LLVMBuildInsertElement(builder, aos[0], comp, channel, "");
+
+ comp = LLVMBuildExtractElement(builder, soa[i],
+ LLVMConstInt(LLVMInt32Type(), 1, 0), "");
+ aos[1] = LLVMBuildInsertElement(builder, aos[1], comp, channel, "");
+
+ comp = LLVMBuildExtractElement(builder, soa[i],
+ LLVMConstInt(LLVMInt32Type(), 2, 0), "");
+ aos[2] = LLVMBuildInsertElement(builder, aos[2], comp, channel, "");
+
+ comp = LLVMBuildExtractElement(builder, soa[i],
+ LLVMConstInt(LLVMInt32Type(), 3, 0), "");
+ aos[3] = LLVMBuildInsertElement(builder, aos[3], comp, channel, "");
+
+ }
+}
+
+static void
+convert_to_soa(LLVMBuilderRef builder,
+ LLVMValueRef (*aos)[NUM_CHANNELS],
+ LLVMValueRef (*soa)[NUM_CHANNELS],
+ int num_attribs)
+{
+ int i;
+
+ debug_assert(NUM_CHANNELS == 4);
+
+ for (i = 0; i < num_attribs; ++i) {
+ LLVMValueRef val0 = aos[i][0];
+ LLVMValueRef val1 = aos[i][1];
+ LLVMValueRef val2 = aos[i][2];
+ LLVMValueRef val3 = aos[i][3];
+
+ soa[i][0] = aos_to_soa(builder, val0, val1, val2, val3,
+ LLVMConstInt(LLVMInt32Type(), 0, 0));
+ soa[i][1] = aos_to_soa(builder, val0, val1, val2, val3,
+ LLVMConstInt(LLVMInt32Type(), 1, 0));
+ soa[i][2] = aos_to_soa(builder, val0, val1, val2, val3,
+ LLVMConstInt(LLVMInt32Type(), 2, 0));
+ soa[i][3] = aos_to_soa(builder, val0, val1, val2, val3,
+ LLVMConstInt(LLVMInt32Type(), 3, 0));
+ }
+}
+
+static void
+store_aos(LLVMBuilderRef builder,
+ LLVMValueRef io_ptr,
+ LLVMValueRef index,
+ LLVMValueRef value)
+{
+ LLVMValueRef id_ptr = draw_jit_header_id(builder, io_ptr);
+ LLVMValueRef data_ptr = draw_jit_header_data(builder, io_ptr);
+ LLVMValueRef indices[3];
+
+ indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ indices[1] = index;
+ indices[2] = LLVMConstInt(LLVMInt32Type(), 0, 0);
+
+ /* undefined vertex */
+ LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(),
+ 0xffff, 0), id_ptr);
+
+#if DEBUG_STORE
+ lp_build_printf(builder, " ---- %p storing attribute %d (io = %p)\n", data_ptr, index, io_ptr);
+#endif
+#if 0
+ /*lp_build_printf(builder, " ---- %p storing at %d (%p) ", io_ptr, index, data_ptr);
+ print_vectorf(builder, value);*/
+ data_ptr = LLVMBuildBitCast(builder, data_ptr,
+ LLVMPointerType(LLVMArrayType(LLVMVectorType(LLVMFloatType(), 4), 0), 0),
+ "datavec");
+ data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 2, "");
+
+ LLVMBuildStore(builder, value, data_ptr);
+#else
+ {
+ LLVMValueRef x, y, z, w;
+ LLVMValueRef idx0, idx1, idx2, idx3;
+ LLVMValueRef gep0, gep1, gep2, gep3;
+ data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 3, "");
+
+ idx0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ idx1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
+ idx2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
+ idx3 = LLVMConstInt(LLVMInt32Type(), 3, 0);
+
+ x = LLVMBuildExtractElement(builder, value,
+ idx0, "");
+ y = LLVMBuildExtractElement(builder, value,
+ idx1, "");
+ z = LLVMBuildExtractElement(builder, value,
+ idx2, "");
+ w = LLVMBuildExtractElement(builder, value,
+ idx3, "");
+
+ gep0 = LLVMBuildGEP(builder, data_ptr, &idx0, 1, "");
+ gep1 = LLVMBuildGEP(builder, data_ptr, &idx1, 1, "");
+ gep2 = LLVMBuildGEP(builder, data_ptr, &idx2, 1, "");
+ gep3 = LLVMBuildGEP(builder, data_ptr, &idx3, 1, "");
+
+ /*lp_build_printf(builder, "##### x = %f (%p), y = %f (%p), z = %f (%p), w = %f (%p)\n",
+ x, gep0, y, gep1, z, gep2, w, gep3);*/
+ LLVMBuildStore(builder, x, gep0);
+ LLVMBuildStore(builder, y, gep1);
+ LLVMBuildStore(builder, z, gep2);
+ LLVMBuildStore(builder, w, gep3);
+ }
+#endif
+}
+
+static void
+store_aos_array(LLVMBuilderRef builder,
+ LLVMValueRef io_ptr,
+ LLVMValueRef aos[NUM_CHANNELS],
+ int attrib,
+ int num_outputs)
+{
+ LLVMValueRef attr_index = LLVMConstInt(LLVMInt32Type(), attrib, 0);
+ LLVMValueRef ind0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ LLVMValueRef ind1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
+ LLVMValueRef ind2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
+ LLVMValueRef ind3 = LLVMConstInt(LLVMInt32Type(), 3, 0);
+ LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr;
+
+ debug_assert(NUM_CHANNELS == 4);
+
+ io0_ptr = LLVMBuildGEP(builder, io_ptr,
+ &ind0, 1, "");
+ io1_ptr = LLVMBuildGEP(builder, io_ptr,
+ &ind1, 1, "");
+ io2_ptr = LLVMBuildGEP(builder, io_ptr,
+ &ind2, 1, "");
+ io3_ptr = LLVMBuildGEP(builder, io_ptr,
+ &ind3, 1, "");
+
+#if DEBUG_STORE
+ lp_build_printf(builder, " io = %p, indexes[%d, %d, %d, %d]\n",
+ io_ptr, ind0, ind1, ind2, ind3);
+#endif
+
+ store_aos(builder, io0_ptr, attr_index, aos[0]);
+ store_aos(builder, io1_ptr, attr_index, aos[1]);
+ store_aos(builder, io2_ptr, attr_index, aos[2]);
+ store_aos(builder, io3_ptr, attr_index, aos[3]);
+}
+
+static void
+convert_to_aos(LLVMBuilderRef builder,
+ LLVMValueRef io,
+ LLVMValueRef (*outputs)[NUM_CHANNELS],
+ int num_outputs,
+ int max_vertices)
+{
+ unsigned chan, attrib;
+
+#if DEBUG_STORE
+ lp_build_printf(builder, " # storing begin\n");
+#endif
+ for (attrib = 0; attrib < num_outputs; ++attrib) {
+ LLVMValueRef soa[4];
+ LLVMValueRef aos[4];
+ for(chan = 0; chan < NUM_CHANNELS; ++chan) {
+ if(outputs[attrib][chan]) {
+ LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
+ lp_build_name(out, "output%u.%c", attrib, "xyzw"[chan]);
+ /*lp_build_printf(builder, "output %d : %d ",
+ LLVMConstInt(LLVMInt32Type(), attrib, 0),
+ LLVMConstInt(LLVMInt32Type(), chan, 0));
+ print_vectorf(builder, out);*/
+ soa[chan] = out;
+ } else
+ soa[chan] = 0;
+ }
+ soa_to_aos(builder, soa, aos);
+ store_aos_array(builder,
+ io,
+ aos,
+ attrib,
+ num_outputs);
+ }
+#if DEBUG_STORE
+ lp_build_printf(builder, " # storing end\n");
+#endif
+}
+
+static void
+draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
+{
+ LLVMTypeRef arg_types[7];
+ LLVMTypeRef func_type;
+ LLVMValueRef context_ptr;
+ LLVMBasicBlockRef block;
+ LLVMBuilderRef builder;
+ LLVMValueRef start, end, count, stride, step, io_itr;
+ LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
+ struct draw_context *draw = llvm->draw;
+ unsigned i, j;
+ struct lp_build_context bld;
+ struct lp_build_loop_state lp_loop;
+ struct lp_type vs_type = lp_type_float_vec(32);
+ const int max_vertices = 4;
+ LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
+
+ arg_types[0] = llvm->context_ptr_type; /* context */
+ arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */
+ arg_types[2] = llvm->buffer_ptr_type; /* vbuffers */
+ arg_types[3] = LLVMInt32Type(); /* start */
+ arg_types[4] = LLVMInt32Type(); /* count */
+ arg_types[5] = LLVMInt32Type(); /* stride */
+ arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */
+
+ func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
+
+ variant->function = LLVMAddFunction(llvm->module, "draw_llvm_shader", func_type);
+ LLVMSetFunctionCallConv(variant->function, LLVMCCallConv);
+ for(i = 0; i < Elements(arg_types); ++i)
+ if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
+ LLVMAddAttribute(LLVMGetParam(variant->function, i), LLVMNoAliasAttribute);
+
+ context_ptr = LLVMGetParam(variant->function, 0);
+ io_ptr = LLVMGetParam(variant->function, 1);
+ vbuffers_ptr = LLVMGetParam(variant->function, 2);
+ start = LLVMGetParam(variant->function, 3);
+ count = LLVMGetParam(variant->function, 4);
+ stride = LLVMGetParam(variant->function, 5);
+ vb_ptr = LLVMGetParam(variant->function, 6);
+
+ lp_build_name(context_ptr, "context");
+ lp_build_name(io_ptr, "io");
+ lp_build_name(vbuffers_ptr, "vbuffers");
+ lp_build_name(start, "start");
+ lp_build_name(count, "count");
+ lp_build_name(stride, "stride");
+ lp_build_name(vb_ptr, "vb");
+
+ /*
+ * Function body
+ */
+
+ block = LLVMAppendBasicBlock(variant->function, "entry");
+ builder = LLVMCreateBuilder();
+ LLVMPositionBuilderAtEnd(builder, block);
+
+ lp_build_context_init(&bld, builder, vs_type);
+
+ end = lp_build_add(&bld, start, count);
+
+ step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0);
+
+#if DEBUG_STORE
+ lp_build_printf(builder, "start = %d, end = %d, step = %d\n",
+ start, end, step);
+#endif
+ lp_build_loop_begin(builder, start, &lp_loop);
+ {
+ LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+ LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } };
+ LLVMValueRef io;
+ const LLVMValueRef (*ptr_aos)[NUM_CHANNELS];
+
+ io_itr = LLVMBuildSub(builder, lp_loop.counter, start, "");
+ io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, "");
+#if DEBUG_STORE
+ lp_build_printf(builder, " --- io %d = %p, loop counter %d\n",
+ io_itr, io, lp_loop.counter);
+#endif
+ for (i = 0; i < NUM_CHANNELS; ++i) {
+ LLVMValueRef true_index = LLVMBuildAdd(
+ builder,
+ lp_loop.counter,
+ LLVMConstInt(LLVMInt32Type(), i, 0), "");
+ for (j = 0; j < draw->pt.nr_vertex_elements; ++j) {
+ struct pipe_vertex_element *velem = &draw->pt.vertex_element[j];
+ LLVMValueRef vb_index = LLVMConstInt(LLVMInt32Type(),
+ velem->vertex_buffer_index,
+ 0);
+ LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr,
+ &vb_index, 1, "");
+ generate_fetch(builder, vbuffers_ptr,
+ &aos_attribs[j][i], velem, vb, true_index);
+ }
+ }
+ convert_to_soa(builder, aos_attribs, inputs,
+ draw->pt.nr_vertex_elements);
+
+ ptr_aos = (const LLVMValueRef (*)[NUM_CHANNELS]) inputs;
+ generate_vs(llvm,
+ builder,
+ outputs,
+ ptr_aos,
+ context_ptr);
+
+ convert_to_aos(builder, io, outputs,
+ draw->vs.vertex_shader->info.num_outputs,
+ max_vertices);
+ }
+ lp_build_loop_end_cond(builder, end, step, LLVMIntUGE, &lp_loop);
+
+ LLVMBuildRetVoid(builder);
+
+ LLVMDisposeBuilder(builder);
+
+ /*
+ * Translate the LLVM IR into machine code.
+ */
+#ifdef DEBUG
+ if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) {
+ LLVMDumpValue(variant->function);
+ assert(0);
+ }
+#endif
+
+ LLVMRunFunctionPassManager(llvm->pass, variant->function);
+
+ if (0) {
+ LLVMDumpValue(variant->function);
+ debug_printf("\n");
+ }
+ variant->jit_func = (draw_jit_vert_func)LLVMGetPointerToGlobal(llvm->draw->engine, variant->function);
+
+ if (0)
+ lp_disassemble(variant->jit_func);
+}
+
+
+static void
+draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
+{
+ LLVMTypeRef arg_types[7];
+ LLVMTypeRef func_type;
+ LLVMValueRef context_ptr;
+ LLVMBasicBlockRef block;
+ LLVMBuilderRef builder;
+ LLVMValueRef fetch_elts, fetch_count, stride, step, io_itr;
+ LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
+ struct draw_context *draw = llvm->draw;
+ unsigned i, j;
+ struct lp_build_context bld;
+ struct lp_build_loop_state lp_loop;
+ struct lp_type vs_type = lp_type_float_vec(32);
+ const int max_vertices = 4;
+ LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
+
+ arg_types[0] = llvm->context_ptr_type; /* context */
+ arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */
+ arg_types[2] = llvm->buffer_ptr_type; /* vbuffers */
+ arg_types[3] = LLVMPointerType(LLVMInt32Type(), 0); /* fetch_elts * */
+ arg_types[4] = LLVMInt32Type(); /* fetch_count */
+ arg_types[5] = LLVMInt32Type(); /* stride */
+ arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */
+
+ func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
+
+ variant->function_elts = LLVMAddFunction(llvm->module, "draw_llvm_shader_elts", func_type);
+ LLVMSetFunctionCallConv(variant->function_elts, LLVMCCallConv);
+ for(i = 0; i < Elements(arg_types); ++i)
+ if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
+ LLVMAddAttribute(LLVMGetParam(variant->function_elts, i), LLVMNoAliasAttribute);
+
+ context_ptr = LLVMGetParam(variant->function_elts, 0);
+ io_ptr = LLVMGetParam(variant->function_elts, 1);
+ vbuffers_ptr = LLVMGetParam(variant->function_elts, 2);
+ fetch_elts = LLVMGetParam(variant->function_elts, 3);
+ fetch_count = LLVMGetParam(variant->function_elts, 4);
+ stride = LLVMGetParam(variant->function_elts, 5);
+ vb_ptr = LLVMGetParam(variant->function_elts, 6);
+
+ lp_build_name(context_ptr, "context");
+ lp_build_name(io_ptr, "io");
+ lp_build_name(vbuffers_ptr, "vbuffers");
+ lp_build_name(fetch_elts, "fetch_elts");
+ lp_build_name(fetch_count, "fetch_count");
+ lp_build_name(stride, "stride");
+ lp_build_name(vb_ptr, "vb");
+
+ /*
+ * Function body
+ */
+
+ block = LLVMAppendBasicBlock(variant->function_elts, "entry");
+ builder = LLVMCreateBuilder();
+ LLVMPositionBuilderAtEnd(builder, block);
+
+ lp_build_context_init(&bld, builder, vs_type);
+
+ step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0);
+
+ lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), &lp_loop);
+ {
+ LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+ LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } };
+ LLVMValueRef io;
+ const LLVMValueRef (*ptr_aos)[NUM_CHANNELS];
+
+ io_itr = lp_loop.counter;
+ io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, "");
+#if DEBUG_STORE
+ lp_build_printf(builder, " --- io %d = %p, loop counter %d\n",
+ io_itr, io, lp_loop.counter);
+#endif
+ for (i = 0; i < NUM_CHANNELS; ++i) {
+ LLVMValueRef true_index = LLVMBuildAdd(
+ builder,
+ lp_loop.counter,
+ LLVMConstInt(LLVMInt32Type(), i, 0), "");
+ LLVMValueRef fetch_ptr = LLVMBuildGEP(builder, fetch_elts,
+ &true_index, 1, "");
+ true_index = LLVMBuildLoad(builder, fetch_ptr, "fetch_elt");
+ for (j = 0; j < draw->pt.nr_vertex_elements; ++j) {
+ struct pipe_vertex_element *velem = &draw->pt.vertex_element[j];
+ LLVMValueRef vb_index = LLVMConstInt(LLVMInt32Type(),
+ velem->vertex_buffer_index,
+ 0);
+ LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr,
+ &vb_index, 1, "");
+ generate_fetch(builder, vbuffers_ptr,
+ &aos_attribs[j][i], velem, vb, true_index);
+ }
+ }
+ convert_to_soa(builder, aos_attribs, inputs,
+ draw->pt.nr_vertex_elements);
+
+ ptr_aos = (const LLVMValueRef (*)[NUM_CHANNELS]) inputs;
+ generate_vs(llvm,
+ builder,
+ outputs,
+ ptr_aos,
+ context_ptr);
+
+ convert_to_aos(builder, io, outputs,
+ draw->vs.vertex_shader->info.num_outputs,
+ max_vertices);
+ }
+ lp_build_loop_end_cond(builder, fetch_count, step, LLVMIntUGE, &lp_loop);
+
+ LLVMBuildRetVoid(builder);
+
+ LLVMDisposeBuilder(builder);
+
+ /*
+ * Translate the LLVM IR into machine code.
+ */
+#ifdef DEBUG
+ if(LLVMVerifyFunction(variant->function_elts, LLVMPrintMessageAction)) {
+ LLVMDumpValue(variant->function_elts);
+ assert(0);
+ }
+#endif
+
+ LLVMRunFunctionPassManager(llvm->pass, variant->function_elts);
+
+ if (0) {
+ LLVMDumpValue(variant->function_elts);
+ debug_printf("\n");
+ }
+ variant->jit_func_elts = (draw_jit_vert_func_elts)LLVMGetPointerToGlobal(
+ llvm->draw->engine, variant->function_elts);
+
+ if (0)
+ lp_disassemble(variant->jit_func_elts);
+}
+
+void
+draw_llvm_make_variant_key(struct draw_llvm *llvm,
+ struct draw_llvm_variant_key *key)
+{
+ memset(key, 0, sizeof(struct draw_llvm_variant_key));
+
+ key->nr_vertex_elements = llvm->draw->pt.nr_vertex_elements;
+
+ memcpy(key->vertex_element,
+ llvm->draw->pt.vertex_element,
+ sizeof(struct pipe_vertex_element) * key->nr_vertex_elements);
+
+ memcpy(&key->vs,
+ &llvm->draw->vs.vertex_shader->state,
+ sizeof(struct pipe_shader_state));
+}
diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h
new file mode 100644
index 00000000000..58fee7f9d60
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_llvm.h
@@ -0,0 +1,159 @@
+#ifndef HAVE_LLVM_H
+#define HAVE_LLVM_H
+
+#include "draw/draw_private.h"
+
+#include "pipe/p_context.h"
+
+#include <llvm-c/Core.h>
+#include <llvm-c/Analysis.h>
+#include <llvm-c/Target.h>
+#include <llvm-c/ExecutionEngine.h>
+
+struct draw_jit_texture
+{
+ uint32_t width;
+ uint32_t height;
+ uint32_t stride;
+ const void *data;
+};
+
+enum {
+ DRAW_JIT_TEXTURE_WIDTH = 0,
+ DRAW_JIT_TEXTURE_HEIGHT,
+ DRAW_JIT_TEXTURE_STRIDE,
+ DRAW_JIT_TEXTURE_DATA
+};
+
+enum {
+ DRAW_JIT_VERTEX_VERTEX_ID = 0,
+ DRAW_JIT_VERTEX_CLIP,
+ DRAW_JIT_VERTEX_DATA
+};
+
+/**
+ * This structure is passed directly to the generated vertex shader.
+ *
+ * It contains the derived state.
+ *
+ * Changes here must be reflected in the draw_jit_context_* macros.
+ * Changes to the ordering should be avoided.
+ *
+ * Only use types with a clear size and padding here, in particular prefer the
+ * stdint.h types to the basic integer types.
+ */
+struct draw_jit_context
+{
+ const float *vs_constants;
+ const float *gs_constants;
+
+
+ struct draw_jit_texture textures[PIPE_MAX_SAMPLERS];
+};
+
+
+#define draw_jit_context_vs_constants(_builder, _ptr) \
+ lp_build_struct_get(_builder, _ptr, 0, "vs_constants")
+
+#define draw_jit_context_gs_constants(_builder, _ptr) \
+ lp_build_struct_get(_builder, _ptr, 1, "gs_constants")
+
+#define DRAW_JIT_CONTEXT_TEXTURES_INDEX 2
+
+#define draw_jit_context_textures(_builder, _ptr) \
+ lp_build_struct_get_ptr(_builder, _ptr, DRAW_JIT_CONTEXT_TEXTURES_INDEX, "textures")
+
+
+
+#define draw_jit_header_id(_builder, _ptr) \
+ lp_build_struct_get_ptr(_builder, _ptr, 0, "id")
+
+#define draw_jit_header_clip(_builder, _ptr) \
+ lp_build_struct_get(_builder, _ptr, 1, "clip")
+
+#define draw_jit_header_data(_builder, _ptr) \
+ lp_build_struct_get_ptr(_builder, _ptr, 2, "data")
+
+
+#define draw_jit_vbuffer_stride(_builder, _ptr) \
+ lp_build_struct_get(_builder, _ptr, 0, "stride")
+
+#define draw_jit_vbuffer_max_index(_builder, _ptr) \
+ lp_build_struct_get(_builder, _ptr, 1, "max_index")
+
+#define draw_jit_vbuffer_offset(_builder, _ptr) \
+ lp_build_struct_get(_builder, _ptr, 2, "buffer_offset")
+
+
+typedef void
+(*draw_jit_vert_func)(struct draw_jit_context *context,
+ struct vertex_header *io,
+ const char *vbuffers[PIPE_MAX_ATTRIBS],
+ unsigned start,
+ unsigned count,
+ unsigned stride,
+ struct pipe_vertex_buffer *vertex_buffers);
+
+
+typedef void
+(*draw_jit_vert_func_elts)(struct draw_jit_context *context,
+ struct vertex_header *io,
+ const char *vbuffers[PIPE_MAX_ATTRIBS],
+ const unsigned *fetch_elts,
+ unsigned fetch_count,
+ unsigned stride,
+ struct pipe_vertex_buffer *vertex_buffers);
+
+struct draw_llvm {
+ struct draw_context *draw;
+
+ struct draw_jit_context jit_context;
+
+ LLVMModuleRef module;
+ LLVMExecutionEngineRef engine;
+ LLVMModuleProviderRef provider;
+ LLVMTargetDataRef target;
+ LLVMPassManagerRef pass;
+
+ LLVMTypeRef context_ptr_type;
+ LLVMTypeRef vertex_header_ptr_type;
+ LLVMTypeRef buffer_ptr_type;
+ LLVMTypeRef vb_ptr_type;
+};
+
+struct draw_llvm_variant_key
+{
+ struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
+ unsigned nr_vertex_elements;
+ struct pipe_shader_state vs;
+};
+
+struct draw_llvm_variant
+{
+ struct draw_llvm_variant_key key;
+ LLVMValueRef function;
+ LLVMValueRef function_elts;
+ draw_jit_vert_func jit_func;
+ draw_jit_vert_func_elts jit_func_elts;
+
+ struct draw_llvm_variant *next;
+};
+
+struct draw_llvm *
+draw_llvm_create(struct draw_context *draw);
+
+void
+draw_llvm_destroy(struct draw_llvm *llvm);
+
+struct draw_llvm_variant *
+draw_llvm_prepare(struct draw_llvm *llvm, int num_inputs);
+
+void
+draw_llvm_make_variant_key(struct draw_llvm *llvm,
+ struct draw_llvm_variant_key *key);
+
+LLVMValueRef
+draw_llvm_translate_from(LLVMBuilderRef builder,
+ LLVMValueRef vbuffer,
+ enum pipe_format from_format);
+#endif
diff --git a/src/gallium/auxiliary/draw/draw_llvm_translate.c b/src/gallium/auxiliary/draw/draw_llvm_translate.c
new file mode 100644
index 00000000000..d7da7ed357d
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_llvm_translate.c
@@ -0,0 +1,497 @@
+#include "draw_private.h"
+#include "draw_context.h"
+
+#include "draw_llvm.h"
+
+#include "gallivm/lp_bld_arit.h"
+#include "gallivm/lp_bld_struct.h"
+#include "gallivm/lp_bld_format.h"
+#include "gallivm/lp_bld_debug.h"
+
+#include "util/u_memory.h"
+#include "util/u_format.h"
+#include "pipe/p_state.h"
+
+
+#define DRAW_DBG 0
+
+static LLVMValueRef
+from_64_float(LLVMBuilderRef builder, LLVMValueRef val)
+{
+ LLVMValueRef bc = LLVMBuildBitCast(builder, val,
+ LLVMPointerType(LLVMDoubleType(), 0) , "");
+ LLVMValueRef l = LLVMBuildLoad(builder, bc, "");
+ return LLVMBuildFPTrunc(builder, l, LLVMFloatType(), "");
+}
+
+static LLVMValueRef
+from_32_float(LLVMBuilderRef builder, LLVMValueRef val)
+{
+ LLVMValueRef bc = LLVMBuildBitCast(builder, val,
+ LLVMPointerType(LLVMFloatType(), 0) , "");
+ return LLVMBuildLoad(builder, bc, "");
+}
+
+static INLINE LLVMValueRef
+from_8_uscaled(LLVMBuilderRef builder, LLVMValueRef val)
+{
+ LLVMValueRef l = LLVMBuildLoad(builder, val, "");
+ return LLVMBuildUIToFP(builder, l, LLVMFloatType(), "");
+}
+
+static INLINE LLVMValueRef
+from_16_uscaled(LLVMBuilderRef builder, LLVMValueRef val)
+{
+ LLVMValueRef bc = LLVMBuildBitCast(builder, val,
+ LLVMPointerType(LLVMIntType(16), 0) , "");
+ LLVMValueRef l = LLVMBuildLoad(builder, bc, "");
+ return LLVMBuildUIToFP(builder, l, LLVMFloatType(), "");
+}
+
+static INLINE LLVMValueRef
+from_32_uscaled(LLVMBuilderRef builder, LLVMValueRef val)
+{
+ LLVMValueRef bc = LLVMBuildBitCast(builder, val,
+ LLVMPointerType(LLVMIntType(32), 0) , "");
+ LLVMValueRef l = LLVMBuildLoad(builder, bc, "");
+ return LLVMBuildUIToFP(builder, l, LLVMFloatType(), "");
+}
+
+static INLINE LLVMValueRef
+from_8_sscaled(LLVMBuilderRef builder, LLVMValueRef val)
+{
+ LLVMValueRef l = LLVMBuildLoad(builder, val, "");
+ return LLVMBuildSIToFP(builder, l, LLVMFloatType(), "");
+}
+
+static INLINE LLVMValueRef
+from_16_sscaled(LLVMBuilderRef builder, LLVMValueRef val)
+{
+ LLVMValueRef bc = LLVMBuildBitCast(builder, val,
+ LLVMPointerType(LLVMIntType(16), 0) , "");
+ LLVMValueRef l = LLVMBuildLoad(builder, bc, "");
+ return LLVMBuildSIToFP(builder, l, LLVMFloatType(), "");
+}
+
+static INLINE LLVMValueRef
+from_32_sscaled(LLVMBuilderRef builder, LLVMValueRef val)
+{
+ LLVMValueRef bc = LLVMBuildBitCast(builder, val,
+ LLVMPointerType(LLVMIntType(32), 0) , "");
+ LLVMValueRef l = LLVMBuildLoad(builder, bc, "");
+ return LLVMBuildSIToFP(builder, l, LLVMFloatType(), "");
+}
+
+
+static INLINE LLVMValueRef
+from_8_unorm(LLVMBuilderRef builder, LLVMValueRef val)
+{
+ LLVMValueRef l = LLVMBuildLoad(builder, val, "");
+ LLVMValueRef uscaled = LLVMBuildUIToFP(builder, l, LLVMFloatType(), "");
+ return LLVMBuildFDiv(builder, uscaled,
+ LLVMConstReal(LLVMFloatType(), 255.), "");
+}
+
+static INLINE LLVMValueRef
+from_16_unorm(LLVMBuilderRef builder, LLVMValueRef val)
+{
+ LLVMValueRef bc = LLVMBuildBitCast(builder, val,
+ LLVMPointerType(LLVMIntType(16), 0) , "");
+ LLVMValueRef l = LLVMBuildLoad(builder, bc, "");
+ LLVMValueRef uscaled = LLVMBuildUIToFP(builder, l, LLVMFloatType(), "");
+ return LLVMBuildFDiv(builder, uscaled,
+ LLVMConstReal(LLVMFloatType(), 65535.), "");
+}
+
+static INLINE LLVMValueRef
+from_32_unorm(LLVMBuilderRef builder, LLVMValueRef val)
+{
+ LLVMValueRef bc = LLVMBuildBitCast(builder, val,
+ LLVMPointerType(LLVMIntType(32), 0) , "");
+ LLVMValueRef l = LLVMBuildLoad(builder, bc, "");
+ LLVMValueRef uscaled = LLVMBuildUIToFP(builder, l, LLVMFloatType(), "");
+
+ return LLVMBuildFDiv(builder, uscaled,
+ LLVMConstReal(LLVMFloatType(), 4294967295.), "");
+}
+
+static INLINE LLVMValueRef
+from_8_snorm(LLVMBuilderRef builder, LLVMValueRef val)
+{
+ LLVMValueRef l = LLVMBuildLoad(builder, val, "");
+ LLVMValueRef uscaled = LLVMBuildSIToFP(builder, l, LLVMFloatType(), "");
+ return LLVMBuildFDiv(builder, uscaled,
+ LLVMConstReal(LLVMFloatType(), 127.0), "");
+}
+
+static INLINE LLVMValueRef
+from_16_snorm(LLVMBuilderRef builder, LLVMValueRef val)
+{
+ LLVMValueRef bc = LLVMBuildBitCast(builder, val,
+ LLVMPointerType(LLVMIntType(16), 0) , "");
+ LLVMValueRef l = LLVMBuildLoad(builder, bc, "");
+ LLVMValueRef uscaled = LLVMBuildSIToFP(builder, l, LLVMFloatType(), "");
+ return LLVMBuildFDiv(builder, uscaled,
+ LLVMConstReal(LLVMFloatType(), 32767.0f), "");
+}
+
+static INLINE LLVMValueRef
+from_32_snorm(LLVMBuilderRef builder, LLVMValueRef val)
+{
+ LLVMValueRef bc = LLVMBuildBitCast(builder, val,
+ LLVMPointerType(LLVMIntType(32), 0) , "");
+ LLVMValueRef l = LLVMBuildLoad(builder, bc, "");
+ LLVMValueRef uscaled = LLVMBuildSIToFP(builder, l, LLVMFloatType(), "");
+
+ return LLVMBuildFDiv(builder, uscaled,
+ LLVMConstReal(LLVMFloatType(), 2147483647.0), "");
+}
+
+static INLINE LLVMValueRef
+from_32_fixed(LLVMBuilderRef builder, LLVMValueRef val)
+{
+ LLVMValueRef bc = LLVMBuildBitCast(builder, val,
+ LLVMPointerType(LLVMIntType(32), 0) , "");
+ LLVMValueRef l = LLVMBuildLoad(builder, bc, "");
+ LLVMValueRef uscaled = LLVMBuildSIToFP(builder, l, LLVMFloatType(), "");
+
+ return LLVMBuildFDiv(builder, uscaled,
+ LLVMConstReal(LLVMFloatType(), 65536.0), "");
+}
+
+static LLVMValueRef
+to_64_float(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+ LLVMValueRef l = LLVMBuildLoad(builder, fp, "");
+ return LLVMBuildFPExt(builder, l, LLVMDoubleType(), "");
+}
+
+static LLVMValueRef
+to_32_float(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+ return LLVMBuildLoad(builder, fp, "");
+}
+
+static INLINE LLVMValueRef
+to_8_uscaled(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+ LLVMValueRef l = LLVMBuildLoad(builder, fp, "");
+ return LLVMBuildFPToUI(builder, l, LLVMIntType(8), "");
+}
+
+static INLINE LLVMValueRef
+to_16_uscaled(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+ LLVMValueRef l = LLVMBuildLoad(builder, fp, "");
+ return LLVMBuildFPToUI(builder, l, LLVMIntType(16), "");
+}
+
+static INLINE LLVMValueRef
+to_32_uscaled(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+ LLVMValueRef l = LLVMBuildLoad(builder, fp, "");
+ return LLVMBuildFPToUI(builder, l, LLVMIntType(32), "");
+}
+
+static INLINE LLVMValueRef
+to_8_sscaled(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+ LLVMValueRef l = LLVMBuildLoad(builder, fp, "");
+ return LLVMBuildFPToSI(builder, l, LLVMIntType(8), "");
+}
+
+static INLINE LLVMValueRef
+to_16_sscaled(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+ LLVMValueRef l = LLVMBuildLoad(builder, fp, "");
+ return LLVMBuildFPToSI(builder, l, LLVMIntType(16), "");
+}
+
+static INLINE LLVMValueRef
+to_32_sscaled(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+ LLVMValueRef l = LLVMBuildLoad(builder, fp, "");
+ return LLVMBuildFPToSI(builder, l, LLVMIntType(32), "");
+}
+
+static INLINE LLVMValueRef
+to_8_unorm(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+ LLVMValueRef l = LLVMBuildLoad(builder, fp, "");
+ LLVMValueRef uscaled = LLVMBuildFPToUI(builder, l, LLVMIntType(8), "");
+ return LLVMBuildFMul(builder, uscaled,
+ LLVMConstReal(LLVMFloatType(), 255.), "");
+}
+
+static INLINE LLVMValueRef
+to_16_unorm(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+ LLVMValueRef l = LLVMBuildLoad(builder, fp, "");
+ LLVMValueRef uscaled = LLVMBuildFPToUI(builder, l, LLVMIntType(32), "");
+ return LLVMBuildFMul(builder, uscaled,
+ LLVMConstReal(LLVMFloatType(), 65535.), "");
+}
+
+static INLINE LLVMValueRef
+to_32_unorm(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+ LLVMValueRef l = LLVMBuildLoad(builder, fp, "");
+ LLVMValueRef uscaled = LLVMBuildFPToUI(builder, l, LLVMIntType(32), "");
+
+ return LLVMBuildFMul(builder, uscaled,
+ LLVMConstReal(LLVMFloatType(), 4294967295.), "");
+}
+
+static INLINE LLVMValueRef
+to_8_snorm(LLVMBuilderRef builder, LLVMValueRef val)
+{
+ LLVMValueRef l = LLVMBuildLoad(builder, val, "");
+ LLVMValueRef uscaled = LLVMBuildFPToSI(builder, l, LLVMIntType(8), "");
+ return LLVMBuildFMul(builder, uscaled,
+ LLVMConstReal(LLVMFloatType(), 127.0), "");
+}
+
+static INLINE LLVMValueRef
+to_16_snorm(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+ LLVMValueRef l = LLVMBuildLoad(builder, fp, "");
+ LLVMValueRef uscaled = LLVMBuildFPToSI(builder, l, LLVMIntType(16), "");
+ return LLVMBuildFMul(builder, uscaled,
+ LLVMConstReal(LLVMFloatType(), 32767.0f), "");
+}
+
+static INLINE LLVMValueRef
+to_32_snorm(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+ LLVMValueRef l = LLVMBuildLoad(builder, fp, "");
+ LLVMValueRef uscaled = LLVMBuildFPToSI(builder, l, LLVMIntType(32), "");
+
+ return LLVMBuildFMul(builder, uscaled,
+ LLVMConstReal(LLVMFloatType(), 2147483647.0), "");
+}
+
+static INLINE LLVMValueRef
+to_32_fixed(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+ LLVMValueRef l = LLVMBuildLoad(builder, fp, "");
+ LLVMValueRef uscaled = LLVMBuildFPToSI(builder, l, LLVMIntType(32), "");
+
+ return LLVMBuildFMul(builder, uscaled,
+ LLVMConstReal(LLVMFloatType(), 65536.0), "");
+}
+
+typedef LLVMValueRef (*from_func)(LLVMBuilderRef, LLVMValueRef);
+typedef LLVMValueRef (*to_func)(LLVMBuilderRef, LLVMValueRef);
+
+/* so that underneath can avoid function calls which are prohibited
+ * for static initialization we need this conversion */
+enum ll_type {
+ LL_Double,
+ LL_Float,
+ LL_Int32,
+ LL_Int16,
+ LL_Int8
+};
+
+static INLINE LLVMTypeRef
+ll_type_to_llvm(enum ll_type type)
+{
+ switch (type) {
+ case LL_Double:
+ return LLVMDoubleType();
+ case LL_Float:
+ return LLVMFloatType();
+ case LL_Int32:
+ return LLVMInt32Type();
+ case LL_Int16:
+ return LLVMIntType(16);
+ case LL_Int8:
+ return LLVMIntType(8);
+ }
+ return LLVMIntType(8);
+}
+
+static INLINE int
+ll_type_size(enum ll_type type)
+{
+ switch (type) {
+ case LL_Double:
+ return 8;
+ case LL_Float:
+ return 4;
+ case LL_Int32:
+ return 4;
+ case LL_Int16:
+ return 2;
+ case LL_Int8:
+ return 1;
+ }
+ return 1;
+}
+
+struct draw_llvm_translate {
+ int format;
+ from_func from;
+ to_func to;
+ enum ll_type type;
+ int num_components;
+} translates[] =
+{
+ {PIPE_FORMAT_R64_FLOAT, from_64_float, to_64_float, LL_Double, 1},
+ {PIPE_FORMAT_R64G64_FLOAT, from_64_float, to_64_float, LL_Double, 2},
+ {PIPE_FORMAT_R64G64B64_FLOAT, from_64_float, to_64_float, LL_Double, 3},
+ {PIPE_FORMAT_R64G64B64A64_FLOAT, from_64_float, to_64_float, LL_Double, 4},
+ {PIPE_FORMAT_R32_FLOAT, from_32_float, to_32_float, LL_Float, 1},
+ {PIPE_FORMAT_R32G32_FLOAT, from_32_float, to_32_float, LL_Float, 2},
+ {PIPE_FORMAT_R32G32B32_FLOAT, from_32_float, to_32_float, LL_Float, 3},
+ {PIPE_FORMAT_R32G32B32A32_FLOAT, from_32_float, to_32_float, LL_Float, 4},
+
+ {PIPE_FORMAT_R32_UNORM, from_32_unorm, to_32_unorm, LL_Int32, 1},
+ {PIPE_FORMAT_R32G32_UNORM, from_32_unorm, to_32_unorm, LL_Int32, 2},
+ {PIPE_FORMAT_R32G32B32_UNORM, from_32_unorm, to_32_unorm, LL_Int32, 3},
+ {PIPE_FORMAT_R32G32B32A32_UNORM, from_32_unorm, to_32_unorm, LL_Int32, 4},
+
+ {PIPE_FORMAT_R32_USCALED, from_32_uscaled, to_32_uscaled, LL_Int32, 1},
+ {PIPE_FORMAT_R32G32_USCALED, from_32_uscaled, to_32_uscaled, LL_Int32, 2},
+ {PIPE_FORMAT_R32G32B32_USCALED, from_32_uscaled, to_32_uscaled, LL_Int32, 3},
+ {PIPE_FORMAT_R32G32B32A32_USCALED, from_32_uscaled, to_32_uscaled, LL_Int32, 4},
+
+ {PIPE_FORMAT_R32_SNORM, from_32_snorm, to_32_snorm, LL_Int32, 1},
+ {PIPE_FORMAT_R32G32_SNORM, from_32_snorm, to_32_snorm, LL_Int32, 2},
+ {PIPE_FORMAT_R32G32B32_SNORM, from_32_snorm, to_32_snorm, LL_Int32, 3},
+ {PIPE_FORMAT_R32G32B32A32_SNORM, from_32_snorm, to_32_snorm, LL_Int32, 4},
+
+ {PIPE_FORMAT_R32_SSCALED, from_32_sscaled, to_32_sscaled, LL_Int32, 1},
+ {PIPE_FORMAT_R32G32_SSCALED, from_32_sscaled, to_32_sscaled, LL_Int32, 2},
+ {PIPE_FORMAT_R32G32B32_SSCALED, from_32_sscaled, to_32_sscaled, LL_Int32, 3},
+ {PIPE_FORMAT_R32G32B32A32_SSCALED, from_32_sscaled, to_32_sscaled, LL_Int32, 4},
+
+ {PIPE_FORMAT_R16_UNORM, from_16_unorm, to_16_unorm, LL_Int16, 1},
+ {PIPE_FORMAT_R16G16_UNORM, from_16_unorm, to_16_unorm, LL_Int16, 2},
+ {PIPE_FORMAT_R16G16B16_UNORM, from_16_unorm, to_16_unorm, LL_Int16, 3},
+ {PIPE_FORMAT_R16G16B16A16_UNORM, from_16_unorm, to_16_unorm, LL_Int16, 4},
+
+ {PIPE_FORMAT_R16_USCALED, from_16_uscaled, to_16_uscaled, LL_Int16, 1},
+ {PIPE_FORMAT_R16G16_USCALED, from_16_uscaled, to_16_uscaled, LL_Int16, 2},
+ {PIPE_FORMAT_R16G16B16_USCALED, from_16_uscaled, to_16_uscaled, LL_Int16, 3},
+ {PIPE_FORMAT_R16G16B16A16_USCALED, from_16_uscaled, to_16_uscaled, LL_Int16, 4},
+
+ {PIPE_FORMAT_R16_SNORM, from_16_snorm, to_16_snorm, LL_Int16, 1},
+ {PIPE_FORMAT_R16G16_SNORM, from_16_snorm, to_16_snorm, LL_Int16, 2},
+ {PIPE_FORMAT_R16G16B16_SNORM, from_16_snorm, to_16_snorm, LL_Int16, 3},
+ {PIPE_FORMAT_R16G16B16A16_SNORM, from_16_snorm, to_16_snorm, LL_Int16, 4},
+
+ {PIPE_FORMAT_R16_SSCALED, from_16_sscaled, to_16_sscaled, LL_Int16, 1},
+ {PIPE_FORMAT_R16G16_SSCALED, from_16_sscaled, to_16_sscaled, LL_Int16, 2},
+ {PIPE_FORMAT_R16G16B16_SSCALED, from_16_sscaled, to_16_sscaled, LL_Int16, 3},
+ {PIPE_FORMAT_R16G16B16A16_SSCALED, from_16_sscaled, to_16_sscaled, LL_Int16, 4},
+
+ {PIPE_FORMAT_R8_UNORM, from_8_unorm, to_8_unorm, LL_Int8, 1},
+ {PIPE_FORMAT_R8G8_UNORM, from_8_unorm, to_8_unorm, LL_Int8, 2},
+ {PIPE_FORMAT_R8G8B8_UNORM, from_8_unorm, to_8_unorm, LL_Int8, 3},
+ {PIPE_FORMAT_R8G8B8A8_UNORM, from_8_unorm, to_8_unorm, LL_Int8, 4},
+
+ {PIPE_FORMAT_R8_USCALED, from_8_uscaled, to_8_uscaled, LL_Int8, 1},
+ {PIPE_FORMAT_R8G8_USCALED, from_8_uscaled, to_8_uscaled, LL_Int8, 2},
+ {PIPE_FORMAT_R8G8B8_USCALED, from_8_uscaled, to_8_uscaled, LL_Int8, 3},
+ {PIPE_FORMAT_R8G8B8A8_USCALED, from_8_uscaled, to_8_uscaled, LL_Int8, 4},
+
+ {PIPE_FORMAT_R8_SNORM, from_8_snorm, to_8_snorm, LL_Int8, 1},
+ {PIPE_FORMAT_R8G8_SNORM, from_8_snorm, to_8_snorm, LL_Int8, 2},
+ {PIPE_FORMAT_R8G8B8_SNORM, from_8_snorm, to_8_snorm, LL_Int8, 3},
+ {PIPE_FORMAT_R8G8B8A8_SNORM, from_8_snorm, to_8_snorm, LL_Int8, 4},
+
+ {PIPE_FORMAT_R8_SSCALED, from_8_sscaled, to_8_sscaled, LL_Int8, 1},
+ {PIPE_FORMAT_R8G8_SSCALED, from_8_sscaled, to_8_sscaled, LL_Int8, 2},
+ {PIPE_FORMAT_R8G8B8_SSCALED, from_8_sscaled, to_8_sscaled, LL_Int8, 3},
+ {PIPE_FORMAT_R8G8B8A8_SSCALED, from_8_sscaled, to_8_sscaled, LL_Int8, 4},
+
+ {PIPE_FORMAT_R32_FIXED, from_32_fixed, to_32_fixed, LL_Int32, 1},
+ {PIPE_FORMAT_R32G32_FIXED, from_32_fixed, to_32_fixed, LL_Int32, 2},
+ {PIPE_FORMAT_R32G32B32_FIXED, from_32_fixed, to_32_fixed, LL_Int32, 3},
+ {PIPE_FORMAT_R32G32B32A32_FIXED, from_32_fixed, to_32_fixed, LL_Int32, 4},
+};
+
+
+static LLVMValueRef
+fetch(LLVMBuilderRef builder,
+ LLVMValueRef ptr, int val_size, int nr_components,
+ from_func func)
+{
+ int i;
+ int offset = 0;
+ LLVMValueRef res = LLVMConstNull(
+ LLVMVectorType(LLVMFloatType(), 4));
+ LLVMValueRef defaults[4];
+
+ defaults[0] = LLVMConstReal(LLVMFloatType(), 0);
+ defaults[1] = LLVMConstReal(LLVMFloatType(), 0);
+ defaults[2] = LLVMConstReal(LLVMFloatType(), 0);
+ defaults[3] = LLVMConstReal(LLVMFloatType(), 1);
+
+ for (i = 0; i < nr_components; ++i) {
+ LLVMValueRef src_index = LLVMConstInt(LLVMInt32Type(), offset, 0);
+ LLVMValueRef dst_index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ LLVMValueRef src_tmp;
+ LLVMValueRef component;
+
+ src_tmp = LLVMBuildGEP(builder, ptr, &src_index, 1, "src_tmp");
+
+ /* convert src_tmp to float */
+ component = func(builder, src_tmp);
+
+ /* vec.comp = component */
+ res = LLVMBuildInsertElement(builder,
+ res,
+ component,
+ dst_index, "");
+ offset += val_size;
+ }
+ for (; i < 4; ++i) {
+ LLVMValueRef dst_index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ res = LLVMBuildInsertElement(builder,
+ res,
+ defaults[i],
+ dst_index, "");
+ }
+ return res;
+}
+
+
+LLVMValueRef
+draw_llvm_translate_from(LLVMBuilderRef builder,
+ LLVMValueRef vbuffer,
+ enum pipe_format from_format)
+{
+ const struct util_format_description *format_desc;
+ LLVMValueRef zero;
+ int i;
+
+ /*
+ * The above can only cope with straight arrays: no bitfields,
+ * swizzles, or half floats.
+ */
+
+ for (i = 0; i < Elements(translates); ++i) {
+ if (translates[i].format == from_format) {
+ /*LLVMTypeRef type = ll_type_to_llvm(translates[i].type);*/
+ return fetch(builder,
+ vbuffer,
+ ll_type_size(translates[i].type),
+ translates[i].num_components,
+ translates[i].from);
+ }
+ }
+
+
+ /*
+ * This doesn't handle anything bigger than 32bits, or half floats
+ * yet.
+ *
+ * TODO: unify all this code into lp_build_fetch_rgba_aos().
+ */
+
+ format_desc = util_format_description(from_format);
+ zero = LLVMConstNull(LLVMInt32Type());
+ return lp_build_fetch_rgba_aos(builder, format_desc, vbuffer, zero, zero);
+}
diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c
index 83dc1a35f4c..64c35025081 100644
--- a/src/gallium/auxiliary/draw/draw_pipe.c
+++ b/src/gallium/auxiliary/draw/draw_pipe.c
@@ -66,6 +66,7 @@ boolean draw_pipeline_init( struct draw_context *draw )
/* these defaults are oriented toward the needs of softpipe */
draw->pipeline.wide_point_threshold = 1000000.0; /* infinity */
draw->pipeline.wide_line_threshold = 1.0;
+ draw->pipeline.wide_point_sprites = FALSE;
draw->pipeline.line_stipple = TRUE;
draw->pipeline.point_sprite = TRUE;
@@ -225,7 +226,7 @@ static void do_triangle( struct draw_context *draw,
/**
- * Code to run the pipeline on a fairly arbitary collection of vertices.
+ * Code to run the pipeline on a fairly arbitrary collection of vertices.
* For drawing indexed primitives.
*
* Vertex headers must be pre-initialized with the
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
index 8f6ca15dfa2..4faf0a779ca 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
@@ -40,6 +40,7 @@
#include "util/u_format.h"
#include "util/u_math.h"
#include "util/u_memory.h"
+#include "util/u_sampler.h"
#include "tgsi/tgsi_transform.h"
#include "tgsi/tgsi_dump.h"
@@ -87,9 +88,10 @@ struct aaline_stage
uint pos_slot;
void *sampler_cso;
- struct pipe_texture *texture;
+ struct pipe_resource *texture;
+ struct pipe_sampler_view *sampler_view;
uint num_samplers;
- uint num_textures;
+ uint num_sampler_views;
/*
@@ -98,7 +100,7 @@ struct aaline_stage
struct aaline_fragment_shader *fs;
struct {
void *sampler[PIPE_MAX_SAMPLERS];
- struct pipe_texture *texture[PIPE_MAX_SAMPLERS];
+ struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS];
} state;
/*
@@ -111,10 +113,10 @@ struct aaline_stage
void (*driver_bind_sampler_states)(struct pipe_context *, unsigned,
void **);
- void (*driver_set_sampler_textures)(struct pipe_context *, unsigned,
- struct pipe_texture **);
- struct pipe_context *pipe;
+ void (*driver_set_sampler_views)(struct pipe_context *,
+ unsigned,
+ struct pipe_sampler_view **);
};
@@ -339,6 +341,7 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
static boolean
generate_aaline_fs(struct aaline_stage *aaline)
{
+ struct pipe_context *pipe = aaline->stage.draw->pipe;
const struct pipe_shader_state *orig_fs = &aaline->fs->state;
struct pipe_shader_state aaline_fs;
struct aa_transform_context transform;
@@ -371,7 +374,7 @@ generate_aaline_fs(struct aaline_stage *aaline)
aaline->fs->sampler_unit = transform.freeSampler;
aaline->fs->aaline_fs
- = aaline->driver_create_fs_state(aaline->pipe, &aaline_fs);
+ = aaline->driver_create_fs_state(pipe, &aaline_fs);
if (aaline->fs->aaline_fs == NULL)
goto fail;
@@ -391,9 +394,10 @@ fail:
static boolean
aaline_create_texture(struct aaline_stage *aaline)
{
- struct pipe_context *pipe = aaline->pipe;
+ struct pipe_context *pipe = aaline->stage.draw->pipe;
struct pipe_screen *screen = pipe->screen;
- struct pipe_texture texTemp;
+ struct pipe_resource texTemp;
+ struct pipe_sampler_view viewTempl;
uint level;
memset(&texTemp, 0, sizeof(texTemp));
@@ -403,28 +407,46 @@ aaline_create_texture(struct aaline_stage *aaline)
texTemp.width0 = 1 << MAX_TEXTURE_LEVEL;
texTemp.height0 = 1 << MAX_TEXTURE_LEVEL;
texTemp.depth0 = 1;
+ texTemp.bind = PIPE_BIND_SAMPLER_VIEW;
- aaline->texture = screen->texture_create(screen, &texTemp);
+ aaline->texture = screen->resource_create(screen, &texTemp);
if (!aaline->texture)
return FALSE;
+ u_sampler_view_default_template(&viewTempl,
+ aaline->texture,
+ aaline->texture->format);
+ aaline->sampler_view = pipe->create_sampler_view(pipe,
+ aaline->texture,
+ &viewTempl);
+ if (!aaline->sampler_view) {
+ return FALSE;
+ }
+
/* Fill in mipmap images.
* Basically each level is solid opaque, except for the outermost
* texels which are zero. Special case the 1x1 and 2x2 levels.
*/
for (level = 0; level <= MAX_TEXTURE_LEVEL; level++) {
struct pipe_transfer *transfer;
+ struct pipe_box box;
const uint size = u_minify(aaline->texture->width0, level);
ubyte *data;
uint i, j;
assert(aaline->texture->width0 == aaline->texture->height0);
+ u_box_origin_2d( size, size, &box );
+
/* This texture is new, no need to flush.
*/
- transfer = screen->get_tex_transfer(screen, aaline->texture, 0, level, 0,
- PIPE_TRANSFER_WRITE, 0, 0, size, size);
- data = screen->transfer_map(screen, transfer);
+ transfer = pipe->get_transfer(pipe,
+ aaline->texture,
+ u_subresource(0, level),
+ PIPE_TRANSFER_WRITE,
+ &box);
+
+ data = pipe->transfer_map(pipe, transfer);
if (data == NULL)
return FALSE;
@@ -448,8 +470,8 @@ aaline_create_texture(struct aaline_stage *aaline)
}
/* unmap */
- screen->transfer_unmap(screen, transfer);
- screen->tex_transfer_destroy(transfer);
+ pipe->transfer_unmap(pipe, transfer);
+ pipe->transfer_destroy(pipe, transfer);
}
return TRUE;
}
@@ -464,7 +486,7 @@ static boolean
aaline_create_sampler(struct aaline_stage *aaline)
{
struct pipe_sampler_state sampler;
- struct pipe_context *pipe = aaline->pipe;
+ struct pipe_context *pipe = aaline->stage.draw->pipe;
memset(&sampler, 0, sizeof(sampler));
sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
@@ -493,13 +515,14 @@ static boolean
bind_aaline_fragment_shader(struct aaline_stage *aaline)
{
struct draw_context *draw = aaline->stage.draw;
+ struct pipe_context *pipe = draw->pipe;
if (!aaline->fs->aaline_fs &&
!generate_aaline_fs(aaline))
return FALSE;
draw->suspend_flushing = TRUE;
- aaline->driver_bind_fs_state(aaline->pipe, aaline->fs->aaline_fs);
+ aaline->driver_bind_fs_state(pipe, aaline->fs->aaline_fs);
draw->suspend_flushing = FALSE;
return TRUE;
@@ -639,8 +662,10 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header)
{
auto struct aaline_stage *aaline = aaline_stage(stage);
struct draw_context *draw = stage->draw;
- struct pipe_context *pipe = aaline->pipe;
+ struct pipe_context *pipe = draw->pipe;
+ const struct pipe_rasterizer_state *rast = draw->rasterizer;
uint num_samplers;
+ void *r;
assert(draw->rasterizer->line_smooth);
@@ -669,16 +694,21 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header)
/* how many samplers? */
/* we'll use sampler/texture[pstip->sampler_unit] for the stipple */
- num_samplers = MAX2(aaline->num_textures, aaline->num_samplers);
+ num_samplers = MAX2(aaline->num_sampler_views, aaline->num_samplers);
num_samplers = MAX2(num_samplers, aaline->fs->sampler_unit + 1);
aaline->state.sampler[aaline->fs->sampler_unit] = aaline->sampler_cso;
- pipe_texture_reference(&aaline->state.texture[aaline->fs->sampler_unit],
- aaline->texture);
+ pipe_sampler_view_reference(&aaline->state.sampler_views[aaline->fs->sampler_unit],
+ aaline->sampler_view);
draw->suspend_flushing = TRUE;
aaline->driver_bind_sampler_states(pipe, num_samplers, aaline->state.sampler);
- aaline->driver_set_sampler_textures(pipe, num_samplers, aaline->state.texture);
+ aaline->driver_set_sampler_views(pipe, num_samplers, aaline->state.sampler_views);
+
+ /* Disable triangle culling, stippling, unfilled mode etc. */
+ r = draw_get_rasterizer_no_cull(draw, rast->scissor, rast->flatshade);
+ pipe->bind_rasterizer_state(pipe, r);
+
draw->suspend_flushing = FALSE;
/* now really draw first line */
@@ -692,7 +722,7 @@ aaline_flush(struct draw_stage *stage, unsigned flags)
{
struct draw_context *draw = stage->draw;
struct aaline_stage *aaline = aaline_stage(stage);
- struct pipe_context *pipe = aaline->pipe;
+ struct pipe_context *pipe = draw->pipe;
stage->line = aaline_first_line;
stage->next->flush( stage->next, flags );
@@ -702,8 +732,15 @@ aaline_flush(struct draw_stage *stage, unsigned flags)
aaline->driver_bind_fs_state(pipe, aaline->fs->driver_fs);
aaline->driver_bind_sampler_states(pipe, aaline->num_samplers,
aaline->state.sampler);
- aaline->driver_set_sampler_textures(pipe, aaline->num_textures,
- aaline->state.texture);
+ aaline->driver_set_sampler_views(pipe,
+ aaline->num_sampler_views,
+ aaline->state.sampler_views);
+
+ /* restore original rasterizer state */
+ if (draw->rast_handle) {
+ pipe->bind_rasterizer_state(pipe, draw->rast_handle);
+ }
+
draw->suspend_flushing = FALSE;
draw->extra_shader_outputs.slot = 0;
@@ -721,17 +758,22 @@ static void
aaline_destroy(struct draw_stage *stage)
{
struct aaline_stage *aaline = aaline_stage(stage);
+ struct pipe_context *pipe = stage->draw->pipe;
uint i;
for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
- pipe_texture_reference(&aaline->state.texture[i], NULL);
+ pipe_sampler_view_reference(&aaline->state.sampler_views[i], NULL);
}
if (aaline->sampler_cso)
- aaline->pipe->delete_sampler_state(aaline->pipe, aaline->sampler_cso);
+ pipe->delete_sampler_state(pipe, aaline->sampler_cso);
if (aaline->texture)
- pipe_texture_reference(&aaline->texture, NULL);
+ pipe_resource_reference(&aaline->texture, NULL);
+
+ if (aaline->sampler_view) {
+ pipe_sampler_view_reference(&aaline->sampler_view, NULL);
+ }
draw_free_temp_verts( stage );
@@ -787,13 +829,14 @@ aaline_create_fs_state(struct pipe_context *pipe,
{
struct aaline_stage *aaline = aaline_stage_from_pipe(pipe);
struct aaline_fragment_shader *aafs = CALLOC_STRUCT(aaline_fragment_shader);
+
if (aafs == NULL)
return NULL;
aafs->state = *fs;
/* pass-through */
- aafs->driver_fs = aaline->driver_create_fs_state(aaline->pipe, fs);
+ aafs->driver_fs = aaline->driver_create_fs_state(pipe, fs);
return aafs;
}
@@ -808,8 +851,7 @@ aaline_bind_fs_state(struct pipe_context *pipe, void *fs)
/* save current */
aaline->fs = aafs;
/* pass-through */
- aaline->driver_bind_fs_state(aaline->pipe,
- (aafs ? aafs->driver_fs : NULL));
+ aaline->driver_bind_fs_state(pipe, (aafs ? aafs->driver_fs : NULL));
}
@@ -818,11 +860,12 @@ aaline_delete_fs_state(struct pipe_context *pipe, void *fs)
{
struct aaline_stage *aaline = aaline_stage_from_pipe(pipe);
struct aaline_fragment_shader *aafs = (struct aaline_fragment_shader *) fs;
+
/* pass-through */
- aaline->driver_delete_fs_state(aaline->pipe, aafs->driver_fs);
+ aaline->driver_delete_fs_state(pipe, aafs->driver_fs);
if (aafs->aaline_fs)
- aaline->driver_delete_fs_state(aaline->pipe, aafs->aaline_fs);
+ aaline->driver_delete_fs_state(pipe, aafs->aaline_fs);
FREE(aafs);
}
@@ -839,28 +882,29 @@ aaline_bind_sampler_states(struct pipe_context *pipe,
aaline->num_samplers = num;
/* pass-through */
- aaline->driver_bind_sampler_states(aaline->pipe, num, sampler);
+ aaline->driver_bind_sampler_states(pipe, num, sampler);
}
static void
-aaline_set_sampler_textures(struct pipe_context *pipe,
- unsigned num, struct pipe_texture **texture)
+aaline_set_sampler_views(struct pipe_context *pipe,
+ unsigned num,
+ struct pipe_sampler_view **views)
{
struct aaline_stage *aaline = aaline_stage_from_pipe(pipe);
uint i;
/* save current */
for (i = 0; i < num; i++) {
- pipe_texture_reference(&aaline->state.texture[i], texture[i]);
+ pipe_sampler_view_reference(&aaline->state.sampler_views[i], views[i]);
}
for ( ; i < PIPE_MAX_SAMPLERS; i++) {
- pipe_texture_reference(&aaline->state.texture[i], NULL);
+ pipe_sampler_view_reference(&aaline->state.sampler_views[i], NULL);
}
- aaline->num_textures = num;
+ aaline->num_sampler_views = num;
/* pass-through */
- aaline->driver_set_sampler_textures(aaline->pipe, num, texture);
+ aaline->driver_set_sampler_views(pipe, num, views);
}
@@ -883,8 +927,6 @@ draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe)
if (!aaline)
goto fail;
- aaline->pipe = pipe;
-
/* create special texture, sampler state */
if (!aaline_create_texture(aaline))
goto fail;
@@ -898,7 +940,7 @@ draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe)
aaline->driver_delete_fs_state = pipe->delete_fs_state;
aaline->driver_bind_sampler_states = pipe->bind_fragment_sampler_states;
- aaline->driver_set_sampler_textures = pipe->set_fragment_sampler_textures;
+ aaline->driver_set_sampler_views = pipe->set_fragment_sampler_views;
/* override the driver's functions */
pipe->create_fs_state = aaline_create_fs_state;
@@ -906,7 +948,7 @@ draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe)
pipe->delete_fs_state = aaline_delete_fs_state;
pipe->bind_fragment_sampler_states = aaline_bind_sampler_states;
- pipe->set_fragment_sampler_textures = aaline_set_sampler_textures;
+ pipe->set_fragment_sampler_views = aaline_set_sampler_views;
/* Install once everything is known to be OK:
*/
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
index 9f9fb4312c1..bba6f50c020 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
@@ -107,8 +107,6 @@ struct aapoint_stage
const struct pipe_shader_state *);
void (*driver_bind_fs_state)(struct pipe_context *, void *);
void (*driver_delete_fs_state)(struct pipe_context *, void *);
-
- struct pipe_context *pipe;
};
@@ -499,6 +497,7 @@ generate_aapoint_fs(struct aapoint_stage *aapoint)
struct pipe_shader_state aapoint_fs;
struct aa_transform_context transform;
const uint newLen = tgsi_num_tokens(orig_fs->tokens) + NUM_NEW_TOKENS;
+ struct pipe_context *pipe = aapoint->stage.draw->pipe;
aapoint_fs = *orig_fs; /* copy to init */
aapoint_fs.tokens = tgsi_alloc_tokens(newLen);
@@ -527,7 +526,7 @@ generate_aapoint_fs(struct aapoint_stage *aapoint)
#endif
aapoint->fs->aapoint_fs
- = aapoint->driver_create_fs_state(aapoint->pipe, &aapoint_fs);
+ = aapoint->driver_create_fs_state(pipe, &aapoint_fs);
if (aapoint->fs->aapoint_fs == NULL)
goto fail;
@@ -549,13 +548,14 @@ static boolean
bind_aapoint_fragment_shader(struct aapoint_stage *aapoint)
{
struct draw_context *draw = aapoint->stage.draw;
+ struct pipe_context *pipe = draw->pipe;
if (!aapoint->fs->aapoint_fs &&
!generate_aapoint_fs(aapoint))
return FALSE;
draw->suspend_flushing = TRUE;
- aapoint->driver_bind_fs_state(aapoint->pipe, aapoint->fs->aapoint_fs);
+ aapoint->driver_bind_fs_state(pipe, aapoint->fs->aapoint_fs);
draw->suspend_flushing = FALSE;
return TRUE;
@@ -679,6 +679,9 @@ aapoint_first_point(struct draw_stage *stage, struct prim_header *header)
{
auto struct aapoint_stage *aapoint = aapoint_stage(stage);
struct draw_context *draw = stage->draw;
+ struct pipe_context *pipe = draw->pipe;
+ const struct pipe_rasterizer_state *rast = draw->rasterizer;
+ void *r;
assert(draw->rasterizer->point_smooth);
@@ -716,6 +719,14 @@ aapoint_first_point(struct draw_stage *stage, struct prim_header *header)
}
}
+ draw->suspend_flushing = TRUE;
+
+ /* Disable triangle culling, stippling, unfilled mode etc. */
+ r = draw_get_rasterizer_no_cull(draw, rast->scissor, rast->flatshade);
+ pipe->bind_rasterizer_state(pipe, r);
+
+ draw->suspend_flushing = FALSE;
+
/* now really draw first point */
stage->point = aapoint_point;
stage->point(stage, header);
@@ -727,7 +738,7 @@ aapoint_flush(struct draw_stage *stage, unsigned flags)
{
struct draw_context *draw = stage->draw;
struct aapoint_stage *aapoint = aapoint_stage(stage);
- struct pipe_context *pipe = aapoint->pipe;
+ struct pipe_context *pipe = draw->pipe;
stage->point = aapoint_first_point;
stage->next->flush( stage->next, flags );
@@ -735,6 +746,12 @@ aapoint_flush(struct draw_stage *stage, unsigned flags)
/* restore original frag shader */
draw->suspend_flushing = TRUE;
aapoint->driver_bind_fs_state(pipe, aapoint->fs->driver_fs);
+
+ /* restore original rasterizer state */
+ if (draw->rast_handle) {
+ pipe->bind_rasterizer_state(pipe, draw->rast_handle);
+ }
+
draw->suspend_flushing = FALSE;
draw->extra_shader_outputs.slot = 0;
@@ -811,7 +828,7 @@ aapoint_create_fs_state(struct pipe_context *pipe,
aafs->state = *fs;
/* pass-through */
- aafs->driver_fs = aapoint->driver_create_fs_state(aapoint->pipe, fs);
+ aafs->driver_fs = aapoint->driver_create_fs_state(pipe, fs);
return aafs;
}
@@ -825,7 +842,7 @@ aapoint_bind_fs_state(struct pipe_context *pipe, void *fs)
/* save current */
aapoint->fs = aafs;
/* pass-through */
- aapoint->driver_bind_fs_state(aapoint->pipe,
+ aapoint->driver_bind_fs_state(pipe,
(aafs ? aafs->driver_fs : NULL));
}
@@ -837,10 +854,10 @@ aapoint_delete_fs_state(struct pipe_context *pipe, void *fs)
struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs;
/* pass-through */
- aapoint->driver_delete_fs_state(aapoint->pipe, aafs->driver_fs);
+ aapoint->driver_delete_fs_state(pipe, aafs->driver_fs);
if (aafs->aapoint_fs)
- aapoint->driver_delete_fs_state(aapoint->pipe, aafs->aapoint_fs);
+ aapoint->driver_delete_fs_state(pipe, aafs->aapoint_fs);
FREE(aafs);
}
@@ -857,8 +874,6 @@ draw_install_aapoint_stage(struct draw_context *draw,
{
struct aapoint_stage *aapoint;
- pipe->draw = (void *) draw;
-
/*
* Create / install AA point drawing / prim stage
*/
@@ -866,8 +881,6 @@ draw_install_aapoint_stage(struct draw_context *draw,
if (aapoint == NULL)
return FALSE;
- aapoint->pipe = pipe;
-
/* save original driver functions */
aapoint->driver_create_fs_state = pipe->create_fs_state;
aapoint->driver_bind_fs_state = pipe->bind_fs_state;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
index d0d99aa331a..ef30db094fe 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -42,6 +42,7 @@
#include "util/u_format.h"
#include "util/u_math.h"
#include "util/u_memory.h"
+#include "util/u_sampler.h"
#include "tgsi/tgsi_transform.h"
#include "tgsi/tgsi_dump.h"
@@ -74,9 +75,10 @@ struct pstip_stage
struct draw_stage stage;
void *sampler_cso;
- struct pipe_texture *texture;
+ struct pipe_resource *texture;
+ struct pipe_sampler_view *sampler_view;
uint num_samplers;
- uint num_textures;
+ uint num_sampler_views;
/*
* Currently bound state
@@ -84,7 +86,7 @@ struct pstip_stage
struct pstip_fragment_shader *fs;
struct {
void *samplers[PIPE_MAX_SAMPLERS];
- struct pipe_texture *textures[PIPE_MAX_SAMPLERS];
+ struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS];
const struct pipe_poly_stipple *stipple;
} state;
@@ -98,8 +100,9 @@ struct pstip_stage
void (*driver_bind_sampler_states)(struct pipe_context *, unsigned, void **);
- void (*driver_set_sampler_textures)(struct pipe_context *, unsigned,
- struct pipe_texture **);
+ void (*driver_set_sampler_views)(struct pipe_context *,
+ unsigned,
+ struct pipe_sampler_view **);
void (*driver_set_polygon_stipple)(struct pipe_context *,
const struct pipe_poly_stipple *);
@@ -374,19 +377,21 @@ pstip_update_texture(struct pstip_stage *pstip)
{
static const uint bit31 = 1 << 31;
struct pipe_context *pipe = pstip->pipe;
- struct pipe_screen *screen = pipe->screen;
struct pipe_transfer *transfer;
const uint *stipple = pstip->state.stipple->stipple;
uint i, j;
ubyte *data;
/* XXX: want to avoid flushing just because we use stipple:
+ *
+ * Flush should no longer be necessary if driver is properly
+ * interleaving drawing and transfers on a given context:
*/
pipe->flush( pipe, PIPE_FLUSH_TEXTURE_CACHE, NULL );
- transfer = screen->get_tex_transfer(screen, pstip->texture, 0, 0, 0,
- PIPE_TRANSFER_WRITE, 0, 0, 32, 32);
- data = screen->transfer_map(screen, transfer);
+ transfer = pipe_get_transfer(pipe, pstip->texture, 0, 0, 0,
+ PIPE_TRANSFER_WRITE, 0, 0, 32, 32);
+ data = pipe->transfer_map(pipe, transfer);
/*
* Load alpha texture.
@@ -408,8 +413,8 @@ pstip_update_texture(struct pstip_stage *pstip)
}
/* unmap */
- screen->transfer_unmap(screen, transfer);
- screen->tex_transfer_destroy(transfer);
+ pipe->transfer_unmap(pipe, transfer);
+ pipe->transfer_destroy(pipe, transfer);
}
@@ -421,7 +426,8 @@ pstip_create_texture(struct pstip_stage *pstip)
{
struct pipe_context *pipe = pstip->pipe;
struct pipe_screen *screen = pipe->screen;
- struct pipe_texture texTemp;
+ struct pipe_resource texTemp;
+ struct pipe_sampler_view viewTempl;
memset(&texTemp, 0, sizeof(texTemp));
texTemp.target = PIPE_TEXTURE_2D;
@@ -430,11 +436,22 @@ pstip_create_texture(struct pstip_stage *pstip)
texTemp.width0 = 32;
texTemp.height0 = 32;
texTemp.depth0 = 1;
+ texTemp.bind = PIPE_BIND_SAMPLER_VIEW;
- pstip->texture = screen->texture_create(screen, &texTemp);
+ pstip->texture = screen->resource_create(screen, &texTemp);
if (pstip->texture == NULL)
return FALSE;
+ u_sampler_view_default_template(&viewTempl,
+ pstip->texture,
+ pstip->texture->format);
+ pstip->sampler_view = pipe->create_sampler_view(pipe,
+ pstip->texture,
+ &viewTempl);
+ if (!pstip->sampler_view) {
+ return FALSE;
+ }
+
return TRUE;
}
@@ -513,19 +530,19 @@ pstip_first_tri(struct draw_stage *stage, struct prim_header *header)
/* how many samplers? */
/* we'll use sampler/texture[pstip->sampler_unit] for the stipple */
- num_samplers = MAX2(pstip->num_textures, pstip->num_samplers);
+ num_samplers = MAX2(pstip->num_sampler_views, pstip->num_samplers);
num_samplers = MAX2(num_samplers, pstip->fs->sampler_unit + 1);
/* plug in our sampler, texture */
pstip->state.samplers[pstip->fs->sampler_unit] = pstip->sampler_cso;
- pipe_texture_reference(&pstip->state.textures[pstip->fs->sampler_unit],
- pstip->texture);
+ pipe_sampler_view_reference(&pstip->state.sampler_views[pstip->fs->sampler_unit],
+ pstip->sampler_view);
assert(num_samplers <= PIPE_MAX_SAMPLERS);
draw->suspend_flushing = TRUE;
pstip->driver_bind_sampler_states(pipe, num_samplers, pstip->state.samplers);
- pstip->driver_set_sampler_textures(pipe, num_samplers, pstip->state.textures);
+ pstip->driver_set_sampler_views(pipe, num_samplers, pstip->state.sampler_views);
draw->suspend_flushing = FALSE;
/* now really draw first triangle */
@@ -549,8 +566,9 @@ pstip_flush(struct draw_stage *stage, unsigned flags)
pstip->driver_bind_fs_state(pipe, pstip->fs->driver_fs);
pstip->driver_bind_sampler_states(pipe, pstip->num_samplers,
pstip->state.samplers);
- pstip->driver_set_sampler_textures(pipe, pstip->num_textures,
- pstip->state.textures);
+ pstip->driver_set_sampler_views(pipe,
+ pstip->num_sampler_views,
+ pstip->state.sampler_views);
draw->suspend_flushing = FALSE;
}
@@ -569,12 +587,16 @@ pstip_destroy(struct draw_stage *stage)
uint i;
for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
- pipe_texture_reference(&pstip->state.textures[i], NULL);
+ pipe_sampler_view_reference(&pstip->state.sampler_views[i], NULL);
}
pstip->pipe->delete_sampler_state(pstip->pipe, pstip->sampler_cso);
- pipe_texture_reference(&pstip->texture, NULL);
+ pipe_resource_reference(&pstip->texture, NULL);
+
+ if (pstip->sampler_view) {
+ pipe_sampler_view_reference(&pstip->sampler_view, NULL);
+ }
draw_free_temp_verts( stage );
FREE( stage );
@@ -680,24 +702,25 @@ pstip_bind_sampler_states(struct pipe_context *pipe,
static void
-pstip_set_sampler_textures(struct pipe_context *pipe,
- unsigned num, struct pipe_texture **texture)
+pstip_set_sampler_views(struct pipe_context *pipe,
+ unsigned num,
+ struct pipe_sampler_view **views)
{
struct pstip_stage *pstip = pstip_stage_from_pipe(pipe);
uint i;
/* save current */
for (i = 0; i < num; i++) {
- pipe_texture_reference(&pstip->state.textures[i], texture[i]);
+ pipe_sampler_view_reference(&pstip->state.sampler_views[i], views[i]);
}
for (; i < PIPE_MAX_SAMPLERS; i++) {
- pipe_texture_reference(&pstip->state.textures[i], NULL);
+ pipe_sampler_view_reference(&pstip->state.sampler_views[i], NULL);
}
- pstip->num_textures = num;
+ pstip->num_sampler_views = num;
/* pass-through */
- pstip->driver_set_sampler_textures(pstip->pipe, num, texture);
+ pstip->driver_set_sampler_views(pstip->pipe, num, views);
}
@@ -754,7 +777,7 @@ draw_install_pstipple_stage(struct draw_context *draw,
pstip->driver_delete_fs_state = pipe->delete_fs_state;
pstip->driver_bind_sampler_states = pipe->bind_fragment_sampler_states;
- pstip->driver_set_sampler_textures = pipe->set_fragment_sampler_textures;
+ pstip->driver_set_sampler_views = pipe->set_fragment_sampler_views;
pstip->driver_set_polygon_stipple = pipe->set_polygon_stipple;
/* override the driver's functions */
@@ -763,7 +786,7 @@ draw_install_pstipple_stage(struct draw_context *draw,
pipe->delete_fs_state = pstip_delete_fs_state;
pipe->bind_fragment_sampler_states = pstip_bind_sampler_states;
- pipe->set_fragment_sampler_textures = pstip_set_sampler_textures;
+ pipe->set_fragment_sampler_views = pstip_set_sampler_views;
pipe->set_polygon_stipple = pstip_set_polygon_stipple;
return TRUE;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_validate.c b/src/gallium/auxiliary/draw/draw_pipe_validate.c
index 153097e543e..2a50af7a414 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_validate.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_validate.c
@@ -100,6 +100,11 @@ draw_need_pipeline(const struct draw_context *draw,
if (rasterizer->point_size > draw->pipeline.wide_point_threshold)
return TRUE;
+ /* sprite points */
+ if (rasterizer->point_quad_rasterization
+ && draw->pipeline.wide_point_sprites)
+ return TRUE;
+
/* AA points */
if (rasterizer->point_smooth && draw->pipeline.aapoint)
return TRUE;
@@ -154,6 +159,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage )
boolean need_det = FALSE;
boolean precalc_flat = FALSE;
boolean wide_lines, wide_points;
+ const struct pipe_rasterizer_state *rast = draw->rasterizer;
/* Set the validate's next stage to the rasterize stage, so that it
* can be found later if needed for flushing.
@@ -161,15 +167,17 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage )
stage->next = next;
/* drawing wide lines? */
- wide_lines = (draw->rasterizer->line_width > draw->pipeline.wide_line_threshold
- && !draw->rasterizer->line_smooth);
+ wide_lines = (rast->line_width > draw->pipeline.wide_line_threshold
+ && !rast->line_smooth);
/* drawing large points? */
- if (draw->rasterizer->sprite_coord_enable && draw->pipeline.point_sprite)
+ if (rast->sprite_coord_enable && draw->pipeline.point_sprite)
wide_points = TRUE;
- else if (draw->rasterizer->point_smooth && draw->pipeline.aapoint)
+ else if (rast->point_smooth && draw->pipeline.aapoint)
wide_points = FALSE;
- else if (draw->rasterizer->point_size > draw->pipeline.wide_point_threshold)
+ else if (rast->point_size > draw->pipeline.wide_point_threshold)
+ wide_points = TRUE;
+ else if (rast->point_quad_rasterization && draw->pipeline.wide_point_sprites)
wide_points = TRUE;
else
wide_points = FALSE;
@@ -181,12 +189,12 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage )
* shorter pipelines for lines & points.
*/
- if (draw->rasterizer->line_smooth && draw->pipeline.aaline) {
+ if (rast->line_smooth && draw->pipeline.aaline) {
draw->pipeline.aaline->next = next;
next = draw->pipeline.aaline;
}
- if (draw->rasterizer->point_smooth && draw->pipeline.aapoint) {
+ if (rast->point_smooth && draw->pipeline.aapoint) {
draw->pipeline.aapoint->next = next;
next = draw->pipeline.aapoint;
}
@@ -197,44 +205,44 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage )
precalc_flat = TRUE;
}
- if (wide_points || draw->rasterizer->sprite_coord_enable) {
+ if (wide_points || rast->sprite_coord_enable) {
draw->pipeline.wide_point->next = next;
next = draw->pipeline.wide_point;
}
- if (draw->rasterizer->line_stipple_enable && draw->pipeline.line_stipple) {
+ if (rast->line_stipple_enable && draw->pipeline.line_stipple) {
draw->pipeline.stipple->next = next;
next = draw->pipeline.stipple;
precalc_flat = TRUE; /* only needed for lines really */
}
- if (draw->rasterizer->poly_stipple_enable
+ if (rast->poly_stipple_enable
&& draw->pipeline.pstipple) {
draw->pipeline.pstipple->next = next;
next = draw->pipeline.pstipple;
}
- if (draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL ||
- draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL) {
+ if (rast->fill_cw != PIPE_POLYGON_MODE_FILL ||
+ rast->fill_ccw != PIPE_POLYGON_MODE_FILL) {
draw->pipeline.unfilled->next = next;
next = draw->pipeline.unfilled;
precalc_flat = TRUE; /* only needed for triangles really */
need_det = TRUE;
}
- if (draw->rasterizer->flatshade && precalc_flat) {
+ if (rast->flatshade && precalc_flat) {
draw->pipeline.flatshade->next = next;
next = draw->pipeline.flatshade;
}
- if (draw->rasterizer->offset_cw ||
- draw->rasterizer->offset_ccw) {
+ if (rast->offset_cw ||
+ rast->offset_ccw) {
draw->pipeline.offset->next = next;
next = draw->pipeline.offset;
need_det = TRUE;
}
- if (draw->rasterizer->light_twoside) {
+ if (rast->light_twoside) {
draw->pipeline.twoside->next = next;
next = draw->pipeline.twoside;
need_det = TRUE;
@@ -247,7 +255,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage )
* to less work emitting vertices, smaller vertex buffers, etc.
* It's difficult to say whether this will be true in general.
*/
- if (need_det || draw->rasterizer->cull_mode) {
+ if (need_det || rast->cull_mode) {
draw->pipeline.cull->next = next;
next = draw->pipeline.cull;
}
diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
index 27099579618..abbf6247ab8 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
@@ -137,7 +137,7 @@ emit_vertex( struct vbuf_stage *vbuf,
*/
/* Note: we really do want data[0] here, not data[pos]:
*/
- vbuf->translate->set_buffer(vbuf->translate, 0, vertex->data[0], 0);
+ vbuf->translate->set_buffer(vbuf->translate, 0, vertex->data[0], 0, ~0);
vbuf->translate->run(vbuf->translate, 0, 1, 0, vbuf->vertex_ptr);
if (0) draw_dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr);
@@ -235,41 +235,18 @@ vbuf_start_prim( struct vbuf_stage *vbuf, uint prim )
for (i = 0; i < vbuf->vinfo->num_attribs; i++) {
unsigned emit_sz = 0;
unsigned src_buffer = 0;
- unsigned output_format;
+ enum pipe_format output_format;
unsigned src_offset = (vbuf->vinfo->attrib[i].src_index * 4 * sizeof(float) );
- switch (vbuf->vinfo->attrib[i].emit) {
- case EMIT_4F:
- output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
- emit_sz = 4 * sizeof(float);
- break;
- case EMIT_3F:
- output_format = PIPE_FORMAT_R32G32B32_FLOAT;
- emit_sz = 3 * sizeof(float);
- break;
- case EMIT_2F:
- output_format = PIPE_FORMAT_R32G32_FLOAT;
- emit_sz = 2 * sizeof(float);
- break;
- case EMIT_1F:
- output_format = PIPE_FORMAT_R32_FLOAT;
- emit_sz = 1 * sizeof(float);
- break;
- case EMIT_1F_PSIZE:
- output_format = PIPE_FORMAT_R32_FLOAT;
- emit_sz = 1 * sizeof(float);
+ output_format = draw_translate_vinfo_format(vbuf->vinfo->attrib[i].emit);
+ emit_sz = draw_translate_vinfo_size(vbuf->vinfo->attrib[i].emit);
+
+ /* doesn't handle EMIT_OMIT */
+ assert(emit_sz != 0);
+
+ if (vbuf->vinfo->attrib[i].emit == EMIT_1F_PSIZE) {
src_buffer = 1;
src_offset = 0;
- break;
- case EMIT_4UB:
- output_format = PIPE_FORMAT_A8R8G8B8_UNORM;
- emit_sz = 4 * sizeof(ubyte);
- break;
- default:
- assert(0);
- output_format = PIPE_FORMAT_NONE;
- emit_sz = 0;
- break;
}
hw_key.element[i].type = TRANSLATE_ELEMENT_NORMAL;
@@ -294,7 +271,7 @@ vbuf_start_prim( struct vbuf_stage *vbuf, uint prim )
translate_key_sanitize(&hw_key);
vbuf->translate = translate_cache_find(vbuf->cache, &hw_key);
- vbuf->translate->set_buffer(vbuf->translate, 1, &vbuf->point_size, 0);
+ vbuf->translate->set_buffer(vbuf->translate, 1, &vbuf->point_size, 0, ~0);
}
vbuf->point_size = vbuf->stage.draw->rasterizer->point_size;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c
index 3073c870825..ab167065815 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c
@@ -28,6 +28,7 @@
/* Authors: Keith Whitwell <[email protected]>
*/
+#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_math.h"
@@ -53,7 +54,6 @@ static INLINE struct wideline_stage *wideline_stage( struct draw_stage *stage )
/**
* Draw a wide line by drawing a quad (two triangles).
- * XXX need to disable polygon stipple.
*/
static void wideline_line( struct draw_stage *stage,
struct prim_header *header )
@@ -142,9 +142,40 @@ static void wideline_line( struct draw_stage *stage,
}
+static void wideline_first_line( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ struct draw_context *draw = stage->draw;
+ struct pipe_context *pipe = draw->pipe;
+ const struct pipe_rasterizer_state *rast = draw->rasterizer;
+ void *r;
+
+ /* Disable triangle culling, stippling, unfilled mode etc. */
+ r = draw_get_rasterizer_no_cull(draw, rast->scissor, rast->flatshade);
+ draw->suspend_flushing = TRUE;
+ pipe->bind_rasterizer_state(pipe, r);
+ draw->suspend_flushing = FALSE;
+
+ stage->line = wideline_line;
+
+ wideline_line(stage, header);
+}
+
+
static void wideline_flush( struct draw_stage *stage, unsigned flags )
{
+ struct draw_context *draw = stage->draw;
+ struct pipe_context *pipe = draw->pipe;
+
+ stage->line = wideline_first_line;
stage->next->flush( stage->next, flags );
+
+ /* restore original rasterizer state */
+ if (draw->rast_handle) {
+ draw->suspend_flushing = TRUE;
+ pipe->bind_rasterizer_state(pipe, draw->rast_handle);
+ draw->suspend_flushing = FALSE;
+ }
}
@@ -171,7 +202,7 @@ struct draw_stage *draw_wide_line_stage( struct draw_context *draw )
wide->stage.name = "wide-line";
wide->stage.next = NULL;
wide->stage.point = draw_pipe_passthrough_point;
- wide->stage.line = wideline_line;
+ wide->stage.line = wideline_first_line;
wide->stage.tri = draw_pipe_passthrough_tri;
wide->stage.flush = wideline_flush;
wide->stage.reset_stipple_counter = wideline_reset_stipple_counter;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
index fdabce7d443..a86fe19586c 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
@@ -52,6 +52,7 @@
*/
+#include "pipe/p_context.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "pipe/p_defines.h"
@@ -113,7 +114,10 @@ static void set_texcoords(const struct widepoint_stage *wide,
/* put gl_PointCoord into the extra vertex slot */
uint slot = wide->stage.draw->extra_shader_outputs.slot;
v->data[slot][0] = tc[0];
- v->data[slot][1] = tc[1];
+ if (wide->texcoord_mode == PIPE_SPRITE_COORD_LOWER_LEFT)
+ v->data[slot][1] = 1.0f - tc[1];
+ else
+ v->data[slot][1] = tc[1];
v->data[slot][2] = 0.0F;
v->data[slot][3] = 1.0F;
}
@@ -128,10 +132,9 @@ static void set_texcoords(const struct widepoint_stage *wide,
static void widepoint_point( struct draw_stage *stage,
struct prim_header *header )
{
- /* XXX should take point_quad_rasterization into account? */
const struct widepoint_stage *wide = widepoint_stage(stage);
const unsigned pos = draw_current_shader_position_output(stage->draw);
- const boolean sprite = (boolean) stage->draw->rasterizer->sprite_coord_enable;
+ const boolean sprite = (boolean) stage->draw->rasterizer->point_quad_rasterization;
float half_size;
float left_adj, right_adj, bot_adj, top_adj;
@@ -213,33 +216,42 @@ static void widepoint_first_point( struct draw_stage *stage,
{
struct widepoint_stage *wide = widepoint_stage(stage);
struct draw_context *draw = stage->draw;
+ struct pipe_context *pipe = draw->pipe;
+ const struct pipe_rasterizer_state *rast = draw->rasterizer;
+ void *r;
- wide->half_point_size = 0.5f * draw->rasterizer->point_size;
+ wide->half_point_size = 0.5f * rast->point_size;
wide->xbias = 0.0;
wide->ybias = 0.0;
- if (draw->rasterizer->gl_rasterization_rules) {
+ if (rast->gl_rasterization_rules) {
wide->xbias = 0.125;
}
+ /* Disable triangle culling, stippling, unfilled mode etc. */
+ r = draw_get_rasterizer_no_cull(draw, rast->scissor, rast->flatshade);
+ draw->suspend_flushing = TRUE;
+ pipe->bind_rasterizer_state(pipe, r);
+ draw->suspend_flushing = FALSE;
+
/* XXX we won't know the real size if it's computed by the vertex shader! */
- if ((draw->rasterizer->point_size > draw->pipeline.wide_point_threshold) ||
- (draw->rasterizer->sprite_coord_enable && draw->pipeline.point_sprite)) {
+ if ((rast->point_size > draw->pipeline.wide_point_threshold) ||
+ (rast->point_quad_rasterization && draw->pipeline.point_sprite)) {
stage->point = widepoint_point;
}
else {
stage->point = draw_pipe_passthrough_point;
}
- if (draw->rasterizer->sprite_coord_enable) {
+ if (rast->point_quad_rasterization) {
/* find vertex shader texcoord outputs */
const struct draw_vertex_shader *vs = draw->vs.vertex_shader;
uint i, j = 0;
- wide->texcoord_mode = draw->rasterizer->sprite_coord_mode;
+ wide->texcoord_mode = rast->sprite_coord_mode;
for (i = 0; i < vs->info.num_outputs; i++) {
if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_GENERIC) {
wide->texcoord_slot[j] = i;
- wide->texcoord_enable[j] = (draw->rasterizer->sprite_coord_enable >> j) & 1;
+ wide->texcoord_enable[j] = (rast->sprite_coord_enable >> j) & 1;
j++;
}
}
@@ -259,7 +271,7 @@ static void widepoint_first_point( struct draw_stage *stage,
}
wide->psize_slot = -1;
- if (draw->rasterizer->point_size_per_vertex) {
+ if (rast->point_size_per_vertex) {
/* find PSIZ vertex output */
const struct draw_vertex_shader *vs = draw->vs.vertex_shader;
uint i;
@@ -277,9 +289,19 @@ static void widepoint_first_point( struct draw_stage *stage,
static void widepoint_flush( struct draw_stage *stage, unsigned flags )
{
+ struct draw_context *draw = stage->draw;
+ struct pipe_context *pipe = draw->pipe;
+
stage->point = widepoint_first_point;
stage->next->flush( stage->next, flags );
stage->draw->extra_shader_outputs.slot = 0;
+
+ /* restore original rasterizer state */
+ if (draw->rast_handle) {
+ draw->suspend_flushing = TRUE;
+ pipe->bind_rasterizer_state(pipe, draw->rast_handle);
+ draw->suspend_flushing = FALSE;
+ }
}
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index 1e6e01af9e2..a2bfb693c09 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -46,6 +46,10 @@
#include "tgsi/tgsi_scan.h"
+#ifdef HAVE_LLVM
+#include <llvm-c/ExecutionEngine.h>
+#endif
+
struct pipe_context;
struct draw_vertex_shader;
@@ -82,6 +86,8 @@ struct vertex_header {
*/
struct draw_context
{
+ struct pipe_context *pipe;
+
/** Drawing/primitive pipeline stages */
struct {
struct draw_stage *first; /**< one of the following */
@@ -105,6 +111,7 @@ struct draw_context
float wide_point_threshold; /**< convert pnts to tris if larger than this */
float wide_line_threshold; /**< convert lines to tris if wider than this */
+ boolean wide_point_sprites; /**< convert points to tris for sprite mode */
boolean line_stipple; /**< do line stipple? */
boolean point_sprite; /**< convert points to quads for sprites? */
@@ -125,6 +132,7 @@ struct draw_context
struct draw_pt_middle_end *fetch_emit;
struct draw_pt_middle_end *fetch_shade_emit;
struct draw_pt_middle_end *general;
+ struct draw_pt_middle_end *llvm;
} middle;
struct {
@@ -144,6 +152,7 @@ struct draw_context
const void *elts;
/** bytes per index (0, 1, 2 or 4) */
unsigned eltSize;
+ int eltBias;
unsigned min_index;
unsigned max_index;
@@ -174,8 +183,14 @@ struct draw_context
double mrd; /**< minimum resolvable depth value, for polygon offset */
- /* pipe state that we need: */
+ /** Current rasterizer state given to us by the driver */
const struct pipe_rasterizer_state *rasterizer;
+ /** Driver CSO handle for the current rasterizer state */
+ void *rast_handle;
+
+ /** Rasterizer CSOs without culling/stipple/etc */
+ void *rasterizer_no_cull[2][2];
+
struct pipe_viewport_state viewport;
boolean identity_viewport;
@@ -237,9 +252,17 @@ struct draw_context
unsigned instance_id;
+#ifdef HAVE_LLVM
+ LLVMExecutionEngineRef engine;
+#endif
+
void *driver_private;
};
+/*******************************************************************************
+ * Draw common initialization code
+ */
+boolean draw_init(struct draw_context *draw);
/*******************************************************************************
* Vertex shader code:
@@ -345,5 +368,10 @@ void draw_do_flush( struct draw_context *draw, unsigned flags );
+void *
+draw_get_rasterizer_no_cull( struct draw_context *draw,
+ boolean scissor,
+ boolean flatshade );
+
#endif /* DRAW_PRIVATE_H */
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index 6d90a6c42fd..b853f3a89f8 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -37,6 +37,13 @@
#include "util/u_math.h"
#include "util/u_prim.h"
+
+DEBUG_GET_ONCE_BOOL_OPTION(draw_fse, "DRAW_FSE", FALSE)
+DEBUG_GET_ONCE_BOOL_OPTION(draw_no_fse, "DRAW_NO_FSE", FALSE)
+#ifdef HAVE_LLVM
+DEBUG_GET_ONCE_BOOL_OPTION(draw_use_llvm, "DRAW_USE_LLVM", TRUE)
+#endif
+
static unsigned trim( unsigned count, unsigned first, unsigned incr )
{
if (count < first)
@@ -90,12 +97,16 @@ draw_pt_arrays(struct draw_context *draw,
opt |= PT_SHADE;
}
- if (opt == 0)
- middle = draw->pt.middle.fetch_emit;
- else if (opt == PT_SHADE && !draw->pt.no_fse)
- middle = draw->pt.middle.fetch_shade_emit;
- else
- middle = draw->pt.middle.general;
+ if (draw->pt.middle.llvm) {
+ middle = draw->pt.middle.llvm;
+ } else {
+ if (opt == 0)
+ middle = draw->pt.middle.fetch_emit;
+ else if (opt == PT_SHADE && !draw->pt.no_fse)
+ middle = draw->pt.middle.fetch_shade_emit;
+ else
+ middle = draw->pt.middle.general;
+ }
/* Pick the right frontend
@@ -111,6 +122,7 @@ draw_pt_arrays(struct draw_context *draw,
frontend->run(frontend,
draw_pt_elt_func(draw),
draw_pt_elt_ptr(draw, start),
+ draw->pt.user.eltBias,
count);
frontend->finish( frontend );
@@ -121,8 +133,8 @@ draw_pt_arrays(struct draw_context *draw,
boolean draw_pt_init( struct draw_context *draw )
{
- draw->pt.test_fse = debug_get_bool_option("DRAW_FSE", FALSE);
- draw->pt.no_fse = debug_get_bool_option("DRAW_NO_FSE", FALSE);
+ draw->pt.test_fse = debug_get_option_draw_fse();
+ draw->pt.no_fse = debug_get_option_draw_no_fse();
draw->pt.front.vcache = draw_pt_vcache( draw );
if (!draw->pt.front.vcache)
@@ -144,12 +156,22 @@ boolean draw_pt_init( struct draw_context *draw )
if (!draw->pt.middle.general)
return FALSE;
+#if HAVE_LLVM
+ if (debug_get_option_draw_use_llvm())
+ draw->pt.middle.llvm = draw_pt_fetch_pipeline_or_emit_llvm( draw );
+#endif
+
return TRUE;
}
void draw_pt_destroy( struct draw_context *draw )
{
+ if (draw->pt.middle.llvm) {
+ draw->pt.middle.llvm->destroy( draw->pt.middle.llvm );
+ draw->pt.middle.llvm = NULL;
+ }
+
if (draw->pt.middle.general) {
draw->pt.middle.general->destroy( draw->pt.middle.general );
draw->pt.middle.general = NULL;
@@ -215,8 +237,11 @@ draw_print_arrays(struct draw_context *draw, uint prim, int start, uint count)
break;
default:
assert(0);
+ return;
}
- debug_printf("Element[%u + %u] -> Vertex %u:\n", start, i, ii);
+ ii += draw->pt.user.eltBias;
+ debug_printf("Element[%u + %u] + %i -> Vertex %u:\n", start, i,
+ draw->pt.user.eltBias, ii);
}
else {
/* non-indexed arrays */
@@ -307,9 +332,8 @@ draw_arrays_instanced(struct draw_context *draw,
tgsi_dump(draw->vs.vertex_shader->state.tokens, 0);
debug_printf("Elements:\n");
for (i = 0; i < draw->pt.nr_vertex_elements; i++) {
- debug_printf(" format=%s comps=%u\n",
- util_format_name(draw->pt.vertex_element[i].src_format),
- draw->pt.vertex_element[i].nr_components);
+ debug_printf(" format=%s\n",
+ util_format_name(draw->pt.vertex_element[i].src_format));
}
debug_printf("Buffers:\n");
for (i = 0; i < draw->pt.nr_vertex_buffers; i++) {
diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h
index d5e0d92a605..3e3ea320cc0 100644
--- a/src/gallium/auxiliary/draw/draw_pt.h
+++ b/src/gallium/auxiliary/draw/draw_pt.h
@@ -67,6 +67,7 @@ struct draw_pt_front_end {
void (*run)( struct draw_pt_front_end *,
pt_elt_func elt_func,
const void *elt_ptr,
+ int elt_bias,
unsigned count );
void (*finish)( struct draw_pt_front_end * );
@@ -147,6 +148,7 @@ struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw);
struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw );
struct draw_pt_middle_end *draw_pt_middle_fse( struct draw_context *draw );
struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit(struct draw_context *draw);
+struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit_llvm(struct draw_context *draw);
diff --git a/src/gallium/auxiliary/draw/draw_pt_decompose.h b/src/gallium/auxiliary/draw/draw_pt_decompose.h
index 4ca5b520204..3c44f7c11ee 100644
--- a/src/gallium/auxiliary/draw/draw_pt_decompose.h
+++ b/src/gallium/auxiliary/draw/draw_pt_decompose.h
@@ -105,40 +105,20 @@ static void FUNC( ARGS,
case PIPE_PRIM_QUADS:
- if (flatfirst) {
- for (i = 0; i+3 < count; i += 4) {
- QUAD( (i + 1),
- (i + 2),
- (i + 3),
- (i + 0) );
- }
- }
- else {
- for (i = 0; i+3 < count; i += 4) {
- QUAD( (i + 0),
- (i + 1),
- (i + 2),
- (i + 3));
- }
+ for (i = 0; i+3 < count; i += 4) {
+ QUAD( (i + 0),
+ (i + 1),
+ (i + 2),
+ (i + 3));
}
break;
case PIPE_PRIM_QUAD_STRIP:
- if (flatfirst) {
- for (i = 0; i+3 < count; i += 2) {
- QUAD( (i + 1),
- (i + 3),
- (i + 2),
- (i + 0) );
- }
- }
- else {
- for (i = 0; i+3 < count; i += 2) {
- QUAD( (i + 2),
- (i + 0),
- (i + 1),
- (i + 3));
- }
+ for (i = 0; i+3 < count; i += 2) {
+ QUAD( (i + 2),
+ (i + 0),
+ (i + 1),
+ (i + 3));
}
break;
diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c
index ae357b51226..ad48fa39a4f 100644
--- a/src/gallium/auxiliary/draw/draw_pt_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_emit.c
@@ -86,40 +86,15 @@ void draw_pt_emit_prepare( struct pt_emit *emit,
unsigned output_format;
unsigned src_offset = (vinfo->attrib[i].src_index * 4 * sizeof(float) );
+ output_format = draw_translate_vinfo_format(vinfo->attrib[i].emit);
+ emit_sz = draw_translate_vinfo_size(vinfo->attrib[i].emit);
-
- switch (vinfo->attrib[i].emit) {
- case EMIT_4F:
- output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
- emit_sz = 4 * sizeof(float);
- break;
- case EMIT_3F:
- output_format = PIPE_FORMAT_R32G32B32_FLOAT;
- emit_sz = 3 * sizeof(float);
- break;
- case EMIT_2F:
- output_format = PIPE_FORMAT_R32G32_FLOAT;
- emit_sz = 2 * sizeof(float);
- break;
- case EMIT_1F:
- output_format = PIPE_FORMAT_R32_FLOAT;
- emit_sz = 1 * sizeof(float);
- break;
- case EMIT_1F_PSIZE:
- output_format = PIPE_FORMAT_R32_FLOAT;
- emit_sz = 1 * sizeof(float);
+ /* doesn't handle EMIT_OMIT */
+ assert(emit_sz != 0);
+
+ if (vinfo->attrib[i].emit == EMIT_1F_PSIZE) {
src_buffer = 1;
src_offset = 0;
- break;
- case EMIT_4UB:
- output_format = PIPE_FORMAT_A8R8G8B8_UNORM;
- emit_sz = 4 * sizeof(ubyte);
- break;
- default:
- assert(0);
- output_format = PIPE_FORMAT_NONE;
- emit_sz = 0;
- break;
}
hw_key.element[i].type = TRANSLATE_ELEMENT_NORMAL;
@@ -196,12 +171,14 @@ void draw_pt_emit( struct pt_emit *emit,
translate->set_buffer(translate,
0,
vertex_data,
- stride );
+ stride,
+ ~0);
translate->set_buffer(translate,
1,
&draw->rasterizer->point_size,
- 0);
+ 0,
+ ~0);
translate->run( translate,
0,
@@ -257,11 +234,11 @@ void draw_pt_emit_linear(struct pt_emit *emit,
goto fail;
translate->set_buffer(translate, 0,
- vertex_data, stride);
+ vertex_data, stride, count - 1);
translate->set_buffer(translate, 1,
&draw->rasterizer->point_size,
- 0);
+ 0, ~0);
translate->run(translate,
0,
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c
index 252be5053e4..a1347221b5d 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c
@@ -149,7 +149,8 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch,
fetch->translate->set_buffer(fetch->translate,
draw->pt.nr_vertex_buffers,
&vh,
- 0);
+ 0,
+ ~0);
}
}
@@ -172,7 +173,8 @@ void draw_pt_fetch_run( struct pt_fetch *fetch,
i,
((char *)draw->pt.user.vbuffer[i] +
draw->pt.vertex_buffer[i].buffer_offset),
- draw->pt.vertex_buffer[i].stride );
+ draw->pt.vertex_buffer[i].stride,
+ draw->pt.vertex_buffer[i].max_index);
}
translate->run_elts( translate,
@@ -198,7 +200,8 @@ void draw_pt_fetch_run_linear( struct pt_fetch *fetch,
i,
((char *)draw->pt.user.vbuffer[i] +
draw->pt.vertex_buffer[i].buffer_offset),
- draw->pt.vertex_buffer[i].stride );
+ draw->pt.vertex_buffer[i].stride,
+ draw->pt.vertex_buffer[i].max_index);
}
translate->run( translate,
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
index 2a604470e9a..d7735bf1ac9 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
@@ -129,41 +129,16 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle,
unsigned input_offset = src->src_offset;
unsigned output_format;
- switch (vinfo->attrib[i].emit) {
- case EMIT_4UB:
- output_format = PIPE_FORMAT_R8G8B8A8_UNORM;
- emit_sz = 4 * sizeof(unsigned char);
- break;
- case EMIT_4F:
- output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
- emit_sz = 4 * sizeof(float);
- break;
- case EMIT_3F:
- output_format = PIPE_FORMAT_R32G32B32_FLOAT;
- emit_sz = 3 * sizeof(float);
- break;
- case EMIT_2F:
- output_format = PIPE_FORMAT_R32G32_FLOAT;
- emit_sz = 2 * sizeof(float);
- break;
- case EMIT_1F:
- output_format = PIPE_FORMAT_R32_FLOAT;
- emit_sz = 1 * sizeof(float);
- break;
- case EMIT_1F_PSIZE:
+ output_format = draw_translate_vinfo_format(vinfo->attrib[i].emit);
+ emit_sz = draw_translate_vinfo_size(vinfo->attrib[i].emit);
+
+ if (vinfo->attrib[i].emit == EMIT_OMIT)
+ continue;
+
+ if (vinfo->attrib[i].emit == EMIT_1F_PSIZE) {
input_format = PIPE_FORMAT_R32_FLOAT;
input_buffer = draw->pt.nr_vertex_buffers;
input_offset = 0;
- output_format = PIPE_FORMAT_R32_FLOAT;
- emit_sz = 1 * sizeof(float);
- break;
- case EMIT_OMIT:
- continue;
- default:
- assert(0);
- output_format = PIPE_FORMAT_NONE;
- emit_sz = 0;
- continue;
}
key.element[i].type = TRANSLATE_ELEMENT_NORMAL;
@@ -193,7 +168,8 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle,
feme->translate->set_buffer(feme->translate,
draw->pt.nr_vertex_buffers,
&feme->point_size,
- 0);
+ 0,
+ ~0);
}
feme->point_size = draw->rasterizer->point_size;
@@ -203,7 +179,8 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle,
i,
((char *)draw->pt.user.vbuffer[i] +
draw->pt.vertex_buffer[i].buffer_offset),
- draw->pt.vertex_buffer[i].stride );
+ draw->pt.vertex_buffer[i].stride,
+ draw->pt.vertex_buffer[i].max_index);
}
*max_vertices = (draw->render->max_vertex_buffer_bytes /
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
index 1aecb510777..cbb5b6c9605 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
@@ -130,31 +130,10 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
unsigned dst_offset = 0;
for (i = 0; i < vinfo->num_attribs; i++) {
- unsigned emit_sz = 0;
-
- switch (vinfo->attrib[i].emit) {
- case EMIT_4F:
- emit_sz = 4 * sizeof(float);
- break;
- case EMIT_3F:
- emit_sz = 3 * sizeof(float);
- break;
- case EMIT_2F:
- emit_sz = 2 * sizeof(float);
- break;
- case EMIT_1F:
- emit_sz = 1 * sizeof(float);
- break;
- case EMIT_1F_PSIZE:
- emit_sz = 1 * sizeof(float);
- break;
- case EMIT_4UB:
- emit_sz = 4 * sizeof(ubyte);
- break;
- default:
- assert(0);
- break;
- }
+ unsigned emit_sz = draw_translate_vinfo_size(vinfo->attrib[i].emit);
+
+ /* doesn't handle EMIT_OMIT */
+ assert(emit_sz != 0);
/* The elements in the key correspond to vertex shader output
* numbers, not to positions in the hw vertex description --
@@ -188,7 +167,8 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
i,
((const ubyte *) draw->pt.user.vbuffer[i] +
draw->pt.vertex_buffer[i].buffer_offset),
- draw->pt.vertex_buffer[i].stride );
+ draw->pt.vertex_buffer[i].stride,
+ draw->pt.vertex_buffer[i].max_index );
}
*max_vertices = (draw->render->max_vertex_buffer_bytes /
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
new file mode 100644
index 00000000000..d2a492f2b4c
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
@@ -0,0 +1,416 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMWare, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "draw/draw_context.h"
+#include "draw/draw_vbuf.h"
+#include "draw/draw_vertex.h"
+#include "draw/draw_pt.h"
+#include "draw/draw_vs.h"
+#include "draw/draw_gs.h"
+#include "draw/draw_llvm.h"
+
+
+struct llvm_middle_end {
+ struct draw_pt_middle_end base;
+ struct draw_context *draw;
+
+ struct pt_emit *emit;
+ struct pt_fetch *fetch;
+ struct pt_post_vs *post_vs;
+
+
+ unsigned vertex_data_offset;
+ unsigned vertex_size;
+ unsigned prim;
+ unsigned opt;
+
+ struct draw_llvm *llvm;
+ struct draw_llvm_variant *variants;
+ struct draw_llvm_variant *current_variant;
+ int nr_variants;
+};
+
+
+static void
+llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
+ unsigned prim,
+ unsigned opt,
+ unsigned *max_vertices )
+{
+ struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
+ struct draw_context *draw = fpme->draw;
+ struct draw_vertex_shader *vs = draw->vs.vertex_shader;
+ struct draw_geometry_shader *gs = draw->gs.geometry_shader;
+ struct draw_llvm_variant_key key;
+ struct draw_llvm_variant *variant = NULL;
+ unsigned i;
+ unsigned instance_id_index = ~0;
+
+ /* Add one to num_outputs because the pipeline occasionally tags on
+ * an additional texcoord, eg for AA lines.
+ */
+ unsigned nr = MAX2( vs->info.num_inputs,
+ vs->info.num_outputs + 1 );
+
+ /* Scan for instanceID system value.
+ */
+ for (i = 0; i < vs->info.num_inputs; i++) {
+ if (vs->info.input_semantic_name[i] == TGSI_SEMANTIC_INSTANCEID) {
+ instance_id_index = i;
+ break;
+ }
+ }
+
+ fpme->prim = prim;
+ fpme->opt = opt;
+
+ /* Always leave room for the vertex header whether we need it or
+ * not. It's hard to get rid of it in particular because of the
+ * viewport code in draw_pt_post_vs.c.
+ */
+ fpme->vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float);
+
+
+
+ draw_pt_fetch_prepare( fpme->fetch,
+ vs->info.num_inputs,
+ fpme->vertex_size,
+ instance_id_index );
+ if (opt & PT_SHADE) {
+ vs->prepare(vs, draw);
+ draw_geometry_shader_prepare(gs, draw);
+ }
+
+
+ /* XXX: it's not really gl rasterization rules we care about here,
+ * but gl vs dx9 clip spaces.
+ */
+ draw_pt_post_vs_prepare( fpme->post_vs,
+ (boolean)draw->bypass_clipping,
+ (boolean)(draw->identity_viewport),
+ (boolean)draw->rasterizer->gl_rasterization_rules,
+ (draw->vs.edgeflag_output ? true : false) );
+
+ if (!(opt & PT_PIPELINE)) {
+ draw_pt_emit_prepare( fpme->emit,
+ prim,
+ max_vertices );
+
+ *max_vertices = MAX2( *max_vertices,
+ DRAW_PIPE_MAX_VERTICES );
+ }
+ else {
+ *max_vertices = DRAW_PIPE_MAX_VERTICES;
+ }
+
+ /* return even number */
+ *max_vertices = *max_vertices & ~1;
+
+ draw_llvm_make_variant_key(fpme->llvm, &key);
+
+ variant = fpme->variants;
+ while(variant) {
+ if(memcmp(&variant->key, &key, sizeof key) == 0)
+ break;
+
+ variant = variant->next;
+ }
+
+ if (!variant) {
+ variant = draw_llvm_prepare(fpme->llvm, nr);
+ variant->next = fpme->variants;
+ fpme->variants = variant;
+ ++fpme->nr_variants;
+ }
+ fpme->current_variant = variant;
+
+ /*XXX we only support one constant buffer */
+ fpme->llvm->jit_context.vs_constants =
+ draw->pt.user.vs_constants[0];
+ fpme->llvm->jit_context.gs_constants =
+ draw->pt.user.gs_constants[0];
+}
+
+
+
+static void llvm_middle_end_run( struct draw_pt_middle_end *middle,
+ const unsigned *fetch_elts,
+ unsigned fetch_count,
+ const ushort *draw_elts,
+ unsigned draw_count )
+{
+ struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
+ struct draw_context *draw = fpme->draw;
+ unsigned opt = fpme->opt;
+ unsigned alloc_count = align( fetch_count, 4 );
+
+ struct vertex_header *pipeline_verts =
+ (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count);
+
+ if (!pipeline_verts) {
+ /* Not much we can do here - just skip the rendering.
+ */
+ assert(0);
+ return;
+ }
+
+ fpme->current_variant->jit_func_elts( &fpme->llvm->jit_context,
+ pipeline_verts,
+ (const char **)draw->pt.user.vbuffer,
+ fetch_elts,
+ fetch_count,
+ fpme->vertex_size,
+ draw->pt.vertex_buffer );
+
+ if (draw_pt_post_vs_run( fpme->post_vs,
+ pipeline_verts,
+ fetch_count,
+ fpme->vertex_size ))
+ {
+ opt |= PT_PIPELINE;
+ }
+
+ /* Do we need to run the pipeline?
+ */
+ if (opt & PT_PIPELINE) {
+ draw_pipeline_run( fpme->draw,
+ fpme->prim,
+ pipeline_verts,
+ fetch_count,
+ fpme->vertex_size,
+ draw_elts,
+ draw_count );
+ }
+ else {
+ draw_pt_emit( fpme->emit,
+ (const float (*)[4])pipeline_verts->data,
+ fetch_count,
+ fpme->vertex_size,
+ draw_elts,
+ draw_count );
+ }
+
+
+ FREE(pipeline_verts);
+}
+
+
+static void llvm_middle_end_linear_run( struct draw_pt_middle_end *middle,
+ unsigned start,
+ unsigned count)
+{
+ struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
+ struct draw_context *draw = fpme->draw;
+ unsigned opt = fpme->opt;
+ unsigned alloc_count = align( count, 4 );
+
+ struct vertex_header *pipeline_verts =
+ (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count);
+
+ if (!pipeline_verts) {
+ /* Not much we can do here - just skip the rendering.
+ */
+ assert(0);
+ return;
+ }
+
+#if 0
+ debug_printf("#### Pipeline = %p (data = %p)\n",
+ pipeline_verts, pipeline_verts->data);
+#endif
+ fpme->current_variant->jit_func( &fpme->llvm->jit_context,
+ pipeline_verts,
+ (const char **)draw->pt.user.vbuffer,
+ start,
+ count,
+ fpme->vertex_size,
+ draw->pt.vertex_buffer );
+
+ if (draw_pt_post_vs_run( fpme->post_vs,
+ pipeline_verts,
+ count,
+ fpme->vertex_size ))
+ {
+ opt |= PT_PIPELINE;
+ }
+
+ /* Do we need to run the pipeline?
+ */
+ if (opt & PT_PIPELINE) {
+ draw_pipeline_run_linear( fpme->draw,
+ fpme->prim,
+ pipeline_verts,
+ count,
+ fpme->vertex_size);
+ }
+ else {
+ draw_pt_emit_linear( fpme->emit,
+ (const float (*)[4])pipeline_verts->data,
+ fpme->vertex_size,
+ count );
+ }
+
+ FREE(pipeline_verts);
+}
+
+
+
+static boolean
+llvm_middle_end_linear_run_elts( struct draw_pt_middle_end *middle,
+ unsigned start,
+ unsigned count,
+ const ushort *draw_elts,
+ unsigned draw_count )
+{
+ struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
+ struct draw_context *draw = fpme->draw;
+ unsigned opt = fpme->opt;
+ unsigned alloc_count = align( count, 4 );
+
+ struct vertex_header *pipeline_verts =
+ (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count);
+
+ if (!pipeline_verts)
+ return FALSE;
+
+ fpme->current_variant->jit_func( &fpme->llvm->jit_context,
+ pipeline_verts,
+ (const char **)draw->pt.user.vbuffer,
+ start,
+ count,
+ fpme->vertex_size,
+ draw->pt.vertex_buffer );
+
+ if (draw_pt_post_vs_run( fpme->post_vs,
+ pipeline_verts,
+ count,
+ fpme->vertex_size ))
+ {
+ opt |= PT_PIPELINE;
+ }
+
+ /* Do we need to run the pipeline?
+ */
+ if (opt & PT_PIPELINE) {
+ draw_pipeline_run( fpme->draw,
+ fpme->prim,
+ pipeline_verts,
+ count,
+ fpme->vertex_size,
+ draw_elts,
+ draw_count );
+ }
+ else {
+ draw_pt_emit( fpme->emit,
+ (const float (*)[4])pipeline_verts->data,
+ count,
+ fpme->vertex_size,
+ draw_elts,
+ draw_count );
+ }
+
+ FREE(pipeline_verts);
+ return TRUE;
+}
+
+
+
+static void llvm_middle_end_finish( struct draw_pt_middle_end *middle )
+{
+ /* nothing to do */
+}
+
+static void llvm_middle_end_destroy( struct draw_pt_middle_end *middle )
+{
+ struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
+
+ if (fpme->fetch)
+ draw_pt_fetch_destroy( fpme->fetch );
+
+ if (fpme->emit)
+ draw_pt_emit_destroy( fpme->emit );
+
+ if (fpme->post_vs)
+ draw_pt_post_vs_destroy( fpme->post_vs );
+
+ if (fpme->llvm)
+ draw_llvm_destroy( fpme->llvm );
+
+ FREE(middle);
+}
+
+
+struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit_llvm( struct draw_context *draw )
+{
+ struct llvm_middle_end *fpme = 0;
+
+ if (!draw->engine)
+ return NULL;
+
+ fpme = CALLOC_STRUCT( llvm_middle_end );
+ if (!fpme)
+ goto fail;
+
+ fpme->base.prepare = llvm_middle_end_prepare;
+ fpme->base.run = llvm_middle_end_run;
+ fpme->base.run_linear = llvm_middle_end_linear_run;
+ fpme->base.run_linear_elts = llvm_middle_end_linear_run_elts;
+ fpme->base.finish = llvm_middle_end_finish;
+ fpme->base.destroy = llvm_middle_end_destroy;
+
+ fpme->draw = draw;
+
+ fpme->fetch = draw_pt_fetch_create( draw );
+ if (!fpme->fetch)
+ goto fail;
+
+ fpme->post_vs = draw_pt_post_vs_create( draw );
+ if (!fpme->post_vs)
+ goto fail;
+
+ fpme->emit = draw_pt_emit_create( draw );
+ if (!fpme->emit)
+ goto fail;
+
+ fpme->llvm = draw_llvm_create(draw);
+ if (!fpme->llvm)
+ goto fail;
+
+ fpme->variants = NULL;
+ fpme->current_variant = NULL;
+ fpme->nr_variants = 0;
+
+ return &fpme->base;
+
+ fail:
+ if (fpme)
+ llvm_middle_end_destroy( &fpme->base );
+
+ return NULL;
+}
diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c
index 9728d5c2bdf..5525dfc748d 100644
--- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c
+++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c
@@ -108,6 +108,11 @@ static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs,
for (j = 0; j < count; j++) {
float *position = out->data[pos];
+#if 0
+ debug_printf("%d) io = %p, data = %p = [%f, %f, %f, %f]\n",
+ j, out, position, position[0], position[1], position[2], position[3]);
+#endif
+
out->clip[0] = position[0];
out->clip[1] = position[1];
out->clip[2] = position[2];
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
index f0aec5febab..a292346be95 100644
--- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
+++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
@@ -6,6 +6,7 @@ static unsigned trim( unsigned count, unsigned first, unsigned incr )
static void FUNC(struct draw_pt_front_end *frontend,
pt_elt_func get_elt,
const void *elts,
+ int elt_bias,
unsigned count)
{
struct varray_frontend *varray = (struct varray_frontend *)frontend;
@@ -14,6 +15,8 @@ static void FUNC(struct draw_pt_front_end *frontend,
unsigned j;
unsigned first, incr;
+ assert(elt_bias == 0);
+
draw_pt_split_prim(varray->input_prim, &first, &incr);
/* Sanitize primitive length:
diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c
index 757c4874545..37ffbac4f92 100644
--- a/src/gallium/auxiliary/draw/draw_pt_vcache.c
+++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c
@@ -329,6 +329,7 @@ static INLINE void
vcache_check_run( struct draw_pt_front_end *frontend,
pt_elt_func get_elt,
const void *elts,
+ int elt_bias,
unsigned draw_count )
{
struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
@@ -346,7 +347,7 @@ vcache_check_run( struct draw_pt_front_end *frontend,
vcache->fetch_max,
draw_count);
- if (max_index >= DRAW_PIPE_MAX_VERTICES ||
+ if (elt_bias + max_index >= DRAW_PIPE_MAX_VERTICES ||
fetch_count >= UNDEFINED_VERTEX_ID ||
fetch_count > draw_count) {
if (0) debug_printf("fail\n");
@@ -362,8 +363,11 @@ vcache_check_run( struct draw_pt_front_end *frontend,
}
+ assert((elt_bias >= 0 && min_index + elt_bias >= min_index) ||
+ (elt_bias < 0 && min_index + elt_bias < min_index));
+
if (min_index == 0 &&
- index_size == 2)
+ index_size == 2)
{
transformed_elts = (const ushort *)elts;
}
@@ -433,7 +437,7 @@ vcache_check_run( struct draw_pt_front_end *frontend,
if (fetch_count < UNDEFINED_VERTEX_ID)
ok = vcache->middle->run_linear_elts( vcache->middle,
- min_index, /* start */
+ min_index + elt_bias, /* start */
fetch_count,
transformed_elts,
draw_count );
@@ -447,7 +451,7 @@ vcache_check_run( struct draw_pt_front_end *frontend,
fetch_count, draw_count);
fail:
- vcache_run( frontend, get_elt, elts, draw_count );
+ vcache_run( frontend, get_elt, elts, elt_bias, draw_count );
}
diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h
index 62822a3d562..f7a63de3ba9 100644
--- a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h
@@ -3,6 +3,7 @@
static void FUNC( struct draw_pt_front_end *frontend,
pt_elt_func get_elt,
const void *elts,
+ int elt_bias,
unsigned count )
{
struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
@@ -20,7 +21,7 @@ static void FUNC( struct draw_pt_front_end *frontend,
case PIPE_PRIM_POINTS:
for (i = 0; i < count; i ++) {
POINT( vcache,
- get_elt(elts, i + 0) );
+ get_elt(elts, i + 0) + elt_bias );
}
break;
@@ -28,8 +29,8 @@ static void FUNC( struct draw_pt_front_end *frontend,
for (i = 0; i+1 < count; i += 2) {
LINE( vcache,
DRAW_PIPE_RESET_STIPPLE,
- get_elt(elts, i + 0),
- get_elt(elts, i + 1));
+ get_elt(elts, i + 0) + elt_bias,
+ get_elt(elts, i + 1) + elt_bias);
}
break;
@@ -40,14 +41,14 @@ static void FUNC( struct draw_pt_front_end *frontend,
for (i = 1; i < count; i++, flags = 0) {
LINE( vcache,
flags,
- get_elt(elts, i - 1),
- get_elt(elts, i ));
+ get_elt(elts, i - 1) + elt_bias,
+ get_elt(elts, i ) + elt_bias);
}
LINE( vcache,
flags,
- get_elt(elts, i - 1),
- get_elt(elts, 0 ));
+ get_elt(elts, i - 1) + elt_bias,
+ get_elt(elts, 0 ) + elt_bias);
}
break;
@@ -56,8 +57,8 @@ static void FUNC( struct draw_pt_front_end *frontend,
for (i = 1; i < count; i++, flags = 0) {
LINE( vcache,
flags,
- get_elt(elts, i - 1),
- get_elt(elts, i ));
+ get_elt(elts, i - 1) + elt_bias,
+ get_elt(elts, i ) + elt_bias);
}
break;
@@ -65,9 +66,9 @@ static void FUNC( struct draw_pt_front_end *frontend,
for (i = 0; i+2 < count; i += 3) {
TRIANGLE( vcache,
DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- get_elt(elts, i + 0),
- get_elt(elts, i + 1),
- get_elt(elts, i + 2 ));
+ get_elt(elts, i + 0) + elt_bias,
+ get_elt(elts, i + 1) + elt_bias,
+ get_elt(elts, i + 2 ) + elt_bias);
}
break;
@@ -76,18 +77,18 @@ static void FUNC( struct draw_pt_front_end *frontend,
for (i = 0; i+2 < count; i++) {
TRIANGLE( vcache,
DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- get_elt(elts, i + 0),
- get_elt(elts, i + 1 + (i&1)),
- get_elt(elts, i + 2 - (i&1)));
+ get_elt(elts, i + 0) + elt_bias,
+ get_elt(elts, i + 1 + (i&1)) + elt_bias,
+ get_elt(elts, i + 2 - (i&1)) + elt_bias);
}
}
else {
for (i = 0; i+2 < count; i++) {
TRIANGLE( vcache,
DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- get_elt(elts, i + 0 + (i&1)),
- get_elt(elts, i + 1 - (i&1)),
- get_elt(elts, i + 2 ));
+ get_elt(elts, i + 0 + (i&1)) + elt_bias,
+ get_elt(elts, i + 1 - (i&1)) + elt_bias,
+ get_elt(elts, i + 2 ) + elt_bias);
}
}
break;
@@ -98,18 +99,18 @@ static void FUNC( struct draw_pt_front_end *frontend,
for (i = 0; i+2 < count; i++) {
TRIANGLE( vcache,
DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- get_elt(elts, i + 1),
- get_elt(elts, i + 2),
- get_elt(elts, 0 ));
+ get_elt(elts, i + 1) + elt_bias,
+ get_elt(elts, i + 2) + elt_bias,
+ get_elt(elts, 0 ) + elt_bias);
}
}
else {
for (i = 0; i+2 < count; i++) {
TRIANGLE( vcache,
DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- get_elt(elts, 0),
- get_elt(elts, i + 1),
- get_elt(elts, i + 2 ));
+ get_elt(elts, 0) + elt_bias,
+ get_elt(elts, i + 1) + elt_bias,
+ get_elt(elts, i + 2 ) + elt_bias);
}
}
}
@@ -118,39 +119,21 @@ static void FUNC( struct draw_pt_front_end *frontend,
case PIPE_PRIM_QUADS:
for (i = 0; i+3 < count; i += 4) {
- if (flatfirst) {
- QUAD( vcache,
- get_elt(elts, i + 0),
- get_elt(elts, i + 1),
- get_elt(elts, i + 2),
- get_elt(elts, i + 3) );
- }
- else {
- QUAD( vcache,
- get_elt(elts, i + 0),
- get_elt(elts, i + 1),
- get_elt(elts, i + 2),
- get_elt(elts, i + 3) );
- }
+ QUAD( vcache,
+ get_elt(elts, i + 0) + elt_bias,
+ get_elt(elts, i + 1) + elt_bias,
+ get_elt(elts, i + 2) + elt_bias,
+ get_elt(elts, i + 3) + elt_bias );
}
break;
case PIPE_PRIM_QUAD_STRIP:
for (i = 0; i+3 < count; i += 2) {
- if (flatfirst) {
- QUAD( vcache,
- get_elt(elts, i + 0),
- get_elt(elts, i + 1),
- get_elt(elts, i + 3),
- get_elt(elts, i + 2) );
- }
- else {
- QUAD( vcache,
- get_elt(elts, i + 2),
- get_elt(elts, i + 0),
- get_elt(elts, i + 1),
- get_elt(elts, i + 3) );
- }
+ QUAD( vcache,
+ get_elt(elts, i + 2) + elt_bias,
+ get_elt(elts, i + 0) + elt_bias,
+ get_elt(elts, i + 1) + elt_bias,
+ get_elt(elts, i + 3) + elt_bias );
}
break;
@@ -183,16 +166,16 @@ static void FUNC( struct draw_pt_front_end *frontend,
if (flatfirst) {
TRIANGLE( vcache,
flags,
- get_elt(elts, 0),
- get_elt(elts, i + 1),
- get_elt(elts, i + 2) );
+ get_elt(elts, 0) + elt_bias,
+ get_elt(elts, i + 1) + elt_bias,
+ get_elt(elts, i + 2) + elt_bias );
}
else {
TRIANGLE( vcache,
flags,
- get_elt(elts, i + 1),
- get_elt(elts, i + 2),
- get_elt(elts, 0));
+ get_elt(elts, i + 1) + elt_bias,
+ get_elt(elts, i + 2) + elt_bias,
+ get_elt(elts, 0) + elt_bias);
}
}
}
diff --git a/src/gallium/auxiliary/draw/draw_vertex.c b/src/gallium/auxiliary/draw/draw_vertex.c
index 3214213e445..a4f5e882c0a 100644
--- a/src/gallium/auxiliary/draw/draw_vertex.c
+++ b/src/gallium/auxiliary/draw/draw_vertex.c
@@ -48,30 +48,12 @@ draw_compute_vertex_size(struct vertex_info *vinfo)
uint i;
vinfo->size = 0;
- for (i = 0; i < vinfo->num_attribs; i++) {
- switch (vinfo->attrib[i].emit) {
- case EMIT_OMIT:
- break;
- case EMIT_4UB:
- /* fall-through */
- case EMIT_1F_PSIZE:
- /* fall-through */
- case EMIT_1F:
- vinfo->size += 1;
- break;
- case EMIT_2F:
- vinfo->size += 2;
- break;
- case EMIT_3F:
- vinfo->size += 3;
- break;
- case EMIT_4F:
- vinfo->size += 4;
- break;
- default:
- assert(0);
- }
- }
+ for (i = 0; i < vinfo->num_attribs; i++)
+ vinfo->size += draw_translate_vinfo_size(vinfo->attrib[i].emit);
+
+ assert(vinfo->size % 4 == 0);
+ /* in dwords */
+ vinfo->size /= 4;
}
@@ -120,6 +102,13 @@ draw_dump_emitted_vertex(const struct vertex_info *vinfo, const uint8_t *data)
debug_printf("%u ", *data++);
debug_printf("%u ", *data++);
break;
+ case EMIT_4UB_BGRA:
+ debug_printf("EMIT_4UB_BGRA:\t");
+ debug_printf("%u ", *data++);
+ debug_printf("%u ", *data++);
+ debug_printf("%u ", *data++);
+ debug_printf("%u ", *data++);
+ break;
default:
assert(0);
}
diff --git a/src/gallium/auxiliary/draw/draw_vertex.h b/src/gallium/auxiliary/draw/draw_vertex.h
index 8c3c7befbc7..3af31ffe126 100644
--- a/src/gallium/auxiliary/draw/draw_vertex.h
+++ b/src/gallium/auxiliary/draw/draw_vertex.h
@@ -54,7 +54,8 @@ enum attrib_emit {
EMIT_2F,
EMIT_3F,
EMIT_4F,
- EMIT_4UB /**< XXX may need variations for RGBA vs BGRA, etc */
+ EMIT_4UB, /**< is RGBA like the rest */
+ EMIT_4UB_BGRA
};
@@ -141,9 +142,11 @@ void draw_dump_emitted_vertex(const struct vertex_info *vinfo,
const uint8_t *data);
-static INLINE unsigned draw_translate_vinfo_format(unsigned format )
+static INLINE enum pipe_format draw_translate_vinfo_format(enum attrib_emit emit)
{
- switch (format) {
+ switch (emit) {
+ case EMIT_OMIT:
+ return PIPE_FORMAT_NONE;
case EMIT_1F:
case EMIT_1F_PSIZE:
return PIPE_FORMAT_R32_FLOAT;
@@ -155,10 +158,36 @@ static INLINE unsigned draw_translate_vinfo_format(unsigned format )
return PIPE_FORMAT_R32G32B32A32_FLOAT;
case EMIT_4UB:
return PIPE_FORMAT_R8G8B8A8_UNORM;
+ case EMIT_4UB_BGRA:
+ return PIPE_FORMAT_B8G8R8A8_UNORM;
default:
+ assert(!"unexpected format");
return PIPE_FORMAT_NONE;
}
}
+static INLINE enum attrib_emit draw_translate_vinfo_size(enum attrib_emit emit)
+{
+ switch (emit) {
+ case EMIT_OMIT:
+ return 0;
+ case EMIT_1F:
+ case EMIT_1F_PSIZE:
+ return 1 * sizeof(float);
+ case EMIT_2F:
+ return 2 * sizeof(float);
+ case EMIT_3F:
+ return 3 * sizeof(float);
+ case EMIT_4F:
+ return 4 * sizeof(float);
+ case EMIT_4UB:
+ return 4 * sizeof(unsigned char);
+ case EMIT_4UB_BGRA:
+ return 4 * sizeof(unsigned char);
+ default:
+ assert(!"unexpected format");
+ return 0;
+ }
+}
#endif /* DRAW_VERTEX_H */
diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c
index 90858380221..c2832eefa2a 100644
--- a/src/gallium/auxiliary/draw/draw_vs.c
+++ b/src/gallium/auxiliary/draw/draw_vs.c
@@ -46,7 +46,7 @@
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_exec.h"
-
+DEBUG_GET_ONCE_BOOL_OPTION(gallium_dump_vs, "GALLIUM_DUMP_VS", FALSE)
void
draw_vs_set_constants(struct draw_context *draw,
@@ -91,14 +91,11 @@ draw_create_vertex_shader(struct draw_context *draw,
tgsi_dump(shader->tokens, 0);
}
- vs = draw_create_vs_llvm( draw, shader );
+ vs = draw_create_vs_sse( draw, shader );
if (!vs) {
- vs = draw_create_vs_sse( draw, shader );
+ vs = draw_create_vs_ppc( draw, shader );
if (!vs) {
- vs = draw_create_vs_ppc( draw, shader );
- if (!vs) {
- vs = draw_create_vs_exec( draw, shader );
- }
+ vs = draw_create_vs_exec( draw, shader );
}
}
@@ -160,7 +157,7 @@ draw_delete_vertex_shader(struct draw_context *draw,
boolean
draw_vs_init( struct draw_context *draw )
{
- draw->dump_vs = debug_get_bool_option("GALLIUM_DUMP_VS", FALSE);
+ draw->dump_vs = debug_get_option_gallium_dump_vs();
draw->vs.machine = tgsi_exec_machine_create();
if (!draw->vs.machine)
diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h
index d095c9bad1d..6c7e94db433 100644
--- a/src/gallium/auxiliary/draw/draw_vs.h
+++ b/src/gallium/auxiliary/draw/draw_vs.h
@@ -80,7 +80,8 @@ struct draw_vs_varient {
void (*set_buffer)( struct draw_vs_varient *,
unsigned i,
const void *ptr,
- unsigned stride );
+ unsigned stride,
+ unsigned max_stride );
void (PIPE_CDECL *run_linear)( struct draw_vs_varient *shader,
unsigned start,
@@ -163,17 +164,14 @@ struct draw_vertex_shader *
draw_create_vs_ppc(struct draw_context *draw,
const struct pipe_shader_state *templ);
-struct draw_vertex_shader *
-draw_create_vs_llvm(struct draw_context *draw,
- const struct pipe_shader_state *templ);
-
struct draw_vs_varient_key;
struct draw_vertex_shader;
-struct draw_vs_varient *draw_vs_varient_aos_sse( struct draw_vertex_shader *vs,
- const struct draw_vs_varient_key *key );
+struct draw_vs_varient *
+draw_vs_create_varient_aos_sse( struct draw_vertex_shader *vs,
+ const struct draw_vs_varient_key *key );
@@ -191,8 +189,9 @@ struct translate *draw_vs_get_fetch( struct draw_context *draw,
struct translate *draw_vs_get_emit( struct draw_context *draw,
struct translate_key *key );
-struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs,
- const struct draw_vs_varient_key *key );
+struct draw_vs_varient *
+draw_vs_create_varient_generic( struct draw_vertex_shader *vs,
+ const struct draw_vs_varient_key *key );
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index e7121f36541..19f49e34c8b 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -2089,13 +2089,21 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
}
+/** cast wrapper */
+static INLINE struct draw_vs_varient_aos_sse *
+draw_vs_varient_aos_sse(struct draw_vs_varient *varient)
+{
+ return (struct draw_vs_varient_aos_sse *) varient;
+}
+
static void vaos_set_buffer( struct draw_vs_varient *varient,
unsigned buf,
const void *ptr,
- unsigned stride )
+ unsigned stride,
+ unsigned max_stride)
{
- struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
+ struct draw_vs_varient_aos_sse *vaos = draw_vs_varient_aos_sse(varient);
if (buf < vaos->nr_vb) {
vaos->buffer[buf].base_ptr = (char *)ptr;
@@ -2112,7 +2120,7 @@ static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient,
unsigned count,
void *output_buffer )
{
- struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
+ struct draw_vs_varient_aos_sse *vaos = draw_vs_varient_aos_sse(varient);
struct aos_machine *machine = vaos->draw->vs.aos_machine;
unsigned i;
@@ -2136,7 +2144,7 @@ static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient,
unsigned count,
void *output_buffer )
{
- struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
+ struct draw_vs_varient_aos_sse *vaos = draw_vs_varient_aos_sse(varient);
struct aos_machine *machine = vaos->draw->vs.aos_machine;
unsigned i;
@@ -2165,7 +2173,7 @@ static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient,
static void vaos_destroy( struct draw_vs_varient *varient )
{
- struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
+ struct draw_vs_varient_aos_sse *vaos = draw_vs_varient_aos_sse(varient);
FREE( vaos->buffer );
@@ -2241,13 +2249,14 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
}
-struct draw_vs_varient *draw_vs_varient_aos_sse( struct draw_vertex_shader *vs,
- const struct draw_vs_varient_key *key )
+struct draw_vs_varient *
+draw_vs_create_varient_aos_sse( struct draw_vertex_shader *vs,
+ const struct draw_vs_varient_key *key )
{
struct draw_vs_varient *varient = varient_aos_sse( vs, key );
if (varient == NULL) {
- varient = draw_vs_varient_generic( vs, key );
+ varient = draw_vs_create_varient_generic( vs, key );
}
return varient;
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c
index ece1ddde0cb..8f8bbe7cb88 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos_io.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c
@@ -401,13 +401,11 @@ static boolean emit_output( struct aos_compilation *cp,
emit_store_R32G32B32A32(cp, ptr, dataXMM);
break;
case EMIT_4UB:
- if (1) {
- emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W));
- emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM);
- }
- else {
- emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM);
- }
+ emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM);
+ break;
+ case EMIT_4UB_BGRA:
+ emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W));
+ emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM);
break;
default:
AOS_ERROR(cp, "unhandled output format");
diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c
index 7deca2b69d9..bc34d390dae 100644
--- a/src/gallium/auxiliary/draw/draw_vs_exec.c
+++ b/src/gallium/auxiliary/draw/draw_vs_exec.c
@@ -203,7 +203,7 @@ draw_create_vs_exec(struct draw_context *draw,
vs->base.prepare = vs_exec_prepare;
vs->base.run_linear = vs_exec_run_linear;
vs->base.delete = vs_exec_delete;
- vs->base.create_varient = draw_vs_varient_generic;
+ vs->base.create_varient = draw_vs_create_varient_generic;
vs->machine = draw->vs.machine;
return &vs->base;
diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c
deleted file mode 100644
index 5f7a645f5d8..00000000000
--- a/src/gallium/auxiliary/draw/draw_vs_llvm.c
+++ /dev/null
@@ -1,131 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Zack Rusin
- * Keith Whitwell <[email protected]>
- * Brian Paul
- */
-
-#include "util/u_memory.h"
-#include "pipe/p_shader_tokens.h"
-#include "draw_private.h"
-#include "draw_context.h"
-#include "draw_vs.h"
-
-#include "tgsi/tgsi_parse.h"
-
-#ifdef MESA_LLVM
-
-struct draw_llvm_vertex_shader {
- struct draw_vertex_shader base;
- struct tgsi_exec_machine *machine;
-};
-
-
-static void
-vs_llvm_prepare( struct draw_vertex_shader *base,
- struct draw_context *draw )
-{
-}
-
-
-static void
-vs_llvm_run_linear( struct draw_vertex_shader *base,
- const float (*input)[4],
- float (*output)[4],
- const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
- unsigned count,
- unsigned input_stride,
- unsigned output_stride )
-{
- struct draw_llvm_vertex_shader *shader =
- (struct draw_llvm_vertex_shader *)base;
-}
-
-
-
-static void
-vs_llvm_delete( struct draw_vertex_shader *base )
-{
- struct draw_llvm_vertex_shader *shader =
- (struct draw_llvm_vertex_shader *)base;
-
- /* Do something to free compiled shader:
- */
-
- FREE( (void*) shader->base.state.tokens );
- FREE( shader );
-}
-
-
-
-
-struct draw_vertex_shader *
-draw_create_vs_llvm(struct draw_context *draw,
- const struct pipe_shader_state *templ)
-{
- struct draw_llvm_vertex_shader *vs;
-
- vs = CALLOC_STRUCT( draw_llvm_vertex_shader );
- if (vs == NULL)
- return NULL;
-
- /* we make a private copy of the tokens */
- vs->base.state.tokens = tgsi_dup_tokens(templ->tokens);
- if (!vs->base.state.tokens) {
- FREE(vs);
- return NULL;
- }
-
- tgsi_scan_shader(vs->base.state.tokens, &vs->base.info);
-
- vs->base.draw = draw;
- vs->base.prepare = vs_llvm_prepare;
- vs->base.create_varient = draw_vs_varient_generic;
- vs->base.run_linear = vs_llvm_run_linear;
- vs->base.delete = vs_llvm_delete;
- vs->machine = draw->vs.machine;
-
- return &vs->base;
-}
-
-
-
-
-
-#else
-
-struct draw_vertex_shader *
-draw_create_vs_llvm(struct draw_context *draw,
- const struct pipe_shader_state *shader)
-{
- return NULL;
-}
-
-#endif
diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c
index 54e6423388f..14c95082a9d 100644
--- a/src/gallium/auxiliary/draw/draw_vs_sse.c
+++ b/src/gallium/auxiliary/draw/draw_vs_sse.c
@@ -165,9 +165,9 @@ draw_create_vs_sse(struct draw_context *draw,
vs->base.draw = draw;
if (1)
- vs->base.create_varient = draw_vs_varient_aos_sse;
+ vs->base.create_varient = draw_vs_create_varient_aos_sse;
else
- vs->base.create_varient = draw_vs_varient_generic;
+ vs->base.create_varient = draw_vs_create_varient_generic;
vs->base.prepare = vs_sse_prepare;
vs->base.run_linear = vs_sse_run_linear;
vs->base.delete = vs_sse_delete;
diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c
index 5ed706cb4ff..6eb26927f27 100644
--- a/src/gallium/auxiliary/draw/draw_vs_varient.c
+++ b/src/gallium/auxiliary/draw/draw_vs_varient.c
@@ -66,14 +66,16 @@ struct draw_vs_varient_generic {
static void vsvg_set_buffer( struct draw_vs_varient *varient,
unsigned buffer,
const void *ptr,
- unsigned stride )
+ unsigned stride,
+ unsigned max_index )
{
struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient;
vsvg->fetch->set_buffer(vsvg->fetch,
buffer,
ptr,
- stride);
+ stride,
+ max_index );
}
@@ -172,12 +174,14 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient,
vsvg->emit->set_buffer( vsvg->emit,
0,
temp_buffer,
- temp_vertex_stride );
+ temp_vertex_stride,
+ ~0 );
vsvg->emit->set_buffer( vsvg->emit,
1,
&vsvg->draw->rasterizer->point_size,
- 0);
+ 0,
+ ~0 );
vsvg->emit->run( vsvg->emit,
0, count,
@@ -232,12 +236,14 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient,
vsvg->emit->set_buffer( vsvg->emit,
0,
temp_buffer,
- temp_vertex_stride );
+ temp_vertex_stride,
+ ~0 );
vsvg->emit->set_buffer( vsvg->emit,
1,
&vsvg->draw->rasterizer->point_size,
- 0);
+ 0,
+ ~0 );
vsvg->emit->run( vsvg->emit,
0, count,
@@ -257,8 +263,9 @@ static void vsvg_destroy( struct draw_vs_varient *varient )
}
-struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs,
- const struct draw_vs_varient_key *key )
+struct draw_vs_varient *
+draw_vs_create_varient_generic( struct draw_vertex_shader *vs,
+ const struct draw_vs_varient_key *key )
{
unsigned i;
struct translate_key fetch, emit;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_alpha.h b/src/gallium/auxiliary/gallivm/lp_bld.h
index 634575670db..2fa682f4009 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_alpha.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld.h
@@ -1,6 +1,6 @@
/**************************************************************************
*
- * Copyright 2009 VMware, Inc.
+ * Copyright 2010 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -26,29 +26,22 @@
**************************************************************************/
/**
- * Alpha testing to LLVM IR translation.
- *
- * @author Jose Fonseca <[email protected]>
+ * @file
+ * Wrapper for LLVM header file #includes.
*/
-#ifndef LP_BLD_ALPHA_H
-#define LP_BLD_ALPHA_H
+#ifndef LP_BLD_H
+#define LP_BLD_H
-#include <llvm-c/Core.h>
-struct pipe_alpha_state;
-struct lp_type;
-struct lp_build_mask_context;
+#include <llvm-c/Core.h>
-void
-lp_build_alpha_test(LLVMBuilderRef builder,
- const struct pipe_alpha_state *state,
- struct lp_type type,
- struct lp_build_mask_context *mask,
- LLVMValueRef alpha,
- LLVMValueRef ref);
+/** Ensure HAVE_LLVM is set to avoid #ifdef HAVE_LLVM everywhere */
+#ifndef HAVE_LLVM
+#error "HAVE_LLVM should be set with LLVM's version number, e.g. (0x0207 for 2.7)"
+#endif
-#endif /* !LP_BLD_ALPHA_H */
+#endif /* LP_BLD_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 32f9e5201c5..20ae958714b 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -232,6 +232,37 @@ lp_build_add(struct lp_build_context *bld,
}
+/** Return the sum of the elements of a */
+LLVMValueRef
+lp_build_sum_vector(struct lp_build_context *bld,
+ LLVMValueRef a)
+{
+ const struct lp_type type = bld->type;
+ LLVMValueRef index, res;
+ int i;
+
+ if (a == bld->zero)
+ return bld->zero;
+ if (a == bld->undef)
+ return bld->undef;
+ assert(type.length > 1);
+
+ assert(!bld->type.norm);
+
+ index = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ res = LLVMBuildExtractElement(bld->builder, a, index, "");
+
+ for (i = 1; i < type.length; i++) {
+ index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ res = LLVMBuildAdd(bld->builder, res,
+ LLVMBuildExtractElement(bld->builder, a, index, ""),
+ "");
+ }
+
+ return res;
+}
+
+
/**
* Generate a - b
*/
@@ -330,12 +361,12 @@ lp_build_mul_u8n(LLVMBuilderRef builder,
LLVMValueRef c8;
LLVMValueRef ab;
- c8 = lp_build_int_const_scalar(i16_type, 8);
+ c8 = lp_build_const_int_vec(i16_type, 8);
#if 0
/* a*b/255 ~= (a*(b + 1)) >> 256 */
- b = LLVMBuildAdd(builder, b, lp_build_int_const_scalar(i16_type, 1), "");
+ b = LLVMBuildAdd(builder, b, lp_build_const_int_vec(i16_type, 1), "");
ab = LLVMBuildMul(builder, a, b, "");
#else
@@ -343,7 +374,7 @@ lp_build_mul_u8n(LLVMBuilderRef builder,
/* ab/255 ~= (ab + (ab >> 8) + 0x80) >> 8 */
ab = LLVMBuildMul(builder, a, b, "");
ab = LLVMBuildAdd(builder, ab, LLVMBuildLShr(builder, ab, c8, ""), "");
- ab = LLVMBuildAdd(builder, ab, lp_build_int_const_scalar(i16_type, 0x80), "");
+ ab = LLVMBuildAdd(builder, ab, lp_build_const_int_vec(i16_type, 0x80), "");
#endif
@@ -398,7 +429,7 @@ lp_build_mul(struct lp_build_context *bld,
}
if(type.fixed)
- shift = lp_build_int_const_scalar(type, type.width/2);
+ shift = lp_build_const_int_vec(type, type.width/2);
else
shift = NULL;
@@ -460,7 +491,7 @@ lp_build_mul_imm(struct lp_build_context *bld,
* for Inf and NaN.
*/
unsigned mantissa = lp_mantissa(bld->type);
- factor = lp_build_int_const_scalar(bld->type, (unsigned long long)shift << mantissa);
+ factor = lp_build_const_int_vec(bld->type, (unsigned long long)shift << mantissa);
a = LLVMBuildBitCast(bld->builder, a, lp_build_int_vec_type(bld->type), "");
a = LLVMBuildAdd(bld->builder, a, factor, "");
a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(bld->type), "");
@@ -468,12 +499,12 @@ lp_build_mul_imm(struct lp_build_context *bld,
#endif
}
else {
- factor = lp_build_const_scalar(bld->type, shift);
+ factor = lp_build_const_vec(bld->type, shift);
return LLVMBuildShl(bld->builder, a, factor, "");
}
}
- factor = lp_build_const_scalar(bld->type, (double)b);
+ factor = lp_build_const_vec(bld->type, (double)b);
return lp_build_mul(bld, a, factor);
}
@@ -536,7 +567,7 @@ lp_build_lerp(struct lp_build_context *bld,
* but it will be wrong for other uses. Basically we need a more
* powerful lp_type, capable of further distinguishing the values
* interpretation from the value storage. */
- res = LLVMBuildAnd(bld->builder, res, lp_build_int_const_scalar(bld->type, (1 << bld->type.width/2) - 1), "");
+ res = LLVMBuildAnd(bld->builder, res, lp_build_const_int_vec(bld->type, (1 << bld->type.width/2) - 1), "");
return res;
}
@@ -644,13 +675,26 @@ lp_build_abs(struct lp_build_context *bld,
if(type.floating) {
/* Mask out the sign bit */
- LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
- unsigned long long absMask = ~(1ULL << (type.width - 1));
- LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long) absMask));
- a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
- a = LLVMBuildAnd(bld->builder, a, mask, "");
- a = LLVMBuildBitCast(bld->builder, a, vec_type, "");
- return a;
+ if (type.length == 1) {
+ LLVMTypeRef int_type = LLVMIntType(type.width);
+ LLVMTypeRef float_type = LLVMFloatType();
+ unsigned long long absMask = ~(1ULL << (type.width - 1));
+ LLVMValueRef mask = LLVMConstInt(int_type, absMask, 0);
+ a = LLVMBuildBitCast(bld->builder, a, int_type, "");
+ a = LLVMBuildAnd(bld->builder, a, mask, "");
+ a = LLVMBuildBitCast(bld->builder, a, float_type, "");
+ return a;
+ }
+ else {
+ /* vector of floats */
+ LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+ unsigned long long absMask = ~(1ULL << (type.width - 1));
+ LLVMValueRef mask = lp_build_const_int_vec(type, ((unsigned long long) absMask));
+ a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+ a = LLVMBuildAnd(bld->builder, a, mask, "");
+ a = LLVMBuildBitCast(bld->builder, a, vec_type, "");
+ return a;
+ }
}
if(type.width*type.length == 128 && util_cpu_caps.has_ssse3) {
@@ -676,12 +720,12 @@ lp_build_negate(struct lp_build_context *bld,
}
+/** Return -1, 0 or +1 depending on the sign of a */
LLVMValueRef
lp_build_sgn(struct lp_build_context *bld,
LLVMValueRef a)
{
const struct lp_type type = bld->type;
- LLVMTypeRef vec_type = lp_build_vec_type(type);
LLVMValueRef cond;
LLVMValueRef res;
@@ -691,27 +735,42 @@ lp_build_sgn(struct lp_build_context *bld,
res = bld->one;
}
else if(type.floating) {
- /* Take the sign bit and add it to 1 constant */
- LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
- LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1));
+ LLVMTypeRef vec_type;
+ LLVMTypeRef int_type;
+ LLVMValueRef mask;
LLVMValueRef sign;
LLVMValueRef one;
- sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+ unsigned long long maskBit = (unsigned long long)1 << (type.width - 1);
+
+ if (type.length == 1) {
+ int_type = lp_build_int_elem_type(type);
+ vec_type = lp_build_elem_type(type);
+ mask = LLVMConstInt(int_type, maskBit, 0);
+ }
+ else {
+ /* vector */
+ int_type = lp_build_int_vec_type(type);
+ vec_type = lp_build_vec_type(type);
+ mask = lp_build_const_int_vec(type, maskBit);
+ }
+
+ /* Take the sign bit and add it to 1 constant */
+ sign = LLVMBuildBitCast(bld->builder, a, int_type, "");
sign = LLVMBuildAnd(bld->builder, sign, mask, "");
- one = LLVMConstBitCast(bld->one, int_vec_type);
+ one = LLVMConstBitCast(bld->one, int_type);
res = LLVMBuildOr(bld->builder, sign, one, "");
res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
}
else
{
- LLVMValueRef minus_one = lp_build_const_scalar(type, -1.0);
+ LLVMValueRef minus_one = lp_build_const_vec(type, -1.0);
cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, bld->zero);
res = lp_build_select(bld, cond, bld->one, minus_one);
}
/* Handle zero */
cond = lp_build_cmp(bld, PIPE_FUNC_EQUAL, a, bld->zero);
- res = lp_build_select(bld, cond, bld->zero, bld->one);
+ res = lp_build_select(bld, cond, bld->zero, res);
return res;
}
@@ -730,8 +789,8 @@ lp_build_set_sign(struct lp_build_context *bld,
const struct lp_type type = bld->type;
LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
LLVMTypeRef vec_type = lp_build_vec_type(type);
- LLVMValueRef shift = lp_build_int_const_scalar(type, type.width - 1);
- LLVMValueRef mask = lp_build_int_const_scalar(type,
+ LLVMValueRef shift = lp_build_const_int_vec(type, type.width - 1);
+ LLVMValueRef mask = lp_build_const_int_vec(type,
~((unsigned long long) 1 << (type.width - 1)));
LLVMValueRef val, res;
@@ -753,7 +812,7 @@ lp_build_set_sign(struct lp_build_context *bld,
/**
- * Convert vector of int to vector of float.
+ * Convert vector of (or scalar) int to vector of (or scalar) float.
*/
LLVMValueRef
lp_build_int_to_float(struct lp_build_context *bld,
@@ -764,7 +823,11 @@ lp_build_int_to_float(struct lp_build_context *bld,
assert(type.floating);
/*assert(lp_check_value(type, a));*/
- {
+ if (type.length == 1) {
+ LLVMTypeRef float_type = LLVMFloatType();
+ return LLVMBuildSIToFP(bld->builder, a, float_type, "");
+ }
+ else {
LLVMTypeRef vec_type = lp_build_vec_type(type);
/*LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);*/
LLVMValueRef res;
@@ -866,6 +929,13 @@ lp_build_floor(struct lp_build_context *bld,
assert(type.floating);
+ if (type.length == 1) {
+ LLVMValueRef res;
+ res = lp_build_ifloor(bld, a);
+ res = LLVMBuildSIToFP(bld->builder, res, LLVMFloatType(), "");
+ return res;
+ }
+
if(util_cpu_caps.has_sse4_1)
return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR);
else {
@@ -921,15 +991,24 @@ lp_build_itrunc(struct lp_build_context *bld,
LLVMValueRef a)
{
const struct lp_type type = bld->type;
- LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
assert(type.floating);
- assert(lp_check_value(type, a));
- return LLVMBuildFPToSI(bld->builder, a, int_vec_type, "");
+ if (type.length == 1) {
+ LLVMTypeRef int_type = LLVMIntType(type.width);
+ return LLVMBuildFPToSI(bld->builder, a, int_type, "");
+ }
+ else {
+ LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+ assert(lp_check_value(type, a));
+ return LLVMBuildFPToSI(bld->builder, a, int_vec_type, "");
+ }
}
+/**
+ * Convert float[] to int[] with round().
+ */
LLVMValueRef
lp_build_iround(struct lp_build_context *bld,
LLVMValueRef a)
@@ -939,6 +1018,15 @@ lp_build_iround(struct lp_build_context *bld,
LLVMValueRef res;
assert(type.floating);
+
+ if (type.length == 1) {
+ /* scalar float to int */
+ LLVMTypeRef int_type = LLVMIntType(type.width);
+ /* XXX we want rounding here! */
+ res = LLVMBuildFPToSI(bld->builder, a, int_type, "");
+ return res;
+ }
+
assert(lp_check_value(type, a));
if(util_cpu_caps.has_sse4_1) {
@@ -946,7 +1034,7 @@ lp_build_iround(struct lp_build_context *bld,
}
else {
LLVMTypeRef vec_type = lp_build_vec_type(type);
- LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1));
+ LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
LLVMValueRef sign;
LLVMValueRef half;
@@ -955,7 +1043,7 @@ lp_build_iround(struct lp_build_context *bld,
sign = LLVMBuildAnd(bld->builder, sign, mask, "");
/* sign * 0.5 */
- half = lp_build_const_scalar(type, 0.5);
+ half = lp_build_const_vec(type, 0.5);
half = LLVMBuildBitCast(bld->builder, half, int_vec_type, "");
half = LLVMBuildOr(bld->builder, sign, half, "");
half = LLVMBuildBitCast(bld->builder, half, vec_type, "");
@@ -981,6 +1069,14 @@ lp_build_ifloor(struct lp_build_context *bld,
LLVMValueRef res;
assert(type.floating);
+
+ if (type.length == 1) {
+ /* scalar float to int */
+ LLVMTypeRef int_type = LLVMIntType(type.width);
+ res = LLVMBuildFPToSI(bld->builder, a, int_type, "");
+ return res;
+ }
+
assert(lp_check_value(type, a));
if(util_cpu_caps.has_sse4_1) {
@@ -990,18 +1086,18 @@ lp_build_ifloor(struct lp_build_context *bld,
/* Take the sign bit and add it to 1 constant */
LLVMTypeRef vec_type = lp_build_vec_type(type);
unsigned mantissa = lp_mantissa(type);
- LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1));
+ LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
LLVMValueRef sign;
LLVMValueRef offset;
/* sign = a < 0 ? ~0 : 0 */
sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
sign = LLVMBuildAnd(bld->builder, sign, mask, "");
- sign = LLVMBuildAShr(bld->builder, sign, lp_build_int_const_scalar(type, type.width - 1), "");
+ sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "");
lp_build_name(sign, "floor.sign");
/* offset = -0.99999(9)f */
- offset = lp_build_const_scalar(type, -(double)(((unsigned long long)1 << mantissa) - 1)/((unsigned long long)1 << mantissa));
+ offset = lp_build_const_vec(type, -(double)(((unsigned long long)1 << mantissa) - 1)/((unsigned long long)1 << mantissa));
offset = LLVMConstBitCast(offset, int_vec_type);
/* offset = a < 0 ? -0.99999(9)f : 0.0f */
@@ -1114,6 +1210,14 @@ LLVMValueRef
lp_build_cos(struct lp_build_context *bld,
LLVMValueRef a)
{
+#ifdef PIPE_OS_WINDOWS
+ /*
+ * FIXME: X86 backend translates llvm.cos.v4f32 to 4 calls to CRT's cosf()
+ * which is neither efficient nor does the CRT linkage work on Windows
+ * causing segmentation fault. So simply disable the code for now.
+ */
+ return bld->one;
+#else
const struct lp_type type = bld->type;
LLVMTypeRef vec_type = lp_build_vec_type(type);
char intrinsic[32];
@@ -1124,6 +1228,7 @@ lp_build_cos(struct lp_build_context *bld,
util_snprintf(intrinsic, sizeof intrinsic, "llvm.cos.v%uf%u", type.length, type.width);
return lp_build_intrinsic_unary(bld->builder, intrinsic, vec_type, a);
+#endif
}
@@ -1134,6 +1239,14 @@ LLVMValueRef
lp_build_sin(struct lp_build_context *bld,
LLVMValueRef a)
{
+#ifdef PIPE_OS_WINDOWS
+ /*
+ * FIXME: X86 backend translates llvm.sin.v4f32 to 4 calls to CRT's sinf()
+ * which is neither efficient nor does the CRT linkage work on Windows
+ * causing segmentation fault. So simply disable the code for now.
+ */
+ return bld->zero;
+#else
const struct lp_type type = bld->type;
LLVMTypeRef vec_type = lp_build_vec_type(type);
char intrinsic[32];
@@ -1144,6 +1257,7 @@ lp_build_sin(struct lp_build_context *bld,
util_snprintf(intrinsic, sizeof intrinsic, "llvm.sin.v%uf%u", type.length, type.width);
return lp_build_intrinsic_unary(bld->builder, intrinsic, vec_type, a);
+#endif
}
@@ -1172,7 +1286,7 @@ lp_build_exp(struct lp_build_context *bld,
LLVMValueRef x)
{
/* log2(e) = 1/log(2) */
- LLVMValueRef log2e = lp_build_const_scalar(bld->type, 1.4426950408889634);
+ LLVMValueRef log2e = lp_build_const_vec(bld->type, 1.4426950408889634);
return lp_build_mul(bld, log2e, lp_build_exp2(bld, x));
}
@@ -1186,7 +1300,7 @@ lp_build_log(struct lp_build_context *bld,
LLVMValueRef x)
{
/* log(2) */
- LLVMValueRef log2 = lp_build_const_scalar(bld->type, 0.69314718055994529);
+ LLVMValueRef log2 = lp_build_const_vec(bld->type, 0.69314718055994529);
return lp_build_mul(bld, log2, lp_build_exp2(bld, x));
}
@@ -1207,6 +1321,7 @@ lp_build_polynomial(struct lp_build_context *bld,
unsigned num_coeffs)
{
const struct lp_type type = bld->type;
+ LLVMTypeRef float_type = LLVMFloatType();
LLVMValueRef res = NULL;
unsigned i;
@@ -1216,7 +1331,13 @@ lp_build_polynomial(struct lp_build_context *bld,
__FUNCTION__);
for (i = num_coeffs; i--; ) {
- LLVMValueRef coeff = lp_build_const_scalar(type, coeffs[i]);
+ LLVMValueRef coeff;
+
+ if (type.length == 1)
+ coeff = LLVMConstReal(float_type, coeffs[i]);
+ else
+ coeff = lp_build_const_vec(type, coeffs[i]);
+
if(res)
res = lp_build_add(bld, coeff, lp_build_mul(bld, x, res));
else
@@ -1272,11 +1393,11 @@ lp_build_exp2_approx(struct lp_build_context *bld,
assert(type.floating && type.width == 32);
- x = lp_build_min(bld, x, lp_build_const_scalar(type, 129.0));
- x = lp_build_max(bld, x, lp_build_const_scalar(type, -126.99999));
+ x = lp_build_min(bld, x, lp_build_const_vec(type, 129.0));
+ x = lp_build_max(bld, x, lp_build_const_vec(type, -126.99999));
/* ipart = int(x - 0.5) */
- ipart = LLVMBuildSub(bld->builder, x, lp_build_const_scalar(type, 0.5f), "");
+ ipart = LLVMBuildSub(bld->builder, x, lp_build_const_vec(type, 0.5f), "");
ipart = LLVMBuildFPToSI(bld->builder, ipart, int_vec_type, "");
/* fpart = x - ipart */
@@ -1286,8 +1407,8 @@ lp_build_exp2_approx(struct lp_build_context *bld,
if(p_exp2_int_part || p_exp2) {
/* expipart = (float) (1 << ipart) */
- expipart = LLVMBuildAdd(bld->builder, ipart, lp_build_int_const_scalar(type, 127), "");
- expipart = LLVMBuildShl(bld->builder, expipart, lp_build_int_const_scalar(type, 23), "");
+ expipart = LLVMBuildAdd(bld->builder, ipart, lp_build_const_int_vec(type, 127), "");
+ expipart = LLVMBuildShl(bld->builder, expipart, lp_build_const_int_vec(type, 23), "");
expipart = LLVMBuildBitCast(bld->builder, expipart, vec_type, "");
}
@@ -1353,8 +1474,8 @@ lp_build_log2_approx(struct lp_build_context *bld,
LLVMTypeRef vec_type = lp_build_vec_type(type);
LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
- LLVMValueRef expmask = lp_build_int_const_scalar(type, 0x7f800000);
- LLVMValueRef mantmask = lp_build_int_const_scalar(type, 0x007fffff);
+ LLVMValueRef expmask = lp_build_const_int_vec(type, 0x7f800000);
+ LLVMValueRef mantmask = lp_build_const_int_vec(type, 0x007fffff);
LLVMValueRef one = LLVMConstBitCast(bld->one, int_vec_type);
LLVMValueRef i = NULL;
@@ -1379,8 +1500,8 @@ lp_build_log2_approx(struct lp_build_context *bld,
}
if(p_floor_log2 || p_log2) {
- logexp = LLVMBuildLShr(bld->builder, exp, lp_build_int_const_scalar(type, 23), "");
- logexp = LLVMBuildSub(bld->builder, logexp, lp_build_int_const_scalar(type, 127), "");
+ logexp = LLVMBuildLShr(bld->builder, exp, lp_build_const_int_vec(type, 23), "");
+ logexp = LLVMBuildSub(bld->builder, logexp, lp_build_const_int_vec(type, 127), "");
logexp = LLVMBuildSIToFP(bld->builder, logexp, vec_type, "");
}
@@ -1399,8 +1520,83 @@ lp_build_log2_approx(struct lp_build_context *bld,
res = LLVMBuildAdd(bld->builder, logmant, logexp, "");
}
- if(p_exp)
+ if(p_exp) {
+ exp = LLVMBuildBitCast(bld->builder, exp, vec_type, "");
*p_exp = exp;
+ }
+
+ if(p_floor_log2)
+ *p_floor_log2 = logexp;
+
+ if(p_log2)
+ *p_log2 = res;
+}
+
+
+/** scalar version of above function */
+static void
+lp_build_float_log2_approx(struct lp_build_context *bld,
+ LLVMValueRef x,
+ LLVMValueRef *p_exp,
+ LLVMValueRef *p_floor_log2,
+ LLVMValueRef *p_log2)
+{
+ const struct lp_type type = bld->type;
+ LLVMTypeRef float_type = LLVMFloatType();
+ LLVMTypeRef int_type = LLVMIntType(type.width);
+
+ LLVMValueRef expmask = LLVMConstInt(int_type, 0x7f800000, 0);
+ LLVMValueRef mantmask = LLVMConstInt(int_type, 0x007fffff, 0);
+ LLVMValueRef one = LLVMConstBitCast(bld->one, int_type);
+
+ LLVMValueRef i = NULL;
+ LLVMValueRef exp = NULL;
+ LLVMValueRef mant = NULL;
+ LLVMValueRef logexp = NULL;
+ LLVMValueRef logmant = NULL;
+ LLVMValueRef res = NULL;
+
+ if(p_exp || p_floor_log2 || p_log2) {
+ /* TODO: optimize the constant case */
+ if(LLVMIsConstant(x))
+ debug_printf("%s: inefficient/imprecise constant arithmetic\n",
+ __FUNCTION__);
+
+ assert(type.floating && type.width == 32);
+
+ i = LLVMBuildBitCast(bld->builder, x, int_type, "");
+
+ /* exp = (float) exponent(x) */
+ exp = LLVMBuildAnd(bld->builder, i, expmask, "");
+ }
+
+ if(p_floor_log2 || p_log2) {
+ LLVMValueRef c23 = LLVMConstInt(int_type, 23, 0);
+ LLVMValueRef c127 = LLVMConstInt(int_type, 127, 0);
+ logexp = LLVMBuildLShr(bld->builder, exp, c23, "");
+ logexp = LLVMBuildSub(bld->builder, logexp, c127, "");
+ logexp = LLVMBuildSIToFP(bld->builder, logexp, float_type, "");
+ }
+
+ if(p_log2) {
+ /* mant = (float) mantissa(x) */
+ mant = LLVMBuildAnd(bld->builder, i, mantmask, "");
+ mant = LLVMBuildOr(bld->builder, mant, one, "");
+ mant = LLVMBuildBitCast(bld->builder, mant, float_type, "");
+
+ logmant = lp_build_polynomial(bld, mant, lp_build_log2_polynomial,
+ Elements(lp_build_log2_polynomial));
+
+ /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/
+ logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildSub(bld->builder, mant, bld->one, ""), "");
+
+ res = LLVMBuildAdd(bld->builder, logmant, logexp, "");
+ }
+
+ if(p_exp) {
+ exp = LLVMBuildBitCast(bld->builder, exp, float_type, "");
+ *p_exp = exp;
+ }
if(p_floor_log2)
*p_floor_log2 = logexp;
@@ -1415,6 +1611,11 @@ lp_build_log2(struct lp_build_context *bld,
LLVMValueRef x)
{
LLVMValueRef res;
- lp_build_log2_approx(bld, x, NULL, NULL, &res);
+ if (bld->type.length == 1) {
+ lp_build_float_log2_approx(bld, x, NULL, NULL, &res);
+ }
+ else {
+ lp_build_log2_approx(bld, x, NULL, NULL, &res);
+ }
return res;
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
index 55385e3a66a..31efa9921ce 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
@@ -37,7 +37,7 @@
#define LP_BLD_ARIT_H
-#include <llvm-c/Core.h>
+#include "gallivm/lp_bld.h"
struct lp_type;
@@ -57,6 +57,10 @@ lp_build_add(struct lp_build_context *bld,
LLVMValueRef b);
LLVMValueRef
+lp_build_sum_vector(struct lp_build_context *bld,
+ LLVMValueRef a);
+
+LLVMValueRef
lp_build_sub(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_blend.h b/src/gallium/auxiliary/gallivm/lp_bld_blend.h
deleted file mode 100644
index da272e549f3..00000000000
--- a/src/gallium/auxiliary/gallivm/lp_bld_blend.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2009 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#ifndef LP_BLD_BLEND_H
-#define LP_BLD_BLEND_H
-
-
-/**
- * @file
- * LLVM IR building helpers interfaces.
- *
- * We use LLVM-C bindings for now. They are not documented, but follow the C++
- * interfaces very closely, and appear to be complete enough for code
- * genration. See
- * http://npcontemplation.blogspot.com/2008/06/secret-of-llvm-c-bindings.html
- * for a standalone example.
- */
-
-#include <llvm-c/Core.h>
-
-#include "pipe/p_format.h"
-
-
-struct pipe_blend_state;
-struct lp_type;
-struct lp_build_context;
-
-
-/**
- * Whether the blending function is commutative or not.
- */
-boolean
-lp_build_blend_func_commutative(unsigned func);
-
-
-/**
- * Whether the blending functions are the reverse of each other.
- */
-boolean
-lp_build_blend_func_reverse(unsigned rgb_func, unsigned alpha_func);
-
-
-LLVMValueRef
-lp_build_blend_func(struct lp_build_context *bld,
- unsigned func,
- LLVMValueRef term1,
- LLVMValueRef term2);
-
-
-LLVMValueRef
-lp_build_blend_aos(LLVMBuilderRef builder,
- const struct pipe_blend_state *blend,
- struct lp_type type,
- LLVMValueRef src,
- LLVMValueRef dst,
- LLVMValueRef const_,
- unsigned alpha_swizzle);
-
-
-void
-lp_build_blend_soa(LLVMBuilderRef builder,
- const struct pipe_blend_state *blend,
- struct lp_type type,
- LLVMValueRef src[4],
- LLVMValueRef dst[4],
- LLVMValueRef const_[4],
- LLVMValueRef res[4]);
-
-
-/**
- * Apply a logic op.
- *
- * src/dst parameters are packed values. It should work regardless the inputs
- * are scalars, or a vector.
- */
-LLVMValueRef
-lp_build_logicop(LLVMBuilderRef builder,
- unsigned logicop_func,
- LLVMValueRef src,
- LLVMValueRef dst);
-
-
-#endif /* !LP_BLD_BLEND_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_blend_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_blend_aos.c
deleted file mode 100644
index 0215bb72ac6..00000000000
--- a/src/gallium/auxiliary/gallivm/lp_bld_blend_aos.c
+++ /dev/null
@@ -1,360 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2009 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-/**
- * @file
- * Blend LLVM IR generation -- AoS layout.
- *
- * AoS blending is in general much slower than SoA, but there are some cases
- * where it might be faster. In particular, if a pixel is rendered only once
- * then the overhead of tiling and untiling will dominate over the speedup that
- * SoA gives. So we might want to detect such cases and fallback to AoS in the
- * future, but for now this function is here for historical/benchmarking
- * purposes.
- *
- * Run lp_blend_test after any change to this file.
- *
- * @author Jose Fonseca <[email protected]>
- */
-
-
-#include "pipe/p_state.h"
-#include "util/u_debug.h"
-
-#include "lp_bld_type.h"
-#include "lp_bld_const.h"
-#include "lp_bld_arit.h"
-#include "lp_bld_logic.h"
-#include "lp_bld_swizzle.h"
-#include "lp_bld_blend.h"
-#include "lp_bld_debug.h"
-
-
-/**
- * We may the same values several times, so we keep them here to avoid
- * recomputing them. Also reusing the values allows us to do simplifications
- * that LLVM optimization passes wouldn't normally be able to do.
- */
-struct lp_build_blend_aos_context
-{
- struct lp_build_context base;
-
- LLVMValueRef src;
- LLVMValueRef dst;
- LLVMValueRef const_;
-
- LLVMValueRef inv_src;
- LLVMValueRef inv_dst;
- LLVMValueRef inv_const;
- LLVMValueRef saturate;
-
- LLVMValueRef rgb_src_factor;
- LLVMValueRef alpha_src_factor;
- LLVMValueRef rgb_dst_factor;
- LLVMValueRef alpha_dst_factor;
-};
-
-
-static LLVMValueRef
-lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld,
- unsigned factor,
- boolean alpha)
-{
- switch (factor) {
- case PIPE_BLENDFACTOR_ZERO:
- return bld->base.zero;
- case PIPE_BLENDFACTOR_ONE:
- return bld->base.one;
- case PIPE_BLENDFACTOR_SRC_COLOR:
- case PIPE_BLENDFACTOR_SRC_ALPHA:
- return bld->src;
- case PIPE_BLENDFACTOR_DST_COLOR:
- case PIPE_BLENDFACTOR_DST_ALPHA:
- return bld->dst;
- case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
- if(alpha)
- return bld->base.one;
- else {
- if(!bld->inv_dst)
- bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
- if(!bld->saturate)
- bld->saturate = lp_build_min(&bld->base, bld->src, bld->inv_dst);
- return bld->saturate;
- }
- case PIPE_BLENDFACTOR_CONST_COLOR:
- case PIPE_BLENDFACTOR_CONST_ALPHA:
- return bld->const_;
- case PIPE_BLENDFACTOR_SRC1_COLOR:
- case PIPE_BLENDFACTOR_SRC1_ALPHA:
- /* TODO */
- assert(0);
- return bld->base.zero;
- case PIPE_BLENDFACTOR_INV_SRC_COLOR:
- case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
- if(!bld->inv_src)
- bld->inv_src = lp_build_comp(&bld->base, bld->src);
- return bld->inv_src;
- case PIPE_BLENDFACTOR_INV_DST_COLOR:
- case PIPE_BLENDFACTOR_INV_DST_ALPHA:
- if(!bld->inv_dst)
- bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
- return bld->inv_dst;
- case PIPE_BLENDFACTOR_INV_CONST_COLOR:
- case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
- if(!bld->inv_const)
- bld->inv_const = lp_build_comp(&bld->base, bld->const_);
- return bld->inv_const;
- case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
- case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
- /* TODO */
- assert(0);
- return bld->base.zero;
- default:
- assert(0);
- return bld->base.zero;
- }
-}
-
-
-enum lp_build_blend_swizzle {
- LP_BUILD_BLEND_SWIZZLE_RGBA = 0,
- LP_BUILD_BLEND_SWIZZLE_AAAA = 1
-};
-
-
-/**
- * How should we shuffle the base factor.
- */
-static enum lp_build_blend_swizzle
-lp_build_blend_factor_swizzle(unsigned factor)
-{
- switch (factor) {
- case PIPE_BLENDFACTOR_ONE:
- case PIPE_BLENDFACTOR_ZERO:
- case PIPE_BLENDFACTOR_SRC_COLOR:
- case PIPE_BLENDFACTOR_DST_COLOR:
- case PIPE_BLENDFACTOR_CONST_COLOR:
- case PIPE_BLENDFACTOR_SRC1_COLOR:
- case PIPE_BLENDFACTOR_INV_SRC_COLOR:
- case PIPE_BLENDFACTOR_INV_DST_COLOR:
- case PIPE_BLENDFACTOR_INV_CONST_COLOR:
- case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
- return LP_BUILD_BLEND_SWIZZLE_RGBA;
- case PIPE_BLENDFACTOR_SRC_ALPHA:
- case PIPE_BLENDFACTOR_DST_ALPHA:
- case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
- case PIPE_BLENDFACTOR_SRC1_ALPHA:
- case PIPE_BLENDFACTOR_CONST_ALPHA:
- case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
- case PIPE_BLENDFACTOR_INV_DST_ALPHA:
- case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
- case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
- return LP_BUILD_BLEND_SWIZZLE_AAAA;
- default:
- assert(0);
- return LP_BUILD_BLEND_SWIZZLE_RGBA;
- }
-}
-
-
-static LLVMValueRef
-lp_build_blend_swizzle(struct lp_build_blend_aos_context *bld,
- LLVMValueRef rgb,
- LLVMValueRef alpha,
- enum lp_build_blend_swizzle rgb_swizzle,
- unsigned alpha_swizzle)
-{
- if(rgb == alpha) {
- if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA)
- return rgb;
- if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA)
- return lp_build_broadcast_aos(&bld->base, rgb, alpha_swizzle);
- }
- else {
- if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA) {
- boolean cond[4] = {0, 0, 0, 0};
- cond[alpha_swizzle] = 1;
- return lp_build_select_aos(&bld->base, alpha, rgb, cond);
- }
- if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA) {
- unsigned char swizzle[4];
- swizzle[0] = alpha_swizzle;
- swizzle[1] = alpha_swizzle;
- swizzle[2] = alpha_swizzle;
- swizzle[3] = alpha_swizzle;
- swizzle[alpha_swizzle] += 4;
- return lp_build_swizzle2_aos(&bld->base, rgb, alpha, swizzle);
- }
- }
- assert(0);
- return bld->base.undef;
-}
-
-
-/**
- * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendFuncSeparate.xml
- */
-static LLVMValueRef
-lp_build_blend_factor(struct lp_build_blend_aos_context *bld,
- LLVMValueRef factor1,
- unsigned rgb_factor,
- unsigned alpha_factor,
- unsigned alpha_swizzle)
-{
- LLVMValueRef rgb_factor_;
- LLVMValueRef alpha_factor_;
- LLVMValueRef factor2;
- enum lp_build_blend_swizzle rgb_swizzle;
-
- rgb_factor_ = lp_build_blend_factor_unswizzled(bld, rgb_factor, FALSE);
- alpha_factor_ = lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE);
-
- rgb_swizzle = lp_build_blend_factor_swizzle(rgb_factor);
-
- factor2 = lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle);
-
- return lp_build_mul(&bld->base, factor1, factor2);
-}
-
-
-boolean
-lp_build_blend_func_commutative(unsigned func)
-{
- switch (func) {
- case PIPE_BLEND_ADD:
- case PIPE_BLEND_MIN:
- case PIPE_BLEND_MAX:
- return TRUE;
- case PIPE_BLEND_SUBTRACT:
- case PIPE_BLEND_REVERSE_SUBTRACT:
- return FALSE;
- default:
- assert(0);
- return TRUE;
- }
-}
-
-
-boolean
-lp_build_blend_func_reverse(unsigned rgb_func, unsigned alpha_func)
-{
- if(rgb_func == alpha_func)
- return FALSE;
- if(rgb_func == PIPE_BLEND_SUBTRACT && alpha_func == PIPE_BLEND_REVERSE_SUBTRACT)
- return TRUE;
- if(rgb_func == PIPE_BLEND_REVERSE_SUBTRACT && alpha_func == PIPE_BLEND_SUBTRACT)
- return TRUE;
- return FALSE;
-}
-
-
-/**
- * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendEquationSeparate.xml
- */
-LLVMValueRef
-lp_build_blend_func(struct lp_build_context *bld,
- unsigned func,
- LLVMValueRef term1,
- LLVMValueRef term2)
-{
- switch (func) {
- case PIPE_BLEND_ADD:
- return lp_build_add(bld, term1, term2);
- break;
- case PIPE_BLEND_SUBTRACT:
- return lp_build_sub(bld, term1, term2);
- case PIPE_BLEND_REVERSE_SUBTRACT:
- return lp_build_sub(bld, term2, term1);
- case PIPE_BLEND_MIN:
- return lp_build_min(bld, term1, term2);
- case PIPE_BLEND_MAX:
- return lp_build_max(bld, term1, term2);
- default:
- assert(0);
- return bld->zero;
- }
-}
-
-
-LLVMValueRef
-lp_build_blend_aos(LLVMBuilderRef builder,
- const struct pipe_blend_state *blend,
- struct lp_type type,
- LLVMValueRef src,
- LLVMValueRef dst,
- LLVMValueRef const_,
- unsigned alpha_swizzle)
-{
- struct lp_build_blend_aos_context bld;
- LLVMValueRef src_term;
- LLVMValueRef dst_term;
-
- /* FIXME */
- assert(blend->independent_blend_enable == 0);
- assert(blend->rt[0].colormask == 0xf);
-
- if(!blend->rt[0].blend_enable)
- return src;
-
- /* It makes no sense to blend unless values are normalized */
- assert(type.norm);
-
- /* Setup build context */
- memset(&bld, 0, sizeof bld);
- lp_build_context_init(&bld.base, builder, type);
- bld.src = src;
- bld.dst = dst;
- bld.const_ = const_;
-
- /* TODO: There are still a few optimization opportunities here. For certain
- * combinations it is possible to reorder the operations and therefore saving
- * some instructions. */
-
- src_term = lp_build_blend_factor(&bld, src, blend->rt[0].rgb_src_factor,
- blend->rt[0].alpha_src_factor, alpha_swizzle);
- dst_term = lp_build_blend_factor(&bld, dst, blend->rt[0].rgb_dst_factor,
- blend->rt[0].alpha_dst_factor, alpha_swizzle);
-
- lp_build_name(src_term, "src_term");
- lp_build_name(dst_term, "dst_term");
-
- if(blend->rt[0].rgb_func == blend->rt[0].alpha_func) {
- return lp_build_blend_func(&bld.base, blend->rt[0].rgb_func, src_term, dst_term);
- }
- else {
- /* Seperate RGB / A functions */
-
- LLVMValueRef rgb;
- LLVMValueRef alpha;
-
- rgb = lp_build_blend_func(&bld.base, blend->rt[0].rgb_func, src_term, dst_term);
- alpha = lp_build_blend_func(&bld.base, blend->rt[0].alpha_func, src_term, dst_term);
-
- return lp_build_blend_swizzle(&bld, rgb, alpha, LP_BUILD_BLEND_SWIZZLE_RGBA, alpha_swizzle);
- }
-}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_blend_logicop.c b/src/gallium/auxiliary/gallivm/lp_bld_blend_logicop.c
deleted file mode 100644
index 1eac0a5c891..00000000000
--- a/src/gallium/auxiliary/gallivm/lp_bld_blend_logicop.c
+++ /dev/null
@@ -1,109 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2009 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-/**
- * @file
- * Blend LLVM IR generation -- logic ops.
- *
- * @author Jose Fonseca <[email protected]>
- */
-
-
-#include "pipe/p_state.h"
-#include "util/u_debug.h"
-
-#include "lp_bld_blend.h"
-
-
-LLVMValueRef
-lp_build_logicop(LLVMBuilderRef builder,
- unsigned logicop_func,
- LLVMValueRef src,
- LLVMValueRef dst)
-{
- LLVMTypeRef type;
- LLVMValueRef res;
-
- type = LLVMTypeOf(src);
-
- switch (logicop_func) {
- case PIPE_LOGICOP_CLEAR:
- res = LLVMConstNull(type);
- break;
- case PIPE_LOGICOP_NOR:
- res = LLVMBuildNot(builder, LLVMBuildOr(builder, src, dst, ""), "");
- break;
- case PIPE_LOGICOP_AND_INVERTED:
- res = LLVMBuildAnd(builder, LLVMBuildNot(builder, src, ""), dst, "");
- break;
- case PIPE_LOGICOP_COPY_INVERTED:
- res = LLVMBuildNot(builder, src, "");
- break;
- case PIPE_LOGICOP_AND_REVERSE:
- res = LLVMBuildAnd(builder, src, LLVMBuildNot(builder, dst, ""), "");
- break;
- case PIPE_LOGICOP_INVERT:
- res = LLVMBuildNot(builder, dst, "");
- break;
- case PIPE_LOGICOP_XOR:
- res = LLVMBuildXor(builder, src, dst, "");
- break;
- case PIPE_LOGICOP_NAND:
- res = LLVMBuildNot(builder, LLVMBuildAnd(builder, src, dst, ""), "");
- break;
- case PIPE_LOGICOP_AND:
- res = LLVMBuildAnd(builder, src, dst, "");
- break;
- case PIPE_LOGICOP_EQUIV:
- res = LLVMBuildNot(builder, LLVMBuildXor(builder, src, dst, ""), "");
- break;
- case PIPE_LOGICOP_NOOP:
- res = dst;
- break;
- case PIPE_LOGICOP_OR_INVERTED:
- res = LLVMBuildOr(builder, LLVMBuildNot(builder, src, ""), dst, "");
- break;
- case PIPE_LOGICOP_COPY:
- res = src;
- break;
- case PIPE_LOGICOP_OR_REVERSE:
- res = LLVMBuildOr(builder, src, LLVMBuildNot(builder, dst, ""), "");
- break;
- case PIPE_LOGICOP_OR:
- res = LLVMBuildOr(builder, src, dst, "");
- break;
- case PIPE_LOGICOP_SET:
- res = LLVMConstAllOnes(type);
- break;
- default:
- assert(0);
- res = src;
- }
-
- return res;
-}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_blend_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_blend_soa.c
deleted file mode 100644
index 6d5a45db7a3..00000000000
--- a/src/gallium/auxiliary/gallivm/lp_bld_blend_soa.c
+++ /dev/null
@@ -1,298 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2009 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-/**
- * @file
- * Blend LLVM IR generation -- SoA layout.
- *
- * Blending in SoA is much faster than AoS, especially when separate rgb/alpha
- * factors/functions are used, since no channel masking/shuffling is necessary
- * and we can achieve the full throughput of the SIMD operations. Furthermore
- * the fragment shader output is also in SoA, so it fits nicely with the rest of
- * the fragment pipeline.
- *
- * The drawback is that to be displayed the color buffer needs to be in AoS
- * layout, so we need to tile/untile the color buffer before/after rendering.
- * A color buffer like
- *
- * R11 G11 B11 A11 R12 G12 B12 A12 R13 G13 B13 A13 R14 G14 B14 A14 ...
- * R21 G21 B21 A21 R22 G22 B22 A22 R23 G23 B23 A23 R24 G24 B24 A24 ...
- *
- * R31 G31 B31 A31 R32 G32 B32 A32 R33 G33 B33 A33 R34 G34 B34 A34 ...
- * R41 G41 B41 A41 R42 G42 B42 A42 R43 G43 B43 A43 R44 G44 B44 A44 ...
- *
- * ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
- *
- * will actually be stored in memory as
- *
- * R11 R12 R21 R22 R13 R14 R23 R24 ... G11 G12 G21 G22 G13 G14 G23 G24 ... B11 B12 B21 B22 B13 B14 B23 B24 ... A11 A12 A21 A22 A13 A14 A23 A24 ...
- * R31 R32 R41 R42 R33 R34 R43 R44 ... G31 G32 G41 G42 G33 G34 G43 G44 ... B31 B32 B41 B42 B33 B34 B43 B44 ... A31 A32 A41 A42 A33 A34 A43 A44 ...
- * ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
- *
- * NOTE: Run lp_blend_test after any change to this file.
- *
- * You can also run lp_blend_test to obtain AoS vs SoA benchmarks. Invoking it
- * as:
- *
- * lp_blend_test -o blend.tsv
- *
- * will generate a tab-seperated-file with the test results and performance
- * measurements.
- *
- * @author Jose Fonseca <[email protected]>
- */
-
-
-#include "pipe/p_state.h"
-#include "util/u_debug.h"
-
-#include "lp_bld_type.h"
-#include "lp_bld_arit.h"
-#include "lp_bld_blend.h"
-
-
-/**
- * We may the same values several times, so we keep them here to avoid
- * recomputing them. Also reusing the values allows us to do simplifications
- * that LLVM optimization passes wouldn't normally be able to do.
- */
-struct lp_build_blend_soa_context
-{
- struct lp_build_context base;
-
- LLVMValueRef src[4];
- LLVMValueRef dst[4];
- LLVMValueRef con[4];
-
- LLVMValueRef inv_src[4];
- LLVMValueRef inv_dst[4];
- LLVMValueRef inv_con[4];
-
- LLVMValueRef src_alpha_saturate;
-
- /**
- * We store all factors in a table in order to eliminate redundant
- * multiplications later.
- */
- LLVMValueRef factor[2][2][4];
-
- /**
- * Table with all terms.
- */
- LLVMValueRef term[2][4];
-};
-
-
-static LLVMValueRef
-lp_build_blend_soa_factor(struct lp_build_blend_soa_context *bld,
- unsigned factor, unsigned i)
-{
- /*
- * Compute src/first term RGB
- */
- switch (factor) {
- case PIPE_BLENDFACTOR_ONE:
- return bld->base.one;
- case PIPE_BLENDFACTOR_SRC_COLOR:
- return bld->src[i];
- case PIPE_BLENDFACTOR_SRC_ALPHA:
- return bld->src[3];
- case PIPE_BLENDFACTOR_DST_COLOR:
- return bld->dst[i];
- case PIPE_BLENDFACTOR_DST_ALPHA:
- return bld->dst[3];
- case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
- if(i == 3)
- return bld->base.one;
- else {
- if(!bld->inv_dst[3])
- bld->inv_dst[3] = lp_build_comp(&bld->base, bld->dst[3]);
- if(!bld->src_alpha_saturate)
- bld->src_alpha_saturate = lp_build_min(&bld->base, bld->src[3], bld->inv_dst[3]);
- return bld->src_alpha_saturate;
- }
- case PIPE_BLENDFACTOR_CONST_COLOR:
- return bld->con[i];
- case PIPE_BLENDFACTOR_CONST_ALPHA:
- return bld->con[3];
- case PIPE_BLENDFACTOR_SRC1_COLOR:
- /* TODO */
- assert(0);
- return bld->base.zero;
- case PIPE_BLENDFACTOR_SRC1_ALPHA:
- /* TODO */
- assert(0);
- return bld->base.zero;
- case PIPE_BLENDFACTOR_ZERO:
- return bld->base.zero;
- case PIPE_BLENDFACTOR_INV_SRC_COLOR:
- if(!bld->inv_src[i])
- bld->inv_src[i] = lp_build_comp(&bld->base, bld->src[i]);
- return bld->inv_src[i];
- case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
- if(!bld->inv_src[3])
- bld->inv_src[3] = lp_build_comp(&bld->base, bld->src[3]);
- return bld->inv_src[3];
- case PIPE_BLENDFACTOR_INV_DST_COLOR:
- if(!bld->inv_dst[i])
- bld->inv_dst[i] = lp_build_comp(&bld->base, bld->dst[i]);
- return bld->inv_dst[i];
- case PIPE_BLENDFACTOR_INV_DST_ALPHA:
- if(!bld->inv_dst[3])
- bld->inv_dst[3] = lp_build_comp(&bld->base, bld->dst[3]);
- return bld->inv_dst[3];
- case PIPE_BLENDFACTOR_INV_CONST_COLOR:
- if(!bld->inv_con[i])
- bld->inv_con[i] = lp_build_comp(&bld->base, bld->con[i]);
- return bld->inv_con[i];
- case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
- if(!bld->inv_con[3])
- bld->inv_con[3] = lp_build_comp(&bld->base, bld->con[3]);
- return bld->inv_con[3];
- case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
- /* TODO */
- assert(0);
- return bld->base.zero;
- case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
- /* TODO */
- assert(0);
- return bld->base.zero;
- default:
- assert(0);
- return bld->base.zero;
- }
-}
-
-
-/**
- * Generate blend code in SOA mode.
- * \param src src/fragment color
- * \param dst dst/framebuffer color
- * \param con constant blend color
- * \param res the result/output
- */
-void
-lp_build_blend_soa(LLVMBuilderRef builder,
- const struct pipe_blend_state *blend,
- struct lp_type type,
- LLVMValueRef src[4],
- LLVMValueRef dst[4],
- LLVMValueRef con[4],
- LLVMValueRef res[4])
-{
- struct lp_build_blend_soa_context bld;
- unsigned i, j, k;
-
- /* Setup build context */
- memset(&bld, 0, sizeof bld);
- lp_build_context_init(&bld.base, builder, type);
- for (i = 0; i < 4; ++i) {
- bld.src[i] = src[i];
- bld.dst[i] = dst[i];
- bld.con[i] = con[i];
- }
-
- for (i = 0; i < 4; ++i) {
- if (blend->rt[0].colormask & (1 << i)) {
- if (blend->logicop_enable) {
- if(!type.floating) {
- res[i] = lp_build_logicop(builder, blend->logicop_func, src[i], dst[i]);
- }
- else
- res[i] = dst[i];
- }
- else if (blend->rt[0].blend_enable) {
- unsigned src_factor = i < 3 ? blend->rt[0].rgb_src_factor : blend->rt[0].alpha_src_factor;
- unsigned dst_factor = i < 3 ? blend->rt[0].rgb_dst_factor : blend->rt[0].alpha_dst_factor;
- unsigned func = i < 3 ? blend->rt[0].rgb_func : blend->rt[0].alpha_func;
- boolean func_commutative = lp_build_blend_func_commutative(func);
-
- /* It makes no sense to blend unless values are normalized */
- assert(type.norm);
-
- /*
- * Compute src/dst factors.
- */
-
- bld.factor[0][0][i] = src[i];
- bld.factor[0][1][i] = lp_build_blend_soa_factor(&bld, src_factor, i);
- bld.factor[1][0][i] = dst[i];
- bld.factor[1][1][i] = lp_build_blend_soa_factor(&bld, dst_factor, i);
-
- /*
- * Compute src/dst terms
- */
-
- for(k = 0; k < 2; ++k) {
- /* See if this multiplication has been previously computed */
- for(j = 0; j < i; ++j) {
- if((bld.factor[k][0][j] == bld.factor[k][0][i] &&
- bld.factor[k][1][j] == bld.factor[k][1][i]) ||
- (bld.factor[k][0][j] == bld.factor[k][1][i] &&
- bld.factor[k][1][j] == bld.factor[k][0][i]))
- break;
- }
-
- if(j < i)
- bld.term[k][i] = bld.term[k][j];
- else
- bld.term[k][i] = lp_build_mul(&bld.base, bld.factor[k][0][i], bld.factor[k][1][i]);
- }
-
- /*
- * Combine terms
- */
-
- /* See if this function has been previously applied */
- for(j = 0; j < i; ++j) {
- unsigned prev_func = j < 3 ? blend->rt[0].rgb_func : blend->rt[0].alpha_func;
- unsigned func_reverse = lp_build_blend_func_reverse(func, prev_func);
-
- if((!func_reverse &&
- bld.term[0][j] == bld.term[0][i] &&
- bld.term[1][j] == bld.term[1][i]) ||
- ((func_commutative || func_reverse) &&
- bld.term[0][j] == bld.term[1][i] &&
- bld.term[1][j] == bld.term[0][i]))
- break;
- }
-
- if(j < i)
- res[i] = res[j];
- else
- res[i] = lp_build_blend_func(&bld.base, func, bld.term[0][i], bld.term[1][i]);
- }
- else {
- res[i] = src[i];
- }
- }
- else {
- res[i] = dst[i];
- }
- }
-}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.c b/src/gallium/auxiliary/gallivm/lp_bld_const.c
index c8eaa8c3940..57843e9a60c 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_const.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_const.c
@@ -221,8 +221,16 @@ lp_build_undef(struct lp_type type)
LLVMValueRef
lp_build_zero(struct lp_type type)
{
- LLVMTypeRef vec_type = lp_build_vec_type(type);
- return LLVMConstNull(vec_type);
+ if (type.length == 1) {
+ if (type.floating)
+ return LLVMConstReal(LLVMFloatType(), 0.0);
+ else
+ return LLVMConstInt(LLVMIntType(type.width), 0, 0);
+ }
+ else {
+ LLVMTypeRef vec_type = lp_build_vec_type(type);
+ return LLVMConstNull(vec_type);
+ }
}
@@ -255,7 +263,7 @@ lp_build_one(struct lp_type type)
if(type.sign)
/* TODO: Unfortunately this caused "Tried to create a shift operation
* on a non-integer type!" */
- vec = LLVMConstLShr(vec, lp_build_int_const_scalar(type, 1));
+ vec = LLVMConstLShr(vec, lp_build_const_int_vec(type, 1));
#endif
return vec;
@@ -264,13 +272,19 @@ lp_build_one(struct lp_type type)
for(i = 1; i < type.length; ++i)
elems[i] = elems[0];
- return LLVMConstVector(elems, type.length);
+ if (type.length == 1)
+ return elems[0];
+ else
+ return LLVMConstVector(elems, type.length);
}
+/**
+ * Build constant-valued vector from a scalar value.
+ */
LLVMValueRef
-lp_build_const_scalar(struct lp_type type,
- double val)
+lp_build_const_vec(struct lp_type type,
+ double val)
{
LLVMTypeRef elem_type = lp_build_elem_type(type);
LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
@@ -295,7 +309,7 @@ lp_build_const_scalar(struct lp_type type,
LLVMValueRef
-lp_build_int_const_scalar(struct lp_type type,
+lp_build_const_int_vec(struct lp_type type,
long long val)
{
LLVMTypeRef elem_type = lp_build_int_elem_type(type);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.h b/src/gallium/auxiliary/gallivm/lp_bld_const.h
index cb8e1c7b006..9ca2f0664eb 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_const.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_const.h
@@ -37,9 +37,9 @@
#define LP_BLD_CONST_H
-#include <llvm-c/Core.h>
+#include "pipe/p_compiler.h"
+#include "gallivm/lp_bld.h"
-#include <pipe/p_compiler.h>
struct lp_type;
@@ -85,13 +85,11 @@ lp_build_one(struct lp_type type);
LLVMValueRef
-lp_build_const_scalar(struct lp_type type,
- double val);
+lp_build_const_vec(struct lp_type type, double val);
LLVMValueRef
-lp_build_int_const_scalar(struct lp_type type,
- long long val);
+lp_build_const_int_vec(struct lp_type type, long long val);
LLVMValueRef
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
index f77cf787213..3f7f2ebde9c 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
@@ -114,13 +114,13 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
scale = (double)mask/ubound;
bias = (double)((unsigned long long)1 << (mantissa - n));
- res = LLVMBuildMul(builder, src, lp_build_const_scalar(src_type, scale), "");
- res = LLVMBuildAdd(builder, res, lp_build_const_scalar(src_type, bias), "");
+ res = LLVMBuildMul(builder, src, lp_build_const_vec(src_type, scale), "");
+ res = LLVMBuildAdd(builder, res, lp_build_const_vec(src_type, bias), "");
res = LLVMBuildBitCast(builder, res, int_vec_type, "");
if(dst_width > n) {
int shift = dst_width - n;
- res = LLVMBuildShl(builder, res, lp_build_int_const_scalar(src_type, shift), "");
+ res = LLVMBuildShl(builder, res, lp_build_const_int_vec(src_type, shift), "");
/* TODO: Fill in the empty lower bits for additional precision? */
/* YES: this fixes progs/trivial/tri-z-eq.c.
@@ -130,21 +130,21 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
#if 0
{
LLVMValueRef msb;
- msb = LLVMBuildLShr(builder, res, lp_build_int_const_scalar(src_type, dst_width - 1), "");
- msb = LLVMBuildShl(builder, msb, lp_build_int_const_scalar(src_type, shift), "");
- msb = LLVMBuildSub(builder, msb, lp_build_int_const_scalar(src_type, 1), "");
+ msb = LLVMBuildLShr(builder, res, lp_build_const_int_vec(src_type, dst_width - 1), "");
+ msb = LLVMBuildShl(builder, msb, lp_build_const_int_vec(src_type, shift), "");
+ msb = LLVMBuildSub(builder, msb, lp_build_const_int_vec(src_type, 1), "");
res = LLVMBuildOr(builder, res, msb, "");
}
#elif 0
while(shift > 0) {
- res = LLVMBuildOr(builder, res, LLVMBuildLShr(builder, res, lp_build_int_const_scalar(src_type, n), ""), "");
+ res = LLVMBuildOr(builder, res, LLVMBuildLShr(builder, res, lp_build_const_int_vec(src_type, n), ""), "");
shift -= n;
n *= 2;
}
#endif
}
else
- res = LLVMBuildAnd(builder, res, lp_build_int_const_scalar(src_type, mask), "");
+ res = LLVMBuildAnd(builder, res, lp_build_const_int_vec(src_type, mask), "");
return res;
}
@@ -183,10 +183,10 @@ lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
if(src_width > mantissa) {
int shift = src_width - mantissa;
- res = LLVMBuildLShr(builder, res, lp_build_int_const_scalar(dst_type, shift), "");
+ res = LLVMBuildLShr(builder, res, lp_build_const_int_vec(dst_type, shift), "");
}
- bias_ = lp_build_const_scalar(dst_type, bias);
+ bias_ = lp_build_const_vec(dst_type, bias);
res = LLVMBuildOr(builder,
res,
@@ -195,7 +195,7 @@ lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
res = LLVMBuildBitCast(builder, res, vec_type, "");
res = LLVMBuildSub(builder, res, bias_, "");
- res = LLVMBuildMul(builder, res, lp_build_const_scalar(dst_type, scale), "");
+ res = LLVMBuildMul(builder, res, lp_build_const_vec(dst_type, scale), "");
return res;
}
@@ -251,7 +251,7 @@ lp_build_conv(LLVMBuilderRef builder,
if(dst_min == 0.0)
thres = bld.zero;
else
- thres = lp_build_const_scalar(src_type, dst_min);
+ thres = lp_build_const_vec(src_type, dst_min);
for(i = 0; i < num_tmps; ++i)
tmp[i] = lp_build_max(&bld, tmp[i], thres);
}
@@ -260,7 +260,7 @@ lp_build_conv(LLVMBuilderRef builder,
if(dst_max == 1.0)
thres = bld.one;
else
- thres = lp_build_const_scalar(src_type, dst_max);
+ thres = lp_build_const_vec(src_type, dst_max);
for(i = 0; i < num_tmps; ++i)
tmp[i] = lp_build_min(&bld, tmp[i], thres);
}
@@ -288,7 +288,7 @@ lp_build_conv(LLVMBuilderRef builder,
LLVMTypeRef tmp_vec_type;
if (dst_scale != 1.0) {
- LLVMValueRef scale = lp_build_const_scalar(tmp_type, dst_scale);
+ LLVMValueRef scale = lp_build_const_vec(tmp_type, dst_scale);
for(i = 0; i < num_tmps; ++i)
tmp[i] = LLVMBuildMul(builder, tmp[i], scale, "");
}
@@ -315,7 +315,7 @@ lp_build_conv(LLVMBuilderRef builder,
/* FIXME: compensate different offsets too */
if(src_shift > dst_shift) {
- LLVMValueRef shift = lp_build_int_const_scalar(tmp_type, src_shift - dst_shift);
+ LLVMValueRef shift = lp_build_const_int_vec(tmp_type, src_shift - dst_shift);
for(i = 0; i < num_tmps; ++i)
if(src_type.sign)
tmp[i] = LLVMBuildAShr(builder, tmp[i], shift, "");
@@ -388,7 +388,7 @@ lp_build_conv(LLVMBuilderRef builder,
}
if (src_scale != 1.0) {
- LLVMValueRef scale = lp_build_const_scalar(tmp_type, 1.0/src_scale);
+ LLVMValueRef scale = lp_build_const_vec(tmp_type, 1.0/src_scale);
for(i = 0; i < num_tmps; ++i)
tmp[i] = LLVMBuildMul(builder, tmp[i], scale, "");
}
@@ -400,7 +400,7 @@ lp_build_conv(LLVMBuilderRef builder,
/* FIXME: compensate different offsets too */
if(src_shift < dst_shift) {
- LLVMValueRef shift = lp_build_int_const_scalar(tmp_type, dst_shift - src_shift);
+ LLVMValueRef shift = lp_build_const_int_vec(tmp_type, dst_shift - src_shift);
for(i = 0; i < num_tmps; ++i)
tmp[i] = LLVMBuildShl(builder, tmp[i], shift, "");
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.h b/src/gallium/auxiliary/gallivm/lp_bld_conv.h
index 948e68fae4f..628831c3ada 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_conv.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.h
@@ -37,7 +37,7 @@
#define LP_BLD_CONV_H
-#include <llvm-c/Core.h>
+#include "gallivm/lp_bld.h"
struct lp_type;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.h b/src/gallium/auxiliary/gallivm/lp_bld_debug.h
index 583e6132b4b..7b010cbdb09 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_debug.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.h
@@ -30,7 +30,7 @@
#define LP_BLD_DEBUG_H
-#include <llvm-c/Core.h>
+#include "gallivm/lp_bld.h"
#include "pipe/p_compiler.h"
#include "util/u_string.h"
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_depth.c b/src/gallium/auxiliary/gallivm/lp_bld_depth.c
deleted file mode 100644
index f08f8eb6d8b..00000000000
--- a/src/gallium/auxiliary/gallivm/lp_bld_depth.c
+++ /dev/null
@@ -1,213 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2009 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * @file
- * Depth/stencil testing to LLVM IR translation.
- *
- * To be done accurately/efficiently the depth/stencil test must be done with
- * the same type/format of the depth/stencil buffer, which implies massaging
- * the incoming depths to fit into place. Using a more straightforward
- * type/format for depth/stencil values internally and only convert when
- * flushing would avoid this, but it would most likely result in depth fighting
- * artifacts.
- *
- * We are free to use a different pixel layout though. Since our basic
- * processing unit is a quad (2x2 pixel block) we store the depth/stencil
- * values tiled, a quad at time. That is, a depth buffer containing
- *
- * Z11 Z12 Z13 Z14 ...
- * Z21 Z22 Z23 Z24 ...
- * Z31 Z32 Z33 Z34 ...
- * Z41 Z42 Z43 Z44 ...
- * ... ... ... ... ...
- *
- * will actually be stored in memory as
- *
- * Z11 Z12 Z21 Z22 Z13 Z14 Z23 Z24 ...
- * Z31 Z32 Z41 Z42 Z33 Z34 Z43 Z44 ...
- * ... ... ... ... ... ... ... ... ...
- *
- * FIXME: Code generate stencil test
- *
- * @author Jose Fonseca <[email protected]>
- */
-
-#include "pipe/p_state.h"
-#include "util/u_format.h"
-
-#include "lp_bld_type.h"
-#include "lp_bld_const.h"
-#include "lp_bld_logic.h"
-#include "lp_bld_flow.h"
-#include "lp_bld_debug.h"
-#include "lp_bld_depth.h"
-
-
-/**
- * Return a type appropriate for depth/stencil testing.
- */
-struct lp_type
-lp_depth_type(const struct util_format_description *format_desc,
- unsigned length)
-{
- struct lp_type type;
- unsigned swizzle;
-
- assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
- assert(format_desc->block.width == 1);
- assert(format_desc->block.height == 1);
-
- swizzle = format_desc->swizzle[0];
- assert(swizzle < 4);
-
- memset(&type, 0, sizeof type);
- type.width = format_desc->block.bits;
-
- if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_FLOAT) {
- type.floating = TRUE;
- assert(swizzle == 0);
- assert(format_desc->channel[swizzle].size == format_desc->block.bits);
- }
- else if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) {
- assert(format_desc->block.bits <= 32);
- if(format_desc->channel[swizzle].normalized)
- type.norm = TRUE;
- }
- else
- assert(0);
-
- assert(type.width <= length);
- type.length = length / type.width;
-
- return type;
-}
-
-
-/**
- * Depth test.
- */
-void
-lp_build_depth_test(LLVMBuilderRef builder,
- const struct pipe_depth_state *state,
- struct lp_type type,
- const struct util_format_description *format_desc,
- struct lp_build_mask_context *mask,
- LLVMValueRef src,
- LLVMValueRef dst_ptr)
-{
- struct lp_build_context bld;
- unsigned z_swizzle;
- LLVMValueRef dst;
- LLVMValueRef z_bitmask = NULL;
- LLVMValueRef test;
-
- if(!state->enabled)
- return;
-
- assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
- assert(format_desc->block.width == 1);
- assert(format_desc->block.height == 1);
-
- z_swizzle = format_desc->swizzle[0];
- if(z_swizzle == UTIL_FORMAT_SWIZZLE_NONE)
- return;
-
- /* Sanity checking */
- assert(z_swizzle < 4);
- assert(format_desc->block.bits == type.width);
- if(type.floating) {
- assert(z_swizzle == 0);
- assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_FLOAT);
- assert(format_desc->channel[z_swizzle].size == format_desc->block.bits);
- }
- else {
- assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED);
- assert(format_desc->channel[z_swizzle].normalized);
- assert(!type.fixed);
- assert(!type.sign);
- assert(type.norm);
- }
-
- /* Setup build context */
- lp_build_context_init(&bld, builder, type);
-
- dst = LLVMBuildLoad(builder, dst_ptr, "");
-
- lp_build_name(dst, "zsbuf");
-
- /* Align the source depth bits with the destination's, and mask out any
- * stencil or padding bits from both */
- if(format_desc->channel[z_swizzle].size == format_desc->block.bits) {
- assert(z_swizzle == 0);
- /* nothing to do */
- }
- else {
- unsigned padding_left;
- unsigned padding_right;
- unsigned chan;
-
- assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
- assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED);
- assert(format_desc->channel[z_swizzle].size <= format_desc->block.bits);
- assert(format_desc->channel[z_swizzle].normalized);
-
- padding_right = 0;
- for(chan = 0; chan < z_swizzle; ++chan)
- padding_right += format_desc->channel[chan].size;
- padding_left = format_desc->block.bits -
- (padding_right + format_desc->channel[z_swizzle].size);
-
- if(padding_left || padding_right) {
- const unsigned long long mask_left = ((unsigned long long)1 << (format_desc->block.bits - padding_left)) - 1;
- const unsigned long long mask_right = ((unsigned long long)1 << (padding_right)) - 1;
- z_bitmask = lp_build_int_const_scalar(type, mask_left ^ mask_right);
- }
-
- if(padding_left)
- src = LLVMBuildLShr(builder, src, lp_build_int_const_scalar(type, padding_left), "");
- if(padding_right)
- src = LLVMBuildAnd(builder, src, z_bitmask, "");
- if(padding_left || padding_right)
- dst = LLVMBuildAnd(builder, dst, z_bitmask, "");
- }
-
- lp_build_name(dst, "zsbuf.z");
-
- test = lp_build_cmp(&bld, state->func, src, dst);
- lp_build_mask_update(mask, test);
-
- if(state->writemask) {
- if(z_bitmask)
- z_bitmask = LLVMBuildAnd(builder, mask->value, z_bitmask, "");
- else
- z_bitmask = mask->value;
-
- dst = lp_build_select(&bld, z_bitmask, src, dst);
- LLVMBuildStore(builder, dst, dst_ptr);
- }
-}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.c b/src/gallium/auxiliary/gallivm/lp_bld_flow.c
index bc831389085..8f15b1d287d 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_flow.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.c
@@ -308,7 +308,7 @@ lp_build_flow_scope_end(struct lp_build_flow_context *flow)
* Note: this function has no dependencies on the flow code and could
* be used elsewhere.
*/
-static LLVMBasicBlockRef
+LLVMBasicBlockRef
lp_build_insert_new_block(LLVMBuilderRef builder, const char *name)
{
LLVMBasicBlockRef current_block;
@@ -570,6 +570,35 @@ lp_build_loop_end(LLVMBuilderRef builder,
LLVMPositionBuilderAtEnd(builder, after_block);
}
+void
+lp_build_loop_end_cond(LLVMBuilderRef builder,
+ LLVMValueRef end,
+ LLVMValueRef step,
+ int llvm_cond,
+ struct lp_build_loop_state *state)
+{
+ LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
+ LLVMValueRef function = LLVMGetBasicBlockParent(block);
+ LLVMValueRef next;
+ LLVMValueRef cond;
+ LLVMBasicBlockRef after_block;
+
+ if (!step)
+ step = LLVMConstInt(LLVMTypeOf(end), 1, 0);
+
+ next = LLVMBuildAdd(builder, state->counter, step, "");
+
+ cond = LLVMBuildICmp(builder, llvm_cond, next, end, "");
+
+ after_block = LLVMAppendBasicBlock(function, "");
+
+ LLVMBuildCondBr(builder, cond, after_block, state->block);
+
+ LLVMAddIncoming(state->counter, &next, &block, 1);
+
+ LLVMPositionBuilderAtEnd(builder, after_block);
+}
+
/*
@@ -648,7 +677,9 @@ lp_build_if(struct lp_build_if_state *ctx,
ifthen->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), "");
/* add add the initial value of the var from the entry block */
- LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->entry_block, 1);
+ if (!LLVMIsUndef(*flow->variables[i]))
+ LLVMAddIncoming(ifthen->phi[i], flow->variables[i],
+ &ifthen->entry_block, 1);
}
/* create/insert true_block before merge_block */
@@ -695,18 +726,21 @@ lp_build_endif(struct lp_build_if_state *ctx)
{
struct lp_build_flow_context *flow = ctx->flow;
struct lp_build_flow_if *ifthen;
+ LLVMBasicBlockRef curBlock = LLVMGetInsertBlock(ctx->builder);
unsigned i;
ifthen = &lp_build_flow_pop(flow, LP_BUILD_FLOW_IF)->ifthen;
assert(ifthen);
+ /* Insert branch to the merge block from current block */
+ LLVMBuildBr(ctx->builder, ifthen->merge_block);
+
if (ifthen->false_block) {
LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block);
/* for each variable, update the Phi node with a (variable, block) pair */
for (i = 0; i < flow->num_variables; i++) {
assert(*flow->variables[i]);
- LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->false_block, 1);
-
+ LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &curBlock, 1);
/* replace the variable ref with the phi function */
*flow->variables[i] = ifthen->phi[i];
}
@@ -742,16 +776,94 @@ lp_build_endif(struct lp_build_if_state *ctx)
ifthen->true_block, ifthen->merge_block);
}
- /* Append an unconditional Br(anch) instruction on the true_block */
- LLVMPositionBuilderAtEnd(ctx->builder, ifthen->true_block);
- LLVMBuildBr(ctx->builder, ifthen->merge_block);
+ /* Insert branch from end of true_block to merge_block */
if (ifthen->false_block) {
- /* Append an unconditional Br(anch) instruction on the false_block */
- LLVMPositionBuilderAtEnd(ctx->builder, ifthen->false_block);
+ /* Append an unconditional Br(anch) instruction on the true_block */
+ LLVMPositionBuilderAtEnd(ctx->builder, ifthen->true_block);
LLVMBuildBr(ctx->builder, ifthen->merge_block);
}
-
+ else {
+ /* No else clause.
+ * Note that we've already inserted the branch at the end of
+ * true_block. See the very first LLVMBuildBr() call in this function.
+ */
+ }
/* Resume building code at end of the ifthen->merge_block */
LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block);
}
+
+
+/**
+ * Allocate a scalar (or vector) variable.
+ *
+ * Although not strictly part of control flow, control flow has deep impact in
+ * how variables should be allocated.
+ *
+ * The mem2reg optimization pass is the recommended way to dealing with mutable
+ * variables, and SSA. It looks for allocas and if it can handle them, it
+ * promotes them, but only looks for alloca instructions in the entry block of
+ * the function. Being in the entry block guarantees that the alloca is only
+ * executed once, which makes analysis simpler.
+ *
+ * See also:
+ * - http://www.llvm.org/docs/tutorial/OCamlLangImpl7.html#memory
+ */
+LLVMValueRef
+lp_build_alloca(LLVMBuilderRef builder,
+ LLVMTypeRef type,
+ const char *name)
+{
+ LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder);
+ LLVMValueRef function = LLVMGetBasicBlockParent(current_block);
+ LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function);
+ LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block);
+ LLVMBuilderRef first_builder = LLVMCreateBuilder();
+ LLVMValueRef res;
+
+ LLVMPositionBuilderAtEnd(first_builder, first_block);
+ LLVMPositionBuilderBefore(first_builder, first_instr);
+
+ res = LLVMBuildAlloca(first_builder, type, name);
+
+ LLVMDisposeBuilder(first_builder);
+
+ return res;
+}
+
+
+/**
+ * Allocate an array of scalars/vectors.
+ *
+ * mem2reg pass is not capable of promoting structs or arrays to registers, but
+ * we still put it in the first block anyway as failure to put allocas in the
+ * first block may prevent the X86 backend from successfully align the stack as
+ * required.
+ *
+ * Also the scalarrepl pass is supossedly more powerful and can promote
+ * arrays in many cases.
+ *
+ * See also:
+ * - http://www.llvm.org/docs/tutorial/OCamlLangImpl7.html#memory
+ */
+LLVMValueRef
+lp_build_array_alloca(LLVMBuilderRef builder,
+ LLVMTypeRef type,
+ LLVMValueRef count,
+ const char *name)
+{
+ LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder);
+ LLVMValueRef function = LLVMGetBasicBlockParent(current_block);
+ LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function);
+ LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block);
+ LLVMBuilderRef first_builder = LLVMCreateBuilder();
+ LLVMValueRef res;
+
+ LLVMPositionBuilderBefore(first_builder, first_instr);
+
+ res = LLVMBuildArrayAlloca(first_builder, type, count, name);
+
+ LLVMDisposeBuilder(first_builder);
+
+ return res;
+}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.h b/src/gallium/auxiliary/gallivm/lp_bld_flow.h
index 4c225a0d4f9..fffb493a93b 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_flow.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.h
@@ -35,7 +35,7 @@
#define LP_BLD_FLOW_H
-#include <llvm-c/Core.h>
+#include "gallivm/lp_bld.h"
struct lp_type;
@@ -124,6 +124,13 @@ lp_build_loop_end(LLVMBuilderRef builder,
LLVMValueRef step,
struct lp_build_loop_state *state);
+void
+lp_build_loop_end_cond(LLVMBuilderRef builder,
+ LLVMValueRef end,
+ LLVMValueRef step,
+ int cond, /* LLVM condition */
+ struct lp_build_loop_state *state);
+
@@ -145,7 +152,19 @@ lp_build_else(struct lp_build_if_state *ctx);
void
lp_build_endif(struct lp_build_if_state *ctx);
-
+LLVMBasicBlockRef
+lp_build_insert_new_block(LLVMBuilderRef builder, const char *name);
+
+LLVMValueRef
+lp_build_alloca(LLVMBuilderRef builder,
+ LLVMTypeRef type,
+ const char *name);
+
+LLVMValueRef
+lp_build_array_alloca(LLVMBuilderRef builder,
+ LLVMTypeRef type,
+ LLVMValueRef count,
+ const char *name);
#endif /* !LP_BLD_FLOW_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format.h b/src/gallium/auxiliary/gallivm/lp_bld_format.h
index 970bee379f5..085937588ff 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format.h
@@ -34,7 +34,7 @@
* Pixel format helpers.
*/
-#include <llvm-c/Core.h>
+#include "gallivm/lp_bld.h"
#include "pipe/p_format.h"
@@ -42,35 +42,37 @@ struct util_format_description;
struct lp_type;
-boolean
-lp_format_is_rgba8(const struct util_format_description *desc);
-
-
-void
-lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
- struct lp_type type,
- const LLVMValueRef *unswizzled,
- LLVMValueRef *swizzled);
-
+/*
+ * AoS
+ */
LLVMValueRef
lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
const struct util_format_description *desc,
LLVMValueRef packed);
-
-LLVMValueRef
-lp_build_unpack_rgba8_aos(LLVMBuilderRef builder,
- const struct util_format_description *desc,
- struct lp_type type,
- LLVMValueRef packed);
-
-
LLVMValueRef
lp_build_pack_rgba_aos(LLVMBuilderRef builder,
const struct util_format_description *desc,
LLVMValueRef rgba);
+LLVMValueRef
+lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
+ const struct util_format_description *format_desc,
+ LLVMValueRef ptr,
+ LLVMValueRef i,
+ LLVMValueRef j);
+
+
+/*
+ * SoA
+ */
+
+void
+lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
+ struct lp_type type,
+ const LLVMValueRef *unswizzled,
+ LLVMValueRef *swizzled);
void
lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
@@ -80,4 +82,15 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
LLVMValueRef *rgba);
+void
+lp_build_fetch_rgba_soa(LLVMBuilderRef builder,
+ const struct util_format_description *format_desc,
+ struct lp_type type,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offsets,
+ LLVMValueRef i,
+ LLVMValueRef j,
+ LLVMValueRef *rgba);
+
+
#endif /* !LP_BLD_FORMAT_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
index a07f7418f2c..6257e9a4047 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
@@ -33,12 +33,14 @@
*/
-#include "util/u_cpu_detect.h"
#include "util/u_format.h"
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "util/u_string.h"
+#include "lp_bld_init.h"
#include "lp_bld_type.h"
-#include "lp_bld_const.h"
-#include "lp_bld_swizzle.h"
+#include "lp_bld_flow.h"
#include "lp_bld_format.h"
@@ -48,16 +50,12 @@
* @param packed integer.
*
* @return RGBA in a 4 floats vector.
- *
- * XXX: This is mostly for reference and testing -- operating a single pixel at
- * a time is rarely if ever needed.
*/
LLVMValueRef
lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
const struct util_format_description *desc,
LLVMValueRef packed)
{
- LLVMTypeRef type;
LLVMValueRef shifted, casted, scaled, masked;
LLVMValueRef shifts[4];
LLVMValueRef masks[4];
@@ -66,17 +64,16 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
LLVMValueRef aux[4];
bool normalized;
int empty_channel;
+ bool needs_uitofp;
unsigned shift;
unsigned i;
- /* FIXME: Support more formats */
+ /* TODO: Support more formats */
assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
assert(desc->block.width == 1);
assert(desc->block.height == 1);
assert(desc->block.bits <= 32);
- type = LLVMIntType(desc->block.bits);
-
/* Do the intermediate integer computations with 32bit integers since it
* matches floating point size */
if (desc->block.bits < 32)
@@ -96,6 +93,7 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
/* Initialize vector constants */
normalized = FALSE;
+ needs_uitofp = FALSE;
empty_channel = -1;
shift = 0;
for (i = 0; i < 4; ++i) {
@@ -108,10 +106,13 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
empty_channel = i;
}
else {
- unsigned mask = (1 << bits) - 1;
+ unsigned long long mask = (1ULL << bits) - 1;
assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
- assert(bits < 32);
+
+ if (bits == 32) {
+ needs_uitofp = TRUE;
+ }
shifts[i] = LLVMConstInt(LLVMInt32Type(), shift, 0);
masks[i] = LLVMConstInt(LLVMInt32Type(), mask, 0);
@@ -129,8 +130,12 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), "");
masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), "");
- /* UIToFP can't be expressed in SSE2 */
- casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), "");
+ if (!needs_uitofp) {
+ /* UIToFP can't be expressed in SSE2 */
+ casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), "");
+ } else {
+ casted = LLVMBuildUIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), "");
+ }
if (normalized)
scaled = LLVMBuildMul(builder, casted, LLVMConstVector(scales, 4), "");
@@ -141,7 +146,22 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
aux[i] = LLVMGetUndef(LLVMFloatType());
for (i = 0; i < 4; ++i) {
- enum util_format_swizzle swizzle = desc->swizzle[i];
+ enum util_format_swizzle swizzle;
+
+ if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
+ /*
+ * For ZS formats do RGBA = ZZZ1
+ */
+ if (i == 3) {
+ swizzle = UTIL_FORMAT_SWIZZLE_1;
+ } else if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_NONE) {
+ swizzle = UTIL_FORMAT_SWIZZLE_0;
+ } else {
+ swizzle = desc->swizzle[0];
+ }
+ } else {
+ swizzle = desc->swizzle[i];
+ }
switch (swizzle) {
case UTIL_FORMAT_SWIZZLE_X:
@@ -170,117 +190,6 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
/**
- * Take a vector with packed pixels and unpack into a rgba8 vector.
- *
- * Formats with bit depth smaller than 32bits are accepted, but they must be
- * padded to 32bits.
- */
-LLVMValueRef
-lp_build_unpack_rgba8_aos(LLVMBuilderRef builder,
- const struct util_format_description *desc,
- struct lp_type type,
- LLVMValueRef packed)
-{
- struct lp_build_context bld;
- bool rgba8;
- LLVMValueRef res;
- unsigned i;
-
- lp_build_context_init(&bld, builder, type);
-
- /* FIXME: Support more formats */
- assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
- assert(desc->block.width == 1);
- assert(desc->block.height == 1);
- assert(desc->block.bits <= 32);
-
- assert(!type.floating);
- assert(!type.fixed);
- assert(type.norm);
- assert(type.width == 8);
- assert(type.length % 4 == 0);
-
- rgba8 = TRUE;
- for(i = 0; i < 4; ++i) {
- assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED ||
- desc->channel[i].type == UTIL_FORMAT_TYPE_VOID);
- if(desc->channel[0].size != 8)
- rgba8 = FALSE;
- }
-
- if(rgba8) {
- /*
- * The pixel is already in a rgba8 format variant. All it is necessary
- * is to swizzle the channels.
- */
-
- unsigned char swizzles[4];
- boolean zeros[4]; /* bitwise AND mask */
- boolean ones[4]; /* bitwise OR mask */
- boolean swizzles_needed = FALSE;
- boolean zeros_needed = FALSE;
- boolean ones_needed = FALSE;
-
- for(i = 0; i < 4; ++i) {
- enum util_format_swizzle swizzle = desc->swizzle[i];
-
- /* Initialize with the no-op case */
- swizzles[i] = util_cpu_caps.little_endian ? 3 - i : i;
- zeros[i] = TRUE;
- ones[i] = FALSE;
-
- switch (swizzle) {
- case UTIL_FORMAT_SWIZZLE_X:
- case UTIL_FORMAT_SWIZZLE_Y:
- case UTIL_FORMAT_SWIZZLE_Z:
- case UTIL_FORMAT_SWIZZLE_W:
- if(swizzle != swizzles[i]) {
- swizzles[i] = swizzle;
- swizzles_needed = TRUE;
- }
- break;
- case UTIL_FORMAT_SWIZZLE_0:
- zeros[i] = FALSE;
- zeros_needed = TRUE;
- break;
- case UTIL_FORMAT_SWIZZLE_1:
- ones[i] = TRUE;
- ones_needed = TRUE;
- break;
- case UTIL_FORMAT_SWIZZLE_NONE:
- assert(0);
- break;
- }
- }
-
- res = packed;
-
- if(swizzles_needed)
- res = lp_build_swizzle1_aos(&bld, res, swizzles);
-
- if(zeros_needed) {
- /* Mask out zero channels */
- LLVMValueRef mask = lp_build_const_mask_aos(type, zeros);
- res = LLVMBuildAnd(builder, res, mask, "");
- }
-
- if(ones_needed) {
- /* Or one channels */
- LLVMValueRef mask = lp_build_const_mask_aos(type, ones);
- res = LLVMBuildOr(builder, res, mask, "");
- }
- }
- else {
- /* FIXME */
- assert(0);
- res = lp_build_undef(type);
- }
-
- return res;
-}
-
-
-/**
* Pack a single pixel.
*
* @param rgba 4 float vector with the unpacked components.
@@ -381,3 +290,106 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder,
return packed;
}
+
+
+/**
+ * Fetch a pixel into a 4 float AoS.
+ *
+ * i and j are the sub-block pixel coordinates.
+ */
+LLVMValueRef
+lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
+ const struct util_format_description *format_desc,
+ LLVMValueRef ptr,
+ LLVMValueRef i,
+ LLVMValueRef j)
+{
+
+ if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
+ (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
+ format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
+ format_desc->block.width == 1 &&
+ format_desc->block.height == 1 &&
+ util_is_pot(format_desc->block.bits) &&
+ format_desc->block.bits <= 32 &&
+ format_desc->is_bitmask &&
+ !format_desc->is_mixed &&
+ (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED ||
+ format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED))
+ {
+ LLVMValueRef packed;
+
+ ptr = LLVMBuildBitCast(builder, ptr,
+ LLVMPointerType(LLVMIntType(format_desc->block.bits), 0) ,
+ "");
+
+ packed = LLVMBuildLoad(builder, ptr, "packed");
+
+ return lp_build_unpack_rgba_aos(builder, format_desc, packed);
+ }
+ else if (format_desc->fetch_rgba_float) {
+ /*
+ * Fallback to calling util_format_description::fetch_rgba_float.
+ *
+ * This is definitely not the most efficient way of fetching pixels, as
+ * we miss the opportunity to do vectorization, but this it is a
+ * convenient for formats or scenarios for which there was no opportunity
+ * or incentive to optimize.
+ */
+
+ LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
+ char name[256];
+ LLVMValueRef function;
+ LLVMValueRef tmp;
+ LLVMValueRef args[4];
+
+ util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float",
+ format_desc->short_name);
+
+ /*
+ * Declare and bind format_desc->fetch_rgba_float().
+ */
+
+ function = LLVMGetNamedFunction(module, name);
+ if (!function) {
+ LLVMTypeRef ret_type;
+ LLVMTypeRef arg_types[4];
+ LLVMTypeRef function_type;
+
+ ret_type = LLVMVoidType();
+ arg_types[0] = LLVMPointerType(LLVMFloatType(), 0);
+ arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0);
+ arg_types[3] = arg_types[2] = LLVMIntType(sizeof(unsigned) * 8);
+ function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0);
+ function = LLVMAddFunction(module, name, function_type);
+
+ LLVMSetFunctionCallConv(function, LLVMCCallConv);
+ LLVMSetLinkage(function, LLVMExternalLinkage);
+
+ assert(LLVMIsDeclaration(function));
+
+ LLVMAddGlobalMapping(lp_build_engine, function, format_desc->fetch_rgba_float);
+ }
+
+ tmp = lp_build_alloca(builder, LLVMVectorType(LLVMFloatType(), 4), "");
+
+ /*
+ * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result
+ * in the SoA vectors.
+ */
+
+ args[0] = LLVMBuildBitCast(builder, tmp,
+ LLVMPointerType(LLVMFloatType(), 0), "");
+ args[1] = ptr;
+ args[2] = i;
+ args[3] = j;
+
+ LLVMBuildCall(builder, function, args, 4, "");
+
+ return LLVMBuildLoad(builder, tmp, "");
+ }
+ else {
+ assert(0);
+ return LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4));
+ }
+}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_query.c b/src/gallium/auxiliary/gallivm/lp_bld_format_query.c
deleted file mode 100644
index f3832d07ff9..00000000000
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_query.c
+++ /dev/null
@@ -1,72 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2009 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * @file
- * Utility functions to make assertions about formats.
- *
- * This module centralizes most of logic used when determining what algorithm
- * is most suitable (i.e., most efficient yet correct) for a given format.
- *
- * It might be possible to move some of these functions to u_format module,
- * but since tiny differences in the format my render it more/less
- * appropriate to a given algorithm it is impossible to make any long term
- * guarantee about the semantics of these functions.
- *
- * @author Jose Fonseca <[email protected]>
- */
-
-
-#include "util/u_format.h"
-
-#include "lp_bld_format.h"
-
-
-/**
- * Whether this format is a 4 rgba8 variant
- */
-boolean
-lp_format_is_rgba8(const struct util_format_description *desc)
-{
- unsigned chan;
-
- if(desc->block.width != 1 ||
- desc->block.height != 1 ||
- desc->block.bits != 32)
- return FALSE;
-
- for(chan = 0; chan < 4; ++chan) {
- if(desc->channel[chan].type != UTIL_FORMAT_TYPE_UNSIGNED &&
- desc->channel[chan].type != UTIL_FORMAT_TYPE_SIGNED &&
- desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID)
- return FALSE;
- if(desc->channel[chan].size != 8)
- return FALSE;
- }
-
- return TRUE;
-}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
index abb27e4c328..26b947b3b1c 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
@@ -27,10 +27,13 @@
#include "util/u_format.h"
+#include "util/u_memory.h"
+#include "util/u_string.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_conv.h"
+#include "lp_bld_sample.h" /* for lp_build_gather */
#include "lp_bld_format.h"
@@ -80,6 +83,24 @@ lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
}
+/**
+ * Unpack several pixels in SoA.
+ *
+ * It takes a vector of packed pixels:
+ *
+ * packed = {P0, P1, P2, P3, ..., Pn}
+ *
+ * And will produce four vectors:
+ *
+ * red = {R0, R1, R2, R3, ..., Rn}
+ * green = {G0, G1, G2, G3, ..., Gn}
+ * blue = {B0, B1, B2, B3, ..., Bn}
+ * alpha = {A0, A1, A2, A3, ..., An}
+ *
+ * It requires that a packed pixel fits into an element of the output
+ * channels. The common case is when converting pixel with a depth of 32 bit or
+ * less into floats.
+ */
void
lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
const struct util_format_description *format_desc,
@@ -91,15 +112,17 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
unsigned start;
unsigned chan;
- /* FIXME: Support more formats */
assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
assert(format_desc->block.width == 1);
assert(format_desc->block.height == 1);
- assert(format_desc->block.bits <= 32);
+ assert(format_desc->block.bits <= type.width);
+ /* FIXME: Support more output types */
+ assert(type.floating);
+ assert(type.width == 32);
/* Decode the input vector components */
start = 0;
- for (chan = 0; chan < 4; ++chan) {
+ for (chan = 0; chan < format_desc->nr_channels; ++chan) {
unsigned width = format_desc->channel[chan].size;
unsigned stop = start + width;
LLVMValueRef input;
@@ -108,22 +131,106 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
switch(format_desc->channel[chan].type) {
case UTIL_FORMAT_TYPE_VOID:
- input = NULL;
+ input = lp_build_undef(type);
break;
case UTIL_FORMAT_TYPE_UNSIGNED:
- if(type.floating) {
- if(start)
- input = LLVMBuildLShr(builder, input, lp_build_int_const_scalar(type, start), "");
- if(stop < format_desc->block.bits) {
- unsigned mask = ((unsigned long long)1 << width) - 1;
- input = LLVMBuildAnd(builder, input, lp_build_int_const_scalar(type, mask), "");
- }
+ /*
+ * Align the LSB
+ */
+
+ if (start) {
+ input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(type, start), "");
+ }
+
+ /*
+ * Zero the MSBs
+ */
+ if (stop < format_desc->block.bits) {
+ unsigned mask = ((unsigned long long)1 << width) - 1;
+ input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(type, mask), "");
+ }
+
+ /*
+ * Type conversion
+ */
+
+ if (type.floating) {
if(format_desc->channel[chan].normalized)
input = lp_build_unsigned_norm_to_float(builder, width, type, input);
else
- input = LLVMBuildFPToSI(builder, input, lp_build_vec_type(type), "");
+ input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), "");
+ }
+ else {
+ /* FIXME */
+ assert(0);
+ input = lp_build_undef(type);
+ }
+
+ break;
+
+ case UTIL_FORMAT_TYPE_SIGNED:
+ /*
+ * Align the sign bit first.
+ */
+
+ if (stop < type.width) {
+ unsigned bits = type.width - stop;
+ LLVMValueRef bits_val = lp_build_const_int_vec(type, bits);
+ input = LLVMBuildShl(builder, input, bits_val, "");
+ }
+
+ /*
+ * Align the LSB (with an arithmetic shift to preserve the sign)
+ */
+
+ if (format_desc->channel[chan].size < type.width) {
+ unsigned bits = type.width - format_desc->channel[chan].size;
+ LLVMValueRef bits_val = lp_build_const_int_vec(type, bits);
+ input = LLVMBuildAShr(builder, input, bits_val, "");
+ }
+
+ /*
+ * Type conversion
+ */
+
+ if (type.floating) {
+ input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), "");
+ if (format_desc->channel[chan].normalized) {
+ double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1);
+ LLVMValueRef scale_val = lp_build_const_vec(type, scale);
+ input = LLVMBuildMul(builder, input, scale_val, "");
+ }
+ }
+ else {
+ /* FIXME */
+ assert(0);
+ input = lp_build_undef(type);
+ }
+
+ break;
+
+ case UTIL_FORMAT_TYPE_FLOAT:
+ if (type.floating) {
+ assert(start == 0);
+ assert(stop == 32);
+ assert(type.width == 32);
+ input = LLVMBuildBitCast(builder, input, lp_build_vec_type(type), "");
+ }
+ else {
+ /* FIXME */
+ assert(0);
+ input = lp_build_undef(type);
+ }
+ break;
+
+ case UTIL_FORMAT_TYPE_FIXED:
+ if (type.floating) {
+ double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1);
+ LLVMValueRef scale_val = lp_build_const_vec(type, scale);
+ input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), "");
+ input = LLVMBuildMul(builder, input, scale_val, "");
}
else {
/* FIXME */
@@ -133,7 +240,7 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
break;
default:
- /* fall through */
+ assert(0);
input = lp_build_undef(type);
break;
}
@@ -145,3 +252,100 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
lp_build_format_swizzle_soa(format_desc, type, inputs, rgba);
}
+
+
+/**
+ * Fetch a pixel into a SoA.
+ *
+ * i and j are the sub-block pixel coordinates.
+ */
+void
+lp_build_fetch_rgba_soa(LLVMBuilderRef builder,
+ const struct util_format_description *format_desc,
+ struct lp_type type,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offset,
+ LLVMValueRef i,
+ LLVMValueRef j,
+ LLVMValueRef *rgba)
+{
+
+ if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
+ (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
+ format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
+ format_desc->block.width == 1 &&
+ format_desc->block.height == 1 &&
+ format_desc->block.bits <= type.width &&
+ (format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT ||
+ format_desc->channel[0].size == 32))
+ {
+ /*
+ * The packed pixel fits into an element of the destination format. Put
+ * the packed pixels into a vector and estract each component for all
+ * vector elements in parallel.
+ */
+
+ LLVMValueRef packed;
+
+ /*
+ * gather the texels from the texture
+ */
+ packed = lp_build_gather(builder,
+ type.length,
+ format_desc->block.bits,
+ type.width,
+ base_ptr, offset);
+
+ /*
+ * convert texels to float rgba
+ */
+ lp_build_unpack_rgba_soa(builder,
+ format_desc,
+ type,
+ packed, rgba);
+ }
+ else {
+ /*
+ * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
+ *
+ * This is not the most efficient way of fetching pixels, as
+ * we miss some opportunities to do vectorization, but this it is a
+ * convenient for formats or scenarios for which there was no opportunity
+ * or incentive to optimize.
+ */
+
+ unsigned k, chan;
+
+ assert(type.floating);
+
+ for (chan = 0; chan < 4; ++chan) {
+ rgba[chan] = lp_build_undef(type);
+ }
+
+ for(k = 0; k < type.length; ++k) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), k, 0);
+ LLVMValueRef offset_elem;
+ LLVMValueRef ptr;
+ LLVMValueRef i_elem, j_elem;
+ LLVMValueRef tmp;
+
+ offset_elem = LLVMBuildExtractElement(builder, offset, index, "");
+ ptr = LLVMBuildGEP(builder, base_ptr, &offset_elem, 1, "");
+
+ i_elem = LLVMBuildExtractElement(builder, i, index, "");
+ j_elem = LLVMBuildExtractElement(builder, j, index, "");
+
+ tmp = lp_build_fetch_rgba_aos(builder, format_desc, ptr, i_elem, j_elem);
+
+ /*
+ * AoS to SoA
+ */
+
+ for (chan = 0; chan < 4; ++chan) {
+ LLVMValueRef chan_val = LLVMConstInt(LLVMInt32Type(), chan, 0),
+ tmp_chan = LLVMBuildExtractElement(builder, tmp, chan_val, "");
+ rgba[chan] = LLVMBuildInsertElement(builder, rgba[chan], tmp_chan, index, "");
+ }
+ }
+ }
+}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.cpp b/src/gallium/auxiliary/gallivm/lp_bld_init.c
index 067397a520b..5067d0a164f 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -26,33 +26,52 @@
**************************************************************************/
-#include <llvm/Config/config.h>
-#include <llvm/Target/TargetSelect.h>
-#include <llvm/Target/TargetOptions.h>
-
-#include "pipe/p_config.h"
-
+#include "pipe/p_compiler.h"
+#include "util/u_cpu_detect.h"
+#include "util/u_debug.h"
#include "lp_bld_init.h"
-extern "C" void LLVMLinkInJIT();
+LLVMModuleRef lp_build_module = NULL;
+LLVMExecutionEngineRef lp_build_engine = NULL;
+LLVMModuleProviderRef lp_build_provider = NULL;
+LLVMTargetDataRef lp_build_target = NULL;
-extern "C" void
+void
lp_build_init(void)
{
-#if defined(PIPE_OS_WINDOWS) && defined(PIPE_ARCH_X86)
- /*
- * This is mis-detected on some hardware / software combinations.
- */
- llvm::StackAlignment = 4;
- llvm::RealignStack = true;
-#endif
-
- /* Same as LLVMInitializeNativeTarget(); */
- llvm::InitializeNativeTarget();
+ LLVMInitializeNativeTarget();
LLVMLinkInJIT();
+
+ if (!lp_build_module)
+ lp_build_module = LLVMModuleCreateWithName("gallivm");
+
+ if (!lp_build_provider)
+ lp_build_provider = LLVMCreateModuleProviderForExistingModule(lp_build_module);
+
+ if (!lp_build_engine) {
+ char *error = NULL;
+
+ if (LLVMCreateJITCompiler(&lp_build_engine, lp_build_provider, 1, &error)) {
+ _debug_printf("%s\n", error);
+ LLVMDisposeMessage(error);
+ assert(0);
+ }
+ }
+
+ if (!lp_build_target)
+ lp_build_target = LLVMGetExecutionEngineTargetData(lp_build_engine);
+
+ util_cpu_detect();
+
+#if 0
+ /* For simulating less capable machines */
+ util_cpu_caps.has_sse3 = 0;
+ util_cpu_caps.has_ssse3 = 0;
+ util_cpu_caps.has_sse4_1 = 0;
+#endif
}
@@ -64,6 +83,6 @@ lp_build_init(void)
*/
#if defined(_MSC_VER) && defined(_DEBUG)
#include <crtdefs.h>
-extern "C" _CRTIMP void __cdecl
+_CRTIMP void __cdecl
_invalid_parameter_noinfo(void) {}
#endif
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.h b/src/gallium/auxiliary/gallivm/lp_bld_init.h
index 07f50d1c433..0ec2afcd1be 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.h
@@ -30,18 +30,18 @@
#define LP_BLD_INIT_H
-#ifdef __cplusplus
-extern "C" {
-#endif
+#include "lp_bld.h"
+#include <llvm-c/ExecutionEngine.h>
-void
-lp_build_init(void);
+extern LLVMModuleRef lp_build_module;
+extern LLVMExecutionEngineRef lp_build_engine;
+extern LLVMModuleProviderRef lp_build_provider;
+extern LLVMTargetDataRef lp_build_target;
-#ifdef __cplusplus
-}
-#endif
+void
+lp_build_init(void);
#endif /* !LP_BLD_INIT_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_interp.c b/src/gallium/auxiliary/gallivm/lp_bld_interp.c
deleted file mode 100644
index 2fc894017d8..00000000000
--- a/src/gallium/auxiliary/gallivm/lp_bld_interp.c
+++ /dev/null
@@ -1,408 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2009 VMware, Inc.
- * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * @file
- * Position and shader input interpolation.
- *
- * @author Jose Fonseca <[email protected]>
- */
-
-#include "pipe/p_shader_tokens.h"
-#include "util/u_debug.h"
-#include "util/u_memory.h"
-#include "util/u_math.h"
-#include "tgsi/tgsi_parse.h"
-#include "lp_bld_debug.h"
-#include "lp_bld_const.h"
-#include "lp_bld_arit.h"
-#include "lp_bld_swizzle.h"
-#include "lp_bld_interp.h"
-
-
-/*
- * The shader JIT function operates on blocks of quads.
- * Each block has 2x2 quads and each quad has 2x2 pixels.
- *
- * We iterate over the quads in order 0, 1, 2, 3:
- *
- * #################
- * # | # | #
- * #---0---#---1---#
- * # | # | #
- * #################
- * # | # | #
- * #---2---#---3---#
- * # | # | #
- * #################
- *
- * Within each quad, we have four pixels which are represented in SOA
- * order:
- *
- * #########
- * # 0 | 1 #
- * #---+---#
- * # 2 | 3 #
- * #########
- *
- * So the green channel (for example) of the four pixels is stored in
- * a single vector register: {g0, g1, g2, g3}.
- */
-
-
-static void
-attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix)
-{
- if(attrib == 0)
- lp_build_name(val, "pos.%c%s", "xyzw"[chan], suffix);
- else
- lp_build_name(val, "input%u.%c%s", attrib - 1, "xyzw"[chan], suffix);
-}
-
-
-/**
- * Initialize the bld->a0, dadx, dady fields. This involves fetching
- * those values from the arrays which are passed into the JIT function.
- */
-static void
-coeffs_init(struct lp_build_interp_soa_context *bld,
- LLVMValueRef a0_ptr,
- LLVMValueRef dadx_ptr,
- LLVMValueRef dady_ptr)
-{
- LLVMBuilderRef builder = bld->base.builder;
- unsigned attrib;
- unsigned chan;
-
- for(attrib = 0; attrib < bld->num_attribs; ++attrib) {
- unsigned mask = bld->mask[attrib];
- unsigned mode = bld->mode[attrib];
- for(chan = 0; chan < NUM_CHANNELS; ++chan) {
- if(mask & (1 << chan)) {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), attrib*NUM_CHANNELS + chan, 0);
- LLVMValueRef a0 = NULL;
- LLVMValueRef dadx = NULL;
- LLVMValueRef dady = NULL;
-
- switch( mode ) {
- case TGSI_INTERPOLATE_PERSPECTIVE:
- /* fall-through */
-
- case TGSI_INTERPOLATE_LINEAR:
- dadx = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dadx_ptr, &index, 1, ""), "");
- dady = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dady_ptr, &index, 1, ""), "");
- dadx = lp_build_broadcast_scalar(&bld->base, dadx);
- dady = lp_build_broadcast_scalar(&bld->base, dady);
- attrib_name(dadx, attrib, chan, ".dadx");
- attrib_name(dady, attrib, chan, ".dady");
- /* fall-through */
-
- case TGSI_INTERPOLATE_CONSTANT:
- a0 = LLVMBuildLoad(builder, LLVMBuildGEP(builder, a0_ptr, &index, 1, ""), "");
- a0 = lp_build_broadcast_scalar(&bld->base, a0);
- attrib_name(a0, attrib, chan, ".a0");
- break;
-
- default:
- assert(0);
- break;
- }
-
- bld->a0 [attrib][chan] = a0;
- bld->dadx[attrib][chan] = dadx;
- bld->dady[attrib][chan] = dady;
- }
- }
- }
-}
-
-
-/**
- * Emit LLVM code to compute the fragment shader input attribute values.
- * For example, for a color input, we'll compute red, green, blue and alpha
- * values for the four pixels in a quad.
- * Recall that we're operating on 4-element vectors so each arithmetic
- * operation is operating on the four pixels in a quad.
- */
-static void
-attribs_init(struct lp_build_interp_soa_context *bld)
-{
- LLVMValueRef x = bld->pos[0];
- LLVMValueRef y = bld->pos[1];
- LLVMValueRef oow = NULL;
- unsigned attrib;
- unsigned chan;
-
- for(attrib = 0; attrib < bld->num_attribs; ++attrib) {
- unsigned mask = bld->mask[attrib];
- unsigned mode = bld->mode[attrib];
- for(chan = 0; chan < NUM_CHANNELS; ++chan) {
- if(mask & (1 << chan)) {
- LLVMValueRef a0 = bld->a0 [attrib][chan];
- LLVMValueRef dadx = bld->dadx[attrib][chan];
- LLVMValueRef dady = bld->dady[attrib][chan];
- LLVMValueRef res;
-
- res = a0;
-
- if (mode != TGSI_INTERPOLATE_CONSTANT) {
- /* res = res + x * dadx */
- res = lp_build_add(&bld->base, res, lp_build_mul(&bld->base, x, dadx));
- /* res = res + y * dady */
- res = lp_build_add(&bld->base, res, lp_build_mul(&bld->base, y, dady));
- }
-
- /* Keep the value of the attribue before perspective divide for faster updates */
- bld->attribs_pre[attrib][chan] = res;
-
- if (mode == TGSI_INTERPOLATE_PERSPECTIVE) {
- LLVMValueRef w = bld->pos[3];
- assert(attrib != 0);
- if(!oow)
- oow = lp_build_rcp(&bld->base, w);
- res = lp_build_mul(&bld->base, res, oow);
- }
-
- attrib_name(res, attrib, chan, "");
-
- bld->attribs[attrib][chan] = res;
- }
- }
- }
-}
-
-
-/**
- * Increment the shader input attribute values.
- * This is called when we move from one quad to the next.
- */
-static void
-attribs_update(struct lp_build_interp_soa_context *bld, int quad_index)
-{
- LLVMValueRef oow = NULL;
- unsigned attrib;
- unsigned chan;
-
- assert(quad_index < 4);
-
- for(attrib = 0; attrib < bld->num_attribs; ++attrib) {
- unsigned mask = bld->mask[attrib];
- unsigned mode = bld->mode[attrib];
-
- if (mode != TGSI_INTERPOLATE_CONSTANT) {
- for(chan = 0; chan < NUM_CHANNELS; ++chan) {
- if(mask & (1 << chan)) {
- LLVMValueRef dadx = bld->dadx[attrib][chan];
- LLVMValueRef dady = bld->dady[attrib][chan];
- LLVMValueRef res;
-
- res = bld->attribs_pre[attrib][chan];
-
- if (quad_index == 1 || quad_index == 3) {
- /* top-right or bottom-right quad */
- /* build res = res + dadx + dadx */
- res = lp_build_add(&bld->base, res, dadx);
- res = lp_build_add(&bld->base, res, dadx);
- }
-
- if (quad_index == 2 || quad_index == 3) {
- /* bottom-left or bottom-right quad */
- /* build res = res + dady + dady */
- res = lp_build_add(&bld->base, res, dady);
- res = lp_build_add(&bld->base, res, dady);
- }
-
- //XXX bld->attribs_pre[attrib][chan] = res;
-
- if (mode == TGSI_INTERPOLATE_PERSPECTIVE) {
- LLVMValueRef w = bld->pos[3];
- assert(attrib != 0);
- if(!oow)
- oow = lp_build_rcp(&bld->base, w);
- res = lp_build_mul(&bld->base, res, oow);
- }
-
- attrib_name(res, attrib, chan, "");
-
- bld->attribs[attrib][chan] = res;
- }
- }
- }
- }
-}
-
-
-/**
- * Generate the position vectors.
- *
- * Parameter x0, y0 are the integer values with the quad upper left coordinates.
- */
-static void
-pos_init(struct lp_build_interp_soa_context *bld,
- LLVMValueRef x0,
- LLVMValueRef y0)
-{
- lp_build_name(x0, "pos.x");
- lp_build_name(y0, "pos.y");
-
- bld->attribs[0][0] = x0;
- bld->attribs[0][1] = y0;
-}
-
-
-/**
- * Update quad position values when moving to the next quad.
- */
-static void
-pos_update(struct lp_build_interp_soa_context *bld, int quad_index)
-{
- LLVMValueRef x = bld->attribs[0][0];
- LLVMValueRef y = bld->attribs[0][1];
- const int xstep = 2, ystep = 2;
-
- if (quad_index == 1 || quad_index == 3) {
- /* top-right or bottom-right quad in block */
- /* build x += xstep */
- x = lp_build_add(&bld->base, x,
- lp_build_const_scalar(bld->base.type, xstep));
- }
-
- if (quad_index == 2) {
- /* bottom-left quad in block */
- /* build y += ystep */
- y = lp_build_add(&bld->base, y,
- lp_build_const_scalar(bld->base.type, ystep));
- /* build x -= xstep */
- x = lp_build_sub(&bld->base, x,
- lp_build_const_scalar(bld->base.type, xstep));
- }
-
- lp_build_name(x, "pos.x");
- lp_build_name(y, "pos.y");
-
- bld->attribs[0][0] = x;
- bld->attribs[0][1] = y;
-}
-
-
-/**
- * Initialize fragment shader input attribute info.
- */
-void
-lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
- const struct tgsi_token *tokens,
- boolean flatshade,
- LLVMBuilderRef builder,
- struct lp_type type,
- LLVMValueRef a0_ptr,
- LLVMValueRef dadx_ptr,
- LLVMValueRef dady_ptr,
- LLVMValueRef x0,
- LLVMValueRef y0)
-{
- struct tgsi_parse_context parse;
- struct tgsi_full_declaration *decl;
-
- memset(bld, 0, sizeof *bld);
-
- lp_build_context_init(&bld->base, builder, type);
-
- /* For convenience */
- bld->pos = bld->attribs[0];
- bld->inputs = (const LLVMValueRef (*)[NUM_CHANNELS]) bld->attribs[1];
-
- /* Position */
- bld->num_attribs = 1;
- bld->mask[0] = TGSI_WRITEMASK_ZW;
- bld->mode[0] = TGSI_INTERPOLATE_LINEAR;
-
- /* Inputs */
- tgsi_parse_init( &parse, tokens );
- while( !tgsi_parse_end_of_tokens( &parse ) ) {
- tgsi_parse_token( &parse );
-
- switch( parse.FullToken.Token.Type ) {
- case TGSI_TOKEN_TYPE_DECLARATION:
- decl = &parse.FullToken.FullDeclaration;
- if( decl->Declaration.File == TGSI_FILE_INPUT ) {
- unsigned first, last, mask;
- unsigned attrib;
-
- first = decl->Range.First;
- last = decl->Range.Last;
- mask = decl->Declaration.UsageMask;
-
- for( attrib = first; attrib <= last; ++attrib ) {
- bld->mask[1 + attrib] = mask;
-
- /* XXX: have mesa set INTERP_CONSTANT in the fragment
- * shader.
- */
- if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
- flatshade)
- bld->mode[1 + attrib] = TGSI_INTERPOLATE_CONSTANT;
- else
- bld->mode[1 + attrib] = decl->Declaration.Interpolate;
- }
-
- bld->num_attribs = MAX2(bld->num_attribs, 1 + last + 1);
- }
- break;
-
- case TGSI_TOKEN_TYPE_INSTRUCTION:
- case TGSI_TOKEN_TYPE_IMMEDIATE:
- case TGSI_TOKEN_TYPE_PROPERTY:
- break;
-
- default:
- assert( 0 );
- }
- }
- tgsi_parse_free( &parse );
-
- coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr);
-
- pos_init(bld, x0, y0);
-
- attribs_init(bld);
-}
-
-
-/**
- * Advance the position and inputs to the given quad within the block.
- */
-void
-lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld,
- int quad_index)
-{
- assert(quad_index < 4);
-
- pos_update(bld, quad_index);
-
- attribs_update(bld, quad_index);
-}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_interp.h b/src/gallium/auxiliary/gallivm/lp_bld_interp.h
deleted file mode 100644
index ca958cdf343..00000000000
--- a/src/gallium/auxiliary/gallivm/lp_bld_interp.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2009 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * @file
- * Position and shader input interpolation.
- *
- * Special attention is given to the interpolation of side by side quads.
- * Multiplications are made only for the first quad. Interpolation of
- * inputs for posterior quads are done exclusively with additions, and
- * perspective divide if necessary.
- *
- * @author Jose Fonseca <[email protected]>
- */
-
-#ifndef LP_BLD_INTERP_H
-#define LP_BLD_INTERP_H
-
-
-#include <llvm-c/Core.h>
-
-#include "tgsi/tgsi_exec.h"
-
-#include "lp_bld_type.h"
-
-
-struct tgsi_token;
-
-
-struct lp_build_interp_soa_context
-{
- struct lp_build_context base;
-
- unsigned num_attribs;
- unsigned mask[1 + PIPE_MAX_SHADER_INPUTS];
- unsigned mode[1 + PIPE_MAX_SHADER_INPUTS];
-
- LLVMValueRef a0 [1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
- LLVMValueRef dadx[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
- LLVMValueRef dady[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
-
- /* Attribute values before perspective divide */
- LLVMValueRef attribs_pre[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
-
- LLVMValueRef attribs[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
-
- /*
- * Convenience pointers. Callers may access this one.
- */
- const LLVMValueRef *pos;
- const LLVMValueRef (*inputs)[NUM_CHANNELS];
-};
-
-
-void
-lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
- const struct tgsi_token *tokens,
- boolean flatshade,
- LLVMBuilderRef builder,
- struct lp_type type,
- LLVMValueRef a0_ptr,
- LLVMValueRef dadx_ptr,
- LLVMValueRef dady_ptr,
- LLVMValueRef x0,
- LLVMValueRef y0);
-
-void
-lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld,
- int quad_index);
-
-
-#endif /* LP_BLD_INTERP_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.h b/src/gallium/auxiliary/gallivm/lp_bld_intr.h
index f813f27074b..977f7673228 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_intr.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.h
@@ -37,7 +37,7 @@
#define LP_BLD_INTR_H
-#include <llvm-c/Core.h>
+#include "gallivm/lp_bld.h"
/**
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
index 2726747eaea..d13fa1a5d04 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
@@ -42,6 +42,26 @@
#include "lp_bld_logic.h"
+/*
+ * XXX
+ *
+ * Selection with vector conditional like
+ *
+ * select <4 x i1> %C, %A, %B
+ *
+ * is valid IR (e.g. llvm/test/Assembler/vector-select.ll), but it is not
+ * supported on any backend.
+ *
+ * Expanding the boolean vector to full SIMD register width, as in
+ *
+ * sext <4 x i1> %C to <4 x i32>
+ *
+ * is valid and supported (e.g., llvm/test/CodeGen/X86/vec_compare.ll), but
+ * it causes assertion failures in LLVM 2.6. It appears to work correctly on
+ * LLVM 2.7.
+ */
+
+
/**
* Build code to compare two values 'a' and 'b' of 'type' using the given func.
* \param func one of PIPE_FUNC_x
@@ -54,13 +74,11 @@ lp_build_compare(LLVMBuilderRef builder,
LLVMValueRef a,
LLVMValueRef b)
{
- LLVMTypeRef vec_type = lp_build_vec_type(type);
LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
LLVMValueRef zeros = LLVMConstNull(int_vec_type);
LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
LLVMValueRef cond;
LLVMValueRef res;
- unsigned i;
assert(func >= PIPE_FUNC_NEVER);
assert(func <= PIPE_FUNC_ALWAYS);
@@ -74,10 +92,12 @@ lp_build_compare(LLVMBuilderRef builder,
/* XXX: It is not clear if we should use the ordered or unordered operators */
+#if HAVE_LLVM < 0x0207
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
if(type.width * type.length == 128) {
if(type.floating && util_cpu_caps.has_sse) {
/* float[4] comparison */
+ LLVMTypeRef vec_type = lp_build_vec_type(type);
LLVMValueRef args[3];
unsigned cc;
boolean swap;
@@ -147,6 +167,7 @@ lp_build_compare(LLVMBuilderRef builder,
const char *pcmpgt;
LLVMValueRef args[2];
LLVMValueRef res;
+ LLVMTypeRef vec_type = lp_build_vec_type(type);
switch (type.width) {
case 8:
@@ -172,7 +193,7 @@ lp_build_compare(LLVMBuilderRef builder,
if(table[func].gt &&
((type.width == 8 && type.sign) ||
(type.width != 8 && !type.sign))) {
- LLVMValueRef msb = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1));
+ LLVMValueRef msb = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
a = LLVMBuildXor(builder, a, msb, "");
b = LLVMBuildXor(builder, b, msb, "");
}
@@ -198,8 +219,9 @@ lp_build_compare(LLVMBuilderRef builder,
return res;
}
- }
+ } /* if (type.width * type.length == 128) */
#endif
+#endif /* HAVE_LLVM < 0x0207 */
if(type.floating) {
LLVMRealPredicate op;
@@ -233,25 +255,33 @@ lp_build_compare(LLVMBuilderRef builder,
return lp_build_undef(type);
}
-#if 0
- /* XXX: Although valid IR, no LLVM target currently support this */
+#if HAVE_LLVM >= 0x0207
cond = LLVMBuildFCmp(builder, op, a, b, "");
- res = LLVMBuildSelect(builder, cond, ones, zeros, "");
+ res = LLVMBuildSExt(builder, cond, int_vec_type, "");
#else
- debug_printf("%s: warning: using slow element-wise vector comparison\n",
- __FUNCTION__);
- res = LLVMGetUndef(int_vec_type);
- for(i = 0; i < type.length; ++i) {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
- cond = LLVMBuildFCmp(builder, op,
- LLVMBuildExtractElement(builder, a, index, ""),
- LLVMBuildExtractElement(builder, b, index, ""),
- "");
- cond = LLVMBuildSelect(builder, cond,
- LLVMConstExtractElement(ones, index),
- LLVMConstExtractElement(zeros, index),
- "");
- res = LLVMBuildInsertElement(builder, res, cond, index, "");
+ if (type.length == 1) {
+ cond = LLVMBuildFCmp(builder, op, a, b, "");
+ res = LLVMBuildSExt(builder, cond, int_vec_type, "");
+ }
+ else {
+ unsigned i;
+
+ res = LLVMGetUndef(int_vec_type);
+
+ debug_printf("%s: warning: using slow element-wise float"
+ " vector comparison\n", __FUNCTION__);
+ for (i = 0; i < type.length; ++i) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ cond = LLVMBuildFCmp(builder, op,
+ LLVMBuildExtractElement(builder, a, index, ""),
+ LLVMBuildExtractElement(builder, b, index, ""),
+ "");
+ cond = LLVMBuildSelect(builder, cond,
+ LLVMConstExtractElement(ones, index),
+ LLVMConstExtractElement(zeros, index),
+ "");
+ res = LLVMBuildInsertElement(builder, res, cond, index, "");
+ }
}
#endif
}
@@ -281,25 +311,34 @@ lp_build_compare(LLVMBuilderRef builder,
return lp_build_undef(type);
}
-#if 0
- /* XXX: Although valid IR, no LLVM target currently support this */
+#if HAVE_LLVM >= 0x0207
cond = LLVMBuildICmp(builder, op, a, b, "");
- res = LLVMBuildSelect(builder, cond, ones, zeros, "");
+ res = LLVMBuildSExt(builder, cond, int_vec_type, "");
#else
- debug_printf("%s: warning: using slow element-wise int vector comparison\n",
- __FUNCTION__);
- res = LLVMGetUndef(int_vec_type);
- for(i = 0; i < type.length; ++i) {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
- cond = LLVMBuildICmp(builder, op,
- LLVMBuildExtractElement(builder, a, index, ""),
- LLVMBuildExtractElement(builder, b, index, ""),
- "");
- cond = LLVMBuildSelect(builder, cond,
- LLVMConstExtractElement(ones, index),
- LLVMConstExtractElement(zeros, index),
- "");
- res = LLVMBuildInsertElement(builder, res, cond, index, "");
+ if (type.length == 1) {
+ cond = LLVMBuildICmp(builder, op, a, b, "");
+ res = LLVMBuildSExt(builder, cond, int_vec_type, "");
+ }
+ else {
+ unsigned i;
+
+ res = LLVMGetUndef(int_vec_type);
+
+ debug_printf("%s: warning: using slow element-wise int"
+ " vector comparison\n", __FUNCTION__);
+
+ for(i = 0; i < type.length; ++i) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ cond = LLVMBuildICmp(builder, op,
+ LLVMBuildExtractElement(builder, a, index, ""),
+ LLVMBuildExtractElement(builder, b, index, ""),
+ "");
+ cond = LLVMBuildSelect(builder, cond,
+ LLVMConstExtractElement(ones, index),
+ LLVMConstExtractElement(zeros, index),
+ "");
+ res = LLVMBuildInsertElement(builder, res, cond, index, "");
+ }
}
#endif
}
@@ -326,6 +365,8 @@ lp_build_cmp(struct lp_build_context *bld,
/**
* Return mask ? a : b;
+ *
+ * mask is a bitwise mask, composed of 0 or ~0 for each element.
*/
LLVMValueRef
lp_build_select(struct lp_build_context *bld,
@@ -339,26 +380,32 @@ lp_build_select(struct lp_build_context *bld,
if(a == b)
return a;
- if(type.floating) {
- LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
- a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
- b = LLVMBuildBitCast(bld->builder, b, int_vec_type, "");
+ if (type.length == 1) {
+ mask = LLVMBuildTrunc(bld->builder, mask, LLVMInt1Type(), "");
+ res = LLVMBuildSelect(bld->builder, mask, a, b, "");
}
+ else {
+ if(type.floating) {
+ LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+ a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+ b = LLVMBuildBitCast(bld->builder, b, int_vec_type, "");
+ }
- a = LLVMBuildAnd(bld->builder, a, mask, "");
+ a = LLVMBuildAnd(bld->builder, a, mask, "");
- /* This often gets translated to PANDN, but sometimes the NOT is
- * pre-computed and stored in another constant. The best strategy depends
- * on available registers, so it is not a big deal -- hopefully LLVM does
- * the right decision attending the rest of the program.
- */
- b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), "");
+ /* This often gets translated to PANDN, but sometimes the NOT is
+ * pre-computed and stored in another constant. The best strategy depends
+ * on available registers, so it is not a big deal -- hopefully LLVM does
+ * the right decision attending the rest of the program.
+ */
+ b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), "");
- res = LLVMBuildOr(bld->builder, a, b, "");
+ res = LLVMBuildOr(bld->builder, a, b, "");
- if(type.floating) {
- LLVMTypeRef vec_type = lp_build_vec_type(type);
- res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
+ if(type.floating) {
+ LLVMTypeRef vec_type = lp_build_vec_type(type);
+ res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
+ }
}
return res;
@@ -425,14 +472,12 @@ lp_build_select_aos(struct lp_build_context *bld,
}
}
+
+/** Return (a & ~b) */
LLVMValueRef
-lp_build_alloca(struct lp_build_context *bld)
+lp_build_andc(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b)
{
- const struct lp_type type = bld->type;
-
- if (type.length > 1) { /*vector*/
- return LLVMBuildAlloca(bld->builder, lp_build_vec_type(type), "");
- } else { /*scalar*/
- return LLVMBuildAlloca(bld->builder, lp_build_elem_type(type), "");
- }
+ b = LLVMBuildNot(bld->builder, b, "");
+ b = LLVMBuildAnd(bld->builder, a, b, "");
+ return b;
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.h b/src/gallium/auxiliary/gallivm/lp_bld_logic.h
index a399ebf39ef..29f9fc3b205 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_logic.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.h
@@ -37,7 +37,7 @@
#define LP_BLD_LOGIC_H
-#include <llvm-c/Core.h>
+#include "gallivm/lp_bld.h"
#include "pipe/p_defines.h" /* For PIPE_FUNC_xxx */
@@ -76,7 +76,9 @@ lp_build_select_aos(struct lp_build_context *bld,
LLVMValueRef b,
const boolean cond[4]);
+
LLVMValueRef
-lp_build_alloca(struct lp_build_context *bld);
+lp_build_andc(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b);
+
#endif /* !LP_BLD_LOGIC_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
index bc360ad77ad..186f8849b8d 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
@@ -164,7 +164,7 @@ lp_build_unpack2(LLVMBuilderRef builder,
if(dst_type.sign && src_type.sign) {
/* Replicate the sign bit in the most significant bits */
- msb = LLVMBuildAShr(builder, src, lp_build_int_const_scalar(src_type, src_type.width - 1), "");
+ msb = LLVMBuildAShr(builder, src, lp_build_const_int_vec(src_type, src_type.width - 1), "");
}
else
/* Most significant bits always zero */
@@ -256,13 +256,13 @@ lp_build_pack2(LLVMBuilderRef builder,
LLVMValueRef lo,
LLVMValueRef hi)
{
+#if HAVE_LLVM < 0x0207
LLVMTypeRef src_vec_type = lp_build_vec_type(src_type);
+#endif
LLVMTypeRef dst_vec_type = lp_build_vec_type(dst_type);
LLVMValueRef shuffle;
LLVMValueRef res;
- dst_vec_type = lp_build_vec_type(dst_type);
-
assert(!src_type.floating);
assert(!dst_type.floating);
assert(src_type.width == dst_type.width * 2);
@@ -272,11 +272,14 @@ lp_build_pack2(LLVMBuilderRef builder,
switch(src_type.width) {
case 32:
if(dst_type.sign) {
+#if HAVE_LLVM >= 0x0207
+ res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128", dst_vec_type, lo, hi);
+#else
res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128", src_vec_type, lo, hi);
+#endif
}
else {
if (util_cpu_caps.has_sse4_1) {
- /* PACKUSDW is the only instrinsic with a consistent signature */
return lp_build_intrinsic_binary(builder, "llvm.x86.sse41.packusdw", dst_vec_type, lo, hi);
}
else {
@@ -288,9 +291,17 @@ lp_build_pack2(LLVMBuilderRef builder,
case 16:
if(dst_type.sign)
+#if HAVE_LLVM >= 0x0207
+ res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packsswb.128", dst_vec_type, lo, hi);
+#else
res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packsswb.128", src_vec_type, lo, hi);
+#endif
else
+#if HAVE_LLVM >= 0x0207
+ res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packuswb.128", dst_vec_type, lo, hi);
+#else
res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packuswb.128", src_vec_type, lo, hi);
+#endif
break;
default:
@@ -348,7 +359,7 @@ lp_build_packs2(LLVMBuilderRef builder,
if(clamp) {
struct lp_build_context bld;
unsigned dst_bits = dst_type.sign ? dst_type.width - 1 : dst_type.width;
- LLVMValueRef dst_max = lp_build_int_const_scalar(src_type, ((unsigned long long)1 << dst_bits) - 1);
+ LLVMValueRef dst_max = lp_build_const_int_vec(src_type, ((unsigned long long)1 << dst_bits) - 1);
lp_build_context_init(&bld, builder, src_type);
lo = lp_build_min(&bld, lo, dst_max);
hi = lp_build_min(&bld, hi, dst_max);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.h b/src/gallium/auxiliary/gallivm/lp_bld_pack.h
index fb2a34984a4..41adeed220c 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_pack.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.h
@@ -37,7 +37,7 @@
#define LP_BLD_PACK_H
-#include <llvm-c/Core.h>
+#include "gallivm/lp_bld.h"
struct lp_type;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_printf.c b/src/gallium/auxiliary/gallivm/lp_bld_printf.c
new file mode 100644
index 00000000000..153ba5b15b1
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_printf.c
@@ -0,0 +1,121 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <stdio.h>
+
+#include "util/u_debug.h"
+#include "util/u_memory.h"
+#include "lp_bld_printf.h"
+
+
+static int
+lp_get_printf_arg_count(const char *fmt)
+{
+ int count =0;
+ const char *p = fmt;
+ int c;
+
+ while ((c = *p++)) {
+ if (c != '%')
+ continue;
+ switch (*p) {
+ case '\0':
+ continue;
+ case '%':
+ p++;
+ continue;
+ case '.':
+ if (p[1] == '*' && p[2] == 's') {
+ count += 2;
+ p += 3;
+ continue;
+ }
+ /* fallthrough */
+ default:
+ count ++;
+ }
+ }
+ return count;
+}
+
+LLVMValueRef
+lp_build_const_string_variable(LLVMModuleRef module, const char *str, int len)
+{
+ LLVMValueRef string = LLVMAddGlobal(module, LLVMArrayType(LLVMInt8Type(), len + 1), "");
+ LLVMSetGlobalConstant(string, TRUE);
+ LLVMSetLinkage(string, LLVMInternalLinkage);
+ LLVMSetInitializer(string, LLVMConstString(str, len + 1, TRUE));
+ return string;
+}
+
+
+/**
+ * lp_build_printf.
+ *
+ * Build printf call in LLVM IR. The output goes to stdout.
+ * The additional variable arguments need to have type
+ * LLVMValueRef.
+ */
+LLVMValueRef
+lp_build_printf(LLVMBuilderRef builder, const char *fmt, ...)
+{
+ va_list arglist;
+ int i = 0;
+ int argcount = lp_get_printf_arg_count(fmt);
+ LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
+ LLVMValueRef params[50];
+ LLVMValueRef fmtarg = lp_build_const_string_variable(module, fmt, strlen(fmt) + 1);
+ LLVMValueRef int0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ LLVMValueRef index[2];
+ LLVMValueRef func_printf = LLVMGetNamedFunction(module, "printf");
+
+ assert(Elements(params) >= argcount + 1);
+
+ index[0] = index[1] = int0;
+
+ if (!func_printf) {
+ LLVMTypeRef printf_type = LLVMFunctionType(LLVMIntType(32), NULL, 0, 1);
+ func_printf = LLVMAddFunction(module, "printf", printf_type);
+ }
+
+ params[0] = LLVMBuildGEP(builder, fmtarg, index, 2, "");
+
+ va_start(arglist, fmt);
+ for (i = 1; i <= argcount; i++) {
+ LLVMValueRef val = va_arg(arglist, LLVMValueRef);
+ LLVMTypeRef type = LLVMTypeOf(val);
+ /* printf wants doubles, so lets convert so that
+ * we can actually print them */
+ if (LLVMGetTypeKind(type) == LLVMFloatTypeKind)
+ val = LLVMBuildFPExt(builder, val, LLVMDoubleType(), "");
+ params[i] = val;
+ }
+ va_end(arglist);
+
+ return LLVMBuildCall(builder, func_printf, params, argcount + 1, "");
+}
+
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_alpha.c b/src/gallium/auxiliary/gallivm/lp_bld_printf.h
index 7245730350c..83bd8f1d557 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_alpha.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_printf.h
@@ -1,6 +1,6 @@
/**************************************************************************
*
- * Copyright 2009 VMware, Inc.
+ * Copyright 2010 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -25,39 +25,15 @@
*
**************************************************************************/
-/**
- * Alpha testing to LLVM IR translation.
- *
- * @author Jose Fonseca <[email protected]>
- */
-
-#include "pipe/p_state.h"
-
-#include "lp_bld_type.h"
-#include "lp_bld_const.h"
-#include "lp_bld_logic.h"
-#include "lp_bld_flow.h"
-#include "lp_bld_debug.h"
-#include "lp_bld_alpha.h"
-
+#ifndef LP_BLD_PRINTF_H
+#define LP_BLD_PRINTF_H
-void
-lp_build_alpha_test(LLVMBuilderRef builder,
- const struct pipe_alpha_state *state,
- struct lp_type type,
- struct lp_build_mask_context *mask,
- LLVMValueRef alpha,
- LLVMValueRef ref)
-{
- struct lp_build_context bld;
- lp_build_context_init(&bld, builder, type);
+#include "pipe/p_compiler.h"
+#include "lp_bld.h"
- if(state->enabled) {
- LLVMValueRef test = lp_build_cmp(&bld, state->func, alpha, ref);
+LLVMValueRef lp_build_const_string_variable(LLVMModuleRef module, const char *str, int len);
+LLVMValueRef lp_build_printf(LLVMBuilderRef builder, const char *fmt, ...);
- lp_build_name(test, "alpha_mask");
+#endif
- lp_build_mask_update(mask, test);
- }
-}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
index 55ac2e94363..195a4953ab1 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -51,9 +51,11 @@
*/
void
lp_sampler_static_state(struct lp_sampler_static_state *state,
- const struct pipe_texture *texture,
+ const struct pipe_sampler_view *view,
const struct pipe_sampler_state *sampler)
{
+ const struct pipe_resource *texture = view->texture;
+
memset(state, 0, sizeof *state);
if(!texture)
@@ -62,7 +64,24 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
if(!sampler)
return;
- state->format = texture->format;
+ /*
+ * We don't copy sampler state over unless it is actually enabled, to avoid
+ * spurious recompiles, as the sampler static state is part of the shader
+ * key.
+ *
+ * Ideally the state tracker or cso_cache module would make all state
+ * canonical, but until that happens it's better to be safe than sorry here.
+ *
+ * XXX: Actually there's much more than can be done here, especially
+ * regarding 1D/2D/3D/CUBE textures, wrap modes, etc.
+ */
+
+ state->format = view->format;
+ state->swizzle_r = view->swizzle_r;
+ state->swizzle_g = view->swizzle_g;
+ state->swizzle_b = view->swizzle_b;
+ state->swizzle_a = view->swizzle_a;
+
state->target = texture->target;
state->pot_width = util_is_pot(texture->width0);
state->pot_height = util_is_pot(texture->height0);
@@ -72,17 +91,30 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
state->wrap_t = sampler->wrap_t;
state->wrap_r = sampler->wrap_r;
state->min_img_filter = sampler->min_img_filter;
- state->min_mip_filter = sampler->min_mip_filter;
state->mag_img_filter = sampler->mag_img_filter;
+ if (texture->last_level) {
+ state->min_mip_filter = sampler->min_mip_filter;
+ } else {
+ state->min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+ }
+
state->compare_mode = sampler->compare_mode;
+ if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) {
+ state->compare_func = sampler->compare_func;
+ }
+
+ state->normalized_coords = sampler->normalized_coords;
+ state->lod_bias = sampler->lod_bias;
+ state->min_lod = sampler->min_lod;
+ state->max_lod = sampler->max_lod;
state->border_color[0] = sampler->border_color[0];
state->border_color[1] = sampler->border_color[1];
state->border_color[2] = sampler->border_color[2];
state->border_color[3] = sampler->border_color[3];
- if(sampler->compare_mode != PIPE_TEX_COMPARE_NONE) {
- state->compare_func = sampler->compare_func;
- }
- state->normalized_coords = sampler->normalized_coords;
+
+ /*
+ * FIXME: Handle the remainder of pipe_sampler_view.
+ */
}
@@ -136,62 +168,34 @@ lp_build_gather(LLVMBuilderRef builder,
/**
- * Compute the offset of a pixel.
+ * Compute the offset of a pixel block.
*
- * x, y, y_stride are vectors
+ * x, y, z, y_stride, z_stride are vectors, and they refer to pixel blocks, as
+ * per format description, and not individual pixels.
*/
LLVMValueRef
lp_build_sample_offset(struct lp_build_context *bld,
const struct util_format_description *format_desc,
LLVMValueRef x,
LLVMValueRef y,
+ LLVMValueRef z,
LLVMValueRef y_stride,
- LLVMValueRef data_ptr)
+ LLVMValueRef z_stride)
{
LLVMValueRef x_stride;
LLVMValueRef offset;
- x_stride = lp_build_const_scalar(bld->type, format_desc->block.bits/8);
-
- if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
- LLVMValueRef x_lo, x_hi;
- LLVMValueRef y_lo, y_hi;
- LLVMValueRef x_stride_lo, x_stride_hi;
- LLVMValueRef y_stride_lo, y_stride_hi;
- LLVMValueRef x_offset_lo, x_offset_hi;
- LLVMValueRef y_offset_lo, y_offset_hi;
- LLVMValueRef offset_lo, offset_hi;
-
- x_lo = LLVMBuildAnd(bld->builder, x, bld->one, "");
- y_lo = LLVMBuildAnd(bld->builder, y, bld->one, "");
-
- x_hi = LLVMBuildLShr(bld->builder, x, bld->one, "");
- y_hi = LLVMBuildLShr(bld->builder, y, bld->one, "");
-
- x_stride_lo = x_stride;
- y_stride_lo = lp_build_const_scalar(bld->type, 2*format_desc->block.bits/8);
+ x_stride = lp_build_const_vec(bld->type, format_desc->block.bits/8);
+ offset = lp_build_mul(bld, x, x_stride);
- x_stride_hi = lp_build_const_scalar(bld->type, 4*format_desc->block.bits/8);
- y_stride_hi = LLVMBuildShl(bld->builder, y_stride, bld->one, "");
-
- x_offset_lo = lp_build_mul(bld, x_lo, x_stride_lo);
- y_offset_lo = lp_build_mul(bld, y_lo, y_stride_lo);
- offset_lo = lp_build_add(bld, x_offset_lo, y_offset_lo);
-
- x_offset_hi = lp_build_mul(bld, x_hi, x_stride_hi);
- y_offset_hi = lp_build_mul(bld, y_hi, y_stride_hi);
- offset_hi = lp_build_add(bld, x_offset_hi, y_offset_hi);
-
- offset = lp_build_add(bld, offset_hi, offset_lo);
+ if (y && y_stride) {
+ LLVMValueRef y_offset = lp_build_mul(bld, y, y_stride);
+ offset = lp_build_add(bld, offset, y_offset);
}
- else {
- LLVMValueRef x_offset;
- LLVMValueRef y_offset;
-
- x_offset = lp_build_mul(bld, x, x_stride);
- y_offset = lp_build_mul(bld, y, y_stride);
- offset = lp_build_add(bld, x_offset, y_offset);
+ if (z && z_stride) {
+ LLVMValueRef z_offset = lp_build_mul(bld, z, z_stride);
+ offset = lp_build_add(bld, offset, z_offset);
}
return offset;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
index a791d886127..8ceb20473d5 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
@@ -36,9 +36,10 @@
#define LP_BLD_SAMPLE_H
-#include <llvm-c/Core.h>
+#include "gallivm/lp_bld.h"
-struct pipe_texture;
+struct pipe_resource;
+struct pipe_sampler_view;
struct pipe_sampler_state;
struct util_format_description;
struct lp_type;
@@ -48,14 +49,20 @@ struct lp_build_context;
/**
* Sampler static state.
*
- * These are the bits of state from pipe_texture and pipe_sampler_state that
+ * These are the bits of state from pipe_resource and pipe_sampler_state that
* are embedded in the generated code.
*/
struct lp_sampler_static_state
{
- /* pipe_texture's state */
+ /* pipe_sampler_view's state */
enum pipe_format format;
- unsigned target:2;
+ unsigned swizzle_r:3;
+ unsigned swizzle_g:3;
+ unsigned swizzle_b:3;
+ unsigned swizzle_a:3;
+
+ /* pipe_texture's state */
+ unsigned target:3;
unsigned pot_width:1;
unsigned pot_height:1;
unsigned pot_depth:1;
@@ -70,6 +77,7 @@ struct lp_sampler_static_state
unsigned compare_mode:1;
unsigned compare_func:3;
unsigned normalized_coords:1;
+ float lod_bias, min_lod, max_lod;
float border_color[4];
};
@@ -77,7 +85,7 @@ struct lp_sampler_static_state
/**
* Sampler dynamic state.
*
- * These are the bits of state from pipe_texture and pipe_sampler_state that
+ * These are the bits of state from pipe_resource and pipe_sampler_state that
* are computed in runtime.
*
* There are obtained through callbacks, as we don't want to tie the texture
@@ -99,10 +107,27 @@ struct lp_sampler_dynamic_state
LLVMBuilderRef builder,
unsigned unit);
+ /** Obtain the base texture depth. */
LLVMValueRef
- (*stride)( struct lp_sampler_dynamic_state *state,
- LLVMBuilderRef builder,
- unsigned unit);
+ (*depth)( struct lp_sampler_dynamic_state *state,
+ LLVMBuilderRef builder,
+ unsigned unit);
+
+ /** Obtain the number of mipmap levels (minus one). */
+ LLVMValueRef
+ (*last_level)( struct lp_sampler_dynamic_state *state,
+ LLVMBuilderRef builder,
+ unsigned unit);
+
+ LLVMValueRef
+ (*row_stride)( struct lp_sampler_dynamic_state *state,
+ LLVMBuilderRef builder,
+ unsigned unit);
+
+ LLVMValueRef
+ (*img_stride)( struct lp_sampler_dynamic_state *state,
+ LLVMBuilderRef builder,
+ unsigned unit);
LLVMValueRef
(*data_ptr)( struct lp_sampler_dynamic_state *state,
@@ -117,7 +142,7 @@ struct lp_sampler_dynamic_state
*/
void
lp_sampler_static_state(struct lp_sampler_static_state *state,
- const struct pipe_texture *texture,
+ const struct pipe_sampler_view *view,
const struct pipe_sampler_state *sampler);
@@ -135,8 +160,9 @@ lp_build_sample_offset(struct lp_build_context *bld,
const struct util_format_description *format_desc,
LLVMValueRef x,
LLVMValueRef y,
+ LLVMValueRef z,
LLVMValueRef y_stride,
- LLVMValueRef data_ptr);
+ LLVMValueRef z_stride);
void
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index fe41d5ee493..54ef921678d 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -48,6 +48,7 @@
#include "lp_bld_logic.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_pack.h"
+#include "lp_bld_flow.h"
#include "lp_bld_format.h"
#include "lp_bld_sample.h"
@@ -65,6 +66,14 @@ struct lp_build_sample_context
const struct util_format_description *format_desc;
+ /** regular scalar float type */
+ struct lp_type float_type;
+ struct lp_build_context float_bld;
+
+ /** regular scalar float type */
+ struct lp_type int_type;
+ struct lp_build_context int_bld;
+
/** Incoming coordinates type and build context */
struct lp_type coord_type;
struct lp_build_context coord_bld;
@@ -108,9 +117,125 @@ wrap_mode_uses_border_color(unsigned mode)
}
+static LLVMValueRef
+lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
+ LLVMValueRef data_array, LLVMValueRef level)
+{
+ LLVMValueRef indexes[2], data_ptr;
+ indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ indexes[1] = level;
+ data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, "");
+ data_ptr = LLVMBuildLoad(bld->builder, data_ptr, "");
+ return data_ptr;
+}
+
+
+static LLVMValueRef
+lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld,
+ LLVMValueRef data_array, int level)
+{
+ LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0);
+ return lp_build_get_mipmap_level(bld, data_array, lvl);
+}
+
+
+/**
+ * Dereference stride_array[mipmap_level] array to get a stride.
+ * Return stride as a vector.
+ */
+static LLVMValueRef
+lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
+ LLVMValueRef stride_array, LLVMValueRef level)
+{
+ LLVMValueRef indexes[2], stride;
+ indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ indexes[1] = level;
+ stride = LLVMBuildGEP(bld->builder, stride_array, indexes, 2, "");
+ stride = LLVMBuildLoad(bld->builder, stride, "");
+ stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride);
+ return stride;
+}
+
+
+/** Dereference stride_array[0] array to get a stride (as vector). */
+static LLVMValueRef
+lp_build_get_const_level_stride_vec(struct lp_build_sample_context *bld,
+ LLVMValueRef stride_array, int level)
+{
+ LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0);
+ return lp_build_get_level_stride_vec(bld, stride_array, lvl);
+}
+
+
+static int
+texture_dims(enum pipe_texture_target tex)
+{
+ switch (tex) {
+ case PIPE_TEXTURE_1D:
+ return 1;
+ case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_CUBE:
+ return 2;
+ case PIPE_TEXTURE_3D:
+ return 3;
+ default:
+ assert(0 && "bad texture target in texture_dims()");
+ return 2;
+ }
+}
+
+
+static LLVMValueRef
+lp_build_swizzle_chan_soa(struct lp_type type,
+ const LLVMValueRef *unswizzled,
+ enum util_format_swizzle swizzle)
+{
+ switch (swizzle) {
+ case PIPE_SWIZZLE_RED:
+ case PIPE_SWIZZLE_GREEN:
+ case PIPE_SWIZZLE_BLUE:
+ case PIPE_SWIZZLE_ALPHA:
+ return unswizzled[swizzle];
+ case PIPE_SWIZZLE_ZERO:
+ return lp_build_zero(type);
+ case PIPE_SWIZZLE_ONE:
+ return lp_build_one(type);
+ default:
+ assert(0);
+ return lp_build_undef(type);
+ }
+}
+
+
+static void
+lp_build_swizzle_soa(struct lp_build_sample_context *bld,
+ LLVMValueRef *texel)
+{
+ LLVMValueRef unswizzled[4];
+ unsigned char swizzles[4];
+ unsigned chan;
+
+ for (chan = 0; chan < 4; ++chan) {
+ unswizzled[chan] = texel[chan];
+ }
+
+ swizzles[0] = bld->static_state->swizzle_r;
+ swizzles[1] = bld->static_state->swizzle_g;
+ swizzles[2] = bld->static_state->swizzle_b;
+ swizzles[3] = bld->static_state->swizzle_a;
+
+ for (chan = 0; chan < 4; ++chan) {
+ unsigned swizzle = swizzles[chan];
+ texel[chan] = lp_build_swizzle_chan_soa(bld->texel_type,
+ unswizzled, swizzle);
+ }
+}
+
+
/**
- * Gen code to fetch a texel from a texture at int coords (x, y).
+ * Generate code to fetch a texel from a texture at int coords (x, y, z).
+ * The computation depends on whether the texture is 1D, 2D or 3D.
* The result, texel, will be:
* texel[0] = red values
* texel[1] = green values
@@ -121,15 +246,19 @@ static void
lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
LLVMValueRef width,
LLVMValueRef height,
+ LLVMValueRef depth,
LLVMValueRef x,
LLVMValueRef y,
+ LLVMValueRef z,
LLVMValueRef y_stride,
+ LLVMValueRef z_stride,
LLVMValueRef data_ptr,
LLVMValueRef *texel)
{
+ const int dims = texture_dims(bld->static_state->target);
struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
LLVMValueRef offset;
- LLVMValueRef packed;
+ LLVMValueRef i, j;
LLVMValueRef use_border = NULL;
/* use_border = x < 0 || x >= width || y < 0 || y >= height */
@@ -140,7 +269,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
}
- if (wrap_mode_uses_border_color(bld->static_state->wrap_t)) {
+ if (dims >= 2 && wrap_mode_uses_border_color(bld->static_state->wrap_t)) {
LLVMValueRef b1, b2;
b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
@@ -153,6 +282,58 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
}
}
+ if (dims == 3 && wrap_mode_uses_border_color(bld->static_state->wrap_r)) {
+ LLVMValueRef b1, b2;
+ b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
+ b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
+ if (use_border) {
+ use_border = LLVMBuildOr(bld->builder, use_border, b1, "ub_or_b1");
+ use_border = LLVMBuildOr(bld->builder, use_border, b2, "ub_or_b2");
+ }
+ else {
+ use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
+ }
+ }
+
+ /*
+ * Describe the coordinates in terms of pixel blocks.
+ *
+ * TODO: pixel blocks are power of two. LLVM should convert rem/div to
+ * bit arithmetic. Verify this.
+ */
+
+ if (bld->format_desc->block.width == 1) {
+ i = bld->uint_coord_bld.zero;
+ }
+ else {
+ LLVMValueRef block_width = lp_build_const_int_vec(bld->uint_coord_bld.type, bld->format_desc->block.width);
+ i = LLVMBuildURem(bld->builder, x, block_width, "");
+ x = LLVMBuildUDiv(bld->builder, x, block_width, "");
+ }
+
+ if (bld->format_desc->block.height == 1) {
+ j = bld->uint_coord_bld.zero;
+ }
+ else {
+ LLVMValueRef block_height = lp_build_const_int_vec(bld->uint_coord_bld.type, bld->format_desc->block.height);
+ j = LLVMBuildURem(bld->builder, y, block_height, "");
+ y = LLVMBuildUDiv(bld->builder, y, block_height, "");
+ }
+
+ /* convert x,y,z coords to linear offset from start of texture, in bytes */
+ offset = lp_build_sample_offset(&bld->uint_coord_bld,
+ bld->format_desc,
+ x, y, z, y_stride, z_stride);
+
+ lp_build_fetch_rgba_soa(bld->builder,
+ bld->format_desc,
+ bld->texel_type,
+ data_ptr, offset,
+ i, j,
+ texel);
+
+ lp_build_swizzle_soa(bld, texel);
+
/*
* Note: if we find an app which frequently samples the texture border
* we might want to implement a true conditional here to avoid sampling
@@ -168,35 +349,12 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
* the texel color results with the border color.
*/
- /* convert x,y coords to linear offset from start of texture, in bytes */
- offset = lp_build_sample_offset(&bld->uint_coord_bld,
- bld->format_desc,
- x, y, y_stride,
- data_ptr);
-
- assert(bld->format_desc->block.width == 1);
- assert(bld->format_desc->block.height == 1);
- assert(bld->format_desc->block.bits <= bld->texel_type.width);
-
- /* gather the texels from the texture */
- packed = lp_build_gather(bld->builder,
- bld->texel_type.length,
- bld->format_desc->block.bits,
- bld->texel_type.width,
- data_ptr, offset);
-
- /* convert texels to float rgba */
- lp_build_unpack_rgba_soa(bld->builder,
- bld->format_desc,
- bld->texel_type,
- packed, texel);
-
if (use_border) {
/* select texel color or border color depending on use_border */
int chan;
for (chan = 0; chan < 4; chan++) {
LLVMValueRef border_chan =
- lp_build_const_scalar(bld->texel_type,
+ lp_build_const_vec(bld->texel_type,
bld->static_state->border_color[chan]);
texel[chan] = lp_build_select(&bld->texel_bld, use_border,
border_chan, texel[chan]);
@@ -210,19 +368,22 @@ lp_build_sample_packed(struct lp_build_sample_context *bld,
LLVMValueRef x,
LLVMValueRef y,
LLVMValueRef y_stride,
- LLVMValueRef data_ptr)
+ LLVMValueRef data_array)
{
LLVMValueRef offset;
+ LLVMValueRef data_ptr;
offset = lp_build_sample_offset(&bld->uint_coord_bld,
bld->format_desc,
- x, y, y_stride,
- data_ptr);
+ x, y, NULL, y_stride, NULL);
assert(bld->format_desc->block.width == 1);
assert(bld->format_desc->block.height == 1);
assert(bld->format_desc->block.bits <= bld->texel_type.width);
+ /* get pointer to mipmap level 0 data */
+ data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
+
return lp_build_gather(bld->builder,
bld->texel_type.length,
bld->format_desc->block.bits,
@@ -358,18 +519,13 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
struct lp_build_context *coord_bld = &bld->coord_bld;
struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
- LLVMValueRef two = lp_build_const_scalar(coord_bld->type, 2.0);
- LLVMValueRef half = lp_build_const_scalar(coord_bld->type, 0.5);
+ LLVMValueRef two = lp_build_const_vec(coord_bld->type, 2.0);
+ LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5);
LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
- LLVMValueRef length_minus_one;
- LLVMValueRef length_f_minus_one;
+ LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
+ LLVMValueRef length_f_minus_one = lp_build_sub(coord_bld, length_f, coord_bld->one);
LLVMValueRef coord0, coord1, weight;
- /* XXX check for normalized vs. unnormalized coords */
-
- length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
- length_f_minus_one = lp_build_sub(coord_bld, length_f, coord_bld->one);
-
switch(wrap_mode) {
case PIPE_TEX_WRAP_REPEAT:
/* mul by size and subtract 0.5 */
@@ -394,7 +550,9 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
break;
case PIPE_TEX_WRAP_CLAMP:
- coord = lp_build_mul(coord_bld, coord, length_f);
+ if (bld->static_state->normalized_coords) {
+ coord = lp_build_mul(coord_bld, coord, length_f);
+ }
weight = lp_build_fract(coord_bld, coord);
coord0 = lp_build_clamp(coord_bld, coord, coord_bld->zero,
length_f_minus_one);
@@ -406,11 +564,20 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
break;
case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
- /* clamp to [0,1] */
- coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, coord_bld->one);
- /* mul by tex size and subtract 0.5 */
- coord = lp_build_mul(coord_bld, coord, length_f);
- coord = lp_build_sub(coord_bld, coord, half);
+ if (bld->static_state->normalized_coords) {
+ /* clamp to [0,1] */
+ coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, coord_bld->one);
+ /* mul by tex size and subtract 0.5 */
+ coord = lp_build_mul(coord_bld, coord, length_f);
+ coord = lp_build_sub(coord_bld, coord, half);
+ }
+ else {
+ LLVMValueRef min, max;
+ /* clamp to [0.5, length - 0.5] */
+ min = lp_build_const_vec(coord_bld->type, 0.5F);
+ max = lp_build_sub(coord_bld, length_f, min);
+ coord = lp_build_clamp(coord_bld, coord, min, max);
+ }
/* compute lerp weight */
weight = lp_build_fract(coord_bld, coord);
/* coord0 = floor(coord); */
@@ -425,16 +592,26 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
{
LLVMValueRef min, max;
- /* min = -1.0 / (2 * length) */
- min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
- min = lp_build_negate(coord_bld, min);
- /* max = 1.0 - min */
- max = lp_build_sub(coord_bld, coord_bld->one, min);
- /* coord = clamp(coord, min, max) */
- coord = lp_build_clamp(coord_bld, coord, min, max);
- /* scale coord to length (and sub 0.5?) */
- coord = lp_build_mul(coord_bld, coord, length_f);
- coord = lp_build_sub(coord_bld, coord, half);
+ if (bld->static_state->normalized_coords) {
+ /* min = -1.0 / (2 * length) = -0.5 / length */
+ min = lp_build_mul(coord_bld,
+ lp_build_const_vec(coord_bld->type, -0.5F),
+ lp_build_rcp(coord_bld, length_f));
+ /* max = 1.0 - min */
+ max = lp_build_sub(coord_bld, coord_bld->one, min);
+ /* coord = clamp(coord, min, max) */
+ coord = lp_build_clamp(coord_bld, coord, min, max);
+ /* scale coord to length (and sub 0.5?) */
+ coord = lp_build_mul(coord_bld, coord, length_f);
+ coord = lp_build_sub(coord_bld, coord, half);
+ }
+ else {
+ /* clamp to [-0.5, length + 0.5] */
+ min = lp_build_const_vec(coord_bld->type, -0.5F);
+ max = lp_build_sub(coord_bld, length_f, min);
+ coord = lp_build_clamp(coord_bld, coord, min, max);
+ coord = lp_build_sub(coord_bld, coord, half);
+ }
/* compute lerp weight */
weight = lp_build_fract(coord_bld, coord);
/* convert to int */
@@ -503,9 +680,10 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
{
LLVMValueRef min, max;
- /* min = -1.0 / (2 * length) */
- min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
- min = lp_build_negate(coord_bld, min);
+ /* min = -1.0 / (2 * length) = -0.5 / length */
+ min = lp_build_mul(coord_bld,
+ lp_build_const_vec(coord_bld->type, -0.5F),
+ lp_build_rcp(coord_bld, length_f));
/* max = 1.0 - min */
max = lp_build_sub(coord_bld, coord_bld->one, min);
@@ -521,6 +699,9 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
default:
assert(0);
+ coord0 = NULL;
+ coord1 = NULL;
+ weight = NULL;
}
*x0_out = coord0;
@@ -546,14 +727,12 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
struct lp_build_context *coord_bld = &bld->coord_bld;
struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
- LLVMValueRef two = lp_build_const_scalar(coord_bld->type, 2.0);
+ LLVMValueRef two = lp_build_const_vec(coord_bld->type, 2.0);
LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
LLVMValueRef length_f_minus_one = lp_build_sub(coord_bld, length_f, coord_bld->one);
LLVMValueRef icoord;
- /* XXX check for normalized vs. unnormalized coords */
-
switch(wrap_mode) {
case PIPE_TEX_WRAP_REPEAT:
coord = lp_build_mul(coord_bld, coord, length_f);
@@ -568,7 +747,9 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
case PIPE_TEX_WRAP_CLAMP:
/* mul by size */
- coord = lp_build_mul(coord_bld, coord, length_f);
+ if (bld->static_state->normalized_coords) {
+ coord = lp_build_mul(coord_bld, coord, length_f);
+ }
/* floor */
icoord = lp_build_ifloor(coord_bld, coord);
/* clamp to [0, size-1]. Note: int coord builder type */
@@ -579,12 +760,19 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
{
LLVMValueRef min, max;
- /* min = 1.0 / (2 * length) */
- min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
- /* max = length - min */
- max = lp_build_sub(coord_bld, length_f, min);
- /* scale coord to length */
- coord = lp_build_mul(coord_bld, coord, length_f);
+ if (bld->static_state->normalized_coords) {
+ /* min = 1.0 / (2 * length) */
+ min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
+ /* max = length - min */
+ max = lp_build_sub(coord_bld, length_f, min);
+ /* scale coord to length */
+ coord = lp_build_mul(coord_bld, coord, length_f);
+ }
+ else {
+ /* clamp to [0.5, length - 0.5] */
+ min = lp_build_const_vec(coord_bld->type, 0.5F);
+ max = lp_build_sub(coord_bld, length_f, min);
+ }
/* coord = clamp(coord, min, max) */
coord = lp_build_clamp(coord_bld, coord, min, max);
icoord = lp_build_ifloor(coord_bld, coord);
@@ -595,13 +783,21 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
/* Note: this is the same as CLAMP_TO_EDGE, except min = -min */
{
LLVMValueRef min, max;
- /* min = -1.0 / (2 * length) */
- min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
- min = lp_build_negate(coord_bld, min);
- /* max = length - min */
- max = lp_build_sub(coord_bld, length_f, min);
- /* scale coord to length */
- coord = lp_build_mul(coord_bld, coord, length_f);
+ if (bld->static_state->normalized_coords) {
+ /* min = -1.0 / (2 * length) = -0.5 / length */
+ min = lp_build_mul(coord_bld,
+ lp_build_const_vec(coord_bld->type, -0.5F),
+ lp_build_rcp(coord_bld, length_f));
+ /* max = length - min */
+ max = lp_build_sub(coord_bld, length_f, min);
+ /* scale coord to length */
+ coord = lp_build_mul(coord_bld, coord, length_f);
+ }
+ else {
+ /* clamp to [-0.5, length + 0.5] */
+ min = lp_build_const_vec(coord_bld->type, -0.5F);
+ max = lp_build_sub(coord_bld, length_f, min);
+ }
/* coord = clamp(coord, min, max) */
coord = lp_build_clamp(coord_bld, coord, min, max);
icoord = lp_build_ifloor(coord_bld, coord);
@@ -668,6 +864,7 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
default:
assert(0);
+ icoord = NULL;
}
return icoord;
@@ -675,83 +872,916 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
/**
- * Sample 2D texture with nearest filtering.
+ * Codegen equivalent for u_minify().
+ * Return max(1, base_size >> level);
+ */
+static LLVMValueRef
+lp_build_minify(struct lp_build_sample_context *bld,
+ LLVMValueRef base_size,
+ LLVMValueRef level)
+{
+ LLVMValueRef size = LLVMBuildAShr(bld->builder, base_size, level, "minify");
+ size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one);
+ return size;
+}
+
+
+/**
+ * Generate code to compute texture level of detail (lambda).
+ * \param s vector of texcoord s values
+ * \param t vector of texcoord t values
+ * \param r vector of texcoord r values
+ * \param shader_lod_bias vector float with the shader lod bias,
+ * \param width scalar int texture width
+ * \param height scalar int texture height
+ * \param depth scalar int texture depth
+ */
+static LLVMValueRef
+lp_build_lod_selector(struct lp_build_sample_context *bld,
+ LLVMValueRef s,
+ LLVMValueRef t,
+ LLVMValueRef r,
+ LLVMValueRef shader_lod_bias,
+ LLVMValueRef width,
+ LLVMValueRef height,
+ LLVMValueRef depth)
+
+{
+ if (bld->static_state->min_lod == bld->static_state->max_lod) {
+ /* User is forcing sampling from a particular mipmap level.
+ * This is hit during mipmap generation.
+ */
+ return LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod);
+ }
+ else {
+ const int dims = texture_dims(bld->static_state->target);
+ struct lp_build_context *float_bld = &bld->float_bld;
+ LLVMValueRef sampler_lod_bias = LLVMConstReal(LLVMFloatType(),
+ bld->static_state->lod_bias);
+ LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(),
+ bld->static_state->min_lod);
+ LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(),
+ bld->static_state->max_lod);
+
+ LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ LLVMValueRef index1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
+ LLVMValueRef index2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
+
+ LLVMValueRef s0, s1, s2;
+ LLVMValueRef t0, t1, t2;
+ LLVMValueRef r0, r1, r2;
+ LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy;
+ LLVMValueRef rho, lod;
+
+ /*
+ * dsdx = abs(s[1] - s[0]);
+ * dsdy = abs(s[2] - s[0]);
+ * dtdx = abs(t[1] - t[0]);
+ * dtdy = abs(t[2] - t[0]);
+ * drdx = abs(r[1] - r[0]);
+ * drdy = abs(r[2] - r[0]);
+ * XXX we're assuming a four-element quad in 2x2 layout here.
+ */
+ s0 = LLVMBuildExtractElement(bld->builder, s, index0, "s0");
+ s1 = LLVMBuildExtractElement(bld->builder, s, index1, "s1");
+ s2 = LLVMBuildExtractElement(bld->builder, s, index2, "s2");
+ dsdx = LLVMBuildSub(bld->builder, s1, s0, "");
+ dsdx = lp_build_abs(float_bld, dsdx);
+ dsdy = LLVMBuildSub(bld->builder, s2, s0, "");
+ dsdy = lp_build_abs(float_bld, dsdy);
+ if (dims > 1) {
+ t0 = LLVMBuildExtractElement(bld->builder, t, index0, "t0");
+ t1 = LLVMBuildExtractElement(bld->builder, t, index1, "t1");
+ t2 = LLVMBuildExtractElement(bld->builder, t, index2, "t2");
+ dtdx = LLVMBuildSub(bld->builder, t1, t0, "");
+ dtdx = lp_build_abs(float_bld, dtdx);
+ dtdy = LLVMBuildSub(bld->builder, t2, t0, "");
+ dtdy = lp_build_abs(float_bld, dtdy);
+ if (dims > 2) {
+ r0 = LLVMBuildExtractElement(bld->builder, r, index0, "r0");
+ r1 = LLVMBuildExtractElement(bld->builder, r, index1, "r1");
+ r2 = LLVMBuildExtractElement(bld->builder, r, index2, "r2");
+ drdx = LLVMBuildSub(bld->builder, r1, r0, "");
+ drdx = lp_build_abs(float_bld, drdx);
+ drdy = LLVMBuildSub(bld->builder, r2, r0, "");
+ drdy = lp_build_abs(float_bld, drdy);
+ }
+ }
+
+ /* Compute rho = max of all partial derivatives scaled by texture size.
+ * XXX this could be vectorized somewhat
+ */
+ rho = LLVMBuildMul(bld->builder,
+ lp_build_max(float_bld, dsdx, dsdy),
+ lp_build_int_to_float(float_bld, width), "");
+ if (dims > 1) {
+ LLVMValueRef max;
+ max = LLVMBuildMul(bld->builder,
+ lp_build_max(float_bld, dtdx, dtdy),
+ lp_build_int_to_float(float_bld, height), "");
+ rho = lp_build_max(float_bld, rho, max);
+ if (dims > 2) {
+ max = LLVMBuildMul(bld->builder,
+ lp_build_max(float_bld, drdx, drdy),
+ lp_build_int_to_float(float_bld, depth), "");
+ rho = lp_build_max(float_bld, rho, max);
+ }
+ }
+
+ /* compute lod = log2(rho) */
+ lod = lp_build_log2(float_bld, rho);
+
+ /* add sampler lod bias */
+ lod = LLVMBuildAdd(bld->builder, lod, sampler_lod_bias, "sampler LOD bias");
+
+ /* add shader lod bias */
+ /* XXX for now we take only the first element since our lod is scalar */
+ shader_lod_bias = LLVMBuildExtractElement(bld->builder, shader_lod_bias,
+ LLVMConstInt(LLVMInt32Type(), 0, 0), "");
+ lod = LLVMBuildAdd(bld->builder, lod, shader_lod_bias, "shader LOD bias");
+
+ /* clamp lod */
+ lod = lp_build_clamp(float_bld, lod, min_lod, max_lod);
+
+ return lod;
+ }
+}
+
+
+/**
+ * For PIPE_TEX_MIPFILTER_NEAREST, convert float LOD to integer
+ * mipmap level index.
+ * Note: this is all scalar code.
+ * \param lod scalar float texture level of detail
+ * \param level_out returns integer
*/
static void
-lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld,
- LLVMValueRef s,
- LLVMValueRef t,
- LLVMValueRef width,
- LLVMValueRef height,
- LLVMValueRef stride,
- LLVMValueRef data_ptr,
- LLVMValueRef *texel)
+lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
+ unsigned unit,
+ LLVMValueRef lod,
+ LLVMValueRef *level_out)
{
- LLVMValueRef x, y;
+ struct lp_build_context *float_bld = &bld->float_bld;
+ struct lp_build_context *int_bld = &bld->int_bld;
+ LLVMValueRef last_level, level;
- x = lp_build_sample_wrap_nearest(bld, s, width,
- bld->static_state->pot_width,
- bld->static_state->wrap_s);
- y = lp_build_sample_wrap_nearest(bld, t, height,
- bld->static_state->pot_height,
- bld->static_state->wrap_t);
+ LLVMValueRef zero = LLVMConstInt(LLVMInt32Type(), 0, 0);
- lp_build_name(x, "tex.x.wrapped");
- lp_build_name(y, "tex.y.wrapped");
+ last_level = bld->dynamic_state->last_level(bld->dynamic_state,
+ bld->builder, unit);
- lp_build_sample_texel_soa(bld, width, height, x, y, stride, data_ptr, texel);
+ /* convert float lod to integer */
+ level = lp_build_iround(float_bld, lod);
+
+ /* clamp level to legal range of levels */
+ *level_out = lp_build_clamp(int_bld, level, zero, last_level);
+}
+
+
+/**
+ * For PIPE_TEX_MIPFILTER_LINEAR, convert float LOD to integer to
+ * two (adjacent) mipmap level indexes. Later, we'll sample from those
+ * two mipmap levels and interpolate between them.
+ */
+static void
+lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
+ unsigned unit,
+ LLVMValueRef lod,
+ LLVMValueRef *level0_out,
+ LLVMValueRef *level1_out,
+ LLVMValueRef *weight_out)
+{
+ struct lp_build_context *float_bld = &bld->float_bld;
+ struct lp_build_context *int_bld = &bld->int_bld;
+ LLVMValueRef last_level, level;
+
+ last_level = bld->dynamic_state->last_level(bld->dynamic_state,
+ bld->builder, unit);
+
+ /* convert float lod to integer */
+ level = lp_build_ifloor(float_bld, lod);
+
+ /* compute level 0 and clamp to legal range of levels */
+ *level0_out = lp_build_clamp(int_bld, level,
+ int_bld->zero,
+ last_level);
+ /* compute level 1 and clamp to legal range of levels */
+ *level1_out = lp_build_add(int_bld, *level0_out, int_bld->one);
+ *level1_out = lp_build_min(int_bld, *level1_out, last_level);
+
+ *weight_out = lp_build_fract(float_bld, lod);
}
/**
- * Sample 2D texture with bilinear filtering.
+ * Generate code to sample a mipmap level with nearest filtering.
+ * If sampling a cube texture, r = cube face in [0,5].
*/
static void
-lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld,
+lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
+ LLVMValueRef width_vec,
+ LLVMValueRef height_vec,
+ LLVMValueRef depth_vec,
+ LLVMValueRef row_stride_vec,
+ LLVMValueRef img_stride_vec,
+ LLVMValueRef data_ptr,
LLVMValueRef s,
LLVMValueRef t,
- LLVMValueRef width,
- LLVMValueRef height,
- LLVMValueRef stride,
- LLVMValueRef data_ptr,
- LLVMValueRef *texel)
+ LLVMValueRef r,
+ LLVMValueRef colors_out[4])
{
- LLVMValueRef s_fpart;
- LLVMValueRef t_fpart;
- LLVMValueRef x0, x1;
- LLVMValueRef y0, y1;
+ const int dims = texture_dims(bld->static_state->target);
+ LLVMValueRef x, y, z;
+
+ /*
+ * Compute integer texcoords.
+ */
+ x = lp_build_sample_wrap_nearest(bld, s, width_vec,
+ bld->static_state->pot_width,
+ bld->static_state->wrap_s);
+ lp_build_name(x, "tex.x.wrapped");
+
+ if (dims >= 2) {
+ y = lp_build_sample_wrap_nearest(bld, t, height_vec,
+ bld->static_state->pot_height,
+ bld->static_state->wrap_t);
+ lp_build_name(y, "tex.y.wrapped");
+
+ if (dims == 3) {
+ z = lp_build_sample_wrap_nearest(bld, r, depth_vec,
+ bld->static_state->pot_height,
+ bld->static_state->wrap_r);
+ lp_build_name(z, "tex.z.wrapped");
+ }
+ else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
+ z = r;
+ }
+ else {
+ z = NULL;
+ }
+ }
+ else {
+ y = z = NULL;
+ }
+
+ /*
+ * Get texture colors.
+ */
+ lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
+ x, y, z,
+ row_stride_vec, img_stride_vec,
+ data_ptr, colors_out);
+}
+
+
+/**
+ * Generate code to sample a mipmap level with linear filtering.
+ * If sampling a cube texture, r = cube face in [0,5].
+ */
+static void
+lp_build_sample_image_linear(struct lp_build_sample_context *bld,
+ LLVMValueRef width_vec,
+ LLVMValueRef height_vec,
+ LLVMValueRef depth_vec,
+ LLVMValueRef row_stride_vec,
+ LLVMValueRef img_stride_vec,
+ LLVMValueRef data_ptr,
+ LLVMValueRef s,
+ LLVMValueRef t,
+ LLVMValueRef r,
+ LLVMValueRef colors_out[4])
+{
+ const int dims = texture_dims(bld->static_state->target);
+ LLVMValueRef x0, y0, z0, x1, y1, z1;
+ LLVMValueRef s_fpart, t_fpart, r_fpart;
LLVMValueRef neighbors[2][2][4];
- unsigned chan;
+ int chan;
+
+ /*
+ * Compute integer texcoords.
+ */
+ lp_build_sample_wrap_linear(bld, s, width_vec,
+ bld->static_state->pot_width,
+ bld->static_state->wrap_s,
+ &x0, &x1, &s_fpart);
+ lp_build_name(x0, "tex.x0.wrapped");
+ lp_build_name(x1, "tex.x1.wrapped");
+
+ if (dims >= 2) {
+ lp_build_sample_wrap_linear(bld, t, height_vec,
+ bld->static_state->pot_height,
+ bld->static_state->wrap_t,
+ &y0, &y1, &t_fpart);
+ lp_build_name(y0, "tex.y0.wrapped");
+ lp_build_name(y1, "tex.y1.wrapped");
+
+ if (dims == 3) {
+ lp_build_sample_wrap_linear(bld, r, depth_vec,
+ bld->static_state->pot_depth,
+ bld->static_state->wrap_r,
+ &z0, &z1, &r_fpart);
+ lp_build_name(z0, "tex.z0.wrapped");
+ lp_build_name(z1, "tex.z1.wrapped");
+ }
+ else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
+ z0 = z1 = r; /* cube face */
+ r_fpart = NULL;
+ }
+ else {
+ z0 = z1 = NULL;
+ r_fpart = NULL;
+ }
+ }
+ else {
+ y0 = y1 = t_fpart = NULL;
+ z0 = z1 = r_fpart = NULL;
+ }
- lp_build_sample_wrap_linear(bld, s, width, bld->static_state->pot_width,
- bld->static_state->wrap_s, &x0, &x1, &s_fpart);
- lp_build_sample_wrap_linear(bld, t, height, bld->static_state->pot_height,
- bld->static_state->wrap_t, &y0, &y1, &t_fpart);
+ /*
+ * Get texture colors.
+ */
+ /* get x0/x1 texels */
+ lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
+ x0, y0, z0,
+ row_stride_vec, img_stride_vec,
+ data_ptr, neighbors[0][0]);
+ lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
+ x1, y0, z0,
+ row_stride_vec, img_stride_vec,
+ data_ptr, neighbors[0][1]);
+
+ if (dims == 1) {
+ /* Interpolate two samples from 1D image to produce one color */
+ for (chan = 0; chan < 4; chan++) {
+ colors_out[chan] = lp_build_lerp(&bld->texel_bld, s_fpart,
+ neighbors[0][0][chan],
+ neighbors[0][1][chan]);
+ }
+ }
+ else {
+ /* 2D/3D texture */
+ LLVMValueRef colors0[4];
+
+ /* get x0/x1 texels at y1 */
+ lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
+ x0, y1, z0,
+ row_stride_vec, img_stride_vec,
+ data_ptr, neighbors[1][0]);
+ lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
+ x1, y1, z0,
+ row_stride_vec, img_stride_vec,
+ data_ptr, neighbors[1][1]);
+
+ /* Bilinear interpolate the four samples from the 2D image / 3D slice */
+ for (chan = 0; chan < 4; chan++) {
+ colors0[chan] = lp_build_lerp_2d(&bld->texel_bld,
+ s_fpart, t_fpart,
+ neighbors[0][0][chan],
+ neighbors[0][1][chan],
+ neighbors[1][0][chan],
+ neighbors[1][1][chan]);
+ }
- lp_build_sample_texel_soa(bld, width, height, x0, y0, stride, data_ptr, neighbors[0][0]);
- lp_build_sample_texel_soa(bld, width, height, x1, y0, stride, data_ptr, neighbors[0][1]);
- lp_build_sample_texel_soa(bld, width, height, x0, y1, stride, data_ptr, neighbors[1][0]);
- lp_build_sample_texel_soa(bld, width, height, x1, y1, stride, data_ptr, neighbors[1][1]);
+ if (dims == 3) {
+ LLVMValueRef neighbors1[2][2][4];
+ LLVMValueRef colors1[4];
+
+ /* get x0/x1/y0/y1 texels at z1 */
+ lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
+ x0, y0, z1,
+ row_stride_vec, img_stride_vec,
+ data_ptr, neighbors1[0][0]);
+ lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
+ x1, y0, z1,
+ row_stride_vec, img_stride_vec,
+ data_ptr, neighbors1[0][1]);
+ lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
+ x0, y1, z1,
+ row_stride_vec, img_stride_vec,
+ data_ptr, neighbors1[1][0]);
+ lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
+ x1, y1, z1,
+ row_stride_vec, img_stride_vec,
+ data_ptr, neighbors1[1][1]);
+
+ /* Bilinear interpolate the four samples from the second Z slice */
+ for (chan = 0; chan < 4; chan++) {
+ colors1[chan] = lp_build_lerp_2d(&bld->texel_bld,
+ s_fpart, t_fpart,
+ neighbors1[0][0][chan],
+ neighbors1[0][1][chan],
+ neighbors1[1][0][chan],
+ neighbors1[1][1][chan]);
+ }
+
+ /* Linearly interpolate the two samples from the two 3D slices */
+ for (chan = 0; chan < 4; chan++) {
+ colors_out[chan] = lp_build_lerp(&bld->texel_bld,
+ r_fpart,
+ colors0[chan], colors1[chan]);
+ }
+ }
+ else {
+ /* 2D tex */
+ for (chan = 0; chan < 4; chan++) {
+ colors_out[chan] = colors0[chan];
+ }
+ }
+ }
+}
- /* TODO: Don't interpolate missing channels */
- for(chan = 0; chan < 4; ++chan) {
- texel[chan] = lp_build_lerp_2d(&bld->texel_bld,
- s_fpart, t_fpart,
- neighbors[0][0][chan],
- neighbors[0][1][chan],
- neighbors[1][0][chan],
- neighbors[1][1][chan]);
+
+/** Helper used by lp_build_cube_lookup() */
+static LLVMValueRef
+lp_build_cube_ima(struct lp_build_context *coord_bld, LLVMValueRef coord)
+{
+ /* ima = -0.5 / abs(coord); */
+ LLVMValueRef negHalf = lp_build_const_vec(coord_bld->type, -0.5);
+ LLVMValueRef absCoord = lp_build_abs(coord_bld, coord);
+ LLVMValueRef ima = lp_build_mul(coord_bld, negHalf,
+ lp_build_rcp(coord_bld, absCoord));
+ return ima;
+}
+
+
+/**
+ * Helper used by lp_build_cube_lookup()
+ * \param sign scalar +1 or -1
+ * \param coord float vector
+ * \param ima float vector
+ */
+static LLVMValueRef
+lp_build_cube_coord(struct lp_build_context *coord_bld,
+ LLVMValueRef sign, int negate_coord,
+ LLVMValueRef coord, LLVMValueRef ima)
+{
+ /* return negate(coord) * ima * sign + 0.5; */
+ LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5);
+ LLVMValueRef res;
+
+ assert(negate_coord == +1 || negate_coord == -1);
+
+ if (negate_coord == -1) {
+ coord = lp_build_negate(coord_bld, coord);
+ }
+
+ res = lp_build_mul(coord_bld, coord, ima);
+ if (sign) {
+ sign = lp_build_broadcast_scalar(coord_bld, sign);
+ res = lp_build_mul(coord_bld, res, sign);
+ }
+ res = lp_build_add(coord_bld, res, half);
+
+ return res;
+}
+
+
+/** Helper used by lp_build_cube_lookup()
+ * Return (major_coord >= 0) ? pos_face : neg_face;
+ */
+static LLVMValueRef
+lp_build_cube_face(struct lp_build_sample_context *bld,
+ LLVMValueRef major_coord,
+ unsigned pos_face, unsigned neg_face)
+{
+ LLVMValueRef cmp = LLVMBuildFCmp(bld->builder, LLVMRealUGE,
+ major_coord,
+ bld->float_bld.zero, "");
+ LLVMValueRef pos = LLVMConstInt(LLVMInt32Type(), pos_face, 0);
+ LLVMValueRef neg = LLVMConstInt(LLVMInt32Type(), neg_face, 0);
+ LLVMValueRef res = LLVMBuildSelect(bld->builder, cmp, pos, neg, "");
+ return res;
+}
+
+
+
+/**
+ * Generate code to do cube face selection and per-face texcoords.
+ */
+static void
+lp_build_cube_lookup(struct lp_build_sample_context *bld,
+ LLVMValueRef s,
+ LLVMValueRef t,
+ LLVMValueRef r,
+ LLVMValueRef *face,
+ LLVMValueRef *face_s,
+ LLVMValueRef *face_t)
+{
+ struct lp_build_context *float_bld = &bld->float_bld;
+ struct lp_build_context *coord_bld = &bld->coord_bld;
+ LLVMValueRef rx, ry, rz;
+ LLVMValueRef arx, ary, arz;
+ LLVMValueRef c25 = LLVMConstReal(LLVMFloatType(), 0.25);
+ LLVMValueRef arx_ge_ary, arx_ge_arz;
+ LLVMValueRef ary_ge_arx, ary_ge_arz;
+ LLVMValueRef arx_ge_ary_arz, ary_ge_arx_arz;
+ LLVMValueRef rx_pos, ry_pos, rz_pos;
+
+ assert(bld->coord_bld.type.length == 4);
+
+ /*
+ * Use the average of the four pixel's texcoords to choose the face.
+ */
+ rx = lp_build_mul(float_bld, c25,
+ lp_build_sum_vector(&bld->coord_bld, s));
+ ry = lp_build_mul(float_bld, c25,
+ lp_build_sum_vector(&bld->coord_bld, t));
+ rz = lp_build_mul(float_bld, c25,
+ lp_build_sum_vector(&bld->coord_bld, r));
+
+ arx = lp_build_abs(float_bld, rx);
+ ary = lp_build_abs(float_bld, ry);
+ arz = lp_build_abs(float_bld, rz);
+
+ /*
+ * Compare sign/magnitude of rx,ry,rz to determine face
+ */
+ arx_ge_ary = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, ary, "");
+ arx_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, arz, "");
+ ary_ge_arx = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arx, "");
+ ary_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arz, "");
+
+ arx_ge_ary_arz = LLVMBuildAnd(bld->builder, arx_ge_ary, arx_ge_arz, "");
+ ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, "");
+
+ rx_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rx, float_bld->zero, "");
+ ry_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ry, float_bld->zero, "");
+ rz_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rz, float_bld->zero, "");
+
+ {
+ struct lp_build_flow_context *flow_ctx;
+ struct lp_build_if_state if_ctx;
+
+ flow_ctx = lp_build_flow_create(bld->builder);
+ lp_build_flow_scope_begin(flow_ctx);
+
+ *face_s = bld->coord_bld.undef;
+ *face_t = bld->coord_bld.undef;
+ *face = bld->int_bld.undef;
+
+ lp_build_name(*face_s, "face_s");
+ lp_build_name(*face_t, "face_t");
+ lp_build_name(*face, "face");
+
+ lp_build_flow_scope_declare(flow_ctx, face_s);
+ lp_build_flow_scope_declare(flow_ctx, face_t);
+ lp_build_flow_scope_declare(flow_ctx, face);
+
+ lp_build_if(&if_ctx, flow_ctx, bld->builder, arx_ge_ary_arz);
+ {
+ /* +/- X face */
+ LLVMValueRef sign = lp_build_sgn(float_bld, rx);
+ LLVMValueRef ima = lp_build_cube_ima(coord_bld, s);
+ *face_s = lp_build_cube_coord(coord_bld, sign, +1, r, ima);
+ *face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
+ *face = lp_build_cube_face(bld, rx,
+ PIPE_TEX_FACE_POS_X,
+ PIPE_TEX_FACE_NEG_X);
+ }
+ lp_build_else(&if_ctx);
+ {
+ struct lp_build_flow_context *flow_ctx2;
+ struct lp_build_if_state if_ctx2;
+
+ LLVMValueRef face_s2 = bld->coord_bld.undef;
+ LLVMValueRef face_t2 = bld->coord_bld.undef;
+ LLVMValueRef face2 = bld->int_bld.undef;
+
+ flow_ctx2 = lp_build_flow_create(bld->builder);
+ lp_build_flow_scope_begin(flow_ctx2);
+ lp_build_flow_scope_declare(flow_ctx2, &face_s2);
+ lp_build_flow_scope_declare(flow_ctx2, &face_t2);
+ lp_build_flow_scope_declare(flow_ctx2, &face2);
+
+ ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, "");
+
+ lp_build_if(&if_ctx2, flow_ctx2, bld->builder, ary_ge_arx_arz);
+ {
+ /* +/- Y face */
+ LLVMValueRef sign = lp_build_sgn(float_bld, ry);
+ LLVMValueRef ima = lp_build_cube_ima(coord_bld, t);
+ face_s2 = lp_build_cube_coord(coord_bld, NULL, -1, s, ima);
+ face_t2 = lp_build_cube_coord(coord_bld, sign, -1, r, ima);
+ face2 = lp_build_cube_face(bld, ry,
+ PIPE_TEX_FACE_POS_Y,
+ PIPE_TEX_FACE_NEG_Y);
+ }
+ lp_build_else(&if_ctx2);
+ {
+ /* +/- Z face */
+ LLVMValueRef sign = lp_build_sgn(float_bld, rz);
+ LLVMValueRef ima = lp_build_cube_ima(coord_bld, r);
+ face_s2 = lp_build_cube_coord(coord_bld, sign, -1, s, ima);
+ face_t2 = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
+ face2 = lp_build_cube_face(bld, rz,
+ PIPE_TEX_FACE_POS_Z,
+ PIPE_TEX_FACE_NEG_Z);
+ }
+ lp_build_endif(&if_ctx2);
+ lp_build_flow_scope_end(flow_ctx2);
+ lp_build_flow_destroy(flow_ctx2);
+
+ *face_s = face_s2;
+ *face_t = face_t2;
+ *face = face2;
+ }
+
+ lp_build_endif(&if_ctx);
+ lp_build_flow_scope_end(flow_ctx);
+ lp_build_flow_destroy(flow_ctx);
+ }
+}
+
+
+
+/**
+ * Sample the texture/mipmap using given image filter and mip filter.
+ * data0_ptr and data1_ptr point to the two mipmap levels to sample
+ * from. width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
+ * If we're using nearest miplevel sampling the '1' values will be null/unused.
+ */
+static void
+lp_build_sample_mipmap(struct lp_build_sample_context *bld,
+ unsigned img_filter,
+ unsigned mip_filter,
+ LLVMValueRef s,
+ LLVMValueRef t,
+ LLVMValueRef r,
+ LLVMValueRef lod_fpart,
+ LLVMValueRef width0_vec,
+ LLVMValueRef width1_vec,
+ LLVMValueRef height0_vec,
+ LLVMValueRef height1_vec,
+ LLVMValueRef depth0_vec,
+ LLVMValueRef depth1_vec,
+ LLVMValueRef row_stride0_vec,
+ LLVMValueRef row_stride1_vec,
+ LLVMValueRef img_stride0_vec,
+ LLVMValueRef img_stride1_vec,
+ LLVMValueRef data_ptr0,
+ LLVMValueRef data_ptr1,
+ LLVMValueRef *colors_out)
+{
+ LLVMValueRef colors0[4], colors1[4];
+ int chan;
+
+ if (img_filter == PIPE_TEX_FILTER_NEAREST) {
+ lp_build_sample_image_nearest(bld,
+ width0_vec, height0_vec, depth0_vec,
+ row_stride0_vec, img_stride0_vec,
+ data_ptr0, s, t, r, colors0);
+
+ if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
+ /* sample the second mipmap level, and interp */
+ lp_build_sample_image_nearest(bld,
+ width1_vec, height1_vec, depth1_vec,
+ row_stride1_vec, img_stride1_vec,
+ data_ptr1, s, t, r, colors1);
+ }
+ }
+ else {
+ assert(img_filter == PIPE_TEX_FILTER_LINEAR);
+
+ lp_build_sample_image_linear(bld,
+ width0_vec, height0_vec, depth0_vec,
+ row_stride0_vec, img_stride0_vec,
+ data_ptr0, s, t, r, colors0);
+
+ if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
+ /* sample the second mipmap level, and interp */
+ lp_build_sample_image_linear(bld,
+ width1_vec, height1_vec, depth1_vec,
+ row_stride1_vec, img_stride1_vec,
+ data_ptr1, s, t, r, colors1);
+ }
+ }
+
+ if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
+ /* interpolate samples from the two mipmap levels */
+ for (chan = 0; chan < 4; chan++) {
+ colors_out[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
+ colors0[chan], colors1[chan]);
+ }
+ }
+ else {
+ /* use first/only level's colors */
+ for (chan = 0; chan < 4; chan++) {
+ colors_out[chan] = colors0[chan];
+ }
+ }
+}
+
+
+
+/**
+ * General texture sampling codegen.
+ * This function handles texture sampling for all texture targets (1D,
+ * 2D, 3D, cube) and all filtering modes.
+ */
+static void
+lp_build_sample_general(struct lp_build_sample_context *bld,
+ unsigned unit,
+ LLVMValueRef s,
+ LLVMValueRef t,
+ LLVMValueRef r,
+ LLVMValueRef lodbias,
+ LLVMValueRef width,
+ LLVMValueRef height,
+ LLVMValueRef depth,
+ LLVMValueRef width_vec,
+ LLVMValueRef height_vec,
+ LLVMValueRef depth_vec,
+ LLVMValueRef row_stride_array,
+ LLVMValueRef img_stride_array,
+ LLVMValueRef data_array,
+ LLVMValueRef *colors_out)
+{
+ struct lp_build_context *float_bld = &bld->float_bld;
+ const unsigned mip_filter = bld->static_state->min_mip_filter;
+ const unsigned min_filter = bld->static_state->min_img_filter;
+ const unsigned mag_filter = bld->static_state->mag_img_filter;
+ const int dims = texture_dims(bld->static_state->target);
+ LLVMValueRef lod = NULL, lod_fpart = NULL;
+ LLVMValueRef ilevel0, ilevel1 = NULL, ilevel0_vec, ilevel1_vec = NULL;
+ LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL;
+ LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL;
+ LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL;
+ LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL;
+ LLVMValueRef data_ptr0, data_ptr1 = NULL;
+
+ /*
+ printf("%s mip %d min %d mag %d\n", __FUNCTION__,
+ mip_filter, min_filter, mag_filter);
+ */
+
+ /*
+ * Compute the level of detail (float).
+ */
+ if (min_filter != mag_filter ||
+ mip_filter != PIPE_TEX_MIPFILTER_NONE) {
+ /* Need to compute lod either to choose mipmap levels or to
+ * distinguish between minification/magnification with one mipmap level.
+ */
+ lod = lp_build_lod_selector(bld, s, t, r, lodbias, width, height, depth);
+ }
+
+ /*
+ * Compute integer mipmap level(s) to fetch texels from.
+ */
+ if (mip_filter == PIPE_TEX_MIPFILTER_NONE) {
+ /* always use mip level 0 */
+ ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ }
+ else {
+ if (mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
+ lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
+ }
+ else {
+ assert(mip_filter == PIPE_TEX_MIPFILTER_LINEAR);
+ lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1,
+ &lod_fpart);
+ lod_fpart = lp_build_broadcast_scalar(&bld->coord_bld, lod_fpart);
+ }
+ }
+
+ /*
+ * Convert scalar integer mipmap levels into vectors.
+ */
+ ilevel0_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel0);
+ if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
+ ilevel1_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel1);
+
+ /*
+ * Compute width, height at mipmap level 'ilevel0'
+ */
+ width0_vec = lp_build_minify(bld, width_vec, ilevel0_vec);
+ if (dims >= 2) {
+ height0_vec = lp_build_minify(bld, height_vec, ilevel0_vec);
+ row_stride0_vec = lp_build_get_level_stride_vec(bld, row_stride_array,
+ ilevel0);
+ if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
+ img_stride0_vec = lp_build_get_level_stride_vec(bld,
+ img_stride_array,
+ ilevel0);
+ if (dims == 3) {
+ depth0_vec = lp_build_minify(bld, depth_vec, ilevel0_vec);
+ }
+ }
+ }
+ if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
+ /* compute width, height, depth for second mipmap level at 'ilevel1' */
+ width1_vec = lp_build_minify(bld, width_vec, ilevel1_vec);
+ if (dims >= 2) {
+ height1_vec = lp_build_minify(bld, height_vec, ilevel1_vec);
+ row_stride1_vec = lp_build_get_level_stride_vec(bld, row_stride_array,
+ ilevel1);
+ if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
+ img_stride1_vec = lp_build_get_level_stride_vec(bld,
+ img_stride_array,
+ ilevel1);
+ if (dims ==3) {
+ depth1_vec = lp_build_minify(bld, depth_vec, ilevel1_vec);
+ }
+ }
+ }
+ }
+
+ /*
+ * Choose cube face, recompute per-face texcoords.
+ */
+ if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
+ LLVMValueRef face, face_s, face_t;
+ lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t);
+ s = face_s; /* vec */
+ t = face_t; /* vec */
+ /* use 'r' to indicate cube face */
+ r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */
+ }
+
+ /*
+ * Get pointer(s) to image data for mipmap level(s).
+ */
+ data_ptr0 = lp_build_get_mipmap_level(bld, data_array, ilevel0);
+ if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
+ data_ptr1 = lp_build_get_mipmap_level(bld, data_array, ilevel1);
+ }
+
+ /*
+ * Get/interpolate texture colors.
+ */
+ if (min_filter == mag_filter) {
+ /* no need to distinquish between minification and magnification */
+ lp_build_sample_mipmap(bld, min_filter, mip_filter, s, t, r, lod_fpart,
+ width0_vec, width1_vec,
+ height0_vec, height1_vec,
+ depth0_vec, depth1_vec,
+ row_stride0_vec, row_stride1_vec,
+ img_stride0_vec, img_stride1_vec,
+ data_ptr0, data_ptr1,
+ colors_out);
+ }
+ else {
+ /* Emit conditional to choose min image filter or mag image filter
+ * depending on the lod being >0 or <= 0, respectively.
+ */
+ struct lp_build_flow_context *flow_ctx;
+ struct lp_build_if_state if_ctx;
+ LLVMValueRef minify;
+
+ flow_ctx = lp_build_flow_create(bld->builder);
+ lp_build_flow_scope_begin(flow_ctx);
+
+ lp_build_flow_scope_declare(flow_ctx, &colors_out[0]);
+ lp_build_flow_scope_declare(flow_ctx, &colors_out[1]);
+ lp_build_flow_scope_declare(flow_ctx, &colors_out[2]);
+ lp_build_flow_scope_declare(flow_ctx, &colors_out[3]);
+
+ /* minify = lod > 0.0 */
+ minify = LLVMBuildFCmp(bld->builder, LLVMRealUGE,
+ lod, float_bld->zero, "");
+
+ lp_build_if(&if_ctx, flow_ctx, bld->builder, minify);
+ {
+ /* Use the minification filter */
+ lp_build_sample_mipmap(bld, min_filter, mip_filter,
+ s, t, r, lod_fpart,
+ width0_vec, width1_vec,
+ height0_vec, height1_vec,
+ depth0_vec, depth1_vec,
+ row_stride0_vec, row_stride1_vec,
+ img_stride0_vec, img_stride1_vec,
+ data_ptr0, data_ptr1,
+ colors_out);
+ }
+ lp_build_else(&if_ctx);
+ {
+ /* Use the magnification filter */
+ lp_build_sample_mipmap(bld, mag_filter, mip_filter,
+ s, t, r, lod_fpart,
+ width0_vec, width1_vec,
+ height0_vec, height1_vec,
+ depth0_vec, depth1_vec,
+ row_stride0_vec, row_stride1_vec,
+ img_stride0_vec, img_stride1_vec,
+ data_ptr0, data_ptr1,
+ colors_out);
+ }
+ lp_build_endif(&if_ctx);
+
+ lp_build_flow_scope_end(flow_ctx);
+ lp_build_flow_destroy(flow_ctx);
}
}
+
static void
lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder,
struct lp_type dst_type,
LLVMValueRef packed,
LLVMValueRef *rgba)
{
- LLVMValueRef mask = lp_build_int_const_scalar(dst_type, 0xff);
+ LLVMValueRef mask = lp_build_const_int_vec(dst_type, 0xff);
unsigned chan;
/* Decode the input vector components */
@@ -763,7 +1793,7 @@ lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder,
input = packed;
if(start)
- input = LLVMBuildLShr(builder, input, lp_build_int_const_scalar(dst_type, start), "");
+ input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(dst_type, start), "");
if(stop < 32)
input = LLVMBuildAnd(builder, input, mask, "");
@@ -781,8 +1811,8 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
LLVMValueRef t,
LLVMValueRef width,
LLVMValueRef height,
- LLVMValueRef stride,
- LLVMValueRef data_ptr,
+ LLVMValueRef stride_array,
+ LLVMValueRef data_array,
LLVMValueRef *texel)
{
LLVMBuilderRef builder = bld->builder;
@@ -798,8 +1828,9 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
LLVMValueRef neighbors_hi[2][2];
LLVMValueRef packed, packed_lo, packed_hi;
LLVMValueRef unswizzled[4];
+ LLVMValueRef stride;
- lp_build_context_init(&i32, builder, lp_type_int(32));
+ lp_build_context_init(&i32, builder, lp_type_int_vec(32));
lp_build_context_init(&h16, builder, lp_type_ufixed(16));
lp_build_context_init(&u8n, builder, lp_type_unorm(8));
@@ -824,17 +1855,17 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
/* subtract 0.5 (add -128) */
- i32_c128 = lp_build_int_const_scalar(i32.type, -128);
+ i32_c128 = lp_build_const_int_vec(i32.type, -128);
s = LLVMBuildAdd(builder, s, i32_c128, "");
t = LLVMBuildAdd(builder, t, i32_c128, "");
/* compute floor (shift right 8) */
- i32_c8 = lp_build_int_const_scalar(i32.type, 8);
+ i32_c8 = lp_build_const_int_vec(i32.type, 8);
s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
/* compute fractional part (AND with 0xff) */
- i32_c255 = lp_build_int_const_scalar(i32.type, 255);
+ i32_c255 = lp_build_const_int_vec(i32.type, 255);
s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
@@ -905,6 +1936,8 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, "");
}
+ stride = lp_build_get_const_level_stride_vec(bld, stride_array, 0);
+
/*
* Fetch the pixels as 4 x 32bit (rgba order might differ):
*
@@ -922,10 +1955,10 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
* The higher 8 bits of the resulting elements will be zero.
*/
- neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_ptr);
- neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_ptr);
- neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_ptr);
- neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_ptr);
+ neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_array);
+ neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_array);
+ neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_array);
+ neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_array);
neighbors[0][0] = LLVMBuildBitCast(builder, neighbors[0][0], u8n_vec_type, "");
neighbors[0][1] = LLVMBuildBitCast(builder, neighbors[0][1], u8n_vec_type, "");
@@ -970,6 +2003,8 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
lp_build_format_swizzle_soa(bld->format_desc,
bld->texel_type, unswizzled,
texel);
+
+ lp_build_swizzle_soa(bld, texel);
}
@@ -999,7 +2034,7 @@ lp_build_sample_compare(struct lp_build_sample_context *bld,
}
assert(res);
- res = lp_build_mul(texel_bld, res, lp_build_const_scalar(texel_bld->type, 0.25));
+ res = lp_build_mul(texel_bld, res, lp_build_const_vec(texel_bld->type, 0.25));
/* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
for(chan = 0; chan < 3; ++chan)
@@ -1012,6 +2047,7 @@ lp_build_sample_compare(struct lp_build_sample_context *bld,
* Build texture sampling code.
* 'texel' will return a vector of four LLVMValueRefs corresponding to
* R, G, B, A.
+ * \param type vector float type to use for coords, etc.
*/
void
lp_build_sample_soa(LLVMBuilderRef builder,
@@ -1025,13 +2061,14 @@ lp_build_sample_soa(LLVMBuilderRef builder,
LLVMValueRef *texel)
{
struct lp_build_sample_context bld;
- LLVMValueRef width;
- LLVMValueRef height;
- LLVMValueRef stride;
- LLVMValueRef data_ptr;
+ LLVMValueRef width, width_vec;
+ LLVMValueRef height, height_vec;
+ LLVMValueRef depth, depth_vec;
+ LLVMValueRef row_stride_array, img_stride_array;
+ LLVMValueRef data_array;
LLVMValueRef s;
LLVMValueRef t;
- LLVMValueRef p;
+ LLVMValueRef r;
/* Setup our build context */
memset(&bld, 0, sizeof bld);
@@ -1039,10 +2076,16 @@ lp_build_sample_soa(LLVMBuilderRef builder,
bld.static_state = static_state;
bld.dynamic_state = dynamic_state;
bld.format_desc = util_format_description(static_state->format);
+
+ bld.float_type = lp_type_float(32);
+ bld.int_type = lp_type_int(32);
bld.coord_type = type;
bld.uint_coord_type = lp_uint_type(type);
bld.int_coord_type = lp_int_type(type);
bld.texel_type = type;
+
+ lp_build_context_init(&bld.float_bld, builder, bld.float_type);
+ lp_build_context_init(&bld.int_bld, builder, bld.int_type);
lp_build_context_init(&bld.coord_bld, builder, bld.coord_type);
lp_build_context_init(&bld.uint_coord_bld, builder, bld.uint_coord_type);
lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type);
@@ -1051,41 +2094,39 @@ lp_build_sample_soa(LLVMBuilderRef builder,
/* Get the dynamic state */
width = dynamic_state->width(dynamic_state, builder, unit);
height = dynamic_state->height(dynamic_state, builder, unit);
- stride = dynamic_state->stride(dynamic_state, builder, unit);
- data_ptr = dynamic_state->data_ptr(dynamic_state, builder, unit);
+ depth = dynamic_state->depth(dynamic_state, builder, unit);
+ row_stride_array = dynamic_state->row_stride(dynamic_state, builder, unit);
+ img_stride_array = dynamic_state->img_stride(dynamic_state, builder, unit);
+ data_array = dynamic_state->data_ptr(dynamic_state, builder, unit);
+ /* Note that data_array is an array[level] of pointers to texture images */
s = coords[0];
t = coords[1];
- p = coords[2];
-
- width = lp_build_broadcast_scalar(&bld.uint_coord_bld, width);
- height = lp_build_broadcast_scalar(&bld.uint_coord_bld, height);
- stride = lp_build_broadcast_scalar(&bld.uint_coord_bld, stride);
-
- if(static_state->target == PIPE_TEXTURE_1D)
- t = bld.coord_bld.zero;
-
- switch (static_state->min_img_filter) {
- case PIPE_TEX_FILTER_NEAREST:
- lp_build_sample_2d_nearest_soa(&bld, s, t, width, height,
- stride, data_ptr, texel);
- break;
- case PIPE_TEX_FILTER_LINEAR:
- if(lp_format_is_rgba8(bld.format_desc) &&
- is_simple_wrap_mode(static_state->wrap_s) &&
- is_simple_wrap_mode(static_state->wrap_t))
- lp_build_sample_2d_linear_aos(&bld, s, t, width, height,
- stride, data_ptr, texel);
- else
- lp_build_sample_2d_linear_soa(&bld, s, t, width, height,
- stride, data_ptr, texel);
- break;
- default:
- assert(0);
+ r = coords[2];
+
+ width_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, width);
+ height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height);
+ depth_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, depth);
+
+ if (util_format_is_rgba8_variant(bld.format_desc) &&
+ static_state->target == PIPE_TEXTURE_2D &&
+ static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR &&
+ static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR &&
+ static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE &&
+ is_simple_wrap_mode(static_state->wrap_s) &&
+ is_simple_wrap_mode(static_state->wrap_t)) {
+ /* special case */
+ lp_build_sample_2d_linear_aos(&bld, s, t, width_vec, height_vec,
+ row_stride_array, data_array, texel);
+ }
+ else {
+ lp_build_sample_general(&bld, unit, s, t, r, lodbias,
+ width, height, depth,
+ width_vec, height_vec, depth_vec,
+ row_stride_array, img_stride_array,
+ data_array,
+ texel);
}
- /* FIXME: respect static_state->min_mip_filter */;
- /* FIXME: respect static_state->mag_img_filter */;
-
- lp_build_sample_compare(&bld, p, texel);
+ lp_build_sample_compare(&bld, r, texel);
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_struct.h b/src/gallium/auxiliary/gallivm/lp_bld_struct.h
index 740392f5611..147336edb4b 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_struct.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_struct.h
@@ -37,7 +37,7 @@
#define LP_BLD_STRUCT_H
-#include <llvm-c/Core.h>
+#include "gallivm/lp_bld.h"
#include <llvm-c/Target.h>
#include "util/u_debug.h"
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c
index 64e81f7b1fe..278c838eaca 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c
@@ -144,9 +144,9 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
#endif
if(shift > 0)
- tmp = LLVMBuildLShr(bld->builder, a, lp_build_int_const_scalar(type4, shift*type.width), "");
+ tmp = LLVMBuildLShr(bld->builder, a, lp_build_const_int_vec(type4, shift*type.width), "");
if(shift < 0)
- tmp = LLVMBuildShl(bld->builder, a, lp_build_int_const_scalar(type4, -shift*type.width), "");
+ tmp = LLVMBuildShl(bld->builder, a, lp_build_const_int_vec(type4, -shift*type.width), "");
assert(tmp);
if(tmp)
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h
index b9472127a63..138ca620e63 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h
@@ -37,7 +37,7 @@
#define LP_BLD_SWIZZLE_H
-#include <llvm-c/Core.h>
+#include "gallivm/lp_bld.h"
struct lp_type;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
index eddb7a83fa2..2eac5da6c69 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
@@ -35,10 +35,11 @@
#ifndef LP_BLD_TGSI_H
#define LP_BLD_TGSI_H
-#include <llvm-c/Core.h>
+#include "gallivm/lp_bld.h"
struct tgsi_token;
+struct tgsi_shader_info;
struct lp_type;
struct lp_build_context;
struct lp_build_mask_context;
@@ -78,7 +79,8 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
const LLVMValueRef *pos,
const LLVMValueRef (*inputs)[4],
LLVMValueRef (*outputs)[4],
- struct lp_build_sampler_soa *sampler);
+ struct lp_build_sampler_soa *sampler,
+ struct tgsi_shader_info *info);
#endif /* LP_BLD_TGSI_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index 5f2c2a54ee9..d3c769e28b8 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -41,10 +41,12 @@
#include "util/u_debug.h"
#include "util/u_math.h"
#include "util/u_memory.h"
+#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_exec.h"
+#include "tgsi/tgsi_scan.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_arit.h"
@@ -95,6 +97,19 @@ struct lp_exec_mask {
int cond_stack_size;
LLVMValueRef cond_mask;
+ LLVMValueRef break_stack[LP_TGSI_MAX_NESTING];
+ int break_stack_size;
+ LLVMValueRef break_mask;
+
+ LLVMValueRef cont_stack[LP_TGSI_MAX_NESTING];
+ int cont_stack_size;
+ LLVMValueRef cont_mask;
+
+ LLVMBasicBlockRef loop_stack[LP_TGSI_MAX_NESTING];
+ int loop_stack_size;
+ LLVMBasicBlockRef loop_block;
+
+
LLVMValueRef exec_mask;
};
@@ -111,6 +126,12 @@ struct lp_build_tgsi_soa_context
LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS];
LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];
+ LLVMValueRef addr[LP_MAX_TEMPS][NUM_CHANNELS];
+
+ /* we allocate an array of temps if we have indirect
+ * addressing and then the temps above is unused */
+ LLVMValueRef temps_array;
+ boolean has_indirect_addressing;
struct lp_build_mask_context *mask;
struct lp_exec_mask exec_mask;
@@ -145,15 +166,33 @@ static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context
mask->bld = bld;
mask->has_mask = FALSE;
mask->cond_stack_size = 0;
+ mask->loop_stack_size = 0;
+ mask->break_stack_size = 0;
+ mask->cont_stack_size = 0;
mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
}
static void lp_exec_mask_update(struct lp_exec_mask *mask)
{
- mask->exec_mask = mask->cond_mask;
- if (mask->cond_stack_size > 0)
- mask->has_mask = TRUE;
+ if (mask->loop_stack_size) {
+ /*for loops we need to update the entire mask at runtime */
+ LLVMValueRef tmp;
+ assert(mask->break_mask);
+ tmp = LLVMBuildAnd(mask->bld->builder,
+ mask->cont_mask,
+ mask->break_mask,
+ "maskcb");
+ mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
+ mask->cond_mask,
+ tmp,
+ "maskfull");
+ } else
+ mask->exec_mask = mask->cond_mask;
+
+
+ mask->has_mask = (mask->cond_stack_size > 0 ||
+ mask->loop_stack_size > 0);
}
static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
@@ -190,6 +229,104 @@ static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
lp_exec_mask_update(mask);
}
+static void lp_exec_bgnloop(struct lp_exec_mask *mask)
+{
+
+ if (mask->cont_stack_size == 0)
+ mask->cont_mask = LLVMConstAllOnes(mask->int_vec_type);
+ if (mask->break_stack_size == 0)
+ mask->break_mask = LLVMConstAllOnes(mask->int_vec_type);
+ if (mask->cond_stack_size == 0)
+ mask->cond_mask = LLVMConstAllOnes(mask->int_vec_type);
+
+ mask->break_stack[mask->break_stack_size++] = mask->break_mask;
+ mask->cont_stack[mask->cont_stack_size++] = mask->cont_mask;
+ mask->loop_stack[mask->loop_stack_size++] = mask->loop_block;
+ mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop");
+ LLVMBuildBr(mask->bld->builder, mask->loop_block);
+ LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block);
+
+ lp_exec_mask_update(mask);
+}
+
+static void lp_exec_break(struct lp_exec_mask *mask)
+{
+ LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
+ mask->exec_mask,
+ "break");
+
+ /* mask->break_stack_size > 1 implies that we encountered a break
+ * statemant already and if that's the case we want to make sure
+ * our mask is a combination of the previous break and the current
+ * execution mask */
+ if (mask->break_stack_size > 1) {
+ mask->break_mask = LLVMBuildAnd(mask->bld->builder,
+ mask->break_mask,
+ exec_mask, "break_full");
+ } else
+ mask->break_mask = exec_mask;
+
+ lp_exec_mask_update(mask);
+}
+
+static void lp_exec_continue(struct lp_exec_mask *mask)
+{
+ LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
+ mask->exec_mask,
+ "");
+
+ if (mask->cont_stack_size > 1) {
+ mask->cont_mask = LLVMBuildAnd(mask->bld->builder,
+ mask->cont_mask,
+ exec_mask, "");
+ } else
+ mask->cont_mask = exec_mask;
+
+ lp_exec_mask_update(mask);
+}
+
+
+static void lp_exec_endloop(struct lp_exec_mask *mask)
+{
+ LLVMBasicBlockRef endloop;
+ LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width*
+ mask->bld->type.length);
+ LLVMValueRef i1cond;
+
+ assert(mask->break_mask);
+
+ /* i1cond = (mask == 0) */
+ i1cond = LLVMBuildICmp(
+ mask->bld->builder,
+ LLVMIntNE,
+ LLVMBuildBitCast(mask->bld->builder, mask->break_mask, reg_type, ""),
+ LLVMConstNull(reg_type), "");
+
+ endloop = lp_build_insert_new_block(mask->bld->builder, "endloop");
+
+ LLVMBuildCondBr(mask->bld->builder,
+ i1cond, mask->loop_block, endloop);
+
+ LLVMPositionBuilderAtEnd(mask->bld->builder, endloop);
+
+ mask->loop_block = mask->loop_stack[--mask->loop_stack_size];
+ /* pop the cont mask */
+ if (mask->cont_stack_size) {
+ mask->cont_mask = mask->cont_stack[--mask->cont_stack_size];
+ }
+ /* pop the break mask */
+ if (mask->break_stack_size) {
+ mask->break_mask = mask->break_stack[--mask->break_stack_size];
+ }
+
+ lp_exec_mask_update(mask);
+}
+
+/* stores val into an address pointed to by dst.
+ * mask->exec_mask is used to figure out which bits of val
+ * should be stored into the address
+ * (0 means don't store this bit, 1 means do store).
+ */
static void lp_exec_mask_store(struct lp_exec_mask *mask,
LLVMValueRef val,
LLVMValueRef dst)
@@ -227,6 +364,23 @@ emit_ddy(struct lp_build_tgsi_soa_context *bld,
return lp_build_sub(&bld->base, src_top, src_bottom);
}
+static LLVMValueRef
+get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
+ unsigned index,
+ unsigned swizzle,
+ boolean is_indirect,
+ LLVMValueRef addr)
+{
+ if (!bld->has_indirect_addressing) {
+ return bld->temps[index][swizzle];
+ } else {
+ LLVMValueRef lindex =
+ LLVMConstInt(LLVMInt32Type(), index*4 + swizzle, 0);
+ if (is_indirect)
+ lindex = lp_build_add(&bld->base, lindex, addr);
+ return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, "");
+ }
+}
/**
* Register fetch.
@@ -241,6 +395,7 @@ emit_fetch(
const struct tgsi_full_src_register *reg = &inst->Src[index];
unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
LLVMValueRef res;
+ LLVMValueRef addr;
switch (swizzle) {
case TGSI_SWIZZLE_X:
@@ -248,11 +403,34 @@ emit_fetch(
case TGSI_SWIZZLE_Z:
case TGSI_SWIZZLE_W:
+ if (reg->Register.Indirect) {
+ LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
+ unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
+ addr = LLVMBuildLoad(bld->base.builder,
+ bld->addr[reg->Indirect.Index][swizzle],
+ "");
+ /* for indexing we want integers */
+ addr = LLVMBuildFPToSI(bld->base.builder, addr,
+ int_vec_type, "");
+ addr = LLVMBuildExtractElement(bld->base.builder,
+ addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
+ "");
+ addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0));
+ }
+
switch (reg->Register.File) {
case TGSI_FILE_CONSTANT: {
LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0);
- LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
- LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
+ LLVMValueRef scalar, scalar_ptr;
+
+ if (reg->Register.Indirect) {
+ /*lp_build_printf(bld->base.builder,
+ "\taddr = %d\n", addr);*/
+ index = lp_build_add(&bld->base, index, addr);
+ }
+ scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
+ scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
+
res = lp_build_broadcast_scalar(&bld->base, scalar);
break;
}
@@ -267,11 +445,16 @@ emit_fetch(
assert(res);
break;
- case TGSI_FILE_TEMPORARY:
- res = LLVMBuildLoad(bld->base.builder, bld->temps[reg->Register.Index][swizzle], "");
+ case TGSI_FILE_TEMPORARY: {
+ LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
+ swizzle,
+ reg->Register.Indirect,
+ addr);
+ res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
if(!res)
return bld->base.undef;
break;
+ }
default:
assert( 0 );
@@ -349,6 +532,7 @@ emit_store(
LLVMValueRef value)
{
const struct tgsi_full_dst_register *reg = &inst->Dst[index];
+ LLVMValueRef addr;
switch( inst->Instruction.Saturate ) {
case TGSI_SAT_NONE:
@@ -360,7 +544,7 @@ emit_store(
break;
case TGSI_SAT_MINUS_PLUS_ONE:
- value = lp_build_max(&bld->base, value, lp_build_const_scalar(bld->base.type, -1.0));
+ value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0));
value = lp_build_min(&bld->base, value, bld->base.one);
break;
@@ -368,18 +552,42 @@ emit_store(
assert(0);
}
+ if (reg->Register.Indirect) {
+ LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
+ unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
+ addr = LLVMBuildLoad(bld->base.builder,
+ bld->addr[reg->Indirect.Index][swizzle],
+ "");
+ /* for indexing we want integers */
+ addr = LLVMBuildFPToSI(bld->base.builder, addr,
+ int_vec_type, "");
+ addr = LLVMBuildExtractElement(bld->base.builder,
+ addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
+ "");
+ addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0));
+ }
+
switch( reg->Register.File ) {
case TGSI_FILE_OUTPUT:
lp_exec_mask_store(&bld->exec_mask, value,
bld->outputs[reg->Register.Index][chan_index]);
break;
- case TGSI_FILE_TEMPORARY:
- lp_exec_mask_store(&bld->exec_mask, value,
- bld->temps[reg->Register.Index][chan_index]);
+ case TGSI_FILE_TEMPORARY: {
+ LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
+ chan_index,
+ reg->Register.Indirect,
+ addr);
+ lp_exec_mask_store(&bld->exec_mask, value, temp_ptr);
break;
+ }
case TGSI_FILE_ADDRESS:
+ lp_exec_mask_store(&bld->exec_mask, value,
+ bld->addr[reg->Indirect.Index][chan_index]);
+ break;
+
+ case TGSI_FILE_PREDICATE:
/* FIXME */
assert(0);
break;
@@ -456,6 +664,9 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
}
+/**
+ * Kill fragment if any of the src register values are negative.
+ */
static void
emit_kil(
struct lp_build_tgsi_soa_context *bld,
@@ -486,6 +697,9 @@ emit_kil(
if(terms[chan_index]) {
LLVMValueRef chan_mask;
+ /*
+ * If term < 0 then mask = 0 else mask = ~0.
+ */
chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
if(mask)
@@ -501,26 +715,28 @@ emit_kil(
/**
- * Check if inst src/dest regs use indirect addressing into temporary
- * register file.
+ * Predicated fragment kill.
+ * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
+ * The only predication is the execution mask which will apply if
+ * we're inside a loop or conditional.
*/
-static boolean
-indirect_temp_reference(const struct tgsi_full_instruction *inst)
+static void
+emit_kilp(struct lp_build_tgsi_soa_context *bld,
+ const struct tgsi_full_instruction *inst)
{
- uint i;
- for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
- const struct tgsi_full_src_register *reg = &inst->Src[i];
- if (reg->Register.File == TGSI_FILE_TEMPORARY &&
- reg->Register.Indirect)
- return TRUE;
+ LLVMValueRef mask;
+
+ /* For those channels which are "alive", disable fragment shader
+ * execution.
+ */
+ if (bld->exec_mask.has_mask) {
+ mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp");
}
- for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
- const struct tgsi_full_dst_register *reg = &inst->Dst[i];
- if (reg->Register.File == TGSI_FILE_TEMPORARY &&
- reg->Register.Indirect)
- return TRUE;
+ else {
+ mask = bld->base.zero;
}
- return FALSE;
+
+ lp_build_mask_update(bld->mask, mask);
}
static int
@@ -528,39 +744,54 @@ emit_declaration(
struct lp_build_tgsi_soa_context *bld,
const struct tgsi_full_declaration *decl)
{
+ LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);
+
unsigned first = decl->Range.First;
unsigned last = decl->Range.Last;
unsigned idx, i;
for (idx = first; idx <= last; ++idx) {
- boolean ok;
-
switch (decl->Declaration.File) {
case TGSI_FILE_TEMPORARY:
- for (i = 0; i < NUM_CHANNELS; i++)
- bld->temps[idx][i] = lp_build_alloca(&bld->base);
- ok = TRUE;
+ if (bld->has_indirect_addressing) {
+ LLVMValueRef val = LLVMConstInt(LLVMInt32Type(),
+ last*4 + 4, 0);
+ bld->temps_array = lp_build_array_alloca(bld->base.builder,
+ vec_type, val, "");
+ } else {
+ for (i = 0; i < NUM_CHANNELS; i++)
+ bld->temps[idx][i] = lp_build_alloca(bld->base.builder,
+ vec_type, "");
+ }
break;
case TGSI_FILE_OUTPUT:
for (i = 0; i < NUM_CHANNELS; i++)
- bld->outputs[idx][i] = lp_build_alloca(&bld->base);
- ok = TRUE;
+ bld->outputs[idx][i] = lp_build_alloca(bld->base.builder,
+ vec_type, "");
+ break;
+
+ case TGSI_FILE_ADDRESS:
+ for (i = 0; i < NUM_CHANNELS; i++)
+ bld->addr[idx][i] = lp_build_alloca(bld->base.builder,
+ vec_type, "");
break;
default:
/* don't need to declare other vars */
- ok = TRUE;
+ break;
}
-
- if (!ok)
- return FALSE;
}
return TRUE;
}
-static int
+
+/**
+ * Emit LLVM for one TGSI instruction.
+ * \param return TRUE for success, FALSE otherwise
+ */
+static boolean
emit_instruction(
struct lp_build_tgsi_soa_context *bld,
const struct tgsi_full_instruction *inst,
@@ -577,9 +808,16 @@ emit_instruction(
LLVMValueRef res;
LLVMValueRef dst0[NUM_CHANNELS];
- /* we can't handle indirect addressing into temp register file yet */
- if (indirect_temp_reference(inst))
- return FALSE;
+ /*
+ * Stores and write masks are handled in a general fashion after the long
+ * instruction opcode switch statement.
+ *
+ * Although not stricitly necessary, we avoid generating instructions for
+ * channels which won't be stored, in cases where's that easy. For some
+ * complex instructions, like texture sampling, it is more convenient to
+ * assume a full writemask and then let LLVM optimization passes eliminate
+ * redundant code.
+ */
assert(info->num_dst <= 1);
if(info->num_dst) {
@@ -589,17 +827,13 @@ emit_instruction(
}
switch (inst->Instruction.Opcode) {
-#if 0
case TGSI_OPCODE_ARL:
- /* FIXME */
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
tmp0 = emit_fetch( bld, inst, 0, chan_index );
- emit_flr(bld, 0, 0);
- emit_f2it( bld, 0 );
+ tmp0 = lp_build_floor(&bld->base, tmp0);
dst0[chan_index] = tmp0;
}
break;
-#endif
case TGSI_OPCODE_MOV:
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
@@ -865,7 +1099,7 @@ emit_instruction(
src0 = emit_fetch( bld, inst, 0, chan_index );
src1 = emit_fetch( bld, inst, 1, chan_index );
src2 = emit_fetch( bld, inst, 2, chan_index );
- tmp1 = lp_build_const_scalar(bld->base.type, 0.5);
+ tmp1 = lp_build_const_vec(bld->base.type, 0.5);
tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
}
@@ -997,7 +1231,7 @@ emit_instruction(
case TGSI_OPCODE_RCC:
/* deprecated? */
assert(0);
- return 0;
+ return FALSE;
case TGSI_OPCODE_DPH:
tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
@@ -1040,8 +1274,7 @@ emit_instruction(
case TGSI_OPCODE_KILP:
/* predicated kill */
- /* FIXME */
- return 0;
+ emit_kilp( bld, inst );
break;
case TGSI_OPCODE_KIL:
@@ -1050,23 +1283,23 @@ emit_instruction(
break;
case TGSI_OPCODE_PK2H:
- return 0;
+ return FALSE;
break;
case TGSI_OPCODE_PK2US:
- return 0;
+ return FALSE;
break;
case TGSI_OPCODE_PK4B:
- return 0;
+ return FALSE;
break;
case TGSI_OPCODE_PK4UB:
- return 0;
+ return FALSE;
break;
case TGSI_OPCODE_RFL:
- return 0;
+ return FALSE;
break;
case TGSI_OPCODE_SEQ:
@@ -1126,77 +1359,72 @@ emit_instruction(
break;
case TGSI_OPCODE_TEX:
- /* XXX what about dst0 writemask? */
emit_tex( bld, inst, FALSE, FALSE, dst0 );
break;
case TGSI_OPCODE_TXD:
/* FIXME */
- return 0;
+ return FALSE;
break;
case TGSI_OPCODE_UP2H:
/* deprecated */
assert (0);
- return 0;
+ return FALSE;
break;
case TGSI_OPCODE_UP2US:
/* deprecated */
assert(0);
- return 0;
+ return FALSE;
break;
case TGSI_OPCODE_UP4B:
/* deprecated */
assert(0);
- return 0;
+ return FALSE;
break;
case TGSI_OPCODE_UP4UB:
/* deprecated */
assert(0);
- return 0;
+ return FALSE;
break;
case TGSI_OPCODE_X2D:
/* deprecated? */
assert(0);
- return 0;
+ return FALSE;
break;
case TGSI_OPCODE_ARA:
/* deprecated */
assert(0);
- return 0;
+ return FALSE;
break;
-#if 0
case TGSI_OPCODE_ARR:
- /* FIXME */
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
tmp0 = emit_fetch( bld, inst, 0, chan_index );
- emit_rnd( bld, 0, 0 );
- emit_f2it( bld, 0 );
+ tmp0 = lp_build_round(&bld->base, tmp0);
dst0[chan_index] = tmp0;
}
break;
-#endif
case TGSI_OPCODE_BRA:
/* deprecated */
assert(0);
- return 0;
+ return FALSE;
break;
case TGSI_OPCODE_CAL:
/* FIXME */
- return 0;
+ return FALSE;
break;
case TGSI_OPCODE_RET:
/* FIXME */
- return 0;
+ return FALSE;
break;
case TGSI_OPCODE_END:
@@ -1326,7 +1554,7 @@ emit_instruction(
case TGSI_OPCODE_DIV:
/* deprecated */
assert( 0 );
- return 0;
+ return FALSE;
break;
case TGSI_OPCODE_DP2:
@@ -1349,27 +1577,20 @@ emit_instruction(
case TGSI_OPCODE_TXP:
emit_tex( bld, inst, FALSE, TRUE, dst0 );
break;
-
+
case TGSI_OPCODE_BRK:
- /* FIXME */
- return 0;
+ lp_exec_break(&bld->exec_mask);
break;
case TGSI_OPCODE_IF:
tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
+ tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL,
+ tmp0, bld->base.zero);
lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
break;
- case TGSI_OPCODE_BGNFOR:
- /* deprecated */
- assert(0);
- return 0;
- break;
-
- case TGSI_OPCODE_REP:
- /* deprecated */
- assert(0);
- return 0;
+ case TGSI_OPCODE_BGNLOOP:
+ lp_exec_bgnloop(&bld->exec_mask);
break;
case TGSI_OPCODE_ELSE:
@@ -1380,28 +1601,20 @@ emit_instruction(
lp_exec_mask_cond_pop(&bld->exec_mask);
break;
- case TGSI_OPCODE_ENDFOR:
- /* deprecated */
- assert(0);
- return 0;
- break;
-
- case TGSI_OPCODE_ENDREP:
- /* deprecated */
- assert(0);
- return 0;
+ case TGSI_OPCODE_ENDLOOP:
+ lp_exec_endloop(&bld->exec_mask);
break;
case TGSI_OPCODE_PUSHA:
/* deprecated? */
assert(0);
- return 0;
+ return FALSE;
break;
case TGSI_OPCODE_POPA:
/* deprecated? */
assert(0);
- return 0;
+ return FALSE;
break;
case TGSI_OPCODE_CEIL:
@@ -1414,13 +1627,13 @@ emit_instruction(
case TGSI_OPCODE_I2F:
/* deprecated? */
assert(0);
- return 0;
+ return FALSE;
break;
case TGSI_OPCODE_NOT:
/* deprecated? */
assert(0);
- return 0;
+ return FALSE;
break;
case TGSI_OPCODE_TRUNC:
@@ -1433,75 +1646,74 @@ emit_instruction(
case TGSI_OPCODE_SHL:
/* deprecated? */
assert(0);
- return 0;
+ return FALSE;
break;
case TGSI_OPCODE_ISHR:
/* deprecated? */
assert(0);
- return 0;
+ return FALSE;
break;
case TGSI_OPCODE_AND:
/* deprecated? */
assert(0);
- return 0;
+ return FALSE;
break;
case TGSI_OPCODE_OR:
/* deprecated? */
assert(0);
- return 0;
+ return FALSE;
break;
case TGSI_OPCODE_MOD:
/* deprecated? */
assert(0);
- return 0;
+ return FALSE;
break;
case TGSI_OPCODE_XOR:
/* deprecated? */
assert(0);
- return 0;
+ return FALSE;
break;
case TGSI_OPCODE_SAD:
/* deprecated? */
assert(0);
- return 0;
+ return FALSE;
break;
case TGSI_OPCODE_TXF:
/* deprecated? */
assert(0);
- return 0;
+ return FALSE;
break;
case TGSI_OPCODE_TXQ:
/* deprecated? */
assert(0);
- return 0;
+ return FALSE;
break;
case TGSI_OPCODE_CONT:
- /* FIXME */
- return 0;
+ lp_exec_continue(&bld->exec_mask);
break;
case TGSI_OPCODE_EMIT:
- return 0;
+ return FALSE;
break;
case TGSI_OPCODE_ENDPRIM:
- return 0;
+ return FALSE;
break;
case TGSI_OPCODE_NOP:
break;
default:
- return 0;
+ return FALSE;
}
if(info->num_dst) {
@@ -1510,7 +1722,7 @@ emit_instruction(
}
}
- return 1;
+ return TRUE;
}
@@ -1523,7 +1735,8 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
const LLVMValueRef *pos,
const LLVMValueRef (*inputs)[NUM_CHANNELS],
LLVMValueRef (*outputs)[NUM_CHANNELS],
- struct lp_build_sampler_soa *sampler)
+ struct lp_build_sampler_soa *sampler,
+ struct tgsi_shader_info *info)
{
struct lp_build_tgsi_soa_context bld;
struct tgsi_parse_context parse;
@@ -1539,6 +1752,8 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
bld.outputs = outputs;
bld.consts_ptr = consts_ptr;
bld.sampler = sampler;
+ bld.has_indirect_addressing = info->opcode_count[TGSI_OPCODE_ARR] > 0 ||
+ info->opcode_count[TGSI_OPCODE_ARL] > 0;
lp_exec_mask_init(&bld.exec_mask, &bld.base);
@@ -1559,10 +1774,10 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
case TGSI_TOKEN_TYPE_INSTRUCTION:
{
unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
- const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode);
- if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, info ))
+ const struct tgsi_opcode_info *opcode_info = tgsi_get_opcode_info(opcode);
+ if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, opcode_info ))
_debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
- info ? info->mnemonic : "<invalid>");
+ opcode_info->mnemonic);
}
break;
@@ -1575,7 +1790,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
assert(num_immediates < LP_MAX_IMMEDIATES);
for( i = 0; i < size; ++i )
bld.immediates[num_immediates][i] =
- lp_build_const_scalar(type, parse.FullToken.FullImmediate.u[i].Float);
+ lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float);
for( i = size; i < 4; ++i )
bld.immediates[num_immediates][i] = bld.base.undef;
num_immediates++;
@@ -1589,7 +1804,14 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
assert( 0 );
}
}
-
+ if (0) {
+ LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
+ LLVMValueRef function = LLVMGetBasicBlockParent(block);
+ debug_printf("11111111111111111111111111111 \n");
+ tgsi_dump(tokens, 0);
+ LLVMDumpValue(function);
+ debug_printf("2222222222222222222222222222 \n");
+ }
tgsi_parse_free( &parse );
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.c b/src/gallium/auxiliary/gallivm/lp_bld_type.c
index c327ba045a6..796af88caad 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_type.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_type.c
@@ -58,7 +58,10 @@ LLVMTypeRef
lp_build_vec_type(struct lp_type type)
{
LLVMTypeRef elem_type = lp_build_elem_type(type);
- return LLVMVectorType(elem_type, type.length);
+ if (type.length == 1)
+ return elem_type;
+ else
+ return LLVMVectorType(elem_type, type.length);
}
@@ -115,6 +118,9 @@ lp_check_vec_type(struct lp_type type, LLVMTypeRef vec_type)
if(!vec_type)
return FALSE;
+ if (type.length == 1)
+ return lp_check_elem_type(type, vec_type);
+
if(LLVMGetTypeKind(vec_type) != LLVMVectorTypeKind)
return FALSE;
@@ -153,7 +159,10 @@ LLVMTypeRef
lp_build_int_vec_type(struct lp_type type)
{
LLVMTypeRef elem_type = lp_build_int_elem_type(type);
- return LLVMVectorType(elem_type, type.length);
+ if (type.length == 1)
+ return elem_type;
+ else
+ return LLVMVectorType(elem_type, type.length);
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.h b/src/gallium/auxiliary/gallivm/lp_bld_type.h
index 16946cc28a2..cd59d2faa66 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_type.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_type.h
@@ -37,9 +37,9 @@
#define LP_BLD_TYPE_H
-#include <llvm-c/Core.h>
+#include "pipe/p_compiler.h"
+#include "gallivm/lp_bld.h"
-#include <pipe/p_compiler.h>
/**
@@ -103,7 +103,7 @@ struct lp_type {
unsigned width:14;
/**
- * Vector length.
+ * Vector length. If length==1, this is a scalar (float/int) type.
*
* width*length should be a power of two greater or equal to eight.
*
@@ -139,6 +139,7 @@ struct lp_build_context
};
+/** Create scalar float type */
static INLINE struct lp_type
lp_type_float(unsigned width)
{
@@ -148,12 +149,29 @@ lp_type_float(unsigned width)
res_type.floating = TRUE;
res_type.sign = TRUE;
res_type.width = width;
+ res_type.length = 1;
+
+ return res_type;
+}
+
+
+/** Create vector of float type */
+static INLINE struct lp_type
+lp_type_float_vec(unsigned width)
+{
+ struct lp_type res_type;
+
+ memset(&res_type, 0, sizeof res_type);
+ res_type.floating = TRUE;
+ res_type.sign = TRUE;
+ res_type.width = width;
res_type.length = LP_NATIVE_VECTOR_WIDTH / width;
return res_type;
}
+/** Create scalar int type */
static INLINE struct lp_type
lp_type_int(unsigned width)
{
@@ -162,12 +180,28 @@ lp_type_int(unsigned width)
memset(&res_type, 0, sizeof res_type);
res_type.sign = TRUE;
res_type.width = width;
+ res_type.length = 1;
+
+ return res_type;
+}
+
+
+/** Create vector int type */
+static INLINE struct lp_type
+lp_type_int_vec(unsigned width)
+{
+ struct lp_type res_type;
+
+ memset(&res_type, 0, sizeof res_type);
+ res_type.sign = TRUE;
+ res_type.width = width;
res_type.length = LP_NATIVE_VECTOR_WIDTH / width;
return res_type;
}
+/** Create scalar uint type */
static INLINE struct lp_type
lp_type_uint(unsigned width)
{
@@ -175,6 +209,20 @@ lp_type_uint(unsigned width)
memset(&res_type, 0, sizeof res_type);
res_type.width = width;
+ res_type.length = 1;
+
+ return res_type;
+}
+
+
+/** Create vector uint type */
+static INLINE struct lp_type
+lp_type_uint_vec(unsigned width)
+{
+ struct lp_type res_type;
+
+ memset(&res_type, 0, sizeof res_type);
+ res_type.width = width;
res_type.length = LP_NATIVE_VECTOR_WIDTH / width;
return res_type;
diff --git a/src/gallium/auxiliary/os/os_thread.h b/src/gallium/auxiliary/os/os_thread.h
index a04df4106f2..c09e8a7a76f 100644
--- a/src/gallium/auxiliary/os/os_thread.h
+++ b/src/gallium/auxiliary/os/os_thread.h
@@ -40,7 +40,7 @@
#include "util/u_debug.h" /* for assert */
-#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU)
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_EMBEDDED)
#include <pthread.h> /* POSIX threads headers */
#include <stdio.h> /* for perror() */
@@ -257,7 +257,7 @@ typedef unsigned pipe_condvar;
* pipe_barrier
*/
-#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_HAIKU)
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_EMBEDDED)
typedef pthread_barrier_t pipe_barrier;
@@ -299,22 +299,50 @@ static INLINE void pipe_barrier_wait(pipe_barrier *barrier)
#else
-typedef unsigned pipe_barrier;
+typedef struct {
+ unsigned count;
+ unsigned waiters;
+ uint64_t sequence;
+ pipe_mutex mutex;
+ pipe_condvar condvar;
+} pipe_barrier;
static INLINE void pipe_barrier_init(pipe_barrier *barrier, unsigned count)
{
- /* XXX we could implement barriers with a mutex and condition var */
- assert(0);
+ barrier->count = count;
+ barrier->waiters = 0;
+ barrier->sequence = 0;
+ pipe_mutex_init(barrier->mutex);
+ pipe_condvar_init(barrier->condvar);
}
static INLINE void pipe_barrier_destroy(pipe_barrier *barrier)
{
- assert(0);
+ assert(barrier->waiters == 0);
+ pipe_mutex_destroy(barrier->mutex);
+ pipe_condvar_destroy(barrier->condvar);
}
static INLINE void pipe_barrier_wait(pipe_barrier *barrier)
{
- assert(0);
+ pipe_mutex_lock(barrier->mutex);
+
+ assert(barrier->waiters < barrier->count);
+ barrier->waiters++;
+
+ if (barrier->waiters < barrier->count) {
+ uint64_t sequence = barrier->sequence;
+
+ do {
+ pipe_condvar_wait(barrier->condvar, barrier->mutex);
+ } while (sequence == barrier->sequence);
+ } else {
+ barrier->waiters = 0;
+ barrier->sequence++;
+ pipe_condvar_broadcast(barrier->condvar);
+ }
+
+ pipe_mutex_unlock(barrier->mutex);
}
@@ -377,7 +405,7 @@ pipe_semaphore_wait(pipe_semaphore *sema)
*/
typedef struct {
-#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU)
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_EMBEDDED)
pthread_key_t key;
#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER)
DWORD key;
@@ -392,7 +420,7 @@ typedef struct {
static INLINE void
pipe_tsd_init(pipe_tsd *tsd)
{
-#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU)
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_EMBEDDED)
if (pthread_key_create(&tsd->key, NULL/*free*/) != 0) {
perror("pthread_key_create(): failed to allocate key for thread specific data");
exit(-1);
@@ -409,7 +437,7 @@ pipe_tsd_get(pipe_tsd *tsd)
if (tsd->initMagic != (int) PIPE_TSD_INIT_MAGIC) {
pipe_tsd_init(tsd);
}
-#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU)
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_EMBEDDED)
return pthread_getspecific(tsd->key);
#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER)
assert(0);
@@ -426,7 +454,7 @@ pipe_tsd_set(pipe_tsd *tsd, void *value)
if (tsd->initMagic != (int) PIPE_TSD_INIT_MAGIC) {
pipe_tsd_init(tsd);
}
-#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU)
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_EMBEDDED)
if (pthread_setspecific(tsd->key, value) != 0) {
perror("pthread_set_specific() failed");
exit(-1);
diff --git a/src/gallium/auxiliary/os/os_time.h b/src/gallium/auxiliary/os/os_time.h
index 5b55c1b3747..7e0f67a76b0 100644
--- a/src/gallium/auxiliary/os/os_time.h
+++ b/src/gallium/auxiliary/os/os_time.h
@@ -71,7 +71,7 @@ os_time_sleep(int64_t usecs);
/*
* Helper function for detecting time outs, taking in account overflow.
*
- * Returns true the the current time has elapsed beyond the specified interval.
+ * Returns true if the current time has elapsed beyond the specified interval.
*/
static INLINE boolean
os_time_timeout(int64_t start,
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer.h b/src/gallium/auxiliary/pipebuffer/pb_buffer.h
index 34b1b77df40..a6c50dcf0c1 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer.h
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer.h
@@ -48,7 +48,6 @@
#include "util/u_debug.h"
#include "util/u_inlines.h"
#include "pipe/p_defines.h"
-#include "pipe/p_state.h"
#ifdef __cplusplus
@@ -58,8 +57,23 @@ extern "C" {
struct pb_vtbl;
struct pb_validate;
+struct pipe_fence_handle;
+#define PB_USAGE_CPU_READ (1 << 0)
+#define PB_USAGE_CPU_WRITE (1 << 1)
+#define PB_USAGE_GPU_READ (1 << 2)
+#define PB_USAGE_GPU_WRITE (1 << 3)
+#define PB_USAGE_UNSYNCHRONIZED (1 << 10)
+#define PB_USAGE_DONTBLOCK (1 << 9)
+
+#define PB_USAGE_CPU_READ_WRITE \
+ ( PB_USAGE_CPU_READ | PB_USAGE_CPU_WRITE )
+#define PB_USAGE_GPU_READ_WRITE \
+ ( PB_USAGE_GPU_READ | PB_USAGE_GPU_WRITE )
+#define PB_USAGE_WRITE \
+ ( PB_USAGE_CPU_WRITE | PB_USAGE_GPU_WRITE )
+
/**
* Buffer description.
*
@@ -83,7 +97,14 @@ typedef unsigned pb_size;
*/
struct pb_buffer
{
- struct pipe_buffer base;
+ /* This used to be a pipe_buffer struct:
+ */
+ struct {
+ struct pipe_reference reference;
+ unsigned size;
+ unsigned alignment;
+ unsigned usage;
+ } base;
/**
* Pointer to the virtual function table.
@@ -106,7 +127,7 @@ struct pb_vtbl
/**
* Map the entire data store of a buffer object into the client's address.
- * flags is bitmask of PIPE_BUFFER_FLAG_READ/WRITE.
+ * flags is bitmask of PB_USAGE_CPU_READ/WRITE.
*/
void *(*map)( struct pb_buffer *buf,
unsigned flags );
@@ -138,23 +159,6 @@ struct pb_vtbl
};
-static INLINE struct pipe_buffer *
-pb_pipe_buffer( struct pb_buffer *pbuf )
-{
- assert(pbuf);
- return &pbuf->base;
-}
-
-
-static INLINE struct pb_buffer *
-pb_buffer( struct pipe_buffer *buf )
-{
- assert(buf);
- /* Could add a magic cookie check on debug builds.
- */
- return (struct pb_buffer *)buf;
-}
-
/* Accessor functions for pb->vtbl:
*/
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
index d97f749b6ed..d6cf6405825 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
@@ -135,7 +135,7 @@ struct fenced_buffer
void *data;
/**
- * A bitmask of PIPE_BUFFER_USAGE_CPU/GPU_READ/WRITE describing the current
+ * A bitmask of PB_USAGE_CPU/GPU_READ/WRITE describing the current
* buffer usage.
*/
unsigned flags;
@@ -270,7 +270,7 @@ fenced_buffer_add_locked(struct fenced_manager *fenced_mgr,
struct fenced_buffer *fenced_buf)
{
assert(pipe_is_referenced(&fenced_buf->base.base.reference));
- assert(fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE);
+ assert(fenced_buf->flags & PB_USAGE_GPU_READ_WRITE);
assert(fenced_buf->fence);
p_atomic_inc(&fenced_buf->base.base.reference.count);
@@ -299,7 +299,7 @@ fenced_buffer_remove_locked(struct fenced_manager *fenced_mgr,
assert(fenced_buf->mgr == fenced_mgr);
ops->fence_reference(ops, &fenced_buf->fence, NULL);
- fenced_buf->flags &= ~PIPE_BUFFER_USAGE_GPU_READ_WRITE;
+ fenced_buf->flags &= ~PB_USAGE_GPU_READ_WRITE;
assert(fenced_buf->head.prev);
assert(fenced_buf->head.next);
@@ -377,7 +377,7 @@ fenced_buffer_finish_locked(struct fenced_manager *fenced_mgr,
assert(!destroyed);
- fenced_buf->flags &= ~PIPE_BUFFER_USAGE_GPU_READ_WRITE;
+ fenced_buf->flags &= ~PB_USAGE_GPU_READ_WRITE;
ret = PIPE_OK;
}
@@ -624,7 +624,7 @@ fenced_buffer_copy_storage_to_gpu_locked(struct fenced_buffer *fenced_buf)
assert(fenced_buf->data);
assert(fenced_buf->buffer);
- map = pb_map(fenced_buf->buffer, PIPE_BUFFER_USAGE_CPU_WRITE);
+ map = pb_map(fenced_buf->buffer, PB_USAGE_CPU_WRITE);
if(!map)
return PIPE_ERROR;
@@ -644,7 +644,7 @@ fenced_buffer_copy_storage_to_cpu_locked(struct fenced_buffer *fenced_buf)
assert(fenced_buf->data);
assert(fenced_buf->buffer);
- map = pb_map(fenced_buf->buffer, PIPE_BUFFER_USAGE_CPU_READ);
+ map = pb_map(fenced_buf->buffer, PB_USAGE_CPU_READ);
if(!map)
return PIPE_ERROR;
@@ -683,24 +683,24 @@ fenced_buffer_map(struct pb_buffer *buf,
pipe_mutex_lock(fenced_mgr->mutex);
- assert(!(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE));
+ assert(!(flags & PB_USAGE_GPU_READ_WRITE));
/*
* Serialize writes.
*/
- while((fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_WRITE) ||
- ((fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_READ) &&
- (flags & PIPE_BUFFER_USAGE_CPU_WRITE))) {
+ while((fenced_buf->flags & PB_USAGE_GPU_WRITE) ||
+ ((fenced_buf->flags & PB_USAGE_GPU_READ) &&
+ (flags & PB_USAGE_CPU_WRITE))) {
/*
* Don't wait for the GPU to finish accessing it, if blocking is forbidden.
*/
- if((flags & PIPE_BUFFER_USAGE_DONTBLOCK) &&
+ if((flags & PB_USAGE_DONTBLOCK) &&
ops->fence_signalled(ops, fenced_buf->fence, 0) != 0) {
goto done;
}
- if (flags & PIPE_BUFFER_USAGE_UNSYNCHRONIZED) {
+ if (flags & PB_USAGE_UNSYNCHRONIZED) {
break;
}
@@ -721,7 +721,7 @@ fenced_buffer_map(struct pb_buffer *buf,
if(map) {
++fenced_buf->mapcount;
- fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE;
+ fenced_buf->flags |= flags & PB_USAGE_CPU_READ_WRITE;
}
done:
@@ -745,7 +745,7 @@ fenced_buffer_unmap(struct pb_buffer *buf)
pb_unmap(fenced_buf->buffer);
--fenced_buf->mapcount;
if(!fenced_buf->mapcount)
- fenced_buf->flags &= ~PIPE_BUFFER_USAGE_CPU_READ_WRITE;
+ fenced_buf->flags &= ~PB_USAGE_CPU_READ_WRITE;
}
pipe_mutex_unlock(fenced_mgr->mutex);
@@ -771,9 +771,9 @@ fenced_buffer_validate(struct pb_buffer *buf,
goto done;
}
- assert(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE);
- assert(!(flags & ~PIPE_BUFFER_USAGE_GPU_READ_WRITE));
- flags &= PIPE_BUFFER_USAGE_GPU_READ_WRITE;
+ assert(flags & PB_USAGE_GPU_READ_WRITE);
+ assert(!(flags & ~PB_USAGE_GPU_READ_WRITE));
+ flags &= PB_USAGE_GPU_READ_WRITE;
/* Buffer cannot be validated in two different lists */
if(fenced_buf->vl && fenced_buf->vl != vl) {
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h
index 0372f81d0a1..004c2b939a2 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h
@@ -59,7 +59,6 @@ extern "C" {
#endif
-struct pipe_buffer;
struct pipe_fence_handle;
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c
index 6bdce5fcb06..b706f429be5 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c
@@ -135,9 +135,9 @@ pb_malloc_buffer_create(pb_size size,
return NULL;
pipe_reference_init(&buf->base.base.reference, 1);
- buf->base.base.alignment = desc->alignment;
buf->base.base.usage = desc->usage;
buf->base.base.size = size;
+ buf->base.base.alignment = desc->alignment;
buf->base.vtbl = &malloc_buffer_vtbl;
buf->data = align_malloc(size, desc->alignment < sizeof(void*) ? sizeof(void*) : desc->alignment);
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
index 06669917ff6..cec2524da2b 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
@@ -60,7 +60,6 @@ extern "C" {
struct pb_desc;
-struct pipe_buffer;
/**
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
index 86f9266c95f..88501e8d72d 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
@@ -227,6 +227,8 @@ pb_cache_is_buffer_compat(struct pb_cache_buffer *buf,
pb_size size,
const struct pb_desc *desc)
{
+ void *map;
+
if(buf->base.base.size < size)
return FALSE;
@@ -239,6 +241,13 @@ pb_cache_is_buffer_compat(struct pb_cache_buffer *buf,
if(!pb_check_usage(desc->usage, buf->base.base.usage))
return FALSE;
+
+ map = pb_map(buf->buffer, PB_USAGE_DONTBLOCK);
+ if (!map) {
+ return FALSE;
+ }
+
+ pb_unmap(buf->buffer);
return TRUE;
}
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c
index a5dbded2bce..0dc5b31a754 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c
@@ -158,7 +158,7 @@ pb_debug_buffer_fill(struct pb_debug_buffer *buf)
{
uint8_t *map;
- map = pb_map(buf->buffer, PIPE_BUFFER_USAGE_CPU_WRITE);
+ map = pb_map(buf->buffer, PB_USAGE_CPU_WRITE);
assert(map);
if(map) {
fill_random_pattern(map, buf->underflow_size);
@@ -180,8 +180,8 @@ pb_debug_buffer_check(struct pb_debug_buffer *buf)
uint8_t *map;
map = pb_map(buf->buffer,
- PIPE_BUFFER_USAGE_CPU_READ |
- PIPE_BUFFER_USAGE_UNSYNCHRONIZED);
+ PB_USAGE_CPU_READ |
+ PB_USAGE_UNSYNCHRONIZED);
assert(map);
if(map) {
boolean underflow, overflow;
@@ -382,8 +382,8 @@ pb_debug_manager_create_buffer(struct pb_manager *_mgr,
real_size = mgr->underflow_size + size + mgr->overflow_size;
real_desc = *desc;
- real_desc.usage |= PIPE_BUFFER_USAGE_CPU_WRITE;
- real_desc.usage |= PIPE_BUFFER_USAGE_CPU_READ;
+ real_desc.usage |= PB_USAGE_CPU_WRITE;
+ real_desc.usage |= PB_USAGE_CPU_READ;
buf->buffer = mgr->provider->create_buffer(mgr->provider,
real_size,
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c
index 63195715d68..faf7c352674 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c
@@ -268,8 +268,8 @@ mm_bufmgr_create_from_buffer(struct pb_buffer *buffer,
mm->buffer = buffer;
mm->map = pb_map(mm->buffer,
- PIPE_BUFFER_USAGE_CPU_READ |
- PIPE_BUFFER_USAGE_CPU_WRITE);
+ PB_USAGE_CPU_READ |
+ PB_USAGE_CPU_WRITE);
if(!mm->map)
goto failure;
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c
index cb32d251367..31f1ebbeb7c 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c
@@ -150,7 +150,7 @@ pb_ondemand_buffer_instantiate(struct pb_ondemand_buffer *buf)
if(!buf->buffer)
return PIPE_ERROR_OUT_OF_MEMORY;
- map = pb_map(buf->buffer, PIPE_BUFFER_USAGE_CPU_READ);
+ map = pb_map(buf->buffer, PB_USAGE_CPU_READ);
if(!map) {
pb_reference(&buf->buffer, NULL);
return PIPE_ERROR;
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c
index fea234ae8c7..fdcce428784 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c
@@ -284,8 +284,8 @@ pool_bufmgr_create(struct pb_manager *provider,
goto failure;
pool->map = pb_map(pool->buffer,
- PIPE_BUFFER_USAGE_CPU_READ |
- PIPE_BUFFER_USAGE_CPU_WRITE);
+ PB_USAGE_CPU_READ |
+ PB_USAGE_CPU_WRITE);
if(!pool->map)
goto failure;
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c
index 24e2820f881..7a3305aaf37 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c
@@ -315,8 +315,8 @@ pb_slab_create(struct pb_slab_manager *mgr)
/* Note down the slab virtual address. All mappings are accessed directly
* through this address so it is required that the buffer is pinned. */
slab->virtual = pb_map(slab->bo,
- PIPE_BUFFER_USAGE_CPU_READ |
- PIPE_BUFFER_USAGE_CPU_WRITE);
+ PB_USAGE_CPU_READ |
+ PB_USAGE_CPU_WRITE);
if(!slab->virtual) {
ret = PIPE_ERROR_OUT_OF_MEMORY;
goto out_err1;
diff --git a/src/gallium/auxiliary/pipebuffer/pb_validate.c b/src/gallium/auxiliary/pipebuffer/pb_validate.c
index 903afc749d3..b585422460b 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_validate.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_validate.c
@@ -69,9 +69,9 @@ pb_validate_add_buffer(struct pb_validate *vl,
if(!buf)
return PIPE_ERROR;
- assert(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE);
- assert(!(flags & ~PIPE_BUFFER_USAGE_GPU_READ_WRITE));
- flags &= PIPE_BUFFER_USAGE_GPU_READ_WRITE;
+ assert(flags & PB_USAGE_GPU_READ_WRITE);
+ assert(!(flags & ~PB_USAGE_GPU_READ_WRITE));
+ flags &= PB_USAGE_GPU_READ_WRITE;
/* We only need to store one reference for each buffer, so avoid storing
* consecutive references for the same buffer. It might not be the most
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
index f675427d987..7595214bdf2 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
@@ -87,7 +87,7 @@ void x86_print_reg( struct x86_reg reg )
foo++; \
if (*foo) \
foo++; \
- debug_printf( "\n% 4x% 15s ", p->csr - p->store, foo ); \
+ debug_printf( "\n%4x %14s ", p->csr - p->store, foo ); \
} while (0)
#define DUMP_I( I ) do { \
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
index f7612d416a0..319b836ffb1 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
@@ -102,7 +102,7 @@ enum sse_cc {
#define cc_Z cc_E
#define cc_NZ cc_NE
-/* Begin/end/retreive function creation:
+/* Begin/end/retrieve function creation:
*/
@@ -311,8 +311,8 @@ void x87_fucom( struct x86_function *p, struct x86_reg arg );
-/* Retreive a reference to one of the function arguments, taking into
- * account any push/pop activity. Note - doesn't track explict
+/* Retrieve a reference to one of the function arguments, taking into
+ * account any push/pop activity. Note - doesn't track explicit
* manipulation of ESP by other instructions.
*/
struct x86_reg x86_fn_arg( struct x86_function *p, unsigned arg );
diff --git a/src/gallium/auxiliary/util/u_timed_winsys.h b/src/gallium/auxiliary/target-helpers/wrap_screen.c
index 542365112d7..eb475123198 100644
--- a/src/gallium/auxiliary/util/u_timed_winsys.h
+++ b/src/gallium/auxiliary/target-helpers/wrap_screen.c
@@ -1,6 +1,6 @@
/**************************************************************************
*
- * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
+ * Copyright 2007 Tungsten Graphics, Inc., Bismarck, ND., USA
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -25,17 +25,40 @@
*
*
**************************************************************************/
+
/*
- * Authors: Keith Whitwell <keithw-at-tungstengraphics-dot-com>
+ * Authors:
+ * Keith Whitwell
+ */
+
+#include "target-helpers/wrap_screen.h"
+#include "trace/tr_public.h"
+#include "identity/id_public.h"
+#include "util/u_debug.h"
+
+
+/* Centralized code to inject common wrapping layers:
*/
+struct pipe_screen *
+gallium_wrap_screen( struct pipe_screen *screen )
+{
+ /* Screen wrapping functions are required not to fail. If it is
+ * impossible to wrap a screen, the unwrapped screen should be
+ * returned instead. Any failure condition should be returned in
+ * an OUT argument.
+ *
+ * Otherwise it is really messy trying to clean up in this code.
+ */
+ if (debug_get_bool_option("GALLIUM_WRAP", FALSE)) {
+ screen = identity_screen_create(screen);
+ }
+ /* Trace does its own checking if it should run */
+ screen = trace_screen_create(screen);
-#ifndef U_TIMED_WINSYS_H
-#define U_TIMED_WINSYS_H
+ return screen;
+}
-struct pipe_winsys;
-struct pipe_winsys *u_timed_winsys_create( struct pipe_winsys *backend );
-#endif
diff --git a/src/gallium/auxiliary/target-helpers/wrap_screen.h b/src/gallium/auxiliary/target-helpers/wrap_screen.h
new file mode 100644
index 00000000000..7e76beb7c5a
--- /dev/null
+++ b/src/gallium/auxiliary/target-helpers/wrap_screen.h
@@ -0,0 +1,16 @@
+#ifndef WRAP_SCREEN_HELPER_H
+#define WRAP_SCREEN_HELPER_H
+
+#include "pipe/p_compiler.h"
+
+struct pipe_screen;
+
+/* Centralized code to inject common wrapping layers. Other layers
+ * can be introduced by specific targets, but these are the generally
+ * helpful ones we probably want everywhere.
+ */
+struct pipe_screen *
+gallium_wrap_screen( struct pipe_screen *screen );
+
+
+#endif
diff --git a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt
index 080fd4c7310..5d9eed92580 100644
--- a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt
+++ b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt
@@ -661,25 +661,6 @@ TGSI Instruction Specification
TBD
-1.9.8 BGNFOR - Begin a For-Loop
-
- dst.x = floor(src.x)
- dst.y = floor(src.y)
- dst.z = floor(src.z)
-
- if (dst.y <= 0)
- pc = [matching ENDFOR] + 1
- endif
-
- Note: The destination must be a loop register.
- The source must be a constant register.
-
-
-1.9.9 REP - Repeat
-
- TBD
-
-
1.9.10 ELSE - Else
TBD
@@ -690,23 +671,6 @@ TGSI Instruction Specification
TBD
-1.9.12 ENDFOR - End a For-Loop
-
- dst.x = dst.x + dst.z
- dst.y = dst.y - 1.0
-
- if (dst.y > 0)
- pc = [matching BGNFOR instruction] + 1
- endif
-
- Note: The destination must be a loop register.
-
-
-1.9.13 ENDREP - End Repeat
-
- TBD
-
-
1.10 GL_NV_vertex_program3
---------------------------
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index 57031419f8e..83000200189 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -586,7 +586,6 @@ iter_instruction(
/* update indentation */
if (inst->Instruction.Opcode == TGSI_OPCODE_IF ||
inst->Instruction.Opcode == TGSI_OPCODE_ELSE ||
- inst->Instruction.Opcode == TGSI_OPCODE_BGNFOR ||
inst->Instruction.Opcode == TGSI_OPCODE_BGNLOOP) {
ctx->indentation += indent_spaces;
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index f853ea2820e..82eac05dc4d 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -1796,6 +1796,12 @@ exec_declaration(struct tgsi_exec_machine *mach,
last = decl->Range.Last;
mask = decl->Declaration.UsageMask;
+ /* XXX we could remove this special-case code since
+ * mach->InterpCoefs[first].a0 should already have the
+ * front/back-face value. But we should first update the
+ * ureg code to emit the right UsageMask value (WRITEMASK_X).
+ * Then, we could remove the tgsi_exec_machine::Face field.
+ */
if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
uint i;
@@ -3180,14 +3186,6 @@ exec_instruction(
*pc = -1;
break;
- case TGSI_OPCODE_REP:
- assert (0);
- break;
-
- case TGSI_OPCODE_ENDREP:
- assert (0);
- break;
-
case TGSI_OPCODE_PUSHA:
assert (0);
break;
@@ -3252,29 +3250,6 @@ exec_instruction(
emit_primitive(mach);
break;
- case TGSI_OPCODE_BGNFOR:
- assert(mach->LoopCounterStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
- for (chan_index = 0; chan_index < 3; chan_index++) {
- FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index );
- }
- ++mach->LoopCounterStackTop;
- STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 0, CHAN_X);
- /* update LoopMask */
- if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) {
- mach->LoopMask &= ~0x1;
- }
- if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) {
- mach->LoopMask &= ~0x2;
- }
- if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) {
- mach->LoopMask &= ~0x4;
- }
- if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) {
- mach->LoopMask &= ~0x8;
- }
- /* TODO: if mach->LoopMask == 0, jump to end of loop */
- UPDATE_EXEC_MASK(mach);
- /* fall-through (for now) */
case TGSI_OPCODE_BGNLOOP:
/* push LoopMask and ContMasks */
assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
@@ -3289,56 +3264,6 @@ exec_instruction(
mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP;
break;
- case TGSI_OPCODE_ENDFOR:
- assert(mach->LoopCounterStackTop > 0);
- micro_sub(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
- &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
- &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
- /* update LoopMask */
- if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) {
- mach->LoopMask &= ~0x1;
- }
- if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) {
- mach->LoopMask &= ~0x2;
- }
- if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) {
- mach->LoopMask &= ~0x4;
- }
- if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) {
- mach->LoopMask &= ~0x8;
- }
- micro_add(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
- &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
- &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]);
- assert(mach->LoopLabelStackTop > 0);
- inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1];
- STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_X], 0, CHAN_X);
- /* Restore ContMask, but don't pop */
- assert(mach->ContStackTop > 0);
- mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
- UPDATE_EXEC_MASK(mach);
- if (mach->ExecMask) {
- /* repeat loop: jump to instruction just past BGNLOOP */
- assert(mach->LoopLabelStackTop > 0);
- *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
- }
- else {
- /* exit loop: pop LoopMask */
- assert(mach->LoopStackTop > 0);
- mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
- /* pop ContMask */
- assert(mach->ContStackTop > 0);
- mach->ContMask = mach->ContStack[--mach->ContStackTop];
- assert(mach->LoopLabelStackTop > 0);
- --mach->LoopLabelStackTop;
- assert(mach->LoopCounterStackTop > 0);
- --mach->LoopCounterStackTop;
-
- mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
- }
- UPDATE_EXEC_MASK(mach);
- break;
-
case TGSI_OPCODE_ENDLOOP:
/* Restore ContMask, but don't pop */
assert(mach->ContStackTop > 0);
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
index de0e09cdbae..cfa2f631bd8 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -106,12 +106,12 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
{ 1, 2, 1, 0, 0, 0, "TXL", TGSI_OPCODE_TXL },
{ 0, 0, 0, 0, 0, 0, "BRK", TGSI_OPCODE_BRK },
{ 0, 1, 0, 1, 0, 1, "IF", TGSI_OPCODE_IF },
- { 1, 1, 0, 0, 0, 1, "BGNFOR", TGSI_OPCODE_BGNFOR },
- { 0, 1, 0, 0, 0, 1, "REP", TGSI_OPCODE_REP },
+ { 1, 1, 0, 0, 0, 1, "", 75 }, /* removed */
+ { 0, 1, 0, 0, 0, 1, "", 76 }, /* removed */
{ 0, 0, 0, 1, 1, 1, "ELSE", TGSI_OPCODE_ELSE },
{ 0, 0, 0, 0, 1, 0, "ENDIF", TGSI_OPCODE_ENDIF },
- { 1, 0, 0, 0, 1, 0, "ENDFOR", TGSI_OPCODE_ENDFOR },
- { 0, 0, 0, 0, 1, 0, "ENDREP", TGSI_OPCODE_ENDREP },
+ { 1, 0, 0, 0, 1, 0, "", 79 }, /* removed */
+ { 0, 0, 0, 0, 1, 0, "", 80 }, /* removed */
{ 0, 1, 0, 0, 0, 0, "PUSHA", TGSI_OPCODE_PUSHA },
{ 1, 0, 0, 0, 0, 0, "POPA", TGSI_OPCODE_POPA },
{ 1, 1, 0, 0, 0, 0, "CEIL", TGSI_OPCODE_CEIL },
diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
index e4af15c156f..e472947507d 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
@@ -111,12 +111,8 @@ OP12(DP2)
OP12_TEX(TXL)
OP00(BRK)
OP01_LBL(IF)
-OP11(BGNFOR)
-OP01(REP)
OP00_LBL(ELSE)
OP00(ENDIF)
-OP10(ENDFOR)
-OP00(ENDREP)
OP01(PUSHA)
OP10(POPA)
OP11(CEIL)
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
index 371f690b295..76b7564cc36 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
@@ -346,25 +346,6 @@ iter_instruction(
}
}
- switch (inst->Instruction.Opcode) {
- case TGSI_OPCODE_BGNFOR:
- case TGSI_OPCODE_ENDFOR:
- if (inst->Dst[0].Register.File != TGSI_FILE_LOOP ||
- inst->Dst[0].Register.Index != 0) {
- report_error(ctx, "Destination register must be LOOP[0]");
- }
- break;
- }
-
- switch (inst->Instruction.Opcode) {
- case TGSI_OPCODE_BGNFOR:
- if (inst->Src[0].Register.File != TGSI_FILE_CONSTANT &&
- inst->Src[0].Register.File != TGSI_FILE_IMMEDIATE) {
- report_error(ctx, "Source register file must be either CONST or IMM");
- }
- break;
- }
-
ctx->num_instructions++;
return TRUE;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index a85cc4659e0..1071298b497 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -2533,14 +2533,6 @@ emit_instruction(
return 0;
break;
- case TGSI_OPCODE_BGNFOR:
- return 0;
- break;
-
- case TGSI_OPCODE_REP:
- return 0;
- break;
-
case TGSI_OPCODE_ELSE:
return 0;
break;
@@ -2549,14 +2541,6 @@ emit_instruction(
return 0;
break;
- case TGSI_OPCODE_ENDFOR:
- return 0;
- break;
-
- case TGSI_OPCODE_ENDREP:
- return 0;
- break;
-
case TGSI_OPCODE_PUSHA:
return 0;
break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c
index f918151daaa..0b468a9184e 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_text.c
@@ -55,7 +55,7 @@ static boolean is_digit_alpha_underscore( const char *cur )
return is_digit( cur ) || is_alpha_underscore( cur );
}
-static boolean uprcase( char c )
+static char uprcase( char c )
{
if (c >= 'a' && c <= 'z')
return c += 'A' - 'a';
@@ -76,7 +76,7 @@ streq_nocase_uprcase(const char *str1,
str1++;
str2++;
}
- return TRUE;
+ return *str1 == 0 && *str2 == 0;
}
static boolean str_match_no_case( const char **pcur, const char *str )
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
index 3d0455de7ce..f725405ade1 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
@@ -1158,7 +1158,7 @@ static void emit_decl_range( struct ureg_program *ureg,
out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
out[0].decl.NrTokens = 2;
out[0].decl.File = file;
- out[0].decl.UsageMask = 0xf;
+ out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
out[0].decl.Interpolate = TGSI_INTERPOLATE_CONSTANT;
out[0].decl.Semantic = 0;
@@ -1180,7 +1180,7 @@ emit_decl_range2D(struct ureg_program *ureg,
out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
out[0].decl.NrTokens = 3;
out[0].decl.File = file;
- out[0].decl.UsageMask = 0xf;
+ out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
out[0].decl.Interpolate = TGSI_INTERPOLATE_CONSTANT;
out[0].decl.Dimension = 1;
diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h
index 54ed2c1a4be..edd95e07882 100644
--- a/src/gallium/auxiliary/translate/translate.h
+++ b/src/gallium/auxiliary/translate/translate.h
@@ -76,7 +76,8 @@ struct translate {
void (*set_buffer)( struct translate *,
unsigned i,
const void *ptr,
- unsigned stride );
+ unsigned stride,
+ unsigned max_index );
void (PIPE_CDECL *run_elts)( struct translate *,
const unsigned *elts,
diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c
index c9ec2b32bfe..a9272fbb491 100644
--- a/src/gallium/auxiliary/translate/translate_generic.c
+++ b/src/gallium/auxiliary/translate/translate_generic.c
@@ -31,6 +31,7 @@
*/
#include "util/u_memory.h"
+#include "util/u_math.h"
#include "pipe/p_state.h"
#include "translate.h"
@@ -58,6 +59,7 @@ struct translate_generic {
char *input_ptr;
unsigned input_stride;
+ unsigned max_index;
} attrib[PIPE_MAX_ATTRIBS];
@@ -393,10 +395,10 @@ static fetch_func get_fetch_func( enum pipe_format format )
return &fetch_R8G8B8A8_SSCALED;
case PIPE_FORMAT_B8G8R8A8_UNORM:
- return &fetch_A8R8G8B8_UNORM;
+ return &fetch_B8G8R8A8_UNORM;
case PIPE_FORMAT_A8R8G8B8_UNORM:
- return &fetch_B8G8R8A8_UNORM;
+ return &fetch_A8R8G8B8_UNORM;
case PIPE_FORMAT_R32_FIXED:
return &fetch_R32_FIXED;
@@ -552,10 +554,10 @@ static emit_func get_emit_func( enum pipe_format format )
return &emit_R8G8B8A8_SSCALED;
case PIPE_FORMAT_B8G8R8A8_UNORM:
- return &emit_A8R8G8B8_UNORM;
+ return &emit_B8G8R8A8_UNORM;
case PIPE_FORMAT_A8R8G8B8_UNORM:
- return &emit_B8G8R8A8_UNORM;
+ return &emit_A8R8G8B8_UNORM;
default:
assert(0);
@@ -588,19 +590,22 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate,
for (attr = 0; attr < nr_attrs; attr++) {
float data[4];
const char *src;
+ unsigned index;
char *dst = (vert +
tg->attrib[attr].output_offset);
if (tg->attrib[attr].instance_divisor) {
- src = tg->attrib[attr].input_ptr +
- tg->attrib[attr].input_stride *
- (instance_id / tg->attrib[attr].instance_divisor);
+ index = instance_id / tg->attrib[attr].instance_divisor;
} else {
- src = tg->attrib[attr].input_ptr +
- tg->attrib[attr].input_stride * elt;
+ index = elt;
}
+ index = MIN2(index, tg->attrib[attr].max_index);
+
+ src = tg->attrib[attr].input_ptr +
+ tg->attrib[attr].input_stride * index;
+
tg->attrib[attr].fetch( src, data );
if (0) debug_printf("vert %d/%d attr %d: %f %f %f %f\n",
@@ -670,7 +675,8 @@ static void PIPE_CDECL generic_run( struct translate *translate,
static void generic_set_buffer( struct translate *translate,
unsigned buf,
const void *ptr,
- unsigned stride )
+ unsigned stride,
+ unsigned max_index )
{
struct translate_generic *tg = translate_generic(translate);
unsigned i;
@@ -680,6 +686,7 @@ static void generic_set_buffer( struct translate *translate,
tg->attrib[i].input_ptr = ((char *)ptr +
tg->attrib[i].input_offset);
tg->attrib[i].input_stride = stride;
+ tg->attrib[i].max_index = max_index;
}
}
}
diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c
index 03e093c11ea..ef3aa674a34 100644
--- a/src/gallium/auxiliary/translate/translate_sse.c
+++ b/src/gallium/auxiliary/translate/translate_sse.c
@@ -61,6 +61,7 @@ typedef void (PIPE_CDECL *run_elts_func)( struct translate *translate,
struct translate_buffer {
const void *base_ptr;
unsigned stride;
+ unsigned max_index;
};
struct translate_buffer_varient {
@@ -336,7 +337,7 @@ static boolean translate_attr( struct translate_sse *p,
case PIPE_FORMAT_R32G32B32A32_FLOAT:
emit_load_R32G32B32A32(p, dataXMM, srcECX);
break;
- case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
emit_load_R8G8B8A8_UNORM(p, dataXMM, srcECX);
emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W));
break;
@@ -360,7 +361,7 @@ static boolean translate_attr( struct translate_sse *p,
case PIPE_FORMAT_R32G32B32A32_FLOAT:
emit_store_R32G32B32A32(p, dstEAX, dataXMM);
break;
- case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W));
emit_store_R8G8B8A8_UNORM(p, dstEAX, dataXMM);
break;
@@ -423,6 +424,11 @@ static boolean init_inputs( struct translate_sse *p,
} else {
x86_mov(p->func, tmp_EAX, elt);
}
+
+ /*
+ * TODO: Respect translate_buffer::max_index.
+ */
+
x86_imul(p->func, tmp_EAX, buf_stride);
x86_add(p->func, tmp_EAX, buf_base_ptr);
@@ -666,13 +672,15 @@ static boolean build_vertex_emit( struct translate_sse *p,
static void translate_sse_set_buffer( struct translate *translate,
unsigned buf,
const void *ptr,
- unsigned stride )
+ unsigned stride,
+ unsigned max_index )
{
struct translate_sse *p = (struct translate_sse *)translate;
if (buf < p->nr_buffers) {
p->buffer[buf].base_ptr = (char *)ptr;
p->buffer[buf].stride = stride;
+ p->buffer[buf].max_index = max_index;
}
if (0) debug_printf("%s %d/%d: %p %d\n",
diff --git a/src/gallium/auxiliary/util/.gitignore b/src/gallium/auxiliary/util/.gitignore
index 448d2f304fb..5dd0408effb 100644
--- a/src/gallium/auxiliary/util/.gitignore
+++ b/src/gallium/auxiliary/util/.gitignore
@@ -1,3 +1,3 @@
-u_format_access.c
+u_format_srgb.c
u_format_table.c
-u_format_pack.h
+u_half.c
diff --git a/src/gallium/auxiliary/util/u_atomic.h b/src/gallium/auxiliary/util/u_atomic.h
index 3c42477ad4f..a1568233906 100644
--- a/src/gallium/auxiliary/util/u_atomic.h
+++ b/src/gallium/auxiliary/util/u_atomic.h
@@ -29,7 +29,7 @@
#define PIPE_ATOMIC_ASM_MSVC_X86
#elif (defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86))
#define PIPE_ATOMIC_ASM_GCC_X86
-#elif defined(PIPE_CC_GCC)
+#elif defined(PIPE_CC_GCC) && (PIPE_CC_GCC_VERSION >= 401)
#define PIPE_ATOMIC_GCC_INTRINSIC
#else
#error "Unsupported platform"
diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c
index 0b263a9db5c..e45310b9bb7 100644
--- a/src/gallium/auxiliary/util/u_blit.c
+++ b/src/gallium/auxiliary/util/u_blit.c
@@ -45,6 +45,7 @@
#include "util/u_format.h"
#include "util/u_math.h"
#include "util/u_memory.h"
+#include "util/u_sampler.h"
#include "util/u_simple_shaders.h"
#include "util/u_surface.h"
#include "util/u_rect.h"
@@ -63,11 +64,12 @@ struct blit_state
struct pipe_sampler_state sampler;
struct pipe_viewport_state viewport;
struct pipe_clip_state clip;
+ struct pipe_vertex_element velem[2];
void *vs;
void *fs[TGSI_WRITEMASK_XYZW + 1];
- struct pipe_buffer *vbuf; /**< quad vertices */
+ struct pipe_resource *vbuf; /**< quad vertices */
unsigned vbuf_slot;
float vertices[4][2][4]; /**< vertex/texcoords for quad */
@@ -114,6 +116,15 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso)
ctx->sampler.mag_img_filter = 0; /* set later */
ctx->sampler.normalized_coords = 1;
+ /* vertex elements state */
+ memset(&ctx->velem[0], 0, sizeof(ctx->velem[0]) * 2);
+ for (i = 0; i < 2; i++) {
+ ctx->velem[i].src_offset = i * 4 * sizeof(float);
+ ctx->velem[i].instance_divisor = 0;
+ ctx->velem[i].vertex_buffer_index = 0;
+ ctx->velem[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ }
+
/* vertex shader - still required to provide the linkage between
* fragment shader input semantics and vertex_element/buffers.
*/
@@ -156,7 +167,7 @@ util_destroy_blit(struct blit_state *ctx)
if (ctx->fs[i])
pipe->delete_fs_state(pipe, ctx->fs[i]);
- pipe_buffer_reference(&ctx->vbuf, NULL);
+ pipe_resource_reference(&ctx->vbuf, NULL);
FREE(ctx);
}
@@ -175,8 +186,7 @@ get_next_slot( struct blit_state *ctx )
if (!ctx->vbuf) {
ctx->vbuf = pipe_buffer_create(ctx->pipe->screen,
- 32,
- PIPE_BUFFER_USAGE_VERTEX,
+ PIPE_BIND_VERTEX_BUFFER,
max_slots * sizeof ctx->vertices);
}
@@ -225,7 +235,7 @@ setup_vertex_data_tex(struct blit_state *ctx,
offset = get_next_slot( ctx );
- pipe_buffer_write_nooverlap(ctx->pipe->screen, ctx->vbuf,
+ pipe_buffer_write_nooverlap(ctx->pipe, ctx->vbuf,
offset, sizeof(ctx->vertices), ctx->vertices);
return offset;
@@ -270,6 +280,7 @@ regions_overlap(int srcX0, int srcY0,
void
util_blit_pixels_writemask(struct blit_state *ctx,
struct pipe_surface *src,
+ struct pipe_sampler_view *src_sampler_view,
int srcX0, int srcY0,
int srcX1, int srcY1,
struct pipe_surface *dst,
@@ -280,7 +291,7 @@ util_blit_pixels_writemask(struct blit_state *ctx,
{
struct pipe_context *pipe = ctx->pipe;
struct pipe_screen *screen = pipe->screen;
- struct pipe_texture *tex = NULL;
+ struct pipe_sampler_view *sampler_view = NULL;
struct pipe_framebuffer_state fb;
const int srcW = abs(srcX1 - srcX0);
const int srcH = abs(srcY1 - srcY0);
@@ -292,9 +303,9 @@ util_blit_pixels_writemask(struct blit_state *ctx,
filter == PIPE_TEX_MIPFILTER_LINEAR);
assert(screen->is_format_supported(screen, src->format, PIPE_TEXTURE_2D,
- PIPE_TEXTURE_USAGE_SAMPLER, 0));
+ PIPE_BIND_SAMPLER_VIEW, 0));
assert(screen->is_format_supported(screen, dst->format, PIPE_TEXTURE_2D,
- PIPE_TEXTURE_USAGE_RENDER_TARGET, 0));
+ PIPE_BIND_RENDER_TARGET, 0));
/* do the regions overlap? */
overlap = util_same_surface(src, dst) &&
@@ -323,7 +334,7 @@ util_blit_pixels_writemask(struct blit_state *ctx,
}
assert(screen->is_format_supported(screen, dst->format, PIPE_TEXTURE_2D,
- PIPE_TEXTURE_USAGE_RENDER_TARGET, 0));
+ PIPE_BIND_RENDER_TARGET, 0));
/* Create a temporary texture when src and dest alias or when src
* is anything other than a single-level 2d texture.
@@ -334,7 +345,9 @@ util_blit_pixels_writemask(struct blit_state *ctx,
src->texture->target != PIPE_TEXTURE_2D ||
src->texture->last_level != 0)
{
- struct pipe_texture texTemp;
+ struct pipe_resource texTemp;
+ struct pipe_resource *tex;
+ struct pipe_sampler_view sv_templ;
struct pipe_surface *texSurf;
const int srcLeft = MIN2(srcX0, srcX1);
const int srcTop = MIN2(srcY0, srcY1);
@@ -361,13 +374,22 @@ util_blit_pixels_writemask(struct blit_state *ctx,
texTemp.width0 = srcW;
texTemp.height0 = srcH;
texTemp.depth0 = 1;
+ texTemp.bind = PIPE_BIND_SAMPLER_VIEW;
- tex = screen->texture_create(screen, &texTemp);
+ tex = screen->resource_create(screen, &texTemp);
if (!tex)
return;
+ u_sampler_view_default_template(&sv_templ, tex, tex->format);
+
+ sampler_view = ctx->pipe->create_sampler_view(ctx->pipe, tex, &sv_templ);
+ if (!sampler_view) {
+ pipe_resource_reference(&tex, NULL);
+ return;
+ }
+
texSurf = screen->get_tex_surface(screen, tex, 0, 0, 0,
- PIPE_BUFFER_USAGE_GPU_WRITE);
+ PIPE_BIND_BLIT_DESTINATION);
/* load temp texture */
if (pipe->surface_copy) {
@@ -389,33 +411,38 @@ util_blit_pixels_writemask(struct blit_state *ctx,
s1 = 1.0f;
t0 = 0.0f;
t1 = 1.0f;
+
+ pipe_resource_reference(&tex, NULL);
}
else {
- pipe_texture_reference(&tex, src->texture);
- s0 = srcX0 / (float)tex->width0;
- s1 = srcX1 / (float)tex->width0;
- t0 = srcY0 / (float)tex->height0;
- t1 = srcY1 / (float)tex->height0;
+ pipe_sampler_view_reference(&sampler_view, src_sampler_view);
+ s0 = srcX0 / (float)src->texture->width0;
+ s1 = srcX1 / (float)src->texture->width0;
+ t0 = srcY0 / (float)src->texture->height0;
+ t1 = srcY1 / (float)src->texture->height0;
}
+
/* save state (restored below) */
cso_save_blend(ctx->cso);
cso_save_depth_stencil_alpha(ctx->cso);
cso_save_rasterizer(ctx->cso);
cso_save_samplers(ctx->cso);
- cso_save_sampler_textures(ctx->cso);
+ cso_save_fragment_sampler_views(ctx->cso);
cso_save_viewport(ctx->cso);
cso_save_framebuffer(ctx->cso);
cso_save_fragment_shader(ctx->cso);
cso_save_vertex_shader(ctx->cso);
cso_save_clip(ctx->cso);
+ cso_save_vertex_elements(ctx->cso);
/* set misc state we care about */
cso_set_blend(ctx->cso, &ctx->blend);
cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil);
cso_set_rasterizer(ctx->cso, &ctx->rasterizer);
cso_set_clip(ctx->cso, &ctx->clip);
+ cso_set_vertex_elements(ctx->cso, 2, ctx->velem);
/* sampler */
ctx->sampler.min_img_filter = filter;
@@ -435,7 +462,7 @@ util_blit_pixels_writemask(struct blit_state *ctx,
cso_set_viewport(ctx->cso, &ctx->viewport);
/* texture */
- cso_set_sampler_textures(ctx->cso, 1, &tex);
+ cso_set_fragment_sampler_views(ctx->cso, 1, &sampler_view);
if (ctx->fs[writemask] == NULL)
ctx->fs[writemask] =
@@ -474,20 +501,22 @@ util_blit_pixels_writemask(struct blit_state *ctx,
cso_restore_depth_stencil_alpha(ctx->cso);
cso_restore_rasterizer(ctx->cso);
cso_restore_samplers(ctx->cso);
- cso_restore_sampler_textures(ctx->cso);
+ cso_restore_fragment_sampler_views(ctx->cso);
cso_restore_viewport(ctx->cso);
cso_restore_framebuffer(ctx->cso);
cso_restore_fragment_shader(ctx->cso);
cso_restore_vertex_shader(ctx->cso);
cso_restore_clip(ctx->cso);
+ cso_restore_vertex_elements(ctx->cso);
- pipe_texture_reference(&tex, NULL);
+ pipe_sampler_view_reference(&sampler_view, NULL);
}
void
util_blit_pixels(struct blit_state *ctx,
struct pipe_surface *src,
+ struct pipe_sampler_view *src_sampler_view,
int srcX0, int srcY0,
int srcX1, int srcY1,
struct pipe_surface *dst,
@@ -495,7 +524,7 @@ util_blit_pixels(struct blit_state *ctx,
int dstX1, int dstY1,
float z, uint filter )
{
- util_blit_pixels_writemask( ctx, src,
+ util_blit_pixels_writemask( ctx, src, src_sampler_view,
srcX0, srcY0,
srcX1, srcY1,
dst,
@@ -511,7 +540,7 @@ util_blit_pixels(struct blit_state *ctx,
*/
void util_blit_flush( struct blit_state *ctx )
{
- pipe_buffer_reference(&ctx->vbuf, NULL);
+ pipe_resource_reference(&ctx->vbuf, NULL);
ctx->vbuf_slot = 0;
}
@@ -526,21 +555,23 @@ void util_blit_flush( struct blit_state *ctx )
*/
void
util_blit_pixels_tex(struct blit_state *ctx,
- struct pipe_texture *tex,
- int srcX0, int srcY0,
- int srcX1, int srcY1,
- struct pipe_surface *dst,
- int dstX0, int dstY0,
- int dstX1, int dstY1,
- float z, uint filter)
+ struct pipe_sampler_view *src_sampler_view,
+ int srcX0, int srcY0,
+ int srcX1, int srcY1,
+ struct pipe_surface *dst,
+ int dstX0, int dstY0,
+ int dstX1, int dstY1,
+ float z, uint filter)
{
struct pipe_framebuffer_state fb;
float s0, t0, s1, t1;
unsigned offset;
+ struct pipe_resource *tex = src_sampler_view->texture;
assert(filter == PIPE_TEX_MIPFILTER_NEAREST ||
filter == PIPE_TEX_MIPFILTER_LINEAR);
+ assert(tex);
assert(tex->width0 != 0);
assert(tex->height0 != 0);
@@ -551,7 +582,7 @@ util_blit_pixels_tex(struct blit_state *ctx,
assert(ctx->pipe->screen->is_format_supported(ctx->pipe->screen, dst->format,
PIPE_TEXTURE_2D,
- PIPE_TEXTURE_USAGE_RENDER_TARGET,
+ PIPE_BIND_RENDER_TARGET,
0));
/* save state (restored below) */
@@ -559,17 +590,19 @@ util_blit_pixels_tex(struct blit_state *ctx,
cso_save_depth_stencil_alpha(ctx->cso);
cso_save_rasterizer(ctx->cso);
cso_save_samplers(ctx->cso);
- cso_save_sampler_textures(ctx->cso);
+ cso_save_fragment_sampler_views(ctx->cso);
cso_save_framebuffer(ctx->cso);
cso_save_fragment_shader(ctx->cso);
cso_save_vertex_shader(ctx->cso);
cso_save_clip(ctx->cso);
+ cso_save_vertex_elements(ctx->cso);
/* set misc state we care about */
cso_set_blend(ctx->cso, &ctx->blend);
cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil);
cso_set_rasterizer(ctx->cso, &ctx->rasterizer);
cso_set_clip(ctx->cso, &ctx->clip);
+ cso_set_vertex_elements(ctx->cso, 2, ctx->velem);
/* sampler */
ctx->sampler.min_img_filter = filter;
@@ -589,7 +622,7 @@ util_blit_pixels_tex(struct blit_state *ctx,
cso_set_viewport(ctx->cso, &ctx->viewport);
/* texture */
- cso_set_sampler_textures(ctx->cso, 1, &tex);
+ cso_set_fragment_sampler_views(ctx->cso, 1, &src_sampler_view);
/* shaders */
cso_set_fragment_shader_handle(ctx->cso, ctx->fs[TGSI_WRITEMASK_XYZW]);
@@ -623,9 +656,10 @@ util_blit_pixels_tex(struct blit_state *ctx,
cso_restore_depth_stencil_alpha(ctx->cso);
cso_restore_rasterizer(ctx->cso);
cso_restore_samplers(ctx->cso);
- cso_restore_sampler_textures(ctx->cso);
+ cso_restore_fragment_sampler_views(ctx->cso);
cso_restore_framebuffer(ctx->cso);
cso_restore_fragment_shader(ctx->cso);
cso_restore_vertex_shader(ctx->cso);
cso_restore_clip(ctx->cso);
+ cso_restore_vertex_elements(ctx->cso);
}
diff --git a/src/gallium/auxiliary/util/u_blit.h b/src/gallium/auxiliary/util/u_blit.h
index a102021529e..464ff9aaced 100644
--- a/src/gallium/auxiliary/util/u_blit.h
+++ b/src/gallium/auxiliary/util/u_blit.h
@@ -37,7 +37,7 @@ extern "C" {
struct pipe_context;
struct pipe_surface;
-struct pipe_texture;
+struct pipe_resource;
struct cso_context;
@@ -53,6 +53,7 @@ util_destroy_blit(struct blit_state *ctx);
extern void
util_blit_pixels(struct blit_state *ctx,
struct pipe_surface *src,
+ struct pipe_sampler_view *src_sampler_view,
int srcX0, int srcY0,
int srcX1, int srcY1,
struct pipe_surface *dst,
@@ -63,6 +64,7 @@ util_blit_pixels(struct blit_state *ctx,
void
util_blit_pixels_writemask(struct blit_state *ctx,
struct pipe_surface *src,
+ struct pipe_sampler_view *src_sampler_view,
int srcX0, int srcY0,
int srcX1, int srcY1,
struct pipe_surface *dst,
@@ -73,7 +75,7 @@ util_blit_pixels_writemask(struct blit_state *ctx,
extern void
util_blit_pixels_tex(struct blit_state *ctx,
- struct pipe_texture *tex,
+ struct pipe_sampler_view *src_sampler_view,
int srcX0, int srcY0,
int srcX1, int srcY1,
struct pipe_surface *dst,
diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c
index 0ba09d33bfc..956aedc8a15 100644
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -45,6 +45,7 @@
#include "util/u_draw_quad.h"
#include "util/u_pack_color.h"
#include "util/u_rect.h"
+#include "util/u_sampler.h"
#include "util/u_simple_shaders.h"
#include "util/u_texture.h"
@@ -55,7 +56,7 @@ struct blitter_context_priv
struct blitter_context blitter;
struct pipe_context *pipe; /**< pipe context */
- struct pipe_buffer *vbuf; /**< quad */
+ struct pipe_resource *vbuf; /**< quad */
float vertices[4][2][4]; /**< {pos, color} or {pos, texcoord} */
@@ -88,12 +89,16 @@ struct blitter_context_priv
void *dsa_write_depth_keep_stencil;
void *dsa_keep_depth_stencil;
+ void *velem_state;
+
/* Sampler state for clamping to a miplevel. */
void *sampler_state[PIPE_MAX_TEXTURE_LEVELS];
/* Rasterizer state. */
void *rs_state;
+ struct pipe_sampler_view *sampler_view;
+
/* Viewport state. */
struct pipe_viewport_state viewport;
@@ -104,10 +109,11 @@ struct blitter_context_priv
struct blitter_context *util_blitter_create(struct pipe_context *pipe)
{
struct blitter_context_priv *ctx;
- struct pipe_blend_state blend = { 0 };
- struct pipe_depth_stencil_alpha_state dsa = { { 0 } };
- struct pipe_rasterizer_state rs_state = { 0 };
+ struct pipe_blend_state blend;
+ struct pipe_depth_stencil_alpha_state dsa;
+ struct pipe_rasterizer_state rs_state;
struct pipe_sampler_state *sampler_state;
+ struct pipe_vertex_element velem[2];
unsigned i;
ctx = CALLOC_STRUCT(blitter_context_priv);
@@ -122,17 +128,21 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
ctx->blitter.saved_rs_state = INVALID_PTR;
ctx->blitter.saved_fs = INVALID_PTR;
ctx->blitter.saved_vs = INVALID_PTR;
+ ctx->blitter.saved_velem_state = INVALID_PTR;
ctx->blitter.saved_fb_state.nr_cbufs = ~0;
- ctx->blitter.saved_num_textures = ~0;
+ ctx->blitter.saved_num_sampler_views = ~0;
ctx->blitter.saved_num_sampler_states = ~0;
+ ctx->blitter.saved_num_vertex_buffers = ~0;
/* blend state objects */
+ memset(&blend, 0, sizeof(blend));
ctx->blend_keep_color = pipe->create_blend_state(pipe, &blend);
blend.rt[0].colormask = PIPE_MASK_RGBA;
ctx->blend_write_color = pipe->create_blend_state(pipe, &blend);
/* depth stencil alpha state objects */
+ memset(&dsa, 0, sizeof(dsa));
ctx->dsa_keep_depth_stencil =
pipe->create_depth_stencil_alpha_state(pipe, &dsa);
@@ -159,6 +169,7 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
sampler_state->wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
sampler_state->wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
sampler_state->wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+ sampler_state->normalized_coords = TRUE;
/* The sampler state objects which sample from a specified mipmap level
* are created on-demand. */
@@ -170,6 +181,16 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
rs_state.flatshade = 1;
ctx->rs_state = pipe->create_rasterizer_state(pipe, &rs_state);
+ /* vertex elements state */
+ memset(&velem[0], 0, sizeof(velem[0]) * 2);
+ for (i = 0; i < 2; i++) {
+ velem[i].src_offset = i * 4 * sizeof(float);
+ velem[i].instance_divisor = 0;
+ velem[i].vertex_buffer_index = 0;
+ velem[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ }
+ ctx->velem_state = pipe->create_vertex_elements_state(pipe, 2, &velem[0]);
+
/* fragment shaders are created on-demand */
/* vertex shaders */
@@ -196,8 +217,7 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
/* create the vertex buffer */
ctx->vbuf = pipe_buffer_create(ctx->pipe->screen,
- 32,
- PIPE_BUFFER_USAGE_VERTEX,
+ PIPE_BIND_VERTEX_BUFFER,
sizeof(ctx->vertices));
return &ctx->blitter;
@@ -219,6 +239,7 @@ void util_blitter_destroy(struct blitter_context *blitter)
pipe->delete_rasterizer_state(pipe, ctx->rs_state);
pipe->delete_vs_state(pipe, ctx->vs_col);
pipe->delete_vs_state(pipe, ctx->vs_tex);
+ pipe->delete_vertex_elements_state(pipe, ctx->velem_state);
for (i = 0; i < PIPE_MAX_TEXTURE_TYPES; i++) {
if (ctx->fs_texfetch_col[i])
@@ -235,7 +256,11 @@ void util_blitter_destroy(struct blitter_context *blitter)
if (ctx->sampler_state[i])
pipe->delete_sampler_state(pipe, ctx->sampler_state[i]);
- pipe_buffer_reference(&ctx->vbuf, NULL);
+ if (ctx->sampler_view) {
+ pipe_sampler_view_reference(&ctx->sampler_view, NULL);
+ }
+
+ pipe_resource_reference(&ctx->vbuf, NULL);
FREE(ctx);
}
@@ -246,7 +271,8 @@ static void blitter_check_saved_CSOs(struct blitter_context_priv *ctx)
ctx->blitter.saved_dsa_state != INVALID_PTR &&
ctx->blitter.saved_rs_state != INVALID_PTR &&
ctx->blitter.saved_fs != INVALID_PTR &&
- ctx->blitter.saved_vs != INVALID_PTR);
+ ctx->blitter.saved_vs != INVALID_PTR &&
+ ctx->blitter.saved_velem_state != INVALID_PTR);
}
static void blitter_restore_CSOs(struct blitter_context_priv *ctx)
@@ -259,12 +285,14 @@ static void blitter_restore_CSOs(struct blitter_context_priv *ctx)
pipe->bind_rasterizer_state(pipe, ctx->blitter.saved_rs_state);
pipe->bind_fs_state(pipe, ctx->blitter.saved_fs);
pipe->bind_vs_state(pipe, ctx->blitter.saved_vs);
+ pipe->bind_vertex_elements_state(pipe, ctx->blitter.saved_velem_state);
ctx->blitter.saved_blend_state = INVALID_PTR;
ctx->blitter.saved_dsa_state = INVALID_PTR;
ctx->blitter.saved_rs_state = INVALID_PTR;
ctx->blitter.saved_fs = INVALID_PTR;
ctx->blitter.saved_vs = INVALID_PTR;
+ ctx->blitter.saved_velem_state = INVALID_PTR;
pipe->set_stencil_ref(pipe, &ctx->blitter.saved_stencil_ref);
@@ -285,11 +313,18 @@ static void blitter_restore_CSOs(struct blitter_context_priv *ctx)
ctx->blitter.saved_num_sampler_states = ~0;
}
- if (ctx->blitter.saved_num_textures != ~0) {
- pipe->set_fragment_sampler_textures(pipe,
- ctx->blitter.saved_num_textures,
- ctx->blitter.saved_textures);
- ctx->blitter.saved_num_textures = ~0;
+ if (ctx->blitter.saved_num_sampler_views != ~0) {
+ pipe->set_fragment_sampler_views(pipe,
+ ctx->blitter.saved_num_sampler_views,
+ ctx->blitter.saved_sampler_views);
+ ctx->blitter.saved_num_sampler_views = ~0;
+ }
+
+ if (ctx->blitter.saved_num_vertex_buffers != ~0) {
+ pipe->set_vertex_buffers(pipe,
+ ctx->blitter.saved_num_vertex_buffers,
+ ctx->blitter.saved_vertex_buffers);
+ ctx->blitter.saved_num_vertex_buffers = ~0;
}
}
@@ -424,7 +459,7 @@ static void blitter_draw_quad(struct blitter_context_priv *ctx)
struct pipe_context *pipe = ctx->pipe;
/* write vertices and draw them */
- pipe_buffer_write(pipe->screen, ctx->vbuf,
+ pipe_buffer_write(pipe, ctx->vbuf,
0, sizeof(ctx->vertices), ctx->vertices);
util_draw_vertex_buffer(pipe, ctx->vbuf, 0, PIPE_PRIM_TRIANGLE_FAN,
@@ -569,6 +604,7 @@ void util_blitter_clear(struct blitter_context *blitter,
pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
pipe->bind_rasterizer_state(pipe, ctx->rs_state);
+ pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, num_cbufs));
pipe->bind_vs_state(pipe, ctx->vs_col);
@@ -600,9 +636,10 @@ static void util_blitter_do_copy(struct blitter_context *blitter,
struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
struct pipe_context *pipe = ctx->pipe;
struct pipe_framebuffer_state fb_state;
+ struct pipe_sampler_view viewTempl, *view;
assert(blitter->saved_fb_state.nr_cbufs != ~0);
- assert(blitter->saved_num_textures != ~0);
+ assert(blitter->saved_num_sampler_views != ~0);
assert(blitter->saved_num_sampler_states != ~0);
assert(src->texture->target < PIPE_MAX_TEXTURE_TYPES);
@@ -630,11 +667,24 @@ static void util_blitter_do_copy(struct blitter_context *blitter,
fb_state.zsbuf = 0;
}
+ u_sampler_view_default_template(&viewTempl,
+ src->texture,
+ src->texture->format);
+ view = pipe->create_sampler_view(pipe,
+ src->texture,
+ &viewTempl);
+
+ if (ctx->sampler_view) {
+ pipe_sampler_view_reference(&ctx->sampler_view, NULL);
+ }
+ ctx->sampler_view = view;
+
pipe->bind_rasterizer_state(pipe, ctx->rs_state);
pipe->bind_vs_state(pipe, ctx->vs_tex);
pipe->bind_fragment_sampler_states(pipe, 1,
blitter_get_sampler_state(ctx, src->level));
- pipe->set_fragment_sampler_textures(pipe, 1, &src->texture);
+ pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
+ pipe->set_fragment_sampler_views(pipe, 1, &view);
pipe->set_framebuffer_state(pipe, &fb_state);
/* set texture coordinates */
@@ -672,8 +722,8 @@ static void util_blitter_overlap_copy(struct blitter_context *blitter,
struct pipe_context *pipe = ctx->pipe;
struct pipe_screen *screen = pipe->screen;
- struct pipe_texture texTemp;
- struct pipe_texture *texture;
+ struct pipe_resource texTemp;
+ struct pipe_resource *texture;
struct pipe_surface *tex_surf;
/* check whether the states are properly saved */
@@ -687,13 +737,13 @@ static void util_blitter_overlap_copy(struct blitter_context *blitter,
texTemp.height0 = height;
texTemp.depth0 = 1;
- texture = screen->texture_create(screen, &texTemp);
+ texture = screen->resource_create(screen, &texTemp);
if (!texture)
return;
tex_surf = screen->get_tex_surface(screen, texture, 0, 0, 0,
- PIPE_BUFFER_USAGE_GPU_READ |
- PIPE_BUFFER_USAGE_GPU_WRITE);
+ PIPE_BIND_BLIT_SOURCE |
+ PIPE_BIND_BLIT_DESTINATION);
/* blit from the src to the temp */
util_blitter_do_copy(blitter, tex_surf, 0, 0,
@@ -705,7 +755,7 @@ static void util_blitter_overlap_copy(struct blitter_context *blitter,
width, height,
FALSE);
pipe_surface_reference(&tex_surf, NULL);
- pipe_texture_reference(&texture, NULL);
+ pipe_resource_reference(&texture, NULL);
blitter_restore_CSOs(ctx);
}
@@ -739,8 +789,8 @@ void util_blitter_copy(struct blitter_context *blitter,
is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0;
is_stencil = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 1) != 0;
- dst_tex_usage = is_depth || is_stencil ? PIPE_TEXTURE_USAGE_DEPTH_STENCIL :
- PIPE_TEXTURE_USAGE_RENDER_TARGET;
+ dst_tex_usage = is_depth || is_stencil ? PIPE_BIND_DEPTH_STENCIL :
+ PIPE_BIND_RENDER_TARGET;
/* check if we can sample from and render to the surfaces */
/* (assuming copying a stencil buffer is not possible) */
@@ -748,7 +798,7 @@ void util_blitter_copy(struct blitter_context *blitter,
!screen->is_format_supported(screen, dst->format, dst->texture->target,
dst_tex_usage, 0) ||
!screen->is_format_supported(screen, src->format, src->texture->target,
- PIPE_TEXTURE_USAGE_SAMPLER, 0)) {
+ PIPE_BIND_SAMPLER_VIEW, 0)) {
util_surface_copy(pipe, FALSE, dst, dstx, dsty, src, srcx, srcy,
width, height);
return;
@@ -785,7 +835,7 @@ void util_blitter_fill(struct blitter_context *blitter,
/* check if we can render to the surface */
if (util_format_is_depth_or_stencil(dst->format) || /* unlikely, but you never know */
!screen->is_format_supported(screen, dst->format, dst->texture->target,
- PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)) {
+ PIPE_BIND_RENDER_TARGET, 0)) {
util_surface_fill(pipe, dst, dstx, dsty, width, height, value);
return;
}
@@ -807,6 +857,7 @@ void util_blitter_fill(struct blitter_context *blitter,
pipe->bind_rasterizer_state(pipe, ctx->rs_state);
pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, 1));
pipe->bind_vs_state(pipe, ctx->vs_col);
+ pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
/* set a framebuffer state */
fb_state.width = dst->width;
diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h
index 92008fce992..f6e3ce4874e 100644
--- a/src/gallium/auxiliary/util/u_blitter.h
+++ b/src/gallium/auxiliary/util/u_blitter.h
@@ -43,6 +43,7 @@ struct blitter_context
/* Private members, really. */
void *saved_blend_state; /**< blend state */
void *saved_dsa_state; /**< depth stencil alpha state */
+ void *saved_velem_state; /**< vertex elements state */
void *saved_rs_state; /**< rasterizer state */
void *saved_fs, *saved_vs; /**< fragment shader, vertex shader */
@@ -52,10 +53,13 @@ struct blitter_context
struct pipe_clip_state saved_clip;
int saved_num_sampler_states;
- void *saved_sampler_states[32];
+ void *saved_sampler_states[PIPE_MAX_SAMPLERS];
- int saved_num_textures;
- struct pipe_texture *saved_textures[32]; /* is 32 enough? */
+ int saved_num_sampler_views;
+ struct pipe_sampler_view *saved_sampler_views[PIPE_MAX_SAMPLERS];
+
+ int saved_num_vertex_buffers;
+ struct pipe_vertex_buffer saved_vertex_buffers[PIPE_MAX_ATTRIBS];
};
/**
@@ -173,6 +177,13 @@ void util_blitter_save_depth_stencil_alpha(struct blitter_context *blitter,
}
static INLINE
+void util_blitter_save_vertex_elements(struct blitter_context *blitter,
+ void *state)
+{
+ blitter->saved_velem_state = state;
+}
+
+static INLINE
void util_blitter_save_stencil_ref(struct blitter_context *blitter,
const struct pipe_stencil_ref *state)
{
@@ -234,17 +245,30 @@ void util_blitter_save_fragment_sampler_states(
num_sampler_states * sizeof(void *));
}
-static INLINE
-void util_blitter_save_fragment_sampler_textures(
- struct blitter_context *blitter,
- int num_textures,
- struct pipe_texture **textures)
+static INLINE void
+util_blitter_save_fragment_sampler_views(struct blitter_context *blitter,
+ int num_views,
+ struct pipe_sampler_view **views)
+{
+ assert(num_views <= Elements(blitter->saved_sampler_views));
+
+ blitter->saved_num_sampler_views = num_views;
+ memcpy(blitter->saved_sampler_views,
+ views,
+ num_views * sizeof(struct pipe_sampler_view *));
+}
+
+static INLINE void
+util_blitter_save_vertex_buffers(struct blitter_context *blitter,
+ int num_vertex_buffers,
+ struct pipe_vertex_buffer *vertex_buffers)
{
- assert(num_textures <= Elements(blitter->saved_textures));
+ assert(num_vertex_buffers <= Elements(blitter->saved_vertex_buffers));
- blitter->saved_num_textures = num_textures;
- memcpy(blitter->saved_textures, textures,
- num_textures * sizeof(struct pipe_texture *));
+ blitter->saved_num_vertex_buffers = num_vertex_buffers;
+ memcpy(blitter->saved_vertex_buffers,
+ vertex_buffers,
+ num_vertex_buffers * sizeof(struct pipe_vertex_buffer));
}
#ifdef __cplusplus
diff --git a/src/gallium/auxiliary/util/u_box.h b/src/gallium/auxiliary/util/u_box.h
new file mode 100644
index 00000000000..919967b55a7
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_box.h
@@ -0,0 +1,73 @@
+#ifndef UTIL_BOX_INLINES_H
+#define UTIL_BOX_INLINES_H
+
+#include "pipe/p_state.h"
+
+static INLINE
+void u_box_1d( unsigned x,
+ unsigned w,
+ struct pipe_box *box )
+{
+ box->x = x;
+ box->y = 0;
+ box->z = 0;
+ box->width = w;
+ box->height = 1;
+ box->depth = 1;
+}
+
+static INLINE
+void u_box_2d( unsigned x,
+ unsigned y,
+ unsigned w,
+ unsigned h,
+ struct pipe_box *box )
+{
+ box->x = x;
+ box->y = y;
+ box->z = 0;
+ box->width = w;
+ box->height = h;
+ box->depth = 1;
+}
+
+static INLINE
+void u_box_origin_2d( unsigned w,
+ unsigned h,
+ struct pipe_box *box )
+{
+ box->x = 0;
+ box->y = 0;
+ box->z = 0;
+ box->width = w;
+ box->height = h;
+ box->depth = 1;
+}
+
+static INLINE
+void u_box_2d_zslice( unsigned x,
+ unsigned y,
+ unsigned z,
+ unsigned w,
+ unsigned h,
+ struct pipe_box *box )
+{
+ box->x = x;
+ box->y = y;
+ box->z = z;
+ box->width = w;
+ box->height = h;
+ box->depth = 1;
+}
+
+static INLINE
+struct pipe_subresource u_subresource( unsigned face,
+ unsigned level )
+{
+ struct pipe_subresource subresource;
+ subresource.face = face;
+ subresource.level = level;
+ return subresource;
+}
+
+#endif
diff --git a/src/gallium/auxiliary/util/u_cache.c b/src/gallium/auxiliary/util/u_cache.c
index 47c16b1c927..15f4a8831f2 100644
--- a/src/gallium/auxiliary/util/u_cache.c
+++ b/src/gallium/auxiliary/util/u_cache.c
@@ -202,7 +202,7 @@ util_cache_destroy(struct util_cache *cache)
double stddev = sqrt(mean);
unsigned i;
for(i = 0; i < cache->size; ++i) {
- double z = fabs(cache->count - mean)/stddev;
+ double z = fabs(cache->entries[i].count - mean)/stddev;
/* This assert should not fail 99.9999998027% of the times, unless
* the hash function is a poor one */
assert(z <= 6.0);
diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c
index 94be682c4b1..0de38e791d6 100644
--- a/src/gallium/auxiliary/util/u_debug.c
+++ b/src/gallium/auxiliary/util/u_debug.c
@@ -74,6 +74,24 @@ void debug_print_blob( const char *name,
#endif
+static boolean
+debug_get_option_should_print(void)
+{
+ static boolean first = TRUE;
+ static boolean value = FALSE;
+
+ if (!first)
+ return value;
+
+ /* Oh hey this will call into this function,
+ * but its cool since we set first to false
+ */
+ first = FALSE;
+ value = debug_get_bool_option("GALLIUM_PRINT_OPTIONS", TRUE);
+ /* XXX should we print this option? Currently it wont */
+ return value;
+}
+
const char *
debug_get_option(const char *name, const char *dfault)
{
@@ -82,8 +100,9 @@ debug_get_option(const char *name, const char *dfault)
result = os_get_option(name);
if(!result)
result = dfault;
-
- debug_printf("%s: %s = %s\n", __FUNCTION__, name, result ? result : "(null)");
+
+ if (debug_get_option_should_print())
+ debug_printf("%s: %s = %s\n", __FUNCTION__, name, result ? result : "(null)");
return result;
}
@@ -109,7 +128,8 @@ debug_get_bool_option(const char *name, boolean dfault)
else
result = TRUE;
- debug_printf("%s: %s = %s\n", __FUNCTION__, name, result ? "TRUE" : "FALSE");
+ if (debug_get_option_should_print())
+ debug_printf("%s: %s = %s\n", __FUNCTION__, name, result ? "TRUE" : "FALSE");
return result;
}
@@ -142,8 +162,9 @@ debug_get_num_option(const char *name, long dfault)
}
result *= sign;
}
-
- debug_printf("%s: %s = %li\n", __FUNCTION__, name, result);
+
+ if (debug_get_option_should_print())
+ debug_printf("%s: %s = %li\n", __FUNCTION__, name, result);
return result;
}
@@ -176,11 +197,12 @@ debug_get_flags_option(const char *name,
}
}
- if (str) {
- debug_printf("%s: %s = 0x%lx (%s)\n", __FUNCTION__, name, result, str);
- }
- else {
- debug_printf("%s: %s = 0x%lx\n", __FUNCTION__, name, result);
+ if (debug_get_option_should_print()) {
+ if (str) {
+ debug_printf("%s: %s = 0x%lx (%s)\n", __FUNCTION__, name, result, str);
+ } else {
+ debug_printf("%s: %s = 0x%lx\n", __FUNCTION__, name, result);
+ }
}
return result;
@@ -421,45 +443,51 @@ void debug_dump_image(const char *prefix,
#endif
}
-void debug_dump_surface(const char *prefix,
+void debug_dump_surface(struct pipe_context *pipe,
+ const char *prefix,
struct pipe_surface *surface)
{
- struct pipe_texture *texture;
- struct pipe_screen *screen;
+ struct pipe_resource *texture;
struct pipe_transfer *transfer;
void *data;
if (!surface)
return;
+ /* XXX: this doesn't necessarily work, as the driver may be using
+ * temporary storage for the surface which hasn't been propagated
+ * back into the texture. Need to nail down the semantics of views
+ * and transfers a bit better before we can say if extra work needs
+ * to be done here:
+ */
texture = surface->texture;
- screen = texture->screen;
- transfer = screen->get_tex_transfer(screen, texture, surface->face,
- surface->level, surface->zslice,
- PIPE_TRANSFER_READ, 0, 0, surface->width,
- surface->height);
+ transfer = pipe_get_transfer(pipe, texture, surface->face,
+ surface->level, surface->zslice,
+ PIPE_TRANSFER_READ, 0, 0, surface->width,
+ surface->height);
- data = screen->transfer_map(screen, transfer);
+ data = pipe->transfer_map(pipe, transfer);
if(!data)
goto error;
debug_dump_image(prefix,
texture->format,
util_format_get_blocksize(texture->format),
- util_format_get_nblocksx(texture->format, transfer->width),
- util_format_get_nblocksy(texture->format, transfer->height),
+ util_format_get_nblocksx(texture->format, surface->width),
+ util_format_get_nblocksy(texture->format, surface->height),
transfer->stride,
data);
- screen->transfer_unmap(screen, transfer);
+ pipe->transfer_unmap(pipe, transfer);
error:
- screen->tex_transfer_destroy(transfer);
+ pipe->transfer_destroy(pipe, transfer);
}
-void debug_dump_texture(const char *prefix,
- struct pipe_texture *texture)
+void debug_dump_texture(struct pipe_context *pipe,
+ const char *prefix,
+ struct pipe_resource *texture)
{
struct pipe_surface *surface;
struct pipe_screen *screen;
@@ -471,9 +499,9 @@ void debug_dump_texture(const char *prefix,
/* XXX for now, just dump image for face=0, level=0 */
surface = screen->get_tex_surface(screen, texture, 0, 0, 0,
- PIPE_TEXTURE_USAGE_SAMPLER);
+ PIPE_BIND_SAMPLER_VIEW);
if (surface) {
- debug_dump_surface(prefix, surface);
+ debug_dump_surface(pipe, prefix, surface);
screen->tex_surface_destroy(surface);
}
}
@@ -511,27 +539,28 @@ struct bmp_rgb_quad {
};
void
-debug_dump_surface_bmp(const char *filename,
+debug_dump_surface_bmp(struct pipe_context *pipe,
+ const char *filename,
struct pipe_surface *surface)
{
#ifndef PIPE_SUBSYSTEM_WINDOWS_MINIPORT
struct pipe_transfer *transfer;
- struct pipe_texture *texture = surface->texture;
- struct pipe_screen *screen = texture->screen;
+ struct pipe_resource *texture = surface->texture;
- transfer = screen->get_tex_transfer(screen, texture, surface->face,
- surface->level, surface->zslice,
- PIPE_TRANSFER_READ, 0, 0, surface->width,
- surface->height);
+ transfer = pipe_get_transfer(pipe, texture, surface->face,
+ surface->level, surface->zslice,
+ PIPE_TRANSFER_READ, 0, 0, surface->width,
+ surface->height);
- debug_dump_transfer_bmp(filename, transfer);
+ debug_dump_transfer_bmp(pipe, filename, transfer);
- screen->tex_transfer_destroy(transfer);
+ pipe->transfer_destroy(pipe, transfer);
#endif
}
void
-debug_dump_transfer_bmp(const char *filename,
+debug_dump_transfer_bmp(struct pipe_context *pipe,
+ const char *filename,
struct pipe_transfer *transfer)
{
#ifndef PIPE_SUBSYSTEM_WINDOWS_MINIPORT
@@ -540,17 +569,20 @@ debug_dump_transfer_bmp(const char *filename,
if (!transfer)
goto error1;
- rgba = MALLOC(transfer->width*transfer->height*4*sizeof(float));
+ rgba = MALLOC(transfer->box.width *
+ transfer->box.height *
+ transfer->box.depth *
+ 4*sizeof(float));
if(!rgba)
goto error1;
- pipe_get_tile_rgba(transfer, 0, 0,
- transfer->width, transfer->height,
+ pipe_get_tile_rgba(pipe, transfer, 0, 0,
+ transfer->box.width, transfer->box.height,
rgba);
debug_dump_float_rgba_bmp(filename,
- transfer->width, transfer->height,
- rgba, transfer->width);
+ transfer->box.width, transfer->box.height,
+ rgba, transfer->box.width);
FREE(rgba);
error1:
diff --git a/src/gallium/auxiliary/util/u_debug.h b/src/gallium/auxiliary/util/u_debug.h
index 0f4768f3444..e8ff2773e69 100644
--- a/src/gallium/auxiliary/util/u_debug.h
+++ b/src/gallium/auxiliary/util/u_debug.h
@@ -303,6 +303,45 @@ debug_get_flags_option(const char *name,
const struct debug_named_value *flags,
unsigned long dfault);
+#define DEBUG_GET_ONCE_BOOL_OPTION(sufix, name, dfault) \
+static boolean \
+debug_get_option_ ## sufix (void) \
+{ \
+ static boolean first = TRUE; \
+ static boolean value; \
+ if (first) { \
+ first = FALSE; \
+ value = debug_get_bool_option(name, dfault); \
+ } \
+ return value; \
+}
+
+#define DEBUG_GET_ONCE_NUM_OPTION(sufix, name, dfault) \
+static long \
+debug_get_option_ ## sufix (void) \
+{ \
+ static boolean first = TRUE; \
+ static long value; \
+ if (first) { \
+ first = FALSE; \
+ value = debug_get_num_option(name, dfault); \
+ } \
+ return value; \
+}
+
+#define DEBUG_GET_ONCE_FLAGS_OPTION(sufix, name, flags, dfault) \
+static unsigned long \
+debug_get_option_ ## sufix (void) \
+{ \
+ static boolean first = TRUE; \
+ static unsigned long value; \
+ if (first) { \
+ first = FALSE; \
+ value = debug_get_flags_option(name, flags, dfault); \
+ } \
+ return value; \
+}
+
unsigned long
debug_memory_begin(void);
@@ -312,30 +351,35 @@ debug_memory_end(unsigned long beginning);
#ifdef DEBUG
+struct pipe_context;
struct pipe_surface;
struct pipe_transfer;
-struct pipe_texture;
+struct pipe_resource;
void debug_dump_image(const char *prefix,
unsigned format, unsigned cpp,
unsigned width, unsigned height,
unsigned stride,
const void *data);
-void debug_dump_surface(const char *prefix,
+void debug_dump_surface(struct pipe_context *pipe,
+ const char *prefix,
struct pipe_surface *surface);
-void debug_dump_texture(const char *prefix,
- struct pipe_texture *texture);
-void debug_dump_surface_bmp(const char *filename,
+void debug_dump_texture(struct pipe_context *pipe,
+ const char *prefix,
+ struct pipe_resource *texture);
+void debug_dump_surface_bmp(struct pipe_context *pipe,
+ const char *filename,
struct pipe_surface *surface);
-void debug_dump_transfer_bmp(const char *filename,
+void debug_dump_transfer_bmp(struct pipe_context *pipe,
+ const char *filename,
struct pipe_transfer *transfer);
void debug_dump_float_rgba_bmp(const char *filename,
unsigned width, unsigned height,
float *rgba, unsigned stride);
#else
#define debug_dump_image(prefix, format, cpp, width, height, stride, data) ((void)0)
-#define debug_dump_surface(prefix, surface) ((void)0)
-#define debug_dump_surface_bmp(filename, surface) ((void)0)
+#define debug_dump_surface(pipe, prefix, surface) ((void)0)
+#define debug_dump_surface_bmp(pipe, filename, surface) ((void)0)
#define debug_dump_transfer_bmp(filename, transfer) ((void)0)
#define debug_dump_float_rgba_bmp(filename, width, height, rgba, stride) ((void)0)
#endif
diff --git a/src/gallium/auxiliary/util/u_dirty_surfaces.h b/src/gallium/auxiliary/util/u_dirty_surfaces.h
new file mode 100644
index 00000000000..99f260bf967
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_dirty_surfaces.h
@@ -0,0 +1,88 @@
+#ifndef U_DIRTY_SURFACES_H_
+#define U_DIRTY_SURFACES_H_
+
+#include "util/u_double_list.h"
+#include "util/u_math.h"
+
+typedef void (*util_dirty_surface_flush_t) (struct pipe_context *, struct pipe_surface *);
+
+struct util_dirty_surfaces
+{
+ struct list_head dirty_list;
+};
+
+struct util_dirty_surface
+{
+ struct pipe_surface base;
+ struct list_head dirty_list;
+};
+
+static INLINE void
+util_dirty_surfaces_init(struct util_dirty_surfaces *ds)
+{
+ LIST_INITHEAD(&ds->dirty_list);
+}
+
+static INLINE void
+util_dirty_surfaces_use_for_sampling(struct pipe_context *pipe, struct util_dirty_surfaces *dss, util_dirty_surface_flush_t flush)
+{
+ struct list_head *p, *next;
+ for(p = dss->dirty_list.next; p != &dss->dirty_list; p = next)
+ {
+ struct util_dirty_surface *ds = LIST_ENTRY(struct util_dirty_surface, p, dirty_list);
+ next = p->next;
+
+ flush(pipe, &ds->base);
+ }
+}
+
+static INLINE void
+util_dirty_surfaces_use_levels_for_sampling(struct pipe_context *pipe, struct util_dirty_surfaces *dss, unsigned first, unsigned last, util_dirty_surface_flush_t flush)
+{
+ struct list_head *p, *next;
+ if(first > last)
+ return;
+ for(p = dss->dirty_list.next; p != &dss->dirty_list; p = next)
+ {
+ struct util_dirty_surface *ds = LIST_ENTRY(struct util_dirty_surface, p, dirty_list);
+ next = p->next;
+
+ if(ds->base.level >= first && ds->base.level <= last)
+ flush(pipe, &ds->base);
+ }
+}
+
+static INLINE void
+util_dirty_surfaces_use_for_sampling_with(struct pipe_context *pipe, struct util_dirty_surfaces *dss, struct pipe_sampler_view *psv, struct pipe_sampler_state *pss, util_dirty_surface_flush_t flush)
+{
+ if(!LIST_IS_EMPTY(&dss->dirty_list))
+ util_dirty_surfaces_use_levels_for_sampling(pipe, dss, (unsigned)pss->min_lod + psv->first_level, MIN2((unsigned)ceilf(pss->max_lod) + psv->first_level, psv->last_level), flush);
+}
+
+static INLINE void
+util_dirty_surface_init(struct util_dirty_surface *ds)
+{
+ LIST_INITHEAD(&ds->dirty_list);
+}
+
+static INLINE boolean
+util_dirty_surface_is_dirty(struct util_dirty_surface *ds)
+{
+ return !LIST_IS_EMPTY(&ds->dirty_list);
+}
+
+static INLINE void
+util_dirty_surface_set_dirty(struct util_dirty_surfaces *dss, struct util_dirty_surface *ds)
+{
+ if(LIST_IS_EMPTY(&ds->dirty_list))
+ LIST_ADDTAIL(&ds->dirty_list, &dss->dirty_list);
+}
+
+static INLINE void
+util_dirty_surface_set_clean(struct util_dirty_surfaces *dss, struct util_dirty_surface *ds)
+{
+ if(!LIST_IS_EMPTY(&ds->dirty_list))
+ LIST_DELINIT(&ds->dirty_list);
+}
+
+#endif
diff --git a/src/gallium/auxiliary/util/u_dl.c b/src/gallium/auxiliary/util/u_dl.c
index 37ed789f955..220860ebf4b 100644
--- a/src/gallium/auxiliary/util/u_dl.c
+++ b/src/gallium/auxiliary/util/u_dl.c
@@ -78,3 +78,16 @@ util_dl_close(struct util_dl_library *library)
(void)library;
#endif
}
+
+
+const char *
+util_dl_error(void)
+{
+#if defined(PIPE_OS_UNIX)
+ return dlerror();
+#elif defined(PIPE_OS_WINDOWS)
+ return "unknown error";
+#else
+ return "unknown error";
+#endif
+}
diff --git a/src/gallium/auxiliary/util/u_dl.h b/src/gallium/auxiliary/util/u_dl.h
index 85296c58af6..2853b447c61 100644
--- a/src/gallium/auxiliary/util/u_dl.h
+++ b/src/gallium/auxiliary/util/u_dl.h
@@ -70,4 +70,11 @@ void
util_dl_close(struct util_dl_library *library);
+/**
+ * Return most recent error message.
+ */
+const char *
+util_dl_error(void);
+
+
#endif /* U_DL_H_ */
diff --git a/src/gallium/auxiliary/util/u_draw_quad.c b/src/gallium/auxiliary/util/u_draw_quad.c
index 14506e84519..b37b48b5aef 100644
--- a/src/gallium/auxiliary/util/u_draw_quad.c
+++ b/src/gallium/auxiliary/util/u_draw_quad.c
@@ -30,6 +30,7 @@
#include "pipe/p_defines.h"
#include "util/u_inlines.h"
#include "util/u_draw_quad.h"
+#include "util/u_memory.h"
/**
@@ -38,15 +39,13 @@
*/
void
util_draw_vertex_buffer(struct pipe_context *pipe,
- struct pipe_buffer *vbuf,
+ struct pipe_resource *vbuf,
uint offset,
uint prim_type,
uint num_verts,
uint num_attribs)
{
struct pipe_vertex_buffer vbuffer;
- struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS];
- uint i;
assert(num_attribs <= PIPE_MAX_ATTRIBS);
@@ -58,15 +57,7 @@ util_draw_vertex_buffer(struct pipe_context *pipe,
vbuffer.max_index = num_verts - 1;
pipe->set_vertex_buffers(pipe, 1, &vbuffer);
- /* tell pipe about the vertex attributes */
- for (i = 0; i < num_attribs; i++) {
- velements[i].src_offset = i * 4 * sizeof(float);
- velements[i].instance_divisor = 0;
- velements[i].vertex_buffer_index = 0;
- velements[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
- velements[i].nr_components = 4;
- }
- pipe->set_vertex_elements(pipe, num_attribs, velements);
+ /* note: vertex elements already set by caller */
/* draw */
pipe->draw_arrays(pipe, prim_type, 0, num_verts);
@@ -76,60 +67,63 @@ util_draw_vertex_buffer(struct pipe_context *pipe,
/**
* Draw screen-aligned textured quad.
- * Note: this function allocs/destroys a vertex buffer and isn't especially
- * efficient.
+ * Note: this isn't especially efficient.
*/
void
util_draw_texquad(struct pipe_context *pipe,
float x0, float y0, float x1, float y1, float z)
{
- struct pipe_buffer *vbuf;
- uint numAttribs = 2, vertexBytes, i, j;
-
- vertexBytes = 4 * (4 * numAttribs * sizeof(float));
-
- /* XXX create one-time */
- vbuf = pipe_buffer_create(pipe->screen, 32,
- PIPE_BUFFER_USAGE_VERTEX, vertexBytes);
- if (vbuf) {
- float *v = (float *) pipe_buffer_map(pipe->screen, vbuf,
- PIPE_BUFFER_USAGE_CPU_WRITE);
- if (v) {
- /*
- * Load vertex buffer
- */
- for (i = j = 0; i < 4; i++) {
- v[j + 2] = z; /* z */
- v[j + 3] = 1.0; /* w */
- v[j + 6] = 0.0; /* r */
- v[j + 7] = 1.0; /* q */
- j += 8;
- }
-
- v[0] = x0;
- v[1] = y0;
- v[4] = 0.0; /*s*/
- v[5] = 0.0; /*t*/
-
- v[8] = x1;
- v[9] = y0;
- v[12] = 1.0;
- v[13] = 0.0;
-
- v[16] = x1;
- v[17] = y1;
- v[20] = 1.0;
- v[21] = 1.0;
-
- v[24] = x0;
- v[25] = y1;
- v[28] = 0.0;
- v[29] = 1.0;
-
- pipe_buffer_unmap(pipe->screen, vbuf);
- util_draw_vertex_buffer(pipe, vbuf, 0, PIPE_PRIM_TRIANGLE_FAN, 4, 2);
- }
-
- pipe_buffer_reference(&vbuf, NULL);
+ uint numAttribs = 2, i, j;
+ uint vertexBytes = 4 * (4 * numAttribs * sizeof(float));
+ struct pipe_resource *vbuf = NULL;
+ uint *v = NULL;
+
+ v = MALLOC(vertexBytes);
+ if (v == NULL)
+ goto out;
+
+ /*
+ * Load vertex buffer
+ */
+ for (i = j = 0; i < 4; i++) {
+ v[j + 2] = z; /* z */
+ v[j + 3] = 1.0; /* w */
+ v[j + 6] = 0.0; /* r */
+ v[j + 7] = 1.0; /* q */
+ j += 8;
}
+
+ v[0] = x0;
+ v[1] = y0;
+ v[4] = 0.0; /*s*/
+ v[5] = 0.0; /*t*/
+
+ v[8] = x1;
+ v[9] = y0;
+ v[12] = 1.0;
+ v[13] = 0.0;
+
+ v[16] = x1;
+ v[17] = y1;
+ v[20] = 1.0;
+ v[21] = 1.0;
+
+ v[24] = x0;
+ v[25] = y1;
+ v[28] = 0.0;
+ v[29] = 1.0;
+
+ vbuf = pipe_user_buffer_create(pipe->screen, v, vertexBytes,
+ PIPE_BIND_VERTEX_BUFFER);
+ if (!vbuf)
+ goto out;
+
+ util_draw_vertex_buffer(pipe, vbuf, 0, PIPE_PRIM_TRIANGLE_FAN, 4, 2);
+
+out:
+ if (vbuf)
+ pipe_resource_reference(&vbuf, NULL);
+
+ if (v)
+ FREE(v);
}
diff --git a/src/gallium/auxiliary/util/u_draw_quad.h b/src/gallium/auxiliary/util/u_draw_quad.h
index 00d3f5b7158..42eb1844289 100644
--- a/src/gallium/auxiliary/util/u_draw_quad.h
+++ b/src/gallium/auxiliary/util/u_draw_quad.h
@@ -33,11 +33,11 @@
extern "C" {
#endif
-struct pipe_buffer;
+struct pipe_resource;
extern void
util_draw_vertex_buffer(struct pipe_context *pipe,
- struct pipe_buffer *vbuf, uint offset,
+ struct pipe_resource *vbuf, uint offset,
uint num_attribs, uint num_verts, uint prim_type);
diff --git a/src/gallium/auxiliary/util/u_dump.h b/src/gallium/auxiliary/util/u_dump.h
index 379f18ef38b..bdc73ac47d2 100644
--- a/src/gallium/auxiliary/util/u_dump.h
+++ b/src/gallium/auxiliary/util/u_dump.h
@@ -92,7 +92,7 @@ util_dump_tex_filter(unsigned value, boolean shortened);
void
util_dump_template(struct os_stream *stream,
- const struct pipe_texture *templat);
+ const struct pipe_resource *templat);
void
util_dump_rasterizer_state(struct os_stream *stream,
diff --git a/src/gallium/auxiliary/util/u_dump_state.c b/src/gallium/auxiliary/util/u_dump_state.c
index ae7afd7311e..2ce643e90cd 100644
--- a/src/gallium/auxiliary/util/u_dump_state.c
+++ b/src/gallium/auxiliary/util/u_dump_state.c
@@ -255,14 +255,14 @@ util_dump_enum_func(struct os_stream *stream, unsigned value)
void
-util_dump_template(struct os_stream *stream, const struct pipe_texture *templat)
+util_dump_template(struct os_stream *stream, const struct pipe_resource *templat)
{
if(!templat) {
util_dump_null(stream);
return;
}
- util_dump_struct_begin(stream, "pipe_texture");
+ util_dump_struct_begin(stream, "pipe_resource");
util_dump_member(stream, int, templat, target);
util_dump_member(stream, format, templat, format);
@@ -280,7 +280,9 @@ util_dump_template(struct os_stream *stream, const struct pipe_texture *templat)
util_dump_member_end(stream);
util_dump_member(stream, uint, templat, last_level);
- util_dump_member(stream, uint, templat, tex_usage);
+ util_dump_member(stream, uint, templat, usage);
+ util_dump_member(stream, uint, templat, bind);
+ util_dump_member(stream, uint, templat, flags);
util_dump_struct_end(stream);
}
@@ -653,16 +655,13 @@ util_dump_transfer(struct os_stream *stream, const struct pipe_transfer *state)
util_dump_struct_begin(stream, "pipe_transfer");
- util_dump_member(stream, uint, state, width);
- util_dump_member(stream, uint, state, height);
+ util_dump_member(stream, ptr, state, resource);
+ /*util_dump_member(stream, uint, state, box);*/
util_dump_member(stream, uint, state, stride);
- util_dump_member(stream, uint, state, usage);
+ util_dump_member(stream, uint, state, slice_stride);
- util_dump_member(stream, ptr, state, texture);
- util_dump_member(stream, uint, state, face);
- util_dump_member(stream, uint, state, level);
- util_dump_member(stream, uint, state, zslice);
+ /*util_dump_member(stream, ptr, state, data);*/
util_dump_struct_end(stream);
}
@@ -700,7 +699,6 @@ util_dump_vertex_element(struct os_stream *stream, const struct pipe_vertex_elem
util_dump_member(stream, uint, state, src_offset);
util_dump_member(stream, uint, state, vertex_buffer_index);
- util_dump_member(stream, uint, state, nr_components);
util_dump_member(stream, format, state, src_format);
diff --git a/src/gallium/auxiliary/util/u_dynarray.h b/src/gallium/auxiliary/util/u_dynarray.h
new file mode 100644
index 00000000000..9d1c1713a7c
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_dynarray.h
@@ -0,0 +1,111 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef U_DYNARRAY_H
+#define U_DYNARRAY_H
+
+#include "pipe/p_compiler.h"
+#include "util/u_memory.h"
+
+/* A zero-initialized version of this is guaranteed to represent an
+ * empty array.
+ *
+ * Also, size <= capacity and data != 0 if and only if capacity != 0
+ * capacity will always be the allocation size of data
+ */
+struct util_dynarray
+{
+ void *data;
+ unsigned size;
+ unsigned capacity;
+};
+
+static INLINE void
+util_dynarray_init(struct util_dynarray *buf)
+{
+ memset(buf, 0, sizeof(*buf));
+}
+
+static INLINE void
+util_dynarray_fini(struct util_dynarray *buf)
+{
+ if(buf->data)
+ {
+ FREE(buf->data);
+ util_dynarray_init(buf);
+ }
+}
+
+/* use util_dynarray_trim to reduce the allocated storage */
+static INLINE void *
+util_dynarray_resize(struct util_dynarray *buf, unsigned newsize)
+{
+ char *p;
+ if(newsize > buf->capacity)
+ {
+ unsigned newcap = buf->capacity << 1;
+ if(newsize > newcap)
+ newcap = newsize;
+ buf->data = REALLOC(buf->data, buf->capacity, newcap);
+ buf->capacity = newcap;
+ }
+
+ p = (char *)buf->data + buf->size;
+ buf->size = newsize;
+ return p;
+}
+
+static INLINE void *
+util_dynarray_grow(struct util_dynarray *buf, int diff)
+{
+ return util_dynarray_resize(buf, buf->size + diff);
+}
+
+static INLINE void
+util_dynarray_trim(struct util_dynarray *buf)
+{
+ if (buf->size != buf->capacity) {
+ if (buf->size) {
+ buf->data = REALLOC(buf->data, buf->capacity, buf->size);
+ buf->capacity = buf->size;
+ }
+ else {
+ FREE(buf->data);
+ buf->data = 0;
+ buf->capacity = 0;
+ }
+ }
+}
+
+#define util_dynarray_append(buf, type, v) do {type __v = (v); memcpy(util_dynarray_grow((buf), sizeof(type)), &__v, sizeof(type));} while(0)
+#define util_dynarray_top_ptr(buf, type) (type*)((char*)(buf)->data + (buf)->size - sizeof(type))
+#define util_dynarray_top(buf, type) *util_dynarray_top_ptr(buf, type)
+#define util_dynarray_pop_ptr(buf, type) (type*)((char*)(buf)->data + ((buf)->size -= sizeof(type)))
+#define util_dynarray_pop(buf, type) *util_dynarray_pop_ptr(buf, type)
+#define util_dynarray_contains(buf, type) ((buf)->size >= sizeof(type))
+
+#endif /* U_DYNARRAY_H */
+
diff --git a/src/gallium/auxiliary/util/u_format.c b/src/gallium/auxiliary/util/u_format.c
new file mode 100644
index 00000000000..c50c807eb89
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_format.c
@@ -0,0 +1,286 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Pixel format accessor functions.
+ *
+ * @author Jose Fonseca <[email protected]>
+ */
+
+#include "u_math.h"
+#include "u_memory.h"
+#include "u_rect.h"
+#include "u_format.h"
+
+
+void
+util_format_read_4f(enum pipe_format format,
+ float *dst, unsigned dst_stride,
+ const void *src, unsigned src_stride,
+ unsigned x, unsigned y, unsigned w, unsigned h)
+{
+ const struct util_format_description *format_desc;
+ const uint8_t *src_row;
+ float *dst_row;
+
+ format_desc = util_format_description(format);
+
+ assert(x % format_desc->block.width == 0);
+ assert(y % format_desc->block.height == 0);
+
+ src_row = (const uint8_t *)src + y*src_stride + x*(format_desc->block.bits/8);
+ dst_row = dst;
+
+ format_desc->unpack_rgba_float(dst_row, dst_stride, src_row, src_stride, w, h);
+}
+
+
+void
+util_format_write_4f(enum pipe_format format,
+ const float *src, unsigned src_stride,
+ void *dst, unsigned dst_stride,
+ unsigned x, unsigned y, unsigned w, unsigned h)
+{
+ const struct util_format_description *format_desc;
+ uint8_t *dst_row;
+ const float *src_row;
+
+ format_desc = util_format_description(format);
+
+ assert(x % format_desc->block.width == 0);
+ assert(y % format_desc->block.height == 0);
+
+ dst_row = (uint8_t *)dst + y*dst_stride + x*(format_desc->block.bits/8);
+ src_row = src;
+
+ format_desc->pack_rgba_float(dst_row, dst_stride, src_row, src_stride, w, h);
+}
+
+
+void
+util_format_read_4ub(enum pipe_format format, uint8_t *dst, unsigned dst_stride, const void *src, unsigned src_stride, unsigned x, unsigned y, unsigned w, unsigned h)
+{
+ const struct util_format_description *format_desc;
+ const uint8_t *src_row;
+ uint8_t *dst_row;
+
+ format_desc = util_format_description(format);
+
+ assert(x % format_desc->block.width == 0);
+ assert(y % format_desc->block.height == 0);
+
+ src_row = (const uint8_t *)src + y*src_stride + x*(format_desc->block.bits/8);
+ dst_row = dst;
+
+ format_desc->unpack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, w, h);
+}
+
+
+void
+util_format_write_4ub(enum pipe_format format, const uint8_t *src, unsigned src_stride, void *dst, unsigned dst_stride, unsigned x, unsigned y, unsigned w, unsigned h)
+{
+ const struct util_format_description *format_desc;
+ uint8_t *dst_row;
+ const uint8_t *src_row;
+
+ format_desc = util_format_description(format);
+
+ assert(x % format_desc->block.width == 0);
+ assert(y % format_desc->block.height == 0);
+
+ dst_row = (uint8_t *)dst + y*dst_stride + x*(format_desc->block.bits/8);
+ src_row = src;
+
+ format_desc->pack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, w, h);
+}
+
+
+static INLINE boolean
+util_format_fits_8unorm(const struct util_format_description *format_desc)
+{
+ unsigned chan;
+
+ switch (format_desc->layout) {
+
+ case UTIL_FORMAT_LAYOUT_S3TC:
+ case UTIL_FORMAT_LAYOUT_RGTC:
+ /*
+ * These are straight forward.
+ */
+
+ return TRUE;
+
+ case UTIL_FORMAT_LAYOUT_PLAIN:
+ /*
+ * For these we can find a generic rule.
+ */
+
+ for (chan = 0; chan < format_desc->nr_channels; ++chan) {
+ switch (format_desc->channel[chan].type) {
+ case UTIL_FORMAT_TYPE_VOID:
+ break;
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ if (!format_desc->channel[chan].normalized ||
+ format_desc->channel[chan].size > 8) {
+ return FALSE;
+ }
+ break;
+ default:
+ return FALSE;
+ }
+ }
+ return TRUE;
+
+ default:
+ /*
+ * Handle all others on a case by case basis.
+ */
+
+ switch (format_desc->format) {
+ case PIPE_FORMAT_R1_UNORM:
+ case PIPE_FORMAT_UYVY:
+ case PIPE_FORMAT_YUYV:
+ case PIPE_FORMAT_R8G8_B8G8_UNORM:
+ case PIPE_FORMAT_G8R8_G8B8_UNORM:
+ return TRUE;
+
+ default:
+ return FALSE;
+ }
+ }
+}
+
+
+void
+util_format_translate(enum pipe_format dst_format,
+ void *dst, unsigned dst_stride,
+ unsigned dst_x, unsigned dst_y,
+ enum pipe_format src_format,
+ const void *src, unsigned src_stride,
+ unsigned src_x, unsigned src_y,
+ unsigned width, unsigned height)
+{
+ const struct util_format_description *dst_format_desc;
+ const struct util_format_description *src_format_desc;
+ uint8_t *dst_row;
+ const uint8_t *src_row;
+ unsigned y_step;
+ unsigned dst_step;
+ unsigned src_step;
+
+ if (dst_format == src_format) {
+ /*
+ * Trivial case.
+ */
+
+ util_copy_rect(dst, dst_format, dst_stride, dst_x, dst_y,
+ width, height, src, (int)src_stride,
+ src_x, src_y);
+ return;
+ }
+
+ dst_format_desc = util_format_description(dst_format);
+ src_format_desc = util_format_description(src_format);
+
+ assert(dst_x % dst_format_desc->block.width == 0);
+ assert(dst_y % dst_format_desc->block.height == 0);
+ assert(src_x % src_format_desc->block.width == 0);
+ assert(src_y % src_format_desc->block.height == 0);
+
+ dst_row = (uint8_t *)dst + dst_y*dst_stride + dst_x*(dst_format_desc->block.bits/8);
+ src_row = (const uint8_t *)src + src_y*src_stride + src_x*(src_format_desc->block.bits/8);
+
+ /*
+ * This works because all pixel formats have pixel blocks with power of two
+ * sizes.
+ */
+
+ y_step = MAX2(dst_format_desc->block.height, src_format_desc->block.height);
+ assert(y_step % dst_format_desc->block.height == 0);
+ assert(y_step % src_format_desc->block.height == 0);
+
+ dst_step = y_step / dst_format_desc->block.height * dst_stride;
+ src_step = y_step / src_format_desc->block.height * src_stride;
+
+ /*
+ * TODO: double formats will loose precision
+ * TODO: Add a special case for formats that are mere swizzles of each other
+ */
+
+ if (util_format_fits_8unorm(src_format_desc) ||
+ util_format_fits_8unorm(dst_format_desc)) {
+ unsigned tmp_stride;
+ uint8_t *tmp_row;
+
+ tmp_stride = width * 4 * sizeof *tmp_row;
+ tmp_row = MALLOC(y_step * tmp_stride);
+ if (!tmp_row)
+ return;
+
+ while (height >= y_step) {
+ src_format_desc->unpack_rgba_8unorm(tmp_row, tmp_stride, src_row, src_stride, width, y_step);
+ dst_format_desc->pack_rgba_8unorm(dst_row, dst_stride, tmp_row, tmp_stride, width, y_step);
+
+ dst_row += dst_step;
+ src_row += src_step;
+ height -= y_step;
+ }
+
+ if (height) {
+ src_format_desc->unpack_rgba_8unorm(tmp_row, tmp_stride, src_row, src_stride, width, height);
+ dst_format_desc->pack_rgba_8unorm(dst_row, dst_stride, tmp_row, tmp_stride, width, height);
+ }
+
+ FREE(tmp_row);
+ }
+ else {
+ unsigned tmp_stride;
+ float *tmp_row;
+
+ tmp_stride = width * 4 * sizeof *tmp_row;
+ tmp_row = MALLOC(y_step * tmp_stride);
+ if (!tmp_row)
+ return;
+
+ while (height >= y_step) {
+ src_format_desc->unpack_rgba_float(tmp_row, tmp_stride, src_row, src_stride, width, y_step);
+ dst_format_desc->pack_rgba_float(dst_row, dst_stride, tmp_row, tmp_stride, width, y_step);
+
+ dst_row += dst_step;
+ src_row += src_step;
+ height -= y_step;
+ }
+
+ if (height) {
+ src_format_desc->unpack_rgba_float(tmp_row, tmp_stride, src_row, src_stride, width, height);
+ dst_format_desc->pack_rgba_float(dst_row, dst_stride, tmp_row, tmp_stride, width, height);
+ }
+
+ FREE(tmp_row);
+ }
+}
diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv
index 9a7e4b9cb07..0811280b97b 100644
--- a/src/gallium/auxiliary/util/u_format.csv
+++ b/src/gallium/auxiliary/util/u_format.csv
@@ -47,31 +47,41 @@
# - color space: rgb, yub, sz
#
# See also:
-# - http://msdn.microsoft.com/en-us/library/ee416489.aspx (D3D9)
-# - http://msdn.microsoft.com/en-us/library/ee415668.aspx (D3D9 -> D3D10)
-# - http://msdn.microsoft.com/en-us/library/ee418116.aspx (D3D10)
+# - http://msdn.microsoft.com/en-us/library/bb172558.aspx (D3D9)
+# - http://msdn.microsoft.com/en-us/library/bb205073.aspx#mapping_texture_formats (D3D9 -> D3D10)
+# - http://msdn.microsoft.com/en-us/library/bb173059.aspx (D3D10)
#
# Note that GL doesn't really specify the layout of internal formats. See
# OpenGL 2.1 specification, Table 3.16, on the "Correspondence of sized
# internal formats to base in- ternal formats, and desired component
# resolutions for each sized internal format."
+# None
+# Described as regular uint_8 bytes, i.e. PIPE_FORMAT_R8_USCALED
+PIPE_FORMAT_NONE , plain, 1, 1, u8 , , , , x001, rgb
+
# Typical rendertarget formats
PIPE_FORMAT_B8G8R8A8_UNORM , plain, 1, 1, un8 , un8 , un8 , un8 , zyxw, rgb
-PIPE_FORMAT_B8G8R8X8_UNORM , plain, 1, 1, un8 , un8 , un8 , un8 , zyx1, rgb
+PIPE_FORMAT_B8G8R8X8_UNORM , plain, 1, 1, un8 , un8 , un8 , x8 , zyx1, rgb
PIPE_FORMAT_A8R8G8B8_UNORM , plain, 1, 1, un8 , un8 , un8 , un8 , yzwx, rgb
-PIPE_FORMAT_X8R8G8B8_UNORM , plain, 1, 1, un8 , un8 , un8 , un8 , yzw1, rgb
+PIPE_FORMAT_X8R8G8B8_UNORM , plain, 1, 1, x8 , un8 , un8 , un8 , yzw1, rgb
PIPE_FORMAT_A8B8G8R8_UNORM , plain, 1, 1, un8 , un8 , un8 , un8 , wzyx, rgb
-PIPE_FORMAT_X8B8G8R8_UNORM , plain, 1, 1, un8 , un8 , un8 , un8 , wzy1, rgb
+PIPE_FORMAT_X8B8G8R8_UNORM , plain, 1, 1, x8 , un8 , un8 , un8 , wzy1, rgb
+# PIPE_FORMAT_R8G8B8A8_UNORM is below
+PIPE_FORMAT_R8G8B8X8_UNORM , plain, 1, 1, un8 , un8 , un8 , x8 , xyz1, rgb
+PIPE_FORMAT_B5G5R5X1_UNORM , plain, 1, 1, un5 , un5 , un5 , x1 , zyx1, rgb
PIPE_FORMAT_B5G5R5A1_UNORM , plain, 1, 1, un5 , un5 , un5 , un1 , zyxw, rgb
PIPE_FORMAT_B4G4R4A4_UNORM , plain, 1, 1, un4 , un4 , un4 , un4 , zyxw, rgb
+PIPE_FORMAT_B4G4R4X4_UNORM , plain, 1, 1, un4 , un4 , un4 , x4 , zyx1, rgb
PIPE_FORMAT_B5G6R5_UNORM , plain, 1, 1, un5 , un6 , un5 , , zyx1, rgb
PIPE_FORMAT_R10G10B10A2_UNORM , plain, 1, 1, un10, un10, un10, un2 , xyzw, rgb
+PIPE_FORMAT_B10G10R10A2_UNORM , plain, 1, 1, un10, un10, un10, un2 , zyxw, rgb
# Luminance/Intensity/Alpha formats
PIPE_FORMAT_L8_UNORM , plain, 1, 1, un8 , , , , xxx1, rgb
PIPE_FORMAT_A8_UNORM , plain, 1, 1, un8 , , , , 000x, rgb
PIPE_FORMAT_I8_UNORM , plain, 1, 1, un8 , , , , xxxx, rgb
+PIPE_FORMAT_L4A4_UNORM , plain, 1, 1, un4 , un4 , , , xxxy, rgb
PIPE_FORMAT_L8A8_UNORM , plain, 1, 1, un8 , un8 , , , xxxy, rgb
PIPE_FORMAT_L16_UNORM , plain, 1, 1, un16, , , , xxx1, rgb
@@ -79,44 +89,65 @@ PIPE_FORMAT_L16_UNORM , plain, 1, 1, un16, , , , xxx1, r
PIPE_FORMAT_L8_SRGB , plain, 1, 1, un8 , , , , xxx1, srgb
PIPE_FORMAT_L8A8_SRGB , plain, 1, 1, un8 , un8 , , , xxxy, srgb
PIPE_FORMAT_R8G8B8_SRGB , plain, 1, 1, un8 , un8 , un8 , , xyz1, srgb
+PIPE_FORMAT_R8G8B8A8_SRGB , plain, 1, 1, un8 , un8 , un8 , un8 , xyzw, srgb
PIPE_FORMAT_A8B8G8R8_SRGB , plain, 1, 1, un8 , un8 , un8 , un8 , wzyx, srgb
-PIPE_FORMAT_X8B8G8R8_SRGB , plain, 1, 1, un8 , un8 , un8 , un8 , wzy1, srgb
+PIPE_FORMAT_X8B8G8R8_SRGB , plain, 1, 1, x8 , un8 , un8 , un8 , wzy1, srgb
PIPE_FORMAT_B8G8R8A8_SRGB , plain, 1, 1, un8 , un8 , un8 , un8 , zyxw, srgb
-PIPE_FORMAT_B8G8R8X8_SRGB , plain, 1, 1, un8 , un8 , un8 , un8 , zyx1, srgb
+PIPE_FORMAT_B8G8R8X8_SRGB , plain, 1, 1, un8 , un8 , un8 , x8 , zyx1, srgb
PIPE_FORMAT_A8R8G8B8_SRGB , plain, 1, 1, un8 , un8 , un8 , un8 , yzwx, srgb
-PIPE_FORMAT_X8R8G8B8_SRGB , plain, 1, 1, un8 , un8 , un8 , un8 , yzw1, srgb
+PIPE_FORMAT_X8R8G8B8_SRGB , plain, 1, 1, x8 , un8 , un8 , un8 , yzw1, srgb
# Mixed-sign formats (typically used for bump map textures)
PIPE_FORMAT_R8SG8SB8UX8U_NORM , plain, 1, 1, sn8 , sn8 , un8 , x8 , xyz1, rgb
+PIPE_FORMAT_R10SG10SB10SA2U_NORM , plain, 1, 1, sn10, sn10, sn10, un2 , xyzw, rgb
PIPE_FORMAT_R5SG5SB6U_NORM , plain, 1, 1, sn5 , sn5 , un6 , , xyz1, rgb
# Depth-stencil formats
-PIPE_FORMAT_S8_UNORM , plain, 1, 1, un8 , , , , _x__, zs
-PIPE_FORMAT_Z16_UNORM , plain, 1, 1, un16, , , , x___, zs
-PIPE_FORMAT_Z32_UNORM , plain, 1, 1, un32, , , , x___, zs
-PIPE_FORMAT_Z32_FLOAT , plain, 1, 1, f32 , , , , x___, zs
-PIPE_FORMAT_Z24S8_UNORM , plain, 1, 1, un24, un8 , , , xy__, zs
-PIPE_FORMAT_S8Z24_UNORM , plain, 1, 1, un8 , un24, , , yx__, zs
-PIPE_FORMAT_Z24X8_UNORM , plain, 1, 1, un24, un8 , , , x___, zs
-PIPE_FORMAT_X8Z24_UNORM , plain, 1, 1, un8 , un24, , , y___, zs
+PIPE_FORMAT_S8_USCALED , plain, 1, 1, u8 , , , , _x__, zs
+PIPE_FORMAT_Z16_UNORM , plain, 1, 1, un16, , , , x___, zs
+PIPE_FORMAT_Z32_UNORM , plain, 1, 1, un32, , , , x___, zs
+PIPE_FORMAT_Z32_FLOAT , plain, 1, 1, f32 , , , , x___, zs
+PIPE_FORMAT_Z24_UNORM_S8_USCALED , plain, 1, 1, un24, u8 , , , xy__, zs
+PIPE_FORMAT_S8_USCALED_Z24_UNORM , plain, 1, 1, u8 , un24, , , yx__, zs
+PIPE_FORMAT_Z24X8_UNORM , plain, 1, 1, un24, x8 , , , x___, zs
+PIPE_FORMAT_X8Z24_UNORM , plain, 1, 1, x8 , un24, , , y___, zs
+PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED , plain, 1, 1, f32, u8 , x24 , , xy__, zs
# YUV formats
# http://www.fourcc.org/yuv.php#UYVY
PIPE_FORMAT_UYVY , subsampled, 2, 1, x32 , , , , xyz1, yuv
# http://www.fourcc.org/yuv.php#YUYV (a.k.a http://www.fourcc.org/yuv.php#YUY2)
-# XXX: u_tile.c's ycbcr_get_tile_rgba actually interprets it as VYUY but the
-# intent should be to match D3DFMT_YUY2
PIPE_FORMAT_YUYV , subsampled, 2, 1, x32 , , , , xyz1, yuv
+# same subsampling but with rgb channels
+PIPE_FORMAT_R8G8_B8G8_UNORM , subsampled, 2, 1, x32 , , , , xyz1, rgb
+PIPE_FORMAT_G8R8_G8B8_UNORM , subsampled, 2, 1, x32 , , , , xyz1, rgb
+
+# some special formats not fitting anywhere else
+PIPE_FORMAT_R10G10B10A2_USCALED , plain, 1, 1, u10 , u10 , u10 , u2 , xyzw, rgb
+PIPE_FORMAT_R11G11B10_FLOAT , plain, 1, 1, f11 , f11 , f10 , , xyz1, rgb
+PIPE_FORMAT_R9G9B9E5_FLOAT , other, 1, 1, x32 , , , , xyz1, rgb
+PIPE_FORMAT_R1_UNORM , other, 8, 1, x8 , , , , x001, rgb
+# A.k.a. D3DFMT_CxV8U8
+PIPE_FORMAT_R8G8Bx_SNORM , other, 1, 1, sn8 , sn8 , , , xyz1, rgb
# Compressed formats
-PIPE_FORMAT_DXT1_RGB , compressed, 4, 4, x64 , , , , xyz1, rgb
-PIPE_FORMAT_DXT1_RGBA , compressed, 4, 4, x64 , , , , xyzw, rgb
-PIPE_FORMAT_DXT3_RGBA , compressed, 4, 4, x128, , , , xyzw, rgb
-PIPE_FORMAT_DXT5_RGBA , compressed, 4, 4, x128, , , , xyzw, rgb
-PIPE_FORMAT_DXT1_SRGB , compressed, 4, 4, x64 , , , , xyz1, srgb
-PIPE_FORMAT_DXT1_SRGBA , compressed, 4, 4, x64 , , , , xyzw, srgb
-PIPE_FORMAT_DXT3_SRGBA , compressed, 4, 4, x128, , , , xyzw, srgb
-PIPE_FORMAT_DXT5_SRGBA , compressed, 4, 4, x128, , , , xyzw, srgb
+# - http://en.wikipedia.org/wiki/S3_Texture_Compression
+# - http://www.opengl.org/registry/specs/EXT/texture_compression_s3tc.txt
+# - http://www.opengl.org/registry/specs/ARB/texture_compression_rgtc.txt
+# - http://msdn.microsoft.com/en-us/library/bb694531.aspx
+PIPE_FORMAT_DXT1_RGB , s3tc, 4, 4, x64 , , , , xyz1, rgb
+PIPE_FORMAT_DXT1_RGBA , s3tc, 4, 4, x64 , , , , xyzw, rgb
+PIPE_FORMAT_DXT3_RGBA , s3tc, 4, 4, x128, , , , xyzw, rgb
+PIPE_FORMAT_DXT5_RGBA , s3tc, 4, 4, x128, , , , xyzw, rgb
+PIPE_FORMAT_DXT1_SRGB , s3tc, 4, 4, x64 , , , , xyz1, srgb
+PIPE_FORMAT_DXT1_SRGBA , s3tc, 4, 4, x64 , , , , xyzw, srgb
+PIPE_FORMAT_DXT3_SRGBA , s3tc, 4, 4, x128, , , , xyzw, srgb
+PIPE_FORMAT_DXT5_SRGBA , s3tc, 4, 4, x128, , , , xyzw, srgb
+
+PIPE_FORMAT_RGTC1_UNORM , rgtc, 4, 4, x64, , , , x001, rgb
+PIPE_FORMAT_RGTC1_SNORM , rgtc, 4, 4, x64, , , , x001, rgb
+PIPE_FORMAT_RGTC2_UNORM , rgtc, 4, 4, x128, , , , xy01, rgb
+PIPE_FORMAT_RGTC2_SNORM , rgtc, 4, 4, x128, , , , xy01, rgb
# Straightforward D3D10-like formats (also used for
# vertex buffer element description)
@@ -148,10 +179,10 @@ PIPE_FORMAT_R32_SSCALED , plain, 1, 1, s32 , , , , x001, r
PIPE_FORMAT_R32G32_SSCALED , plain, 1, 1, s32 , s32 , , , xy01, rgb
PIPE_FORMAT_R32G32B32_SSCALED , plain, 1, 1, s32 , s32 , s32 , , xyz1, rgb
PIPE_FORMAT_R32G32B32A32_SSCALED , plain, 1, 1, s32 , s32 , s32 , s32 , xyzw, rgb
-PIPE_FORMAT_R32_FIXED , plain, 1, 1, h32 , , , , x001, rgb
-PIPE_FORMAT_R32G32_FIXED , plain, 1, 1, h32 , h32 , , , xy01, rgb
-PIPE_FORMAT_R32G32B32_FIXED , plain, 1, 1, h32 , h32 , h32 , , xyz1, rgb
-PIPE_FORMAT_R32G32B32A32_FIXED , plain, 1, 1, h32 , h32 , h32 , h32 , xyzw, rgb
+PIPE_FORMAT_R16_FLOAT , plain, 1, 1, f16 , , , , x001, rgb
+PIPE_FORMAT_R16G16_FLOAT , plain, 1, 1, f16 , f16 , , , xy01, rgb
+PIPE_FORMAT_R16G16B16_FLOAT , plain, 1, 1, f16 , f16 , f16 , , xyz1, rgb
+PIPE_FORMAT_R16G16B16A16_FLOAT , plain, 1, 1, f16 , f16 , f16 , f16 , xyzw, rgb
PIPE_FORMAT_R16_UNORM , plain, 1, 1, un16, , , , x001, rgb
PIPE_FORMAT_R16G16_UNORM , plain, 1, 1, un16, un16, , , xy01, rgb
PIPE_FORMAT_R16G16B16_UNORM , plain, 1, 1, un16, un16, un16, , xyz1, rgb
@@ -184,6 +215,22 @@ PIPE_FORMAT_R8_SSCALED , plain, 1, 1, s8 , , , , x001, r
PIPE_FORMAT_R8G8_SSCALED , plain, 1, 1, s8 , s8 , , , xy01, rgb
PIPE_FORMAT_R8G8B8_SSCALED , plain, 1, 1, s8 , s8 , s8 , , xyz1, rgb
PIPE_FORMAT_R8G8B8A8_SSCALED , plain, 1, 1, s8 , s8 , s8 , s8 , xyzw, rgb
+
+# GL-specific vertex buffer element formats
+# A.k.a. GL_FIXED
+PIPE_FORMAT_R32_FIXED , plain, 1, 1, h32 , , , , x001, rgb
+PIPE_FORMAT_R32G32_FIXED , plain, 1, 1, h32 , h32 , , , xy01, rgb
+PIPE_FORMAT_R32G32B32_FIXED , plain, 1, 1, h32 , h32 , h32 , , xyz1, rgb
+PIPE_FORMAT_R32G32B32A32_FIXED , plain, 1, 1, h32 , h32 , h32 , h32 , xyzw, rgb
+
+# D3D9-specific vertex buffer element formats
+# See also:
+# - http://msdn.microsoft.com/en-us/library/bb172533.aspx
+# A.k.a. D3DDECLTYPE_UDEC3
+PIPE_FORMAT_R10G10B10X2_USCALED , plain, 1, 1, u10 , u10 , u10 , x2 , xyz1, rgb
+# A.k.a. D3DDECLTYPE_DEC3N
+PIPE_FORMAT_R10G10B10X2_SNORM , plain, 1, 1, sn10, sn10, sn10 , x2 , xyz1, rgb
+
PIPE_FORMAT_YV12 , subsampled, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv
PIPE_FORMAT_YV16 , subsampled, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv
PIPE_FORMAT_IYUV , subsampled, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv
diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h
index e8fa0022b5b..605b13bd114 100644
--- a/src/gallium/auxiliary/util/u_format.h
+++ b/src/gallium/auxiliary/util/u_format.h
@@ -56,15 +56,23 @@ enum util_format_layout {
*
* This is for formats like YV12 where there is less than one sample per
* pixel.
- *
- * XXX: This could actually b
*/
UTIL_FORMAT_LAYOUT_SUBSAMPLED = 3,
/**
- * An unspecified compression algorithm.
+ * S3 Texture Compression formats.
+ */
+ UTIL_FORMAT_LAYOUT_S3TC = 4,
+
+ /**
+ * Red-Green Texture Compression formats.
+ */
+ UTIL_FORMAT_LAYOUT_RGTC = 5,
+
+ /**
+ * Everything else that doesn't fit in any of the above layouts.
*/
- UTIL_FORMAT_LAYOUT_COMPRESSED = 4
+ UTIL_FORMAT_LAYOUT_OTHER = 6
};
@@ -120,9 +128,15 @@ struct util_format_channel_description
struct util_format_description
{
enum pipe_format format;
+
const char *name;
/**
+ * Short name, striped of the prefix, lower case.
+ */
+ const char *short_name;
+
+ /**
* Pixel block dimensions.
*/
struct util_format_block block;
@@ -140,6 +154,15 @@ struct util_format_description
unsigned is_array:1;
/**
+ * Whether the pixel format can be described as a bitfield structure.
+ *
+ * In particular:
+ * - pixel depth must be 8, 16, or 32 bits;
+ * - all channels must be unsigned, signed, or void
+ */
+ unsigned is_bitmask:1;
+
+ /**
* Whether channels have mixed types (ignoring UTIL_FORMAT_TYPE_VOID).
*/
unsigned is_mixed:1;
@@ -166,6 +189,127 @@ struct util_format_description
* Colorspace transformation.
*/
enum util_format_colorspace colorspace;
+
+ /**
+ * Unpack pixel blocks to R8G8B8A8_UNORM.
+ * Note: strides are in bytes.
+ *
+ * Only defined for non-depth-stencil formats.
+ */
+ void
+ (*unpack_rgba_8unorm)(uint8_t *dst, unsigned dst_stride,
+ const uint8_t *src, unsigned src_stride,
+ unsigned width, unsigned height);
+
+ /**
+ * Pack pixel blocks from R8G8B8A8_UNORM.
+ * Note: strides are in bytes.
+ *
+ * Only defined for non-depth-stencil formats.
+ */
+ void
+ (*pack_rgba_8unorm)(uint8_t *dst, unsigned dst_stride,
+ const uint8_t *src, unsigned src_stride,
+ unsigned width, unsigned height);
+
+ /**
+ * Unpack pixel blocks to R32G32B32A32_FLOAT.
+ * Note: strides are in bytes.
+ *
+ * Only defined for non-depth-stencil formats.
+ */
+ void
+ (*unpack_rgba_float)(float *dst, unsigned dst_stride,
+ const uint8_t *src, unsigned src_stride,
+ unsigned width, unsigned height);
+
+ /**
+ * Pack pixel blocks from R32G32B32A32_FLOAT.
+ * Note: strides are in bytes.
+ *
+ * Only defined for non-depth-stencil formats.
+ */
+ void
+ (*pack_rgba_float)(uint8_t *dst, unsigned dst_stride,
+ const float *src, unsigned src_stride,
+ unsigned width, unsigned height);
+
+ /**
+ * Fetch a single pixel (i, j) from a block.
+ *
+ * Only defined for non-depth-stencil formats.
+ */
+ void
+ (*fetch_rgba_float)(float *dst,
+ const uint8_t *src,
+ unsigned i, unsigned j);
+
+ /**
+ * Unpack pixels to Z32_UNORM.
+ * Note: strides are in bytes.
+ *
+ * Only defined for depth formats.
+ */
+ void
+ (*unpack_z_32unorm)(uint32_t *dst, unsigned dst_stride,
+ const uint8_t *src, unsigned src_stride,
+ unsigned width, unsigned height);
+
+ /**
+ * Pack pixels from Z32_FLOAT.
+ * Note: strides are in bytes.
+ *
+ * Only defined for depth formats.
+ */
+ void
+ (*pack_z_32unorm)(uint8_t *dst, unsigned dst_stride,
+ const uint32_t *src, unsigned src_stride,
+ unsigned width, unsigned height);
+
+ /**
+ * Unpack pixels to Z32_FLOAT.
+ * Note: strides are in bytes.
+ *
+ * Only defined for depth formats.
+ */
+ void
+ (*unpack_z_float)(float *dst, unsigned dst_stride,
+ const uint8_t *src, unsigned src_stride,
+ unsigned width, unsigned height);
+
+ /**
+ * Pack pixels from Z32_FLOAT.
+ * Note: strides are in bytes.
+ *
+ * Only defined for depth formats.
+ */
+ void
+ (*pack_z_float)(uint8_t *dst, unsigned dst_stride,
+ const float *src, unsigned src_stride,
+ unsigned width, unsigned height);
+
+ /**
+ * Unpack pixels to S8_USCALED.
+ * Note: strides are in bytes.
+ *
+ * Only defined for stencil formats.
+ */
+ void
+ (*unpack_s_8uscaled)(uint8_t *dst, unsigned dst_stride,
+ const uint8_t *src, unsigned src_stride,
+ unsigned width, unsigned height);
+
+ /**
+ * Pack pixels from S8_USCALED.
+ * Note: strides are in bytes.
+ *
+ * Only defined for stencil formats.
+ */
+ void
+ (*pack_s_8uscaled)(uint8_t *dst, unsigned dst_stride,
+ const uint8_t *src, unsigned src_stride,
+ unsigned width, unsigned height);
+
};
@@ -186,8 +330,8 @@ util_format_name(enum pipe_format format)
{
const struct util_format_description *desc = util_format_description(format);
- assert(format);
- if (!format) {
+ assert(desc);
+ if (!desc) {
return "???";
}
@@ -195,16 +339,16 @@ util_format_name(enum pipe_format format)
}
static INLINE boolean
-util_format_is_compressed(enum pipe_format format)
+util_format_is_s3tc(enum pipe_format format)
{
const struct util_format_description *desc = util_format_description(format);
- assert(format);
- if (!format) {
+ assert(desc);
+ if (!desc) {
return FALSE;
}
- return desc->layout == UTIL_FORMAT_LAYOUT_COMPRESSED ? TRUE : FALSE;
+ return desc->layout == UTIL_FORMAT_LAYOUT_S3TC ? TRUE : FALSE;
}
static INLINE boolean
@@ -212,8 +356,8 @@ util_format_is_depth_or_stencil(enum pipe_format format)
{
const struct util_format_description *desc = util_format_description(format);
- assert(format);
- if (!format) {
+ assert(desc);
+ if (!desc) {
return FALSE;
}
@@ -225,8 +369,8 @@ util_format_is_depth_and_stencil(enum pipe_format format)
{
const struct util_format_description *desc = util_format_description(format);
- assert(format);
- if (!format) {
+ assert(desc);
+ if (!desc) {
return FALSE;
}
@@ -238,6 +382,34 @@ util_format_is_depth_and_stencil(enum pipe_format format)
desc->swizzle[1] != UTIL_FORMAT_SWIZZLE_NONE) ? TRUE : FALSE;
}
+/**
+ * Whether this format is a rgab8 variant.
+ *
+ * That is, any format that matches the
+ *
+ * PIPE_FORMAT_?8?8?8?8_UNORM
+ */
+static INLINE boolean
+util_format_is_rgba8_variant(const struct util_format_description *desc)
+{
+ unsigned chan;
+
+ if(desc->block.width != 1 ||
+ desc->block.height != 1 ||
+ desc->block.bits != 32)
+ return FALSE;
+
+ for(chan = 0; chan < 4; ++chan) {
+ if(desc->channel[chan].type != UTIL_FORMAT_TYPE_UNSIGNED &&
+ desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID)
+ return FALSE;
+ if(desc->channel[chan].size != 8)
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
/**
* Return total bits needed for the pixel format per block.
@@ -247,8 +419,8 @@ util_format_get_blocksizebits(enum pipe_format format)
{
const struct util_format_description *desc = util_format_description(format);
- assert(format);
- if (!format) {
+ assert(desc);
+ if (!desc) {
return 0;
}
@@ -273,8 +445,8 @@ util_format_get_blockwidth(enum pipe_format format)
{
const struct util_format_description *desc = util_format_description(format);
- assert(format);
- if (!format) {
+ assert(desc);
+ if (!desc) {
return 1;
}
@@ -286,8 +458,8 @@ util_format_get_blockheight(enum pipe_format format)
{
const struct util_format_description *desc = util_format_description(format);
- assert(format);
- if (!format) {
+ assert(desc);
+ if (!desc) {
return 1;
}
@@ -400,6 +572,16 @@ util_format_has_alpha(enum pipe_format format)
}
}
+/**
+ * Return the number of components stored.
+ * Formats with block size != 1x1 will always have 1 component (the block).
+ */
+static INLINE unsigned
+util_format_get_nr_components(enum pipe_format format)
+{
+ const struct util_format_description *desc = util_format_description(format);
+ return desc->nr_channels;
+}
/*
* Format access functions.
@@ -429,6 +611,19 @@ util_format_write_4ub(enum pipe_format format,
void *dst, unsigned dst_stride,
unsigned x, unsigned y, unsigned w, unsigned h);
+/*
+ * Generic format conversion;
+ */
+
+void
+util_format_translate(enum pipe_format dst_format,
+ void *dst, unsigned dst_stride,
+ unsigned dst_x, unsigned dst_y,
+ enum pipe_format src_format,
+ const void *src, unsigned src_stride,
+ unsigned src_x, unsigned src_y,
+ unsigned width, unsigned height);
+
#ifdef __cplusplus
} // extern "C" {
#endif
diff --git a/src/gallium/auxiliary/util/u_format_access.py b/src/gallium/auxiliary/util/u_format_access.py
deleted file mode 100644
index 00424779d28..00000000000
--- a/src/gallium/auxiliary/util/u_format_access.py
+++ /dev/null
@@ -1,341 +0,0 @@
-#!/usr/bin/env python
-
-'''
-/**************************************************************************
- *
- * Copyright 2009 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * @file
- * Pixel format accessor functions.
- *
- * @author Jose Fonseca <[email protected]>
- */
-'''
-
-
-import math
-import sys
-
-from u_format_pack import *
-
-
-def is_format_supported(format):
- '''Determines whether we actually have the plumbing necessary to generate the
- to read/write to/from this format.'''
-
- # FIXME: Ideally we would support any format combination here.
-
- # XXX: It should be straightforward to support srgb
- if format.colorspace not in ('rgb', 'zs'):
- return False
-
- if format.layout != PLAIN:
- return False
-
- for i in range(4):
- channel = format.channels[i]
- if channel.type not in (VOID, UNSIGNED, FLOAT):
- return False
-
- # We can only read a color from a depth/stencil format if the depth channel is present
- if format.colorspace == 'zs' and format.swizzles[0] == SWIZZLE_NONE:
- return False
-
- return True
-
-
-def native_type(format):
- '''Get the native appropriate for a format.'''
-
- if format.layout == PLAIN:
- if not format.is_array():
- # For arithmetic pixel formats return the integer type that matches the whole pixel
- return 'uint%u_t' % format.block_size()
- else:
- # For array pixel formats return the integer type that matches the color channel
- channel = format.channels[0]
- if channel.type == UNSIGNED:
- return 'uint%u_t' % channel.size
- elif channel.type == SIGNED:
- return 'int%u_t' % channel.size
- elif channel.type == FLOAT:
- if channel.size == 32:
- return 'float'
- elif channel.size == 64:
- return 'double'
- else:
- assert False
- else:
- assert False
- else:
- assert False
-
-
-def generate_srgb_tables():
- print 'static ubyte srgb_to_linear[256] = {'
- for i in range(256):
- print ' %s,' % (int(math.pow((i / 255.0 + 0.055) / 1.055, 2.4) * 255))
- print '};'
- print
- print 'static ubyte linear_to_srgb[256] = {'
- print ' 0,'
- for i in range(1, 256):
- print ' %s,' % (int((1.055 * math.pow(i / 255.0, 0.41666) - 0.055) * 255))
- print '};'
- print
-
-
-def generate_format_read(format, dst_channel, dst_native_type, dst_suffix):
- '''Generate the function to read pixels from a particular format'''
-
- name = format.short_name()
-
- src_native_type = native_type(format)
-
- print 'static void'
- print 'util_format_%s_read_%s(%s *dst, unsigned dst_stride, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0, unsigned w, unsigned h)' % (name, dst_suffix, dst_native_type)
- print '{'
- print ' unsigned x, y;'
- print ' const uint8_t *src_row = src + y0*src_stride;'
- print ' %s *dst_row = dst;' % dst_native_type
- print ' for (y = 0; y < h; ++y) {'
- print ' const %s *src_pixel = (const %s *)(src_row + x0*%u);' % (src_native_type, src_native_type, format.stride())
- print ' %s *dst_pixel = dst_row;' %dst_native_type
- print ' for (x = 0; x < w; ++x) {'
-
- names = ['']*4
- if format.colorspace == 'rgb':
- for i in range(4):
- swizzle = format.swizzles[i]
- if swizzle < 4:
- names[swizzle] += 'rgba'[i]
- elif format.colorspace == 'zs':
- swizzle = format.swizzles[0]
- if swizzle < 4:
- names[swizzle] = 'z'
- else:
- assert False
- else:
- assert False
-
- if format.layout == PLAIN:
- if not format.is_array():
- print ' %s pixel = *src_pixel++;' % src_native_type
- shift = 0;
- for i in range(4):
- src_channel = format.channels[i]
- width = src_channel.size
- if names[i]:
- value = 'pixel'
- mask = (1 << width) - 1
- if shift:
- value = '(%s >> %u)' % (value, shift)
- if shift + width < format.block_size():
- value = '(%s & 0x%x)' % (value, mask)
- value = conversion_expr(src_channel, dst_channel, dst_native_type, value)
- print ' %s %s = %s;' % (dst_native_type, names[i], value)
- shift += width
- else:
- for i in range(4):
- src_channel = format.channels[i]
- if names[i]:
- value = 'src_pixel[%u]' % i
- value = conversion_expr(src_channel, dst_channel, dst_native_type, value)
- print ' %s %s = %s;' % (dst_native_type, names[i], value)
- print ' src_pixel += %u;' % (format.nr_channels())
- else:
- assert False
-
- for i in range(4):
- if format.colorspace == 'rgb':
- swizzle = format.swizzles[i]
- if swizzle < 4:
- value = names[swizzle]
- elif swizzle == SWIZZLE_0:
- value = '0'
- elif swizzle == SWIZZLE_1:
- value = get_one(dst_channel)
- else:
- assert False
- elif format.colorspace == 'zs':
- if i < 3:
- value = 'z'
- else:
- value = get_one(dst_channel)
- else:
- assert False
- print ' *dst_pixel++ = %s; /* %s */' % (value, 'rgba'[i])
-
- print ' }'
- print ' src_row += src_stride;'
- print ' dst_row += dst_stride/sizeof(*dst_row);'
- print ' }'
- print '}'
- print
-
-
-def generate_format_write(format, src_channel, src_native_type, src_suffix):
- '''Generate the function to write pixels to a particular format'''
-
- name = format.short_name()
-
- dst_native_type = native_type(format)
-
- print 'static void'
- print 'util_format_%s_write_%s(const %s *src, unsigned src_stride, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0, unsigned w, unsigned h)' % (name, src_suffix, src_native_type)
- print '{'
- print ' unsigned x, y;'
- print ' uint8_t *dst_row = dst + y0*dst_stride;'
- print ' const %s *src_row = src;' % src_native_type
- print ' for (y = 0; y < h; ++y) {'
- print ' %s *dst_pixel = (%s *)(dst_row + x0*%u);' % (dst_native_type, dst_native_type, format.stride())
- print ' const %s *src_pixel = src_row;' %src_native_type
- print ' for (x = 0; x < w; ++x) {'
-
- inv_swizzle = format.inv_swizzles()
-
- if format.layout == PLAIN:
- if not format.is_array():
- print ' %s pixel = 0;' % dst_native_type
- shift = 0;
- for i in range(4):
- dst_channel = format.channels[i]
- width = dst_channel.size
- if inv_swizzle[i] is not None:
- value = 'src_pixel[%u]' % inv_swizzle[i]
- value = conversion_expr(src_channel, dst_channel, dst_native_type, value)
- if shift:
- value = '(%s << %u)' % (value, shift)
- print ' pixel |= %s;' % value
- shift += width
- print ' *dst_pixel++ = pixel;'
- else:
- for i in range(4):
- dst_channel = format.channels[i]
- if inv_swizzle[i] is not None:
- value = 'src_pixel[%u]' % inv_swizzle[i]
- value = conversion_expr(src_channel, dst_channel, dst_native_type, value)
- print ' *dst_pixel++ = %s;' % value
- else:
- assert False
- print ' src_pixel += 4;'
-
- print ' }'
- print ' dst_row += dst_stride;'
- print ' src_row += src_stride/sizeof(*src_row);'
- print ' }'
- print '}'
- print
-
-
-def generate_read(formats, dst_channel, dst_native_type, dst_suffix):
- '''Generate the dispatch function to read pixels from any format'''
-
- for format in formats:
- if is_format_supported(format):
- generate_format_read(format, dst_channel, dst_native_type, dst_suffix)
-
- print 'void'
- print 'util_format_read_%s(enum pipe_format format, %s *dst, unsigned dst_stride, const void *src, unsigned src_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (dst_suffix, dst_native_type)
- print '{'
- print ' void (*func)(%s *dst, unsigned dst_stride, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0, unsigned w, unsigned h);' % dst_native_type
- print ' switch(format) {'
- for format in formats:
- if is_format_supported(format):
- print ' case %s:' % format.name
- print ' func = &util_format_%s_read_%s;' % (format.short_name(), dst_suffix)
- print ' break;'
- print ' default:'
- print ' debug_printf("unsupported format\\n");'
- print ' return;'
- print ' }'
- print ' func(dst, dst_stride, (const uint8_t *)src, src_stride, x, y, w, h);'
- print '}'
- print
-
-
-def generate_write(formats, src_channel, src_native_type, src_suffix):
- '''Generate the dispatch function to write pixels to any format'''
-
- for format in formats:
- if is_format_supported(format):
- generate_format_write(format, src_channel, src_native_type, src_suffix)
-
- print 'void'
- print 'util_format_write_%s(enum pipe_format format, const %s *src, unsigned src_stride, void *dst, unsigned dst_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (src_suffix, src_native_type)
-
- print '{'
- print ' void (*func)(const %s *src, unsigned src_stride, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0, unsigned w, unsigned h);' % src_native_type
- print ' switch(format) {'
- for format in formats:
- if is_format_supported(format):
- print ' case %s:' % format.name
- print ' func = &util_format_%s_write_%s;' % (format.short_name(), src_suffix)
- print ' break;'
- print ' default:'
- print ' debug_printf("unsupported format\\n");'
- print ' return;'
- print ' }'
- print ' func(src, src_stride, (uint8_t *)dst, dst_stride, x, y, w, h);'
- print '}'
- print
-
-
-def main():
- formats = []
- for arg in sys.argv[1:]:
- formats.extend(parse(arg))
-
- print '/* This file is autogenerated by u_format_access.py from u_format.csv. Do not edit directly. */'
- print
- # This will print the copyright message on the top of this file
- print __doc__.strip()
- print
- print '#include "pipe/p_compiler.h"'
- print '#include "u_math.h"'
- print '#include "u_format_pack.h"'
- print
-
- generate_srgb_tables()
-
- type = Channel(FLOAT, False, 32)
- native_type = 'float'
- suffix = '4f'
-
- generate_read(formats, type, native_type, suffix)
- generate_write(formats, type, native_type, suffix)
-
- type = Channel(UNSIGNED, True, 8)
- native_type = 'uint8_t'
- suffix = '4ub'
-
- generate_read(formats, type, native_type, suffix)
- generate_write(formats, type, native_type, suffix)
-
-
-if __name__ == '__main__':
- main()
diff --git a/src/gallium/auxiliary/util/u_format_other.c b/src/gallium/auxiliary/util/u_format_other.c
new file mode 100644
index 00000000000..723fa8c3bf9
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_format_other.c
@@ -0,0 +1,267 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+
+#include "u_math.h"
+#include "u_format_other.h"
+
+
+void
+util_format_r9g9b9e5_float_unpack_rgba_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+
+}
+
+void
+util_format_r9g9b9e5_float_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+
+}
+
+void
+util_format_r9g9b9e5_float_fetch_rgba_float(float *dst, const uint8_t *src,
+ unsigned i, unsigned j)
+{
+
+}
+
+
+void
+util_format_r9g9b9e5_float_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+
+}
+
+
+void
+util_format_r9g9b9e5_float_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+
+}
+
+
+void
+util_format_r1_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+
+}
+
+
+void
+util_format_r1_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+
+}
+
+
+void
+util_format_r1_unorm_fetch_rgba_float(float *dst, const uint8_t *src,
+ unsigned i, unsigned j)
+{
+
+}
+
+
+void
+util_format_r1_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+
+}
+
+
+void
+util_format_r1_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+}
+
+
+/*
+ * PIPE_FORMAT_R8G8Bx_SNORM
+ *
+ * A.k.a. D3DFMT_CxV8U8
+ */
+
+
+void
+util_format_r8g8bx_snorm_unpack_rgba_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+
+ for(y = 0; y < height; y += 1) {
+ float *dst = dst_row;
+ const uint16_t *src = (const uint16_t *)src_row;
+ for(x = 0; x < width; x += 1) {
+ uint16_t value = *src++;
+ int16_t r, g;
+
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+
+ r = ((int16_t)(value << 8)) >> 8;
+ g = ((int16_t)(value << 0)) >> 8;
+
+ dst[0] = (float)(r * (1.0f/0x7f)); /* r */
+ dst[1] = (float)(g * (1.0f/0x7f)); /* g */
+ dst[2] = sqrtf(1.0f - dst[0] * dst[0] - dst[1] * dst[1]); /* b */
+ dst[3] = 1.0f; /* a */
+ dst += 4;
+ }
+ src_row += src_stride;
+ dst_row += dst_stride/sizeof(*dst_row);
+ }
+}
+
+
+void
+util_format_r8g8bx_snorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; y += 1) {
+ uint8_t *dst = dst_row;
+ const uint16_t *src = (const uint16_t *)src_row;
+ for(x = 0; x < width; x += 1) {
+ uint16_t value = *src++;
+ int16_t r, g;
+
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+
+ r = ((int16_t)(value << 8)) >> 8;
+ g = ((int16_t)(value << 0)) >> 8;
+
+ dst[0] = (uint8_t)(((uint16_t)MAX2(r, 0)) * 0xff / 0x7f); /* r */
+ dst[1] = (uint8_t)(((uint16_t)MAX2(g, 0)) * 0xff / 0x7f); /* g */
+ dst[2] = (uint8_t)sqrtf(0x7f*0x7f - r * r - g * g) * 0xff / 0x7f; /* b */
+ dst[3] = 255; /* a */
+ dst += 4;
+ }
+ src_row += src_stride;
+ dst_row += dst_stride/sizeof(*dst_row);
+ }
+}
+
+
+void
+util_format_r8g8bx_snorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; y += 1) {
+ const float *src = src_row;
+ uint16_t *dst = (uint16_t *)dst_row;
+ for(x = 0; x < width; x += 1) {
+ uint16_t value = 0;
+
+ value |= (uint16_t)(((int8_t)(CLAMP(src[0], -1, 1) * 0x7f)) & 0xff) ;
+ value |= (uint16_t)((((int8_t)(CLAMP(src[1], -1, 1) * 0x7f)) & 0xff) << 8) ;
+
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+
+ *dst++ = value;
+
+ src += 4;
+ }
+ dst_row += dst_stride;
+ src_row += src_stride/sizeof(*src_row);
+ }
+}
+
+
+void
+util_format_r8g8bx_snorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+
+ for(y = 0; y < height; y += 1) {
+ const uint8_t *src = src_row;
+ uint16_t *dst = (uint16_t *)dst_row;
+ for(x = 0; x < width; x += 1) {
+ uint16_t value = 0;
+
+ value |= src[0] >> 1;
+ value |= (src[1] >> 1) << 8;
+
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+
+ *dst++ = value;
+
+ src += 4;
+ }
+ dst_row += dst_stride;
+ src_row += src_stride/sizeof(*src_row);
+ }
+}
+
+
+void
+util_format_r8g8bx_snorm_fetch_rgba_float(float *dst, const uint8_t *src,
+ unsigned i, unsigned j)
+{
+ uint16_t value = *(const uint16_t *)src;
+ int16_t r, g;
+
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+
+ r = ((int16_t)(value << 8)) >> 8;
+ g = ((int16_t)(value << 0)) >> 8;
+
+ dst[0] = r * (1.0f/0x7f); /* r */
+ dst[1] = g * (1.0f/0x7f); /* g */
+ dst[2] = sqrtf(1.0f - dst[0] * dst[0] - dst[1] * dst[1]); /* b */
+ dst[3] = 1.0f; /* a */
+}
diff --git a/src/gallium/auxiliary/util/u_format_other.h b/src/gallium/auxiliary/util/u_format_other.h
new file mode 100644
index 00000000000..98c97be33fa
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_format_other.h
@@ -0,0 +1,108 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+
+#ifndef U_FORMAT_OTHER_H_
+#define U_FORMAT_OTHER_H_
+
+
+#include "pipe/p_compiler.h"
+
+
+void
+util_format_r9g9b9e5_float_unpack_rgba_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+
+void
+util_format_r9g9b9e5_float_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+
+void
+util_format_r9g9b9e5_float_fetch_rgba_float(float *dst, const uint8_t *src,
+ unsigned i, unsigned j);
+
+void
+util_format_r9g9b9e5_float_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+
+void
+util_format_r9g9b9e5_float_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+
+void
+util_format_r1_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+
+void
+util_format_r1_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+
+void
+util_format_r1_unorm_fetch_rgba_float(float *dst, const uint8_t *src,
+ unsigned i, unsigned j);
+
+void
+util_format_r1_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+
+void
+util_format_r1_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+
+void
+util_format_r8g8bx_snorm_unpack_rgba_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+
+void
+util_format_r8g8bx_snorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+
+void
+util_format_r8g8bx_snorm_fetch_rgba_float(float *dst, const uint8_t *src,
+ unsigned i, unsigned j);
+
+void
+util_format_r8g8bx_snorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+
+void
+util_format_r8g8bx_snorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+
+#endif /* U_FORMAT_OTHER_H_ */
diff --git a/src/gallium/auxiliary/util/u_format_pack.py b/src/gallium/auxiliary/util/u_format_pack.py
index 3f33f7cc021..0c1bbc84c17 100644
--- a/src/gallium/auxiliary/util/u_format_pack.py
+++ b/src/gallium/auxiliary/util/u_format_pack.py
@@ -3,7 +3,7 @@
'''
/**************************************************************************
*
- * Copyright 2009 VMware, Inc.
+ * Copyright 2009-2010 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -38,6 +38,7 @@
import sys
+import math
from u_format_parse import *
@@ -45,19 +46,35 @@ from u_format_parse import *
def generate_format_type(format):
'''Generate a structure that describes the format.'''
+ assert format.layout == PLAIN
+
print 'union util_format_%s {' % format.short_name()
- if format.is_bitmask():
+
+ if format.block_size() in (8, 16, 32, 64):
print ' uint%u_t value;' % (format.block_size(),)
+
+ use_bitfields = False
+ for channel in format.channels:
+ if channel.size % 8 or not is_pot(channel.size):
+ use_bitfields = True
+
print ' struct {'
for channel in format.channels:
- if format.is_bitmask() and not format.is_array():
+ if use_bitfields:
if channel.type == VOID:
if channel.size:
print ' unsigned %s:%u;' % (channel.name, channel.size)
elif channel.type == UNSIGNED:
print ' unsigned %s:%u;' % (channel.name, channel.size)
- elif channel.type == SIGNED:
+ elif channel.type in (SIGNED, FIXED):
print ' int %s:%u;' % (channel.name, channel.size)
+ elif channel.type == FLOAT:
+ if channel.size == 64:
+ print ' double %s;' % (channel.name)
+ elif channel.size == 32:
+ print ' float %s;' % (channel.name)
+ else:
+ print ' unsigned %s:%u;' % (channel.name, channel.size)
else:
assert 0
else:
@@ -88,7 +105,7 @@ def generate_format_type(format):
def bswap_format(format):
'''Generate a structure that describes the format.'''
- if format.is_bitmask() and not format.is_array():
+ if format.is_bitmask() and not format.is_array() and format.block_size() > 8:
print '#ifdef PIPE_ARCH_BIG_ENDIAN'
print ' pixel.value = util_bswap%u(pixel.value);' % format.block_size()
print '#endif'
@@ -105,12 +122,10 @@ def is_format_supported(format):
for i in range(4):
channel = format.channels[i]
- if channel.type not in (VOID, UNSIGNED, SIGNED, FLOAT):
+ if channel.type not in (VOID, UNSIGNED, SIGNED, FLOAT, FIXED):
+ return False
+ if channel.type == FLOAT and channel.size not in (16, 32, 64):
return False
-
- # We can only read a color from a depth/stencil format if the depth channel is present
- if format.colorspace == 'zs' and format.swizzles[0] == SWIZZLE_NONE:
- return False
return True
@@ -124,15 +139,17 @@ def native_type(format):
return 'uint%u_t' % format.block_size()
else:
# For array pixel formats return the integer type that matches the color channel
- type = format.channels[0]
- if type.type == UNSIGNED:
- return 'uint%u_t' % type.size
- elif type.type == SIGNED:
- return 'int%u_t' % type.size
- elif type.type == FLOAT:
- if type.size == 32:
+ channel = format.channels[0]
+ if channel.type in (UNSIGNED, VOID):
+ return 'uint%u_t' % channel.size
+ elif channel.type in (SIGNED, FIXED):
+ return 'int%u_t' % channel.size
+ elif channel.type == FLOAT:
+ if channel.size == 16:
+ return 'uint16_t'
+ elif channel.size == 32:
return 'float'
- elif type.size == 64:
+ elif channel.size == 64:
return 'double'
else:
assert False
@@ -171,37 +188,35 @@ def get_one_shift(type):
assert False
-def get_one(type):
+def value_to_native(type, value):
'''Get the value of unity for this type.'''
- if type.type == 'FLOAT' or not type.norm:
- return 1
+ if type.type == FLOAT:
+ return value
+ if type.type == FIXED:
+ return int(value * (1 << (type.size/2)))
+ if not type.norm:
+ return int(value)
+ if type.type == UNSIGNED:
+ return int(value * ((1 << type.size) - 1))
+ if type.type == SIGNED:
+ return int(value * ((1 << (type.size - 1)) - 1))
+ assert False
+
+
+def native_to_constant(type, value):
+ '''Get the value of unity for this type.'''
+ if type.type == FLOAT:
+ if type.size <= 32:
+ return "%ff" % value
+ else:
+ return "%ff" % value
else:
- return (1 << get_one_shift(type)) - 1
-
-
-def generate_clamp():
- '''Code generate the clamping functions for each type.
-
- We don't use a macro so that arguments with side effects,
- like *src_pixel++ are correctly handled.
- '''
-
- for suffix, native_type in [
- ('', 'double'),
- ('f', 'float'),
- ('ui', 'unsigned int'),
- ('si', 'int'),
- ]:
- print 'static INLINE %s' % native_type
- print 'clamp%s(%s value, %s lbound, %s ubound)' % (suffix, native_type, native_type, native_type)
- print '{'
- print ' if(value < lbound)'
- print ' return lbound;'
- print ' if(value > ubound)'
- print ' return ubound;'
- print ' return value;'
- print '}'
- print
+ return str(int(value))
+
+
+def get_one(type):
+ '''Get the value of unity for this type.'''
+ return value_to_native(type, 1)
def clamp_expr(src_channel, dst_channel, dst_native_type, value):
@@ -211,274 +226,448 @@ def clamp_expr(src_channel, dst_channel, dst_native_type, value):
if src_channel == dst_channel:
return value
- # Pick the approriate clamp function
- if src_channel.type == FLOAT:
- if src_channel.size == 32:
- func = 'clampf'
- elif src_channel.size == 64:
- func = 'clamp'
- else:
- assert False
- elif src_channel.type == UNSIGNED:
- func = 'clampui'
- elif src_channel.type == SIGNED:
- func = 'clampsi'
- else:
- assert False
-
src_min = src_channel.min()
src_max = src_channel.max()
dst_min = dst_channel.min()
dst_max = dst_channel.max()
+
+ # Translate the destination range to the src native value
+ dst_min_native = value_to_native(src_channel, dst_min)
+ dst_max_native = value_to_native(src_channel, dst_max)
if src_min < dst_min and src_max > dst_max:
- return 'CLAMP(%s, %s, %s)' % (value, dst_min, dst_max)
+ return 'CLAMP(%s, %s, %s)' % (value, dst_min_native, dst_max_native)
if src_max > dst_max:
- return 'MIN2(%s, %s)' % (value, dst_max)
+ return 'MIN2(%s, %s)' % (value, dst_max_native)
if src_min < dst_min:
- return 'MAX2(%s, %s)' % (value, dst_min)
+ return 'MAX2(%s, %s)' % (value, dst_min_native)
return value
-def conversion_expr(src_channel, dst_channel, dst_native_type, value, clamp=True):
+def conversion_expr(src_channel,
+ dst_channel, dst_native_type,
+ value,
+ clamp=True,
+ src_colorspace = RGB,
+ dst_colorspace = RGB):
'''Generate the expression to convert a value between two types.'''
+ if src_colorspace != dst_colorspace:
+ if src_colorspace == SRGB:
+ assert src_channel.type == UNSIGNED
+ assert src_channel.norm
+ assert src_channel.size == 8
+ assert dst_colorspace == RGB
+ if dst_channel.type == FLOAT:
+ return 'util_format_srgb_8unorm_to_linear_float(%s)' % value
+ else:
+ assert dst_channel.type == UNSIGNED
+ assert dst_channel.norm
+ assert dst_channel.size == 8
+ return 'util_format_srgb_to_linear_8unorm(%s)' % value
+ elif dst_colorspace == SRGB:
+ assert dst_channel.type == UNSIGNED
+ assert dst_channel.norm
+ assert dst_channel.size == 8
+ assert src_colorspace == RGB
+ if src_channel.type == FLOAT:
+ return 'util_format_linear_float_to_srgb_8unorm(%s)' % value
+ else:
+ assert src_channel.type == UNSIGNED
+ assert src_channel.norm
+ assert src_channel.size == 8
+ return 'util_format_linear_to_srgb_8unorm(%s)' % value
+ elif src_colorspace == ZS:
+ pass
+ elif dst_colorspace == ZS:
+ pass
+ else:
+ assert 0
+
if src_channel == dst_channel:
return value
- if src_channel.type == FLOAT and dst_channel.type == FLOAT:
- return '(%s)%s' % (dst_native_type, value)
-
- if not src_channel.norm and not dst_channel.norm:
- return '(%s)%s' % (dst_native_type, value)
+ src_type = src_channel.type
+ src_size = src_channel.size
+ src_norm = src_channel.norm
- if clamp:
- value = clamp_expr(src_channel, dst_channel, dst_native_type, value)
+ # Promote half to float
+ if src_type == FLOAT and src_size == 16:
+ value = 'util_half_to_float(%s)' % value
+ src_size = 32
- if dst_channel.type == FLOAT:
- if src_channel.norm:
- one = get_one(src_channel)
- if src_channel.size <= 23:
- scale = '(1.0f/0x%x)' % one
- else:
- # bigger than single precision mantissa, use double
- scale = '(1.0/0x%x)' % one
- value = '(%s * %s)' % (value, scale)
- return '(%s)%s' % (dst_native_type, value)
+ # Special case for float <-> ubytes for more accurate results
+ # Done before clamping since these functions already take care of that
+ if src_type == UNSIGNED and src_norm and src_size == 8 and dst_channel.type == FLOAT and dst_channel.size == 32:
+ return 'ubyte_to_float(%s)' % value
+ if src_type == FLOAT and src_size == 32 and dst_channel.type == UNSIGNED and dst_channel.norm and dst_channel.size == 8:
+ return 'float_to_ubyte(%s)' % value
- if src_channel.type == FLOAT:
- if dst_channel.norm:
- dst_one = get_one(dst_channel)
- if dst_channel.size <= 23:
- scale = '0x%x' % dst_one
- else:
- # bigger than single precision mantissa, use double
- scale = '(double)0x%x' % dst_one
- value = '(%s * %s)' % (value, scale)
- return '(%s)%s' % (dst_native_type, value)
+ if clamp:
+ if dst_channel.type != FLOAT or src_type != FLOAT:
+ value = clamp_expr(src_channel, dst_channel, dst_native_type, value)
- if not src_channel.norm and not dst_channel.norm:
- # neither is normalized -- just cast
- return '(%s)%s' % (dst_native_type, value)
+ if src_type in (SIGNED, UNSIGNED) and dst_channel.type in (SIGNED, UNSIGNED):
+ if not src_norm and not dst_channel.norm:
+ # neither is normalized -- just cast
+ return '(%s)%s' % (dst_native_type, value)
- if src_channel.type in (SIGNED, UNSIGNED) and dst_channel.type in (SIGNED, UNSIGNED):
src_one = get_one(src_channel)
dst_one = get_one(dst_channel)
- if src_one > dst_one and src_channel.norm:
+ if src_one > dst_one and src_norm and dst_channel.norm:
# We can just bitshift
src_shift = get_one_shift(src_channel)
dst_shift = get_one_shift(dst_channel)
value = '(%s >> %s)' % (value, src_shift - dst_shift)
else:
# We need to rescale using an intermediate type big enough to hold the multiplication of both
- tmp_native_type = intermediate_native_type(src_channel.size + dst_channel.size, src_channel.sign and dst_channel.sign)
- value = '(%s)%s' % (tmp_native_type, value)
+ tmp_native_type = intermediate_native_type(src_size + dst_channel.size, src_channel.sign and dst_channel.sign)
+ value = '((%s)%s)' % (tmp_native_type, value)
value = '(%s * 0x%x / 0x%x)' % (value, dst_one, src_one)
value = '(%s)%s' % (dst_native_type, value)
return value
- assert False
+ # Promote to either float or double
+ if src_type != FLOAT:
+ if src_norm or src_type == FIXED:
+ one = get_one(src_channel)
+ if src_size <= 23:
+ value = '(%s * (1.0f/0x%x))' % (value, one)
+ if dst_channel.size <= 32:
+ value = '(float)%s' % value
+ src_size = 32
+ else:
+ # bigger than single precision mantissa, use double
+ value = '(%s * (1.0/0x%x))' % (value, one)
+ src_size = 64
+ src_norm = False
+ else:
+ if src_size <= 23 or dst_channel.size <= 32:
+ value = '(float)%s' % value
+ src_size = 32
+ else:
+ # bigger than single precision mantissa, use double
+ value = '(double)%s' % value
+ src_size = 64
+ src_type = FLOAT
+ # Convert double or float to non-float
+ if dst_channel.type != FLOAT:
+ if dst_channel.norm or dst_channel.type == FIXED:
+ dst_one = get_one(dst_channel)
+ if dst_channel.size <= 23:
+ value = '(%s * 0x%x)' % (value, dst_one)
+ else:
+ # bigger than single precision mantissa, use double
+ value = '(%s * (double)0x%x)' % (value, dst_one)
+ value = '(%s)%s' % (dst_native_type, value)
+ else:
+ # Cast double to float when converting to either half or float
+ if dst_channel.size <= 32 and src_size > 32:
+ value = '(float)%s' % value
+ src_size = 32
-def generate_format_unpack(format, dst_channel, dst_native_type, dst_suffix):
- '''Generate the function to unpack pixels from a particular format'''
+ if dst_channel.size == 16:
+ value = 'util_float_to_half(%s)' % value
+ elif dst_channel.size == 64 and src_size < 64:
+ value = '(double)%s' % value
- name = format.short_name()
+ return value
- src_native_type = native_type(format)
- print 'static INLINE void'
- print 'util_format_%s_unpack_%s(%s *dst, const void *src)' % (name, dst_suffix, dst_native_type)
- print '{'
- print ' union util_format_%s pixel;' % format.short_name()
- print ' memcpy(&pixel, src, sizeof pixel);'
- bswap_format(format)
+def generate_unpack_kernel(format, dst_channel, dst_native_type):
+ if not is_format_supported(format):
+ return
+
assert format.layout == PLAIN
- for i in range(4):
- swizzle = format.swizzles[i]
- if swizzle < 4:
- src_channel = format.channels[swizzle]
- value = 'pixel.chan.%s' % src_channel.name
- value = conversion_expr(src_channel, dst_channel, dst_native_type, value)
- elif swizzle == SWIZZLE_0:
- value = '0'
- elif swizzle == SWIZZLE_1:
- value = get_one(dst_channel)
- elif swizzle == SWIZZLE_NONE:
- value = '0'
- else:
- assert False
- if format.colorspace == ZS:
- if i == 3:
- value = get_one(dst_channel)
- elif i >= 1:
- value = 'dst[0]'
- print ' dst[%u] = %s; /* %s */' % (i, value, 'rgba'[i])
+ src_native_type = native_type(format)
- print '}'
- print
+ if format.is_bitmask():
+ depth = format.block_size()
+ print ' uint%u_t value = *(const uint%u_t *)src;' % (depth, depth)
+
+ # Declare the intermediate variables
+ for i in range(format.nr_channels()):
+ src_channel = format.channels[i]
+ if src_channel.type == UNSIGNED:
+ print ' uint%u_t %s;' % (depth, src_channel.name)
+ elif src_channel.type == SIGNED:
+ print ' int%u_t %s;' % (depth, src_channel.name)
+
+ if depth > 8:
+ print '#ifdef PIPE_ARCH_BIG_ENDIAN'
+ print ' value = util_bswap%u(value);' % depth
+ print '#endif'
+
+ # Compute the intermediate unshifted values
+ shift = 0
+ for i in range(format.nr_channels()):
+ src_channel = format.channels[i]
+ value = 'value'
+ if src_channel.type == UNSIGNED:
+ if shift:
+ value = '%s >> %u' % (value, shift)
+ if shift + src_channel.size < depth:
+ value = '(%s) & 0x%x' % (value, (1 << src_channel.size) - 1)
+ elif src_channel.type == SIGNED:
+ if shift + src_channel.size < depth:
+ # Align the sign bit
+ lshift = depth - (shift + src_channel.size)
+ value = '%s << %u' % (value, lshift)
+ # Cast to signed
+ value = '(int%u_t)(%s) ' % (depth, value)
+ if src_channel.size < depth:
+ # Align the LSB bit
+ rshift = depth - src_channel.size
+ value = '(%s) >> %u' % (value, rshift)
+ else:
+ value = None
+
+ if value is not None:
+ print ' %s = %s;' % (src_channel.name, value)
+
+ shift += src_channel.size
+
+ # Convert, swizzle, and store final values
+ for i in range(4):
+ swizzle = format.swizzles[i]
+ if swizzle < 4:
+ src_channel = format.channels[swizzle]
+ src_colorspace = format.colorspace
+ if src_colorspace == SRGB and i == 3:
+ # Alpha channel is linear
+ src_colorspace = RGB
+ value = src_channel.name
+ value = conversion_expr(src_channel,
+ dst_channel, dst_native_type,
+ value,
+ src_colorspace = src_colorspace)
+ elif swizzle == SWIZZLE_0:
+ value = '0'
+ elif swizzle == SWIZZLE_1:
+ value = get_one(dst_channel)
+ elif swizzle == SWIZZLE_NONE:
+ value = '0'
+ else:
+ assert False
+ print ' dst[%u] = %s; /* %s */' % (i, value, 'rgba'[i])
+
+ else:
+ print ' union util_format_%s pixel;' % format.short_name()
+ print ' memcpy(&pixel, src, sizeof pixel);'
+ bswap_format(format)
+
+ for i in range(4):
+ swizzle = format.swizzles[i]
+ if swizzle < 4:
+ src_channel = format.channels[swizzle]
+ src_colorspace = format.colorspace
+ if src_colorspace == SRGB and i == 3:
+ # Alpha channel is linear
+ src_colorspace = RGB
+ value = 'pixel.chan.%s' % src_channel.name
+ value = conversion_expr(src_channel,
+ dst_channel, dst_native_type,
+ value,
+ src_colorspace = src_colorspace)
+ elif swizzle == SWIZZLE_0:
+ value = '0'
+ elif swizzle == SWIZZLE_1:
+ value = get_one(dst_channel)
+ elif swizzle == SWIZZLE_NONE:
+ value = '0'
+ else:
+ assert False
+ print ' dst[%u] = %s; /* %s */' % (i, value, 'rgba'[i])
-def generate_format_pack(format, src_channel, src_native_type, src_suffix):
- '''Generate the function to pack pixels to a particular format'''
-
- name = format.short_name()
+def generate_pack_kernel(format, src_channel, src_native_type):
+ if not is_format_supported(format):
+ return
+
dst_native_type = native_type(format)
- print 'static INLINE void'
- print 'util_format_%s_pack_%s(void *dst, %s r, %s g, %s b, %s a)' % (name, src_suffix, src_native_type, src_native_type, src_native_type, src_native_type)
- print '{'
- print ' union util_format_%s pixel;' % format.short_name()
-
assert format.layout == PLAIN
inv_swizzle = format.inv_swizzles()
- for i in range(4):
- dst_channel = format.channels[i]
- width = dst_channel.size
- if inv_swizzle[i] is None:
- continue
- value = 'rgba'[inv_swizzle[i]]
- value = conversion_expr(src_channel, dst_channel, dst_native_type, value)
- if format.colorspace == ZS:
- if i == 3:
- value = get_one(dst_channel)
- elif i >= 1:
- value = '0'
- print ' pixel.chan.%s = %s;' % (dst_channel.name, value)
+ if format.is_bitmask():
+ depth = format.block_size()
+ print ' uint%u_t value = 0;' % depth
+
+ shift = 0
+ for i in range(4):
+ dst_channel = format.channels[i]
+ if inv_swizzle[i] is not None:
+ value ='src[%u]' % inv_swizzle[i]
+ dst_colorspace = format.colorspace
+ if dst_colorspace == SRGB and inv_swizzle[i] == 3:
+ # Alpha channel is linear
+ dst_colorspace = RGB
+ value = conversion_expr(src_channel,
+ dst_channel, dst_native_type,
+ value,
+ dst_colorspace = dst_colorspace)
+ if dst_channel.type in (UNSIGNED, SIGNED):
+ if shift + dst_channel.size < depth:
+ value = '(%s) & 0x%x' % (value, (1 << dst_channel.size) - 1)
+ if shift:
+ value = '(%s) << %u' % (value, shift)
+ if dst_channel.type == SIGNED:
+ # Cast to unsigned
+ value = '(uint%u_t)(%s) ' % (depth, value)
+ else:
+ value = None
+ if value is not None:
+ print ' value |= %s;' % (value)
+
+ shift += dst_channel.size
+
+ if depth > 8:
+ print '#ifdef PIPE_ARCH_BIG_ENDIAN'
+ print ' value = util_bswap%u(value);' % depth
+ print '#endif'
+
+ print ' *(uint%u_t *)dst = value;' % depth
+
+ else:
+ print ' union util_format_%s pixel;' % format.short_name()
+
+ for i in range(4):
+ dst_channel = format.channels[i]
+ width = dst_channel.size
+ if inv_swizzle[i] is None:
+ continue
+ dst_colorspace = format.colorspace
+ if dst_colorspace == SRGB and inv_swizzle[i] == 3:
+ # Alpha channel is linear
+ dst_colorspace = RGB
+ value ='src[%u]' % inv_swizzle[i]
+ value = conversion_expr(src_channel,
+ dst_channel, dst_native_type,
+ value,
+ dst_colorspace = dst_colorspace)
+ print ' pixel.chan.%s = %s;' % (dst_channel.name, value)
+
+ bswap_format(format)
+ print ' memcpy(dst, &pixel, sizeof pixel);'
+
+
+def generate_format_unpack(format, dst_channel, dst_native_type, dst_suffix):
+ '''Generate the function to unpack pixels from a particular format'''
+
+ name = format.short_name()
+
+ print 'static INLINE void'
+ print 'util_format_%s_unpack_%s(%s *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)' % (name, dst_suffix, dst_native_type)
+ print '{'
+
+ if is_format_supported(format):
+ print ' unsigned x, y;'
+ print ' for(y = 0; y < height; y += %u) {' % (format.block_height,)
+ print ' %s *dst = dst_row;' % (dst_native_type)
+ print ' const uint8_t *src = src_row;'
+ print ' for(x = 0; x < width; x += %u) {' % (format.block_width,)
+
+ generate_unpack_kernel(format, dst_channel, dst_native_type)
+
+ print ' src += %u;' % (format.block_size() / 8,)
+ print ' dst += 4;'
+ print ' }'
+ print ' src_row += src_stride;'
+ print ' dst_row += dst_stride/sizeof(*dst_row);'
+ print ' }'
- bswap_format(format)
- print ' memcpy(dst, &pixel, sizeof pixel);'
print '}'
print
-def generate_unpack(formats, dst_channel, dst_native_type, dst_suffix):
- '''Generate the dispatch function to unpack pixels from any format'''
+def generate_format_pack(format, src_channel, src_native_type, src_suffix):
+ '''Generate the function to pack pixels to a particular format'''
- for format in formats:
- if is_format_supported(format):
- generate_format_unpack(format, dst_channel, dst_native_type, dst_suffix)
+ name = format.short_name()
print 'static INLINE void'
- print 'util_format_unpack_%s(enum pipe_format format, %s *dst, const void *src)' % (dst_suffix, dst_native_type)
+ print 'util_format_%s_pack_%s(uint8_t *dst_row, unsigned dst_stride, const %s *src_row, unsigned src_stride, unsigned width, unsigned height)' % (name, src_suffix, src_native_type)
print '{'
- print ' void (*func)(%s *dst, const void *src);' % dst_native_type
- print ' switch(format) {'
- for format in formats:
- if is_format_supported(format):
- print ' case %s:' % format.name
- print ' func = &util_format_%s_unpack_%s;' % (format.short_name(), dst_suffix)
- print ' break;'
- print ' default:'
- print ' debug_printf("unsupported format\\n");'
- print ' return;'
- print ' }'
- print ' func(dst, src);'
+
+ if is_format_supported(format):
+ print ' unsigned x, y;'
+ print ' for(y = 0; y < height; y += %u) {' % (format.block_height,)
+ print ' const %s *src = src_row;' % (src_native_type)
+ print ' uint8_t *dst = dst_row;'
+ print ' for(x = 0; x < width; x += %u) {' % (format.block_width,)
+
+ generate_pack_kernel(format, src_channel, src_native_type)
+
+ print ' src += 4;'
+ print ' dst += %u;' % (format.block_size() / 8,)
+ print ' }'
+ print ' dst_row += dst_stride;'
+ print ' src_row += src_stride/sizeof(*src_row);'
+ print ' }'
+
print '}'
print
+
+def generate_format_fetch(format, dst_channel, dst_native_type, dst_suffix):
+ '''Generate the function to unpack pixels from a particular format'''
-def generate_pack(formats, src_channel, src_native_type, src_suffix):
- '''Generate the dispatch function to pack pixels to any format'''
-
- for format in formats:
- if is_format_supported(format):
- generate_format_pack(format, src_channel, src_native_type, src_suffix)
+ name = format.short_name()
print 'static INLINE void'
- print 'util_format_pack_%s(enum pipe_format format, void *dst, %s r, %s g, %s b, %s a)' % (src_suffix, src_native_type, src_native_type, src_native_type, src_native_type)
+ print 'util_format_%s_fetch_%s(%s *dst, const uint8_t *src, unsigned i, unsigned j)' % (name, dst_suffix, dst_native_type)
print '{'
- print ' void (*func)(void *dst, %s r, %s g, %s b, %s a);' % (src_native_type, src_native_type, src_native_type, src_native_type)
- print ' switch(format) {'
- for format in formats:
- if is_format_supported(format):
- print ' case %s:' % format.name
- print ' func = &util_format_%s_pack_%s;' % (format.short_name(), src_suffix)
- print ' break;'
- print ' default:'
- print ' debug_printf("%s: unsupported format\\n", __FUNCTION__);'
- print ' return;'
- print ' }'
- print ' func(dst, r, g, b, a);'
+
+ if is_format_supported(format):
+ generate_unpack_kernel(format, dst_channel, dst_native_type)
+
print '}'
print
-def main():
- formats = []
- for arg in sys.argv[1:]:
- formats.extend(parse(arg))
+def is_format_hand_written(format):
+ return format.layout in ('s3tc', 'subsampled', 'other') or format.colorspace == ZS
- print '/* This file is autogenerated by u_format_pack.py from u_format.csv. Do not edit directly. */'
- print
- # This will print the copyright message on the top of this file
- print __doc__.strip()
- print
- print '#ifndef U_FORMAT_PACK_H'
- print '#define U_FORMAT_PACK_H'
+def generate(formats):
print
print '#include "pipe/p_compiler.h"'
print '#include "u_math.h"'
+ print '#include "u_half.h"'
print '#include "u_format.h"'
+ print '#include "u_format_other.h"'
+ print '#include "u_format_srgb.h"'
+ print '#include "u_format_yuv.h"'
+ print '#include "u_format_zs.h"'
print
- generate_clamp()
-
for format in formats:
- if format.layout == PLAIN:
- generate_format_type(format)
-
- channel = Channel(FLOAT, False, 32)
- native_type = 'float'
- suffix = '4f'
+ if not is_format_hand_written(format):
+
+ if is_format_supported(format):
+ generate_format_type(format)
- generate_unpack(formats, channel, native_type, suffix)
- generate_pack(formats, channel, native_type, suffix)
+ channel = Channel(FLOAT, False, 32)
+ native_type = 'float'
+ suffix = 'rgba_float'
- channel = Channel(UNSIGNED, True, 8)
- native_type = 'uint8_t'
- suffix = '4ub'
+ generate_format_unpack(format, channel, native_type, suffix)
+ generate_format_pack(format, channel, native_type, suffix)
+ generate_format_fetch(format, channel, native_type, suffix)
- generate_unpack(formats, channel, native_type, suffix)
- generate_pack(formats, channel, native_type, suffix)
-
- print
- print '#ifdef __cplusplus'
- print '}'
- print '#endif'
- print
- print '#endif /* ! U_FORMAT_PACK_H */'
+ channel = Channel(UNSIGNED, True, 8)
+ native_type = 'uint8_t'
+ suffix = 'rgba_8unorm'
+ generate_format_unpack(format, channel, native_type, suffix)
+ generate_format_pack(format, channel, native_type, suffix)
-if __name__ == '__main__':
- main()
diff --git a/src/gallium/auxiliary/util/u_format_parse.py b/src/gallium/auxiliary/util/u_format_parse.py
index 250926418ec..7076c676aaf 100755
--- a/src/gallium/auxiliary/util/u_format_parse.py
+++ b/src/gallium/auxiliary/util/u_format_parse.py
@@ -73,18 +73,22 @@ class Channel:
'''Maximum representable number.'''
if self.type == FLOAT:
return VERY_LARGE
+ if self.type == FIXED:
+ return (1 << (self.size/2)) - 1
if self.norm:
return 1
if self.type == UNSIGNED:
return (1 << self.size) - 1
if self.type == SIGNED:
- return self.size - 1
+ return (1 << (self.size - 1)) - 1
assert False
def min(self):
'''Minimum representable number.'''
if self.type == FLOAT:
return -VERY_LARGE
+ if self.type == FIXED:
+ return -(1 << (self.size/2))
if self.type == UNSIGNED:
return 0
if self.norm:
@@ -134,6 +138,8 @@ class Format:
return nr_channels
def is_array(self):
+ if self.layout != PLAIN:
+ return False
ref_channel = self.channels[0]
for channel in self.channels[1:]:
if channel.size and (channel.size != ref_channel.size or channel.size % 8):
@@ -141,7 +147,11 @@ class Format:
return True
def is_mixed(self):
+ if self.layout != PLAIN:
+ return False
ref_channel = self.channels[0]
+ if ref_channel.type == VOID:
+ ref_channel = self.channels[1]
for channel in self.channels[1:]:
if channel.type != VOID:
if channel.type != ref_channel.type:
@@ -154,29 +164,29 @@ class Format:
return is_pot(self.block_size())
def is_int(self):
+ if self.layout != PLAIN:
+ return False
for channel in self.channels:
if channel.type not in (VOID, UNSIGNED, SIGNED):
return False
return True
def is_float(self):
+ if self.layout != PLAIN:
+ return False
for channel in self.channels:
if channel.type not in (VOID, FLOAT):
return False
return True
def is_bitmask(self):
- if self.block_size() > 32:
+ if self.layout != PLAIN:
return False
- if not self.is_pot():
+ if self.block_size() not in (8, 16, 32):
return False
for channel in self.channels:
- if not is_pot(channel.size):
- return True
if channel.type not in (VOID, UNSIGNED, SIGNED):
return False
- if channel.size >= 32:
- return False
return True
def inv_swizzles(self):
diff --git a/src/gallium/auxiliary/util/u_format_s3tc.c b/src/gallium/auxiliary/util/u_format_s3tc.c
new file mode 100644
index 00000000000..5b279b8fe26
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_format_s3tc.c
@@ -0,0 +1,728 @@
+/**************************************************************************
+ *
+ * Copyright (C) 1999-2007 Brian Paul All Rights Reserved.
+ * Copyright (c) 2008 VMware, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "u_dl.h"
+#include "u_math.h"
+#include "u_format.h"
+#include "u_format_s3tc.h"
+
+
+#if defined(_WIN32) || defined(WIN32)
+#define DXTN_LIBNAME "dxtn.dll"
+#elif defined(__APPLE__)
+#define DXTN_LIBNAME "libtxc_dxtn.dylib"
+#else
+#define DXTN_LIBNAME "libtxc_dxtn.so"
+#endif
+
+
+static void
+util_format_dxt1_rgb_fetch_stub(int src_stride,
+ const uint8_t *src,
+ int col, int row,
+ uint8_t *dst)
+{
+ assert(0);
+}
+
+
+static void
+util_format_dxt1_rgba_fetch_stub(int src_stride,
+ const uint8_t *src,
+ int col, int row,
+ uint8_t *dst )
+{
+ assert(0);
+}
+
+
+static void
+util_format_dxt3_rgba_fetch_stub(int src_stride,
+ const uint8_t *src,
+ int col, int row,
+ uint8_t *dst )
+{
+ assert(0);
+}
+
+
+static void
+util_format_dxt5_rgba_fetch_stub(int src_stride,
+ const uint8_t *src,
+ int col, int row,
+ uint8_t *dst )
+{
+ assert(0);
+}
+
+
+static void
+util_format_dxtn_pack_stub(int src_comps,
+ int width, int height,
+ const uint8_t *src,
+ enum util_format_dxtn dst_format,
+ uint8_t *dst,
+ int dst_stride)
+{
+ assert(0);
+}
+
+
+boolean util_format_s3tc_enabled = FALSE;
+
+util_format_dxtn_fetch_t util_format_dxt1_rgb_fetch = util_format_dxt1_rgb_fetch_stub;
+util_format_dxtn_fetch_t util_format_dxt1_rgba_fetch = util_format_dxt1_rgba_fetch_stub;
+util_format_dxtn_fetch_t util_format_dxt3_rgba_fetch = util_format_dxt3_rgba_fetch_stub;
+util_format_dxtn_fetch_t util_format_dxt5_rgba_fetch = util_format_dxt5_rgba_fetch_stub;
+
+util_format_dxtn_pack_t util_format_dxtn_pack = util_format_dxtn_pack_stub;
+
+
+void
+util_format_s3tc_init(void)
+{
+ static boolean first_time = TRUE;
+ struct util_dl_library *library = NULL;
+ util_dl_proc fetch_2d_texel_rgb_dxt1;
+ util_dl_proc fetch_2d_texel_rgba_dxt1;
+ util_dl_proc fetch_2d_texel_rgba_dxt3;
+ util_dl_proc fetch_2d_texel_rgba_dxt5;
+ util_dl_proc tx_compress_dxtn;
+
+ if (!first_time)
+ return;
+ first_time = FALSE;
+
+ if (util_format_s3tc_enabled)
+ return;
+
+ library = util_dl_open(DXTN_LIBNAME);
+ if (!library) {
+ debug_printf("couldn't open " DXTN_LIBNAME ", software DXTn "
+ "compression/decompression unavailable");
+ return;
+ }
+
+ fetch_2d_texel_rgb_dxt1 =
+ util_dl_get_proc_address(library, "fetch_2d_texel_rgb_dxt1");
+ fetch_2d_texel_rgba_dxt1 =
+ util_dl_get_proc_address(library, "fetch_2d_texel_rgba_dxt1");
+ fetch_2d_texel_rgba_dxt3 =
+ util_dl_get_proc_address(library, "fetch_2d_texel_rgba_dxt3");
+ fetch_2d_texel_rgba_dxt5 =
+ util_dl_get_proc_address(library, "fetch_2d_texel_rgba_dxt5");
+ tx_compress_dxtn =
+ util_dl_get_proc_address(library, "tx_compress_dxtn");
+
+ if (!util_format_dxt1_rgb_fetch ||
+ !util_format_dxt1_rgba_fetch ||
+ !util_format_dxt3_rgba_fetch ||
+ !util_format_dxt5_rgba_fetch ||
+ !util_format_dxtn_pack) {
+ debug_printf("couldn't reference all symbols in " DXTN_LIBNAME
+ ", software DXTn compression/decompression "
+ "unavailable");
+ util_dl_close(library);
+ return;
+ }
+
+ util_format_dxt1_rgb_fetch = (util_format_dxtn_fetch_t)fetch_2d_texel_rgb_dxt1;
+ util_format_dxt1_rgba_fetch = (util_format_dxtn_fetch_t)fetch_2d_texel_rgba_dxt1;
+ util_format_dxt3_rgba_fetch = (util_format_dxtn_fetch_t)fetch_2d_texel_rgba_dxt3;
+ util_format_dxt5_rgba_fetch = (util_format_dxtn_fetch_t)fetch_2d_texel_rgba_dxt5;
+ util_format_dxtn_pack = (util_format_dxtn_pack_t)tx_compress_dxtn;
+ util_format_s3tc_enabled = TRUE;
+}
+
+
+/*
+ * Pixel fetch.
+ */
+
+void
+util_format_dxt1_rgb_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
+{
+ util_format_dxt1_rgb_fetch(0, src, i, j, dst);
+}
+
+void
+util_format_dxt1_rgba_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
+{
+ util_format_dxt1_rgba_fetch(0, src, i, j, dst);
+}
+
+void
+util_format_dxt3_rgba_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
+{
+ util_format_dxt3_rgba_fetch(0, src, i, j, dst);
+}
+
+void
+util_format_dxt5_rgba_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
+{
+ util_format_dxt5_rgba_fetch(0, src, i, j, dst);
+}
+
+void
+util_format_dxt1_rgb_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j)
+{
+ uint8_t tmp[4];
+ util_format_dxt1_rgb_fetch(0, src, i, j, tmp);
+ dst[0] = ubyte_to_float(tmp[0]);
+ dst[1] = ubyte_to_float(tmp[1]);
+ dst[2] = ubyte_to_float(tmp[2]);
+ dst[3] = 1.0;
+}
+
+void
+util_format_dxt1_rgba_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j)
+{
+ uint8_t tmp[4];
+ util_format_dxt1_rgba_fetch(0, src, i, j, tmp);
+ dst[0] = ubyte_to_float(tmp[0]);
+ dst[1] = ubyte_to_float(tmp[1]);
+ dst[2] = ubyte_to_float(tmp[2]);
+ dst[3] = ubyte_to_float(tmp[3]);
+}
+
+void
+util_format_dxt3_rgba_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j)
+{
+ uint8_t tmp[4];
+ util_format_dxt3_rgba_fetch(0, src, i, j, tmp);
+ dst[0] = ubyte_to_float(tmp[0]);
+ dst[1] = ubyte_to_float(tmp[1]);
+ dst[2] = ubyte_to_float(tmp[2]);
+ dst[3] = ubyte_to_float(tmp[3]);
+}
+
+void
+util_format_dxt5_rgba_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j)
+{
+ uint8_t tmp[4];
+ util_format_dxt5_rgba_fetch(0, src, i, j, tmp);
+ dst[0] = ubyte_to_float(tmp[0]);
+ dst[1] = ubyte_to_float(tmp[1]);
+ dst[2] = ubyte_to_float(tmp[2]);
+ dst[3] = ubyte_to_float(tmp[3]);
+}
+
+
+/*
+ * Block decompression.
+ */
+
+static INLINE void
+util_format_dxtn_rgb_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height,
+ util_format_dxtn_fetch_t fetch,
+ unsigned block_size)
+{
+ const unsigned bw = 4, bh = 4, comps = 4;
+ unsigned x, y, i, j;
+ for(y = 0; y < height; y += bh) {
+ const uint8_t *src = src_row;
+ for(x = 0; x < width; x += bw) {
+ for(j = 0; j < bh; ++j) {
+ for(i = 0; i < bw; ++i) {
+ uint8_t *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*comps;
+ fetch(0, src, i, j, dst);
+ }
+ }
+ src += block_size;
+ }
+ src_row += src_stride;
+ }
+}
+
+void
+util_format_dxt1_rgb_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ util_format_dxtn_rgb_unpack_rgba_8unorm(dst_row, dst_stride,
+ src_row, src_stride,
+ width, height,
+ util_format_dxt1_rgb_fetch, 8);
+}
+
+void
+util_format_dxt1_rgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ util_format_dxtn_rgb_unpack_rgba_8unorm(dst_row, dst_stride,
+ src_row, src_stride,
+ width, height,
+ util_format_dxt1_rgba_fetch, 8);
+}
+
+void
+util_format_dxt3_rgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ util_format_dxtn_rgb_unpack_rgba_8unorm(dst_row, dst_stride,
+ src_row, src_stride,
+ width, height,
+ util_format_dxt3_rgba_fetch, 16);
+}
+
+void
+util_format_dxt5_rgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ util_format_dxtn_rgb_unpack_rgba_8unorm(dst_row, dst_stride,
+ src_row, src_stride,
+ width, height,
+ util_format_dxt5_rgba_fetch, 16);
+}
+
+static INLINE void
+util_format_dxtn_rgb_unpack_rgba_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height,
+ util_format_dxtn_fetch_t fetch,
+ unsigned block_size)
+{
+ unsigned x, y, i, j;
+ for(y = 0; y < height; y += 4) {
+ const uint8_t *src = src_row;
+ for(x = 0; x < width; x += 4) {
+ for(j = 0; j < 4; ++j) {
+ for(i = 0; i < 4; ++i) {
+ float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4;
+ uint8_t tmp[4];
+ fetch(0, src, i, j, tmp);
+ dst[0] = ubyte_to_float(tmp[0]);
+ dst[1] = ubyte_to_float(tmp[1]);
+ dst[2] = ubyte_to_float(tmp[2]);
+ dst[3] = ubyte_to_float(tmp[3]);
+ }
+ }
+ src += block_size;
+ }
+ src_row += src_stride;
+ }
+}
+
+void
+util_format_dxt1_rgb_unpack_rgba_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ util_format_dxtn_rgb_unpack_rgba_float(dst_row, dst_stride,
+ src_row, src_stride,
+ width, height,
+ util_format_dxt1_rgb_fetch, 8);
+}
+
+void
+util_format_dxt1_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ util_format_dxtn_rgb_unpack_rgba_float(dst_row, dst_stride,
+ src_row, src_stride,
+ width, height,
+ util_format_dxt1_rgba_fetch, 8);
+}
+
+void
+util_format_dxt3_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ util_format_dxtn_rgb_unpack_rgba_float(dst_row, dst_stride,
+ src_row, src_stride,
+ width, height,
+ util_format_dxt3_rgba_fetch, 16);
+}
+
+void
+util_format_dxt5_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ util_format_dxtn_rgb_unpack_rgba_float(dst_row, dst_stride,
+ src_row, src_stride,
+ width, height,
+ util_format_dxt5_rgba_fetch, 16);
+}
+
+
+/*
+ * Block compression.
+ */
+
+void
+util_format_dxt1_rgb_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ const unsigned bw = 4, bh = 4, bytes_per_block = 8;
+ unsigned x, y, i, j, k;
+ for(y = 0; y < height; y += bh) {
+ uint8_t *dst = dst_row;
+ for(x = 0; x < width; x += bw) {
+ uint8_t tmp[4][4][3]; /* [bh][bw][comps] */
+ for(j = 0; j < bh; ++j) {
+ for(i = 0; i < bw; ++i) {
+ for(k = 0; k < 3; ++k) {
+ tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + (x + i)*4 + k];
+ }
+ }
+ }
+ util_format_dxtn_pack(3, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGB, dst, 0);
+ dst += bytes_per_block;
+ }
+ dst_row += dst_stride / sizeof(*dst_row);
+ }
+}
+
+void
+util_format_dxt1_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ const unsigned bw = 4, bh = 4, comps = 4, bytes_per_block = 8;
+ unsigned x, y, i, j, k;
+ for(y = 0; y < height; y += bh) {
+ uint8_t *dst = dst_row;
+ for(x = 0; x < width; x += bw) {
+ uint8_t tmp[4][4][4]; /* [bh][bw][comps] */
+ for(j = 0; j < bh; ++j) {
+ for(i = 0; i < bw; ++i) {
+ for(k = 0; k < comps; ++k) {
+ tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + (x + i)*comps + k];
+ }
+ }
+ }
+ util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGBA, dst, 0);
+ dst += bytes_per_block;
+ }
+ dst_row += dst_stride / sizeof(*dst_row);
+ }
+}
+
+void
+util_format_dxt3_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ const unsigned bw = 4, bh = 4, comps = 4, bytes_per_block = 16;
+ unsigned x, y, i, j, k;
+ for(y = 0; y < height; y += bh) {
+ uint8_t *dst = dst_row;
+ for(x = 0; x < width; x += bw) {
+ uint8_t tmp[4][4][4]; /* [bh][bw][comps] */
+ for(j = 0; j < bh; ++j) {
+ for(i = 0; i < bw; ++i) {
+ for(k = 0; k < comps; ++k) {
+ tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + (x + i)*comps + k];
+ }
+ }
+ }
+ util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT3_RGBA, dst, 0);
+ dst += bytes_per_block;
+ }
+ dst_row += dst_stride / sizeof(*dst_row);
+ }
+}
+
+void
+util_format_dxt5_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ const unsigned bw = 4, bh = 4, comps = 4, bytes_per_block = 16;
+ unsigned x, y, i, j, k;
+
+ for(y = 0; y < height; y += bh) {
+ uint8_t *dst = dst_row;
+ for(x = 0; x < width; x += bw) {
+ uint8_t tmp[4][4][4]; /* [bh][bw][comps] */
+ for(j = 0; j < bh; ++j) {
+ for(i = 0; i < bw; ++i) {
+ for(k = 0; k < comps; ++k) {
+ tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + (x + i)*comps + k];
+ }
+ }
+ }
+ util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT5_RGBA, dst, 0);
+ dst += bytes_per_block;
+ }
+ dst_row += dst_stride / sizeof(*dst_row);
+ }
+}
+
+void
+util_format_dxt1_rgb_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y, i, j, k;
+ for(y = 0; y < height; y += 4) {
+ uint8_t *dst = dst_row;
+ for(x = 0; x < width; x += 4) {
+ uint8_t tmp[4][4][3];
+ for(j = 0; j < 4; ++j) {
+ for(i = 0; i < 4; ++i) {
+ for(k = 0; k < 3; ++k) {
+ tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + (x+i)*4 + k]);
+ }
+ }
+ }
+ util_format_dxtn_pack(3, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGB, dst, 0);
+ dst += 8;
+ }
+ dst_row += 4*dst_stride/sizeof(*dst_row);
+ }
+}
+
+void
+util_format_dxt1_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y, i, j, k;
+ for(y = 0; y < height; y += 4) {
+ uint8_t *dst = dst_row;
+ for(x = 0; x < width; x += 4) {
+ uint8_t tmp[4][4][4];
+ for(j = 0; j < 4; ++j) {
+ for(i = 0; i < 4; ++i) {
+ for(k = 0; k < 4; ++k) {
+ tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + (x+i)*4 + k]);
+ }
+ }
+ }
+ util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGBA, dst, 0);
+ dst += 8;
+ }
+ dst_row += 4*dst_stride/sizeof(*dst_row);
+ }
+}
+
+void
+util_format_dxt3_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y, i, j, k;
+ for(y = 0; y < height; y += 4) {
+ uint8_t *dst = dst_row;
+ for(x = 0; x < width; x += 4) {
+ uint8_t tmp[4][4][4];
+ for(j = 0; j < 4; ++j) {
+ for(i = 0; i < 4; ++i) {
+ for(k = 0; k < 4; ++k) {
+ tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + (x+i)*4 + k]);
+ }
+ }
+ }
+ util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT3_RGBA, dst, 0);
+ dst += 16;
+ }
+ dst_row += 4*dst_stride/sizeof(*dst_row);
+ }
+}
+
+void
+util_format_dxt5_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y, i, j, k;
+ for(y = 0; y < height; y += 4) {
+ uint8_t *dst = dst_row;
+ for(x = 0; x < width; x += 4) {
+ uint8_t tmp[4][4][4];
+ for(j = 0; j < 4; ++j) {
+ for(i = 0; i < 4; ++i) {
+ for(k = 0; k < 4; ++k) {
+ tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + (x+i)*4 + k]);
+ }
+ }
+ }
+ util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT5_RGBA, dst, 0);
+ dst += 16;
+ }
+ dst_row += 4*dst_stride/sizeof(*dst_row);
+ }
+}
+
+
+/*
+ * SRGB variants.
+ *
+ * FIXME: shunts to RGB for now
+ */
+
+void
+util_format_dxt1_srgb_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ util_format_dxt1_rgb_unpack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, width, height);
+}
+
+void
+util_format_dxt1_srgb_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ util_format_dxt1_rgb_pack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, width, height);
+}
+
+void
+util_format_dxt1_srgb_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
+{
+ util_format_dxt1_rgb_fetch_rgba_8unorm(dst, src, i, j);
+}
+
+void
+util_format_dxt1_srgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ util_format_dxt1_rgba_unpack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, width, height);
+}
+
+void
+util_format_dxt1_srgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ util_format_dxt1_rgba_pack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, width, height);
+}
+
+void
+util_format_dxt1_srgba_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
+{
+ util_format_dxt1_rgba_fetch_rgba_8unorm(dst, src, i, j);
+}
+
+void
+util_format_dxt3_srgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ util_format_dxt3_rgba_unpack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, width, height);
+}
+
+void
+util_format_dxt3_srgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ util_format_dxt3_rgba_pack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, width, height);
+}
+
+void
+util_format_dxt3_srgba_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
+{
+ util_format_dxt3_rgba_fetch_rgba_8unorm(dst, src, i, j);
+}
+
+void
+util_format_dxt5_srgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ util_format_dxt5_rgba_unpack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, width, height);
+}
+
+void
+util_format_dxt5_srgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ util_format_dxt5_rgba_pack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, width, height);
+}
+
+void
+util_format_dxt5_srgba_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
+{
+ util_format_dxt5_rgba_fetch_rgba_8unorm(dst, src, i, j);
+}
+
+void
+util_format_dxt1_srgb_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ util_format_dxt1_rgb_unpack_rgba_float(dst_row, dst_stride, src_row, src_stride, width, height);
+}
+
+void
+util_format_dxt1_srgb_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ util_format_dxt1_rgb_pack_rgba_float(dst_row, dst_stride, src_row, src_stride, width, height);
+}
+
+void
+util_format_dxt1_srgb_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j)
+{
+ util_format_dxt1_rgb_fetch_rgba_float(dst, src, i, j);
+}
+
+void
+util_format_dxt1_srgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ util_format_dxt1_rgba_unpack_rgba_float(dst_row, dst_stride, src_row, src_stride, width, height);
+}
+
+void
+util_format_dxt1_srgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ util_format_dxt1_rgba_pack_rgba_float(dst_row, dst_stride, src_row, src_stride, width, height);
+}
+
+void
+util_format_dxt1_srgba_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j)
+{
+ util_format_dxt1_rgba_fetch_rgba_float(dst, src, i, j);
+}
+
+void
+util_format_dxt3_srgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ util_format_dxt3_rgba_unpack_rgba_float(dst_row, dst_stride, src_row, src_stride, width, height);
+}
+
+void
+util_format_dxt3_srgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ util_format_dxt3_rgba_pack_rgba_float(dst_row, dst_stride, src_row, src_stride, width, height);
+}
+
+void
+util_format_dxt3_srgba_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j)
+{
+ util_format_dxt3_rgba_fetch_rgba_float(dst, src, i, j);
+}
+
+void
+util_format_dxt5_srgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ util_format_dxt5_rgba_unpack_rgba_float(dst_row, dst_stride, src_row, src_stride, width, height);
+}
+
+void
+util_format_dxt5_srgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ util_format_dxt5_rgba_pack_rgba_float(dst_row, dst_stride, src_row, src_stride, width, height);
+}
+
+void
+util_format_dxt5_srgba_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j)
+{
+ util_format_dxt5_rgba_fetch_rgba_float(dst, src, i, j);
+}
+
diff --git a/src/gallium/auxiliary/util/u_format_s3tc.h b/src/gallium/auxiliary/util/u_format_s3tc.h
new file mode 100644
index 00000000000..97770abd422
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_format_s3tc.h
@@ -0,0 +1,218 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+
+#ifndef U_FORMAT_S3TC_H_
+#define U_FORMAT_S3TC_H_
+
+
+#include "pipe/p_compiler.h"
+
+
+enum util_format_dxtn {
+ UTIL_FORMAT_DXT1_RGB = 0x83F0,
+ UTIL_FORMAT_DXT1_RGBA = 0x83F1,
+ UTIL_FORMAT_DXT3_RGBA = 0x83F2,
+ UTIL_FORMAT_DXT5_RGBA = 0x83F3
+};
+
+
+typedef void
+(*util_format_dxtn_fetch_t)( int src_stride,
+ const uint8_t *src,
+ int col, int row,
+ uint8_t *dst );
+
+typedef void
+(*util_format_dxtn_pack_t)( int src_comps,
+ int width, int height,
+ const uint8_t *src,
+ enum util_format_dxtn dst_format,
+ uint8_t *dst,
+ int dst_stride);
+
+extern boolean util_format_s3tc_enabled;
+
+extern util_format_dxtn_fetch_t util_format_dxt1_rgb_fetch;
+extern util_format_dxtn_fetch_t util_format_dxt1_rgba_fetch;
+extern util_format_dxtn_fetch_t util_format_dxt3_rgba_fetch;
+extern util_format_dxtn_fetch_t util_format_dxt5_rgba_fetch;
+
+extern util_format_dxtn_pack_t util_format_dxtn_pack;
+
+
+void
+util_format_s3tc_init(void);
+
+
+void
+util_format_dxt1_rgb_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_dxt1_rgb_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_dxt1_rgb_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j);
+
+void
+util_format_dxt1_rgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_dxt1_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_dxt1_rgba_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j);
+
+void
+util_format_dxt3_rgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_dxt3_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_dxt3_rgba_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j);
+
+void
+util_format_dxt5_rgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_dxt5_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_dxt5_rgba_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j);
+
+void
+util_format_dxt1_srgb_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_dxt1_srgb_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_dxt1_srgb_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j);
+
+void
+util_format_dxt1_srgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_dxt1_srgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_dxt1_srgba_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j);
+
+void
+util_format_dxt3_srgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_dxt3_srgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_dxt3_srgba_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j);
+
+void
+util_format_dxt5_srgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_dxt5_srgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_dxt5_srgba_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j);
+
+
+void
+util_format_dxt1_rgb_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_dxt1_rgb_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_dxt1_rgb_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j);
+
+void
+util_format_dxt1_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_dxt1_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_dxt1_rgba_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j);
+
+void
+util_format_dxt3_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_dxt3_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_dxt3_rgba_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j);
+
+void
+util_format_dxt5_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_dxt5_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_dxt5_rgba_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j);
+
+void
+util_format_dxt1_srgb_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_dxt1_srgb_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_dxt1_srgb_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j);
+
+void
+util_format_dxt1_srgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_dxt1_srgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_dxt1_srgba_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j);
+
+void
+util_format_dxt3_srgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_dxt3_srgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_dxt3_srgba_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j);
+
+void
+util_format_dxt5_srgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_dxt5_srgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_dxt5_srgba_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j);
+
+
+#endif /* U_FORMAT_S3TC_H_ */
diff --git a/src/gallium/auxiliary/util/u_format_srgb.h b/src/gallium/auxiliary/util/u_format_srgb.h
new file mode 100644
index 00000000000..43213fbeb3e
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_format_srgb.h
@@ -0,0 +1,106 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * SRGB translation.
+ *
+ * @author Brian Paul <[email protected]>
+ * @author Michal Krol <[email protected]>
+ * @author Jose Fonseca <[email protected]>
+ */
+
+#ifndef U_FORMAT_SRGB_H_
+#define U_FORMAT_SRGB_H_
+
+
+#include "pipe/p_compiler.h"
+#include "u_math.h"
+
+
+extern const float
+util_format_srgb_8unorm_to_linear_float_table[256];
+
+extern const uint8_t
+util_format_srgb_to_linear_8unorm_table[256];
+
+extern const uint8_t
+util_format_linear_to_srgb_8unorm_table[256];
+
+
+/**
+ * Convert a unclamped linear float to srgb value in the [0,255].
+ * XXX this hasn't been tested (render to srgb surface).
+ * XXX this needs optimization.
+ */
+static INLINE uint8_t
+util_format_linear_float_to_srgb_8unorm(float x)
+{
+ if (x >= 1.0f)
+ return 255;
+ else if (x >= 0.0031308f)
+ return float_to_ubyte(1.055f * powf(x, 0.41666f) - 0.055f);
+ else if (x > 0.0f)
+ return float_to_ubyte(12.92f * x);
+ else
+ return 0;
+}
+
+
+/**
+ * Convert an 8-bit sRGB value from non-linear space to a
+ * linear RGB value in [0, 1].
+ * Implemented with a 256-entry lookup table.
+ */
+static INLINE float
+util_format_srgb_8unorm_to_linear_float(uint8_t x)
+{
+ return util_format_srgb_8unorm_to_linear_float_table[x];
+}
+
+
+/**
+ * Convert a 8bit normalized value from linear to srgb.
+ */
+static INLINE uint8_t
+util_format_linear_to_srgb_8unorm(uint8_t x)
+{
+ return util_format_linear_to_srgb_8unorm_table[x];
+}
+
+
+/**
+ * Convert a 8bit normalized value from srgb to linear.
+ */
+static INLINE uint8_t
+util_format_srgb_to_linear_8unorm(uint8_t x)
+{
+ return util_format_srgb_to_linear_8unorm_table[x];
+}
+
+
+#endif /* U_FORMAT_SRGB_H_ */
diff --git a/src/gallium/auxiliary/util/u_format_srgb.py b/src/gallium/auxiliary/util/u_format_srgb.py
new file mode 100644
index 00000000000..a4c76dc00b3
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_format_srgb.py
@@ -0,0 +1,101 @@
+#!/usr/bin/env python
+
+'''
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * SRGB translation.
+ *
+ * @author Brian Paul <[email protected]>
+ * @author Michal Krol <[email protected]>
+ * @author Jose Fonseca <[email protected]>
+ */
+'''
+
+
+import sys
+import math
+
+
+def srgb_to_linear(x):
+ if x <= 0.04045:
+ return x / 12.92
+ else:
+ return math.pow((x + 0.055) / 1.055, 2.4)
+
+
+def linear_to_srgb(x):
+ if x >= 0.0031308:
+ return 1.055 * math.pow(x, 0.41666) - 0.055
+ else:
+ return 12.92 * x
+
+def generate_srgb_tables():
+ print 'const float'
+ print 'util_format_srgb_8unorm_to_linear_float_table[256] = {'
+ for j in range(0, 256, 4):
+ print ' ',
+ for i in range(j, j + 4):
+ print '%.7e,' % (srgb_to_linear(i / 255.0),),
+ print
+ print '};'
+ print
+ print 'const uint8_t'
+ print 'util_format_srgb_to_linear_8unorm_table[256] = {'
+ for j in range(0, 256, 16):
+ print ' ',
+ for i in range(j, j + 16):
+ print '%3u,' % (int(srgb_to_linear(i / 255.0) * 255.0 + 0.5),),
+ print
+ print '};'
+ print
+ print 'const uint8_t'
+ print 'util_format_linear_to_srgb_8unorm_table[256] = {'
+ for j in range(0, 256, 16):
+ print ' ',
+ for i in range(j, j + 16):
+ print '%3u,' % (int(linear_to_srgb(i / 255.0) * 255.0 + 0.5),),
+ print
+ print '};'
+ print
+
+
+def main():
+ print '/* This file is autogenerated by u_format_srgb.py. Do not edit directly. */'
+ print
+ # This will print the copyright message on the top of this file
+ print __doc__.strip()
+ print
+ print '#include "u_format_srgb.h"'
+ print
+ generate_srgb_tables()
+
+
+if __name__ == '__main__':
+ main()
diff --git a/src/gallium/auxiliary/util/u_format_table.py b/src/gallium/auxiliary/util/u_format_table.py
index 4e29d15f3bb..ae9a5981973 100755
--- a/src/gallium/auxiliary/util/u_format_table.py
+++ b/src/gallium/auxiliary/util/u_format_table.py
@@ -3,7 +3,7 @@
'''
/**************************************************************************
*
- * Copyright 2009 VMware, Inc.
+ * Copyright 2010 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -33,6 +33,7 @@
import sys
from u_format_parse import *
+import u_format_pack
def layout_map(layout):
@@ -85,30 +86,22 @@ def write_format_table(formats):
print __doc__.strip()
print
print '#include "u_format.h"'
+ print '#include "u_format_s3tc.h"'
print
- print 'const struct util_format_description'
- print 'util_format_none_description = {'
- print " PIPE_FORMAT_NONE,"
- print " \"PIPE_FORMAT_NONE\","
- print " {0, 0, 0},"
- print " 0,"
- print " 0,"
- print " 0,"
- print " 0,"
- print " {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}, {0, 0, 0}},"
- print " {0, 0, 0, 0},"
- print " 0"
- print "};"
- print
+
+ u_format_pack.generate(formats)
+
for format in formats:
print 'const struct util_format_description'
print 'util_format_%s_description = {' % (format.short_name(),)
print " %s," % (format.name,)
print " \"%s\"," % (format.name,)
+ print " \"%s\"," % (format.short_name(),)
print " {%u, %u, %u},\t/* block */" % (format.block_width, format.block_height, format.block_size())
print " %s," % (layout_map(format.layout),)
print " %u,\t/* nr_channels */" % (format.nr_channels(),)
print " %s,\t/* is_array */" % (bool_map(format.is_array()),)
+ print " %s,\t/* is_bitmask */" % (bool_map(format.is_bitmask()),)
print " %s,\t/* is_mixed */" % (bool_map(format.is_mixed()),)
print " {"
for i in range(4):
@@ -136,8 +129,37 @@ def write_format_table(formats):
print " %s%s\t/* %s */" % (swizzle_map[swizzle], sep, comment)
print " },"
print " %s," % (colorspace_map(format.colorspace),)
+ if format.colorspace != ZS:
+ print " &util_format_%s_unpack_rgba_8unorm," % format.short_name()
+ print " &util_format_%s_pack_rgba_8unorm," % format.short_name()
+ print " &util_format_%s_unpack_rgba_float," % format.short_name()
+ print " &util_format_%s_pack_rgba_float," % format.short_name()
+ print " &util_format_%s_fetch_rgba_float," % format.short_name()
+ else:
+ print " NULL, /* unpack_rgba_8unorm */"
+ print " NULL, /* pack_rgba_8unorm */"
+ print " NULL, /* unpack_rgba_float */"
+ print " NULL, /* pack_rgba_float */"
+ print " NULL, /* fetch_rgba_float */"
+ if format.colorspace == ZS and format.swizzles[0] != SWIZZLE_NONE:
+ print " &util_format_%s_unpack_z_32unorm," % format.short_name()
+ print " &util_format_%s_pack_z_32unorm," % format.short_name()
+ print " &util_format_%s_unpack_z_float," % format.short_name()
+ print " &util_format_%s_pack_z_float," % format.short_name()
+ else:
+ print " NULL, /* unpack_z_32unorm */"
+ print " NULL, /* pack_z_32unorm */"
+ print " NULL, /* unpack_z_float */"
+ print " NULL, /* pack_z_float */"
+ if format.colorspace == ZS and format.swizzles[1] != SWIZZLE_NONE:
+ print " &util_format_%s_unpack_s_8uscaled," % format.short_name()
+ print " &util_format_%s_pack_s_8uscaled" % format.short_name()
+ else:
+ print " NULL, /* unpack_s_8uscaled */"
+ print " NULL /* pack_s_8uscaled */"
print "};"
print
+
print "const struct util_format_description *"
print "util_format_description(enum pipe_format format)"
print "{"
@@ -146,13 +168,10 @@ def write_format_table(formats):
print " }"
print
print " switch (format) {"
- print " case PIPE_FORMAT_NONE:"
- print " return &util_format_none_description;"
for format in formats:
print " case %s:" % format.name
print " return &util_format_%s_description;" % (format.short_name(),)
print " default:"
- print " assert(0);"
print " return NULL;"
print " }"
print "}"
diff --git a/src/gallium/auxiliary/util/u_format_tests.c b/src/gallium/auxiliary/util/u_format_tests.c
new file mode 100644
index 00000000000..b1df6c49c42
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_format_tests.c
@@ -0,0 +1,970 @@
+/**************************************************************************
+ *
+ * Copyright 2009-2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "u_memory.h"
+#include "u_format_tests.h"
+
+
+/*
+ * Helper macros to create the packed bytes for longer words.
+ */
+
+#define PACKED_1x8(x) {x, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
+#define PACKED_2x8(x, y) {x, y, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
+#define PACKED_3x8(x, y, z) {x, y, z, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
+#define PACKED_4x8(x, y, z, w) {x, y, z, w, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
+#define PACKED_8x8(a, b, c, d, e, f, g, h) {a, b, c, d, e, f, g, h, 0, 0, 0, 0, 0, 0, 0, 0}
+
+#define PACKED_1x16(x) {(x) & 0xff, (x) >> 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
+#define PACKED_2x16(x, y) {(x) & 0xff, (x) >> 8, (y) & 0xff, (y) >> 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
+#define PACKED_3x16(x, y, z) {(x) & 0xff, (x) >> 8, (y) & 0xff, (y) >> 8, (z) & 0xff, (z) >> 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
+#define PACKED_4x16(x, y, z, w) {(x) & 0xff, (x) >> 8, (y) & 0xff, (y) >> 8, (z) & 0xff, (z) >> 8, (w) & 0xff, (w) >> 8, 0, 0, 0, 0, 0, 0, 0, 0}
+
+#define PACKED_1x32(x) {(x) & 0xff, ((x) >> 8) & 0xff, ((x) >> 16) & 0xff, (x) >> 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
+#define PACKED_2x32(x, y) {(x) & 0xff, ((x) >> 8) & 0xff, ((x) >> 16) & 0xff, (x) >> 24, (y) & 0xff, ((y) >> 8) & 0xff, ((y) >> 16) & 0xff, (y) >> 24, 0, 0, 0, 0, 0, 0, 0, 0}
+#define PACKED_3x32(x, y, z) {(x) & 0xff, ((x) >> 8) & 0xff, ((x) >> 16) & 0xff, (x) >> 24, (y) & 0xff, ((y) >> 8) & 0xff, ((y) >> 16) & 0xff, (y) >> 24, (z) & 0xff, ((z) >> 8) & 0xff, ((z) >> 16) & 0xff, (z) >> 24, 0, 0, 0, 0}
+#define PACKED_4x32(x, y, z, w) {(x) & 0xff, ((x) >> 8) & 0xff, ((x) >> 16) & 0xff, (x) >> 24, (y) & 0xff, ((y) >> 8) & 0xff, ((y) >> 16) & 0xff, (y) >> 24, (z) & 0xff, ((z) >> 8) & 0xff, ((z) >> 16) & 0xff, (z) >> 24, (w) & 0xff, ((w) >> 8) & 0xff, ((w) >> 16) & 0xff, (w) >> 24}
+
+#define UNPACKED_1x1(r, g, b, a) \
+ {{{r, g, b, a}, {0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}}, \
+ {{0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}}, \
+ {{0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}}, \
+ {{0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}}}
+
+#define UNPACKED_2x1(r0, g0, b0, a0, r1, g1, b1, a1) \
+ {{{r0, g0, b0, a0}, {r1, g1, b1, a1}, {0, 0, 0, 0}, {0, 0, 0, 0}}, \
+ {{ 0, 0, 0, 0}, { 0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}}, \
+ {{ 0, 0, 0, 0}, { 0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}}, \
+ {{ 0, 0, 0, 0}, { 0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}}}
+
+
+/**
+ * Test cases.
+ *
+ * These were manually entered. We could generate these
+ *
+ * To keep this to a we cover only the corner cases, which should produce
+ * good enough coverage since that pixel format transformations are afine for
+ * non SRGB formats.
+ */
+const struct util_format_test_case
+util_format_test_cases[] =
+{
+
+ /*
+ * 32-bit rendertarget formats
+ */
+
+ {PIPE_FORMAT_B8G8R8A8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_B8G8R8A8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x000000ff), UNPACKED_1x1(0.0, 0.0, 1.0, 0.0)},
+ {PIPE_FORMAT_B8G8R8A8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x0000ff00), UNPACKED_1x1(0.0, 1.0, 0.0, 0.0)},
+ {PIPE_FORMAT_B8G8R8A8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00ff0000), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_B8G8R8A8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xff000000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_B8G8R8A8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xffffffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)},
+
+ {PIPE_FORMAT_B8G8R8X8_UNORM, PACKED_1x32(0x00ffffff), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_B8G8R8X8_UNORM, PACKED_1x32(0x00ffffff), PACKED_1x32(0x000000ff), UNPACKED_1x1(0.0, 0.0, 1.0, 1.0)},
+ {PIPE_FORMAT_B8G8R8X8_UNORM, PACKED_1x32(0x00ffffff), PACKED_1x32(0x0000ff00), UNPACKED_1x1(0.0, 1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_B8G8R8X8_UNORM, PACKED_1x32(0x00ffffff), PACKED_1x32(0x00ff0000), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_B8G8R8X8_UNORM, PACKED_1x32(0x00ffffff), PACKED_1x32(0xff000000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_B8G8R8X8_UNORM, PACKED_1x32(0x00ffffff), PACKED_1x32(0xffffffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)},
+
+ {PIPE_FORMAT_A8R8G8B8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_A8R8G8B8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x000000ff), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_A8R8G8B8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x0000ff00), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_A8R8G8B8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00ff0000), UNPACKED_1x1(0.0, 1.0, 0.0, 0.0)},
+ {PIPE_FORMAT_A8R8G8B8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xff000000), UNPACKED_1x1(0.0, 0.0, 1.0, 0.0)},
+ {PIPE_FORMAT_A8R8G8B8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xffffffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)},
+
+ {PIPE_FORMAT_X8R8G8B8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_X8R8G8B8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0x000000ff), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_X8R8G8B8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0x0000ff00), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_X8R8G8B8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0x00ff0000), UNPACKED_1x1(0.0, 1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_X8R8G8B8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0xff000000), UNPACKED_1x1(0.0, 0.0, 1.0, 1.0)},
+ {PIPE_FORMAT_X8R8G8B8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0xffffffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)},
+
+ {PIPE_FORMAT_A8B8G8R8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_A8B8G8R8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x000000ff), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_A8B8G8R8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x0000ff00), UNPACKED_1x1(0.0, 0.0, 1.0, 0.0)},
+ {PIPE_FORMAT_A8B8G8R8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00ff0000), UNPACKED_1x1(0.0, 1.0, 0.0, 0.0)},
+ {PIPE_FORMAT_A8B8G8R8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xff000000), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_A8B8G8R8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xffffffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)},
+
+ {PIPE_FORMAT_X8B8G8R8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_X8B8G8R8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0x000000ff), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_X8B8G8R8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0x0000ff00), UNPACKED_1x1(0.0, 0.0, 1.0, 1.0)},
+ {PIPE_FORMAT_X8B8G8R8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0x00ff0000), UNPACKED_1x1(0.0, 1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_X8B8G8R8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0xff000000), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_X8B8G8R8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0xffffffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)},
+
+ {PIPE_FORMAT_R8G8B8X8_UNORM, PACKED_1x32(0x00ffffff), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8B8X8_UNORM, PACKED_1x32(0x00ffffff), PACKED_1x32(0x000000ff), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8B8X8_UNORM, PACKED_1x32(0x00ffffff), PACKED_1x32(0x0000ff00), UNPACKED_1x1(0.0, 1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8B8X8_UNORM, PACKED_1x32(0x00ffffff), PACKED_1x32(0x00ff0000), UNPACKED_1x1(0.0, 0.0, 1.0, 1.0)},
+ {PIPE_FORMAT_R8G8B8X8_UNORM, PACKED_1x32(0x00ffffff), PACKED_1x32(0xff000000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8B8X8_UNORM, PACKED_1x32(0x00ffffff), PACKED_1x32(0xffffffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)},
+
+ {PIPE_FORMAT_R10G10B10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R10G10B10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x000003ff), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R10G10B10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x000ffc00), UNPACKED_1x1(0.0, 1.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R10G10B10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x3ff00000), UNPACKED_1x1(0.0, 0.0, 1.0, 0.0)},
+ {PIPE_FORMAT_R10G10B10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xc0000000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R10G10B10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xffffffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)},
+
+ {PIPE_FORMAT_B10G10R10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_B10G10R10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x000003ff), UNPACKED_1x1(0.0, 0.0, 1.0, 0.0)},
+ {PIPE_FORMAT_B10G10R10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x000ffc00), UNPACKED_1x1(0.0, 1.0, 0.0, 0.0)},
+ {PIPE_FORMAT_B10G10R10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x3ff00000), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_B10G10R10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xc0000000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_B10G10R10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xffffffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)},
+
+ /*
+ * 16-bit rendertarget formats
+ */
+
+ {PIPE_FORMAT_B5G5R5X1_UNORM, PACKED_1x16(0x7fff), PACKED_1x16(0x0000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_B5G5R5X1_UNORM, PACKED_1x16(0x7fff), PACKED_1x16(0x001f), UNPACKED_1x1(0.0, 0.0, 1.0, 1.0)},
+ {PIPE_FORMAT_B5G5R5X1_UNORM, PACKED_1x16(0x7fff), PACKED_1x16(0x03e0), UNPACKED_1x1(0.0, 1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_B5G5R5X1_UNORM, PACKED_1x16(0x7fff), PACKED_1x16(0x7c00), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_B5G5R5X1_UNORM, PACKED_1x16(0x7fff), PACKED_1x16(0x7fff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)},
+
+ {PIPE_FORMAT_B5G5R5A1_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x0000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_B5G5R5A1_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x001f), UNPACKED_1x1(0.0, 0.0, 1.0, 0.0)},
+ {PIPE_FORMAT_B5G5R5A1_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x03e0), UNPACKED_1x1(0.0, 1.0, 0.0, 0.0)},
+ {PIPE_FORMAT_B5G5R5A1_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x7c00), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_B5G5R5A1_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x8000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_B5G5R5A1_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)},
+
+ {PIPE_FORMAT_B4G4R4X4_UNORM, PACKED_1x16(0x0fff), PACKED_1x16(0x0000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_B4G4R4X4_UNORM, PACKED_1x16(0x0fff), PACKED_1x16(0x000f), UNPACKED_1x1(0.0, 0.0, 1.0, 1.0)},
+ {PIPE_FORMAT_B4G4R4X4_UNORM, PACKED_1x16(0x0fff), PACKED_1x16(0x00f0), UNPACKED_1x1(0.0, 1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_B4G4R4X4_UNORM, PACKED_1x16(0x0fff), PACKED_1x16(0x0f00), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_B4G4R4X4_UNORM, PACKED_1x16(0x0fff), PACKED_1x16(0x0fff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)},
+
+ {PIPE_FORMAT_B4G4R4A4_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x0000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_B4G4R4A4_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x000f), UNPACKED_1x1(0.0, 0.0, 1.0, 0.0)},
+ {PIPE_FORMAT_B4G4R4A4_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x00f0), UNPACKED_1x1(0.0, 1.0, 0.0, 0.0)},
+ {PIPE_FORMAT_B4G4R4A4_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x0f00), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_B4G4R4A4_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xf000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_B4G4R4A4_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)},
+
+ {PIPE_FORMAT_B5G6R5_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x0000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_B5G6R5_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x001f), UNPACKED_1x1(0.0, 0.0, 1.0, 1.0)},
+ {PIPE_FORMAT_B5G6R5_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x07e0), UNPACKED_1x1(0.0, 1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_B5G6R5_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xf800), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_B5G6R5_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)},
+
+ /*
+ * Luminance/intensity/alpha formats
+ */
+
+ {PIPE_FORMAT_L8_UNORM, PACKED_1x8(0xff), PACKED_1x8(0x00), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_L8_UNORM, PACKED_1x8(0xff), PACKED_1x8(0xff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)},
+
+ {PIPE_FORMAT_A8_UNORM, PACKED_1x8(0xff), PACKED_1x8(0x00), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_A8_UNORM, PACKED_1x8(0xff), PACKED_1x8(0xff), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+
+ {PIPE_FORMAT_I8_UNORM, PACKED_1x8(0xff), PACKED_1x8(0x00), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_I8_UNORM, PACKED_1x8(0xff), PACKED_1x8(0xff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)},
+
+ {PIPE_FORMAT_L4A4_UNORM, PACKED_1x8(0xff), PACKED_1x8(0x00), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_L4A4_UNORM, PACKED_1x8(0xff), PACKED_1x8(0x0f), UNPACKED_1x1(1.0, 1.0, 1.0, 0.0)},
+ {PIPE_FORMAT_L4A4_UNORM, PACKED_1x8(0xff), PACKED_1x8(0xf0), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_L4A4_UNORM, PACKED_1x8(0xff), PACKED_1x8(0xff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)},
+
+ {PIPE_FORMAT_L8A8_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x0000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_L8A8_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x00ff), UNPACKED_1x1(1.0, 1.0, 1.0, 0.0)},
+ {PIPE_FORMAT_L8A8_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xff00), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_L8A8_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)},
+
+ {PIPE_FORMAT_L16_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x0000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_L16_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)},
+
+ /*
+ * SRGB formats
+ */
+
+ {PIPE_FORMAT_L8_SRGB, PACKED_1x8(0xff), PACKED_1x8(0x00), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_L8_SRGB, PACKED_1x8(0xff), PACKED_1x8(0xbc), UNPACKED_1x1(0.502886458033, 0.502886458033, 0.502886458033, 1.0)},
+ {PIPE_FORMAT_L8_SRGB, PACKED_1x8(0xff), PACKED_1x8(0xff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)},
+
+ {PIPE_FORMAT_L8A8_SRGB, PACKED_1x16(0xffff), PACKED_1x16(0x0000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_L8A8_SRGB, PACKED_1x16(0xffff), PACKED_1x16(0x00bc), UNPACKED_1x1(0.502886458033, 0.502886458033, 0.502886458033, 0.0)},
+ {PIPE_FORMAT_L8A8_SRGB, PACKED_1x16(0xffff), PACKED_1x16(0x00ff), UNPACKED_1x1(1.0, 1.0, 1.0, 0.0)},
+ {PIPE_FORMAT_L8A8_SRGB, PACKED_1x16(0xffff), PACKED_1x16(0xcc00), UNPACKED_1x1(0.0, 0.0, 0.0, 0.8)},
+ {PIPE_FORMAT_L8A8_SRGB, PACKED_1x16(0xffff), PACKED_1x16(0xff00), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_L8A8_SRGB, PACKED_1x16(0xffff), PACKED_1x16(0xffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)},
+
+ {PIPE_FORMAT_R8G8B8_SRGB, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0x00), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8B8_SRGB, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0xbc, 0x00, 0x00), UNPACKED_1x1(0.502886458033, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8B8_SRGB, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0xff, 0x00, 0x00), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8B8_SRGB, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0xbc, 0x00), UNPACKED_1x1(0.0, 0.502886458033, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8B8_SRGB, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0xff, 0x00), UNPACKED_1x1(0.0, 1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8B8_SRGB, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0xbc), UNPACKED_1x1(0.0, 0.0, 0.502886458033, 1.0)},
+ {PIPE_FORMAT_R8G8B8_SRGB, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0xff), UNPACKED_1x1(0.0, 0.0, 1.0, 1.0)},
+ {PIPE_FORMAT_R8G8B8_SRGB, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0xff, 0xff, 0xff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)},
+
+ {PIPE_FORMAT_R8G8B8A8_SRGB, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0x00), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R8G8B8A8_SRGB, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0xbc, 0x00, 0x00, 0x00), UNPACKED_1x1(0.502886458033, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R8G8B8A8_SRGB, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0xff, 0x00, 0x00, 0x00), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R8G8B8A8_SRGB, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0xbc, 0x00, 0x00), UNPACKED_1x1(0.0, 0.502886458033, 0.0, 0.0)},
+ {PIPE_FORMAT_R8G8B8A8_SRGB, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0xff, 0x00, 0x00), UNPACKED_1x1(0.0, 1.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R8G8B8A8_SRGB, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0xbc, 0x00), UNPACKED_1x1(0.0, 0.0, 0.502886458033, 0.0)},
+ {PIPE_FORMAT_R8G8B8A8_SRGB, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0xff, 0x00), UNPACKED_1x1(0.0, 0.0, 1.0, 0.0)},
+ {PIPE_FORMAT_R8G8B8A8_SRGB, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0xcc), UNPACKED_1x1(0.0, 0.0, 0.0, 0.8)},
+ {PIPE_FORMAT_R8G8B8A8_SRGB, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0xff), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8B8A8_SRGB, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0xff, 0xff, 0xff, 0xff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)},
+
+ {PIPE_FORMAT_B8G8R8A8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_B8G8R8A8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0x000000bc), UNPACKED_1x1(0.0, 0.0, 0.502886458033, 0.0)},
+ {PIPE_FORMAT_B8G8R8A8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0x000000ff), UNPACKED_1x1(0.0, 0.0, 1.0, 0.0)},
+ {PIPE_FORMAT_B8G8R8A8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0x0000bc00), UNPACKED_1x1(0.0, 0.502886458033, 0.0, 0.0)},
+ {PIPE_FORMAT_B8G8R8A8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0x0000ff00), UNPACKED_1x1(0.0, 1.0, 0.0, 0.0)},
+ {PIPE_FORMAT_B8G8R8A8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0x00bc0000), UNPACKED_1x1(0.502886458033, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_B8G8R8A8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0x00ff0000), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_B8G8R8A8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0xcc000000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.8)},
+ {PIPE_FORMAT_B8G8R8A8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0xff000000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_B8G8R8A8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0xffffffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)},
+
+ {PIPE_FORMAT_B8G8R8X8_SRGB, PACKED_1x32(0x00ffffff), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_B8G8R8X8_SRGB, PACKED_1x32(0x00ffffff), PACKED_1x32(0x000000bc), UNPACKED_1x1(0.0, 0.0, 0.502886458033, 1.0)},
+ {PIPE_FORMAT_B8G8R8X8_SRGB, PACKED_1x32(0x00ffffff), PACKED_1x32(0x000000ff), UNPACKED_1x1(0.0, 0.0, 1.0, 1.0)},
+ {PIPE_FORMAT_B8G8R8X8_SRGB, PACKED_1x32(0x00ffffff), PACKED_1x32(0x0000bc00), UNPACKED_1x1(0.0, 0.502886458033, 0.0, 1.0)},
+ {PIPE_FORMAT_B8G8R8X8_SRGB, PACKED_1x32(0x00ffffff), PACKED_1x32(0x0000ff00), UNPACKED_1x1(0.0, 1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_B8G8R8X8_SRGB, PACKED_1x32(0x00ffffff), PACKED_1x32(0x00bc0000), UNPACKED_1x1(0.502886458033, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_B8G8R8X8_SRGB, PACKED_1x32(0x00ffffff), PACKED_1x32(0x00ff0000), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_B8G8R8X8_SRGB, PACKED_1x32(0x00ffffff), PACKED_1x32(0xffffffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)},
+
+ {PIPE_FORMAT_A8R8G8B8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_A8R8G8B8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0x000000cc), UNPACKED_1x1(0.0, 0.0, 0.0, 0.8)},
+ {PIPE_FORMAT_A8R8G8B8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0x000000ff), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_A8R8G8B8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0x0000bc00), UNPACKED_1x1(0.502886458033, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_A8R8G8B8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0x0000ff00), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_A8R8G8B8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0x00bc0000), UNPACKED_1x1(0.0, 0.502886458033, 0.0, 0.0)},
+ {PIPE_FORMAT_A8R8G8B8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0x00ff0000), UNPACKED_1x1(0.0, 1.0, 0.0, 0.0)},
+ {PIPE_FORMAT_A8R8G8B8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0xbc000000), UNPACKED_1x1(0.0, 0.0, 0.502886458033, 0.0)},
+ {PIPE_FORMAT_A8R8G8B8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0xff000000), UNPACKED_1x1(0.0, 0.0, 1.0, 0.0)},
+ {PIPE_FORMAT_A8R8G8B8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0xffffffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)},
+
+ {PIPE_FORMAT_X8R8G8B8_SRGB, PACKED_1x32(0xffffff00), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_X8R8G8B8_SRGB, PACKED_1x32(0xffffff00), PACKED_1x32(0x0000bc00), UNPACKED_1x1(0.502886458033, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_X8R8G8B8_SRGB, PACKED_1x32(0xffffff00), PACKED_1x32(0x0000ff00), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_X8R8G8B8_SRGB, PACKED_1x32(0xffffff00), PACKED_1x32(0x00bc0000), UNPACKED_1x1(0.0, 0.502886458033, 0.0, 1.0)},
+ {PIPE_FORMAT_X8R8G8B8_SRGB, PACKED_1x32(0xffffff00), PACKED_1x32(0x00ff0000), UNPACKED_1x1(0.0, 1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_X8R8G8B8_SRGB, PACKED_1x32(0xffffff00), PACKED_1x32(0xbc000000), UNPACKED_1x1(0.0, 0.0, 0.502886458033, 1.0)},
+ {PIPE_FORMAT_X8R8G8B8_SRGB, PACKED_1x32(0xffffff00), PACKED_1x32(0xff000000), UNPACKED_1x1(0.0, 0.0, 1.0, 1.0)},
+ {PIPE_FORMAT_X8R8G8B8_SRGB, PACKED_1x32(0xffffff00), PACKED_1x32(0xffffffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)},
+
+ {PIPE_FORMAT_A8B8G8R8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_A8B8G8R8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0x000000cc), UNPACKED_1x1(0.0, 0.0, 0.0, 0.8)},
+ {PIPE_FORMAT_A8B8G8R8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0x000000ff), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_A8B8G8R8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0x0000bc00), UNPACKED_1x1(0.0, 0.0, 0.502886458033, 0.0)},
+ {PIPE_FORMAT_A8B8G8R8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0x0000ff00), UNPACKED_1x1(0.0, 0.0, 1.0, 0.0)},
+ {PIPE_FORMAT_A8B8G8R8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0x00bc0000), UNPACKED_1x1(0.0, 0.502886458033, 0.0, 0.0)},
+ {PIPE_FORMAT_A8B8G8R8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0x00ff0000), UNPACKED_1x1(0.0, 1.0, 0.0, 0.0)},
+ {PIPE_FORMAT_A8B8G8R8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0xbc000000), UNPACKED_1x1(0.502886458033, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_A8B8G8R8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0xff000000), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_A8B8G8R8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0xffffffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)},
+
+ {PIPE_FORMAT_X8B8G8R8_SRGB, PACKED_1x32(0xffffff00), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_X8B8G8R8_SRGB, PACKED_1x32(0xffffff00), PACKED_1x32(0x0000bc00), UNPACKED_1x1(0.0, 0.0, 0.502886458033, 1.0)},
+ {PIPE_FORMAT_X8B8G8R8_SRGB, PACKED_1x32(0xffffff00), PACKED_1x32(0x0000ff00), UNPACKED_1x1(0.0, 0.0, 1.0, 1.0)},
+ {PIPE_FORMAT_X8B8G8R8_SRGB, PACKED_1x32(0xffffff00), PACKED_1x32(0x00bc0000), UNPACKED_1x1(0.0, 0.502886458033, 0.0, 1.0)},
+ {PIPE_FORMAT_X8B8G8R8_SRGB, PACKED_1x32(0xffffff00), PACKED_1x32(0x00ff0000), UNPACKED_1x1(0.0, 1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_X8B8G8R8_SRGB, PACKED_1x32(0xffffff00), PACKED_1x32(0xbc000000), UNPACKED_1x1(0.502886458033, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_X8B8G8R8_SRGB, PACKED_1x32(0xffffff00), PACKED_1x32(0xff000000), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_X8B8G8R8_SRGB, PACKED_1x32(0xffffff00), PACKED_1x32(0xffffffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)},
+
+ /*
+ * Mixed-signed formats
+ */
+
+ {PIPE_FORMAT_R8SG8SB8UX8U_NORM, PACKED_4x8(0xff, 0xff, 0xff, 0x00), PACKED_4x8(0x00, 0x00, 0x00, 0x00), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8SG8SB8UX8U_NORM, PACKED_4x8(0xff, 0xff, 0xff, 0x00), PACKED_4x8(0x7f, 0x00, 0x00, 0x00), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8SG8SB8UX8U_NORM, PACKED_4x8(0xff, 0xff, 0xff, 0x00), PACKED_4x8(0x81, 0x00, 0x00, 0x00), UNPACKED_1x1(-1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8SG8SB8UX8U_NORM, PACKED_4x8(0xff, 0xff, 0xff, 0x00), PACKED_4x8(0x00, 0x7f, 0x00, 0x00), UNPACKED_1x1( 0.0, 1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8SG8SB8UX8U_NORM, PACKED_4x8(0xff, 0xff, 0xff, 0x00), PACKED_4x8(0x00, 0x81, 0x00, 0x00), UNPACKED_1x1( 0.0, -1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8SG8SB8UX8U_NORM, PACKED_4x8(0xff, 0xff, 0xff, 0x00), PACKED_4x8(0x00, 0x00, 0xff, 0x00), UNPACKED_1x1( 0.0, 0.0, 1.0, 1.0)},
+
+ {PIPE_FORMAT_R10SG10SB10SA2U_NORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R10SG10SB10SA2U_NORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x000001ff), UNPACKED_1x1( 1.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R10SG10SB10SA2U_NORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000201), UNPACKED_1x1(-1.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R10SG10SB10SA2U_NORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x0007fc00), UNPACKED_1x1( 0.0, 1.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R10SG10SB10SA2U_NORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00080400), UNPACKED_1x1( 0.0, -1.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R10SG10SB10SA2U_NORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x1ff00000), UNPACKED_1x1( 0.0, 0.0, 1.0, 0.0)},
+ {PIPE_FORMAT_R10SG10SB10SA2U_NORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x20100000), UNPACKED_1x1( 0.0, 0.0, -1.0, 0.0)},
+ {PIPE_FORMAT_R10SG10SB10SA2U_NORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xc0000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+
+ {PIPE_FORMAT_R5SG5SB6U_NORM, PACKED_1x16(0xffff), PACKED_1x16(0x0000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R5SG5SB6U_NORM, PACKED_1x16(0xffff), PACKED_1x16(0x000f), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R5SG5SB6U_NORM, PACKED_1x16(0xffff), PACKED_1x16(0x0011), UNPACKED_1x1(-1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R5SG5SB6U_NORM, PACKED_1x16(0xffff), PACKED_1x16(0x01e0), UNPACKED_1x1( 0.0, 1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R5SG5SB6U_NORM, PACKED_1x16(0xffff), PACKED_1x16(0x0220), UNPACKED_1x1( 0.0, -1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R5SG5SB6U_NORM, PACKED_1x16(0xffff), PACKED_1x16(0xfc00), UNPACKED_1x1( 0.0, 0.0, 1.0, 1.0)},
+
+ {PIPE_FORMAT_R8G8Bx_SNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0x00), UNPACKED_1x1( 0.0, 0.0, 1.0, 1.0)},
+ {PIPE_FORMAT_R8G8Bx_SNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x7f, 0x00), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8Bx_SNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x81, 0x00), UNPACKED_1x1(-1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8Bx_SNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0x7f), UNPACKED_1x1( 0.0, 1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8Bx_SNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0x81), UNPACKED_1x1( 0.0, -1.0, 0.0, 1.0)},
+
+ /*
+ * Depth-stencil formats
+ */
+
+ {PIPE_FORMAT_S8_USCALED, PACKED_1x8(0xff), PACKED_1x8(0x00), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_S8_USCALED, PACKED_1x8(0xff), PACKED_1x8(0xff), UNPACKED_1x1(0.0, 255.0, 0.0, 0.0)},
+
+ {PIPE_FORMAT_Z16_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x0000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_Z16_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xffff), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)},
+
+ {PIPE_FORMAT_Z32_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_Z32_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xffffffff), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)},
+
+ {PIPE_FORMAT_Z32_FLOAT, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_Z32_FLOAT, PACKED_1x32(0xffffffff), PACKED_1x32(0x3f800000), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)},
+
+ {PIPE_FORMAT_Z24_UNORM_S8_USCALED, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_Z24_UNORM_S8_USCALED, PACKED_1x32(0xffffffff), PACKED_1x32(0x00ffffff), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_Z24_UNORM_S8_USCALED, PACKED_1x32(0xffffffff), PACKED_1x32(0xff000000), UNPACKED_1x1(0.0, 255.0, 0.0, 0.0)},
+ {PIPE_FORMAT_Z24_UNORM_S8_USCALED, PACKED_1x32(0xffffffff), PACKED_1x32(0xffffffff), UNPACKED_1x1(1.0, 255.0, 0.0, 0.0)},
+
+ {PIPE_FORMAT_S8_USCALED_Z24_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_S8_USCALED_Z24_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xffffff00), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_S8_USCALED_Z24_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x000000ff), UNPACKED_1x1(0.0, 255.0, 0.0, 0.0)},
+ {PIPE_FORMAT_S8_USCALED_Z24_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xffffffff), UNPACKED_1x1(1.0, 255.0, 0.0, 0.0)},
+
+ {PIPE_FORMAT_Z24X8_UNORM, PACKED_1x32(0x00ffffff), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_Z24X8_UNORM, PACKED_1x32(0x00ffffff), PACKED_1x32(0x00ffffff), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)},
+
+ {PIPE_FORMAT_X8Z24_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_X8Z24_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0xffffff00), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)},
+
+ {PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED, PACKED_2x32(0xffffffff, 0x000000ff), PACKED_2x32(0x00000000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED, PACKED_2x32(0xffffffff, 0x000000ff), PACKED_2x32(0x3f800000, 0x00000000), UNPACKED_1x1( 1.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED, PACKED_2x32(0xffffffff, 0x000000ff), PACKED_2x32(0x00000000, 0x000000ff), UNPACKED_1x1( 0.0, 255.0, 0.0, 0.0)},
+
+ /*
+ * YUV formats
+ */
+
+ {PIPE_FORMAT_R8G8_B8G8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0x00), UNPACKED_2x1(0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8_B8G8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0xff, 0x00, 0x00, 0x00), UNPACKED_2x1(1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8_B8G8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0xff, 0x00, 0x00), UNPACKED_2x1(0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8_B8G8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0xff, 0x00), UNPACKED_2x1(0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0)},
+ {PIPE_FORMAT_R8G8_B8G8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0xff), UNPACKED_2x1(0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8_B8G8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0xff, 0xff, 0xff, 0xff), UNPACKED_2x1(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0)},
+
+ {PIPE_FORMAT_G8R8_G8B8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0x00), UNPACKED_2x1(0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_G8R8_G8B8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0xff, 0x00, 0x00, 0x00), UNPACKED_2x1(0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_G8R8_G8B8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0xff, 0x00, 0x00), UNPACKED_2x1(1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_G8R8_G8B8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0xff, 0x00), UNPACKED_2x1(0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_G8R8_G8B8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0xff), UNPACKED_2x1(0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0)},
+ {PIPE_FORMAT_G8R8_G8B8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0xff, 0xff, 0xff, 0xff), UNPACKED_2x1(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0)},
+
+ /*
+ * TODO: Exercise the UV channels as well.
+ */
+ {PIPE_FORMAT_UYVY, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x80, 0x10, 0x80, 0x10), UNPACKED_2x1(0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_UYVY, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x80, 0xeb, 0x80, 0x10), UNPACKED_2x1(1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_UYVY, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x80, 0x10, 0x80, 0xeb), UNPACKED_2x1(0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0)},
+
+ {PIPE_FORMAT_YUYV, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x10, 0x80, 0x10, 0x80), UNPACKED_2x1(0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_YUYV, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0xeb, 0x80, 0x10, 0x80), UNPACKED_2x1(1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_YUYV, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x10, 0x80, 0xeb, 0x80), UNPACKED_2x1(0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0)},
+
+ /*
+ * Compressed formats
+ */
+
+ {
+ PIPE_FORMAT_DXT1_RGB,
+ PACKED_8x8(0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff),
+ PACKED_8x8(0xf2, 0xd7, 0xb0, 0x20, 0xae, 0x2c, 0x6f, 0x97),
+ {
+ {
+ {0x99/255.0, 0xb0/255.0, 0x8e/255.0, 0xff/255.0},
+ {0x5d/255.0, 0x62/255.0, 0x89/255.0, 0xff/255.0},
+ {0x99/255.0, 0xb0/255.0, 0x8e/255.0, 0xff/255.0},
+ {0x99/255.0, 0xb0/255.0, 0x8e/255.0, 0xff/255.0}
+ },
+ {
+ {0xd6/255.0, 0xff/255.0, 0x94/255.0, 0xff/255.0},
+ {0x5d/255.0, 0x62/255.0, 0x89/255.0, 0xff/255.0},
+ {0x99/255.0, 0xb0/255.0, 0x8e/255.0, 0xff/255.0},
+ {0xd6/255.0, 0xff/255.0, 0x94/255.0, 0xff/255.0}
+ },
+ {
+ {0x5d/255.0, 0x62/255.0, 0x89/255.0, 0xff/255.0},
+ {0x5d/255.0, 0x62/255.0, 0x89/255.0, 0xff/255.0},
+ {0x99/255.0, 0xb0/255.0, 0x8e/255.0, 0xff/255.0},
+ {0x21/255.0, 0x14/255.0, 0x84/255.0, 0xff/255.0}
+ },
+ {
+ {0x5d/255.0, 0x62/255.0, 0x89/255.0, 0xff/255.0},
+ {0x21/255.0, 0x14/255.0, 0x84/255.0, 0xff/255.0},
+ {0x21/255.0, 0x14/255.0, 0x84/255.0, 0xff/255.0},
+ {0x99/255.0, 0xb0/255.0, 0x8e/255.0, 0xff/255.0}
+ }
+ }
+ },
+ {
+ PIPE_FORMAT_DXT1_RGBA,
+ PACKED_8x8(0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff),
+ PACKED_8x8(0xff, 0x2f, 0xa4, 0x72, 0xeb, 0xb2, 0xbd, 0xbe),
+ {
+ {
+ {0x00/255.0, 0x00/255.0, 0x00/255.0, 0x00/255.0},
+ {0x4e/255.0, 0xaa/255.0, 0x90/255.0, 0xff/255.0},
+ {0x4e/255.0, 0xaa/255.0, 0x90/255.0, 0xff/255.0},
+ {0x00/255.0, 0x00/255.0, 0x00/255.0, 0x00/255.0}
+ },
+ {
+ {0x4e/255.0, 0xaa/255.0, 0x90/255.0, 0xff/255.0},
+ {0x29/255.0, 0xff/255.0, 0xff/255.0, 0xff/255.0},
+ {0x00/255.0, 0x00/255.0, 0x00/255.0, 0x00/255.0},
+ {0x4e/255.0, 0xaa/255.0, 0x90/255.0, 0xff/255.0}
+ },
+ {
+ {0x73/255.0, 0x55/255.0, 0x21/255.0, 0xff/255.0},
+ {0x00/255.0, 0x00/255.0, 0x00/255.0, 0x00/255.0},
+ {0x00/255.0, 0x00/255.0, 0x00/255.0, 0x00/255.0},
+ {0x4e/255.0, 0xaa/255.0, 0x90/255.0, 0xff/255.0}
+ },
+ {
+ {0x4e/255.0, 0xaa/255.0, 0x90/255.0, 0xff/255.0},
+ {0x00/255.0, 0x00/255.0, 0x00/255.0, 0x00/255.0},
+ {0x00/255.0, 0x00/255.0, 0x00/255.0, 0x00/255.0},
+ {0x4e/255.0, 0xaa/255.0, 0x90/255.0, 0xff/255.0}
+ }
+ }
+ },
+ {
+ PIPE_FORMAT_DXT3_RGBA,
+ {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
+ {0xe7, 0x4a, 0x8f, 0x96, 0x5b, 0xc1, 0x1c, 0x84, 0xf6, 0x8f, 0xab, 0x32, 0x2a, 0x9a, 0x95, 0x5a},
+ {
+ {
+ {0x6d/255.0, 0xc6/255.0, 0x96/255.0, 0x77/255.0},
+ {0x6d/255.0, 0xc6/255.0, 0x96/255.0, 0xee/255.0},
+ {0x6d/255.0, 0xc6/255.0, 0x96/255.0, 0xaa/255.0},
+ {0x8c/255.0, 0xff/255.0, 0xb5/255.0, 0x44/255.0}
+ },
+ {
+ {0x6d/255.0, 0xc6/255.0, 0x96/255.0, 0xff/255.0},
+ {0x6d/255.0, 0xc6/255.0, 0x96/255.0, 0x88/255.0},
+ {0x31/255.0, 0x55/255.0, 0x5a/255.0, 0x66/255.0},
+ {0x6d/255.0, 0xc6/255.0, 0x96/255.0, 0x99/255.0}
+ },
+ {
+ {0x31/255.0, 0x55/255.0, 0x5a/255.0, 0xbb/255.0},
+ {0x31/255.0, 0x55/255.0, 0x5a/255.0, 0x55/255.0},
+ {0x31/255.0, 0x55/255.0, 0x5a/255.0, 0x11/255.0},
+ {0x6d/255.0, 0xc6/255.0, 0x96/255.0, 0xcc/255.0}
+ },
+ {
+ {0x6d/255.0, 0xc6/255.0, 0x96/255.0, 0xcc/255.0},
+ {0x6d/255.0, 0xc6/255.0, 0x96/255.0, 0x11/255.0},
+ {0x31/255.0, 0x55/255.0, 0x5a/255.0, 0x44/255.0},
+ {0x31/255.0, 0x55/255.0, 0x5a/255.0, 0x88/255.0}
+ }
+ }
+ },
+ {
+ PIPE_FORMAT_DXT5_RGBA,
+ {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
+ {0xf8, 0x11, 0xc5, 0x0c, 0x9a, 0x73, 0xb4, 0x9c, 0xf6, 0x8f, 0xab, 0x32, 0x2a, 0x9a, 0x95, 0x5a},
+ {
+ {
+ {0x6d/255.0, 0xc6/255.0, 0x96/255.0, 0x74/255.0},
+ {0x6d/255.0, 0xc6/255.0, 0x96/255.0, 0xf8/255.0},
+ {0x6d/255.0, 0xc6/255.0, 0x96/255.0, 0xb6/255.0},
+ {0x8c/255.0, 0xff/255.0, 0xb5/255.0, 0x53/255.0}
+ },
+ {
+ {0x6d/255.0, 0xc6/255.0, 0x96/255.0, 0xf8/255.0},
+ {0x6d/255.0, 0xc6/255.0, 0x96/255.0, 0x95/255.0},
+ {0x31/255.0, 0x55/255.0, 0x5a/255.0, 0x53/255.0},
+ {0x6d/255.0, 0xc6/255.0, 0x96/255.0, 0x95/255.0}
+ },
+ {
+ {0x31/255.0, 0x55/255.0, 0x5a/255.0, 0xb6/255.0},
+ {0x31/255.0, 0x55/255.0, 0x5a/255.0, 0x53/255.0},
+ {0x31/255.0, 0x55/255.0, 0x5a/255.0, 0x11/255.0},
+ {0x6d/255.0, 0xc6/255.0, 0x96/255.0, 0xd7/255.0}
+ },
+ {
+ {0x6d/255.0, 0xc6/255.0, 0x96/255.0, 0xb6/255.0},
+ {0x6d/255.0, 0xc6/255.0, 0x96/255.0, 0x11/255.0},
+ {0x31/255.0, 0x55/255.0, 0x5a/255.0, 0x32/255.0},
+ {0x31/255.0, 0x55/255.0, 0x5a/255.0, 0x95/255.0}
+ }
+ }
+ },
+
+
+ /*
+ * Standard 8-bit integer formats
+ */
+
+ {PIPE_FORMAT_R8_UNORM, PACKED_1x8(0xff), PACKED_1x8(0x00), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8_UNORM, PACKED_1x8(0xff), PACKED_1x8(0xff), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)},
+
+ {PIPE_FORMAT_R8G8_UNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0x00), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8_UNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0xff, 0x00), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8_UNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0xff), UNPACKED_1x1(0.0, 1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8_UNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0xff, 0xff), UNPACKED_1x1(1.0, 1.0, 0.0, 1.0)},
+
+ {PIPE_FORMAT_R8G8B8_UNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0x00), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8B8_UNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0xff, 0x00, 0x00), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8B8_UNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0xff, 0x00), UNPACKED_1x1(0.0, 1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8B8_UNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0xff), UNPACKED_1x1(0.0, 0.0, 1.0, 1.0)},
+ {PIPE_FORMAT_R8G8B8_UNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0xff, 0xff, 0xff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)},
+
+ {PIPE_FORMAT_R8G8B8A8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0x00), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R8G8B8A8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0xff, 0x00, 0x00, 0x00), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R8G8B8A8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0xff, 0x00, 0x00), UNPACKED_1x1(0.0, 1.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R8G8B8A8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0xff, 0x00), UNPACKED_1x1(0.0, 0.0, 1.0, 0.0)},
+ {PIPE_FORMAT_R8G8B8A8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0xff), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8B8A8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0xff, 0xff, 0xff, 0xff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)},
+
+ {PIPE_FORMAT_R8_USCALED, PACKED_1x8(0xff), PACKED_1x8(0x00), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8_USCALED, PACKED_1x8(0xff), PACKED_1x8(0xff), UNPACKED_1x1(255.0, 0.0, 0.0, 1.0)},
+
+ {PIPE_FORMAT_R8G8_USCALED, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0x00), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8_USCALED, PACKED_2x8(0xff, 0xff), PACKED_2x8(0xff, 0x00), UNPACKED_1x1(255.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8_USCALED, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0xff), UNPACKED_1x1( 0.0, 255.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8_USCALED, PACKED_2x8(0xff, 0xff), PACKED_2x8(0xff, 0xff), UNPACKED_1x1(255.0, 255.0, 0.0, 1.0)},
+
+ {PIPE_FORMAT_R8G8B8_USCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0x00), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8B8_USCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0xff, 0x00, 0x00), UNPACKED_1x1(255.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8B8_USCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0xff, 0x00), UNPACKED_1x1( 0.0, 255.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8B8_USCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0xff), UNPACKED_1x1( 0.0, 0.0, 255.0, 1.0)},
+ {PIPE_FORMAT_R8G8B8_USCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0xff, 0xff, 0xff), UNPACKED_1x1(255.0, 255.0, 255.0, 1.0)},
+
+ {PIPE_FORMAT_R8G8B8A8_USCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0x00), UNPACKED_1x1( 0.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R8G8B8A8_USCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0xff, 0x00, 0x00, 0x00), UNPACKED_1x1(255.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R8G8B8A8_USCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0xff, 0x00, 0x00), UNPACKED_1x1( 0.0, 255.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R8G8B8A8_USCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0xff, 0x00), UNPACKED_1x1( 0.0, 0.0, 255.0, 0.0)},
+ {PIPE_FORMAT_R8G8B8A8_USCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0xff), UNPACKED_1x1( 0.0, 0.0, 0.0, 255.0)},
+ {PIPE_FORMAT_R8G8B8A8_USCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0xff, 0xff, 0xff, 0xff), UNPACKED_1x1(255.0, 255.0, 255.0, 255.0)},
+
+ {PIPE_FORMAT_R8_SNORM, PACKED_1x8(0xff), PACKED_1x8(0x00), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8_SNORM, PACKED_1x8(0xff), PACKED_1x8(0x7f), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8_SNORM, PACKED_1x8(0xff), PACKED_1x8(0x81), UNPACKED_1x1(-1.0, 0.0, 0.0, 1.0)},
+
+ {PIPE_FORMAT_R8G8_SNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0x00), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8_SNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x7f, 0x00), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8_SNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x81, 0x00), UNPACKED_1x1(-1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8_SNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0x7f), UNPACKED_1x1( 0.0, 1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8_SNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0x81), UNPACKED_1x1( 0.0, -1.0, 0.0, 1.0)},
+
+ {PIPE_FORMAT_R8G8B8_SNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0x00), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8B8_SNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x7f, 0x00, 0x00), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8B8_SNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x81, 0x00, 0x00), UNPACKED_1x1(-1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8B8_SNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x7f, 0x00), UNPACKED_1x1( 0.0, 1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8B8_SNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x81, 0x00), UNPACKED_1x1( 0.0, -1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8B8_SNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0x7f), UNPACKED_1x1( 0.0, 0.0, 1.0, 1.0)},
+ {PIPE_FORMAT_R8G8B8_SNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0x81), UNPACKED_1x1( 0.0, 0.0, -1.0, 1.0)},
+
+ {PIPE_FORMAT_R8G8B8A8_SNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0x00), UNPACKED_1x1( 0.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R8G8B8A8_SNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x7f, 0x00, 0x00, 0x00), UNPACKED_1x1( 1.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R8G8B8A8_SNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x81, 0x00, 0x00, 0x00), UNPACKED_1x1(-1.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R8G8B8A8_SNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x7f, 0x00, 0x00), UNPACKED_1x1( 0.0, 1.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R8G8B8A8_SNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x81, 0x00, 0x00), UNPACKED_1x1( 0.0, -1.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R8G8B8A8_SNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x7f, 0x00), UNPACKED_1x1( 0.0, 0.0, 1.0, 0.0)},
+ {PIPE_FORMAT_R8G8B8A8_SNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x81, 0x00), UNPACKED_1x1( 0.0, 0.0, -1.0, 0.0)},
+ {PIPE_FORMAT_R8G8B8A8_SNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0x7f), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8B8A8_SNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0x81), UNPACKED_1x1( 0.0, 0.0, 0.0, -1.0)},
+
+ {PIPE_FORMAT_R8_SSCALED, PACKED_1x8(0xff), PACKED_1x8(0x00), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8_SSCALED, PACKED_1x8(0xff), PACKED_1x8(0x7f), UNPACKED_1x1( 127.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8_SSCALED, PACKED_1x8(0xff), PACKED_1x8(0x80), UNPACKED_1x1(-128.0, 0.0, 0.0, 1.0)},
+
+ {PIPE_FORMAT_R8G8_SSCALED, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0x00), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8_SSCALED, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x7f, 0x00), UNPACKED_1x1( 127.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8_SSCALED, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x80, 0x00), UNPACKED_1x1(-128.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8_SSCALED, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0x7f), UNPACKED_1x1( 0.0, 127.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8_SSCALED, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0x80), UNPACKED_1x1( 0.0, -128.0, 0.0, 1.0)},
+
+ {PIPE_FORMAT_R8G8B8_SSCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0x00), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8B8_SSCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x7f, 0x00, 0x00), UNPACKED_1x1( 127.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8B8_SSCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x80, 0x00, 0x00), UNPACKED_1x1(-128.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8B8_SSCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x7f, 0x00), UNPACKED_1x1( 0.0, 127.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8B8_SSCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x80, 0x00), UNPACKED_1x1( 0.0, -128.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R8G8B8_SSCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0x7f), UNPACKED_1x1( 0.0, 0.0, 127.0, 1.0)},
+ {PIPE_FORMAT_R8G8B8_SSCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0x80), UNPACKED_1x1( 0.0, 0.0, -128.0, 1.0)},
+
+ {PIPE_FORMAT_R8G8B8A8_SSCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0x00), UNPACKED_1x1( 0.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R8G8B8A8_SSCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x7f, 0x00, 0x00, 0x00), UNPACKED_1x1( 127.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R8G8B8A8_SSCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x80, 0x00, 0x00, 0x00), UNPACKED_1x1(-128.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R8G8B8A8_SSCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x7f, 0x00, 0x00), UNPACKED_1x1( 0.0, 127.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R8G8B8A8_SSCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x80, 0x00, 0x00), UNPACKED_1x1( 0.0, -128.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R8G8B8A8_SSCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x7f, 0x00), UNPACKED_1x1( 0.0, 0.0, 127.0, 0.0)},
+ {PIPE_FORMAT_R8G8B8A8_SSCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x80, 0x00), UNPACKED_1x1( 0.0, 0.0, -128.0, 0.0)},
+ {PIPE_FORMAT_R8G8B8A8_SSCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0x7f), UNPACKED_1x1( 0.0, 0.0, 0.0, 127.0)},
+ {PIPE_FORMAT_R8G8B8A8_SSCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0x80), UNPACKED_1x1( 0.0, 0.0, 0.0, -128.0)},
+
+ /*
+ * Standard 16-bit integer formats
+ */
+
+ {PIPE_FORMAT_R16_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x0000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xffff), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)},
+
+ {PIPE_FORMAT_R16G16_UNORM, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0x0000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16G16_UNORM, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0xffff, 0x0000), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16G16_UNORM, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0xffff), UNPACKED_1x1(0.0, 1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16G16_UNORM, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0xffff, 0xffff), UNPACKED_1x1(1.0, 1.0, 0.0, 1.0)},
+
+ {PIPE_FORMAT_R16G16B16_UNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0x0000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16G16B16_UNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0xffff, 0x0000, 0x0000), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16G16B16_UNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0xffff, 0x0000), UNPACKED_1x1(0.0, 1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16G16B16_UNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0xffff), UNPACKED_1x1(0.0, 0.0, 1.0, 1.0)},
+ {PIPE_FORMAT_R16G16B16_UNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0xffff, 0xffff, 0xffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)},
+
+ {PIPE_FORMAT_R16G16B16A16_UNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0x0000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R16G16B16A16_UNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0xffff, 0x0000, 0x0000, 0x0000), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R16G16B16A16_UNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0xffff, 0x0000, 0x0000), UNPACKED_1x1(0.0, 1.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R16G16B16A16_UNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0xffff, 0x0000), UNPACKED_1x1(0.0, 0.0, 1.0, 0.0)},
+ {PIPE_FORMAT_R16G16B16A16_UNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0xffff), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16G16B16A16_UNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)},
+
+ {PIPE_FORMAT_R16_USCALED, PACKED_1x16(0xffff), PACKED_1x16(0x0000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16_USCALED, PACKED_1x16(0xffff), PACKED_1x16(0xffff), UNPACKED_1x1(65535.0, 0.0, 0.0, 1.0)},
+
+ {PIPE_FORMAT_R16G16_USCALED, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0x0000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16G16_USCALED, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0xffff, 0x0000), UNPACKED_1x1(65535.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16G16_USCALED, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0xffff), UNPACKED_1x1( 0.0, 65535.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16G16_USCALED, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0xffff, 0xffff), UNPACKED_1x1(65535.0, 65535.0, 0.0, 1.0)},
+
+ {PIPE_FORMAT_R16G16B16_USCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0x0000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16G16B16_USCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0xffff, 0x0000, 0x0000), UNPACKED_1x1(65535.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16G16B16_USCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0xffff, 0x0000), UNPACKED_1x1( 0.0, 65535.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16G16B16_USCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0xffff), UNPACKED_1x1( 0.0, 0.0, 65535.0, 1.0)},
+ {PIPE_FORMAT_R16G16B16_USCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0xffff, 0xffff, 0xffff), UNPACKED_1x1(65535.0, 65535.0, 65535.0, 1.0)},
+
+ {PIPE_FORMAT_R16G16B16A16_USCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0x0000), UNPACKED_1x1( 0.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R16G16B16A16_USCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0xffff, 0x0000, 0x0000, 0x0000), UNPACKED_1x1(65535.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R16G16B16A16_USCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0xffff, 0x0000, 0x0000), UNPACKED_1x1( 0.0, 65535.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R16G16B16A16_USCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0xffff, 0x0000), UNPACKED_1x1( 0.0, 0.0, 65535.0, 0.0)},
+ {PIPE_FORMAT_R16G16B16A16_USCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0xffff), UNPACKED_1x1( 0.0, 0.0, 0.0, 65535.0)},
+ {PIPE_FORMAT_R16G16B16A16_USCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), UNPACKED_1x1(65535.0, 65535.0, 65535.0, 65535.0)},
+
+ {PIPE_FORMAT_R16_SNORM, PACKED_1x16(0xffff), PACKED_1x16(0x0000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16_SNORM, PACKED_1x16(0xffff), PACKED_1x16(0x7fff), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16_SNORM, PACKED_1x16(0xffff), PACKED_1x16(0x8001), UNPACKED_1x1( -1.0, 0.0, 0.0, 1.0)},
+
+ {PIPE_FORMAT_R16G16_SNORM, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0x0000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16G16_SNORM, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x7fff, 0x0000), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16G16_SNORM, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x8001, 0x0000), UNPACKED_1x1( -1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16G16_SNORM, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0x7fff), UNPACKED_1x1( 0.0, 1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16G16_SNORM, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0x8001), UNPACKED_1x1( 0.0, -1.0, 0.0, 1.0)},
+
+ {PIPE_FORMAT_R16G16B16_SNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0x0000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16G16B16_SNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x7fff, 0x0000, 0x0000), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16G16B16_SNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x8001, 0x0000, 0x0000), UNPACKED_1x1( -1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16G16B16_SNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x7fff, 0x0000), UNPACKED_1x1( 0.0, 1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16G16B16_SNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x8001, 0x0000), UNPACKED_1x1( 0.0, -1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16G16B16_SNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0x7fff), UNPACKED_1x1( 0.0, 0.0, 1.0, 1.0)},
+ {PIPE_FORMAT_R16G16B16_SNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0x8001), UNPACKED_1x1( 0.0, 0.0, -1.0, 1.0)},
+
+ {PIPE_FORMAT_R16G16B16A16_SNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0x0000), UNPACKED_1x1( 0.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R16G16B16A16_SNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x7fff, 0x0000, 0x0000, 0x0000), UNPACKED_1x1( 1.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R16G16B16A16_SNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x8001, 0x0000, 0x0000, 0x0000), UNPACKED_1x1( -1.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R16G16B16A16_SNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x7fff, 0x0000, 0x0000), UNPACKED_1x1( 0.0, 1.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R16G16B16A16_SNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x8001, 0x0000, 0x0000), UNPACKED_1x1( 0.0, -1.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R16G16B16A16_SNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x7fff, 0x0000), UNPACKED_1x1( 0.0, 0.0, 1.0, 0.0)},
+ {PIPE_FORMAT_R16G16B16A16_SNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x8001, 0x0000), UNPACKED_1x1( 0.0, 0.0, -1.0, 0.0)},
+ {PIPE_FORMAT_R16G16B16A16_SNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0x7fff), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16G16B16A16_SNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0x8001), UNPACKED_1x1( 0.0, 0.0, 0.0, -1.0)},
+
+ {PIPE_FORMAT_R16_SSCALED, PACKED_1x16(0xffff), PACKED_1x16(0x0000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16_SSCALED, PACKED_1x16(0xffff), PACKED_1x16(0x7fff), UNPACKED_1x1( 32767.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16_SSCALED, PACKED_1x16(0xffff), PACKED_1x16(0x8000), UNPACKED_1x1(-32768.0, 0.0, 0.0, 1.0)},
+
+ {PIPE_FORMAT_R16G16_SSCALED, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0x0000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16G16_SSCALED, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x7fff, 0x0000), UNPACKED_1x1( 32767.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16G16_SSCALED, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x8000, 0x0000), UNPACKED_1x1(-32768.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16G16_SSCALED, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0x7fff), UNPACKED_1x1( 0.0, 32767.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16G16_SSCALED, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0x8000), UNPACKED_1x1( 0.0, -32768.0, 0.0, 1.0)},
+
+ {PIPE_FORMAT_R16G16B16_SSCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0x0000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16G16B16_SSCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x7fff, 0x0000, 0x0000), UNPACKED_1x1( 32767.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16G16B16_SSCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x8000, 0x0000, 0x0000), UNPACKED_1x1(-32768.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16G16B16_SSCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x7fff, 0x0000), UNPACKED_1x1( 0.0, 32767.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16G16B16_SSCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x8000, 0x0000), UNPACKED_1x1( 0.0, -32768.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16G16B16_SSCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0x7fff), UNPACKED_1x1( 0.0, 0.0, 32767.0, 1.0)},
+ {PIPE_FORMAT_R16G16B16_SSCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0x8000), UNPACKED_1x1( 0.0, 0.0, -32768.0, 1.0)},
+
+ {PIPE_FORMAT_R16G16B16A16_SSCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0x0000), UNPACKED_1x1( 0.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R16G16B16A16_SSCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x7fff, 0x0000, 0x0000, 0x0000), UNPACKED_1x1( 32767.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R16G16B16A16_SSCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x8000, 0x0000, 0x0000, 0x0000), UNPACKED_1x1(-32768.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R16G16B16A16_SSCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x7fff, 0x0000, 0x0000), UNPACKED_1x1( 0.0, 32767.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R16G16B16A16_SSCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x8000, 0x0000, 0x0000), UNPACKED_1x1( 0.0, -32768.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R16G16B16A16_SSCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x7fff, 0x0000), UNPACKED_1x1( 0.0, 0.0, 32767.0, 0.0)},
+ {PIPE_FORMAT_R16G16B16A16_SSCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x8000, 0x0000), UNPACKED_1x1( 0.0, 0.0, -32768.0, 0.0)},
+ {PIPE_FORMAT_R16G16B16A16_SSCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0x7fff), UNPACKED_1x1( 0.0, 0.0, 0.0, 32767.0)},
+ {PIPE_FORMAT_R16G16B16A16_SSCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0x8000), UNPACKED_1x1( 0.0, 0.0, 0.0, -32768.0)},
+
+ /*
+ * Standard 32-bit integer formats
+ *
+ * NOTE: We can't accurately represent integers larger than +/-0x1000000
+ * with single precision floats, so that's as far as we test.
+ */
+
+ {PIPE_FORMAT_R32_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xffffffff), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)},
+
+ {PIPE_FORMAT_R32G32_UNORM, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32_UNORM, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0xffffffff, 0x00000000), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32_UNORM, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0xffffffff), UNPACKED_1x1(0.0, 1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32_UNORM, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0xffffffff, 0xffffffff), UNPACKED_1x1(1.0, 1.0, 0.0, 1.0)},
+
+ {PIPE_FORMAT_R32G32B32_UNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32B32_UNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0xffffffff, 0x00000000, 0x00000000), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32B32_UNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0xffffffff, 0x00000000), UNPACKED_1x1(0.0, 1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32B32_UNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0xffffffff), UNPACKED_1x1(0.0, 0.0, 1.0, 1.0)},
+ {PIPE_FORMAT_R32G32B32_UNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)},
+
+ {PIPE_FORMAT_R32G32B32A32_UNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R32G32B32A32_UNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0xffffffff, 0x00000000, 0x00000000, 0x00000000), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R32G32B32A32_UNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0xffffffff, 0x00000000, 0x00000000), UNPACKED_1x1(0.0, 1.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R32G32B32A32_UNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0xffffffff, 0x00000000), UNPACKED_1x1(0.0, 0.0, 1.0, 0.0)},
+ {PIPE_FORMAT_R32G32B32A32_UNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0xffffffff), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32B32A32_UNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)},
+
+ {PIPE_FORMAT_R32_USCALED, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32_USCALED, PACKED_1x32(0xffffffff), PACKED_1x32(0x01000000), UNPACKED_1x1(16777216.0, 0.0, 0.0, 1.0)},
+
+ {PIPE_FORMAT_R32G32_USCALED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32_USCALED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x01000000, 0x00000000), UNPACKED_1x1(16777216.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32_USCALED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0x01000000), UNPACKED_1x1( 0.0, 16777216.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32_USCALED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x01000000, 0x01000000), UNPACKED_1x1(16777216.0, 16777216.0, 0.0, 1.0)},
+
+ {PIPE_FORMAT_R32G32B32_USCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32B32_USCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x01000000, 0x00000000, 0x00000000), UNPACKED_1x1(16777216.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32B32_USCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x01000000, 0x00000000), UNPACKED_1x1( 0.0, 16777216.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32B32_USCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0x01000000), UNPACKED_1x1( 0.0, 0.0, 16777216.0, 1.0)},
+ {PIPE_FORMAT_R32G32B32_USCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x01000000, 0x01000000, 0x01000000), UNPACKED_1x1(16777216.0, 16777216.0, 16777216.0, 1.0)},
+
+ {PIPE_FORMAT_R32G32B32A32_USCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R32G32B32A32_USCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x01000000, 0x00000000, 0x00000000, 0x00000000), UNPACKED_1x1(16777216.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R32G32B32A32_USCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x01000000, 0x00000000, 0x00000000), UNPACKED_1x1( 0.0, 16777216.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R32G32B32A32_USCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x01000000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, 16777216.0, 0.0)},
+ {PIPE_FORMAT_R32G32B32A32_USCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0x01000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 16777216.0)},
+ {PIPE_FORMAT_R32G32B32A32_USCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x01000000, 0x01000000, 0x01000000, 0x01000000), UNPACKED_1x1(16777216.0, 16777216.0, 16777216.0, 16777216.0)},
+
+ {PIPE_FORMAT_R32_SNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32_SNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x7fffffff), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32_SNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x80000001), UNPACKED_1x1( -1.0, 0.0, 0.0, 1.0)},
+
+ {PIPE_FORMAT_R32G32_SNORM, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32_SNORM, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x7fffffff, 0x00000000), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32_SNORM, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x80000001, 0x00000000), UNPACKED_1x1( -1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32_SNORM, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0x7fffffff), UNPACKED_1x1( 0.0, 1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32_SNORM, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0x80000001), UNPACKED_1x1( 0.0, -1.0, 0.0, 1.0)},
+
+ {PIPE_FORMAT_R32G32B32_SNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32B32_SNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x7fffffff, 0x00000000, 0x00000000), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32B32_SNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x80000001, 0x00000000, 0x00000000), UNPACKED_1x1( -1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32B32_SNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x7fffffff, 0x00000000), UNPACKED_1x1( 0.0, 1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32B32_SNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x80000001, 0x00000000), UNPACKED_1x1( 0.0, -1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32B32_SNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0x7fffffff), UNPACKED_1x1( 0.0, 0.0, 1.0, 1.0)},
+ {PIPE_FORMAT_R32G32B32_SNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0x80000001), UNPACKED_1x1( 0.0, 0.0, -1.0, 1.0)},
+
+ {PIPE_FORMAT_R32G32B32A32_SNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R32G32B32A32_SNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x7fffffff, 0x00000000, 0x00000000, 0x00000000), UNPACKED_1x1( 1.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R32G32B32A32_SNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x80000001, 0x00000000, 0x00000000, 0x00000000), UNPACKED_1x1( -1.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R32G32B32A32_SNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x7fffffff, 0x00000000, 0x00000000), UNPACKED_1x1( 0.0, 1.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R32G32B32A32_SNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x80000001, 0x00000000, 0x00000000), UNPACKED_1x1( 0.0, -1.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R32G32B32A32_SNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x7fffffff, 0x00000000), UNPACKED_1x1( 0.0, 0.0, 1.0, 0.0)},
+ {PIPE_FORMAT_R32G32B32A32_SNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x80000001, 0x00000000), UNPACKED_1x1( 0.0, 0.0, -1.0, 0.0)},
+ {PIPE_FORMAT_R32G32B32A32_SNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0x7fffffff), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32B32A32_SNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0x80000001), UNPACKED_1x1( 0.0, 0.0, 0.0, -1.0)},
+
+ {PIPE_FORMAT_R32_SSCALED, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32_SSCALED, PACKED_1x32(0xffffffff), PACKED_1x32(0x01000000), UNPACKED_1x1( 16777216.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32_SSCALED, PACKED_1x32(0xffffffff), PACKED_1x32(0xff000000), UNPACKED_1x1(-16777216.0, 0.0, 0.0, 1.0)},
+
+ {PIPE_FORMAT_R32G32_SSCALED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32_SSCALED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x01000000, 0x00000000), UNPACKED_1x1( 16777216.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32_SSCALED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0xff000000, 0x00000000), UNPACKED_1x1(-16777216.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32_SSCALED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0x01000000), UNPACKED_1x1( 0.0, 16777216.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32_SSCALED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0xff000000), UNPACKED_1x1( 0.0, -16777216.0, 0.0, 1.0)},
+
+ {PIPE_FORMAT_R32G32B32_SSCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32B32_SSCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x01000000, 0x00000000, 0x00000000), UNPACKED_1x1( 16777216.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32B32_SSCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0xff000000, 0x00000000, 0x00000000), UNPACKED_1x1(-16777216.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32B32_SSCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x01000000, 0x00000000), UNPACKED_1x1( 0.0, 16777216.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32B32_SSCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0xff000000, 0x00000000), UNPACKED_1x1( 0.0, -16777216.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32B32_SSCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0x01000000), UNPACKED_1x1( 0.0, 0.0, 16777216.0, 1.0)},
+ {PIPE_FORMAT_R32G32B32_SSCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0xff000000), UNPACKED_1x1( 0.0, 0.0, -16777216.0, 1.0)},
+
+ {PIPE_FORMAT_R32G32B32A32_SSCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R32G32B32A32_SSCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x01000000, 0x00000000, 0x00000000, 0x00000000), UNPACKED_1x1( 16777216.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R32G32B32A32_SSCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0xff000000, 0x00000000, 0x00000000, 0x00000000), UNPACKED_1x1(-16777216.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R32G32B32A32_SSCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x01000000, 0x00000000, 0x00000000), UNPACKED_1x1( 0.0, 16777216.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R32G32B32A32_SSCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0xff000000, 0x00000000, 0x00000000), UNPACKED_1x1( 0.0, -16777216.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R32G32B32A32_SSCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x01000000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, 16777216.0, 0.0)},
+ {PIPE_FORMAT_R32G32B32A32_SSCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0xff000000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, -16777216.0, 0.0)},
+ {PIPE_FORMAT_R32G32B32A32_SSCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0x01000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 16777216.0)},
+ {PIPE_FORMAT_R32G32B32A32_SSCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0xff000000), UNPACKED_1x1( 0.0, 0.0, 0.0, -16777216.0)},
+
+ /*
+ * Standard 32-bit float formats
+ */
+
+ {PIPE_FORMAT_R32_FLOAT, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32_FLOAT, PACKED_1x32(0xffffffff), PACKED_1x32(0x3f800000), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32_FLOAT, PACKED_1x32(0xffffffff), PACKED_1x32(0xbf800000), UNPACKED_1x1( -1.0, 0.0, 0.0, 1.0)},
+
+ {PIPE_FORMAT_R32G32_FLOAT, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32_FLOAT, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x3f800000, 0x00000000), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32_FLOAT, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0xbf800000, 0x00000000), UNPACKED_1x1(-1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32_FLOAT, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0x3f800000), UNPACKED_1x1( 0.0, 1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32_FLOAT, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0xbf800000), UNPACKED_1x1( 0.0, -1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32_FLOAT, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x3f800000, 0x3f800000), UNPACKED_1x1( 1.0, 1.0, 0.0, 1.0)},
+
+ {PIPE_FORMAT_R32G32B32_FLOAT, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32B32_FLOAT, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x3f800000, 0x00000000, 0x00000000), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32B32_FLOAT, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0xbf800000, 0x00000000, 0x00000000), UNPACKED_1x1(-1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32B32_FLOAT, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x3f800000, 0x00000000), UNPACKED_1x1( 0.0, 1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32B32_FLOAT, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0xbf800000, 0x00000000), UNPACKED_1x1( 0.0, -1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32B32_FLOAT, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0x3f800000), UNPACKED_1x1( 0.0, 0.0, 1.0, 1.0)},
+ {PIPE_FORMAT_R32G32B32_FLOAT, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0xbf800000), UNPACKED_1x1( 0.0, 0.0, -1.0, 1.0)},
+ {PIPE_FORMAT_R32G32B32_FLOAT, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x3f800000, 0x3f800000, 0x3f800000), UNPACKED_1x1( 1.0, 1.0, 1.0, 1.0)},
+
+ {PIPE_FORMAT_R32G32B32A32_FLOAT, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R32G32B32A32_FLOAT, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x3f800000, 0x00000000, 0x00000000, 0x00000000), UNPACKED_1x1( 1.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R32G32B32A32_FLOAT, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0xbf800000, 0x00000000, 0x00000000, 0x00000000), UNPACKED_1x1(-1.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R32G32B32A32_FLOAT, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x3f800000, 0x00000000, 0x00000000), UNPACKED_1x1( 0.0, 1.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R32G32B32A32_FLOAT, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0xbf800000, 0x00000000, 0x00000000), UNPACKED_1x1( 0.0, -1.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R32G32B32A32_FLOAT, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x3f800000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, 1.0, 0.0)},
+ {PIPE_FORMAT_R32G32B32A32_FLOAT, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0xbf800000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, -1.0, 0.0)},
+ {PIPE_FORMAT_R32G32B32A32_FLOAT, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0x3f800000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32B32A32_FLOAT, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0xbf800000), UNPACKED_1x1( 0.0, 0.0, 0.0, -1.0)},
+ {PIPE_FORMAT_R32G32B32A32_FLOAT, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000), UNPACKED_1x1( 1.0, 1.0, 1.0, 1.0)},
+
+ /*
+ * Half float formats
+ */
+
+ {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0x0000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0x3c00), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0xbc00), UNPACKED_1x1( -1.0, 0.0, 0.0, 1.0)},
+
+ {PIPE_FORMAT_R16G16_FLOAT, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0x0000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16G16_FLOAT, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x3c00, 0x0000), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16G16_FLOAT, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0xbc00, 0x0000), UNPACKED_1x1(-1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16G16_FLOAT, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0x3c00), UNPACKED_1x1( 0.0, 1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16G16_FLOAT, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0xbc00), UNPACKED_1x1( 0.0, -1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16G16_FLOAT, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x3c00, 0x3c00), UNPACKED_1x1( 1.0, 1.0, 0.0, 1.0)},
+
+ {PIPE_FORMAT_R16G16B16_FLOAT, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0x0000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16G16B16_FLOAT, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x3c00, 0x0000, 0x0000), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16G16B16_FLOAT, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0xbc00, 0x0000, 0x0000), UNPACKED_1x1(-1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16G16B16_FLOAT, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x3c00, 0x0000), UNPACKED_1x1( 0.0, 1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16G16B16_FLOAT, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0xbc00, 0x0000), UNPACKED_1x1( 0.0, -1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16G16B16_FLOAT, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0x3c00), UNPACKED_1x1( 0.0, 0.0, 1.0, 1.0)},
+ {PIPE_FORMAT_R16G16B16_FLOAT, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0xbc00), UNPACKED_1x1( 0.0, 0.0, -1.0, 1.0)},
+ {PIPE_FORMAT_R16G16B16_FLOAT, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x3c00, 0x3c00, 0x3c00), UNPACKED_1x1( 1.0, 1.0, 1.0, 1.0)},
+
+ {PIPE_FORMAT_R16G16B16A16_FLOAT, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0x0000), UNPACKED_1x1( 0.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R16G16B16A16_FLOAT, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x3c00, 0x0000, 0x0000, 0x0000), UNPACKED_1x1( 1.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R16G16B16A16_FLOAT, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0xbc00, 0x0000, 0x0000, 0x0000), UNPACKED_1x1(-1.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R16G16B16A16_FLOAT, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x3c00, 0x0000, 0x0000), UNPACKED_1x1( 0.0, 1.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R16G16B16A16_FLOAT, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0xbc00, 0x0000, 0x0000), UNPACKED_1x1( 0.0, -1.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R16G16B16A16_FLOAT, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x3c00, 0x0000), UNPACKED_1x1( 0.0, 0.0, 1.0, 0.0)},
+ {PIPE_FORMAT_R16G16B16A16_FLOAT, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0xbc00, 0x0000), UNPACKED_1x1( 0.0, 0.0, -1.0, 0.0)},
+ {PIPE_FORMAT_R16G16B16A16_FLOAT, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0x3c00), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R16G16B16A16_FLOAT, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0xbc00), UNPACKED_1x1( 0.0, 0.0, 0.0, -1.0)},
+ {PIPE_FORMAT_R16G16B16A16_FLOAT, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x3c00, 0x3c00, 0x3c00, 0x3c00), UNPACKED_1x1( 1.0, 1.0, 1.0, 1.0)},
+
+ /*
+ * 32-bit fixed point formats
+ */
+
+ {PIPE_FORMAT_R32_FIXED, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32_FIXED, PACKED_1x32(0xffffffff), PACKED_1x32(0x00010000), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32_FIXED, PACKED_1x32(0xffffffff), PACKED_1x32(0xffff0000), UNPACKED_1x1( -1.0, 0.0, 0.0, 1.0)},
+
+ {PIPE_FORMAT_R32G32_FIXED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32_FIXED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00010000, 0x00000000), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32_FIXED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0xffff0000, 0x00000000), UNPACKED_1x1(-1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32_FIXED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0x00010000), UNPACKED_1x1( 0.0, 1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32_FIXED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0xffff0000), UNPACKED_1x1( 0.0, -1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32_FIXED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00010000, 0x00010000), UNPACKED_1x1( 1.0, 1.0, 0.0, 1.0)},
+
+ {PIPE_FORMAT_R32G32B32_FIXED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32B32_FIXED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00010000, 0x00000000, 0x00000000), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32B32_FIXED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0xffff0000, 0x00000000, 0x00000000), UNPACKED_1x1(-1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32B32_FIXED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00010000, 0x00000000), UNPACKED_1x1( 0.0, 1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32B32_FIXED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0xffff0000, 0x00000000), UNPACKED_1x1( 0.0, -1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32B32_FIXED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0x00010000), UNPACKED_1x1( 0.0, 0.0, 1.0, 1.0)},
+ {PIPE_FORMAT_R32G32B32_FIXED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0xffff0000), UNPACKED_1x1( 0.0, 0.0, -1.0, 1.0)},
+ {PIPE_FORMAT_R32G32B32_FIXED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00010000, 0x00010000, 0x00010000), UNPACKED_1x1( 1.0, 1.0, 1.0, 1.0)},
+
+ {PIPE_FORMAT_R32G32B32A32_FIXED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R32G32B32A32_FIXED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00010000, 0x00000000, 0x00000000, 0x00000000), UNPACKED_1x1( 1.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R32G32B32A32_FIXED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0xffff0000, 0x00000000, 0x00000000, 0x00000000), UNPACKED_1x1(-1.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R32G32B32A32_FIXED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00010000, 0x00000000, 0x00000000), UNPACKED_1x1( 0.0, 1.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R32G32B32A32_FIXED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0xffff0000, 0x00000000, 0x00000000), UNPACKED_1x1( 0.0, -1.0, 0.0, 0.0)},
+ {PIPE_FORMAT_R32G32B32A32_FIXED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00010000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, 1.0, 0.0)},
+ {PIPE_FORMAT_R32G32B32A32_FIXED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0xffff0000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, -1.0, 0.0)},
+ {PIPE_FORMAT_R32G32B32A32_FIXED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0x00010000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R32G32B32A32_FIXED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0xffff0000), UNPACKED_1x1( 0.0, 0.0, 0.0, -1.0)},
+ {PIPE_FORMAT_R32G32B32A32_FIXED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00010000, 0x00010000, 0x00010000, 0x00010000), UNPACKED_1x1( 1.0, 1.0, 1.0, 1.0)},
+
+ /*
+ * D3D9 specific vertex formats
+ */
+
+ {PIPE_FORMAT_R10G10B10X2_USCALED, PACKED_1x32(0x3fffffff), PACKED_1x32(0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R10G10B10X2_USCALED, PACKED_1x32(0x3fffffff), PACKED_1x32(0x000003ff), UNPACKED_1x1(1023.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R10G10B10X2_USCALED, PACKED_1x32(0x3fffffff), PACKED_1x32(0x000ffc00), UNPACKED_1x1( 0.0, 1023.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R10G10B10X2_USCALED, PACKED_1x32(0x3fffffff), PACKED_1x32(0x3ff00000), UNPACKED_1x1( 0.0, 0.0, 1023.0, 1.0)},
+ {PIPE_FORMAT_R10G10B10X2_USCALED, PACKED_1x32(0x3fffffff), PACKED_1x32(0x3fffffff), UNPACKED_1x1(1023.0, 1023.0, 1023.0, 1.0)},
+
+ {PIPE_FORMAT_R10G10B10X2_SNORM, PACKED_1x32(0x3fffffff), PACKED_1x32(0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R10G10B10X2_SNORM, PACKED_1x32(0x3fffffff), PACKED_1x32(0x000001ff), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R10G10B10X2_SNORM, PACKED_1x32(0x3fffffff), PACKED_1x32(0x00000201), UNPACKED_1x1(-1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R10G10B10X2_SNORM, PACKED_1x32(0x3fffffff), PACKED_1x32(0x0007fc00), UNPACKED_1x1( 0.0, 1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R10G10B10X2_SNORM, PACKED_1x32(0x3fffffff), PACKED_1x32(0x00080400), UNPACKED_1x1( 0.0, -1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R10G10B10X2_SNORM, PACKED_1x32(0x3fffffff), PACKED_1x32(0x1ff00000), UNPACKED_1x1( 0.0, 0.0, 1.0, 1.0)},
+ {PIPE_FORMAT_R10G10B10X2_SNORM, PACKED_1x32(0x3fffffff), PACKED_1x32(0x20100000), UNPACKED_1x1( 0.0, 0.0, -1.0, 1.0)},
+
+ /*
+ * Special formats that not fit anywhere else
+ */
+
+};
+
+
+const unsigned util_format_nr_test_cases = Elements(util_format_test_cases);
diff --git a/src/gallium/auxiliary/util/u_format_tests.h b/src/gallium/auxiliary/util/u_format_tests.h
new file mode 100644
index 00000000000..f59563f4f4f
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_format_tests.h
@@ -0,0 +1,71 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+
+#ifndef U_FORMAT_TESTS_H_
+#define U_FORMAT_TESTS_H_
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_format.h"
+
+
+#define UTIL_FORMAT_MAX_PACKED_BYTES 16
+#define UTIL_FORMAT_MAX_UNPACKED_WIDTH 4
+#define UTIL_FORMAT_MAX_UNPACKED_HEIGHT 4
+
+
+/**
+ * A (packed, unpacked) color pair.
+ */
+struct util_format_test_case
+{
+ enum pipe_format format;
+
+ /**
+ * Mask of the bits that actually meaningful data. Used to mask out the
+ * "X" channels.
+ */
+ uint8_t mask[UTIL_FORMAT_MAX_PACKED_BYTES];
+
+ uint8_t packed[UTIL_FORMAT_MAX_PACKED_BYTES];
+
+ /**
+ * RGBA.
+ */
+ double unpacked[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH][4];
+};
+
+
+extern const struct util_format_test_case
+util_format_test_cases[];
+
+
+extern const unsigned util_format_nr_test_cases;
+
+
+#endif /* U_FORMAT_TESTS_H_ */
diff --git a/src/gallium/auxiliary/util/u_format_yuv.c b/src/gallium/auxiliary/util/u_format_yuv.c
new file mode 100644
index 00000000000..64ea0b35347
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_format_yuv.c
@@ -0,0 +1,1182 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+
+/**
+ * @file
+ * YUV and RGB subsampled formats conversion.
+ *
+ * @author Jose Fonseca <[email protected]>
+ */
+
+
+#include "util/u_format_yuv.h"
+
+
+void
+util_format_r8g8_b8g8_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+
+ for (y = 0; y < height; y += 1) {
+ float *dst = dst_row;
+ const uint32_t *src = (const uint32_t *)src_row;
+ uint32_t value;
+ float r, g0, g1, b;
+
+ for (x = 0; x + 1 < width; x += 2) {
+ value = *src++;
+
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+
+ r = ubyte_to_float((value >> 0) & 0xff);
+ g0 = ubyte_to_float((value >> 8) & 0xff);
+ b = ubyte_to_float((value >> 16) & 0xff);
+ g1 = ubyte_to_float((value >> 24) & 0xff);
+
+ dst[0] = r; /* r */
+ dst[1] = g0; /* g */
+ dst[2] = b; /* b */
+ dst[3] = 1.0f; /* a */
+ dst += 4;
+
+ dst[0] = r; /* r */
+ dst[1] = g1; /* g */
+ dst[2] = b; /* b */
+ dst[3] = 1.0f; /* a */
+ dst += 4;
+ }
+
+ if (x < width) {
+ value = *src;
+
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+
+ r = ubyte_to_float((value >> 0) & 0xff);
+ g0 = ubyte_to_float((value >> 8) & 0xff);
+ b = ubyte_to_float((value >> 16) & 0xff);
+ g1 = ubyte_to_float((value >> 24) & 0xff);
+
+ dst[0] = r; /* r */
+ dst[1] = g0; /* g */
+ dst[2] = b; /* b */
+ dst[3] = 1.0f; /* a */
+ }
+
+ src_row += src_stride/sizeof(*src_row);
+ dst_row += dst_stride/sizeof(*dst_row);
+ }
+}
+
+
+void
+util_format_r8g8_b8g8_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+
+ for (y = 0; y < height; y += 1) {
+ uint8_t *dst = dst_row;
+ const uint32_t *src = (const uint32_t *)src_row;
+ uint32_t value;
+ uint8_t r, g0, g1, b;
+
+ for (x = 0; x + 1 < width; x += 2) {
+ value = *src++;
+
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+
+ r = (value >> 0) & 0xff;
+ g0 = (value >> 8) & 0xff;
+ b = (value >> 16) & 0xff;
+ g1 = (value >> 24) & 0xff;
+
+ dst[0] = r; /* r */
+ dst[1] = g0; /* g */
+ dst[2] = b; /* b */
+ dst[3] = 0xff; /* a */
+ dst += 4;
+
+ dst[0] = r; /* r */
+ dst[1] = g1; /* g */
+ dst[2] = b; /* b */
+ dst[3] = 0xff; /* a */
+ dst += 4;
+ }
+
+ if (x < width) {
+ value = *src;
+
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+
+ r = (value >> 0) & 0xff;
+ g0 = (value >> 8) & 0xff;
+ b = (value >> 16) & 0xff;
+ g1 = (value >> 24) & 0xff;
+
+ dst[0] = r; /* r */
+ dst[1] = g0; /* g */
+ dst[2] = b; /* b */
+ dst[3] = 0xff; /* a */
+ }
+
+ src_row += src_stride/sizeof(*src_row);
+ dst_row += dst_stride/sizeof(*dst_row);
+ }
+}
+
+
+void
+util_format_r8g8_b8g8_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+
+ for (y = 0; y < height; y += 1) {
+ const float *src = src_row;
+ uint32_t *dst = (uint32_t *)dst_row;
+ float r, g0, g1, b;
+ uint32_t value;
+
+ for (x = 0; x + 1 < width; x += 2) {
+ r = 0.5f*(src[0] + src[4]);
+ g0 = src[1];
+ g1 = src[5];
+ b = 0.5f*(src[2] + src[6]);
+
+ value = float_to_ubyte(r);
+ value |= float_to_ubyte(g0) << 8;
+ value |= float_to_ubyte(b) << 16;
+ value |= float_to_ubyte(g1) << 24;
+
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+
+ *dst++ = value;
+
+ src += 8;
+ }
+
+ if (x < width) {
+ r = src[0];
+ g0 = src[1];
+ g1 = 0;
+ b = src[2];
+
+ value = float_to_ubyte(r);
+ value |= float_to_ubyte(g0) << 8;
+ value |= float_to_ubyte(b) << 16;
+ value |= float_to_ubyte(g1) << 24;
+
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+
+ *dst = value;
+ }
+
+ dst_row += dst_stride/sizeof(*dst_row);
+ src_row += src_stride/sizeof(*src_row);
+ }
+}
+
+
+void
+util_format_r8g8_b8g8_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+
+ for (y = 0; y < height; y += 1) {
+ const uint8_t *src = src_row;
+ uint32_t *dst = (uint32_t *)dst_row;
+ uint32_t r, g0, g1, b;
+ uint32_t value;
+
+ for (x = 0; x + 1 < width; x += 2) {
+ r = (src[0] + src[4] + 1) >> 1;
+ g0 = src[1];
+ g1 = src[5];
+ b = (src[2] + src[6] + 1) >> 1;
+
+ value = r;
+ value |= g0 << 8;
+ value |= b << 16;
+ value |= g1 << 24;
+
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+
+ *dst++ = value;
+
+ src += 8;
+ }
+
+ if (x < width) {
+ r = src[0];
+ g0 = src[1];
+ g1 = 0;
+ b = src[2];
+
+ value = r;
+ value |= g0 << 8;
+ value |= b << 16;
+ value |= g1 << 24;
+
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+
+ *dst = value;
+ }
+
+ dst_row += dst_stride/sizeof(*dst_row);
+ src_row += src_stride/sizeof(*src_row);
+ }
+}
+
+
+void
+util_format_r8g8_b8g8_unorm_fetch_rgba_float(float *dst, const uint8_t *src,
+ unsigned i, unsigned j)
+{
+ assert(i < 2);
+ assert(j < 1);
+
+ dst[0] = ubyte_to_float(src[0]); /* r */
+ dst[1] = ubyte_to_float(src[1 + 2*i]); /* g */
+ dst[2] = ubyte_to_float(src[2]); /* b */
+ dst[3] = 1.0f; /* a */
+}
+
+
+void
+util_format_g8r8_g8b8_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+
+ for (y = 0; y < height; y += 1) {
+ float *dst = dst_row;
+ const uint32_t *src = (const uint32_t *)src_row;
+ uint32_t value;
+ float r, g0, g1, b;
+
+ for (x = 0; x + 1 < width; x += 2) {
+ value = *src++;
+
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+
+ g0 = ubyte_to_float((value >> 0) & 0xff);
+ r = ubyte_to_float((value >> 8) & 0xff);
+ g1 = ubyte_to_float((value >> 16) & 0xff);
+ b = ubyte_to_float((value >> 24) & 0xff);
+
+ dst[0] = r; /* r */
+ dst[1] = g0; /* g */
+ dst[2] = b; /* b */
+ dst[3] = 1.0f; /* a */
+ dst += 4;
+
+ dst[0] = r; /* r */
+ dst[1] = g1; /* g */
+ dst[2] = b; /* b */
+ dst[3] = 1.0f; /* a */
+ dst += 4;
+ }
+
+ if (x < width) {
+ value = *src;
+
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+
+ g0 = ubyte_to_float((value >> 0) & 0xff);
+ r = ubyte_to_float((value >> 8) & 0xff);
+ g1 = ubyte_to_float((value >> 16) & 0xff);
+ b = ubyte_to_float((value >> 24) & 0xff);
+
+ dst[0] = r; /* r */
+ dst[1] = g0; /* g */
+ dst[2] = b; /* b */
+ dst[3] = 1.0f; /* a */
+ }
+
+ src_row += src_stride/sizeof(*src_row);
+ dst_row += dst_stride/sizeof(*dst_row);
+ }
+}
+
+
+void
+util_format_g8r8_g8b8_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+
+ for (y = 0; y < height; y += 1) {
+ uint8_t *dst = dst_row;
+ const uint32_t *src = (const uint32_t *)src_row;
+ uint32_t value;
+ uint8_t r, g0, g1, b;
+
+ for (x = 0; x + 1 < width; x += 2) {
+ value = *src++;
+
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+
+ g0 = (value >> 0) & 0xff;
+ r = (value >> 8) & 0xff;
+ g1 = (value >> 16) & 0xff;
+ b = (value >> 24) & 0xff;
+
+ dst[0] = r; /* r */
+ dst[1] = g0; /* g */
+ dst[2] = b; /* b */
+ dst[3] = 0xff; /* a */
+ dst += 4;
+
+ dst[0] = r; /* r */
+ dst[1] = g1; /* g */
+ dst[2] = b; /* b */
+ dst[3] = 0xff; /* a */
+ dst += 4;
+ }
+
+ if (x < width) {
+ value = *src;
+
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+
+ g0 = (value >> 0) & 0xff;
+ r = (value >> 8) & 0xff;
+ g1 = (value >> 16) & 0xff;
+ b = (value >> 24) & 0xff;
+
+ dst[0] = r; /* r */
+ dst[1] = g0; /* g */
+ dst[2] = b; /* b */
+ dst[3] = 0xff; /* a */
+ }
+
+ src_row += src_stride/sizeof(*src_row);
+ dst_row += dst_stride/sizeof(*dst_row);
+ }
+}
+
+
+void
+util_format_g8r8_g8b8_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+
+ for (y = 0; y < height; y += 1) {
+ const float *src = src_row;
+ uint32_t *dst = (uint32_t *)dst_row;
+ float r, g0, g1, b;
+ uint32_t value;
+
+ for (x = 0; x + 1 < width; x += 2) {
+ r = 0.5f*(src[0] + src[4]);
+ g0 = src[1];
+ g1 = src[5];
+ b = 0.5f*(src[2] + src[6]);
+
+ value = float_to_ubyte(g0);
+ value |= float_to_ubyte(r) << 8;
+ value |= float_to_ubyte(g1) << 16;
+ value |= float_to_ubyte(b) << 24;
+
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+
+ *dst++ = value;
+
+ src += 8;
+ }
+
+ if (x < width) {
+ r = src[0];
+ g0 = src[1];
+ g1 = 0;
+ b = src[2];
+
+ value = float_to_ubyte(g0);
+ value |= float_to_ubyte(r) << 8;
+ value |= float_to_ubyte(g1) << 16;
+ value |= float_to_ubyte(b) << 24;
+
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+
+ *dst = value;
+ }
+
+ dst_row += dst_stride/sizeof(*dst_row);
+ src_row += src_stride/sizeof(*src_row);
+ }
+}
+
+
+void
+util_format_g8r8_g8b8_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+
+ for (y = 0; y < height; y += 1) {
+ const uint8_t *src = src_row;
+ uint32_t *dst = (uint32_t *)dst_row;
+ uint32_t r, g0, g1, b;
+ uint32_t value;
+
+ for (x = 0; x + 1 < width; x += 2) {
+ r = (src[0] + src[4] + 1) >> 1;
+ g0 = src[1];
+ g1 = src[5];
+ b = (src[2] + src[6] + 1) >> 1;
+
+ value = g0;
+ value |= r << 8;
+ value |= g1 << 16;
+ value |= b << 24;
+
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+
+ *dst++ = value;
+
+ src += 8;
+ }
+
+ if (x < width) {
+ r = src[0];
+ g0 = src[1];
+ g1 = 0;
+ b = src[2];
+
+ value = g0;
+ value |= r << 8;
+ value |= g1 << 16;
+ value |= b << 24;
+
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+
+ *dst = value;
+ }
+
+ dst_row += dst_stride/sizeof(*dst_row);
+ src_row += src_stride/sizeof(*src_row);
+ }
+}
+
+
+void
+util_format_g8r8_g8b8_unorm_fetch_rgba_float(float *dst, const uint8_t *src,
+ unsigned i, unsigned j)
+{
+ assert(i < 2);
+ assert(j < 1);
+
+ dst[0] = ubyte_to_float(src[1]); /* r */
+ dst[1] = ubyte_to_float(src[0 + 2*i]); /* g */
+ dst[2] = ubyte_to_float(src[3]); /* b */
+ dst[3] = 1.0f; /* a */
+}
+
+
+void
+util_format_uyvy_unpack_rgba_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+
+ for (y = 0; y < height; y += 1) {
+ float *dst = dst_row;
+ const uint32_t *src = (const uint32_t *)src_row;
+ uint32_t value;
+ uint8_t y0, y1, u, v;
+
+ for (x = 0; x + 1 < width; x += 2) {
+ value = *src++;
+
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+
+ u = (value >> 0) & 0xff;
+ y0 = (value >> 8) & 0xff;
+ v = (value >> 16) & 0xff;
+ y1 = (value >> 24) & 0xff;
+
+ util_format_yuv_to_rgb_float(y0, u, v, &dst[0], &dst[1], &dst[2]);
+ dst[3] = 1.0f; /* a */
+ dst += 4;
+
+ util_format_yuv_to_rgb_float(y1, u, v, &dst[0], &dst[1], &dst[2]);
+ dst[3] = 1.0f; /* a */
+ dst += 4;
+ }
+
+ if (x < width) {
+ value = *src;
+
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+
+ u = (value >> 0) & 0xff;
+ y0 = (value >> 8) & 0xff;
+ v = (value >> 16) & 0xff;
+ y1 = (value >> 24) & 0xff;
+
+ util_format_yuv_to_rgb_float(y0, u, v, &dst[0], &dst[1], &dst[2]);
+ dst[3] = 1.0f; /* a */
+ }
+
+ src_row += src_stride/sizeof(*src_row);
+ dst_row += dst_stride/sizeof(*dst_row);
+ }
+}
+
+
+void
+util_format_uyvy_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+
+ for (y = 0; y < height; y += 1) {
+ uint8_t *dst = dst_row;
+ const uint32_t *src = (const uint32_t *)src_row;
+ uint32_t value;
+ uint8_t y0, y1, u, v;
+
+ for (x = 0; x + 1 < width; x += 2) {
+ value = *src++;
+
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+
+ u = (value >> 0) & 0xff;
+ y0 = (value >> 8) & 0xff;
+ v = (value >> 16) & 0xff;
+ y1 = (value >> 24) & 0xff;
+
+ util_format_yuv_to_rgb_8unorm(y0, u, v, &dst[0], &dst[1], &dst[2]);
+ dst[3] = 0xff; /* a */
+ dst += 4;
+
+ util_format_yuv_to_rgb_8unorm(y1, u, v, &dst[0], &dst[1], &dst[2]);
+ dst[3] = 0xff; /* a */
+ dst += 4;
+ }
+
+ if (x < width) {
+ value = *src;
+
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+
+ u = (value >> 0) & 0xff;
+ y0 = (value >> 8) & 0xff;
+ v = (value >> 16) & 0xff;
+ y1 = (value >> 24) & 0xff;
+
+ util_format_yuv_to_rgb_8unorm(y0, u, v, &dst[0], &dst[1], &dst[2]);
+ dst[3] = 0xff; /* a */
+ }
+
+ src_row += src_stride/sizeof(*src_row);
+ dst_row += dst_stride/sizeof(*dst_row);
+ }
+}
+
+
+void
+util_format_uyvy_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+
+ for (y = 0; y < height; y += 1) {
+ const float *src = src_row;
+ uint32_t *dst = (uint32_t *)dst_row;
+ uint8_t y0, y1, u, v;
+ uint32_t value;
+
+ for (x = 0; x + 1 < width; x += 2) {
+ uint8_t y0, y1, u0, u1, v0, v1, u, v;
+
+ util_format_rgb_float_to_yuv(src[0], src[1], src[2],
+ &y0, &u0, &v0);
+ util_format_rgb_float_to_yuv(src[4], src[5], src[6],
+ &y1, &u1, &v1);
+
+ u = (u0 + u1 + 1) >> 1;
+ v = (v0 + v1 + 1) >> 1;
+
+ value = u;
+ value |= y0 << 8;
+ value |= v << 16;
+ value |= y1 << 24;
+
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+
+ *dst++ = value;
+
+ src += 8;
+ }
+
+ if (x < width) {
+ util_format_rgb_float_to_yuv(src[0], src[1], src[2],
+ &y0, &u, &v);
+ y1 = 0;
+
+ value = u;
+ value |= y0 << 8;
+ value |= v << 16;
+ value |= y1 << 24;
+
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+
+ *dst = value;
+ }
+
+ dst_row += dst_stride/sizeof(*dst_row);
+ src_row += src_stride/sizeof(*src_row);
+ }
+}
+
+
+void
+util_format_uyvy_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+
+ for (y = 0; y < height; y += 1) {
+ const uint8_t *src = src_row;
+ uint32_t *dst = (uint32_t *)dst_row;
+ uint8_t y0, y1, u, v;
+ uint32_t value;
+
+ for (x = 0; x + 1 < width; x += 2) {
+ uint8_t y0, y1, u0, u1, v0, v1, u, v;
+
+ util_format_rgb_8unorm_to_yuv(src[0], src[1], src[2],
+ &y0, &u0, &v0);
+ util_format_rgb_8unorm_to_yuv(src[4], src[5], src[6],
+ &y1, &u1, &v1);
+
+ u = (u0 + u1 + 1) >> 1;
+ v = (v0 + v1 + 1) >> 1;
+
+ value = u;
+ value |= y0 << 8;
+ value |= v << 16;
+ value |= y1 << 24;
+
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+
+ *dst++ = value;
+
+ src += 8;
+ }
+
+ if (x < width) {
+ util_format_rgb_8unorm_to_yuv(src[0], src[1], src[2],
+ &y0, &u, &v);
+ y1 = 0;
+
+ value = u;
+ value |= y0 << 8;
+ value |= v << 16;
+ value |= y1 << 24;
+
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+
+ *dst = value;
+ }
+
+ dst_row += dst_stride/sizeof(*dst_row);
+ src_row += src_stride/sizeof(*src_row);
+ }
+}
+
+
+void
+util_format_uyvy_fetch_rgba_float(float *dst, const uint8_t *src,
+ unsigned i, unsigned j)
+{
+ uint8_t y, u, v;
+
+ assert(i < 2);
+ assert(j < 1);
+
+ y = src[1 + i*2];
+ u = src[0];
+ v = src[2];
+
+ util_format_yuv_to_rgb_float(y, u, v, &dst[0], &dst[1], &dst[2]);
+
+ dst[3] = 1.0f;
+}
+
+
+void
+util_format_yuyv_unpack_rgba_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+
+ for (y = 0; y < height; y += 1) {
+ float *dst = dst_row;
+ const uint32_t *src = (const uint32_t *)src_row;
+ uint32_t value;
+ uint8_t y0, y1, u, v;
+
+ for (x = 0; x + 1 < width; x += 2) {
+ value = *src++;
+
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+
+ y0 = (value >> 0) & 0xff;
+ u = (value >> 8) & 0xff;
+ y1 = (value >> 16) & 0xff;
+ v = (value >> 24) & 0xff;
+
+ util_format_yuv_to_rgb_float(y0, u, v, &dst[0], &dst[1], &dst[2]);
+ dst[3] = 1.0f; /* a */
+ dst += 4;
+
+ util_format_yuv_to_rgb_float(y1, u, v, &dst[0], &dst[1], &dst[2]);
+ dst[3] = 1.0f; /* a */
+ dst += 4;
+ }
+
+ if (x < width) {
+ value = *src;
+
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+
+ y0 = (value >> 0) & 0xff;
+ u = (value >> 8) & 0xff;
+ y1 = (value >> 16) & 0xff;
+ v = (value >> 24) & 0xff;
+
+ util_format_yuv_to_rgb_float(y0, u, v, &dst[0], &dst[1], &dst[2]);
+ dst[3] = 1.0f; /* a */
+ }
+
+ src_row += src_stride/sizeof(*src_row);
+ dst_row += dst_stride/sizeof(*dst_row);
+ }
+}
+
+
+void
+util_format_yuyv_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+
+ for (y = 0; y < height; y += 1) {
+ uint8_t *dst = dst_row;
+ const uint32_t *src = (const uint32_t *)src_row;
+ uint32_t value;
+ uint8_t y0, y1, u, v;
+
+ for (x = 0; x + 1 < width; x += 2) {
+ value = *src++;
+
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+
+ y0 = (value >> 0) & 0xff;
+ u = (value >> 8) & 0xff;
+ y1 = (value >> 16) & 0xff;
+ v = (value >> 24) & 0xff;
+
+ util_format_yuv_to_rgb_8unorm(y0, u, v, &dst[0], &dst[1], &dst[2]);
+ dst[3] = 0xff; /* a */
+ dst += 4;
+
+ util_format_yuv_to_rgb_8unorm(y1, u, v, &dst[0], &dst[1], &dst[2]);
+ dst[3] = 0xff; /* a */
+ dst += 4;
+ }
+
+ if (x < width) {
+ value = *src;
+
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+
+ y0 = (value >> 0) & 0xff;
+ u = (value >> 8) & 0xff;
+ y1 = (value >> 16) & 0xff;
+ v = (value >> 24) & 0xff;
+
+ util_format_yuv_to_rgb_8unorm(y0, u, v, &dst[0], &dst[1], &dst[2]);
+ dst[3] = 0xff; /* a */
+ }
+
+ src_row += src_stride/sizeof(*src_row);
+ dst_row += dst_stride/sizeof(*dst_row);
+ }
+}
+
+
+void
+util_format_yuyv_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+
+ for (y = 0; y < height; y += 1) {
+ const float *src = src_row;
+ uint32_t *dst = (uint32_t *)dst_row;
+ uint8_t y0, y1, u, v;
+ uint32_t value;
+
+ for (x = 0; x + 1 < width; x += 2) {
+ uint8_t y0, y1, u0, u1, v0, v1, u, v;
+
+ util_format_rgb_float_to_yuv(src[0], src[1], src[2],
+ &y0, &u0, &v0);
+ util_format_rgb_float_to_yuv(src[4], src[5], src[6],
+ &y1, &u1, &v1);
+
+ u = (u0 + u1 + 1) >> 1;
+ v = (v0 + v1 + 1) >> 1;
+
+ value = y0;
+ value |= u << 8;
+ value |= y1 << 16;
+ value |= v << 24;
+
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+
+ *dst++ = value;
+
+ src += 8;
+ }
+
+ if (x < width) {
+ util_format_rgb_float_to_yuv(src[0], src[1], src[2],
+ &y0, &u, &v);
+ y1 = 0;
+
+ value = y0;
+ value |= u << 8;
+ value |= y1 << 16;
+ value |= v << 24;
+
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+
+ *dst = value;
+ }
+
+ dst_row += dst_stride/sizeof(*dst_row);
+ src_row += src_stride/sizeof(*src_row);
+ }
+}
+
+
+void
+util_format_yuyv_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+
+ for (y = 0; y < height; y += 1) {
+ const uint8_t *src = src_row;
+ uint32_t *dst = (uint32_t *)dst_row;
+ uint8_t y0, y1, u, v;
+ uint32_t value;
+
+ for (x = 0; x + 1 < width; x += 2) {
+ uint8_t y0, y1, u0, u1, v0, v1, u, v;
+
+ util_format_rgb_8unorm_to_yuv(src[0], src[1], src[2],
+ &y0, &u0, &v0);
+ util_format_rgb_8unorm_to_yuv(src[4], src[5], src[6],
+ &y1, &u1, &v1);
+
+ u = (u0 + u1 + 1) >> 1;
+ v = (v0 + v1 + 1) >> 1;
+
+ value = y0;
+ value |= u << 8;
+ value |= y1 << 16;
+ value |= v << 24;
+
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+
+ *dst++ = value;
+
+ src += 8;
+ }
+
+ if (x < width) {
+ util_format_rgb_8unorm_to_yuv(src[0], src[1], src[2],
+ &y0, &u, &v);
+ y1 = 0;
+
+ value = y0;
+ value |= u << 8;
+ value |= y1 << 16;
+ value |= v << 24;
+
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+
+ *dst = value;
+ }
+
+ dst_row += dst_stride/sizeof(*dst_row);
+ src_row += src_stride/sizeof(*src_row);
+ }
+}
+
+
+void
+util_format_yuyv_fetch_rgba_float(float *dst, const uint8_t *src,
+ unsigned i, unsigned j)
+{
+ uint8_t y, u, v;
+
+ assert(i < 2);
+ assert(j < 1);
+
+ y = src[0 + i*2];
+ u = src[1];
+ v = src[3];
+
+ util_format_yuv_to_rgb_float(y, u, v, &dst[0], &dst[1], &dst[2]);
+
+ dst[3] = 1.0f;
+}
+
+/* XXX: Stubbed for now */
+void
+util_format_yv12_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height) {}
+void
+util_format_yv12_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height) {}
+void
+util_format_yv12_unpack_rgba_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height) {}
+void
+util_format_yv12_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src_row, unsigned src_stride,
+ unsigned width, unsigned height) {}
+void
+util_format_yv12_fetch_rgba_float(float *dst, const uint8_t *src,
+ unsigned i, unsigned j) {}
+void
+util_format_yv16_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height) {}
+void
+util_format_yv16_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height) {}
+void
+util_format_yv16_unpack_rgba_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height) {}
+void
+util_format_yv16_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src_row, unsigned src_stride,
+ unsigned width, unsigned height) {}
+void
+util_format_yv16_fetch_rgba_float(float *dst, const uint8_t *src,
+ unsigned i, unsigned j) {}
+void
+util_format_iyuv_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height) {}
+void
+util_format_iyuv_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height) {}
+void
+util_format_iyuv_unpack_rgba_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height) {}
+void
+util_format_iyuv_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src_row, unsigned src_stride,
+ unsigned width, unsigned height) {}
+void
+util_format_iyuv_fetch_rgba_float(float *dst, const uint8_t *src,
+ unsigned i, unsigned j) {}
+void
+util_format_nv12_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height) {}
+void
+util_format_nv12_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height) {}
+void
+util_format_nv12_unpack_rgba_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height) {}
+void
+util_format_nv12_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src_row, unsigned src_stride,
+ unsigned width, unsigned height) {}
+void
+util_format_nv12_fetch_rgba_float(float *dst, const uint8_t *src,
+ unsigned i, unsigned j) {}
+void
+util_format_nv21_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height) {}
+void
+util_format_nv21_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height) {}
+void
+util_format_nv21_unpack_rgba_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height) {}
+void
+util_format_nv21_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src_row, unsigned src_stride,
+ unsigned width, unsigned height) {}
+void
+util_format_nv21_fetch_rgba_float(float *dst, const uint8_t *src,
+ unsigned i, unsigned j) {}
+void
+util_format_ia44_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height) {}
+void
+util_format_ia44_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height) {}
+void
+util_format_ia44_unpack_rgba_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height) {}
+void
+util_format_ia44_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src_row, unsigned src_stride,
+ unsigned width, unsigned height) {}
+void
+util_format_ia44_fetch_rgba_float(float *dst, const uint8_t *src,
+ unsigned i, unsigned j) {}
+void
+util_format_ai44_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height) {}
+void
+util_format_ai44_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height) {}
+void
+util_format_ai44_unpack_rgba_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height) {}
+void
+util_format_ai44_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src_row, unsigned src_stride,
+ unsigned width, unsigned height) {}
+void
+util_format_ai44_fetch_rgba_float(float *dst, const uint8_t *src,
+ unsigned i, unsigned j) {}
diff --git a/src/gallium/auxiliary/util/u_format_yuv.h b/src/gallium/auxiliary/util/u_format_yuv.h
new file mode 100644
index 00000000000..9f2365a5266
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_format_yuv.h
@@ -0,0 +1,358 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+
+/**
+ * @file
+ * YUV colorspace conversion.
+ *
+ * @author Brian Paul <[email protected]>
+ * @author Michal Krol <[email protected]>
+ * @author Jose Fonseca <[email protected]>
+ *
+ * See also:
+ * - http://www.fourcc.org/fccyvrgb.php
+ * - http://msdn.microsoft.com/en-us/library/ms893078
+ * - http://en.wikipedia.org/wiki/YUV
+ */
+
+
+#ifndef U_FORMAT_YUV_H_
+#define U_FORMAT_YUV_H_
+
+
+#include "pipe/p_compiler.h"
+#include "u_math.h"
+
+
+/*
+ * TODO: Ensure we use consistent and right floating formulas, with enough
+ * precision in the coefficients.
+ */
+
+static INLINE void
+util_format_rgb_float_to_yuv(float r, float g, float b,
+ uint8_t *y, uint8_t *u, uint8_t *v)
+{
+ const float _r = CLAMP(r, 0.0f, 1.0f);
+ const float _g = CLAMP(g, 0.0f, 1.0f);
+ const float _b = CLAMP(b, 0.0f, 1.0f);
+
+ const float scale = 255.0f;
+
+ const int _y = scale * ( (0.257f * _r) + (0.504f * _g) + (0.098f * _b));
+ const int _u = scale * (-(0.148f * _r) - (0.291f * _g) + (0.439f * _b));
+ const int _v = scale * ( (0.439f * _r) - (0.368f * _g) - (0.071f * _b));
+
+ *y = _y + 16;
+ *u = _u + 128;
+ *v = _v + 128;
+}
+
+
+static INLINE void
+util_format_yuv_to_rgb_float(uint8_t y, uint8_t u, uint8_t v,
+ float *r, float *g, float *b)
+{
+ const int _y = y - 16;
+ const int _u = u - 128;
+ const int _v = v - 128;
+
+ const float y_factor = 255.0f / 219.0f;
+
+ const float scale = 1.0f / 255.0f;
+
+ *r = scale * (y_factor * _y + 1.596f * _v);
+ *g = scale * (y_factor * _y - 0.391f * _u - 0.813f * _v);
+ *b = scale * (y_factor * _y + 2.018f * _u );
+}
+
+
+static INLINE void
+util_format_rgb_8unorm_to_yuv(uint8_t r, uint8_t g, uint8_t b,
+ uint8_t *y, uint8_t *u, uint8_t *v)
+{
+ *y = (( 66 * r + 129 * g + 25 * b + 128) >> 8) + 16;
+ *u = (( -38 * r - 74 * g + 112 * b + 128) >> 8) + 128;
+ *v = (( 112 * r - 94 * g - 18 * b + 128) >> 8) + 128;
+}
+
+
+static INLINE void
+util_format_yuv_to_rgb_8unorm(uint8_t y, uint8_t u, uint8_t v,
+ uint8_t *r, uint8_t *g, uint8_t *b)
+{
+ const int _y = y - 16;
+ const int _u = u - 128;
+ const int _v = v - 128;
+
+ const int _r = (298 * _y + 409 * _v + 128) >> 8;
+ const int _g = (298 * _y - 100 * _u - 208 * _v + 128) >> 8;
+ const int _b = (298 * _y + 516 * _u + 128) >> 8;
+
+ *r = CLAMP(_r, 0, 255);
+ *g = CLAMP(_g, 0, 255);
+ *b = CLAMP(_b, 0, 255);
+}
+
+
+
+void
+util_format_uyvy_unpack_rgba_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+
+void
+util_format_uyvy_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+
+void
+util_format_uyvy_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+
+void
+util_format_uyvy_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+
+void
+util_format_uyvy_fetch_rgba_float(float *dst, const uint8_t *src,
+ unsigned i, unsigned j);
+
+void
+util_format_yuyv_unpack_rgba_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+
+void
+util_format_yuyv_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+
+void
+util_format_yuyv_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+
+void
+util_format_yuyv_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+
+void
+util_format_yuyv_fetch_rgba_float(float *dst, const uint8_t *src,
+ unsigned i, unsigned j);
+
+/* XXX: Stubbed for now */
+void
+util_format_yv12_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+void
+util_format_yv12_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+void
+util_format_yv12_unpack_rgba_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+void
+util_format_yv12_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+void
+util_format_yv12_fetch_rgba_float(float *dst, const uint8_t *src,
+ unsigned i, unsigned j);
+void
+util_format_yv16_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+void
+util_format_yv16_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+void
+util_format_yv16_unpack_rgba_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+void
+util_format_yv16_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+void
+util_format_yv16_fetch_rgba_float(float *dst, const uint8_t *src,
+ unsigned i, unsigned j);
+void
+util_format_iyuv_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+void
+util_format_iyuv_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+void
+util_format_iyuv_unpack_rgba_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+void
+util_format_iyuv_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+void
+util_format_iyuv_fetch_rgba_float(float *dst, const uint8_t *src,
+ unsigned i, unsigned j);
+void
+util_format_nv12_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+void
+util_format_nv12_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+void
+util_format_nv12_unpack_rgba_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+void
+util_format_nv12_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+void
+util_format_nv12_fetch_rgba_float(float *dst, const uint8_t *src,
+ unsigned i, unsigned j);
+void
+util_format_nv21_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+void
+util_format_nv21_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+void
+util_format_nv21_unpack_rgba_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+void
+util_format_nv21_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+void
+util_format_nv21_fetch_rgba_float(float *dst, const uint8_t *src,
+ unsigned i, unsigned j);
+void
+util_format_ia44_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+void
+util_format_ia44_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+void
+util_format_ia44_unpack_rgba_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+void
+util_format_ia44_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+void
+util_format_ia44_fetch_rgba_float(float *dst, const uint8_t *src,
+ unsigned i, unsigned j);
+void
+util_format_ai44_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+void
+util_format_ai44_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+void
+util_format_ai44_unpack_rgba_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+void
+util_format_ai44_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+void
+util_format_ai44_fetch_rgba_float(float *dst, const uint8_t *src,
+ unsigned i, unsigned j);
+
+
+void
+util_format_r8g8_b8g8_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+
+void
+util_format_r8g8_b8g8_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+
+void
+util_format_r8g8_b8g8_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+
+void
+util_format_r8g8_b8g8_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+
+void
+util_format_r8g8_b8g8_unorm_fetch_rgba_float(float *dst, const uint8_t *src,
+ unsigned i, unsigned j);
+
+void
+util_format_g8r8_g8b8_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+
+void
+util_format_g8r8_g8b8_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+
+void
+util_format_g8r8_g8b8_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+
+void
+util_format_g8r8_g8b8_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height);
+
+void
+util_format_g8r8_g8b8_unorm_fetch_rgba_float(float *dst, const uint8_t *src,
+ unsigned i, unsigned j);
+
+
+
+#endif /* U_FORMAT_YUV_H_ */
diff --git a/src/gallium/auxiliary/util/u_format_zs.c b/src/gallium/auxiliary/util/u_format_zs.c
new file mode 100644
index 00000000000..792d69c214c
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_format_zs.c
@@ -0,0 +1,920 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+
+#include "u_debug.h"
+#include "u_math.h"
+#include "u_format_zs.h"
+
+
+/*
+ * z32_unorm conversion functions
+ */
+
+static INLINE uint16_t
+z32_unorm_to_z16_unorm(uint32_t z)
+{
+ /* z * 0xffff / 0xffffffff */
+ return z >> 16;
+}
+
+static INLINE uint32_t
+z16_unorm_to_z32_unorm(uint16_t z)
+{
+ /* z * 0xffffffff / 0xffff */
+ return (z << 16) | z;
+}
+
+static INLINE uint32_t
+z32_unorm_to_z24_unorm(uint32_t z)
+{
+ /* z * 0xffffff / 0xffffffff */
+ return z >> 8;
+}
+
+static INLINE uint32_t
+z24_unorm_to_z32_unorm(uint32_t z)
+{
+ /* z * 0xffffffff / 0xffffff */
+ return (z << 8) | (z >> 16);
+}
+
+
+/*
+ * z32_float conversion functions
+ */
+
+static INLINE uint16_t
+z32_float_to_z16_unorm(float z)
+{
+ const float scale = 0xffff;
+ return (uint16_t)(z * scale);
+}
+
+static INLINE float
+z16_unorm_to_z32_float(uint16_t z)
+{
+ const float scale = 1.0 / 0xffff;
+ return (float)(z * scale);
+}
+
+static INLINE uint32_t
+z32_float_to_z24_unorm(float z)
+{
+ const double scale = 0xffffff;
+ return (uint32_t)(z * scale) & 0xffffff;
+}
+
+static INLINE float
+z24_unorm_to_z32_float(uint32_t z)
+{
+ const double scale = 1.0 / 0xffffff;
+ return (float)(z * scale);
+}
+
+static INLINE uint32_t
+z32_float_to_z32_unorm(float z)
+{
+ const double scale = 0xffffffff;
+ return (uint32_t)(z * scale);
+}
+
+static INLINE float
+z32_unorm_to_z32_float(uint32_t z)
+{
+ const double scale = 1.0 / 0xffffffff;
+ return (float)(z * scale);
+}
+
+
+void
+util_format_s8_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned y;
+ for(y = 0; y < height; ++y) {
+ memcpy(dst_row, src_row, width);
+ src_row += src_stride/sizeof(*src_row);
+ dst_row += dst_stride/sizeof(*dst_row);
+ }
+}
+
+void
+util_format_s8_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned y;
+ for(y = 0; y < height; ++y) {
+ memcpy(dst_row, src_row, width);
+ src_row += src_stride/sizeof(*src_row);
+ dst_row += dst_stride/sizeof(*dst_row);
+ }
+}
+
+void
+util_format_z16_unorm_unpack_z_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; ++y) {
+ float *dst = dst_row;
+ const uint16_t *src = (const uint16_t *)src_row;
+ for(x = 0; x < width; ++x) {
+ uint16_t value = *src++;
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap16(value);
+#endif
+ *dst++ = z16_unorm_to_z32_float(value);
+ }
+ src_row += src_stride/sizeof(*src_row);
+ dst_row += dst_stride/sizeof(*dst_row);
+ }
+}
+
+void
+util_format_z16_unorm_pack_z_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; ++y) {
+ const float *src = src_row;
+ uint16_t *dst = (uint16_t *)dst_row;
+ for(x = 0; x < width; ++x) {
+ uint16_t value;
+ value = z32_float_to_z16_unorm(*src++);
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap16(value);
+#endif
+ *dst++ = value;
+ }
+ dst_row += dst_stride/sizeof(*dst_row);
+ src_row += src_stride/sizeof(*src_row);
+ }
+}
+
+void
+util_format_z16_unorm_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; ++y) {
+ uint32_t *dst = dst_row;
+ const uint16_t *src = (const uint16_t *)src_row;
+ for(x = 0; x < width; ++x) {
+ uint16_t value = *src++;
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap16(value);
+#endif
+ *dst++ = z16_unorm_to_z32_unorm(value);
+ }
+ src_row += src_stride/sizeof(*src_row);
+ dst_row += dst_stride/sizeof(*dst_row);
+ }
+}
+
+void
+util_format_z16_unorm_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint32_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; ++y) {
+ const uint32_t *src = src_row;
+ uint16_t *dst = (uint16_t *)dst_row;
+ for(x = 0; x < width; ++x) {
+ uint16_t value;
+ value = z32_unorm_to_z16_unorm(*src++);
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap16(value);
+#endif
+ *dst++ = value;
+ }
+ dst_row += dst_stride/sizeof(*dst_row);
+ src_row += src_stride/sizeof(*src_row);
+ }
+}
+
+void
+util_format_z32_unorm_unpack_z_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; ++y) {
+ float *dst = dst_row;
+ const uint32_t *src = (const uint32_t *)src_row;
+ for(x = 0; x < width; ++x) {
+ uint32_t value = *src++;
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+ *dst++ = z32_unorm_to_z32_float(value);
+ }
+ src_row += src_stride/sizeof(*src_row);
+ dst_row += dst_stride/sizeof(*dst_row);
+ }
+}
+
+void
+util_format_z32_unorm_pack_z_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; ++y) {
+ const float *src = src_row;
+ uint32_t *dst = (uint32_t *)dst_row;
+ for(x = 0; x < width; ++x) {
+ uint32_t value;
+ value = z32_float_to_z32_unorm(*src++);
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+ *dst++ = value;
+ }
+ dst_row += dst_stride/sizeof(*dst_row);
+ src_row += src_stride/sizeof(*src_row);
+ }
+}
+
+void
+util_format_z32_unorm_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned y;
+ for(y = 0; y < height; ++y) {
+ memcpy(dst_row, src_row, width * 4);
+ src_row += src_stride/sizeof(*src_row);
+ dst_row += dst_stride/sizeof(*dst_row);
+ }
+}
+
+void
+util_format_z32_unorm_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint32_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned y;
+ for(y = 0; y < height; ++y) {
+ memcpy(dst_row, src_row, width * 4);
+ src_row += src_stride/sizeof(*src_row);
+ dst_row += dst_stride/sizeof(*dst_row);
+ }
+}
+
+void
+util_format_z32_float_unpack_z_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned y;
+ for(y = 0; y < height; ++y) {
+ memcpy(dst_row, src_row, width * 4);
+ src_row += src_stride/sizeof(*src_row);
+ dst_row += dst_stride/sizeof(*dst_row);
+ }
+}
+
+void
+util_format_z32_float_pack_z_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned y;
+ for(y = 0; y < height; ++y) {
+ memcpy(dst_row, src_row, width * 4);
+ src_row += src_stride/sizeof(*src_row);
+ dst_row += dst_stride/sizeof(*dst_row);
+ }
+}
+
+void
+util_format_z32_float_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; ++y) {
+ uint32_t *dst = dst_row;
+ const float *src = (const float *)src_row;
+ for(x = 0; x < width; ++x) {
+ *dst++ = z32_float_to_z32_unorm(*src++);
+ }
+ src_row += src_stride/sizeof(*src_row);
+ dst_row += dst_stride/sizeof(*dst_row);
+ }
+}
+
+void
+util_format_z32_float_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint32_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; ++y) {
+ const uint32_t *src = src_row;
+ float *dst = (float *)dst_row;
+ for(x = 0; x < width; ++x) {
+ *dst++ = z32_unorm_to_z32_float(*src++);
+ }
+ dst_row += dst_stride/sizeof(*dst_row);
+ src_row += src_stride/sizeof(*src_row);
+ }
+}
+
+void
+util_format_z24_unorm_s8_uscaled_unpack_z_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; ++y) {
+ float *dst = dst_row;
+ const uint32_t *src = (const uint32_t *)src_row;
+ for(x = 0; x < width; ++x) {
+ uint32_t value = *src++;
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+ *dst++ = z24_unorm_to_z32_float(value & 0xffffff);
+ }
+ src_row += src_stride/sizeof(*src_row);
+ dst_row += dst_stride/sizeof(*dst_row);
+ }
+}
+
+void
+util_format_z24_unorm_s8_uscaled_pack_z_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; ++y) {
+ const float *src = src_row;
+ uint32_t *dst = (uint32_t *)dst_row;
+ for(x = 0; x < width; ++x) {
+ uint32_t value = *dst;
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+ value &= 0xff000000;
+ value |= z32_float_to_z24_unorm(*src++);
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+ *dst++ = value;
+ }
+ dst_row += dst_stride/sizeof(*dst_row);
+ src_row += src_stride/sizeof(*src_row);
+ }
+}
+
+void
+util_format_z24_unorm_s8_uscaled_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; ++y) {
+ uint32_t *dst = dst_row;
+ const uint32_t *src = (const uint32_t *)src_row;
+ for(x = 0; x < width; ++x) {
+ uint32_t value = *src++;
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+ *dst++ = z24_unorm_to_z32_unorm(value & 0xffffff);
+ }
+ src_row += src_stride/sizeof(*src_row);
+ dst_row += dst_stride/sizeof(*dst_row);
+ }
+}
+
+void
+util_format_z24_unorm_s8_uscaled_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint32_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; ++y) {
+ const uint32_t *src = src_row;
+ uint32_t *dst = (uint32_t *)dst_row;
+ for(x = 0; x < width; ++x) {
+ uint32_t value= *dst;
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+ value &= 0xff000000;
+ value |= z32_unorm_to_z24_unorm(*src++);
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+ *dst++ = value;
+ }
+ dst_row += dst_stride/sizeof(*dst_row);
+ src_row += src_stride/sizeof(*src_row);
+ }
+}
+
+void
+util_format_z24_unorm_s8_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; ++y) {
+ uint8_t *dst = dst_row;
+ const uint32_t *src = (const uint32_t *)src_row;
+ for(x = 0; x < width; ++x) {
+ uint32_t value = *src++;
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+ *dst++ = value >> 24;
+ }
+ src_row += src_stride/sizeof(*src_row);
+ dst_row += dst_stride/sizeof(*dst_row);
+ }
+}
+
+void
+util_format_z24_unorm_s8_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; ++y) {
+ const uint8_t *src = src_row;
+ uint32_t *dst = (uint32_t *)dst_row;
+ for(x = 0; x < width; ++x) {
+ uint32_t value = *dst;
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+ value &= 0x00ffffff;
+ value |= *src++ << 24;
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+ *dst++ = value;
+ }
+ dst_row += dst_stride/sizeof(*dst_row);
+ src_row += src_stride/sizeof(*src_row);
+ }
+}
+
+void
+util_format_s8_uscaled_z24_unorm_unpack_z_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; ++y) {
+ float *dst = dst_row;
+ const uint32_t *src = (const uint32_t *)src_row;
+ for(x = 0; x < width; ++x) {
+ uint32_t value = *src++;
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+ *dst++ = z24_unorm_to_z32_float(value >> 8);
+ }
+ src_row += src_stride/sizeof(*src_row);
+ dst_row += dst_stride/sizeof(*dst_row);
+ }
+}
+
+void
+util_format_s8_uscaled_z24_unorm_pack_z_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; ++y) {
+ const float *src = src_row;
+ uint32_t *dst = (uint32_t *)dst_row;
+ for(x = 0; x < width; ++x) {
+ uint32_t value = *dst;
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+ value &= 0x000000ff;
+ value |= z32_float_to_z24_unorm(*src++) << 8;
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+ *dst++ = value;
+ }
+ dst_row += dst_stride/sizeof(*dst_row);
+ src_row += src_stride/sizeof(*src_row);
+ }
+}
+
+void
+util_format_s8_uscaled_z24_unorm_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; ++y) {
+ uint32_t *dst = dst_row;
+ const uint32_t *src = (const uint32_t *)src_row;
+ for(x = 0; x < width; ++x) {
+ uint32_t value = *src++;
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+ *dst++ = z24_unorm_to_z32_unorm(value >> 8);
+ }
+ src_row += src_stride/sizeof(*src_row);
+ dst_row += dst_stride/sizeof(*dst_row);
+ }
+}
+
+void
+util_format_s8_uscaled_z24_unorm_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint32_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; ++y) {
+ const uint32_t *src = src_row;
+ uint32_t *dst = (uint32_t *)dst_row;
+ for(x = 0; x < width; ++x) {
+ uint32_t value = *dst;
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+ value &= 0x000000ff;
+ value |= *src++ & 0xffffff00;
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+ *dst++ = value;
+ }
+ dst_row += dst_stride/sizeof(*dst_row);
+ src_row += src_stride/sizeof(*src_row);
+ }
+}
+
+void
+util_format_s8_uscaled_z24_unorm_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; ++y) {
+ uint8_t *dst = dst_row;
+ const uint32_t *src = (const uint32_t *)src_row;
+ for(x = 0; x < width; ++x) {
+ uint32_t value = *src++;
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+ *dst++ = value & 0xff;
+ }
+ src_row += src_stride/sizeof(*src_row);
+ dst_row += dst_stride/sizeof(*dst_row);
+ }
+}
+
+void
+util_format_s8_uscaled_z24_unorm_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; ++y) {
+ const uint8_t *src = src_row;
+ uint32_t *dst = (uint32_t *)dst_row;
+ for(x = 0; x < width; ++x) {
+ uint32_t value = *dst;
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+ value &= 0xffffff00;
+ value |= *src++;
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+ *dst++ = value;
+ }
+ dst_row += dst_stride/sizeof(*dst_row);
+ src_row += src_stride/sizeof(*src_row);
+ }
+}
+
+void
+util_format_z24x8_unorm_unpack_z_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; ++y) {
+ float *dst = dst_row;
+ const uint32_t *src = (const uint32_t *)src_row;
+ for(x = 0; x < width; ++x) {
+ uint32_t value = *src++;
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+ *dst++ = z24_unorm_to_z32_float(value & 0xffffff);
+ }
+ src_row += src_stride/sizeof(*src_row);
+ dst_row += dst_stride/sizeof(*dst_row);
+ }
+}
+
+void
+util_format_z24x8_unorm_pack_z_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; ++y) {
+ const float *src = src_row;
+ uint32_t *dst = (uint32_t *)dst_row;
+ for(x = 0; x < width; ++x) {
+ uint32_t value;
+ value = z32_float_to_z24_unorm(*src++);
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+ *dst++ = value;
+ }
+ dst_row += dst_stride/sizeof(*dst_row);
+ src_row += src_stride/sizeof(*src_row);
+ }
+}
+
+void
+util_format_z24x8_unorm_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; ++y) {
+ uint32_t *dst = dst_row;
+ const uint32_t *src = (const uint32_t *)src_row;
+ for(x = 0; x < width; ++x) {
+ uint32_t value = *src++;
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+ *dst++ = z24_unorm_to_z32_unorm(value & 0xffffff);
+ }
+ src_row += src_stride/sizeof(*src_row);
+ dst_row += dst_stride/sizeof(*dst_row);
+ }
+}
+
+void
+util_format_z24x8_unorm_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint32_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; ++y) {
+ const uint32_t *src = src_row;
+ uint32_t *dst = (uint32_t *)dst_row;
+ for(x = 0; x < width; ++x) {
+ uint32_t value;
+ value = z32_unorm_to_z24_unorm(*src++);
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+ *dst++ = value;
+ }
+ dst_row += dst_stride/sizeof(*dst_row);
+ src_row += src_stride/sizeof(*src_row);
+ }
+}
+
+void
+util_format_x8z24_unorm_unpack_z_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; ++y) {
+ float *dst = dst_row;
+ const uint32_t *src = (uint32_t *)src_row;
+ for(x = 0; x < width; ++x) {
+ uint32_t value = *src++;
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+ *dst++ = z24_unorm_to_z32_float(value >> 8);
+ }
+ src_row += src_stride/sizeof(*src_row);
+ dst_row += dst_stride/sizeof(*dst_row);
+ }
+}
+
+void
+util_format_x8z24_unorm_pack_z_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; ++y) {
+ const float *src = src_row;
+ uint32_t *dst = (uint32_t *)dst_row;
+ for(x = 0; x < width; ++x) {
+ uint32_t value;
+ value = z32_float_to_z24_unorm(*src++) << 8;
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+ *dst++ = value;
+ }
+ dst_row += dst_stride/sizeof(*dst_row);
+ src_row += src_stride/sizeof(*src_row);
+ }
+}
+
+void
+util_format_x8z24_unorm_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; ++y) {
+ uint32_t *dst = dst_row;
+ const uint32_t *src = (const uint32_t *)src_row;
+ for(x = 0; x < width; ++x) {
+ uint32_t value = *src++;
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+ *dst++ = z24_unorm_to_z32_unorm(value >> 8);
+ }
+ src_row += src_stride/sizeof(*src_row);
+ dst_row += dst_stride/sizeof(*dst_row);
+ }
+}
+
+void
+util_format_x8z24_unorm_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint32_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; ++y) {
+ const uint32_t *src = src_row;
+ uint32_t *dst = (uint32_t *)dst_row;
+ for(x = 0; x < width; ++x) {
+ uint32_t value;
+ value = z32_unorm_to_z24_unorm(*src++) << 8;
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ value = util_bswap32(value);
+#endif
+ *dst++ = value;
+ }
+ dst_row += dst_stride/sizeof(*dst_row);
+ src_row += src_stride/sizeof(*src_row);
+ }
+}
+
+void
+util_format_z32_float_s8x24_uscaled_unpack_z_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; ++y) {
+ float *dst = dst_row;
+ const float *src = (const float *)src_row;
+ for(x = 0; x < width; ++x) {
+ *dst = *src;
+ src += 2;
+ dst += 1;
+ }
+ src_row += src_stride/sizeof(*src_row);
+ dst_row += dst_stride/sizeof(*dst_row);
+ }
+}
+
+void
+util_format_z32_float_s8x24_uscaled_pack_z_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; ++y) {
+ const float *src = src_row;
+ float *dst = (float *)dst_row;
+ for(x = 0; x < width; ++x) {
+ *dst = *src;
+ src += 1;
+ dst += 2;
+ }
+ dst_row += dst_stride/sizeof(*dst_row);
+ src_row += src_stride/sizeof(*src_row);
+ }
+}
+
+void
+util_format_z32_float_s8x24_uscaled_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; ++y) {
+ uint32_t *dst = dst_row;
+ const float *src = (const float *)src_row;
+ for(x = 0; x < width; ++x) {
+ *dst = z32_float_to_z32_unorm(*src);
+ src += 2;
+ dst += 1;
+ }
+ src_row += src_stride/sizeof(*src_row);
+ dst_row += dst_stride/sizeof(*dst_row);
+ }
+}
+
+void
+util_format_z32_float_s8x24_uscaled_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint32_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; ++y) {
+ const uint32_t *src = src_row;
+ float *dst = (float *)dst_row;
+ for(x = 0; x < width; ++x) {
+ *dst++ = z32_unorm_to_z32_float(*src++);
+ }
+ dst_row += dst_stride/sizeof(*dst_row);
+ src_row += src_stride/sizeof(*src_row);
+ }
+}
+
+void
+util_format_z32_float_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; ++y) {
+ uint8_t *dst = dst_row;
+ const uint8_t *src = src_row + 4;
+ for(x = 0; x < width; ++x) {
+ *dst = *src;
+ src += 8;
+ dst += 1;
+ }
+ src_row += src_stride/sizeof(*src_row);
+ dst_row += dst_stride/sizeof(*dst_row);
+ }
+}
+
+void
+util_format_z32_float_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; ++y) {
+ const uint8_t *src = src_row;
+ uint8_t *dst = dst_row + 4;
+ for(x = 0; x < width; ++x) {
+ *dst = *src;
+ src += 1;
+ dst += 8;
+ }
+ dst_row += dst_stride/sizeof(*dst_row);
+ src_row += src_stride/sizeof(*src_row);
+ }
+}
+
diff --git a/src/gallium/auxiliary/util/u_format_zs.h b/src/gallium/auxiliary/util/u_format_zs.h
new file mode 100644
index 00000000000..650db4b95fd
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_format_zs.h
@@ -0,0 +1,196 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+
+#ifndef U_FORMAT_ZS_H_
+#define U_FORMAT_ZS_H_
+
+
+#include "pipe/p_compiler.h"
+
+
+void
+util_format_s8_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+
+void
+util_format_s8_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+
+void
+util_format_z16_unorm_unpack_z_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+
+void
+util_format_z16_unorm_pack_z_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+
+void
+util_format_z16_unorm_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+
+void
+util_format_z16_unorm_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride, const uint32_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+
+void
+util_format_z32_unorm_unpack_z_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+
+void
+util_format_z32_unorm_pack_z_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+
+void
+util_format_z32_unorm_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+
+void
+util_format_z32_unorm_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride, const uint32_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+
+void
+util_format_z32_float_unpack_z_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+
+void
+util_format_z32_float_pack_z_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+
+void
+util_format_z32_float_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+
+void
+util_format_z32_float_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride, const uint32_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+
+void
+util_format_z24_unorm_s8_uscaled_unpack_z_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+
+void
+util_format_z24_unorm_s8_uscaled_pack_z_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+
+void
+util_format_z24_unorm_s8_uscaled_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+
+void
+util_format_z24_unorm_s8_uscaled_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride, const uint32_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+
+void
+util_format_z24_unorm_s8_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+
+void
+util_format_z24_unorm_s8_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+
+void
+util_format_s8_uscaled_z24_unorm_unpack_z_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+
+void
+util_format_s8_uscaled_z24_unorm_pack_z_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+
+void
+util_format_s8_uscaled_z24_unorm_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+
+void
+util_format_s8_uscaled_z24_unorm_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride, const uint32_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+
+void
+util_format_s8_uscaled_z24_unorm_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+
+void
+util_format_s8_uscaled_z24_unorm_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+
+void
+util_format_z24x8_unorm_unpack_z_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+
+void
+util_format_z24x8_unorm_pack_z_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+
+void
+util_format_z24x8_unorm_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+
+void
+util_format_z24x8_unorm_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride, const uint32_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+
+void
+util_format_x8z24_unorm_unpack_z_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+
+void
+util_format_x8z24_unorm_pack_z_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+
+void
+util_format_x8z24_unorm_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+
+void
+util_format_x8z24_unorm_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride, const uint32_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+
+void
+util_format_z32_float_s8x24_uscaled_unpack_z_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+
+void
+util_format_z32_float_s8x24_uscaled_pack_z_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+
+void
+util_format_z32_float_s8x24_uscaled_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+
+void
+util_format_z32_float_s8x24_uscaled_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride, const uint32_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+
+void
+util_format_z32_float_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+
+void
+util_format_z32_float_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+
+#endif /* U_FORMAT_ZS_H_ */
diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c
index fc027e48e4e..eee6030ddcc 100644
--- a/src/gallium/auxiliary/util/u_gen_mipmap.c
+++ b/src/gallium/auxiliary/util/u_gen_mipmap.c
@@ -62,11 +62,12 @@ struct gen_mipmap_state
struct pipe_rasterizer_state rasterizer;
struct pipe_sampler_state sampler;
struct pipe_clip_state clip;
+ struct pipe_vertex_element velem[2];
void *vs;
void *fs2d, *fsCube;
- struct pipe_buffer *vbuf; /**< quad vertices */
+ struct pipe_resource *vbuf; /**< quad vertices */
unsigned vbuf_slot;
float vertices[4][2][4]; /**< vertex/texcoords for quad */
@@ -937,6 +938,7 @@ format_to_type_comps(enum pipe_format pformat,
*datatype = DTYPE_UBYTE;
*comps = 4;
return;
+ case PIPE_FORMAT_B5G5R5X1_UNORM:
case PIPE_FORMAT_B5G5R5A1_UNORM:
*datatype = DTYPE_USHORT_1_5_5_5_REV;
*comps = 4;
@@ -1114,11 +1116,10 @@ reduce_3d(enum pipe_format pformat,
static void
make_1d_mipmap(struct gen_mipmap_state *ctx,
- struct pipe_texture *pt,
+ struct pipe_resource *pt,
uint face, uint baseLevel, uint lastLevel)
{
struct pipe_context *pipe = ctx->pipe;
- struct pipe_screen *screen = pipe->screen;
const uint zslice = 0;
uint dstLevel;
@@ -1127,38 +1128,37 @@ make_1d_mipmap(struct gen_mipmap_state *ctx,
struct pipe_transfer *srcTrans, *dstTrans;
void *srcMap, *dstMap;
- srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice,
+ srcTrans = pipe_get_transfer(pipe, pt, face, srcLevel, zslice,
PIPE_TRANSFER_READ, 0, 0,
u_minify(pt->width0, srcLevel),
u_minify(pt->height0, srcLevel));
- dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice,
+ dstTrans = pipe_get_transfer(pipe, pt, face, dstLevel, zslice,
PIPE_TRANSFER_WRITE, 0, 0,
u_minify(pt->width0, dstLevel),
u_minify(pt->height0, dstLevel));
- srcMap = (ubyte *) screen->transfer_map(screen, srcTrans);
- dstMap = (ubyte *) screen->transfer_map(screen, dstTrans);
+ srcMap = (ubyte *) pipe->transfer_map(pipe, srcTrans);
+ dstMap = (ubyte *) pipe->transfer_map(pipe, dstTrans);
reduce_1d(pt->format,
- srcTrans->width, srcMap,
- dstTrans->width, dstMap);
+ srcTrans->box.width, srcMap,
+ dstTrans->box.width, dstMap);
- screen->transfer_unmap(screen, srcTrans);
- screen->transfer_unmap(screen, dstTrans);
+ pipe->transfer_unmap(pipe, srcTrans);
+ pipe->transfer_unmap(pipe, dstTrans);
- screen->tex_transfer_destroy(srcTrans);
- screen->tex_transfer_destroy(dstTrans);
+ pipe->transfer_destroy(pipe, srcTrans);
+ pipe->transfer_destroy(pipe, dstTrans);
}
}
static void
make_2d_mipmap(struct gen_mipmap_state *ctx,
- struct pipe_texture *pt,
+ struct pipe_resource *pt,
uint face, uint baseLevel, uint lastLevel)
{
struct pipe_context *pipe = ctx->pipe;
- struct pipe_screen *screen = pipe->screen;
const uint zslice = 0;
uint dstLevel;
@@ -1170,36 +1170,36 @@ make_2d_mipmap(struct gen_mipmap_state *ctx,
struct pipe_transfer *srcTrans, *dstTrans;
ubyte *srcMap, *dstMap;
- srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice,
- PIPE_TRANSFER_READ, 0, 0,
- u_minify(pt->width0, srcLevel),
- u_minify(pt->height0, srcLevel));
- dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice,
- PIPE_TRANSFER_WRITE, 0, 0,
- u_minify(pt->width0, dstLevel),
- u_minify(pt->height0, dstLevel));
-
- srcMap = (ubyte *) screen->transfer_map(screen, srcTrans);
- dstMap = (ubyte *) screen->transfer_map(screen, dstTrans);
+ srcTrans = pipe_get_transfer(pipe, pt, face, srcLevel, zslice,
+ PIPE_TRANSFER_READ, 0, 0,
+ u_minify(pt->width0, srcLevel),
+ u_minify(pt->height0, srcLevel));
+ dstTrans = pipe_get_transfer(pipe, pt, face, dstLevel, zslice,
+ PIPE_TRANSFER_WRITE, 0, 0,
+ u_minify(pt->width0, dstLevel),
+ u_minify(pt->height0, dstLevel));
+
+ srcMap = (ubyte *) pipe->transfer_map(pipe, srcTrans);
+ dstMap = (ubyte *) pipe->transfer_map(pipe, dstTrans);
reduce_2d(pt->format,
- srcTrans->width, srcTrans->height,
+ srcTrans->box.width, srcTrans->box.height,
srcTrans->stride, srcMap,
- dstTrans->width, dstTrans->height,
+ dstTrans->box.width, dstTrans->box.height,
dstTrans->stride, dstMap);
- screen->transfer_unmap(screen, srcTrans);
- screen->transfer_unmap(screen, dstTrans);
+ pipe->transfer_unmap(pipe, srcTrans);
+ pipe->transfer_unmap(pipe, dstTrans);
- screen->tex_transfer_destroy(srcTrans);
- screen->tex_transfer_destroy(dstTrans);
+ pipe->transfer_destroy(pipe, srcTrans);
+ pipe->transfer_destroy(pipe, dstTrans);
}
}
static void
make_3d_mipmap(struct gen_mipmap_state *ctx,
- struct pipe_texture *pt,
+ struct pipe_resource *pt,
uint face, uint baseLevel, uint lastLevel)
{
#if 0
@@ -1215,17 +1215,17 @@ make_3d_mipmap(struct gen_mipmap_state *ctx,
struct pipe_transfer *srcTrans, *dstTrans;
ubyte *srcMap, *dstMap;
- srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice,
+ srcTrans = pipe->get_transfer(pipe, pt, face, srcLevel, zslice,
PIPE_TRANSFER_READ, 0, 0,
u_minify(pt->width0, srcLevel),
u_minify(pt->height0, srcLevel));
- dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice,
+ dstTrans = pipe->get_transfer(pipe, pt, face, dstLevel, zslice,
PIPE_TRANSFER_WRITE, 0, 0,
u_minify(pt->width0, dstLevel),
u_minify(pt->height0, dstLevel));
- srcMap = (ubyte *) screen->transfer_map(screen, srcTrans);
- dstMap = (ubyte *) screen->transfer_map(screen, dstTrans);
+ srcMap = (ubyte *) pipe->transfer_map(pipe, srcTrans);
+ dstMap = (ubyte *) pipe->transfer_map(pipe, dstTrans);
reduce_3d(pt->format,
srcTrans->width, srcTrans->height,
@@ -1233,11 +1233,11 @@ make_3d_mipmap(struct gen_mipmap_state *ctx,
dstTrans->width, dstTrans->height,
dstTrans->stride, dstMap);
- screen->transfer_unmap(screen, srcTrans);
- screen->transfer_unmap(screen, dstTrans);
+ pipe->transfer_unmap(pipe, srcTrans);
+ pipe->transfer_unmap(pipe, dstTrans);
- screen->tex_transfer_destroy(srcTrans);
- screen->tex_transfer_destroy(dstTrans);
+ pipe->transfer_destroy(pipe, srcTrans);
+ pipe->transfer_destroy(pipe, dstTrans);
}
#else
(void) reduce_3d;
@@ -1247,7 +1247,7 @@ make_3d_mipmap(struct gen_mipmap_state *ctx,
static void
fallback_gen_mipmap(struct gen_mipmap_state *ctx,
- struct pipe_texture *pt,
+ struct pipe_resource *pt,
uint face, uint baseLevel, uint lastLevel)
{
switch (pt->target) {
@@ -1307,6 +1307,15 @@ util_create_gen_mipmap(struct pipe_context *pipe,
ctx->sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST;
ctx->sampler.normalized_coords = 1;
+ /* vertex elements state */
+ memset(&ctx->velem[0], 0, sizeof(ctx->velem[0]) * 2);
+ for (i = 0; i < 2; i++) {
+ ctx->velem[i].src_offset = i * 4 * sizeof(float);
+ ctx->velem[i].instance_divisor = 0;
+ ctx->velem[i].vertex_buffer_index = 0;
+ ctx->velem[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ }
+
/* vertex shader - still needed to specify mapping from fragment
* shader input semantics to vertex elements
*/
@@ -1349,8 +1358,7 @@ get_next_slot(struct gen_mipmap_state *ctx)
if (!ctx->vbuf) {
ctx->vbuf = pipe_buffer_create(ctx->pipe->screen,
- 32,
- PIPE_BUFFER_USAGE_VERTEX,
+ PIPE_BIND_VERTEX_BUFFER,
max_slots * sizeof ctx->vertices);
}
@@ -1411,7 +1419,7 @@ set_vertex_data(struct gen_mipmap_state *ctx,
offset = get_next_slot( ctx );
- pipe_buffer_write_nooverlap(ctx->pipe->screen, ctx->vbuf,
+ pipe_buffer_write_nooverlap(ctx->pipe, ctx->vbuf,
offset, sizeof(ctx->vertices), ctx->vertices);
return offset;
@@ -1431,7 +1439,7 @@ util_destroy_gen_mipmap(struct gen_mipmap_state *ctx)
pipe->delete_fs_state(pipe, ctx->fs2d);
pipe->delete_fs_state(pipe, ctx->fsCube);
- pipe_buffer_reference(&ctx->vbuf, NULL);
+ pipe_resource_reference(&ctx->vbuf, NULL);
FREE(ctx);
}
@@ -1443,7 +1451,7 @@ util_destroy_gen_mipmap(struct gen_mipmap_state *ctx)
*/
void util_gen_mipmap_flush( struct gen_mipmap_state *ctx )
{
- pipe_buffer_reference(&ctx->vbuf, NULL);
+ pipe_resource_reference(&ctx->vbuf, NULL);
ctx->vbuf_slot = 0;
}
@@ -1452,7 +1460,7 @@ void util_gen_mipmap_flush( struct gen_mipmap_state *ctx )
* Generate mipmap images. It's assumed all needed texture memory is
* already allocated.
*
- * \param pt the texture to generate mipmap levels for
+ * \param psv the sampler view to the texture to generate mipmap levels for
* \param face which cube face to generate mipmaps for (0 for non-cube maps)
* \param baseLevel the first mipmap level to use as a src
* \param lastLevel the last mipmap level to generate
@@ -1461,12 +1469,13 @@ void util_gen_mipmap_flush( struct gen_mipmap_state *ctx )
*/
void
util_gen_mipmap(struct gen_mipmap_state *ctx,
- struct pipe_texture *pt,
+ struct pipe_sampler_view *psv,
uint face, uint baseLevel, uint lastLevel, uint filter)
{
struct pipe_context *pipe = ctx->pipe;
struct pipe_screen *screen = pipe->screen;
struct pipe_framebuffer_state fb;
+ struct pipe_resource *pt = psv->texture;
void *fs = (pt->target == PIPE_TEXTURE_CUBE) ? ctx->fsCube : ctx->fs2d;
uint dstLevel;
uint zslice = 0;
@@ -1484,8 +1493,8 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
filter == PIPE_TEX_FILTER_NEAREST);
/* check if we can render in the texture's format */
- if (!screen->is_format_supported(screen, pt->format, PIPE_TEXTURE_2D,
- PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)) {
+ if (!screen->is_format_supported(screen, psv->format, PIPE_TEXTURE_2D,
+ PIPE_BIND_RENDER_TARGET, 0)) {
fallback_gen_mipmap(ctx, pt, face, baseLevel, lastLevel);
return;
}
@@ -1495,18 +1504,20 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
cso_save_depth_stencil_alpha(ctx->cso);
cso_save_rasterizer(ctx->cso);
cso_save_samplers(ctx->cso);
- cso_save_sampler_textures(ctx->cso);
+ cso_save_fragment_sampler_views(ctx->cso);
cso_save_framebuffer(ctx->cso);
cso_save_fragment_shader(ctx->cso);
cso_save_vertex_shader(ctx->cso);
cso_save_viewport(ctx->cso);
cso_save_clip(ctx->cso);
+ cso_save_vertex_elements(ctx->cso);
/* bind our state */
cso_set_blend(ctx->cso, &ctx->blend);
cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil);
cso_set_rasterizer(ctx->cso, &ctx->rasterizer);
cso_set_clip(ctx->cso, &ctx->clip);
+ cso_set_vertex_elements(ctx->cso, 2, ctx->velem);
cso_set_fragment_shader_handle(ctx->cso, fs);
cso_set_vertex_shader_handle(ctx->cso, ctx->vs);
@@ -1529,7 +1540,7 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
struct pipe_surface *surf =
screen->get_tex_surface(screen, pt, face, dstLevel, zslice,
- PIPE_BUFFER_USAGE_GPU_WRITE);
+ PIPE_BIND_RENDER_TARGET);
/*
* Setup framebuffer / dest surface
@@ -1562,7 +1573,7 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
cso_single_sampler(ctx->cso, 0, &ctx->sampler);
cso_single_sampler_done(ctx->cso);
- cso_set_sampler_textures(ctx->cso, 1, &pt);
+ cso_set_fragment_sampler_views(ctx->cso, 1, &psv);
/* quad coords in clip coords */
offset = set_vertex_data(ctx,
@@ -1587,10 +1598,11 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
cso_restore_depth_stencil_alpha(ctx->cso);
cso_restore_rasterizer(ctx->cso);
cso_restore_samplers(ctx->cso);
- cso_restore_sampler_textures(ctx->cso);
+ cso_restore_fragment_sampler_views(ctx->cso);
cso_restore_framebuffer(ctx->cso);
cso_restore_fragment_shader(ctx->cso);
cso_restore_vertex_shader(ctx->cso);
cso_restore_viewport(ctx->cso);
cso_restore_clip(ctx->cso);
+ cso_restore_vertex_elements(ctx->cso);
}
diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.h b/src/gallium/auxiliary/util/u_gen_mipmap.h
index 54608f9466d..a7502b9982b 100644
--- a/src/gallium/auxiliary/util/u_gen_mipmap.h
+++ b/src/gallium/auxiliary/util/u_gen_mipmap.h
@@ -37,7 +37,7 @@ extern "C" {
struct pipe_context;
-struct pipe_texture;
+struct pipe_resource;
struct cso_context;
struct gen_mipmap_state;
@@ -59,7 +59,7 @@ util_gen_mipmap_flush( struct gen_mipmap_state *ctx );
extern void
util_gen_mipmap(struct gen_mipmap_state *ctx,
- struct pipe_texture *pt,
+ struct pipe_sampler_view *psv,
uint face, uint baseLevel, uint lastLevel, uint filter);
diff --git a/src/gallium/auxiliary/util/u_half.h b/src/gallium/auxiliary/util/u_half.h
new file mode 100644
index 00000000000..ad030e90c67
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_half.h
@@ -0,0 +1,90 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef U_HALF_H
+#define U_HALF_H
+
+#include "pipe/p_compiler.h"
+#include "util/u_math.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern const uint32_t util_half_to_float_mantissa_table[2048];
+extern const uint32_t util_half_to_float_exponent_table[64];
+extern const uint32_t util_half_to_float_offset_table[64];
+extern const uint16_t util_float_to_half_base_table[512];
+extern const uint8_t util_float_to_half_shift_table[512];
+
+/*
+ * Note that if the half float is a signaling NaN, the x87 FPU will turn
+ * it into a quiet NaN immediately upon loading into a float.
+ *
+ * Additionally, denormals may be flushed to zero.
+ *
+ * To avoid this, use the floatui functions instead of the float ones
+ * when just doing conversion rather than computation on the resulting
+ * floats.
+ */
+
+static INLINE uint32_t
+util_half_to_floatui(uint16_t h)
+{
+ unsigned exp = h >> 10;
+ return util_half_to_float_mantissa_table[util_half_to_float_offset_table[exp] + (h & 0x3ff)] + util_half_to_float_exponent_table[exp];
+}
+
+static INLINE float
+util_half_to_float(uint16_t h)
+{
+ union fi r;
+ r.ui = util_half_to_floatui(h);
+ return r.f;
+}
+
+static INLINE uint16_t
+util_floatui_to_half(uint32_t v)
+{
+ unsigned signexp = v >> 23;
+ return util_float_to_half_base_table[signexp] + ((v & 0x007fffff) >> util_float_to_half_shift_table[signexp]);
+}
+
+static INLINE uint16_t
+util_float_to_half(float f)
+{
+ union fi i;
+ i.f = f;
+ return util_floatui_to_half(i.ui);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* U_HALF_H */
+
diff --git a/src/gallium/auxiliary/util/u_half.py b/src/gallium/auxiliary/util/u_half.py
new file mode 100644
index 00000000000..8007482e971
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_half.py
@@ -0,0 +1,179 @@
+# Copyright 2010 Luca Barbieri
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice (including the
+# next paragraph) shall be included in all copies or substantial
+# portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+# *************************************************************************
+
+# The code is a reimplementation of the algorithm in
+# www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf
+# "Fast Half Float Conversions" by Jeroen van der Zijp, Nov 2008
+#
+# The table contents have been slightly changed so that the exponent
+# bias is now in the exponent table instead of the mantissa table (mostly
+# for cosmetic reasons, and because it theoretically allows a variant
+# that flushes denormal to zero but uses a mantissa table with 24-bit
+# entries).
+#
+# The tables are also constructed slightly differently.
+#
+
+# Note that using a 64K * 4 table is a terrible idea since it will not fit
+# in the L1 cache and will massively pollute the L2 cache as well
+#
+# These should instead fit in the L1 cache.
+#
+# TODO: we could use a denormal bias table instead of the mantissa/offset
+# tables: this would reduce the L1 cache usage from 8704 to 2304 bytes
+# but would involve more computation
+#
+# Note however that if denormals are never encountered, the L1 cache usage
+# is only about 4608 bytes anyway.
+
+table_index = None
+table_length = None
+
+def begin(t, n, l):
+ global table_length
+ global table_index
+ table_index = 0
+ table_length = l
+ print
+ print "const " + t + " " + n + "[" + str(l) + "] = {"
+
+def value(v):
+ global table_index
+ table_index += 1
+ print "\t" + hex(v) + ","
+
+def end():
+ global table_length
+ global table_index
+ print "};"
+ assert table_index == table_length
+
+print "/* This file is autogenerated by u_half.py. Do not edit directly. */"
+print "#include \"util/u_half.h\""
+
+begin("uint32_t", "util_half_to_float_mantissa_table", 2048)
+# zero
+value(0)
+
+# denormals
+for i in xrange(1, 1024):
+ m = i << 13
+ e = 0
+
+ # normalize number
+ while (m & 0x00800000) == 0:
+ e -= 0x00800000;
+ m <<= 1;
+
+ m &= ~0x00800000;
+ e += 0x38800000;
+ value(m | e)
+
+# normals
+for i in xrange(1024, 2048):
+ value((i - 1024) << 13)
+end()
+
+begin("uint32_t", "util_half_to_float_exponent_table", 64)
+# positive zero or denormals
+value(0)
+
+# positive numbers
+for i in xrange(1, 31):
+ value(0x38000000 + (i << 23))
+
+# positive infinity/NaN
+value(0x7f800000)
+
+# negative zero or denormals
+value(0x80000000)
+
+# negative numbers
+for i in range(33, 63):
+ value(0xb8000000 + ((i - 32) << 23))
+
+# negative infinity/NaN
+value(0xff800000)
+end()
+
+begin("uint32_t", "util_half_to_float_offset_table", 64)
+# positive zero or denormals
+value(0)
+
+# positive normals
+for i in range(1, 32):
+ value(1024)
+
+# negative zero or denormals
+value(0)
+
+# negative normals
+for i in xrange(33, 64):
+ value(1024)
+end()
+
+begin("uint16_t", "util_float_to_half_base_table", 512)
+for sign in (0, 0x8000):
+ # very small numbers mapping to zero
+ for i in xrange(-127, -24):
+ value(sign | 0)
+
+ # small numbers mapping to denormals
+ for i in xrange(-24, -14):
+ value(sign | (0x400 >> (-14 -i)))
+
+ # normal numbers
+ for i in xrange(-14, 16):
+ value(sign | ((i + 15) << 10))
+
+ # large numbers mapping to infinity
+ for i in xrange(16, 128):
+ value(sign | 0x7c00)
+
+ # infinity and NaNs
+ value(sign | 0x7c00)
+end()
+
+begin("uint8_t", "util_float_to_half_shift_table", 512)
+for sign in (0, 0x8000):
+ # very small numbers mapping to zero
+ for i in xrange(-127, -24):
+ value(24)
+
+ # small numbers mapping to denormals
+ for i in xrange(-24, -14):
+ value(-1 - i)
+
+ # normal numbers
+ for i in xrange(-14, 16):
+ value(13)
+
+ # large numbers mapping to infinity
+ for i in xrange(16, 128):
+ value(24)
+
+ # infinity and NaNs
+ value(13)
+end()
+
diff --git a/src/gallium/auxiliary/util/u_init.h b/src/gallium/auxiliary/util/u_init.h
new file mode 100644
index 00000000000..7bc356a7916
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_init.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef U_INIT_H
+#define U_INIT_H
+
+/* Use UTIL_INIT(f) to have f called at program initialization.
+ Note that it is only guaranteed to be called if any symbol in the
+ .c file it is in sis referenced by the program.
+
+ UTIL_INIT functions are called in arbitrary order.
+*/
+
+#ifdef __cplusplus
+/* use a C++ global constructor */
+#define UTIL_INIT(f) struct f##__gctor_t {f##__gctor_t() {x();}} f##__gctor;
+#elif defined(_MSC_VER)
+/* add a pointer to the section where MSVC stores global constructor pointers */
+/* see http://blogs.msdn.com/vcblog/archive/2006/10/20/crt-initialization.aspx and
+ http://stackoverflow.com/questions/1113409/attribute-constructor-equivalent-in-vc */
+#pragma section(".CRT$XCU",read)
+#define UTIL_INIT(f) static void __cdecl f##__init(void) {f();}; __declspec(allocate(".CRT$XCU")) void (__cdecl* f##__xcu)(void) = f##__init;
+#elif defined(__GNUC__)
+#define UTIL_INIT(f) static void f##__init(void) __attribute__((constructor)); static void f##__init(void) {f();}
+#else
+#error Unsupported compiler: please find out how to implement global initializers in C on it
+#endif
+
+#endif
+
diff --git a/src/gallium/auxiliary/util/u_inlines.h b/src/gallium/auxiliary/util/u_inlines.h
index 0cb3432c6e4..a48689ee8be 100644
--- a/src/gallium/auxiliary/util/u_inlines.h
+++ b/src/gallium/auxiliary/util/u_inlines.h
@@ -34,6 +34,8 @@
#include "pipe/p_screen.h"
#include "util/u_debug.h"
#include "util/u_atomic.h"
+#include "util/u_box.h"
+#include "util/u_math.h"
#ifdef __cplusplus
@@ -87,18 +89,6 @@ pipe_reference(struct pipe_reference *ptr, struct pipe_reference *reference)
return destroy;
}
-static INLINE void
-pipe_buffer_reference(struct pipe_buffer **ptr, struct pipe_buffer *buf)
-{
- struct pipe_buffer *old_buf;
-
- assert(ptr);
- old_buf = *ptr;
-
- if (pipe_reference(&(*ptr)->reference, &buf->reference))
- old_buf->screen->buffer_destroy(old_buf);
- *ptr = buf;
-}
static INLINE void
pipe_surface_reference(struct pipe_surface **ptr, struct pipe_surface *surf)
@@ -110,106 +100,184 @@ pipe_surface_reference(struct pipe_surface **ptr, struct pipe_surface *surf)
*ptr = surf;
}
+
static INLINE void
-pipe_texture_reference(struct pipe_texture **ptr, struct pipe_texture *tex)
+pipe_resource_reference(struct pipe_resource **ptr, struct pipe_resource *tex)
{
- struct pipe_texture *old_tex = *ptr;
+ struct pipe_resource *old_tex = *ptr;
if (pipe_reference(&(*ptr)->reference, &tex->reference))
- old_tex->screen->texture_destroy(old_tex);
+ old_tex->screen->resource_destroy(old_tex->screen, old_tex);
*ptr = tex;
}
+static INLINE void
+pipe_sampler_view_reference(struct pipe_sampler_view **ptr, struct pipe_sampler_view *view)
+{
+ struct pipe_sampler_view *old_view = *ptr;
+
+ if (pipe_reference(&(*ptr)->reference, &view->reference))
+ old_view->context->sampler_view_destroy(old_view->context, old_view);
+ *ptr = view;
+}
+
+static INLINE void
+pipe_surface_reset(struct pipe_surface* ps, struct pipe_resource *pt,
+ unsigned face, unsigned level, unsigned zslice, unsigned flags)
+{
+ pipe_resource_reference(&ps->texture, pt);
+ ps->format = pt->format;
+ ps->width = u_minify(pt->width0, level);
+ ps->height = u_minify(pt->height0, level);
+ ps->usage = flags;
+ ps->face = face;
+ ps->level = level;
+ ps->zslice = zslice;
+}
+
+static INLINE void
+pipe_surface_init(struct pipe_surface* ps, struct pipe_resource *pt,
+ unsigned face, unsigned level, unsigned zslice, unsigned flags)
+{
+ ps->texture = 0;
+ pipe_reference_init(&ps->reference, 1);
+ pipe_surface_reset(ps, pt, face, level, zslice, flags);
+}
+
/*
* Convenience wrappers for screen buffer functions.
*/
-static INLINE struct pipe_buffer *
+static INLINE struct pipe_resource *
pipe_buffer_create( struct pipe_screen *screen,
- unsigned alignment, unsigned usage, unsigned size )
+ unsigned bind,
+ unsigned size )
{
- return screen->buffer_create(screen, alignment, usage, size);
+ struct pipe_resource buffer;
+ memset(&buffer, 0, sizeof buffer);
+ buffer.target = PIPE_BUFFER;
+ buffer.format = PIPE_FORMAT_R8_UNORM; /* want TYPELESS or similar */
+ buffer.bind = bind;
+ buffer.usage = PIPE_USAGE_DEFAULT;
+ buffer.flags = 0;
+ buffer.width0 = size;
+ buffer.height0 = 1;
+ buffer.depth0 = 1;
+ return screen->resource_create(screen, &buffer);
}
-static INLINE struct pipe_buffer *
-pipe_user_buffer_create( struct pipe_screen *screen, void *ptr, unsigned size )
+
+static INLINE struct pipe_resource *
+pipe_user_buffer_create( struct pipe_screen *screen, void *ptr, unsigned size,
+ unsigned usage )
{
- return screen->user_buffer_create(screen, ptr, size);
+ return screen->user_buffer_create(screen, ptr, size, usage);
}
static INLINE void *
-pipe_buffer_map(struct pipe_screen *screen,
- struct pipe_buffer *buf,
- unsigned usage)
+pipe_buffer_map_range(struct pipe_context *pipe,
+ struct pipe_resource *buffer,
+ unsigned offset,
+ unsigned length,
+ unsigned usage,
+ struct pipe_transfer **transfer)
{
- if(screen->buffer_map_range) {
- unsigned offset = 0;
- unsigned length = buf->size;
- return screen->buffer_map_range(screen, buf, offset, length, usage);
+ struct pipe_box box;
+ void *map;
+
+ assert(offset < buffer->width0);
+ assert(offset + length <= buffer->width0);
+ assert(length);
+
+ u_box_1d(offset, length, &box);
+
+ *transfer = pipe->get_transfer( pipe,
+ buffer,
+ u_subresource(0, 0),
+ usage,
+ &box);
+
+ if (*transfer == NULL)
+ return NULL;
+
+ map = pipe->transfer_map( pipe, *transfer );
+ if (map == NULL) {
+ pipe->transfer_destroy( pipe, *transfer );
+ return NULL;
}
- else
- return screen->buffer_map(screen, buf, usage);
+
+ /* Match old screen->buffer_map_range() behaviour, return pointer
+ * to where the beginning of the buffer would be:
+ */
+ return (void *)((char *)map - offset);
}
-static INLINE void
-pipe_buffer_unmap(struct pipe_screen *screen,
- struct pipe_buffer *buf)
+
+static INLINE void *
+pipe_buffer_map(struct pipe_context *pipe,
+ struct pipe_resource *buffer,
+ unsigned usage,
+ struct pipe_transfer **transfer)
{
- screen->buffer_unmap(screen, buf);
+ return pipe_buffer_map_range(pipe, buffer, 0, buffer->width0, usage, transfer);
}
-static INLINE void *
-pipe_buffer_map_range(struct pipe_screen *screen,
- struct pipe_buffer *buf,
- unsigned offset,
- unsigned length,
- unsigned usage)
+
+static INLINE void
+pipe_buffer_unmap(struct pipe_context *pipe,
+ struct pipe_resource *buf,
+ struct pipe_transfer *transfer)
{
- assert(offset < buf->size);
- assert(offset + length <= buf->size);
- assert(length);
- if(screen->buffer_map_range)
- return screen->buffer_map_range(screen, buf, offset, length, usage);
- else
- return screen->buffer_map(screen, buf, usage);
+ if (transfer) {
+ pipe->transfer_unmap(pipe, transfer);
+ pipe->transfer_destroy(pipe, transfer);
+ }
}
static INLINE void
-pipe_buffer_flush_mapped_range(struct pipe_screen *screen,
- struct pipe_buffer *buf,
+pipe_buffer_flush_mapped_range(struct pipe_context *pipe,
+ struct pipe_transfer *transfer,
unsigned offset,
unsigned length)
{
- assert(offset < buf->size);
- assert(offset + length <= buf->size);
+ struct pipe_box box;
+ int transfer_offset;
+
assert(length);
- if(screen->buffer_flush_mapped_range)
- screen->buffer_flush_mapped_range(screen, buf, offset, length);
+ assert(transfer->box.x <= offset);
+ assert(offset + length <= transfer->box.x + transfer->box.width);
+
+ /* Match old screen->buffer_flush_mapped_range() behaviour, where
+ * offset parameter is relative to the start of the buffer, not the
+ * mapped range.
+ */
+ transfer_offset = offset - transfer->box.x;
+
+ u_box_1d(transfer_offset, length, &box);
+
+ pipe->transfer_flush_region(pipe, transfer, &box);
}
static INLINE void
-pipe_buffer_write(struct pipe_screen *screen,
- struct pipe_buffer *buf,
- unsigned offset, unsigned size,
+pipe_buffer_write(struct pipe_context *pipe,
+ struct pipe_resource *buf,
+ unsigned offset,
+ unsigned size,
const void *data)
{
- void *map;
-
- assert(offset < buf->size);
- assert(offset + size <= buf->size);
- assert(size);
-
- map = pipe_buffer_map_range(screen, buf, offset, size,
- PIPE_BUFFER_USAGE_CPU_WRITE |
- PIPE_BUFFER_USAGE_FLUSH_EXPLICIT |
- PIPE_BUFFER_USAGE_DISCARD);
- assert(map);
- if(map) {
- memcpy((uint8_t *)map + offset, data, size);
- pipe_buffer_flush_mapped_range(screen, buf, offset, size);
- pipe_buffer_unmap(screen, buf);
- }
+ struct pipe_box box;
+
+ u_box_1d(offset, size, &box);
+
+ pipe->transfer_inline_write( pipe,
+ buf,
+ u_subresource(0,0),
+ PIPE_TRANSFER_WRITE,
+ &box,
+ data,
+ size,
+ 0);
}
/**
@@ -219,87 +287,88 @@ pipe_buffer_write(struct pipe_screen *screen,
* been written before.
*/
static INLINE void
-pipe_buffer_write_nooverlap(struct pipe_screen *screen,
- struct pipe_buffer *buf,
+pipe_buffer_write_nooverlap(struct pipe_context *pipe,
+ struct pipe_resource *buf,
unsigned offset, unsigned size,
const void *data)
{
- void *map;
-
- assert(offset < buf->size);
- assert(offset + size <= buf->size);
- assert(size);
-
- map = pipe_buffer_map_range(screen, buf, offset, size,
- PIPE_BUFFER_USAGE_CPU_WRITE |
- PIPE_BUFFER_USAGE_FLUSH_EXPLICIT |
- PIPE_BUFFER_USAGE_DISCARD |
- PIPE_BUFFER_USAGE_UNSYNCHRONIZED);
- assert(map);
- if(map) {
- memcpy((uint8_t *)map + offset, data, size);
- pipe_buffer_flush_mapped_range(screen, buf, offset, size);
- pipe_buffer_unmap(screen, buf);
- }
+ struct pipe_box box;
+
+ u_box_1d(offset, size, &box);
+
+ pipe->transfer_inline_write(pipe,
+ buf,
+ u_subresource(0,0),
+ (PIPE_TRANSFER_WRITE |
+ PIPE_TRANSFER_NOOVERWRITE),
+ &box,
+ data,
+ 0, 0);
}
static INLINE void
-pipe_buffer_read(struct pipe_screen *screen,
- struct pipe_buffer *buf,
- unsigned offset, unsigned size,
+pipe_buffer_read(struct pipe_context *pipe,
+ struct pipe_resource *buf,
+ unsigned offset,
+ unsigned size,
void *data)
{
- void *map;
-
- assert(offset < buf->size);
- assert(offset + size <= buf->size);
- assert(size);
-
- map = pipe_buffer_map_range(screen, buf, offset, size, PIPE_BUFFER_USAGE_CPU_READ);
- assert(map);
- if(map) {
- memcpy(data, (const uint8_t *)map + offset, size);
- pipe_buffer_unmap(screen, buf);
- }
+ struct pipe_transfer *src_transfer;
+ ubyte *map;
+
+ map = (ubyte *) pipe_buffer_map_range(pipe,
+ buf,
+ offset, size,
+ PIPE_TRANSFER_READ,
+ &src_transfer);
+
+ if (map)
+ memcpy(data, map + offset, size);
+
+ pipe_buffer_unmap(pipe, buf, src_transfer);
}
-static INLINE void *
-pipe_transfer_map( struct pipe_transfer *transf )
+static INLINE struct pipe_transfer *
+pipe_get_transfer( struct pipe_context *context,
+ struct pipe_resource *resource,
+ unsigned face, unsigned level,
+ unsigned zslice,
+ enum pipe_transfer_usage usage,
+ unsigned x, unsigned y,
+ unsigned w, unsigned h)
{
- struct pipe_screen *screen = transf->texture->screen;
- return screen->transfer_map(screen, transf);
+ struct pipe_box box;
+ u_box_2d_zslice( x, y, zslice, w, h, &box );
+ return context->get_transfer( context,
+ resource,
+ u_subresource(face, level),
+ usage,
+ &box );
}
-static INLINE void
-pipe_transfer_unmap( struct pipe_transfer *transf )
+static INLINE void *
+pipe_transfer_map( struct pipe_context *context,
+ struct pipe_transfer *transfer )
{
- struct pipe_screen *screen = transf->texture->screen;
- screen->transfer_unmap(screen, transf);
+ return context->transfer_map( context, transfer );
}
static INLINE void
-pipe_transfer_destroy( struct pipe_transfer *transf )
+pipe_transfer_unmap( struct pipe_context *context,
+ struct pipe_transfer *transfer )
{
- struct pipe_screen *screen = transf->texture->screen;
- screen->tex_transfer_destroy(transf);
+ context->transfer_unmap( context, transfer );
}
-static INLINE unsigned
-pipe_transfer_buffer_flags( struct pipe_transfer *transf )
+
+static INLINE void
+pipe_transfer_destroy( struct pipe_context *context,
+ struct pipe_transfer *transfer )
{
- switch (transf->usage & PIPE_TRANSFER_READ_WRITE) {
- case PIPE_TRANSFER_READ_WRITE:
- return PIPE_BUFFER_USAGE_CPU_READ | PIPE_BUFFER_USAGE_CPU_WRITE;
- case PIPE_TRANSFER_READ:
- return PIPE_BUFFER_USAGE_CPU_READ;
- case PIPE_TRANSFER_WRITE:
- return PIPE_BUFFER_USAGE_CPU_WRITE;
- default:
- debug_assert(0);
- return 0;
- }
+ context->transfer_destroy(context, transfer);
}
+
#ifdef __cplusplus
}
#endif
diff --git a/src/gallium/auxiliary/util/u_memory.h b/src/gallium/auxiliary/util/u_memory.h
index a2fc5973565..53d56599fe4 100644
--- a/src/gallium/auxiliary/util/u_memory.h
+++ b/src/gallium/auxiliary/util/u_memory.h
@@ -88,7 +88,7 @@ mem_dup(const void *src, uint size)
/**
* Offset of a field in a struct, in bytes.
*/
-#define Offset(TYPE, MEMBER) ((unsigned)&(((TYPE *)NULL)->MEMBER))
+#define Offset(TYPE, MEMBER) ((uintptr_t)&(((TYPE *)NULL)->MEMBER))
diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h
index 50f1b1670b6..3ebef9fb749 100644
--- a/src/gallium/auxiliary/util/u_pack_color.h
+++ b/src/gallium/auxiliary/util/u_pack_color.h
@@ -37,6 +37,7 @@
#include "pipe/p_compiler.h"
#include "pipe/p_format.h"
+#include "util/u_debug.h"
#include "util/u_format.h"
#include "util/u_math.h"
@@ -92,6 +93,11 @@ util_pack_color_ub(ubyte r, ubyte g, ubyte b, ubyte a,
uc->us = ((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3);
}
return;
+ case PIPE_FORMAT_B5G5R5X1_UNORM:
+ {
+ uc->us = ((0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 2) | (b >> 3);
+ }
+ return;
case PIPE_FORMAT_B5G5R5A1_UNORM:
{
uc->us = ((a & 0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 2) | (b >> 3);
@@ -216,6 +222,15 @@ util_unpack_color_ub(enum pipe_format format, union util_color *uc,
*a = (ubyte) 0xff;
}
return;
+ case PIPE_FORMAT_B5G5R5X1_UNORM:
+ {
+ ushort p = uc->us;
+ *r = (ubyte) (((p >> 7) & 0xf8) | ((p >> 12) & 0x7));
+ *g = (ubyte) (((p >> 2) & 0xf8) | ((p >> 7) & 0x7));
+ *b = (ubyte) (((p << 3) & 0xf8) | ((p >> 2) & 0x7));
+ *a = (ubyte) 0xff;
+ }
+ return;
case PIPE_FORMAT_B5G5R5A1_UNORM:
{
ushort p = uc->us;
@@ -361,6 +376,11 @@ util_pack_color(const float rgba[4], enum pipe_format format, union util_color *
uc->us = ((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3);
}
return;
+ case PIPE_FORMAT_B5G5R5X1_UNORM:
+ {
+ uc->us = ((0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 2) | (b >> 3);
+ }
+ return;
case PIPE_FORMAT_B5G5R5A1_UNORM:
{
uc->us = ((a & 0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 2) | (b >> 3);
@@ -427,17 +447,17 @@ util_pack_z(enum pipe_format format, double z)
return (uint) (z * 0xffffffff);
case PIPE_FORMAT_Z32_FLOAT:
return (uint)z;
- case PIPE_FORMAT_Z24S8_UNORM:
+ case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
case PIPE_FORMAT_Z24X8_UNORM:
if (z == 1.0)
return 0xffffff;
return (uint) (z * 0xffffff);
- case PIPE_FORMAT_S8Z24_UNORM:
+ case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
case PIPE_FORMAT_X8Z24_UNORM:
if (z == 1.0)
return 0xffffff00;
return ((uint) (z * 0xffffff)) << 8;
- case PIPE_FORMAT_S8_UNORM:
+ case PIPE_FORMAT_S8_USCALED:
/* this case can get it via util_pack_z_stencil() */
return 0;
default:
@@ -458,13 +478,13 @@ util_pack_z_stencil(enum pipe_format format, double z, uint s)
unsigned packed = util_pack_z(format, z);
switch (format) {
- case PIPE_FORMAT_Z24S8_UNORM:
+ case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
packed |= s << 24;
break;
- case PIPE_FORMAT_S8Z24_UNORM:
+ case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
packed |= s;
break;
- case PIPE_FORMAT_S8_UNORM:
+ case PIPE_FORMAT_S8_USCALED:
packed |= s;
break;
default:
diff --git a/src/gallium/auxiliary/util/u_rect.c b/src/gallium/auxiliary/util/u_rect.c
index 8479161c744..098cdfd58b1 100644
--- a/src/gallium/auxiliary/util/u_rect.c
+++ b/src/gallium/auxiliary/util/u_rect.c
@@ -35,6 +35,7 @@
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/u_format.h"
+#include "util/u_inlines.h"
#include "util/u_rect.h"
@@ -169,7 +170,6 @@ util_surface_copy(struct pipe_context *pipe,
unsigned src_x, unsigned src_y,
unsigned w, unsigned h)
{
- struct pipe_screen *screen = pipe->screen;
struct pipe_transfer *src_trans, *dst_trans;
void *dst_map;
const void *src_map;
@@ -182,28 +182,28 @@ util_surface_copy(struct pipe_context *pipe,
src_format = src->texture->format;
dst_format = dst->texture->format;
- src_trans = screen->get_tex_transfer(screen,
- src->texture,
- src->face,
- src->level,
- src->zslice,
- PIPE_TRANSFER_READ,
- src_x, src_y, w, h);
-
- dst_trans = screen->get_tex_transfer(screen,
- dst->texture,
- dst->face,
- dst->level,
- dst->zslice,
- PIPE_TRANSFER_WRITE,
- dst_x, dst_y, w, h);
+ src_trans = pipe_get_transfer(pipe,
+ src->texture,
+ src->face,
+ src->level,
+ src->zslice,
+ PIPE_TRANSFER_READ,
+ src_x, src_y, w, h);
+
+ dst_trans = pipe_get_transfer(pipe,
+ dst->texture,
+ dst->face,
+ dst->level,
+ dst->zslice,
+ PIPE_TRANSFER_WRITE,
+ dst_x, dst_y, w, h);
assert(util_format_get_blocksize(dst_format) == util_format_get_blocksize(src_format));
assert(util_format_get_blockwidth(dst_format) == util_format_get_blockwidth(src_format));
assert(util_format_get_blockheight(dst_format) == util_format_get_blockheight(src_format));
- src_map = pipe->screen->transfer_map(screen, src_trans);
- dst_map = pipe->screen->transfer_map(screen, dst_trans);
+ src_map = pipe->transfer_map(pipe, src_trans);
+ dst_map = pipe->transfer_map(pipe, dst_trans);
assert(src_map);
assert(dst_map);
@@ -221,11 +221,11 @@ util_surface_copy(struct pipe_context *pipe,
do_flip ? h - 1 : 0);
}
- pipe->screen->transfer_unmap(pipe->screen, src_trans);
- pipe->screen->transfer_unmap(pipe->screen, dst_trans);
+ pipe->transfer_unmap(pipe, src_trans);
+ pipe->transfer_unmap(pipe, dst_trans);
- screen->tex_transfer_destroy(src_trans);
- screen->tex_transfer_destroy(dst_trans);
+ pipe->transfer_destroy(pipe, src_trans);
+ pipe->transfer_destroy(pipe, dst_trans);
}
@@ -243,65 +243,65 @@ util_surface_fill(struct pipe_context *pipe,
unsigned dstx, unsigned dsty,
unsigned width, unsigned height, unsigned value)
{
- struct pipe_screen *screen = pipe->screen;
struct pipe_transfer *dst_trans;
void *dst_map;
assert(dst->texture);
if (!dst->texture)
return;
- dst_trans = screen->get_tex_transfer(screen,
- dst->texture,
- dst->face,
- dst->level,
- dst->zslice,
- PIPE_TRANSFER_WRITE,
- dstx, dsty, width, height);
+ dst_trans = pipe_get_transfer(pipe,
+ dst->texture,
+ dst->face,
+ dst->level,
+ dst->zslice,
+ PIPE_TRANSFER_WRITE,
+ dstx, dsty, width, height);
- dst_map = pipe->screen->transfer_map(screen, dst_trans);
+ dst_map = pipe->transfer_map(pipe, dst_trans);
assert(dst_map);
if (dst_map) {
assert(dst_trans->stride > 0);
- switch (util_format_get_blocksize(dst_trans->texture->format)) {
+ switch (util_format_get_blocksize(dst->texture->format)) {
case 1:
case 2:
case 4:
- util_fill_rect(dst_map, dst_trans->texture->format, dst_trans->stride,
+ util_fill_rect(dst_map, dst->texture->format,
+ dst_trans->stride,
0, 0, width, height, value);
break;
case 8:
- {
- /* expand the 4-byte clear value to an 8-byte value */
- ushort *row = (ushort *) dst_map;
- ushort val0 = UBYTE_TO_USHORT((value >> 0) & 0xff);
- ushort val1 = UBYTE_TO_USHORT((value >> 8) & 0xff);
- ushort val2 = UBYTE_TO_USHORT((value >> 16) & 0xff);
- ushort val3 = UBYTE_TO_USHORT((value >> 24) & 0xff);
- unsigned i, j;
- val0 = (val0 << 8) | val0;
- val1 = (val1 << 8) | val1;
- val2 = (val2 << 8) | val2;
- val3 = (val3 << 8) | val3;
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++) {
- row[j*4+0] = val0;
- row[j*4+1] = val1;
- row[j*4+2] = val2;
- row[j*4+3] = val3;
- }
- row += dst_trans->stride/2;
- }
- }
- break;
+ {
+ /* expand the 4-byte clear value to an 8-byte value */
+ ushort *row = (ushort *) dst_map;
+ ushort val0 = UBYTE_TO_USHORT((value >> 0) & 0xff);
+ ushort val1 = UBYTE_TO_USHORT((value >> 8) & 0xff);
+ ushort val2 = UBYTE_TO_USHORT((value >> 16) & 0xff);
+ ushort val3 = UBYTE_TO_USHORT((value >> 24) & 0xff);
+ unsigned i, j;
+ val0 = (val0 << 8) | val0;
+ val1 = (val1 << 8) | val1;
+ val2 = (val2 << 8) | val2;
+ val3 = (val3 << 8) | val3;
+ for (i = 0; i < height; i++) {
+ for (j = 0; j < width; j++) {
+ row[j*4+0] = val0;
+ row[j*4+1] = val1;
+ row[j*4+2] = val2;
+ row[j*4+3] = val3;
+ }
+ row += dst_trans->stride/2;
+ }
+ }
+ break;
default:
assert(0);
break;
}
}
- pipe->screen->transfer_unmap(pipe->screen, dst_trans);
- screen->tex_transfer_destroy(dst_trans);
+ pipe->transfer_unmap(pipe, dst_trans);
+ pipe->transfer_destroy(pipe, dst_trans);
}
diff --git a/src/gallium/auxiliary/util/u_resource.c b/src/gallium/auxiliary/util/u_resource.c
new file mode 100644
index 00000000000..9e6474b83de
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_resource.c
@@ -0,0 +1,96 @@
+
+
+#include "util/u_inlines.h"
+#include "util/u_transfer.h"
+
+static INLINE struct u_resource *
+u_resource( struct pipe_resource *res )
+{
+ return (struct u_resource *)res;
+}
+
+boolean u_resource_get_handle_vtbl(struct pipe_screen *screen,
+ struct pipe_resource *resource,
+ struct winsys_handle *handle)
+{
+ struct u_resource *ur = u_resource(resource);
+ return ur->vtbl->resource_get_handle(screen, resource, handle);
+}
+
+void u_resource_destroy_vtbl(struct pipe_screen *screen,
+ struct pipe_resource *resource)
+{
+ struct u_resource *ur = u_resource(resource);
+ ur->vtbl->resource_destroy(screen, resource);
+}
+
+unsigned u_is_resource_referenced_vtbl( struct pipe_context *pipe,
+ struct pipe_resource *resource,
+ unsigned face, unsigned level)
+{
+ struct u_resource *ur = u_resource(resource);
+ return ur->vtbl->is_resource_referenced(pipe, resource, face, level);
+}
+
+struct pipe_transfer *u_get_transfer_vtbl(struct pipe_context *context,
+ struct pipe_resource *resource,
+ struct pipe_subresource sr,
+ enum pipe_transfer_usage usage,
+ const struct pipe_box *box)
+{
+ struct u_resource *ur = u_resource(resource);
+ return ur->vtbl->get_transfer(context, resource, sr, usage, box);
+}
+
+void u_transfer_destroy_vtbl(struct pipe_context *pipe,
+ struct pipe_transfer *transfer)
+{
+ struct u_resource *ur = u_resource(transfer->resource);
+ ur->vtbl->transfer_destroy(pipe, transfer);
+}
+
+void *u_transfer_map_vtbl( struct pipe_context *pipe,
+ struct pipe_transfer *transfer )
+{
+ struct u_resource *ur = u_resource(transfer->resource);
+ return ur->vtbl->transfer_map(pipe, transfer);
+}
+
+void u_transfer_flush_region_vtbl( struct pipe_context *pipe,
+ struct pipe_transfer *transfer,
+ const struct pipe_box *box)
+{
+ struct u_resource *ur = u_resource(transfer->resource);
+ ur->vtbl->transfer_flush_region(pipe, transfer, box);
+}
+
+void u_transfer_unmap_vtbl( struct pipe_context *pipe,
+ struct pipe_transfer *transfer )
+{
+ struct u_resource *ur = u_resource(transfer->resource);
+ ur->vtbl->transfer_unmap(pipe, transfer);
+}
+
+void u_transfer_inline_write_vtbl( struct pipe_context *pipe,
+ struct pipe_resource *resource,
+ struct pipe_subresource sr,
+ unsigned usage,
+ const struct pipe_box *box,
+ const void *data,
+ unsigned stride,
+ unsigned slice_stride)
+{
+ struct u_resource *ur = u_resource(resource);
+ ur->vtbl->transfer_inline_write(pipe,
+ resource,
+ sr,
+ usage,
+ box,
+ data,
+ stride,
+ slice_stride);
+}
+
+
+
+
diff --git a/src/gallium/auxiliary/util/u_sampler.c b/src/gallium/auxiliary/util/u_sampler.c
new file mode 100644
index 00000000000..e77f562ea22
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_sampler.c
@@ -0,0 +1,100 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "u_format.h"
+#include "u_sampler.h"
+
+
+static void
+default_template(struct pipe_sampler_view *view,
+ const struct pipe_resource *texture,
+ enum pipe_format format,
+ unsigned expand_green_blue)
+{
+ /* XXX: Check if format is compatible with texture->format.
+ */
+
+ view->format = format;
+ view->first_level = 0;
+ view->last_level = texture->last_level;
+ view->swizzle_r = PIPE_SWIZZLE_RED;
+ view->swizzle_g = PIPE_SWIZZLE_GREEN;
+ view->swizzle_b = PIPE_SWIZZLE_BLUE;
+ view->swizzle_a = PIPE_SWIZZLE_ALPHA;
+
+ /* Override default green and blue component expansion to the requested
+ * one.
+ *
+ * Gallium expands nonexistent components to (0,0,0,1), DX9 expands
+ * to (1,1,1,1). Since alpha is always expanded to 1, and red is
+ * always present, we only really care about green and blue
+ * components.
+ *
+ * To make it look less hackish, one would have to add
+ * UTIL_FORMAT_SWIZZLE_EXPAND to indicate components for expansion
+ * and then override without exceptions or favoring one component
+ * over another.
+ */
+ if (format != PIPE_FORMAT_A8_UNORM) {
+ const struct util_format_description *desc = util_format_description(format);
+
+ assert(desc);
+ if (desc) {
+ if (desc->swizzle[1] == UTIL_FORMAT_SWIZZLE_0) {
+ view->swizzle_g = expand_green_blue;
+ }
+ if (desc->swizzle[2] == UTIL_FORMAT_SWIZZLE_0) {
+ view->swizzle_b = expand_green_blue;
+ }
+ }
+ }
+}
+
+void
+u_sampler_view_default_template(struct pipe_sampler_view *view,
+ const struct pipe_resource *texture,
+ enum pipe_format format)
+{
+ /* Expand to (0, 0, 0, 1) */
+ default_template(view,
+ texture,
+ format,
+ PIPE_SWIZZLE_ZERO);
+}
+
+void
+u_sampler_view_default_dx9_template(struct pipe_sampler_view *view,
+ const struct pipe_resource *texture,
+ enum pipe_format format)
+{
+ /* Expand to (1, 1, 1, 1) */
+ default_template(view,
+ texture,
+ format,
+ PIPE_SWIZZLE_ONE);
+}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_depth.h b/src/gallium/auxiliary/util/u_sampler.h
index 79d6981bb51..f3dad7417e0 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_depth.h
+++ b/src/gallium/auxiliary/util/u_sampler.h
@@ -1,6 +1,6 @@
/**************************************************************************
*
- * Copyright 2009 VMware, Inc.
+ * Copyright 2010 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -26,38 +26,32 @@
**************************************************************************/
-/**
- * Depth/stencil testing to LLVM IR translation.
- *
- * @author Jose Fonseca <[email protected]>
- */
-
-#ifndef LP_BLD_DEPTH_H
-#define LP_BLD_DEPTH_H
-
+#ifndef U_SAMPLER_H
+#define U_SAMPLER_H
-#include <llvm-c/Core.h>
-
-struct pipe_depth_state;
-struct util_format_description;
-struct lp_type;
-struct lp_build_mask_context;
+#include "pipe/p_defines.h"
+#include "pipe/p_format.h"
+#include "pipe/p_state.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
-struct lp_type
-lp_depth_type(const struct util_format_description *format_desc,
- unsigned length);
+void
+u_sampler_view_default_template(struct pipe_sampler_view *view,
+ const struct pipe_resource *texture,
+ enum pipe_format format);
void
-lp_build_depth_test(LLVMBuilderRef builder,
- const struct pipe_depth_state *state,
- struct lp_type type,
- const struct util_format_description *format_desc,
- struct lp_build_mask_context *mask,
- LLVMValueRef src,
- LLVMValueRef dst_ptr);
+u_sampler_view_default_dx9_template(struct pipe_sampler_view *view,
+ const struct pipe_resource *texture,
+ enum pipe_format format);
+
+#ifdef __cplusplus
+} /* extern "C" { */
+#endif
-#endif /* !LP_BLD_DEPTH_H */
+#endif /* U_SAMPLER_H */
diff --git a/src/gallium/auxiliary/util/u_simple_screen.c b/src/gallium/auxiliary/util/u_simple_screen.c
deleted file mode 100644
index 53f3c16dbcc..00000000000
--- a/src/gallium/auxiliary/util/u_simple_screen.c
+++ /dev/null
@@ -1,153 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2009 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#include "u_simple_screen.h"
-
-#include "pipe/p_screen.h"
-#include "pipe/p_state.h"
-#include "util/u_simple_screen.h"
-
-
-static struct pipe_buffer *
-pass_buffer_create(struct pipe_screen *screen,
- unsigned alignment,
- unsigned usage,
- unsigned size)
-{
- struct pipe_buffer *buffer =
- screen->winsys->buffer_create(screen->winsys, alignment, usage, size);
-
- buffer->screen = screen;
-
- return buffer;
-}
-
-static struct pipe_buffer *
-pass_user_buffer_create(struct pipe_screen *screen,
- void *ptr,
- unsigned bytes)
-{
- struct pipe_buffer *buffer =
- screen->winsys->user_buffer_create(screen->winsys, ptr, bytes);
-
- buffer->screen = screen;
-
- return buffer;
-}
-
-static struct pipe_buffer *
-pass_surface_buffer_create(struct pipe_screen *screen,
- unsigned width, unsigned height,
- enum pipe_format format,
- unsigned usage,
- unsigned tex_usage,
- unsigned *stride)
-{
- struct pipe_buffer *buffer =
- screen->winsys->surface_buffer_create(screen->winsys, width, height,
- format, usage, tex_usage, stride);
-
- buffer->screen = screen;
-
- return buffer;
-}
-
-static void *
-pass_buffer_map(struct pipe_screen *screen,
- struct pipe_buffer *buf,
- unsigned usage)
-{
- return screen->winsys->buffer_map(screen->winsys, buf, usage);
-}
-
-static void
-pass_buffer_unmap(struct pipe_screen *screen,
- struct pipe_buffer *buf)
-{
- screen->winsys->buffer_unmap(screen->winsys, buf);
-}
-
-static void
-pass_buffer_destroy(struct pipe_buffer *buf)
-{
- buf->screen->winsys->buffer_destroy(buf);
-}
-
-
-static void
-pass_flush_frontbuffer(struct pipe_screen *screen,
- struct pipe_surface *surf,
- void *context_private)
-{
- screen->winsys->flush_frontbuffer(screen->winsys, surf, context_private);
-}
-
-static void
-pass_fence_reference(struct pipe_screen *screen,
- struct pipe_fence_handle **ptr,
- struct pipe_fence_handle *fence)
-{
- screen->winsys->fence_reference(screen->winsys, ptr, fence);
-}
-
-static int
-pass_fence_signalled(struct pipe_screen *screen,
- struct pipe_fence_handle *fence,
- unsigned flag)
-{
- return screen->winsys->fence_signalled(screen->winsys, fence, flag);
-}
-
-static int
-pass_fence_finish(struct pipe_screen *screen,
- struct pipe_fence_handle *fence,
- unsigned flag)
-{
- return screen->winsys->fence_finish(screen->winsys, fence, flag);
-}
-
-void
-u_simple_screen_init(struct pipe_screen *screen)
-{
- screen->buffer_create = pass_buffer_create;
- screen->user_buffer_create = pass_user_buffer_create;
- screen->surface_buffer_create = pass_surface_buffer_create;
-
- screen->buffer_map = pass_buffer_map;
- screen->buffer_unmap = pass_buffer_unmap;
- screen->buffer_destroy = pass_buffer_destroy;
- screen->flush_frontbuffer = pass_flush_frontbuffer;
- screen->fence_reference = pass_fence_reference;
- screen->fence_signalled = pass_fence_signalled;
- screen->fence_finish = pass_fence_finish;
-}
-
-const char *
-u_simple_screen_winsys_name(struct pipe_screen *screen)
-{
- return screen->winsys->get_name(screen->winsys);
-}
diff --git a/src/gallium/auxiliary/util/u_simple_screen.h b/src/gallium/auxiliary/util/u_simple_screen.h
index bb3f5ba102f..b52232f025c 100644
--- a/src/gallium/auxiliary/util/u_simple_screen.h
+++ b/src/gallium/auxiliary/util/u_simple_screen.h
@@ -33,7 +33,7 @@
struct pipe_screen;
struct pipe_fence_handle;
struct pipe_surface;
-struct pipe_buffer;
+struct pipe_resource;
/**
* Gallium3D drivers are (meant to be!) independent of both GL and the
@@ -53,11 +53,6 @@ struct pipe_winsys
const char *(*get_name)( struct pipe_winsys *ws );
/**
- * Do any special operations to ensure buffer size is correct
- */
- void (*update_buffer)( struct pipe_winsys *ws,
- void *context_private );
- /**
* Do any special operations to ensure frontbuffer contents are
* displayed, eg copy fake frontbuffer.
*/
@@ -73,14 +68,13 @@ struct pipe_winsys
* window systems must then implement that interface (rather than the
* other way around...).
*
- * usage is a bitmask of PIPE_BUFFER_USAGE_PIXEL/VERTEX/INDEX/CONSTANT. This
- * usage argument is only an optimization hint, not a guarantee, therefore
- * proper behavior must be observed in all circumstances.
+ * usage is a bitmask of PIPE_BIND_*.
+ * All possible usages must be present.
*
* alignment indicates the client's alignment requirements, eg for
* SSE instructions.
*/
- struct pipe_buffer *(*buffer_create)( struct pipe_winsys *ws,
+ struct pipe_resource *(*buffer_create)( struct pipe_winsys *ws,
unsigned alignment,
unsigned usage,
unsigned size );
@@ -106,7 +100,7 @@ struct pipe_winsys
* Note that ptr may be accessed at any time upto the time when the
* buffer is destroyed, so the data must not be freed before then.
*/
- struct pipe_buffer *(*user_buffer_create)(struct pipe_winsys *ws,
+ struct pipe_resource *(*user_buffer_create)(struct pipe_winsys *ws,
void *ptr,
unsigned bytes);
@@ -117,11 +111,11 @@ struct pipe_winsys
* display targets) must be allocated with special characteristics, memory
* pools, or obtained directly from the windowing system.
*
- * This callback is invoked by the pipe_screenwhen creating a texture marked
- * with the PIPE_TEXTURE_USAGE_DISPLAY_TARGET flag to get the underlying
+ * This callback is invoked by the pipe_screen when creating a texture marked
+ * with the PIPE_BIND_DISPLAY_TARGET flag to get the underlying
* buffer storage.
*/
- struct pipe_buffer *(*surface_buffer_create)(struct pipe_winsys *ws,
+ struct pipe_resource *(*surface_buffer_create)(struct pipe_winsys *ws,
unsigned width, unsigned height,
enum pipe_format format,
unsigned usage,
@@ -134,13 +128,13 @@ struct pipe_winsys
* flags is bitmask of PIPE_BUFFER_USAGE_CPU_READ/WRITE flags.
*/
void *(*buffer_map)( struct pipe_winsys *ws,
- struct pipe_buffer *buf,
+ struct pipe_resource *buf,
unsigned usage );
void (*buffer_unmap)( struct pipe_winsys *ws,
- struct pipe_buffer *buf );
+ struct pipe_resource *buf );
- void (*buffer_destroy)( struct pipe_buffer *buf );
+ void (*buffer_destroy)( struct pipe_resource *buf );
/** Set ptr = fence, with reference counting */
diff --git a/src/gallium/auxiliary/util/u_surface.c b/src/gallium/auxiliary/util/u_surface.c
index 33306bbc2a6..42440d0d673 100644
--- a/src/gallium/auxiliary/util/u_surface.c
+++ b/src/gallium/auxiliary/util/u_surface.c
@@ -50,7 +50,8 @@
boolean
util_create_rgba_surface(struct pipe_screen *screen,
uint width, uint height,
- struct pipe_texture **textureOut,
+ uint bind,
+ struct pipe_resource **textureOut,
struct pipe_surface **surfaceOut)
{
static const enum pipe_format rgbaFormats[] = {
@@ -60,15 +61,14 @@ util_create_rgba_surface(struct pipe_screen *screen,
PIPE_FORMAT_NONE
};
const uint target = PIPE_TEXTURE_2D;
- const uint usage = PIPE_TEXTURE_USAGE_RENDER_TARGET;
enum pipe_format format = PIPE_FORMAT_NONE;
- struct pipe_texture templ;
+ struct pipe_resource templ;
uint i;
/* Choose surface format */
for (i = 0; rgbaFormats[i]; i++) {
if (screen->is_format_supported(screen, rgbaFormats[i],
- target, usage, 0)) {
+ target, bind, 0)) {
format = rgbaFormats[i];
break;
}
@@ -84,16 +84,19 @@ util_create_rgba_surface(struct pipe_screen *screen,
templ.width0 = width;
templ.height0 = height;
templ.depth0 = 1;
- templ.tex_usage = usage;
+ templ.bind = bind;
- *textureOut = screen->texture_create(screen, &templ);
+ *textureOut = screen->resource_create(screen, &templ);
if (!*textureOut)
return FALSE;
/* create surface / view into texture */
- *surfaceOut = screen->get_tex_surface(screen, *textureOut, 0, 0, 0, PIPE_BUFFER_USAGE_GPU_WRITE);
+ *surfaceOut = screen->get_tex_surface(screen,
+ *textureOut,
+ 0, 0, 0,
+ bind);
if (!*surfaceOut) {
- pipe_texture_reference(textureOut, NULL);
+ pipe_resource_reference(textureOut, NULL);
return FALSE;
}
@@ -105,11 +108,11 @@ util_create_rgba_surface(struct pipe_screen *screen,
* Release the surface and texture from util_create_rgba_surface().
*/
void
-util_destroy_rgba_surface(struct pipe_texture *texture,
+util_destroy_rgba_surface(struct pipe_resource *texture,
struct pipe_surface *surface)
{
pipe_surface_reference(&surface, NULL);
- pipe_texture_reference(&texture, NULL);
+ pipe_resource_reference(&texture, NULL);
}
diff --git a/src/gallium/auxiliary/util/u_surface.h b/src/gallium/auxiliary/util/u_surface.h
index 3c60df2c3e5..119fcd4ce8e 100644
--- a/src/gallium/auxiliary/util/u_surface.h
+++ b/src/gallium/auxiliary/util/u_surface.h
@@ -52,13 +52,13 @@ util_same_surface(const struct pipe_surface *s1, const struct pipe_surface *s2)
extern boolean
util_create_rgba_surface(struct pipe_screen *screen,
- uint width, uint height,
- struct pipe_texture **textureOut,
+ uint width, uint height, uint bind,
+ struct pipe_resource **textureOut,
struct pipe_surface **surfaceOut);
extern void
-util_destroy_rgba_surface(struct pipe_texture *texture,
+util_destroy_rgba_surface(struct pipe_resource *texture,
struct pipe_surface *surface);
diff --git a/src/gallium/auxiliary/util/u_surfaces.c b/src/gallium/auxiliary/util/u_surfaces.c
new file mode 100644
index 00000000000..668da8c5c27
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_surfaces.c
@@ -0,0 +1,112 @@
+#include "u_surfaces.h"
+#include "util/u_hash_table.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+
+/* TODO: ouch, util_hash_table should do these by default when passed a null function pointer
+ * this indirect function call is quite bad
+ */
+static unsigned
+hash(void *key)
+{
+ return (unsigned)(uintptr_t)key;
+}
+
+static int
+compare(void *key1, void *key2)
+{
+ return (unsigned)(uintptr_t)key1 - (unsigned)(uintptr_t)key2;
+}
+
+struct pipe_surface *
+util_surfaces_do_get(struct util_surfaces *us, unsigned surface_struct_size, struct pipe_screen *pscreen, struct pipe_resource *pt, unsigned face, unsigned level, unsigned zslice, unsigned flags)
+{
+ struct pipe_surface *ps;
+ void *key = NULL;
+
+ if(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE)
+ { /* or 2D array */
+ if(!us->u.table)
+ us->u.table = util_hash_table_create(hash, compare);
+ key = (void *)(((zslice + face) << 8) | level);
+ /* TODO: ouch, should have a get-reference function...
+ * also, shouldn't allocate a two-pointer structure for each item... */
+ ps = util_hash_table_get(us->u.table, key);
+ }
+ else
+ {
+ if(!us->u.array)
+ us->u.array = CALLOC(pt->last_level + 1, sizeof(struct pipe_surface *));
+ ps = us->u.array[level];
+ }
+
+ if(ps)
+ {
+ p_atomic_inc(&ps->reference.count);
+ return ps;
+ }
+
+ ps = (struct pipe_surface *)CALLOC(1, surface_struct_size);
+ if(!ps)
+ return NULL;
+
+ pipe_surface_init(ps, pt, face, level, zslice, flags);
+ ps->offset = ~0;
+
+ if(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE)
+ util_hash_table_set(us->u.table, key, ps);
+ else
+ us->u.array[level] = ps;
+
+ return ps;
+}
+
+void
+util_surfaces_do_detach(struct util_surfaces *us, struct pipe_surface *ps)
+{
+ struct pipe_resource *pt = ps->texture;
+ if(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE)
+ { /* or 2D array */
+ void* key = (void*)(uintptr_t)(((ps->zslice + ps->face) << 8) | ps->level);
+ util_hash_table_remove(us->u.table, key);
+ }
+ else
+ us->u.array[ps->level] = 0;
+}
+
+static enum pipe_error
+util_surfaces_destroy_callback(void *key, void *value, void *data)
+{
+ void (*destroy_surface) (struct pipe_surface * ps) = data;
+ destroy_surface((struct pipe_surface *)value);
+ return PIPE_OK;
+}
+
+void
+util_surfaces_destroy(struct util_surfaces *us, struct pipe_resource *pt, void (*destroy_surface) (struct pipe_surface *))
+{
+ if(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE)
+ { /* or 2D array */
+ if(us->u.table)
+ {
+ util_hash_table_foreach(us->u.table, util_surfaces_destroy_callback, destroy_surface);
+ util_hash_table_destroy(us->u.table);
+ us->u.table = NULL;
+ }
+ }
+ else
+ {
+ if(us->u.array)
+ {
+ unsigned i;
+ for(i = 0; i < pt->last_level; ++i)
+ {
+ struct pipe_surface *ps = us->u.array[i];
+ if(ps)
+ destroy_surface(ps);
+ }
+ FREE(us->u.array);
+ us->u.array = NULL;
+ }
+ }
+}
diff --git a/src/gallium/auxiliary/util/u_surfaces.h b/src/gallium/auxiliary/util/u_surfaces.h
new file mode 100644
index 00000000000..0195bf5afba
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_surfaces.h
@@ -0,0 +1,54 @@
+#ifndef U_SURFACES_H_
+#define U_SURFACES_H_
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_state.h"
+#include "util/u_atomic.h"
+
+struct util_hash_table;
+
+struct util_surfaces
+{
+ union
+ {
+ struct util_hash_table *table;
+ struct pipe_surface **array;
+ } u;
+};
+
+struct pipe_surface *util_surfaces_do_get(struct util_surfaces *us, unsigned surface_struct_size, struct pipe_screen *pscreen, struct pipe_resource *pt, unsigned face, unsigned level, unsigned zslice, unsigned flags);
+
+/* fast inline path for the very common case */
+static INLINE struct pipe_surface *
+util_surfaces_get(struct util_surfaces *us, unsigned surface_struct_size, struct pipe_screen *pscreen, struct pipe_resource *pt, unsigned face, unsigned level, unsigned zslice, unsigned flags)
+{
+ if(likely(pt->target == PIPE_TEXTURE_2D && us->u.array))
+ {
+ struct pipe_surface *ps = us->u.array[level];
+ if(ps)
+ {
+ p_atomic_inc(&ps->reference.count);
+ return ps;
+ }
+ }
+
+ return util_surfaces_do_get(us, surface_struct_size, pscreen, pt, face, level, zslice, flags);
+}
+
+void util_surfaces_do_detach(struct util_surfaces *us, struct pipe_surface *ps);
+
+static INLINE void
+util_surfaces_detach(struct util_surfaces *us, struct pipe_surface *ps)
+{
+ if(likely(ps->texture->target == PIPE_TEXTURE_2D))
+ {
+ us->u.array[ps->level] = 0;
+ return;
+ }
+
+ util_surfaces_do_detach(us, ps);
+}
+
+void util_surfaces_destroy(struct util_surfaces *us, struct pipe_resource *pt, void (*destroy_surface) (struct pipe_surface *));
+
+#endif
diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c
index 79481b710bf..f7aa1403d08 100644
--- a/src/gallium/auxiliary/util/u_tile.c
+++ b/src/gallium/auxiliary/util/u_tile.c
@@ -45,27 +45,27 @@
* Move raw block of pixels from transfer object to user memory.
*/
void
-pipe_get_tile_raw(struct pipe_transfer *pt,
+pipe_get_tile_raw(struct pipe_context *pipe,
+ struct pipe_transfer *pt,
uint x, uint y, uint w, uint h,
void *dst, int dst_stride)
{
- struct pipe_screen *screen = pt->texture->screen;
const void *src;
if (dst_stride == 0)
- dst_stride = util_format_get_stride(pt->texture->format, w);
+ dst_stride = util_format_get_stride(pt->resource->format, w);
- if (pipe_clip_tile(x, y, &w, &h, pt))
+ if (u_clip_tile(x, y, &w, &h, &pt->box))
return;
- src = screen->transfer_map(screen, pt);
+ src = pipe->transfer_map(pipe, pt);
assert(src);
if(!src)
return;
- util_copy_rect(dst, pt->texture->format, dst_stride, 0, 0, w, h, src, pt->stride, x, y);
+ util_copy_rect(dst, pt->resource->format, dst_stride, 0, 0, w, h, src, pt->stride, x, y);
- screen->transfer_unmap(screen, pt);
+ pipe->transfer_unmap(pipe, pt);
}
@@ -73,28 +73,28 @@ pipe_get_tile_raw(struct pipe_transfer *pt,
* Move raw block of pixels from user memory to transfer object.
*/
void
-pipe_put_tile_raw(struct pipe_transfer *pt,
+pipe_put_tile_raw(struct pipe_context *pipe,
+ struct pipe_transfer *pt,
uint x, uint y, uint w, uint h,
const void *src, int src_stride)
{
- struct pipe_screen *screen = pt->texture->screen;
void *dst;
- enum pipe_format format = pt->texture->format;
+ enum pipe_format format = pt->resource->format;
if (src_stride == 0)
src_stride = util_format_get_stride(format, w);
- if (pipe_clip_tile(x, y, &w, &h, pt))
+ if (u_clip_tile(x, y, &w, &h, &pt->box))
return;
- dst = screen->transfer_map(screen, pt);
+ dst = pipe->transfer_map(pipe, pt);
assert(dst);
if(!dst)
return;
util_copy_rect(dst, format, pt->stride, x, y, w, h, src, src_stride, 0, 0);
- screen->transfer_unmap(screen, pt);
+ pipe->transfer_unmap(pipe, pt);
}
@@ -108,387 +108,6 @@ pipe_put_tile_raw(struct pipe_transfer *pt,
-/*** PIPE_FORMAT_B8G8R8A8_UNORM ***/
-
-static void
-a8r8g8b8_get_tile_rgba(const unsigned *src,
- unsigned w, unsigned h,
- float *p,
- unsigned dst_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- float *pRow = p;
- for (j = 0; j < w; j++, pRow += 4) {
- const unsigned pixel = *src++;
- pRow[0] = ubyte_to_float((pixel >> 16) & 0xff);
- pRow[1] = ubyte_to_float((pixel >> 8) & 0xff);
- pRow[2] = ubyte_to_float((pixel >> 0) & 0xff);
- pRow[3] = ubyte_to_float((pixel >> 24) & 0xff);
- }
- p += dst_stride;
- }
-}
-
-
-static void
-a8r8g8b8_put_tile_rgba(unsigned *dst,
- unsigned w, unsigned h,
- const float *p,
- unsigned src_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- const float *pRow = p;
- for (j = 0; j < w; j++, pRow += 4) {
- unsigned r, g, b, a;
- r = float_to_ubyte(pRow[0]);
- g = float_to_ubyte(pRow[1]);
- b = float_to_ubyte(pRow[2]);
- a = float_to_ubyte(pRow[3]);
- *dst++ = (a << 24) | (r << 16) | (g << 8) | b;
- }
- p += src_stride;
- }
-}
-
-
-/*** PIPE_FORMAT_B8G8R8X8_UNORM ***/
-
-static void
-x8r8g8b8_get_tile_rgba(const unsigned *src,
- unsigned w, unsigned h,
- float *p,
- unsigned dst_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- float *pRow = p;
- for (j = 0; j < w; j++, pRow += 4) {
- const unsigned pixel = *src++;
- pRow[0] = ubyte_to_float((pixel >> 16) & 0xff);
- pRow[1] = ubyte_to_float((pixel >> 8) & 0xff);
- pRow[2] = ubyte_to_float((pixel >> 0) & 0xff);
- pRow[3] = 1.0F;
- }
- p += dst_stride;
- }
-}
-
-
-static void
-x8r8g8b8_put_tile_rgba(unsigned *dst,
- unsigned w, unsigned h,
- const float *p,
- unsigned src_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- const float *pRow = p;
- for (j = 0; j < w; j++, pRow += 4) {
- unsigned r, g, b;
- r = float_to_ubyte(pRow[0]);
- g = float_to_ubyte(pRow[1]);
- b = float_to_ubyte(pRow[2]);
- *dst++ = (0xff << 24) | (r << 16) | (g << 8) | b;
- }
- p += src_stride;
- }
-}
-
-
-/*** PIPE_FORMAT_A8R8G8B8_UNORM ***/
-
-static void
-b8g8r8a8_get_tile_rgba(const unsigned *src,
- unsigned w, unsigned h,
- float *p,
- unsigned dst_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- float *pRow = p;
- for (j = 0; j < w; j++, pRow += 4) {
- const unsigned pixel = *src++;
- pRow[0] = ubyte_to_float((pixel >> 8) & 0xff);
- pRow[1] = ubyte_to_float((pixel >> 16) & 0xff);
- pRow[2] = ubyte_to_float((pixel >> 24) & 0xff);
- pRow[3] = ubyte_to_float((pixel >> 0) & 0xff);
- }
- p += dst_stride;
- }
-}
-
-
-static void
-b8g8r8a8_put_tile_rgba(unsigned *dst,
- unsigned w, unsigned h,
- const float *p,
- unsigned src_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- const float *pRow = p;
- for (j = 0; j < w; j++, pRow += 4) {
- unsigned r, g, b, a;
- r = float_to_ubyte(pRow[0]);
- g = float_to_ubyte(pRow[1]);
- b = float_to_ubyte(pRow[2]);
- a = float_to_ubyte(pRow[3]);
- *dst++ = (b << 24) | (g << 16) | (r << 8) | a;
- }
- p += src_stride;
- }
-}
-
-
-/*** PIPE_FORMAT_A8B8G8R8_UNORM ***/
-
-static void
-r8g8b8a8_get_tile_rgba(const unsigned *src,
- unsigned w, unsigned h,
- float *p,
- unsigned dst_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- float *pRow = p;
- for (j = 0; j < w; j++, pRow += 4) {
- const unsigned pixel = *src++;
- pRow[0] = ubyte_to_float((pixel >> 24) & 0xff);
- pRow[1] = ubyte_to_float((pixel >> 16) & 0xff);
- pRow[2] = ubyte_to_float((pixel >> 8) & 0xff);
- pRow[3] = ubyte_to_float((pixel >> 0) & 0xff);
- }
- p += dst_stride;
- }
-}
-
-
-static void
-r8g8b8a8_put_tile_rgba(unsigned *dst,
- unsigned w, unsigned h,
- const float *p,
- unsigned src_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- const float *pRow = p;
- for (j = 0; j < w; j++, pRow += 4) {
- unsigned r, g, b, a;
- r = float_to_ubyte(pRow[0]);
- g = float_to_ubyte(pRow[1]);
- b = float_to_ubyte(pRow[2]);
- a = float_to_ubyte(pRow[3]);
- *dst++ = (r << 24) | (g << 16) | (b << 8) | a;
- }
- p += src_stride;
- }
-}
-
-
-/*** PIPE_FORMAT_B5G5R5A1_UNORM ***/
-
-static void
-a1r5g5b5_get_tile_rgba(const ushort *src,
- unsigned w, unsigned h,
- float *p,
- unsigned dst_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- float *pRow = p;
- for (j = 0; j < w; j++, pRow += 4) {
- const ushort pixel = *src++;
- pRow[0] = ((pixel >> 10) & 0x1f) * (1.0f / 31.0f);
- pRow[1] = ((pixel >> 5) & 0x1f) * (1.0f / 31.0f);
- pRow[2] = ((pixel ) & 0x1f) * (1.0f / 31.0f);
- pRow[3] = ((pixel >> 15) ) * 1.0f;
- }
- p += dst_stride;
- }
-}
-
-
-static void
-a1r5g5b5_put_tile_rgba(ushort *dst,
- unsigned w, unsigned h,
- const float *p,
- unsigned src_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- const float *pRow = p;
- for (j = 0; j < w; j++, pRow += 4) {
- unsigned r, g, b, a;
- r = float_to_ubyte(pRow[0]);
- g = float_to_ubyte(pRow[1]);
- b = float_to_ubyte(pRow[2]);
- a = float_to_ubyte(pRow[3]);
- r = r >> 3; /* 5 bits */
- g = g >> 3; /* 5 bits */
- b = b >> 3; /* 5 bits */
- a = a >> 7; /* 1 bit */
- *dst++ = (a << 15) | (r << 10) | (g << 5) | b;
- }
- p += src_stride;
- }
-}
-
-
-/*** PIPE_FORMAT_B4G4R4A4_UNORM ***/
-
-static void
-a4r4g4b4_get_tile_rgba(const ushort *src,
- unsigned w, unsigned h,
- float *p,
- unsigned dst_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- float *pRow = p;
- for (j = 0; j < w; j++, pRow += 4) {
- const ushort pixel = *src++;
- pRow[0] = ((pixel >> 8) & 0xf) * (1.0f / 15.0f);
- pRow[1] = ((pixel >> 4) & 0xf) * (1.0f / 15.0f);
- pRow[2] = ((pixel ) & 0xf) * (1.0f / 15.0f);
- pRow[3] = ((pixel >> 12) ) * (1.0f / 15.0f);
- }
- p += dst_stride;
- }
-}
-
-
-static void
-a4r4g4b4_put_tile_rgba(ushort *dst,
- unsigned w, unsigned h,
- const float *p,
- unsigned src_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- const float *pRow = p;
- for (j = 0; j < w; j++, pRow += 4) {
- unsigned r, g, b, a;
- r = float_to_ubyte(pRow[0]);
- g = float_to_ubyte(pRow[1]);
- b = float_to_ubyte(pRow[2]);
- a = float_to_ubyte(pRow[3]);
- r >>= 4;
- g >>= 4;
- b >>= 4;
- a >>= 4;
- *dst++ = (a << 12) | (r << 8) | (g << 4) | b;
- }
- p += src_stride;
- }
-}
-
-
-/*** PIPE_FORMAT_B5G6R5_UNORM ***/
-
-static void
-r5g6b5_get_tile_rgba(const ushort *src,
- unsigned w, unsigned h,
- float *p,
- unsigned dst_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- float *pRow = p;
- for (j = 0; j < w; j++, pRow += 4) {
- const ushort pixel = *src++;
- pRow[0] = ((pixel >> 11) & 0x1f) * (1.0f / 31.0f);
- pRow[1] = ((pixel >> 5) & 0x3f) * (1.0f / 63.0f);
- pRow[2] = ((pixel ) & 0x1f) * (1.0f / 31.0f);
- pRow[3] = 1.0f;
- }
- p += dst_stride;
- }
-}
-
-
-static void
-r5g6b5_put_tile_rgba(ushort *dst,
- unsigned w, unsigned h,
- const float *p,
- unsigned src_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- const float *pRow = p;
- for (j = 0; j < w; j++, pRow += 4) {
- uint r = (uint) (CLAMP(pRow[0], 0.0, 1.0) * 31.0);
- uint g = (uint) (CLAMP(pRow[1], 0.0, 1.0) * 63.0);
- uint b = (uint) (CLAMP(pRow[2], 0.0, 1.0) * 31.0);
- *dst++ = (r << 11) | (g << 5) | (b);
- }
- p += src_stride;
- }
-}
-
-
-
-/*** PIPE_FORMAT_R8G8B8_UNORM ***/
-
-static void
-r8g8b8_get_tile_rgba(const ubyte *src,
- unsigned w, unsigned h,
- float *p,
- unsigned dst_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- float *pRow = p;
- for (j = 0; j < w; j++, pRow += 4) {
- pRow[0] = ubyte_to_float(src[0]);
- pRow[1] = ubyte_to_float(src[1]);
- pRow[2] = ubyte_to_float(src[2]);
- pRow[3] = 1.0f;
- src += 3;
- }
- p += dst_stride;
- }
-}
-
-
-static void
-r8g8b8_put_tile_rgba(ubyte *dst,
- unsigned w, unsigned h,
- const float *p,
- unsigned src_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- const float *pRow = p;
- for (j = 0; j < w; j++, pRow += 4) {
- dst[0] = float_to_ubyte(pRow[0]);
- dst[1] = float_to_ubyte(pRow[1]);
- dst[2] = float_to_ubyte(pRow[2]);
- dst += 3;
- }
- p += src_stride;
- }
-}
-
-
-
/*** PIPE_FORMAT_Z16_UNORM ***/
/**
@@ -518,448 +137,6 @@ z16_get_tile_rgba(const ushort *src,
-/*** PIPE_FORMAT_L8_UNORM ***/
-
-static void
-l8_get_tile_rgba(const ubyte *src,
- unsigned w, unsigned h,
- float *p,
- unsigned dst_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- float *pRow = p;
- for (j = 0; j < w; j++, src++, pRow += 4) {
- pRow[0] =
- pRow[1] =
- pRow[2] = ubyte_to_float(*src);
- pRow[3] = 1.0;
- }
- p += dst_stride;
- }
-}
-
-
-static void
-l8_put_tile_rgba(ubyte *dst,
- unsigned w, unsigned h,
- const float *p,
- unsigned src_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- const float *pRow = p;
- for (j = 0; j < w; j++, pRow += 4) {
- unsigned r;
- r = float_to_ubyte(pRow[0]);
- *dst++ = (ubyte) r;
- }
- p += src_stride;
- }
-}
-
-
-
-/*** PIPE_FORMAT_A8_UNORM ***/
-
-static void
-a8_get_tile_rgba(const ubyte *src,
- unsigned w, unsigned h,
- float *p,
- unsigned dst_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- float *pRow = p;
- for (j = 0; j < w; j++, src++, pRow += 4) {
- pRow[0] =
- pRow[1] =
- pRow[2] = 0.0;
- pRow[3] = ubyte_to_float(*src);
- }
- p += dst_stride;
- }
-}
-
-
-static void
-a8_put_tile_rgba(ubyte *dst,
- unsigned w, unsigned h,
- const float *p,
- unsigned src_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- const float *pRow = p;
- for (j = 0; j < w; j++, pRow += 4) {
- unsigned a;
- a = float_to_ubyte(pRow[3]);
- *dst++ = (ubyte) a;
- }
- p += src_stride;
- }
-}
-
-
-
-/*** PIPE_FORMAT_R16_SNORM ***/
-
-static void
-r16_get_tile_rgba(const short *src,
- unsigned w, unsigned h,
- float *p,
- unsigned dst_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- float *pRow = p;
- for (j = 0; j < w; j++, src++, pRow += 4) {
- pRow[0] = SHORT_TO_FLOAT(src[0]);
- pRow[1] =
- pRow[2] = 0.0;
- pRow[3] = 1.0;
- }
- p += dst_stride;
- }
-}
-
-
-static void
-r16_put_tile_rgba(short *dst,
- unsigned w, unsigned h,
- const float *p,
- unsigned src_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- const float *pRow = p;
- for (j = 0; j < w; j++, dst++, pRow += 4) {
- UNCLAMPED_FLOAT_TO_SHORT(dst[0], pRow[0]);
- }
- p += src_stride;
- }
-}
-
-
-/*** PIPE_FORMAT_R16G16B16A16_SNORM ***/
-
-static void
-r16g16b16a16_get_tile_rgba(const short *src,
- unsigned w, unsigned h,
- float *p,
- unsigned dst_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- float *pRow = p;
- for (j = 0; j < w; j++, src += 4, pRow += 4) {
- pRow[0] = SHORT_TO_FLOAT(src[0]);
- pRow[1] = SHORT_TO_FLOAT(src[1]);
- pRow[2] = SHORT_TO_FLOAT(src[2]);
- pRow[3] = SHORT_TO_FLOAT(src[3]);
- }
- p += dst_stride;
- }
-}
-
-
-static void
-r16g16b16a16_put_tile_rgba(short *dst,
- unsigned w, unsigned h,
- const float *p,
- unsigned src_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- const float *pRow = p;
- for (j = 0; j < w; j++, dst += 4, pRow += 4) {
- UNCLAMPED_FLOAT_TO_SHORT(dst[0], pRow[0]);
- UNCLAMPED_FLOAT_TO_SHORT(dst[1], pRow[1]);
- UNCLAMPED_FLOAT_TO_SHORT(dst[2], pRow[2]);
- UNCLAMPED_FLOAT_TO_SHORT(dst[3], pRow[3]);
- }
- p += src_stride;
- }
-}
-
-
-/*** PIPE_FORMAT_A8B8G8R8_SRGB ***/
-
-/**
- * Convert an 8-bit sRGB value from non-linear space to a
- * linear RGB value in [0, 1].
- * Implemented with a 256-entry lookup table.
- */
-static INLINE float
-srgb_to_linear(ubyte cs8)
-{
- static float table[256];
- static boolean tableReady = FALSE;
- if (!tableReady) {
- /* compute lookup table now */
- uint i;
- for (i = 0; i < 256; i++) {
- const float cs = ubyte_to_float(i);
- if (cs <= 0.04045) {
- table[i] = cs / 12.92f;
- }
- else {
- table[i] = (float) powf((cs + 0.055) / 1.055, 2.4);
- }
- }
- tableReady = TRUE;
- }
- return table[cs8];
-}
-
-
-/**
- * Convert linear float in [0,1] to an srgb ubyte value in [0,255].
- * XXX this hasn't been tested (render to srgb surface).
- * XXX this needs optimization.
- */
-static INLINE ubyte
-linear_to_srgb(float cl)
-{
- if (cl >= 1.0F)
- return 255;
- else if (cl >= 0.0031308F)
- return float_to_ubyte(1.055F * powf(cl, 0.41666F) - 0.055F);
- else if (cl > 0.0F)
- return float_to_ubyte(12.92F * cl);
- else
- return 0.0;
-}
-
-
-static void
-a8r8g8b8_srgb_get_tile_rgba(const unsigned *src,
- unsigned w, unsigned h,
- float *p,
- unsigned dst_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- float *pRow = p;
- for (j = 0; j < w; j++, pRow += 4) {
- const unsigned pixel = *src++;
- pRow[0] = srgb_to_linear((pixel >> 16) & 0xff);
- pRow[1] = srgb_to_linear((pixel >> 8) & 0xff);
- pRow[2] = srgb_to_linear((pixel >> 0) & 0xff);
- pRow[3] = ubyte_to_float((pixel >> 24) & 0xff);
- }
- p += dst_stride;
- }
-}
-
-static void
-a8r8g8b8_srgb_put_tile_rgba(unsigned *dst,
- unsigned w, unsigned h,
- const float *p,
- unsigned src_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- const float *pRow = p;
- for (j = 0; j < w; j++, pRow += 4) {
- unsigned r, g, b, a;
- r = linear_to_srgb(pRow[0]);
- g = linear_to_srgb(pRow[1]);
- b = linear_to_srgb(pRow[2]);
- a = float_to_ubyte(pRow[3]);
- *dst++ = (a << 24) | (r << 16) | (g << 8) | b;
- }
- p += src_stride;
- }
-}
-
-
-/*** PIPE_FORMAT_L8A8_SRGB ***/
-
-static void
-a8l8_srgb_get_tile_rgba(const ushort *src,
- unsigned w, unsigned h,
- float *p,
- unsigned dst_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- float *pRow = p;
- for (j = 0; j < w; j++, pRow += 4) {
- ushort p = *src++;
- pRow[0] =
- pRow[1] =
- pRow[2] = srgb_to_linear(p & 0xff);
- pRow[3] = ubyte_to_float(p >> 8);
- }
- p += dst_stride;
- }
-}
-
-static void
-a8l8_srgb_put_tile_rgba(ushort *dst,
- unsigned w, unsigned h,
- const float *p,
- unsigned src_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- const float *pRow = p;
- for (j = 0; j < w; j++, pRow += 4) {
- unsigned r, a;
- r = linear_to_srgb(pRow[0]);
- a = float_to_ubyte(pRow[3]);
- *dst++ = (a << 8) | r;
- }
- p += src_stride;
- }
-}
-
-
-/*** PIPE_FORMAT_L8_SRGB ***/
-
-static void
-l8_srgb_get_tile_rgba(const ubyte *src,
- unsigned w, unsigned h,
- float *p,
- unsigned dst_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- float *pRow = p;
- for (j = 0; j < w; j++, src++, pRow += 4) {
- pRow[0] =
- pRow[1] =
- pRow[2] = srgb_to_linear(*src);
- pRow[3] = 1.0;
- }
- p += dst_stride;
- }
-}
-
-static void
-l8_srgb_put_tile_rgba(ubyte *dst,
- unsigned w, unsigned h,
- const float *p,
- unsigned src_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- const float *pRow = p;
- for (j = 0; j < w; j++, pRow += 4) {
- unsigned r;
- r = linear_to_srgb(pRow[0]);
- *dst++ = (ubyte) r;
- }
- p += src_stride;
- }
-}
-
-
-/*** PIPE_FORMAT_I8_UNORM ***/
-
-static void
-i8_get_tile_rgba(const ubyte *src,
- unsigned w, unsigned h,
- float *p,
- unsigned dst_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- float *pRow = p;
- for (j = 0; j < w; j++, src++, pRow += 4) {
- pRow[0] =
- pRow[1] =
- pRow[2] =
- pRow[3] = ubyte_to_float(*src);
- }
- p += dst_stride;
- }
-}
-
-
-static void
-i8_put_tile_rgba(ubyte *dst,
- unsigned w, unsigned h,
- const float *p,
- unsigned src_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- const float *pRow = p;
- for (j = 0; j < w; j++, pRow += 4) {
- unsigned r;
- r = float_to_ubyte(pRow[0]);
- *dst++ = (ubyte) r;
- }
- p += src_stride;
- }
-}
-
-
-/*** PIPE_FORMAT_L8A8_UNORM ***/
-
-static void
-a8l8_get_tile_rgba(const ushort *src,
- unsigned w, unsigned h,
- float *p,
- unsigned dst_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- float *pRow = p;
- for (j = 0; j < w; j++, pRow += 4) {
- ushort p = *src++;
- pRow[0] =
- pRow[1] =
- pRow[2] = ubyte_to_float(p & 0xff);
- pRow[3] = ubyte_to_float(p >> 8);
- }
- p += dst_stride;
- }
-}
-
-
-static void
-a8l8_put_tile_rgba(ushort *dst,
- unsigned w, unsigned h,
- const float *p,
- unsigned src_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- const float *pRow = p;
- for (j = 0; j < w; j++, pRow += 4) {
- unsigned r, a;
- r = float_to_ubyte(pRow[0]);
- a = float_to_ubyte(pRow[3]);
- *dst++ = (a << 8) | r;
- }
- p += src_stride;
- }
-}
-
-
-
-
/*** PIPE_FORMAT_Z32_UNORM ***/
/**
@@ -987,7 +164,7 @@ z32_get_tile_rgba(const unsigned *src,
}
-/*** PIPE_FORMAT_Z24S8_UNORM ***/
+/*** PIPE_FORMAT_Z24_UNORM_S8_USCALED ***/
/**
* Return Z component as four float in [0,1]. Stencil part ignored.
@@ -1014,7 +191,7 @@ s8z24_get_tile_rgba(const unsigned *src,
}
-/*** PIPE_FORMAT_S8Z24_UNORM ***/
+/*** PIPE_FORMAT_S8_USCALED_Z24_UNORM ***/
/**
* Return Z component as four float in [0,1]. Stencil part ignored.
@@ -1067,94 +244,6 @@ z32f_get_tile_rgba(const float *src,
}
-/*** PIPE_FORMAT_UYVY / PIPE_FORMAT_YUYV ***/
-
-/**
- * Convert YCbCr (or YCrCb) to RGBA.
- */
-static void
-ycbcr_get_tile_rgba(const ushort *src,
- unsigned w, unsigned h,
- float *p,
- unsigned dst_stride,
- boolean rev)
-{
- const float scale = 1.0f / 255.0f;
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- float *pRow = p;
- /* do two texels at a time */
- for (j = 0; j < (w & ~1); j += 2, src += 2) {
- const ushort t0 = src[0];
- const ushort t1 = src[1];
- const ubyte y0 = (t0 >> 8) & 0xff; /* luminance */
- const ubyte y1 = (t1 >> 8) & 0xff; /* luminance */
- ubyte cb, cr;
- float r, g, b;
-
- if (rev) {
- cb = t1 & 0xff; /* chroma U */
- cr = t0 & 0xff; /* chroma V */
- }
- else {
- cb = t0 & 0xff; /* chroma U */
- cr = t1 & 0xff; /* chroma V */
- }
-
- /* even pixel: y0,cr,cb */
- r = 1.164f * (y0-16) + 1.596f * (cr-128);
- g = 1.164f * (y0-16) - 0.813f * (cr-128) - 0.391f * (cb-128);
- b = 1.164f * (y0-16) + 2.018f * (cb-128);
- pRow[0] = r * scale;
- pRow[1] = g * scale;
- pRow[2] = b * scale;
- pRow[3] = 1.0f;
- pRow += 4;
-
- /* odd pixel: use y1,cr,cb */
- r = 1.164f * (y1-16) + 1.596f * (cr-128);
- g = 1.164f * (y1-16) - 0.813f * (cr-128) - 0.391f * (cb-128);
- b = 1.164f * (y1-16) + 2.018f * (cb-128);
- pRow[0] = r * scale;
- pRow[1] = g * scale;
- pRow[2] = b * scale;
- pRow[3] = 1.0f;
- pRow += 4;
-
- }
- /* do the last texel */
- if (w & 1) {
- const ushort t0 = src[0];
- const ushort t1 = src[1];
- const ubyte y0 = (t0 >> 8) & 0xff; /* luminance */
- ubyte cb, cr;
- float r, g, b;
-
- if (rev) {
- cb = t1 & 0xff; /* chroma U */
- cr = t0 & 0xff; /* chroma V */
- }
- else {
- cb = t0 & 0xff; /* chroma U */
- cr = t1 & 0xff; /* chroma V */
- }
-
- /* even pixel: y0,cr,cb */
- r = 1.164f * (y0-16) + 1.596f * (cr-128);
- g = 1.164f * (y0-16) - 0.813f * (cr-128) - 0.391f * (cb-128);
- b = 1.164f * (y0-16) + 2.018f * (cb-128);
- pRow[0] = r * scale;
- pRow[1] = g * scale;
- pRow[2] = b * scale;
- pRow[3] = 1.0f;
- pRow += 4;
- }
- p += dst_stride;
- }
-}
-
-
void
pipe_tile_raw_to_rgba(enum pipe_format format,
void *src,
@@ -1162,80 +251,23 @@ pipe_tile_raw_to_rgba(enum pipe_format format,
float *dst, unsigned dst_stride)
{
switch (format) {
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- a8r8g8b8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride);
- break;
- case PIPE_FORMAT_B8G8R8X8_UNORM:
- x8r8g8b8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride);
- break;
- case PIPE_FORMAT_A8R8G8B8_UNORM:
- b8g8r8a8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride);
- break;
- case PIPE_FORMAT_A8B8G8R8_UNORM:
- r8g8b8a8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride);
- break;
- case PIPE_FORMAT_B5G5R5A1_UNORM:
- a1r5g5b5_get_tile_rgba((ushort *) src, w, h, dst, dst_stride);
- break;
- case PIPE_FORMAT_B4G4R4A4_UNORM:
- a4r4g4b4_get_tile_rgba((ushort *) src, w, h, dst, dst_stride);
- break;
- case PIPE_FORMAT_B5G6R5_UNORM:
- r5g6b5_get_tile_rgba((ushort *) src, w, h, dst, dst_stride);
- break;
- case PIPE_FORMAT_R8G8B8_UNORM:
- r8g8b8_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride);
- break;
- case PIPE_FORMAT_L8_UNORM:
- l8_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride);
- break;
- case PIPE_FORMAT_A8_UNORM:
- a8_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride);
- break;
- case PIPE_FORMAT_I8_UNORM:
- i8_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride);
- break;
- case PIPE_FORMAT_L8A8_UNORM:
- a8l8_get_tile_rgba((ushort *) src, w, h, dst, dst_stride);
- break;
- case PIPE_FORMAT_R16_SNORM:
- r16_get_tile_rgba((short *) src, w, h, dst, dst_stride);
- break;
- case PIPE_FORMAT_R16G16B16A16_SNORM:
- r16g16b16a16_get_tile_rgba((short *) src, w, h, dst, dst_stride);
- break;
- case PIPE_FORMAT_B8G8R8A8_SRGB:
- a8r8g8b8_srgb_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride);
- break;
- case PIPE_FORMAT_L8A8_SRGB:
- a8l8_srgb_get_tile_rgba((ushort *) src, w, h, dst, dst_stride);
- break;
- case PIPE_FORMAT_L8_SRGB:
- l8_srgb_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride);
- break;
case PIPE_FORMAT_Z16_UNORM:
z16_get_tile_rgba((ushort *) src, w, h, dst, dst_stride);
break;
case PIPE_FORMAT_Z32_UNORM:
z32_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride);
break;
- case PIPE_FORMAT_Z24S8_UNORM:
+ case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
case PIPE_FORMAT_Z24X8_UNORM:
s8z24_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride);
break;
- case PIPE_FORMAT_S8Z24_UNORM:
+ case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
case PIPE_FORMAT_X8Z24_UNORM:
z24s8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride);
break;
case PIPE_FORMAT_Z32_FLOAT:
z32f_get_tile_rgba((float *) src, w, h, dst, dst_stride);
break;
- case PIPE_FORMAT_UYVY:
- ycbcr_get_tile_rgba((ushort *) src, w, h, dst, dst_stride, FALSE);
- break;
- case PIPE_FORMAT_YUYV:
- ycbcr_get_tile_rgba((ushort *) src, w, h, dst, dst_stride, TRUE);
- break;
default:
util_format_read_4f(format,
dst, dst_stride * sizeof(float),
@@ -1246,15 +278,16 @@ pipe_tile_raw_to_rgba(enum pipe_format format,
void
-pipe_get_tile_rgba(struct pipe_transfer *pt,
+pipe_get_tile_rgba(struct pipe_context *pipe,
+ struct pipe_transfer *pt,
uint x, uint y, uint w, uint h,
float *p)
{
unsigned dst_stride = w * 4;
void *packed;
- enum pipe_format format = pt->texture->format;
+ enum pipe_format format = pt->resource->format;
- if (pipe_clip_tile(x, y, &w, &h, pt))
+ if (u_clip_tile(x, y, &w, &h, &pt->box))
return;
packed = MALLOC(util_format_get_nblocks(format, w, h) * util_format_get_blocksize(format));
@@ -1265,24 +298,94 @@ pipe_get_tile_rgba(struct pipe_transfer *pt,
if(format == PIPE_FORMAT_UYVY || format == PIPE_FORMAT_YUYV)
assert((x & 1) == 0);
- pipe_get_tile_raw(pt, x, y, w, h, packed, 0);
+ pipe_get_tile_raw(pipe, pt, x, y, w, h, packed, 0);
+
+ pipe_tile_raw_to_rgba(format, packed, w, h, p, dst_stride);
+
+ FREE(packed);
+}
+
+
+void
+pipe_get_tile_swizzle(struct pipe_context *pipe,
+ struct pipe_transfer *pt,
+ uint x,
+ uint y,
+ uint w,
+ uint h,
+ uint swizzle_r,
+ uint swizzle_g,
+ uint swizzle_b,
+ uint swizzle_a,
+ enum pipe_format format,
+ float *p)
+{
+ unsigned dst_stride = w * 4;
+ void *packed;
+ uint iy;
+ float rgba01[6];
+
+ if (u_clip_tile(x, y, &w, &h, &pt->box)) {
+ return;
+ }
+
+ packed = MALLOC(util_format_get_nblocks(format, w, h) * util_format_get_blocksize(format));
+ if (!packed) {
+ return;
+ }
+
+ if (format == PIPE_FORMAT_UYVY || format == PIPE_FORMAT_YUYV) {
+ assert((x & 1) == 0);
+ }
+
+ pipe_get_tile_raw(pipe, pt, x, y, w, h, packed, 0);
pipe_tile_raw_to_rgba(format, packed, w, h, p, dst_stride);
FREE(packed);
+
+ if (swizzle_r == PIPE_SWIZZLE_RED &&
+ swizzle_g == PIPE_SWIZZLE_GREEN &&
+ swizzle_b == PIPE_SWIZZLE_BLUE &&
+ swizzle_a == PIPE_SWIZZLE_ALPHA) {
+ /* no-op, skip */
+ return;
+ }
+
+ rgba01[PIPE_SWIZZLE_ZERO] = 0.0f;
+ rgba01[PIPE_SWIZZLE_ONE] = 1.0f;
+
+ for (iy = 0; iy < h; iy++) {
+ float *row = p;
+ uint ix;
+
+ for (ix = 0; ix < w; ix++) {
+ rgba01[PIPE_SWIZZLE_RED] = row[0];
+ rgba01[PIPE_SWIZZLE_GREEN] = row[1];
+ rgba01[PIPE_SWIZZLE_BLUE] = row[2];
+ rgba01[PIPE_SWIZZLE_ALPHA] = row[3];
+
+ *row++ = rgba01[swizzle_r];
+ *row++ = rgba01[swizzle_g];
+ *row++ = rgba01[swizzle_b];
+ *row++ = rgba01[swizzle_a];
+ }
+ p += dst_stride;
+ }
}
void
-pipe_put_tile_rgba(struct pipe_transfer *pt,
+pipe_put_tile_rgba(struct pipe_context *pipe,
+ struct pipe_transfer *pt,
uint x, uint y, uint w, uint h,
const float *p)
{
unsigned src_stride = w * 4;
void *packed;
- enum pipe_format format = pt->texture->format;
+ enum pipe_format format = pt->resource->format;
- if (pipe_clip_tile(x, y, &w, &h, pt))
+ if (u_clip_tile(x, y, &w, &h, &pt->box))
return;
packed = MALLOC(util_format_get_nblocks(format, w, h) * util_format_get_blocksize(format));
@@ -1291,68 +394,17 @@ pipe_put_tile_rgba(struct pipe_transfer *pt,
return;
switch (format) {
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- a8r8g8b8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);
- break;
- case PIPE_FORMAT_B8G8R8X8_UNORM:
- x8r8g8b8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);
- break;
- case PIPE_FORMAT_A8R8G8B8_UNORM:
- b8g8r8a8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);
- break;
- case PIPE_FORMAT_A8B8G8R8_UNORM:
- r8g8b8a8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);
- break;
- case PIPE_FORMAT_B5G5R5A1_UNORM:
- a1r5g5b5_put_tile_rgba((ushort *) packed, w, h, p, src_stride);
- break;
- case PIPE_FORMAT_B5G6R5_UNORM:
- r5g6b5_put_tile_rgba((ushort *) packed, w, h, p, src_stride);
- break;
- case PIPE_FORMAT_R8G8B8_UNORM:
- r8g8b8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride);
- break;
- case PIPE_FORMAT_B4G4R4A4_UNORM:
- a4r4g4b4_put_tile_rgba((ushort *) packed, w, h, p, src_stride);
- break;
- case PIPE_FORMAT_L8_UNORM:
- l8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride);
- break;
- case PIPE_FORMAT_A8_UNORM:
- a8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride);
- break;
- case PIPE_FORMAT_I8_UNORM:
- i8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride);
- break;
- case PIPE_FORMAT_L8A8_UNORM:
- a8l8_put_tile_rgba((ushort *) packed, w, h, p, src_stride);
- break;
- case PIPE_FORMAT_R16_SNORM:
- r16_put_tile_rgba((short *) packed, w, h, p, src_stride);
- break;
- case PIPE_FORMAT_R16G16B16A16_SNORM:
- r16g16b16a16_put_tile_rgba((short *) packed, w, h, p, src_stride);
- break;
- case PIPE_FORMAT_B8G8R8A8_SRGB:
- a8r8g8b8_srgb_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);
- break;
- case PIPE_FORMAT_L8A8_SRGB:
- a8l8_srgb_put_tile_rgba((ushort *) packed, w, h, p, src_stride);
- break;
- case PIPE_FORMAT_L8_SRGB:
- l8_srgb_put_tile_rgba((ubyte *) packed, w, h, p, src_stride);
- break;
case PIPE_FORMAT_Z16_UNORM:
/*z16_put_tile_rgba((ushort *) packed, w, h, p, src_stride);*/
break;
case PIPE_FORMAT_Z32_UNORM:
/*z32_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/
break;
- case PIPE_FORMAT_Z24S8_UNORM:
+ case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
case PIPE_FORMAT_Z24X8_UNORM:
/*s8z24_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/
break;
- case PIPE_FORMAT_S8Z24_UNORM:
+ case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
case PIPE_FORMAT_X8Z24_UNORM:
/*z24s8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/
break;
@@ -1363,7 +415,7 @@ pipe_put_tile_rgba(struct pipe_transfer *pt,
0, 0, w, h);
}
- pipe_put_tile_raw(pt, x, y, w, h, packed, 0);
+ pipe_put_tile_raw(pipe, pt, x, y, w, h, packed, 0);
FREE(packed);
}
@@ -1373,21 +425,21 @@ pipe_put_tile_rgba(struct pipe_transfer *pt,
* Get a block of Z values, converted to 32-bit range.
*/
void
-pipe_get_tile_z(struct pipe_transfer *pt,
+pipe_get_tile_z(struct pipe_context *pipe,
+ struct pipe_transfer *pt,
uint x, uint y, uint w, uint h,
uint *z)
{
- struct pipe_screen *screen = pt->texture->screen;
const uint dstStride = w;
ubyte *map;
uint *pDest = z;
uint i, j;
- enum pipe_format format = pt->texture->format;
+ enum pipe_format format = pt->resource->format;
- if (pipe_clip_tile(x, y, &w, &h, pt))
+ if (u_clip_tile(x, y, &w, &h, &pt->box))
return;
- map = (ubyte *)screen->transfer_map(screen, pt);
+ map = (ubyte *)pipe->transfer_map(pipe, pt);
if (!map) {
assert(0);
return;
@@ -1405,7 +457,7 @@ pipe_get_tile_z(struct pipe_transfer *pt,
}
}
break;
- case PIPE_FORMAT_Z24S8_UNORM:
+ case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
case PIPE_FORMAT_Z24X8_UNORM:
{
const uint *ptrc
@@ -1420,7 +472,7 @@ pipe_get_tile_z(struct pipe_transfer *pt,
}
}
break;
- case PIPE_FORMAT_S8Z24_UNORM:
+ case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
case PIPE_FORMAT_X8Z24_UNORM:
{
const uint *ptrc
@@ -1453,26 +505,26 @@ pipe_get_tile_z(struct pipe_transfer *pt,
assert(0);
}
- screen->transfer_unmap(screen, pt);
+ pipe->transfer_unmap(pipe, pt);
}
void
-pipe_put_tile_z(struct pipe_transfer *pt,
+pipe_put_tile_z(struct pipe_context *pipe,
+ struct pipe_transfer *pt,
uint x, uint y, uint w, uint h,
const uint *zSrc)
{
- struct pipe_screen *screen = pt->texture->screen;
const uint srcStride = w;
const uint *ptrc = zSrc;
ubyte *map;
uint i, j;
- enum pipe_format format = pt->texture->format;
+ enum pipe_format format = pt->resource->format;
- if (pipe_clip_tile(x, y, &w, &h, pt))
+ if (u_clip_tile(x, y, &w, &h, &pt->box))
return;
- map = (ubyte *)screen->transfer_map(screen, pt);
+ map = (ubyte *)pipe->transfer_map(pipe, pt);
if (!map) {
assert(0);
return;
@@ -1489,10 +541,10 @@ pipe_put_tile_z(struct pipe_transfer *pt,
}
}
break;
- case PIPE_FORMAT_Z24S8_UNORM:
+ case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
{
uint *pDest = (uint *) (map + y * pt->stride + x*4);
- assert((pt->usage & PIPE_TRANSFER_READ_WRITE) == PIPE_TRANSFER_READ_WRITE);
+ /*assert((pt->usage & PIPE_TRANSFER_READ_WRITE) == PIPE_TRANSFER_READ_WRITE);*/
for (i = 0; i < h; i++) {
for (j = 0; j < w; j++) {
/* convert 32-bit Z to 24-bit Z, preserve stencil */
@@ -1516,10 +568,10 @@ pipe_put_tile_z(struct pipe_transfer *pt,
}
}
break;
- case PIPE_FORMAT_S8Z24_UNORM:
+ case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
{
uint *pDest = (uint *) (map + y * pt->stride + x*4);
- assert((pt->usage & PIPE_TRANSFER_READ_WRITE) == PIPE_TRANSFER_READ_WRITE);
+ /*assert((pt->usage & PIPE_TRANSFER_READ_WRITE) == PIPE_TRANSFER_READ_WRITE);*/
for (i = 0; i < h; i++) {
for (j = 0; j < w; j++) {
/* convert 32-bit Z to 24-bit Z, preserve stencil */
@@ -1560,7 +612,7 @@ pipe_put_tile_z(struct pipe_transfer *pt,
assert(0);
}
- screen->transfer_unmap(screen, pt);
+ pipe->transfer_unmap(pipe, pt);
}
diff --git a/src/gallium/auxiliary/util/u_tile.h b/src/gallium/auxiliary/util/u_tile.h
index 1453af38b8a..986eee07435 100644
--- a/src/gallium/auxiliary/util/u_tile.h
+++ b/src/gallium/auxiliary/util/u_tile.h
@@ -32,22 +32,24 @@
struct pipe_transfer;
-
/**
* Clip tile against transfer dims.
+ *
+ * XXX: this only clips width and height!
+ *
* \return TRUE if tile is totally clipped, FALSE otherwise
*/
static INLINE boolean
-pipe_clip_tile(uint x, uint y, uint *w, uint *h, const struct pipe_transfer *pt)
+u_clip_tile(uint x, uint y, uint *w, uint *h, const struct pipe_box *box)
{
- if (x >= pt->width)
+ if (x >= box->width)
return TRUE;
- if (y >= pt->height)
+ if (y >= box->height)
return TRUE;
- if (x + *w > pt->width)
- *w = pt->width - x;
- if (y + *h > pt->height)
- *h = pt->height - y;
+ if (x + *w > box->width)
+ *w = box->width - x;
+ if (y + *h > box->height)
+ *h = box->height - y;
return FALSE;
}
@@ -56,34 +58,54 @@ extern "C" {
#endif
void
-pipe_get_tile_raw(struct pipe_transfer *pt,
+pipe_get_tile_raw(struct pipe_context *pipe,
+ struct pipe_transfer *pt,
uint x, uint y, uint w, uint h,
void *p, int dst_stride);
void
-pipe_put_tile_raw(struct pipe_transfer *pt,
+pipe_put_tile_raw(struct pipe_context *pipe,
+ struct pipe_transfer *pt,
uint x, uint y, uint w, uint h,
const void *p, int src_stride);
void
-pipe_get_tile_rgba(struct pipe_transfer *pt,
+pipe_get_tile_rgba(struct pipe_context *pipe,
+ struct pipe_transfer *pt,
uint x, uint y, uint w, uint h,
float *p);
void
-pipe_put_tile_rgba(struct pipe_transfer *pt,
+pipe_get_tile_swizzle(struct pipe_context *pipe,
+ struct pipe_transfer *pt,
+ uint x,
+ uint y,
+ uint w,
+ uint h,
+ uint swizzle_r,
+ uint swizzle_g,
+ uint swizzle_b,
+ uint swizzle_a,
+ enum pipe_format format,
+ float *p);
+
+void
+pipe_put_tile_rgba(struct pipe_context *pipe,
+ struct pipe_transfer *pt,
uint x, uint y, uint w, uint h,
const float *p);
void
-pipe_get_tile_z(struct pipe_transfer *pt,
+pipe_get_tile_z(struct pipe_context *pipe,
+ struct pipe_transfer *pt,
uint x, uint y, uint w, uint h,
uint *z);
void
-pipe_put_tile_z(struct pipe_transfer *pt,
+pipe_put_tile_z(struct pipe_context *pipe,
+ struct pipe_transfer *pt,
uint x, uint y, uint w, uint h,
const uint *z);
diff --git a/src/gallium/auxiliary/util/u_timed_winsys.c b/src/gallium/auxiliary/util/u_timed_winsys.c
deleted file mode 100644
index d88298bc14c..00000000000
--- a/src/gallium/auxiliary/util/u_timed_winsys.c
+++ /dev/null
@@ -1,312 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- *
- **************************************************************************/
-/*
- * Authors: Keith Whitwell <keithw-at-tungstengraphics-dot-com>
- */
-
-#include "pipe/p_state.h"
-#include "util/u_simple_screen.h"
-#include "u_timed_winsys.h"
-#include "util/u_memory.h"
-#include "os/os_time.h"
-
-
-struct timed_winsys {
- struct pipe_winsys base;
- struct pipe_winsys *backend;
- uint64_t last_dump;
- struct {
- const char *name_key;
- double total;
- unsigned calls;
- } funcs[13];
-};
-
-
-static struct timed_winsys *timed_winsys( struct pipe_winsys *winsys )
-{
- return (struct timed_winsys *)winsys;
-}
-
-
-static void time_display( struct pipe_winsys *winsys )
-{
- struct timed_winsys *tws = timed_winsys(winsys);
- unsigned i;
- double overall = 0;
-
- for (i = 0; i < Elements(tws->funcs); i++) {
- if (tws->funcs[i].name_key) {
- debug_printf("*** %-25s %5.3fms (%d calls, avg %.3fms)\n",
- tws->funcs[i].name_key,
- tws->funcs[i].total,
- tws->funcs[i].calls,
- tws->funcs[i].total / tws->funcs[i].calls);
- overall += tws->funcs[i].total;
- tws->funcs[i].calls = 0;
- tws->funcs[i].total = 0;
- }
- }
-
- debug_printf("*** %-25s %5.3fms\n",
- "OVERALL WINSYS",
- overall);
-}
-
-static void time_finish( struct pipe_winsys *winsys,
- long long startval,
- unsigned idx,
- const char *name )
-{
- struct timed_winsys *tws = timed_winsys(winsys);
- int64_t endval = os_time_get();
- double elapsed = (endval - startval)/1000.0;
-
- if (endval - startval > 1000LL)
- debug_printf("*** %s %.3f\n", name, elapsed );
-
- assert( tws->funcs[idx].name_key == name ||
- tws->funcs[idx].name_key == NULL);
-
- tws->funcs[idx].name_key = name;
- tws->funcs[idx].total += elapsed;
- tws->funcs[idx].calls++;
-
- if (endval - tws->last_dump > 10LL * 1000LL * 1000LL) {
- time_display( winsys );
- tws->last_dump = endval;
- }
-}
-
-
-/* Pipe has no concept of pools, but the psb driver passes a flag that
- * can be mapped onto pools in the backend.
- */
-static struct pipe_buffer *
-timed_buffer_create(struct pipe_winsys *winsys,
- unsigned alignment,
- unsigned usage,
- unsigned size )
-{
- struct pipe_winsys *backend = timed_winsys(winsys)->backend;
- int64_t start = os_time_get();
-
- struct pipe_buffer *buf =
- backend->buffer_create( backend, alignment, usage, size );
-
- time_finish(winsys, start, 0, __FUNCTION__);
-
- return buf;
-}
-
-
-
-
-static struct pipe_buffer *
-timed_user_buffer_create(struct pipe_winsys *winsys,
- void *data,
- unsigned bytes)
-{
- struct pipe_winsys *backend = timed_winsys(winsys)->backend;
- int64_t start = os_time_get();
-
- struct pipe_buffer *buf = backend->user_buffer_create( backend, data, bytes );
-
- time_finish(winsys, start, 1, __FUNCTION__);
-
- return buf;
-}
-
-
-static void *
-timed_buffer_map(struct pipe_winsys *winsys,
- struct pipe_buffer *buf,
- unsigned flags)
-{
- struct pipe_winsys *backend = timed_winsys(winsys)->backend;
- int64_t start = os_time_get();
-
- void *map = backend->buffer_map( backend, buf, flags );
-
- time_finish(winsys, start, 2, __FUNCTION__);
-
- return map;
-}
-
-
-static void
-timed_buffer_unmap(struct pipe_winsys *winsys,
- struct pipe_buffer *buf)
-{
- struct pipe_winsys *backend = timed_winsys(winsys)->backend;
- int64_t start = os_time_get();
-
- backend->buffer_unmap( backend, buf );
-
- time_finish(winsys, start, 3, __FUNCTION__);
-}
-
-
-static void
-timed_buffer_destroy(struct pipe_buffer *buf)
-{
- struct pipe_winsys *winsys = buf->screen->winsys;
- struct pipe_winsys *backend = timed_winsys(winsys)->backend;
- int64_t start = os_time_get();
-
- backend->buffer_destroy( buf );
-
- time_finish(winsys, start, 4, __FUNCTION__);
-}
-
-
-static void
-timed_flush_frontbuffer( struct pipe_winsys *winsys,
- struct pipe_surface *surf,
- void *context_private)
-{
- struct pipe_winsys *backend = timed_winsys(winsys)->backend;
- int64_t start = os_time_get();
-
- backend->flush_frontbuffer( backend, surf, context_private );
-
- time_finish(winsys, start, 5, __FUNCTION__);
-}
-
-
-
-
-static struct pipe_buffer *
-timed_surface_buffer_create(struct pipe_winsys *winsys,
- unsigned width, unsigned height,
- enum pipe_format format,
- unsigned usage,
- unsigned tex_usage,
- unsigned *stride)
-{
- struct pipe_winsys *backend = timed_winsys(winsys)->backend;
- int64_t start = os_time_get();
-
- struct pipe_buffer *ret = backend->surface_buffer_create( backend, width, height,
- format, usage, tex_usage, stride );
-
- time_finish(winsys, start, 7, __FUNCTION__);
-
- return ret;
-}
-
-
-static const char *
-timed_get_name( struct pipe_winsys *winsys )
-{
- struct pipe_winsys *backend = timed_winsys(winsys)->backend;
- int64_t start = os_time_get();
-
- const char *ret = backend->get_name( backend );
-
- time_finish(winsys, start, 9, __FUNCTION__);
-
- return ret;
-}
-
-static void
-timed_fence_reference(struct pipe_winsys *winsys,
- struct pipe_fence_handle **ptr,
- struct pipe_fence_handle *fence)
-{
- struct pipe_winsys *backend = timed_winsys(winsys)->backend;
- int64_t start = os_time_get();
-
- backend->fence_reference( backend, ptr, fence );
-
- time_finish(winsys, start, 10, __FUNCTION__);
-}
-
-
-static int
-timed_fence_signalled( struct pipe_winsys *winsys,
- struct pipe_fence_handle *fence,
- unsigned flag )
-{
- struct pipe_winsys *backend = timed_winsys(winsys)->backend;
- int64_t start = os_time_get();
-
- int ret = backend->fence_signalled( backend, fence, flag );
-
- time_finish(winsys, start, 11, __FUNCTION__);
-
- return ret;
-}
-
-static int
-timed_fence_finish( struct pipe_winsys *winsys,
- struct pipe_fence_handle *fence,
- unsigned flag )
-{
- struct pipe_winsys *backend = timed_winsys(winsys)->backend;
- int64_t start = os_time_get();
-
- int ret = backend->fence_finish( backend, fence, flag );
-
- time_finish(winsys, start, 12, __FUNCTION__);
-
- return ret;
-}
-
-static void
-timed_winsys_destroy( struct pipe_winsys *winsys )
-{
- struct pipe_winsys *backend = timed_winsys(winsys)->backend;
- backend->destroy( backend );
- FREE(winsys);
-}
-
-
-
-struct pipe_winsys *u_timed_winsys_create( struct pipe_winsys *backend )
-{
- struct timed_winsys *ws = CALLOC_STRUCT(timed_winsys);
-
- ws->base.user_buffer_create = timed_user_buffer_create;
- ws->base.buffer_map = timed_buffer_map;
- ws->base.buffer_unmap = timed_buffer_unmap;
- ws->base.buffer_destroy = timed_buffer_destroy;
- ws->base.buffer_create = timed_buffer_create;
- ws->base.surface_buffer_create = timed_surface_buffer_create;
- ws->base.flush_frontbuffer = timed_flush_frontbuffer;
- ws->base.get_name = timed_get_name;
- ws->base.fence_reference = timed_fence_reference;
- ws->base.fence_signalled = timed_fence_signalled;
- ws->base.fence_finish = timed_fence_finish;
- ws->base.destroy = timed_winsys_destroy;
-
- ws->backend = backend;
-
- return &ws->base;
-}
-
diff --git a/src/gallium/auxiliary/util/u_transfer.c b/src/gallium/auxiliary/util/u_transfer.c
new file mode 100644
index 00000000000..bedace3b1dc
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_transfer.c
@@ -0,0 +1,110 @@
+#include "pipe/p_context.h"
+#include "util/u_rect.h"
+#include "util/u_inlines.h"
+#include "util/u_transfer.h"
+#include "util/u_memory.h"
+
+/* One-shot transfer operation with data supplied in a user
+ * pointer. XXX: strides??
+ */
+void u_default_transfer_inline_write( struct pipe_context *pipe,
+ struct pipe_resource *resource,
+ struct pipe_subresource sr,
+ unsigned usage,
+ const struct pipe_box *box,
+ const void *data,
+ unsigned stride,
+ unsigned slice_stride)
+{
+ struct pipe_transfer *transfer = NULL;
+ uint8_t *map = NULL;
+
+ transfer = pipe->get_transfer(pipe,
+ resource,
+ sr,
+ usage,
+ box );
+ if (transfer == NULL)
+ goto out;
+
+ map = pipe_transfer_map(pipe, transfer);
+ if (map == NULL)
+ goto out;
+
+ assert(box->depth == 1); /* XXX: fix me */
+
+ util_copy_rect(map,
+ resource->format,
+ transfer->stride, /* bytes? */
+ 0, 0,
+ box->width,
+ box->height,
+ data,
+ box->width, /* bytes? texels? */
+ 0, 0);
+
+out:
+ if (map)
+ pipe_transfer_unmap(pipe, transfer);
+
+ if (transfer)
+ pipe_transfer_destroy(pipe, transfer);
+}
+
+
+boolean u_default_resource_get_handle(struct pipe_screen *screen,
+ struct pipe_resource *resource,
+ struct winsys_handle *handle)
+{
+ return FALSE;
+}
+
+
+
+void u_default_transfer_flush_region( struct pipe_context *pipe,
+ struct pipe_transfer *transfer,
+ const struct pipe_box *box)
+{
+ /* This is a no-op implementation, nothing to do.
+ */
+}
+
+unsigned u_default_is_resource_referenced( struct pipe_context *pipe,
+ struct pipe_resource *resource,
+ unsigned face, unsigned level)
+{
+ return 0;
+}
+
+struct pipe_transfer * u_default_get_transfer(struct pipe_context *context,
+ struct pipe_resource *resource,
+ struct pipe_subresource sr,
+ unsigned usage,
+ const struct pipe_box *box)
+{
+ struct pipe_transfer *transfer = CALLOC_STRUCT(pipe_transfer);
+ if (transfer == NULL)
+ return NULL;
+
+ transfer->resource = resource;
+ transfer->sr = sr;
+ transfer->usage = usage;
+ transfer->box = *box;
+
+ /* Note strides are zero, this is ok for buffers, but not for
+ * textures 2d & higher at least.
+ */
+ return transfer;
+}
+
+void u_default_transfer_unmap( struct pipe_context *pipe,
+ struct pipe_transfer *transfer )
+{
+}
+
+void u_default_transfer_destroy(struct pipe_context *pipe,
+ struct pipe_transfer *transfer)
+{
+ FREE(transfer);
+}
+
diff --git a/src/gallium/auxiliary/util/u_transfer.h b/src/gallium/auxiliary/util/u_transfer.h
new file mode 100644
index 00000000000..eb07945d15f
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_transfer.h
@@ -0,0 +1,145 @@
+
+#ifndef U_TRANSFER_H
+#define U_TRANSFER_H
+
+/* Fallback implementations for inline read/writes which just go back
+ * to the regular transfer behaviour.
+ */
+#include "pipe/p_state.h"
+
+struct pipe_context;
+
+boolean u_default_resource_get_handle(struct pipe_screen *screen,
+ struct pipe_resource *resource,
+ struct winsys_handle *handle);
+
+void u_default_transfer_inline_write( struct pipe_context *pipe,
+ struct pipe_resource *resource,
+ struct pipe_subresource sr,
+ unsigned usage,
+ const struct pipe_box *box,
+ const void *data,
+ unsigned stride,
+ unsigned slice_stride);
+
+void u_default_transfer_flush_region( struct pipe_context *pipe,
+ struct pipe_transfer *transfer,
+ const struct pipe_box *box);
+
+unsigned u_default_is_resource_referenced( struct pipe_context *pipe,
+ struct pipe_resource *resource,
+ unsigned face, unsigned level);
+
+struct pipe_transfer * u_default_get_transfer(struct pipe_context *context,
+ struct pipe_resource *resource,
+ struct pipe_subresource sr,
+ unsigned usage,
+ const struct pipe_box *box);
+
+void u_default_transfer_unmap( struct pipe_context *pipe,
+ struct pipe_transfer *transfer );
+
+void u_default_transfer_destroy(struct pipe_context *pipe,
+ struct pipe_transfer *transfer);
+
+
+
+/* Useful helper to allow >1 implementation of resource functionality
+ * to exist in a single driver. This is intended to be transitionary!
+ */
+struct u_resource_vtbl {
+
+ boolean (*resource_get_handle)(struct pipe_screen *,
+ struct pipe_resource *tex,
+ struct winsys_handle *handle);
+
+ void (*resource_destroy)(struct pipe_screen *,
+ struct pipe_resource *pt);
+
+ unsigned (*is_resource_referenced)(struct pipe_context *pipe,
+ struct pipe_resource *texture,
+ unsigned face, unsigned level);
+
+ struct pipe_transfer *(*get_transfer)(struct pipe_context *,
+ struct pipe_resource *resource,
+ struct pipe_subresource,
+ unsigned usage,
+ const struct pipe_box *);
+
+ void (*transfer_destroy)(struct pipe_context *,
+ struct pipe_transfer *);
+
+ void *(*transfer_map)( struct pipe_context *,
+ struct pipe_transfer *transfer );
+
+ void (*transfer_flush_region)( struct pipe_context *,
+ struct pipe_transfer *transfer,
+ const struct pipe_box *);
+
+ void (*transfer_unmap)( struct pipe_context *,
+ struct pipe_transfer *transfer );
+
+ void (*transfer_inline_write)( struct pipe_context *pipe,
+ struct pipe_resource *resource,
+ struct pipe_subresource sr,
+ unsigned usage,
+ const struct pipe_box *box,
+ const void *data,
+ unsigned stride,
+ unsigned slice_stride);
+};
+
+
+struct u_resource {
+ struct pipe_resource b;
+ struct u_resource_vtbl *vtbl;
+};
+
+
+boolean u_resource_get_handle_vtbl(struct pipe_screen *screen,
+ struct pipe_resource *resource,
+ struct winsys_handle *handle);
+
+void u_resource_destroy_vtbl(struct pipe_screen *screen,
+ struct pipe_resource *resource);
+
+unsigned u_is_resource_referenced_vtbl( struct pipe_context *pipe,
+ struct pipe_resource *resource,
+ unsigned face, unsigned level);
+
+struct pipe_transfer *u_get_transfer_vtbl(struct pipe_context *context,
+ struct pipe_resource *resource,
+ struct pipe_subresource sr,
+ unsigned usage,
+ const struct pipe_box *box);
+
+void u_transfer_destroy_vtbl(struct pipe_context *pipe,
+ struct pipe_transfer *transfer);
+
+void *u_transfer_map_vtbl( struct pipe_context *pipe,
+ struct pipe_transfer *transfer );
+
+void u_transfer_flush_region_vtbl( struct pipe_context *pipe,
+ struct pipe_transfer *transfer,
+ const struct pipe_box *box);
+
+void u_transfer_unmap_vtbl( struct pipe_context *rm_ctx,
+ struct pipe_transfer *transfer );
+
+void u_transfer_inline_write_vtbl( struct pipe_context *rm_ctx,
+ struct pipe_resource *resource,
+ struct pipe_subresource sr,
+ unsigned usage,
+ const struct pipe_box *box,
+ const void *data,
+ unsigned stride,
+ unsigned slice_stride);
+
+
+
+
+
+
+
+
+#endif
diff --git a/src/gallium/auxiliary/util/u_upload_mgr.c b/src/gallium/auxiliary/util/u_upload_mgr.c
index 012b2ae2336..75d44432d9e 100644
--- a/src/gallium/auxiliary/util/u_upload_mgr.c
+++ b/src/gallium/auxiliary/util/u_upload_mgr.c
@@ -31,7 +31,7 @@
#include "pipe/p_defines.h"
#include "util/u_inlines.h"
-#include "pipe/p_screen.h"
+#include "pipe/p_context.h"
#include "util/u_memory.h"
#include "util/u_math.h"
@@ -39,7 +39,7 @@
struct u_upload_mgr {
- struct pipe_screen *screen;
+ struct pipe_context *pipe;
unsigned default_size;
unsigned alignment;
@@ -47,21 +47,21 @@ struct u_upload_mgr {
/* The active buffer:
*/
- struct pipe_buffer *buffer;
+ struct pipe_resource *buffer;
unsigned size;
unsigned offset;
};
-struct u_upload_mgr *u_upload_create( struct pipe_screen *screen,
+struct u_upload_mgr *u_upload_create( struct pipe_context *pipe,
unsigned default_size,
unsigned alignment,
unsigned usage )
{
struct u_upload_mgr *upload = CALLOC_STRUCT( u_upload_mgr );
+ upload->pipe = pipe;
upload->default_size = default_size;
- upload->screen = screen;
upload->alignment = alignment;
upload->usage = usage;
upload->buffer = NULL;
@@ -69,31 +69,44 @@ struct u_upload_mgr *u_upload_create( struct pipe_screen *screen,
return upload;
}
-
+/* Slightly specialized version of buffer_write designed to maximize
+ * chances of the driver consolidating successive writes into a single
+ * upload.
+ *
+ * dirty_size may be slightly greater than size to cope with
+ * alignment. We don't want to leave holes between succesively mapped
+ * regions as that may prevent the driver from consolidating uploads.
+ *
+ * Note that the 'data' pointer has probably come from the application
+ * and we cannot read even a byte past its end without risking
+ * segfaults, or at least complaints from valgrind..
+ */
static INLINE enum pipe_error
-my_buffer_write(struct pipe_screen *screen,
- struct pipe_buffer *buf,
+my_buffer_write(struct pipe_context *pipe,
+ struct pipe_resource *buf,
unsigned offset, unsigned size, unsigned dirty_size,
const void *data)
{
+ struct pipe_transfer *transfer = NULL;
uint8_t *map;
- assert(offset < buf->size);
- assert(offset + size <= buf->size);
+ assert(offset < buf->width0);
+ assert(offset + size <= buf->width0);
assert(dirty_size >= size);
assert(size);
- map = pipe_buffer_map_range(screen, buf, offset, size,
- PIPE_BUFFER_USAGE_CPU_WRITE |
- PIPE_BUFFER_USAGE_FLUSH_EXPLICIT |
- PIPE_BUFFER_USAGE_DISCARD |
- PIPE_BUFFER_USAGE_UNSYNCHRONIZED);
+ map = pipe_buffer_map_range(pipe, buf, offset, dirty_size,
+ PIPE_TRANSFER_WRITE |
+ PIPE_TRANSFER_FLUSH_EXPLICIT |
+ PIPE_TRANSFER_DISCARD |
+ PIPE_TRANSFER_UNSYNCHRONIZED,
+ &transfer);
if (map == NULL)
return PIPE_ERROR_OUT_OF_MEMORY;
memcpy(map + offset, data, size);
- pipe_buffer_flush_mapped_range(screen, buf, offset, dirty_size);
- pipe_buffer_unmap(screen, buf);
+ pipe_buffer_flush_mapped_range(pipe, transfer, offset, dirty_size);
+ pipe_buffer_unmap(pipe, buf, transfer);
return PIPE_OK;
}
@@ -109,7 +122,7 @@ my_buffer_write(struct pipe_screen *screen,
*/
void u_upload_flush( struct u_upload_mgr *upload )
{
- pipe_buffer_reference( &upload->buffer, NULL );
+ pipe_resource_reference( &upload->buffer, NULL );
upload->size = 0;
}
@@ -135,9 +148,8 @@ u_upload_alloc_buffer( struct u_upload_mgr *upload,
*/
size = align(MAX2(upload->default_size, min_size), 4096);
- upload->buffer = pipe_buffer_create( upload->screen,
- upload->alignment,
- upload->usage | PIPE_BUFFER_USAGE_CPU_WRITE,
+ upload->buffer = pipe_buffer_create( upload->pipe->screen,
+ upload->usage,
size );
if (upload->buffer == NULL)
goto fail;
@@ -149,7 +161,7 @@ u_upload_alloc_buffer( struct u_upload_mgr *upload,
fail:
if (upload->buffer)
- pipe_buffer_reference( &upload->buffer, NULL );
+ pipe_resource_reference( &upload->buffer, NULL );
return PIPE_ERROR_OUT_OF_MEMORY;
}
@@ -159,7 +171,7 @@ enum pipe_error u_upload_data( struct u_upload_mgr *upload,
unsigned size,
const void *data,
unsigned *out_offset,
- struct pipe_buffer **outbuf )
+ struct pipe_resource **outbuf )
{
unsigned alloc_size = align( size, upload->alignment );
enum pipe_error ret = PIPE_OK;
@@ -172,7 +184,7 @@ enum pipe_error u_upload_data( struct u_upload_mgr *upload,
/* Copy the data, using map_range if available:
*/
- ret = my_buffer_write( upload->screen,
+ ret = my_buffer_write( upload->pipe,
upload->buffer,
upload->offset,
size,
@@ -183,7 +195,7 @@ enum pipe_error u_upload_data( struct u_upload_mgr *upload,
/* Emit the return values:
*/
- pipe_buffer_reference( outbuf, upload->buffer );
+ pipe_resource_reference( outbuf, upload->buffer );
*out_offset = upload->offset;
upload->offset += alloc_size;
return PIPE_OK;
@@ -198,15 +210,18 @@ enum pipe_error u_upload_data( struct u_upload_mgr *upload,
enum pipe_error u_upload_buffer( struct u_upload_mgr *upload,
unsigned offset,
unsigned size,
- struct pipe_buffer *inbuf,
+ struct pipe_resource *inbuf,
unsigned *out_offset,
- struct pipe_buffer **outbuf )
+ struct pipe_resource **outbuf )
{
enum pipe_error ret = PIPE_OK;
+ struct pipe_transfer *transfer = NULL;
const char *map = NULL;
- map = (const char *)pipe_buffer_map(
- upload->screen, inbuf, PIPE_BUFFER_USAGE_CPU_READ );
+ map = (const char *)pipe_buffer_map(upload->pipe,
+ inbuf,
+ PIPE_TRANSFER_READ,
+ &transfer);
if (map == NULL) {
ret = PIPE_ERROR_OUT_OF_MEMORY;
@@ -226,7 +241,7 @@ enum pipe_error u_upload_buffer( struct u_upload_mgr *upload,
done:
if (map)
- pipe_buffer_unmap( upload->screen, inbuf );
+ pipe_buffer_unmap( upload->pipe, inbuf, transfer );
return ret;
}
diff --git a/src/gallium/auxiliary/util/u_upload_mgr.h b/src/gallium/auxiliary/util/u_upload_mgr.h
index e158bed9d04..a124924fc80 100644
--- a/src/gallium/auxiliary/util/u_upload_mgr.h
+++ b/src/gallium/auxiliary/util/u_upload_mgr.h
@@ -35,11 +35,11 @@
#include "pipe/p_defines.h"
struct pipe_screen;
-struct pipe_buffer;
+struct pipe_resource;
struct u_upload_mgr;
-struct u_upload_mgr *u_upload_create( struct pipe_screen *screen,
+struct u_upload_mgr *u_upload_create( struct pipe_context *pipe,
unsigned default_size,
unsigned alignment,
unsigned usage );
@@ -61,15 +61,15 @@ enum pipe_error u_upload_data( struct u_upload_mgr *upload,
unsigned size,
const void *data,
unsigned *out_offset,
- struct pipe_buffer **outbuf );
+ struct pipe_resource **outbuf );
enum pipe_error u_upload_buffer( struct u_upload_mgr *upload,
unsigned offset,
unsigned size,
- struct pipe_buffer *inbuf,
+ struct pipe_resource *inbuf,
unsigned *out_offset,
- struct pipe_buffer **outbuf );
+ struct pipe_resource **outbuf );
diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index b11f45fe5f5..8f21eb68d64 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -194,6 +194,7 @@ static bool
init_buffers(struct vl_compositor *c)
{
struct fragment_shader_consts fsc;
+ struct pipe_vertex_element vertex_elems[2];
assert(c);
@@ -203,34 +204,33 @@ init_buffers(struct vl_compositor *c)
c->vertex_buf.stride = sizeof(struct vertex4f);
c->vertex_buf.max_index = (VL_COMPOSITOR_MAX_LAYERS + 2) * 6 - 1;
c->vertex_buf.buffer_offset = 0;
+ /* XXX: Create with DYNAMIC or STREAM */
c->vertex_buf.buffer = pipe_buffer_create
(
c->pipe->screen,
- 1,
- PIPE_BUFFER_USAGE_VERTEX,
+ PIPE_BIND_VERTEX_BUFFER,
sizeof(struct vertex4f) * (VL_COMPOSITOR_MAX_LAYERS + 2) * 6
);
- c->vertex_elems[0].src_offset = 0;
- c->vertex_elems[0].instance_divisor = 0;
- c->vertex_elems[0].vertex_buffer_index = 0;
- c->vertex_elems[0].nr_components = 2;
- c->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
- c->vertex_elems[1].src_offset = sizeof(struct vertex2f);
- c->vertex_elems[1].instance_divisor = 0;
- c->vertex_elems[1].vertex_buffer_index = 0;
- c->vertex_elems[1].nr_components = 2;
- c->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
+ vertex_elems[0].src_offset = 0;
+ vertex_elems[0].instance_divisor = 0;
+ vertex_elems[0].vertex_buffer_index = 0;
+ vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
+ vertex_elems[1].src_offset = sizeof(struct vertex2f);
+ vertex_elems[1].instance_divisor = 0;
+ vertex_elems[1].vertex_buffer_index = 0;
+ vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
+ c->vertex_elems_state = c->pipe->create_vertex_elements_state(c->pipe, 2, vertex_elems);
/*
* Create our fragment shader's constant buffer
* Const buffer contains the color conversion matrix and bias vectors
*/
+ /* XXX: Create with IMMUTABLE/STATIC... although it does change every once in a long while... */
c->fs_const_buf = pipe_buffer_create
(
c->pipe->screen,
- 1,
- PIPE_BUFFER_USAGE_CONSTANT,
+ PIPE_BIND_CONSTANT_BUFFER,
sizeof(struct fragment_shader_consts)
);
@@ -246,8 +246,9 @@ cleanup_buffers(struct vl_compositor *c)
{
assert(c);
- pipe_buffer_reference(&c->vertex_buf.buffer, NULL);
- pipe_buffer_reference(&c->fs_const_buf, NULL);
+ c->pipe->delete_vertex_elements_state(c->pipe, c->vertex_elems_state);
+ pipe_resource_reference(&c->vertex_buf.buffer, NULL);
+ pipe_resource_reference(&c->fs_const_buf, NULL);
}
bool vl_compositor_init(struct vl_compositor *compositor, struct pipe_context *pipe)
@@ -391,6 +392,7 @@ static unsigned gen_data(struct vl_compositor *c,
struct pipe_surface **textures)
{
void *vb;
+ struct pipe_transfer *buf_transfer;
unsigned num_rects = 0;
unsigned i;
@@ -400,8 +402,9 @@ static unsigned gen_data(struct vl_compositor *c,
assert(dst_rect);
assert(textures);
- vb = pipe_buffer_map(c->pipe->screen, c->vertex_buf.buffer,
- PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD);
+ vb = pipe_buffer_map(c->pipe, c->vertex_buf.buffer,
+ PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+ &buf_transfer);
if (!vb)
return 0;
@@ -434,7 +437,7 @@ static unsigned gen_data(struct vl_compositor *c,
}
}
- pipe_buffer_unmap(c->pipe->screen, c->vertex_buf.buffer);
+ pipe_buffer_unmap(c->pipe, c->vertex_buf.buffer, buf_transfer);
return num_rects;
}
@@ -456,7 +459,7 @@ static void draw_layers(struct vl_compositor *c,
num_rects = gen_data(c, src_surface, src_rect, dst_rect, src_surfaces);
for (i = 0; i < num_rects; ++i) {
- c->pipe->set_fragment_sampler_textures(c->pipe, 1, &src_surfaces[i]->texture);
+ //c->pipe->set_fragment_sampler_views(c->pipe, 1, &src_surfaces[i]->texture);
c->pipe->draw_arrays(c->pipe, PIPE_PRIM_TRIANGLES, i * 6, 6);
}
}
@@ -506,7 +509,7 @@ void vl_compositor_render(struct vl_compositor *compositor,
compositor->pipe->bind_vs_state(compositor->pipe, compositor->vertex_shader);
compositor->pipe->bind_fs_state(compositor->pipe, compositor->fragment_shader);
compositor->pipe->set_vertex_buffers(compositor->pipe, 1, &compositor->vertex_buf);
- compositor->pipe->set_vertex_elements(compositor->pipe, 2, compositor->vertex_elems);
+ compositor->pipe->bind_vertex_elements_state(compositor->pipe, compositor->vertex_elems_state);
compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_FRAGMENT, 0, compositor->fs_const_buf);
draw_layers(compositor, src_surface, src_area, dst_area);
@@ -517,15 +520,19 @@ void vl_compositor_render(struct vl_compositor *compositor,
void vl_compositor_set_csc_matrix(struct vl_compositor *compositor, const float *mat)
{
+ struct pipe_transfer *buf_transfer;
+
assert(compositor);
memcpy
(
- pipe_buffer_map(compositor->pipe->screen, compositor->fs_const_buf,
- PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD),
+ pipe_buffer_map(compositor->pipe, compositor->fs_const_buf,
+ PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+ &buf_transfer),
mat,
sizeof(struct fragment_shader_consts)
);
- pipe_buffer_unmap(compositor->pipe->screen, compositor->fs_const_buf);
+ pipe_buffer_unmap(compositor->pipe, compositor->fs_const_buf,
+ buf_transfer);
}
diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h
index 3b1e809b868..4b0cdd6be5f 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.h
+++ b/src/gallium/auxiliary/vl/vl_compositor.h
@@ -44,12 +44,13 @@ struct vl_compositor
struct pipe_framebuffer_state fb_state;
struct vertex2f fb_inv_size;
void *sampler;
+ struct pipe_sampler_view *sampler_view;
void *vertex_shader;
void *fragment_shader;
struct pipe_viewport_state viewport;
struct pipe_vertex_buffer vertex_buf;
- struct pipe_vertex_element vertex_elems[2];
- struct pipe_buffer *fs_const_buf;
+ void *vertex_elems_state;
+ struct pipe_resource *fs_const_buf;
struct pipe_surface *bg;
struct pipe_video_rect bg_src_rect;
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 769ee38c46b..a4a9320d667 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -32,6 +32,7 @@
#include <util/u_format.h>
#include <util/u_math.h>
#include <util/u_memory.h>
+#include <util/u_sampler.h>
#include <tgsi/tgsi_ureg.h>
#define DEFAULT_BUF_ALIGNMENT 1
@@ -378,14 +379,23 @@ xfer_buffers_map(struct vl_mpeg12_mc_renderer *r)
assert(r);
for (i = 0; i < 3; ++i) {
- r->tex_transfer[i] = r->pipe->screen->get_tex_transfer
+ struct pipe_box rect =
+ {
+ 0, 0, 0,
+ r->textures.all[i]->width0,
+ r->textures.all[i]->height0,
+ 0
+ };
+
+ r->tex_transfer[i] = r->pipe->get_transfer
(
- r->pipe->screen, r->textures.all[i],
- 0, 0, 0, PIPE_TRANSFER_WRITE, 0, 0,
- r->textures.all[i]->width0, r->textures.all[i]->height0
+ r->pipe, r->textures.all[i],
+ u_subresource(0, 0),
+ PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+ &rect
);
- r->texels[i] = r->pipe->screen->transfer_map(r->pipe->screen, r->tex_transfer[i]);
+ r->texels[i] = r->pipe->transfer_map(r->pipe, r->tex_transfer[i]);
}
}
@@ -397,8 +407,8 @@ xfer_buffers_unmap(struct vl_mpeg12_mc_renderer *r)
assert(r);
for (i = 0; i < 3; ++i) {
- r->pipe->screen->transfer_unmap(r->pipe->screen, r->tex_transfer[i]);
- r->pipe->screen->tex_transfer_destroy(r->tex_transfer[i]);
+ r->pipe->transfer_unmap(r->pipe, r->tex_transfer[i]);
+ r->pipe->transfer_destroy(r->pipe, r->tex_transfer[i]);
}
}
@@ -509,7 +519,9 @@ cleanup_shaders(struct vl_mpeg12_mc_renderer *r)
static bool
init_buffers(struct vl_mpeg12_mc_renderer *r)
{
- struct pipe_texture template;
+ struct pipe_resource template;
+ struct pipe_vertex_element vertex_elems[8];
+ struct pipe_sampler_view sampler_view;
const unsigned mbw =
align(r->picture_width, MACROBLOCK_WIDTH) / MACROBLOCK_WIDTH;
@@ -525,7 +537,7 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
r->num_macroblocks = 0;
r->macroblock_buf = MALLOC(r->macroblocks_per_batch * sizeof(struct pipe_mpeg12_macroblock));
- memset(&template, 0, sizeof(struct pipe_texture));
+ memset(&template, 0, sizeof(struct pipe_resource));
template.target = PIPE_TEXTURE_2D;
/* TODO: Accomodate HW that can't do this and also for cases when this isn't precise enough */
template.format = PIPE_FORMAT_R16_SNORM;
@@ -535,9 +547,11 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
template.height0 = r->pot_buffers ?
util_next_power_of_two(r->picture_height) : r->picture_height;
template.depth0 = 1;
- template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_DYNAMIC;
+ template.usage = PIPE_USAGE_DYNAMIC;
+ template.bind = PIPE_BIND_SAMPLER_VIEW;
+ template.flags = 0;
- r->textures.individual.y = r->pipe->screen->texture_create(r->pipe->screen, &template);
+ r->textures.individual.y = r->pipe->screen->resource_create(r->pipe->screen, &template);
if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
template.width0 = r->pot_buffers ?
@@ -553,18 +567,25 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
r->picture_height / 2;
r->textures.individual.cb =
- r->pipe->screen->texture_create(r->pipe->screen, &template);
+ r->pipe->screen->resource_create(r->pipe->screen, &template);
r->textures.individual.cr =
- r->pipe->screen->texture_create(r->pipe->screen, &template);
+ r->pipe->screen->resource_create(r->pipe->screen, &template);
+
+ for (i = 0; i < 3; ++i) {
+ u_sampler_view_default_template(&sampler_view,
+ r->textures.all[i],
+ r->textures.all[i]->format);
+ r->sampler_views.all[i] = r->pipe->create_sampler_view(r->pipe, r->textures.all[i], &sampler_view);
+ }
r->vertex_bufs.individual.ycbcr.stride = sizeof(struct vertex2f) * 4;
r->vertex_bufs.individual.ycbcr.max_index = 24 * r->macroblocks_per_batch - 1;
r->vertex_bufs.individual.ycbcr.buffer_offset = 0;
+ /* XXX: Create with usage DYNAMIC or STREAM */
r->vertex_bufs.individual.ycbcr.buffer = pipe_buffer_create
(
r->pipe->screen,
- DEFAULT_BUF_ALIGNMENT,
- PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_DISCARD,
+ PIPE_BIND_VERTEX_BUFFER,
sizeof(struct vertex2f) * 4 * 24 * r->macroblocks_per_batch
);
@@ -572,76 +593,71 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
r->vertex_bufs.all[i].stride = sizeof(struct vertex2f) * 2;
r->vertex_bufs.all[i].max_index = 24 * r->macroblocks_per_batch - 1;
r->vertex_bufs.all[i].buffer_offset = 0;
+ /* XXX: Create with usage DYNAMIC or STREAM */
r->vertex_bufs.all[i].buffer = pipe_buffer_create
(
r->pipe->screen,
- DEFAULT_BUF_ALIGNMENT,
- PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_DISCARD,
+ PIPE_BIND_VERTEX_BUFFER,
sizeof(struct vertex2f) * 2 * 24 * r->macroblocks_per_batch
);
}
/* Position element */
- r->vertex_elems[0].src_offset = 0;
- r->vertex_elems[0].instance_divisor = 0;
- r->vertex_elems[0].vertex_buffer_index = 0;
- r->vertex_elems[0].nr_components = 2;
- r->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
+ vertex_elems[0].src_offset = 0;
+ vertex_elems[0].instance_divisor = 0;
+ vertex_elems[0].vertex_buffer_index = 0;
+ vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
/* Luma, texcoord element */
- r->vertex_elems[1].src_offset = sizeof(struct vertex2f);
- r->vertex_elems[1].instance_divisor = 0;
- r->vertex_elems[1].vertex_buffer_index = 0;
- r->vertex_elems[1].nr_components = 2;
- r->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
+ vertex_elems[1].src_offset = sizeof(struct vertex2f);
+ vertex_elems[1].instance_divisor = 0;
+ vertex_elems[1].vertex_buffer_index = 0;
+ vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
/* Chroma Cr texcoord element */
- r->vertex_elems[2].src_offset = sizeof(struct vertex2f) * 2;
- r->vertex_elems[2].instance_divisor = 0;
- r->vertex_elems[2].vertex_buffer_index = 0;
- r->vertex_elems[2].nr_components = 2;
- r->vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT;
+ vertex_elems[2].src_offset = sizeof(struct vertex2f) * 2;
+ vertex_elems[2].instance_divisor = 0;
+ vertex_elems[2].vertex_buffer_index = 0;
+ vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT;
/* Chroma Cb texcoord element */
- r->vertex_elems[3].src_offset = sizeof(struct vertex2f) * 3;
- r->vertex_elems[3].instance_divisor = 0;
- r->vertex_elems[3].vertex_buffer_index = 0;
- r->vertex_elems[3].nr_components = 2;
- r->vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT;
+ vertex_elems[3].src_offset = sizeof(struct vertex2f) * 3;
+ vertex_elems[3].instance_divisor = 0;
+ vertex_elems[3].vertex_buffer_index = 0;
+ vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT;
/* First ref surface top field texcoord element */
- r->vertex_elems[4].src_offset = 0;
- r->vertex_elems[4].instance_divisor = 0;
- r->vertex_elems[4].vertex_buffer_index = 1;
- r->vertex_elems[4].nr_components = 2;
- r->vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT;
+ vertex_elems[4].src_offset = 0;
+ vertex_elems[4].instance_divisor = 0;
+ vertex_elems[4].vertex_buffer_index = 1;
+ vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT;
/* First ref surface bottom field texcoord element */
- r->vertex_elems[5].src_offset = sizeof(struct vertex2f);
- r->vertex_elems[5].instance_divisor = 0;
- r->vertex_elems[5].vertex_buffer_index = 1;
- r->vertex_elems[5].nr_components = 2;
- r->vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT;
+ vertex_elems[5].src_offset = sizeof(struct vertex2f);
+ vertex_elems[5].instance_divisor = 0;
+ vertex_elems[5].vertex_buffer_index = 1;
+ vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT;
/* Second ref surface top field texcoord element */
- r->vertex_elems[6].src_offset = 0;
- r->vertex_elems[6].instance_divisor = 0;
- r->vertex_elems[6].vertex_buffer_index = 2;
- r->vertex_elems[6].nr_components = 2;
- r->vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT;
+ vertex_elems[6].src_offset = 0;
+ vertex_elems[6].instance_divisor = 0;
+ vertex_elems[6].vertex_buffer_index = 2;
+ vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT;
/* Second ref surface bottom field texcoord element */
- r->vertex_elems[7].src_offset = sizeof(struct vertex2f);
- r->vertex_elems[7].instance_divisor = 0;
- r->vertex_elems[7].vertex_buffer_index = 2;
- r->vertex_elems[7].nr_components = 2;
- r->vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT;
+ vertex_elems[7].src_offset = sizeof(struct vertex2f);
+ vertex_elems[7].instance_divisor = 0;
+ vertex_elems[7].vertex_buffer_index = 2;
+ vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+ r->vertex_elems_state.individual.i = r->pipe->create_vertex_elements_state(r->pipe, 4, vertex_elems);
+ r->vertex_elems_state.individual.p = r->pipe->create_vertex_elements_state(r->pipe, 6, vertex_elems);
+ r->vertex_elems_state.individual.b = r->pipe->create_vertex_elements_state(r->pipe, 8, vertex_elems);
r->vs_const_buf = pipe_buffer_create
(
r->pipe->screen,
- DEFAULT_BUF_ALIGNMENT,
- PIPE_BUFFER_USAGE_CONSTANT | PIPE_BUFFER_USAGE_DISCARD,
+ PIPE_BIND_CONSTANT_BUFFER,
sizeof(struct vertex_shader_consts)
);
@@ -655,13 +671,14 @@ cleanup_buffers(struct vl_mpeg12_mc_renderer *r)
assert(r);
- pipe_buffer_reference(&r->vs_const_buf, NULL);
+ pipe_resource_reference(&r->vs_const_buf, NULL);
- for (i = 0; i < 3; ++i)
- pipe_buffer_reference(&r->vertex_bufs.all[i].buffer, NULL);
-
- for (i = 0; i < 3; ++i)
- pipe_texture_reference(&r->textures.all[i], NULL);
+ for (i = 0; i < 3; ++i) {
+ r->pipe->sampler_view_destroy(r->pipe, r->sampler_views.all[i]);
+ r->pipe->delete_vertex_elements_state(r->pipe, r->vertex_elems_state.all[i]);
+ pipe_resource_reference(&r->vertex_bufs.all[i].buffer, NULL);
+ pipe_resource_reference(&r->textures.all[i], NULL);
+ }
FREE(r->macroblock_buf);
}
@@ -940,6 +957,7 @@ gen_macroblock_stream(struct vl_mpeg12_mc_renderer *r,
unsigned offset[NUM_MACROBLOCK_TYPES];
struct vert_stream_0 *ycbcr_vb;
struct vertex2f *ref_vb[2];
+ struct pipe_transfer *buf_transfer[3];
unsigned i;
assert(r);
@@ -957,17 +975,19 @@ gen_macroblock_stream(struct vl_mpeg12_mc_renderer *r,
ycbcr_vb = (struct vert_stream_0 *)pipe_buffer_map
(
- r->pipe->screen,
+ r->pipe,
r->vertex_bufs.individual.ycbcr.buffer,
- PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
+ PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+ &buf_transfer[0]
);
for (i = 0; i < 2; ++i)
ref_vb[i] = (struct vertex2f *)pipe_buffer_map
(
- r->pipe->screen,
+ r->pipe,
r->vertex_bufs.individual.ref[i].buffer,
- PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
+ PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+ &buf_transfer[i + 1]
);
for (i = 0; i < r->num_macroblocks; ++i) {
@@ -979,9 +999,9 @@ gen_macroblock_stream(struct vl_mpeg12_mc_renderer *r,
++offset[mb_type];
}
- pipe_buffer_unmap(r->pipe->screen, r->vertex_bufs.individual.ycbcr.buffer);
+ pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.ycbcr.buffer, buf_transfer[0]);
for (i = 0; i < 2; ++i)
- pipe_buffer_unmap(r->pipe->screen, r->vertex_bufs.individual.ref[i].buffer);
+ pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.ref[i].buffer, buf_transfer[i + 1]);
}
static void
@@ -990,6 +1010,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
unsigned num_macroblocks[NUM_MACROBLOCK_TYPES] = { 0 };
unsigned vb_start = 0;
struct vertex_shader_consts *vs_consts;
+ struct pipe_transfer *buf_transfer;
unsigned i;
assert(r);
@@ -1004,22 +1025,23 @@ flush(struct vl_mpeg12_mc_renderer *r)
vs_consts = pipe_buffer_map
(
- r->pipe->screen, r->vs_const_buf,
- PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
+ r->pipe, r->vs_const_buf,
+ PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+ &buf_transfer
);
vs_consts->denorm.x = r->surface->width;
vs_consts->denorm.y = r->surface->height;
- pipe_buffer_unmap(r->pipe->screen, r->vs_const_buf);
+ pipe_buffer_unmap(r->pipe, r->vs_const_buf, buf_transfer);
r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_VERTEX, 0,
r->vs_const_buf);
if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) {
r->pipe->set_vertex_buffers(r->pipe, 1, r->vertex_bufs.all);
- r->pipe->set_vertex_elements(r->pipe, 4, r->vertex_elems);
- r->pipe->set_fragment_sampler_textures(r->pipe, 3, r->textures.all);
+ r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.i);
+ r->pipe->set_fragment_sampler_views(r->pipe, 3, r->sampler_views.all);
r->pipe->bind_fragment_sampler_states(r->pipe, 3, r->samplers.all);
r->pipe->bind_vs_state(r->pipe, r->i_vs);
r->pipe->bind_fs_state(r->pipe, r->i_fs);
@@ -1029,11 +1051,11 @@ flush(struct vl_mpeg12_mc_renderer *r)
vb_start += num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24;
}
- if (num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] > 0) {
+ if (false /*num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] > 0*/) {
r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
- r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
+ r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
r->textures.individual.ref[0] = r->past->texture;
- r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
+ r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
@@ -1045,9 +1067,9 @@ flush(struct vl_mpeg12_mc_renderer *r)
if (false /*num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] > 0 */ ) {
r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
- r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
+ r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
r->textures.individual.ref[0] = r->past->texture;
- r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
+ r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
@@ -1057,11 +1079,11 @@ flush(struct vl_mpeg12_mc_renderer *r)
vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24;
}
- if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] > 0) {
+ if (false /*num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] > 0*/) {
r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
- r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
+ r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
r->textures.individual.ref[0] = r->future->texture;
- r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
+ r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
@@ -1071,11 +1093,11 @@ flush(struct vl_mpeg12_mc_renderer *r)
vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24;
}
- if (false /*num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] > 0 */ ) {
+ if (false /*num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] > 0*/ ) {
r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
- r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
+ r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
r->textures.individual.ref[0] = r->future->texture;
- r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
+ r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
@@ -1085,12 +1107,12 @@ flush(struct vl_mpeg12_mc_renderer *r)
vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24;
}
- if (num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] > 0) {
+ if (false /*num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] > 0*/) {
r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
- r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems);
+ r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.b);
r->textures.individual.ref[0] = r->past->texture;
r->textures.individual.ref[1] = r->future->texture;
- r->pipe->set_fragment_sampler_textures(r->pipe, 5, r->textures.all);
+ r->pipe->set_fragment_sampler_views(r->pipe, 5, r->sampler_views.all);
r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all);
r->pipe->bind_vs_state(r->pipe, r->b_vs[0]);
r->pipe->bind_fs_state(r->pipe, r->b_fs[0]);
@@ -1102,10 +1124,10 @@ flush(struct vl_mpeg12_mc_renderer *r)
if (false /*num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] > 0 */ ) {
r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
- r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems);
+ r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.b);
r->textures.individual.ref[0] = r->past->texture;
r->textures.individual.ref[1] = r->future->texture;
- r->pipe->set_fragment_sampler_textures(r->pipe, 5, r->textures.all);
+ r->pipe->set_fragment_sampler_views(r->pipe, 5, r->sampler_views.all);
r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all);
r->pipe->bind_vs_state(r->pipe, r->b_vs[1]);
r->pipe->bind_fs_state(r->pipe, r->b_fs[1]);
@@ -1172,7 +1194,7 @@ grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
assert(r);
assert(blocks);
- tex_pitch = r->tex_transfer[0]->stride / util_format_get_blocksize(r->tex_transfer[0]->texture->format);
+ tex_pitch = r->tex_transfer[0]->stride / util_format_get_blocksize(r->tex_transfer[0]->resource->format);
texels = r->texels[0] + mbpy * tex_pitch + mbpx;
for (y = 0; y < 2; ++y) {
@@ -1211,7 +1233,7 @@ grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
mbpy /= 2;
for (tb = 0; tb < 2; ++tb) {
- tex_pitch = r->tex_transfer[tb + 1]->stride / util_format_get_blocksize(r->tex_transfer[tb + 1]->texture->format);
+ tex_pitch = r->tex_transfer[tb + 1]->stride / util_format_get_blocksize(r->tex_transfer[tb + 1]->resource->format);
texels = r->texels[tb + 1] + mbpy * tex_pitch + mbpx;
if ((cbp >> (1 - tb)) & 1) {
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index 1a216b5ad49..2e37efbc3cd 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -62,9 +62,13 @@ struct vl_mpeg12_mc_renderer
unsigned macroblocks_per_batch;
struct pipe_viewport_state viewport;
- struct pipe_buffer *vs_const_buf;
+ struct pipe_resource *vs_const_buf;
struct pipe_framebuffer_state fb_state;
- struct pipe_vertex_element vertex_elems[8];
+ union
+ {
+ void *all[3];
+ struct { void *i, *p, *b; } individual;
+ } vertex_elems_state;
union
{
@@ -72,13 +76,19 @@ struct vl_mpeg12_mc_renderer
struct { void *y, *cb, *cr, *ref[2]; } individual;
} samplers;
+ union
+ {
+ struct pipe_sampler_view *all[5];
+ struct { struct pipe_sampler_view *y, *cb, *cr, *ref[2]; } individual;
+ } sampler_views;
+
void *i_vs, *p_vs[2], *b_vs[2];
void *i_fs, *p_fs[2], *b_fs[2];
union
{
- struct pipe_texture *all[5];
- struct { struct pipe_texture *y, *cb, *cr, *ref[2]; } individual;
+ struct pipe_resource *all[5];
+ struct { struct pipe_resource *y, *cb, *cr, *ref[2]; } individual;
} textures;
union