From f8f4757d46627fb453f08dc63fde3d7f458eafe2 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 31 Dec 2009 22:18:17 +0000 Subject: scons: Aggregate all tiny libraries in a single library. Makes integration of gallium into out of tree components much easier. No practical change for components in this tree. --- src/gallium/auxiliary/SConscript | 185 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 185 insertions(+) create mode 100644 src/gallium/auxiliary/SConscript (limited to 'src/gallium/auxiliary/SConscript') diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript new file mode 100644 index 00000000000..782eb533863 --- /dev/null +++ b/src/gallium/auxiliary/SConscript @@ -0,0 +1,185 @@ +Import('*') + +from sys import executable as python_cmd + +env.Append(CPPPATH = [ + 'indices', + 'util', +]) + +env.CodeGenerate( + target = 'indices/u_indices_gen.c', + script = 'indices/u_indices_gen.py', + source = [], + command = python_cmd + ' $SCRIPT > $TARGET' +) + +env.CodeGenerate( + target = 'indices/u_unfilled_gen.c', + script = 'indices/u_unfilled_gen.py', + source = [], + command = python_cmd + ' $SCRIPT > $TARGET' +) + +env.CodeGenerate( + target = 'util/u_format_table.c', + script = 'util/u_format_table.py', + source = ['util/u_format.csv'], + command = 'python $SCRIPT $SOURCE > $TARGET' +) + +env.CodeGenerate( + target = 'util/u_format_access.c', + script = 'util/u_format_access.py', + source = ['util/u_format.csv'], + command = 'python $SCRIPT $SOURCE > $TARGET' +) + +source = [ + 'cso_cache/cso_context.c', + 'cso_cache/cso_cache.c', + 'cso_cache/cso_hash.c', + 'draw/draw_context.c', + 'draw/draw_pipe.c', + 'draw/draw_pipe_aaline.c', + 'draw/draw_pipe_aapoint.c', + 'draw/draw_pipe_clip.c', + 'draw/draw_pipe_cull.c', + 'draw/draw_pipe_flatshade.c', + 'draw/draw_pipe_offset.c', + 'draw/draw_pipe_pstipple.c', + 'draw/draw_pipe_stipple.c', + 'draw/draw_pipe_twoside.c', + 'draw/draw_pipe_unfilled.c', + 'draw/draw_pipe_util.c', + 
'draw/draw_pipe_validate.c', + 'draw/draw_pipe_vbuf.c', + 'draw/draw_pipe_wide_line.c', + 'draw/draw_pipe_wide_point.c', + 'draw/draw_pt.c', + 'draw/draw_pt_elts.c', + 'draw/draw_pt_emit.c', + 'draw/draw_pt_fetch.c', + 'draw/draw_pt_fetch_emit.c', + 'draw/draw_pt_fetch_shade_emit.c', + 'draw/draw_pt_fetch_shade_pipeline.c', + 'draw/draw_pt_post_vs.c', + 'draw/draw_pt_util.c', + 'draw/draw_pt_varray.c', + 'draw/draw_pt_vcache.c', + 'draw/draw_vertex.c', + 'draw/draw_vs.c', + 'draw/draw_vs_aos.c', + 'draw/draw_vs_aos_io.c', + 'draw/draw_vs_aos_machine.c', + 'draw/draw_vs_exec.c', + 'draw/draw_vs_llvm.c', + 'draw/draw_vs_ppc.c', + 'draw/draw_vs_sse.c', + 'draw/draw_vs_varient.c', + 'draw/draw_gs.c', + #'indices/u_indices.c', + #'indices/u_unfilled_indices.c', + 'indices/u_indices_gen.c', + 'indices/u_unfilled_gen.c', + 'pipebuffer/pb_buffer_fenced.c', + 'pipebuffer/pb_buffer_malloc.c', + 'pipebuffer/pb_bufmgr_alt.c', + 'pipebuffer/pb_bufmgr_cache.c', + 'pipebuffer/pb_bufmgr_debug.c', + 'pipebuffer/pb_bufmgr_fenced.c', + 'pipebuffer/pb_bufmgr_mm.c', + 'pipebuffer/pb_bufmgr_ondemand.c', + 'pipebuffer/pb_bufmgr_pool.c', + 'pipebuffer/pb_bufmgr_slab.c', + 'pipebuffer/pb_validate.c', + 'rbug/rbug_core.c', + 'rbug/rbug_shader.c', + 'rbug/rbug_context.c', + 'rbug/rbug_texture.c', + 'rbug/rbug_demarshal.c', + 'rbug/rbug_connection.c', + 'rtasm/rtasm_cpu.c', + 'rtasm/rtasm_execmem.c', + 'rtasm/rtasm_x86sse.c', + 'rtasm/rtasm_ppc.c', + 'rtasm/rtasm_ppc_spe.c', + 'tgsi/tgsi_build.c', + 'tgsi/tgsi_dump.c', + 'tgsi/tgsi_dump_c.c', + 'tgsi/tgsi_exec.c', + 'tgsi/tgsi_info.c', + 'tgsi/tgsi_iterate.c', + 'tgsi/tgsi_parse.c', + 'tgsi/tgsi_sanity.c', + 'tgsi/tgsi_scan.c', + 'tgsi/tgsi_ppc.c', + 'tgsi/tgsi_sse2.c', + 'tgsi/tgsi_text.c', + 'tgsi/tgsi_transform.c', + 'tgsi/tgsi_ureg.c', + 'tgsi/tgsi_util.c', + 'translate/translate_generic.c', + 'translate/translate_sse.c', + 'translate/translate.c', + 'translate/translate_cache.c', + 'util/u_bitmask.c', + 'util/u_blit.c', + 
'util/u_blitter.c', + 'util/u_cache.c', + 'util/u_cpu_detect.c', + 'util/u_debug.c', + 'util/u_debug_dump.c', + 'util/u_debug_memory.c', + 'util/u_debug_stack.c', + 'util/u_debug_symbol.c', + 'util/u_dl.c', + 'util/u_draw_quad.c', + 'util/u_format.c', + 'util/u_format_access.c', + 'util/u_format_table.c', + 'util/u_gen_mipmap.c', + 'util/u_handle_table.c', + 'util/u_hash.c', + 'util/u_hash_table.c', + 'util/u_keymap.c', + 'util/u_network.c', + 'util/u_math.c', + 'util/u_mm.c', + 'util/u_rect.c', + 'util/u_simple_shaders.c', + 'util/u_snprintf.c', + 'util/u_stream_stdc.c', + 'util/u_stream_wd.c', + 'util/u_surface.c', + 'util/u_texture.c', + 'util/u_tile.c', + 'util/u_time.c', + 'util/u_timed_winsys.c', + 'util/u_upload_mgr.c', + 'util/u_simple_screen.c', + 'vl/vl_bitstream_parser.c', + 'vl/vl_mpeg12_mc_renderer.c', + 'vl/vl_compositor.c', + 'vl/vl_csc.c', + 'vl/vl_shader_build.c', +] + +if env['llvm']: + source += [ + 'gallivm/gallivm.cpp', + 'gallivm/gallivm_cpu.cpp', + 'gallivm/instructions.cpp', + 'gallivm/loweringpass.cpp', + 'gallivm/tgsitollvm.cpp', + 'gallivm/storage.cpp', + 'gallivm/storagesoa.cpp', + 'gallivm/instructionssoa.cpp', + ] + +gallium = env.ConvenienceLibrary( + target = 'gallium', + source = source, +) + +Export('gallium') -- cgit v1.2.3 From 164fd16cfbc09970676c2e6866e062a5c9b410db Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 16 Jan 2010 21:11:01 +0000 Subject: util: add generic ringbuffer utility --- src/gallium/auxiliary/Makefile | 1 + src/gallium/auxiliary/SConscript | 1 + src/gallium/auxiliary/util/u_ringbuffer.c | 145 ++++++++++++++++++++++++++++++ src/gallium/auxiliary/util/u_ringbuffer.h | 29 ++++++ 4 files changed, 176 insertions(+) create mode 100644 src/gallium/auxiliary/util/u_ringbuffer.c create mode 100644 src/gallium/auxiliary/util/u_ringbuffer.h (limited to 'src/gallium/auxiliary/SConscript') diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index e3af41c6e04..8f937e3b4e9 100644 --- 
a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -111,6 +111,7 @@ C_SOURCES = \ util/u_math.c \ util/u_mm.c \ util/u_rect.c \ + util/u_ringbuffer.c \ util/u_simple_shaders.c \ util/u_snprintf.c \ util/u_stream_stdc.c \ diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index 782eb533863..f957090b5fb 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -147,6 +147,7 @@ source = [ 'util/u_math.c', 'util/u_mm.c', 'util/u_rect.c', + 'util/u_ringbuffer.c', 'util/u_simple_shaders.c', 'util/u_snprintf.c', 'util/u_stream_stdc.c', diff --git a/src/gallium/auxiliary/util/u_ringbuffer.c b/src/gallium/auxiliary/util/u_ringbuffer.c new file mode 100644 index 00000000000..3f43a19e018 --- /dev/null +++ b/src/gallium/auxiliary/util/u_ringbuffer.c @@ -0,0 +1,145 @@ + +#include "pipe/p_thread.h" +#include "pipe/p_defines.h" +#include "util/u_ringbuffer.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +/* Generic ringbuffer: + */ +struct util_ringbuffer +{ + struct util_packet *buf; + unsigned mask; + + /* Can this be done with atomic variables?? 
+ */ + unsigned head; + unsigned tail; + pipe_condvar change; + pipe_mutex mutex; +}; + + +struct util_ringbuffer *util_ringbuffer_create( unsigned dwords ) +{ + struct util_ringbuffer *ring = CALLOC_STRUCT(util_ringbuffer); + if (ring == NULL) + return NULL; + + assert(util_is_power_of_two(dwords)); + + ring->buf = MALLOC( dwords * sizeof(unsigned) ); + if (ring->buf == NULL) + goto fail; + + ring->mask = dwords - 1; + + pipe_condvar_init(ring->change); + pipe_mutex_init(ring->mutex); + return ring; + +fail: + FREE(ring->buf); + FREE(ring); + return NULL; +} + +void util_ringbuffer_destroy( struct util_ringbuffer *ring ) +{ + pipe_condvar_destroy(ring->change); + pipe_mutex_destroy(ring->mutex); + FREE(ring->buf); + FREE(ring); +} + +static INLINE unsigned util_ringbuffer_space( const struct util_ringbuffer *ring ) +{ + return (ring->tail - (ring->head + 1)) & ring->mask; +} + +void util_ringbuffer_enqueue( struct util_ringbuffer *ring, + const struct util_packet *packet ) +{ + unsigned i; + + /* XXX: over-reliance on mutexes, etc: + */ + pipe_mutex_lock(ring->mutex); + + /* Wait for free space: + */ + while (util_ringbuffer_space(ring) < packet->dwords) + pipe_condvar_wait(ring->change, ring->mutex); + + /* Copy data to ring: + */ + for (i = 0; i < packet->dwords; i++) { + + /* Copy all dwords of the packet. 
Note we're abusing the + * typesystem a little - we're being passed a pointer to + * something, but probably not an array of packet structs: + */ + ring->buf[ring->head] = packet[i]; + ring->head++; + ring->head &= ring->mask; + } + + /* Signal change: + */ + pipe_condvar_signal(ring->change); + pipe_mutex_unlock(ring->mutex); +} + +enum pipe_error util_ringbuffer_dequeue( struct util_ringbuffer *ring, + struct util_packet *packet, + unsigned max_dwords, + boolean wait ) +{ + const struct util_packet *ring_packet; + unsigned i; + int ret = PIPE_OK; + + /* XXX: over-reliance on mutexes, etc: + */ + pipe_mutex_lock(ring->mutex); + + /* Wait for free space: + */ + if (wait) { + while (util_ringbuffer_space(ring) == 0) + pipe_condvar_wait(ring->change, ring->mutex); + } + else { + if (util_ringbuffer_space(ring) == 0) { + ret = PIPE_ERROR_OUT_OF_MEMORY; + goto out; + } + } + + ring_packet = &ring->buf[ring->tail]; + + /* Both of these are considered bugs. Raise an assert on debug builds. + */ + if (ring_packet->dwords > ring->mask + 1 - util_ringbuffer_space(ring) || + ring_packet->dwords > max_dwords) { + assert(0); + ret = PIPE_ERROR_BAD_INPUT; + goto out; + } + + /* Copy data from ring: + */ + for (i = 0; i < ring_packet->dwords; i++) { + packet[i] = ring->buf[ring->tail]; + ring->tail++; + ring->tail &= ring->mask; + } + +out: + /* Signal change: + */ + pipe_condvar_signal(ring->change); + pipe_mutex_unlock(ring->mutex); + return ret; +} diff --git a/src/gallium/auxiliary/util/u_ringbuffer.h b/src/gallium/auxiliary/util/u_ringbuffer.h new file mode 100644 index 00000000000..85f0ad6c1f6 --- /dev/null +++ b/src/gallium/auxiliary/util/u_ringbuffer.h @@ -0,0 +1,29 @@ + +#ifndef UTIL_RINGBUFFER_H +#define UTIL_RINGBUFFER_H + +#include "pipe/p_compiler.h" +#include "pipe/p_defines.h" /* only for pipe_error! 
*/ + +/* Generic header + */ +struct util_packet { + unsigned dwords:8; + unsigned data24:24; +}; + +struct util_ringbuffer; + +struct util_ringbuffer *util_ringbuffer_create( unsigned dwords ); + +void util_ringbuffer_destroy( struct util_ringbuffer *ring ); + +void util_ringbuffer_enqueue( struct util_ringbuffer *ring, + const struct util_packet *packet ); + +enum pipe_error util_ringbuffer_dequeue( struct util_ringbuffer *ring, + struct util_packet *packet, + unsigned max_dwords, + boolean wait ); + +#endif -- cgit v1.2.3 From d88b219cf37c488f57e18850c843078ff918b55d Mon Sep 17 00:00:00 2001 From: Alan Hourihane Date: Fri, 22 Jan 2010 19:32:12 +0000 Subject: Remove obsolete file --- src/gallium/auxiliary/SConscript | 1 - 1 file changed, 1 deletion(-) (limited to 'src/gallium/auxiliary/SConscript') diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index f957090b5fb..3aa782f81e6 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -87,7 +87,6 @@ source = [ 'pipebuffer/pb_bufmgr_alt.c', 'pipebuffer/pb_bufmgr_cache.c', 'pipebuffer/pb_bufmgr_debug.c', - 'pipebuffer/pb_bufmgr_fenced.c', 'pipebuffer/pb_bufmgr_mm.c', 'pipebuffer/pb_bufmgr_ondemand.c', 'pipebuffer/pb_bufmgr_pool.c', -- cgit v1.2.3 From 0b0e7057128c213bee8d2158b976869475f7cb42 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 3 Feb 2010 12:11:58 +0000 Subject: os: New OS abstraction module. 
--- src/gallium/auxiliary/Makefile | 1 + src/gallium/auxiliary/SConscript | 1 + src/gallium/auxiliary/os/os_memory.h | 78 +++++++++++ src/gallium/auxiliary/os/os_memory_aligned.h | 72 ++++++++++ src/gallium/auxiliary/os/os_memory_debug.h | 83 ++++++++++++ src/gallium/auxiliary/os/os_memory_stdc.h | 76 +++++++++++ src/gallium/auxiliary/os/os_memory_win32k.h | 123 ++++++++++++++++++ src/gallium/auxiliary/os/os_misc.c | 188 +++++++++++++++++++++++++++ src/gallium/auxiliary/os/os_misc.h | 95 ++++++++++++++ 9 files changed, 717 insertions(+) create mode 100644 src/gallium/auxiliary/os/os_memory.h create mode 100644 src/gallium/auxiliary/os/os_memory_aligned.h create mode 100644 src/gallium/auxiliary/os/os_memory_debug.h create mode 100644 src/gallium/auxiliary/os/os_memory_stdc.h create mode 100644 src/gallium/auxiliary/os/os_memory_win32k.h create mode 100644 src/gallium/auxiliary/os/os_misc.c create mode 100644 src/gallium/auxiliary/os/os_misc.h (limited to 'src/gallium/auxiliary/SConscript') diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index da1fb6b299f..e38990a15f9 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -48,6 +48,7 @@ C_SOURCES = \ draw/draw_vs_sse.c \ indices/u_indices_gen.c \ indices/u_unfilled_gen.c \ + os/os_misc.c \ pipebuffer/pb_buffer_malloc.c \ pipebuffer/pb_bufmgr_alt.c \ pipebuffer/pb_bufmgr_cache.c \ diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index 3aa782f81e6..bfc68bad2f9 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -82,6 +82,7 @@ source = [ #'indices/u_unfilled_indices.c', 'indices/u_indices_gen.c', 'indices/u_unfilled_gen.c', + 'os/os_misc.c', 'pipebuffer/pb_buffer_fenced.c', 'pipebuffer/pb_buffer_malloc.c', 'pipebuffer/pb_bufmgr_alt.c', diff --git a/src/gallium/auxiliary/os/os_memory.h b/src/gallium/auxiliary/os/os_memory.h new file mode 100644 index 00000000000..f18d5a3d9b9 --- /dev/null +++ 
b/src/gallium/auxiliary/os/os_memory.h @@ -0,0 +1,78 @@ +/************************************************************************** + * + * Copyright 2010 Vmware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +/* + * OS memory management abstractions + */ + + +#ifndef _OS_MEMORY_H_ +#define _OS_MEMORY_H_ + + +#include "pipe/p_config.h" +#include "pipe/p_compiler.h" + + +#if defined(PIPE_OS_EMBEDDED) + +#ifdef __cplusplus +extern "C" { +#endif + +void * +os_malloc(size_t size); + +void * +os_calloc(size_t count, size_t size); + +void +os_free(void *ptr); + +void * +os_realloc(void *ptr, size_t old_size, size_t new_size); + +#ifdef __cplusplus +} +#endif + +#elif defined(PIPE_OS_WINDOWS) && defined(DEBUG) && !defined(DEBUG_MEMORY_IMPLEMENTATION) + +# include "os_memory_debug.h" + +#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) + +# include "os_memory_win32k.h" + +#else + +# include "os_memory_stdc.h" + +#endif + +#endif /* _OS_MEMORY_H_ */ diff --git a/src/gallium/auxiliary/os/os_memory_aligned.h b/src/gallium/auxiliary/os/os_memory_aligned.h new file mode 100644 index 00000000000..d4528f73196 --- /dev/null +++ b/src/gallium/auxiliary/os/os_memory_aligned.h @@ -0,0 +1,72 @@ +/************************************************************************** + * + * Copyright 2008-2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/* + * Memory alignment wrappers. + */ + + +#ifndef _OS_MEMORY_H_ +#error "Must not be included directly. Include os_memory.h instead" +#endif + + +#include "pipe/p_compiler.h" + + +/** + * Return memory on given byte alignment + */ +static INLINE void * +os_malloc_aligned(size_t size, uint alignment) +{ + char *ptr, *buf; + + ptr = (char *) os_malloc(size + alignment + sizeof(void *)); + if (!ptr) + return NULL; + + buf = (char *)(((uintptr_t)ptr + sizeof(void *) + alignment - 1) & ~(alignment - 1)); + *(char **)(buf - sizeof(void *)) = ptr; + + return buf; +} + + +/** + * Free memory returned by align_malloc(). + */ +static INLINE void +os_free_aligned(void *ptr) +{ + if (ptr) { + void **cubbyHole = (void **) ((char *) ptr - sizeof(void *)); + void *realAddr = *cubbyHole; + os_free(realAddr); + } +} diff --git a/src/gallium/auxiliary/os/os_memory_debug.h b/src/gallium/auxiliary/os/os_memory_debug.h new file mode 100644 index 00000000000..c664be9aad5 --- /dev/null +++ b/src/gallium/auxiliary/os/os_memory_debug.h @@ -0,0 +1,83 @@ +/************************************************************************** + * + * Copyright 2008-2010 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/* + * Debugging wrappers for OS memory management abstractions. + */ + + +#ifndef _OS_MEMORY_H_ +#error "Must not be included directly. 
Include os_memory.h instead" +#endif + + +#include "pipe/p_compiler.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +void * +debug_malloc(const char *file, unsigned line, const char *function, + size_t size); + +void * +debug_calloc(const char *file, unsigned line, const char *function, + size_t count, size_t size ); + +void +debug_free(const char *file, unsigned line, const char *function, + void *ptr); + +void * +debug_realloc(const char *file, unsigned line, const char *function, + void *old_ptr, size_t old_size, size_t new_size ); + + +#ifdef __cplusplus +} +#endif + + +#ifndef DEBUG_MEMORY_IMPLEMENTATION + +#define os_malloc( _size ) \ + debug_malloc( __FILE__, __LINE__, __FUNCTION__, _size ) +#define os_calloc( _count, _size ) \ + debug_calloc(__FILE__, __LINE__, __FUNCTION__, _count, _size ) +#define os_free( _ptr ) \ + debug_free( __FILE__, __LINE__, __FUNCTION__, _ptr ) +#define os_realloc( _ptr, _old_size, _new_size ) \ + debug_realloc( __FILE__, __LINE__, __FUNCTION__, _ptr, _old_size, _new_size ) + +/* TODO: wrap os_malloc_aligned() and os_free_aligned() too */ +#include "os_memory_aligned.h" + +#endif /* !DEBUG_MEMORY_IMPLEMENTATION */ diff --git a/src/gallium/auxiliary/os/os_memory_stdc.h b/src/gallium/auxiliary/os/os_memory_stdc.h new file mode 100644 index 00000000000..806e5363568 --- /dev/null +++ b/src/gallium/auxiliary/os/os_memory_stdc.h @@ -0,0 +1,76 @@ +/************************************************************************** + * + * Copyright 2008-2010 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/* + * OS memory management abstractions for the standard C library. + */ + + +#ifndef _OS_MEMORY_H_ +#error "Must not be included directly. 
Include os_memory.h instead" +#endif + +#include + +#include "pipe/p_compiler.h" + + +#define os_malloc(_size) malloc(_size) +#define os_calloc(_count, _size ) calloc(_count, _size ) +#define os_free(_ptr) free(_ptr) + +#define os_realloc( _old_ptr, _old_size, _new_size) \ + realloc(_old_ptr, _new_size + 0*(_old_size)) + + +#if defined(HAVE_POSIX_MEMALIGN) + +static INLINE void * +os_malloc_aligned(size_t size, size_t alignment) +{ + void *ptr; + alignment = (alignment + sizeof(void*) - 1) & ~(sizeof(void*) - 1); + if(posix_memalign(&ptr, alignment, size) != 0) + return NULL; + return ptr; +} + +#define os_free_aligned(_ptr) free(_ptr) + +#elif defined(PIPE_OS_WINDOWS) + +#include + +#define os_malloc_aligned(_size, _align) _aligned_malloc(_size, _align) +#define os_free_aligned(_ptr) _aligned_free(_ptr) + +#else + +#include "os_memory_aligned.h" + +#endif diff --git a/src/gallium/auxiliary/os/os_memory_win32k.h b/src/gallium/auxiliary/os/os_memory_win32k.h new file mode 100644 index 00000000000..d56d6908722 --- /dev/null +++ b/src/gallium/auxiliary/os/os_memory_win32k.h @@ -0,0 +1,123 @@ +/************************************************************************** + * + * Copyright 2008-2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/* + * OS memory management abstractions for Windows kernel. + */ + + +#ifndef _OS_MEMORY_H_ +#error "Must not be included directly. Include os_memory.h instead" +#endif + + +#include "pipe/p_compiler.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) + +void * __stdcall +EngAllocMem(unsigned long Flags, + unsigned long MemSize, + unsigned long Tag); + +void __stdcall +EngFreeMem(void *Mem); + +#define os_malloc(_size) EngAllocMem(0, _size, 'D3AG') +#define os_calloc(_count, _size) EngAllocMem(1, (_count)*(_size), 'D3AG') +#define _os_free(_ptr) EngFreeMem(_ptr) + +#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) + +void * +ExAllocatePool(unsigned long PoolType, + size_t NumberOfBytes); + +void +ExFreePool(void *P); + +#define os_malloc(_size) ExAllocatePool(0, _size) +#define _os_free(_ptr) ExFreePool(_ptr) + +static INLINE void * +os_calloc(unsigned count, unsigned size) +{ + void *ptr = os_malloc(count * size); + if (ptr) { + memset(ptr, 0, count * size); + } + return ptr; +} + +#else + +#error "Unsupported subsystem" + +#endif + + +static INLINE void +os_free( void *ptr ) +{ + if (ptr) { + _os_free(ptr); + } +} + + +static INLINE void * +os_realloc(void *old_ptr, unsigned old_size, unsigned new_size) +{ + void *new_ptr = NULL; + + if (new_size != 0) { + unsigned copy_size = old_size < new_size ? 
old_size : new_size; + new_ptr = os_malloc( new_size ); + if (new_ptr && old_ptr && copy_size) { + memcpy(new_ptr, old_ptr, copy_size); + } + } + + os_free(old_ptr); + + return new_ptr; +} + + +#ifdef __cplusplus +} +#endif + + +#include "os_memory_aligned.h" diff --git a/src/gallium/auxiliary/os/os_misc.c b/src/gallium/auxiliary/os/os_misc.c new file mode 100644 index 00000000000..384988017b7 --- /dev/null +++ b/src/gallium/auxiliary/os/os_misc.c @@ -0,0 +1,188 @@ +/************************************************************************** + * + * Copyright 2008-2010 Vmware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#include "os_misc.h" + +#include + + +#ifdef PIPE_SUBSYSTEM_WINDOWS_DISPLAY + +#include +#include + +#elif defined(PIPE_SUBSYSTEM_WINDOWS_CE) + +#include +#include +#include +#include + +#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) + +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers +#endif +#include +#include + +#else + +#include +#include + +#endif + + +#ifdef PIPE_SUBSYSTEM_WINDOWS_DISPLAY +static INLINE void +_EngDebugPrint(const char *format, ...) +{ + va_list ap; + va_start(ap, format); + EngDebugPrint("", (PCHAR)format, ap); + va_end(ap); +} +#endif + + +void +os_log_message(const char *message) +{ +#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) + _EngDebugPrint("%s", message); +#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) + OutputDebugStringA(message); + if(GetConsoleWindow() && !IsDebuggerPresent()) { + fflush(stdout); + fputs(message, stderr); + fflush(stderr); + } +#elif defined(PIPE_SUBSYSTEM_WINDOWS_CE) + wchar_t *wide_format; + long wide_str_len; + /* Format is ascii - needs to be converted to wchar_t for printing */ + wide_str_len = MultiByteToWideChar(CP_ACP, 0, message, -1, NULL, 0); + wide_format = (wchar_t *) malloc((wide_str_len+1) * sizeof(wchar_t)); + if (wide_format) { + MultiByteToWideChar(CP_ACP, 0, message, -1, + wide_format, wide_str_len); + NKDbgPrintfW(wide_format, wide_format); + free(wide_format); + } +#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) + /* TODO */ +#else /* !PIPE_SUBSYSTEM_WINDOWS */ + fflush(stdout); + fputs(message, stderr); +#endif +} + + +#ifdef PIPE_SUBSYSTEM_WINDOWS_DISPLAY +static const char * +find(const char *start, const char *end, char c) +{ + const char *p; + for(p = start; !end || p != end; ++p) { + if(*p == c) + return p; + if(*p < 32) + break; + } + return NULL; +} + +static int +compare(const char *start, const char *end, const char *s) +{ + const char *p, *q; + for(p = 
start, q = s; p != end && *q != '\0'; ++p, ++q) { + if(*p != *q) + return 0; + } + return p == end && *q == '\0'; +} + +static void +copy(char *dst, const char *start, const char *end, size_t n) +{ + const char *p; + char *q; + for(p = start, q = dst, n = n - 1; p != end && n; ++p, ++q, --n) + *q = *p; + *q = '\0'; +} +#endif + + +const char * +os_get_option(const char *name) +{ +#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) + /* EngMapFile creates the file if it does not exists, so it must either be + * disabled on release versions (or put in a less conspicuous place). */ +#ifdef DEBUG + const char *result = NULL; + ULONG_PTR iFile = 0; + const void *pMap = NULL; + const char *sol, *eol, *sep; + static char output[1024]; + + pMap = EngMapFile(L"\\??\\c:\\gallium.cfg", 0, &iFile); + if(pMap) { + sol = (const char *)pMap; + while(1) { + /* TODO: handle LF line endings */ + eol = find(sol, NULL, '\r'); + if(!eol || eol == sol) + break; + sep = find(sol, eol, '='); + if(!sep) + break; + if(compare(sol, sep, name)) { + copy(output, sep + 1, eol, sizeof(output)); + result = output; + break; + } + sol = eol + 2; + } + EngUnmapFile(iFile); + } + return result; +#else + return NULL; +#endif +#elif defined(PIPE_SUBSYSTEM_WINDOWS_CE) || defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) + /* TODO: implement */ + return NULL; +#else + return getenv(name); +#endif +} + diff --git a/src/gallium/auxiliary/os/os_misc.h b/src/gallium/auxiliary/os/os_misc.h new file mode 100644 index 00000000000..56e48ca9bee --- /dev/null +++ b/src/gallium/auxiliary/os/os_misc.h @@ -0,0 +1,95 @@ +/************************************************************************** + * + * Copyright 2010 Vmware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/* + * Miscellaneous OS services. + */ + + +#ifndef _OS_MISC_H_ +#define _OS_MISC_H_ + + +#include "pipe/p_compiler.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +/* + * Trap into the debugger. + */ +#if (defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)) && defined(PIPE_CC_GCC) +# define os_break() __asm("int3") +#elif defined(PIPE_CC_MSVC) +# define os_break() __debugbreak() +#elif defined(PIPE_OS_UNIX) +# include /* for kill() */ +# include /* for getpid() */ +# define os_break() kill(getpid(), SIGTRAP) +#elif defined(PIPE_OS_EMBEDDED) +void os_break(void); +#else +# define os_break() abort() +#endif + + +/* + * Abort the program. 
+ */ +#if defined(DEBUG) || defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) +# define os_abort() os_break() +#elif defined(PIPE_OS_EMBEDDED) +void os_abort(void); +#else +# define os_abort() abort() +#endif + + +/* + * Output a message. Message should preferably end in a newline. + */ +void +os_log_message(const char *message); + + +/* + * Get an option. Should return NULL if specified option is not set. + */ +const char * +os_get_option(const char *name); + + +#ifdef __cplusplus +} +#endif + + +#endif /* _OS_MISC_H_ */ -- cgit v1.2.3 From 2ef6d311b7cba30deb3d7e38eec14cef89e7e579 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 4 Feb 2010 18:23:49 +0000 Subject: os: Time abstractions. Simplified version of u_time.[ch] --- src/gallium/auxiliary/Makefile | 1 + src/gallium/auxiliary/SConscript | 1 + src/gallium/auxiliary/os/os_time.c | 128 +++++++++++++++++++++++++++++++++++++ src/gallium/auxiliary/os/os_time.h | 92 ++++++++++++++++++++++++++ 4 files changed, 222 insertions(+) create mode 100644 src/gallium/auxiliary/os/os_time.c create mode 100644 src/gallium/auxiliary/os/os_time.h (limited to 'src/gallium/auxiliary/SConscript') diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index e38990a15f9..7d5e0096496 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -49,6 +49,7 @@ C_SOURCES = \ indices/u_indices_gen.c \ indices/u_unfilled_gen.c \ os/os_misc.c \ + os/os_time.c \ pipebuffer/pb_buffer_malloc.c \ pipebuffer/pb_bufmgr_alt.c \ pipebuffer/pb_bufmgr_cache.c \ diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index bfc68bad2f9..21b443591ce 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -83,6 +83,7 @@ source = [ 'indices/u_indices_gen.c', 'indices/u_unfilled_gen.c', 'os/os_misc.c', + 'os/os_time.c', 'pipebuffer/pb_buffer_fenced.c', 'pipebuffer/pb_buffer_malloc.c', 'pipebuffer/pb_bufmgr_alt.c', 
diff --git a/src/gallium/auxiliary/os/os_time.c b/src/gallium/auxiliary/os/os_time.c new file mode 100644 index 00000000000..6259142bec0 --- /dev/null +++ b/src/gallium/auxiliary/os/os_time.c @@ -0,0 +1,128 @@ +/************************************************************************** + * + * Copyright 2008-2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * OS independent time-manipulation functions. 
+ * + * @author Jose Fonseca + */ + + +#include "pipe/p_config.h" + +#if !defined(PIPE_OS_EMBEDDED) + +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) +# include /* timeval */ +#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) +# include +# include +#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) +# include +extern VOID KeQuerySystemTime(PLARGE_INTEGER); +#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) +# include +#else +# error Unsupported OS +#endif + +#include "os_time.h" + +/* Return the current time in microseconds, converting from each platform's native clock. */ +int64_t +os_time_get(void) +{ +#if defined(PIPE_OS_UNIX) + + struct timeval tv; + gettimeofday(&tv, NULL); + return tv.tv_usec + tv.tv_sec*1000000LL; + +#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) + + static LONGLONG frequency; + LONGLONG counter; + if(!frequency) + EngQueryPerformanceFrequency(&frequency); + EngQueryPerformanceCounter(&counter); + return counter*INT64_C(1000000)/frequency; + +#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) + + static LARGE_INTEGER frequency; + LARGE_INTEGER counter; + if(!frequency.QuadPart) + QueryPerformanceFrequency(&frequency); + QueryPerformanceCounter(&counter); + return counter.QuadPart*INT64_C(1000000)/frequency.QuadPart; + +#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) + + /* Updated every 10 milliseconds, measured in units of 100 nanoseconds. 
+ * http://msdn.microsoft.com/en-us/library/ms801642.aspx */ + LARGE_INTEGER counter; + KeQuerySystemTime(&counter); + return counter.QuadPart/10; + +#endif +} + + +#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) + +void +os_time_sleep(int64_t usecs) +{ + static LONGLONG frequency; + LONGLONG start, curr, end; + + EngQueryPerformanceCounter(&start); + + if(!frequency) + EngQueryPerformanceFrequency(&frequency); + + end = start + (usecs * frequency + 999999LL)/1000000LL; + /* busy-wait while curr is inside [start, end), allowing for end having wrapped below start */ + do { + EngQueryPerformanceCounter(&curr); + } while(start <= curr && curr < end || + end < start && (curr < end || start <= curr)); +} + +#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) + +void +os_time_sleep(int64_t usecs) +{ + Sleep((usecs + 999) / 1000); /* round up to whole milliseconds */ +} + +#endif + + +#endif /* !PIPE_OS_EMBEDDED */ diff --git a/src/gallium/auxiliary/os/os_time.h b/src/gallium/auxiliary/os/os_time.h new file mode 100644 index 00000000000..5b55c1b3747 --- /dev/null +++ b/src/gallium/auxiliary/os/os_time.h @@ -0,0 +1,92 @@ +/************************************************************************** + * + * Copyright 2008-2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * OS independent time-manipulation functions. + * + * @author Jose Fonseca + */ + +#ifndef _OS_TIME_H_ +#define _OS_TIME_H_ + + +#include "pipe/p_config.h" + +#if defined(PIPE_OS_UNIX) +# include /* usleep */ +#endif + +#include "pipe/p_compiler.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +/* + * Get the current time in microseconds from an unknown base. + */ +int64_t +os_time_get(void); + + +/* + * Sleep for the given number of microseconds. + */ +#if defined(PIPE_OS_UNIX) +#define os_time_sleep(_usecs) usleep(_usecs) +#else +void +os_time_sleep(int64_t usecs); +#endif + + +/* + * Helper function for detecting time outs, taking into account overflow. + * + * Returns true if the current time has elapsed beyond the specified interval. + */ +static INLINE boolean +os_time_timeout(int64_t start, + int64_t end, + int64_t curr) +{ + if(start <= end) + return !(start <= curr && curr < end); + else + return !((start <= curr) || (curr < end)); +} + + +#ifdef __cplusplus +} +#endif + +#endif /* _OS_TIME_H_ */ -- cgit v1.2.3 From 5ee324ad3cfe62de661a1a6a7a60b2eec5738ef1 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 4 Feb 2010 18:24:36 +0000 Subject: util: Reimplement u_time on top of os_time. 
--- src/gallium/auxiliary/Makefile | 1 - src/gallium/auxiliary/SConscript | 1 - src/gallium/auxiliary/util/u_time.c | 229 ------------------------------------ src/gallium/auxiliary/util/u_time.h | 89 +++++++++----- 4 files changed, 59 insertions(+), 261 deletions(-) delete mode 100644 src/gallium/auxiliary/util/u_time.c (limited to 'src/gallium/auxiliary/SConscript') diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index 7d5e0096496..ac1872c720f 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -120,7 +120,6 @@ C_SOURCES = \ util/u_surface.c \ util/u_texture.c \ util/u_tile.c \ - util/u_time.c \ util/u_timed_winsys.c \ util/u_upload_mgr.c \ util/u_simple_screen.c \ diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index 21b443591ce..d5b325e7a31 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -156,7 +156,6 @@ source = [ 'util/u_surface.c', 'util/u_texture.c', 'util/u_tile.c', - 'util/u_time.c', 'util/u_timed_winsys.c', 'util/u_upload_mgr.c', 'util/u_simple_screen.c', diff --git a/src/gallium/auxiliary/util/u_time.c b/src/gallium/auxiliary/util/u_time.c deleted file mode 100644 index 806708b2dc8..00000000000 --- a/src/gallium/auxiliary/util/u_time.c +++ /dev/null @@ -1,229 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * OS independent time-manipulation functions. 
- * - * @author Jose Fonseca - */ - - -#include "pipe/p_config.h" - -#if !defined(PIPE_OS_EMBEDDED) - -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) -#include -#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) -#include -#include -#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) -#include -extern VOID KeQuerySystemTime(PLARGE_INTEGER); -#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) -#include -#else -#error Unsupported OS -#endif - -#include "util/u_time.h" - - -#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) - -static int64_t frequency = 0; - -static INLINE void -util_time_get_frequency(void) -{ - if(!frequency) { -#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) - LONGLONG temp; - EngQueryPerformanceFrequency(&temp); - frequency = temp; -#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) - LARGE_INTEGER temp; - QueryPerformanceFrequency(&temp); - frequency = temp.QuadPart; -#endif - } -} -#endif - - -void -util_time_get(struct util_time *t) -{ -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) - gettimeofday(&t->tv, NULL); -#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) - LONGLONG temp; - EngQueryPerformanceCounter(&temp); - t->counter = temp; -#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) - /* Updated every 10 miliseconds, measured in units of 100 nanoseconds. 
- * http://msdn.microsoft.com/en-us/library/ms801642.aspx */ - LARGE_INTEGER temp; - KeQuerySystemTime(&temp); - t->counter = temp.QuadPart; -#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) - LARGE_INTEGER temp; - QueryPerformanceCounter(&temp); - t->counter = temp.QuadPart; -#endif -} - - -void -util_time_add(const struct util_time *t1, - int64_t usecs, - struct util_time *t2) -{ -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) - t2->tv.tv_sec = t1->tv.tv_sec + usecs / 1000000; - t2->tv.tv_usec = t1->tv.tv_usec + usecs % 1000000; -#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) - util_time_get_frequency(); - t2->counter = t1->counter + (usecs * frequency + INT64_C(999999))/INT64_C(1000000); -#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) - /* 1 tick = 100 nano seconds. */ - t2->counter = t1->counter + usecs * 10; -#else - LARGE_INTEGER temp; - LONGLONG freq; - freq = temp.QuadPart; - t2->counter = t1->counter + (usecs * freq)/1000000L; -#endif -} - - -int64_t -util_time_diff(const struct util_time *t1, - const struct util_time *t2) -{ -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) - return (t2->tv.tv_usec - t1->tv.tv_usec) + - (t2->tv.tv_sec - t1->tv.tv_sec)*1000000; -#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) - util_time_get_frequency(); - return (t2->counter - t1->counter)*INT64_C(1000000)/frequency; -#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) - return (t2->counter - t1->counter)/10; -#endif -} - - - -uint64_t -util_time_micros( void ) -{ - struct util_time t1; - - util_time_get(&t1); - -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || 
defined(PIPE_OS_HAIKU) - return t1.tv.tv_usec + t1.tv.tv_sec*1000000LL; -#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) - util_time_get_frequency(); - return t1.counter*INT64_C(1000000)/frequency; -#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) - return t1.counter/10; -#endif -} - - - -/** - * Compare two time values. - * - * Not publicly available because it does not take in account wrap-arounds. - * Use util_time_timeout instead. - */ -static INLINE int -util_time_compare(const struct util_time *t1, - const struct util_time *t2) -{ -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) - if (t1->tv.tv_sec < t2->tv.tv_sec) - return -1; - else if(t1->tv.tv_sec > t2->tv.tv_sec) - return 1; - else if (t1->tv.tv_usec < t2->tv.tv_usec) - return -1; - else if(t1->tv.tv_usec > t2->tv.tv_usec) - return 1; - else - return 0; -#elif defined(PIPE_OS_WINDOWS) - if (t1->counter < t2->counter) - return -1; - else if(t1->counter > t2->counter) - return 1; - else - return 0; -#endif -} - - -boolean -util_time_timeout(const struct util_time *start, - const struct util_time *end, - const struct util_time *curr) -{ - if(util_time_compare(start, end) <= 0) - return !(util_time_compare(start, curr) <= 0 && util_time_compare(curr, end) < 0); - else - return !(util_time_compare(start, curr) <= 0 || util_time_compare(curr, end) < 0); -} - - -#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) -void util_time_sleep(unsigned usecs) -{ - LONGLONG start, curr, end; - - EngQueryPerformanceCounter(&start); - - if(!frequency) - EngQueryPerformanceFrequency(&frequency); - - end = start + (usecs * frequency + 999999LL)/1000000LL; - - do { - EngQueryPerformanceCounter(&curr); - } while(start <= curr && curr < end || - end < start && (curr < end || start <= curr)); -} -#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) -void util_time_sleep(unsigned usecs) -{ - 
Sleep((usecs + 999)/ 1000); -} -#endif - -#endif /* !PIPE_OS_EMBEDDED */ diff --git a/src/gallium/auxiliary/util/u_time.h b/src/gallium/auxiliary/util/u_time.h index a6189a247bb..7580ac0de4c 100644 --- a/src/gallium/auxiliary/util/u_time.h +++ b/src/gallium/auxiliary/util/u_time.h @@ -38,15 +38,7 @@ #include "pipe/p_config.h" -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) -#include /* timeval */ -#include /* usleep */ -#endif - -#if defined(PIPE_OS_HAIKU) -#include /* timeval */ -#include -#endif +#include "os/os_time.h" #include "pipe/p_compiler.h" @@ -63,43 +55,80 @@ extern "C" { */ struct util_time { -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) - struct timeval tv; -#else int64_t counter; -#endif }; -void -util_time_get(struct util_time *t); +PIPE_DEPRECATED +static INLINE void +util_time_get(struct util_time *t) +{ + t->counter = os_time_get(); +} + -void +PIPE_DEPRECATED +static INLINE void util_time_add(const struct util_time *t1, int64_t usecs, - struct util_time *t2); + struct util_time *t2) +{ + t2->counter = t1->counter + usecs; +} -uint64_t -util_time_micros( void ); -int64_t +PIPE_DEPRECATED +static INLINE int64_t util_time_diff(const struct util_time *t1, - const struct util_time *t2); + const struct util_time *t2) +{ + return t2->counter - t1->counter; +} + /** - * Returns non-zero when the timeout expires. + * Compare two time values. + * + * Not publicly available because it does not take in account wrap-arounds. + * Use util_time_timeout instead. 
*/ -boolean +static INLINE int +_util_time_compare(const struct util_time *t1, + const struct util_time *t2) +{ + if (t1->counter < t2->counter) + return -1; + else if(t1->counter > t2->counter) + return 1; + else + return 0; +} + + +PIPE_DEPRECATED +static INLINE boolean util_time_timeout(const struct util_time *start, const struct util_time *end, - const struct util_time *curr); + const struct util_time *curr) +{ + return os_time_timeout(start->counter, end->counter, curr->counter); +} -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) -#define util_time_sleep usleep -#else -void -util_time_sleep(unsigned usecs); -#endif + +PIPE_DEPRECATED +static INLINE int64_t +util_time_micros(void) +{ + return os_time_get(); +} + + +PIPE_DEPRECATED +static INLINE void +util_time_sleep(int64_t usecs) +{ + os_time_sleep(usecs); +} #ifdef __cplusplus -- cgit v1.2.3 From c75d64490e853a02952b5681ad17e7ddbbf8086c Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 4 Feb 2010 18:44:51 +0000 Subject: auxiliary: util_stream -> os_stream --- src/gallium/auxiliary/Makefile | 4 +- src/gallium/auxiliary/SConscript | 4 +- src/gallium/auxiliary/os/os_stream.h | 61 ++++++++ src/gallium/auxiliary/os/os_stream_stdc.c | 105 ++++++++++++++ src/gallium/auxiliary/os/os_stream_wd.c | 222 ++++++++++++++++++++++++++++ src/gallium/auxiliary/util/u_debug.c | 14 +- src/gallium/auxiliary/util/u_stream.h | 61 -------- src/gallium/auxiliary/util/u_stream_stdc.c | 106 -------------- src/gallium/auxiliary/util/u_stream_wd.c | 224 ----------------------------- 9 files changed, 399 insertions(+), 402 deletions(-) create mode 100644 src/gallium/auxiliary/os/os_stream.h create mode 100644 src/gallium/auxiliary/os/os_stream_stdc.c create mode 100644 src/gallium/auxiliary/os/os_stream_wd.c delete mode 100644 src/gallium/auxiliary/util/u_stream.h delete mode 100644 src/gallium/auxiliary/util/u_stream_stdc.c delete mode 100644 
src/gallium/auxiliary/util/u_stream_wd.c (limited to 'src/gallium/auxiliary/SConscript') diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index ac1872c720f..66cfe9d89dd 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -49,6 +49,8 @@ C_SOURCES = \ indices/u_indices_gen.c \ indices/u_unfilled_gen.c \ os/os_misc.c \ + os/os_stream_stdc.c \ + os/os_stream_wd.c \ os/os_time.c \ pipebuffer/pb_buffer_malloc.c \ pipebuffer/pb_bufmgr_alt.c \ @@ -115,8 +117,6 @@ C_SOURCES = \ util/u_ringbuffer.c \ util/u_simple_shaders.c \ util/u_snprintf.c \ - util/u_stream_stdc.c \ - util/u_stream_wd.c \ util/u_surface.c \ util/u_texture.c \ util/u_tile.c \ diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index d5b325e7a31..255739b0a2a 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -83,6 +83,8 @@ source = [ 'indices/u_indices_gen.c', 'indices/u_unfilled_gen.c', 'os/os_misc.c', + 'os/os_stream_stdc.c', + 'os/os_stream_wd.c', 'os/os_time.c', 'pipebuffer/pb_buffer_fenced.c', 'pipebuffer/pb_buffer_malloc.c', @@ -151,8 +153,6 @@ source = [ 'util/u_ringbuffer.c', 'util/u_simple_shaders.c', 'util/u_snprintf.c', - 'util/u_stream_stdc.c', - 'util/u_stream_wd.c', 'util/u_surface.c', 'util/u_texture.c', 'util/u_tile.c', diff --git a/src/gallium/auxiliary/os/os_stream.h b/src/gallium/auxiliary/os/os_stream.h new file mode 100644 index 00000000000..bf30e6542d3 --- /dev/null +++ b/src/gallium/auxiliary/os/os_stream.h @@ -0,0 +1,61 @@ +/************************************************************************** + * + * Copyright 2008-2010 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Cross-platform sequential access stream abstraction. + */ + +#ifndef _OS_STREAM_H_ +#define _OS_STREAM_H_ + + +#include "pipe/p_compiler.h" + + +struct os_stream; + + +/** + * Create a stream + * @param filename relative or absolute path (necessary for windows) + * @param optional maximum file size (0 for a growable size). 
+ */ +struct os_stream * +os_stream_create(const char *filename, size_t max_size); + +boolean +os_stream_write(struct os_stream *stream, const void *data, size_t size); + +void +os_stream_flush(struct os_stream *stream); + +void +os_stream_close(struct os_stream *stream); + + +#endif /* _OS_STREAM_H_ */ diff --git a/src/gallium/auxiliary/os/os_stream_stdc.c b/src/gallium/auxiliary/os/os_stream_stdc.c new file mode 100644 index 00000000000..caa60c0b50f --- /dev/null +++ b/src/gallium/auxiliary/os/os_stream_stdc.c @@ -0,0 +1,105 @@ +/************************************************************************** + * + * Copyright 2008-2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Stream implementation based on the Standard C Library. 
+ */ + +#include "pipe/p_config.h" + +#if defined(PIPE_OS_UNIX) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) + +#include +#include + +#include "os_stream.h" + + +struct os_stream +{ + FILE *file; +}; + +/* Open filename for writing via fopen(); max_size is ignored by this implementation. Returns NULL on failure. */ +struct os_stream * +os_stream_create(const char *filename, size_t max_size) +{ + struct os_stream *stream; + + (void)max_size; + + stream = (struct os_stream *)calloc(1, sizeof(struct os_stream)); + if(!stream) + goto no_stream; + + stream->file = fopen(filename, "w"); + if(!stream->file) + goto no_file; + + return stream; + +no_file: + free(stream); +no_stream: + return NULL; +} + +/* Write size bytes from data to the stream; returns TRUE only if every byte was written. */ +boolean +os_stream_write(struct os_stream *stream, const void *data, size_t size) +{ + if(!stream) + return FALSE; + /* fwrite returns the number of items written, so write size 1-byte items to compare against size */ + return fwrite(data, 1, size, stream->file) == size ? TRUE : FALSE; +} + + +void +os_stream_flush(struct os_stream *stream) +{ + if(!stream) + return; + + fflush(stream->file); +} + + +void +os_stream_close(struct os_stream *stream) +{ + if(!stream) + return; + + fclose(stream->file); + + free(stream); +} + + +#endif diff --git a/src/gallium/auxiliary/os/os_stream_wd.c b/src/gallium/auxiliary/os/os_stream_wd.c new file mode 100644 index 00000000000..a64cbcab4cf --- /dev/null +++ b/src/gallium/auxiliary/os/os_stream_wd.c @@ -0,0 +1,222 @@ +/************************************************************************** + * + * Copyright 2008-2010 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Stream implementation for the Windows Display driver. 
+ */ + +#include "pipe/p_config.h" + +#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) + +#include +#include + +#include "os_memory.h" +#include "os_stream.h" + + +#define MAP_FILE_SIZE (4*1024*1024) + + +struct os_stream +{ + char filename[MAX_PATH + 1]; + WCHAR wFileName[MAX_PATH + 1]; + boolean growable; + size_t map_size; + ULONG_PTR iFile; + char *pMap; + size_t written; + unsigned suffix; +}; + + +static INLINE boolean +os_stream_map(struct os_stream *stream) +{ + ULONG BytesInUnicodeString; + static char filename[MAX_PATH + 1]; + unsigned filename_len; + + if(stream->growable) + filename_len = snprintf(filename, + sizeof(filename), + "%s.%04x", + stream->filename, + stream->suffix++); + else + filename_len = snprintf(filename, + sizeof(filename), + "%s", + stream->filename); + + EngMultiByteToUnicodeN( + stream->wFileName, + sizeof(stream->wFileName), + &BytesInUnicodeString, + filename, + filename_len); + + stream->pMap = EngMapFile(stream->wFileName, stream->map_size, &stream->iFile); + if(!stream->pMap) + return FALSE; + + memset(stream->pMap, 0, stream->map_size); + stream->written = 0; + + return TRUE; +} + + +static INLINE void +os_stream_unmap(struct os_stream *stream) +{ + EngUnmapFile(stream->iFile); + if(stream->written < stream->map_size) { + /* Truncate file size */ + stream->pMap = EngMapFile(stream->wFileName, stream->written, &stream->iFile); + if(stream->pMap) + EngUnmapFile(stream->iFile); + } + + stream->pMap = NULL; +} + + +static INLINE void +os_stream_full_qualified_filename(char *dst, size_t size, const char *src) +{ + boolean need_drive, need_root; + + if((('A' <= src[0] && src[0] <= 'Z') || ('a' <= src[0] && src[0] <= 'z')) && src[1] == ':') { + need_drive = FALSE; + need_root = src[2] == '\\' ? FALSE : TRUE; + } + else { + need_drive = TRUE; + need_root = src[0] == '\\' ? FALSE : TRUE; + } + + snprintf(dst, size, + "\\??\\%s%s%s", + need_drive ? "C:" : "", + need_root ? 
"\\" : "", + src); +} + + +struct os_stream * +os_stream_create(const char *filename, size_t max_size) +{ + struct os_stream *stream; + + stream = CALLOC_STRUCT(os_stream); + if(!stream) + goto error1; + + os_stream_full_qualified_filename(stream->filename, + sizeof(stream->filename), + filename); + + if(max_size) { + stream->growable = FALSE; + stream->map_size = max_size; + } + else { + stream->growable = TRUE; + stream->map_size = MAP_FILE_SIZE; + } + + if(!os_stream_map(stream)) + goto error2; + + return stream; + +error2: + FREE(stream); +error1: + return NULL; +} + + +static INLINE void +os_stream_copy(struct os_stream *stream, const char *data, size_t size) +{ + assert(stream->written + size <= stream->map_size); + memcpy(stream->pMap + stream->written, data, size); + stream->written += size; +} + + +boolean +os_stream_write(struct os_stream *stream, const void *data, size_t size) +{ + if(!stream) + return FALSE; + + if(!stream->pMap) + return FALSE; + + while(stream->written + size > stream->map_size) { + size_t step = stream->map_size - stream->written; + os_stream_copy(stream, data, step); + data = (const char *)data + step; + size -= step; + + os_stream_unmap(stream); + if(!stream->growable || !os_stream_map(stream)) + return FALSE; + } + + os_stream_copy(stream, data, size); + + return TRUE; +} + + +void +os_stream_flush(struct os_stream *stream) +{ + (void)stream; +} + + +void +os_stream_close(struct os_stream *stream) +{ + if(!stream) + return; + + os_stream_unmap(stream); + + FREE(stream); +} + + +#endif diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c index 1e92d69b9ad..a8d18333d89 100644 --- a/src/gallium/auxiliary/util/u_debug.c +++ b/src/gallium/auxiliary/util/u_debug.c @@ -30,6 +30,7 @@ #include "pipe/p_config.h" #include "pipe/p_compiler.h" +#include "os/os_stream.h" #include "util/u_debug.h" #include "pipe/p_format.h" #include "pipe/p_state.h" @@ -37,7 +38,6 @@ #include "util/u_format.h" #include 
"util/u_memory.h" #include "util/u_string.h" -#include "util/u_stream.h" #include "util/u_math.h" #include "util/u_tile.h" #include "util/u_prim.h" @@ -606,7 +606,7 @@ debug_dump_float_rgba_bmp(const char *filename, float *rgba, unsigned stride) { #ifndef PIPE_SUBSYSTEM_WINDOWS_MINIPORT - struct util_stream *stream; + struct os_stream *stream; struct bmp_file_header bmfh; struct bmp_info_header bmih; unsigned x, y; @@ -632,12 +632,12 @@ debug_dump_float_rgba_bmp(const char *filename, bmih.biClrUsed = 0; bmih.biClrImportant = 0; - stream = util_stream_create(filename, bmfh.bfSize); + stream = os_stream_create(filename, bmfh.bfSize); if(!stream) goto error1; - util_stream_write(stream, &bmfh, 14); - util_stream_write(stream, &bmih, 40); + os_stream_write(stream, &bmfh, 14); + os_stream_write(stream, &bmih, 40); y = height; while(y--) { @@ -649,11 +649,11 @@ debug_dump_float_rgba_bmp(const char *filename, pixel.rgbGreen = float_to_ubyte(ptr[x*4 + 1]); pixel.rgbBlue = float_to_ubyte(ptr[x*4 + 2]); pixel.rgbAlpha = 255; - util_stream_write(stream, &pixel, 4); + os_stream_write(stream, &pixel, 4); } } - util_stream_close(stream); + os_stream_close(stream); error1: ; #endif diff --git a/src/gallium/auxiliary/util/u_stream.h b/src/gallium/auxiliary/util/u_stream.h deleted file mode 100644 index a9d0f0121a6..00000000000 --- a/src/gallium/auxiliary/util/u_stream.h +++ /dev/null @@ -1,61 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * Cross-platform sequential access stream abstraction. - */ - -#ifndef U_STREAM_H -#define U_STREAM_H - - -#include "pipe/p_compiler.h" - - -struct util_stream; - - -/** - * Create a stream - * @param filename relative or absolute path (necessary for windows) - * @param optional maximum file size (0 for a growable size). 
- */ -struct util_stream * -util_stream_create(const char *filename, size_t max_size); - -boolean -util_stream_write(struct util_stream *stream, const void *data, size_t size); - -void -util_stream_flush(struct util_stream *stream); - -void -util_stream_close(struct util_stream *stream); - - -#endif /* U_STREAM_H */ diff --git a/src/gallium/auxiliary/util/u_stream_stdc.c b/src/gallium/auxiliary/util/u_stream_stdc.c deleted file mode 100644 index 4d976d6dca4..00000000000 --- a/src/gallium/auxiliary/util/u_stream_stdc.c +++ /dev/null @@ -1,106 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -/** - * @file - * Stream implementation based on the Standard C Library. - */ - -#include "pipe/p_config.h" - -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_APPLE) - -#include - -#include "util/u_memory.h" - -#include "u_stream.h" - - -struct util_stream -{ - FILE *file; -}; - - -struct util_stream * -util_stream_create(const char *filename, size_t max_size) -{ - struct util_stream *stream; - - (void)max_size; - - stream = CALLOC_STRUCT(util_stream); - if(!stream) - goto error1; - - stream->file = fopen(filename, "w"); - if(!stream->file) - goto error2; - - return stream; - -error2: - FREE(stream); -error1: - return NULL; -} - - -boolean -util_stream_write(struct util_stream *stream, const void *data, size_t size) -{ - if(!stream) - return FALSE; - - return fwrite(data, size, 1, stream->file) == size ? TRUE : FALSE; -} - - -void -util_stream_flush(struct util_stream *stream) -{ - if(!stream) - return; - - fflush(stream->file); -} - - -void -util_stream_close(struct util_stream *stream) -{ - if(!stream) - return; - - fclose(stream->file); - - FREE(stream); -} - - -#endif diff --git a/src/gallium/auxiliary/util/u_stream_wd.c b/src/gallium/auxiliary/util/u_stream_wd.c deleted file mode 100644 index 864489e7755..00000000000 --- a/src/gallium/auxiliary/util/u_stream_wd.c +++ /dev/null @@ -1,224 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * Stream implementation for the Windows Display driver. 
- */ - -#include "pipe/p_config.h" - -#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) - -#include -#include - -#include "util/u_memory.h" -#include "util/u_string.h" - -#include "u_stream.h" - - -#define MAP_FILE_SIZE (4*1024*1024) - - -struct util_stream -{ - char filename[MAX_PATH + 1]; - WCHAR wFileName[MAX_PATH + 1]; - boolean growable; - size_t map_size; - ULONG_PTR iFile; - char *pMap; - size_t written; - unsigned suffix; -}; - - -static INLINE boolean -util_stream_map(struct util_stream *stream) -{ - ULONG BytesInUnicodeString; - static char filename[MAX_PATH + 1]; - unsigned filename_len; - - if(stream->growable) - filename_len = util_snprintf(filename, - sizeof(filename), - "%s.%04x", - stream->filename, - stream->suffix++); - else - filename_len = util_snprintf(filename, - sizeof(filename), - "%s", - stream->filename); - - EngMultiByteToUnicodeN( - stream->wFileName, - sizeof(stream->wFileName), - &BytesInUnicodeString, - filename, - filename_len); - - stream->pMap = EngMapFile(stream->wFileName, stream->map_size, &stream->iFile); - if(!stream->pMap) - return FALSE; - - memset(stream->pMap, 0, stream->map_size); - stream->written = 0; - - return TRUE; -} - - -static INLINE void -util_stream_unmap(struct util_stream *stream) -{ - EngUnmapFile(stream->iFile); - if(stream->written < stream->map_size) { - /* Truncate file size */ - stream->pMap = EngMapFile(stream->wFileName, stream->written, &stream->iFile); - if(stream->pMap) - EngUnmapFile(stream->iFile); - } - - stream->pMap = NULL; -} - - -static INLINE void -util_stream_full_qualified_filename(char *dst, size_t size, const char *src) -{ - boolean need_drive, need_root; - - if((('A' <= src[0] && src[0] <= 'Z') || ('a' <= src[0] && src[0] <= 'z')) && src[1] == ':') { - need_drive = FALSE; - need_root = src[2] == '\\' ? FALSE : TRUE; - } - else { - need_drive = TRUE; - need_root = src[0] == '\\' ? FALSE : TRUE; - } - - util_snprintf(dst, size, - "\\??\\%s%s%s", - need_drive ? "C:" : "", - need_root ? 
"\\" : "", - src); -} - - -struct util_stream * -util_stream_create(const char *filename, size_t max_size) -{ - struct util_stream *stream; - - stream = CALLOC_STRUCT(util_stream); - if(!stream) - goto error1; - - util_stream_full_qualified_filename(stream->filename, - sizeof(stream->filename), - filename); - - if(max_size) { - stream->growable = FALSE; - stream->map_size = max_size; - } - else { - stream->growable = TRUE; - stream->map_size = MAP_FILE_SIZE; - } - - if(!util_stream_map(stream)) - goto error2; - - return stream; - -error2: - FREE(stream); -error1: - return NULL; -} - - -static INLINE void -util_stream_copy(struct util_stream *stream, const char *data, size_t size) -{ - assert(stream->written + size <= stream->map_size); - memcpy(stream->pMap + stream->written, data, size); - stream->written += size; -} - - -boolean -util_stream_write(struct util_stream *stream, const void *data, size_t size) -{ - if(!stream) - return FALSE; - - if(!stream->pMap) - return FALSE; - - while(stream->written + size > stream->map_size) { - size_t step = stream->map_size - stream->written; - util_stream_copy(stream, data, step); - data = (const char *)data + step; - size -= step; - - util_stream_unmap(stream); - if(!stream->growable || !util_stream_map(stream)) - return FALSE; - } - - util_stream_copy(stream, data, size); - - return TRUE; -} - - -void -util_stream_flush(struct util_stream *stream) -{ - (void)stream; -} - - -void -util_stream_close(struct util_stream *stream) -{ - if(!stream) - return; - - util_stream_unmap(stream); - - FREE(stream); -} - - -#endif -- cgit v1.2.3 From c61bf363937f40624a5632745630d4f2b9907082 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Mon, 8 Feb 2010 18:05:22 -0500 Subject: llvmpipe: export the tgsi translation code to a common layer the llvmpipe tgsi translation is a lot more complete than what was in gallivm so replacing the latter with the former. this is needed since the draw llvm paths will use the same code. 
effectively the proven llvmpipe code becomes gallivm. --- SConstruct | 16 +- configs/linux-llvm | 2 +- src/gallium/auxiliary/Makefile | 55 +- src/gallium/auxiliary/SConscript | 33 +- src/gallium/auxiliary/draw/draw_private.h | 6 - src/gallium/auxiliary/draw/draw_vs_llvm.c | 31 - src/gallium/auxiliary/gallivm/gallivm.cpp | 332 ----- src/gallium/auxiliary/gallivm/gallivm.h | 118 -- src/gallium/auxiliary/gallivm/gallivm_builtins.cpp | 140 -- src/gallium/auxiliary/gallivm/gallivm_cpu.cpp | 243 ---- src/gallium/auxiliary/gallivm/gallivm_p.h | 110 -- src/gallium/auxiliary/gallivm/instructions.cpp | 1193 ---------------- src/gallium/auxiliary/gallivm/instructions.h | 175 --- src/gallium/auxiliary/gallivm/instructionssoa.cpp | 525 ------- src/gallium/auxiliary/gallivm/instructionssoa.h | 116 -- src/gallium/auxiliary/gallivm/llvm_builtins.c | 114 -- src/gallium/auxiliary/gallivm/loweringpass.cpp | 17 - src/gallium/auxiliary/gallivm/loweringpass.h | 15 - src/gallium/auxiliary/gallivm/lp_bld_alpha.c | 63 + src/gallium/auxiliary/gallivm/lp_bld_alpha.h | 54 + src/gallium/auxiliary/gallivm/lp_bld_arit.c | 1325 ++++++++++++++++++ src/gallium/auxiliary/gallivm/lp_bld_arit.h | 203 +++ src/gallium/auxiliary/gallivm/lp_bld_blend.h | 107 ++ src/gallium/auxiliary/gallivm/lp_bld_blend_aos.c | 360 +++++ .../auxiliary/gallivm/lp_bld_blend_logicop.c | 109 ++ src/gallium/auxiliary/gallivm/lp_bld_blend_soa.c | 298 ++++ src/gallium/auxiliary/gallivm/lp_bld_const.c | 369 +++++ src/gallium/auxiliary/gallivm/lp_bld_const.h | 108 ++ src/gallium/auxiliary/gallivm/lp_bld_conv.c | 469 +++++++ src/gallium/auxiliary/gallivm/lp_bld_conv.h | 73 + src/gallium/auxiliary/gallivm/lp_bld_debug.c | 132 ++ src/gallium/auxiliary/gallivm/lp_bld_debug.h | 64 + src/gallium/auxiliary/gallivm/lp_bld_depth.c | 213 +++ src/gallium/auxiliary/gallivm/lp_bld_depth.h | 63 + src/gallium/auxiliary/gallivm/lp_bld_flow.c | 757 ++++++++++ src/gallium/auxiliary/gallivm/lp_bld_flow.h | 151 ++ 
src/gallium/auxiliary/gallivm/lp_bld_format.h | 83 ++ src/gallium/auxiliary/gallivm/lp_bld_format_aos.c | 383 +++++ .../auxiliary/gallivm/lp_bld_format_query.c | 72 + src/gallium/auxiliary/gallivm/lp_bld_format_soa.c | 149 ++ src/gallium/auxiliary/gallivm/lp_bld_interp.c | 407 ++++++ src/gallium/auxiliary/gallivm/lp_bld_interp.h | 96 ++ src/gallium/auxiliary/gallivm/lp_bld_intr.c | 192 +++ src/gallium/auxiliary/gallivm/lp_bld_intr.h | 102 ++ src/gallium/auxiliary/gallivm/lp_bld_logic.c | 421 ++++++ src/gallium/auxiliary/gallivm/lp_bld_logic.h | 80 ++ src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 75 + src/gallium/auxiliary/gallivm/lp_bld_misc.h | 56 + src/gallium/auxiliary/gallivm/lp_bld_pack.c | 418 ++++++ src/gallium/auxiliary/gallivm/lp_bld_pack.h | 95 ++ src/gallium/auxiliary/gallivm/lp_bld_sample.c | 190 +++ src/gallium/auxiliary/gallivm/lp_bld_sample.h | 155 +++ src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 598 ++++++++ src/gallium/auxiliary/gallivm/lp_bld_struct.c | 72 + src/gallium/auxiliary/gallivm/lp_bld_struct.h | 75 + src/gallium/auxiliary/gallivm/lp_bld_swizzle.c | 239 ++++ src/gallium/auxiliary/gallivm/lp_bld_swizzle.h | 91 ++ src/gallium/auxiliary/gallivm/lp_bld_tgsi.h | 84 ++ src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 1467 ++++++++++++++++++++ src/gallium/auxiliary/gallivm/lp_bld_type.c | 222 +++ src/gallium/auxiliary/gallivm/lp_bld_type.h | 273 ++++ src/gallium/auxiliary/gallivm/soabuiltins.c | 210 --- src/gallium/auxiliary/gallivm/storage.cpp | 364 ----- src/gallium/auxiliary/gallivm/storage.h | 133 -- src/gallium/auxiliary/gallivm/storagesoa.cpp | 438 ------ src/gallium/auxiliary/gallivm/storagesoa.h | 107 -- src/gallium/auxiliary/gallivm/tgsitollvm.cpp | 1136 --------------- src/gallium/auxiliary/gallivm/tgsitollvm.h | 20 - src/gallium/drivers/llvmpipe/SConscript | 24 - src/gallium/drivers/llvmpipe/lp_bld_alpha.c | 63 - src/gallium/drivers/llvmpipe/lp_bld_alpha.h | 54 - src/gallium/drivers/llvmpipe/lp_bld_arit.c | 1325 
------------------ src/gallium/drivers/llvmpipe/lp_bld_arit.h | 203 --- src/gallium/drivers/llvmpipe/lp_bld_blend.h | 107 -- src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c | 360 ----- .../drivers/llvmpipe/lp_bld_blend_logicop.c | 109 -- src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c | 298 ---- src/gallium/drivers/llvmpipe/lp_bld_const.c | 369 ----- src/gallium/drivers/llvmpipe/lp_bld_const.h | 108 -- src/gallium/drivers/llvmpipe/lp_bld_conv.c | 469 ------- src/gallium/drivers/llvmpipe/lp_bld_conv.h | 73 - src/gallium/drivers/llvmpipe/lp_bld_debug.c | 132 -- src/gallium/drivers/llvmpipe/lp_bld_debug.h | 64 - src/gallium/drivers/llvmpipe/lp_bld_depth.c | 213 --- src/gallium/drivers/llvmpipe/lp_bld_depth.h | 63 - src/gallium/drivers/llvmpipe/lp_bld_flow.c | 757 ---------- src/gallium/drivers/llvmpipe/lp_bld_flow.h | 151 -- src/gallium/drivers/llvmpipe/lp_bld_format.h | 83 -- src/gallium/drivers/llvmpipe/lp_bld_format_aos.c | 383 ----- src/gallium/drivers/llvmpipe/lp_bld_format_query.c | 72 - src/gallium/drivers/llvmpipe/lp_bld_format_soa.c | 149 -- src/gallium/drivers/llvmpipe/lp_bld_interp.c | 407 ------ src/gallium/drivers/llvmpipe/lp_bld_interp.h | 96 -- src/gallium/drivers/llvmpipe/lp_bld_intr.c | 192 --- src/gallium/drivers/llvmpipe/lp_bld_intr.h | 102 -- src/gallium/drivers/llvmpipe/lp_bld_logic.c | 421 ------ src/gallium/drivers/llvmpipe/lp_bld_logic.h | 80 -- src/gallium/drivers/llvmpipe/lp_bld_misc.cpp | 75 - src/gallium/drivers/llvmpipe/lp_bld_misc.h | 56 - src/gallium/drivers/llvmpipe/lp_bld_pack.c | 418 ------ src/gallium/drivers/llvmpipe/lp_bld_pack.h | 95 -- src/gallium/drivers/llvmpipe/lp_bld_sample.c | 190 --- src/gallium/drivers/llvmpipe/lp_bld_sample.h | 155 --- src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c | 598 -------- src/gallium/drivers/llvmpipe/lp_bld_struct.c | 72 - src/gallium/drivers/llvmpipe/lp_bld_struct.h | 75 - src/gallium/drivers/llvmpipe/lp_bld_swizzle.c | 239 ---- src/gallium/drivers/llvmpipe/lp_bld_swizzle.h | 91 -- 
src/gallium/drivers/llvmpipe/lp_bld_tgsi.h | 84 -- src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c | 1467 -------------------- src/gallium/drivers/llvmpipe/lp_bld_type.c | 222 --- src/gallium/drivers/llvmpipe/lp_bld_type.h | 273 ---- src/gallium/drivers/llvmpipe/lp_jit.c | 2 +- src/gallium/drivers/llvmpipe/lp_jit.h | 2 +- src/gallium/drivers/llvmpipe/lp_rast.c | 2 +- src/gallium/drivers/llvmpipe/lp_state.h | 2 +- src/gallium/drivers/llvmpipe/lp_state_fs.c | 26 +- src/gallium/drivers/llvmpipe/lp_test.h | 2 +- src/gallium/drivers/llvmpipe/lp_test_blend.c | 6 +- src/gallium/drivers/llvmpipe/lp_test_conv.c | 8 +- src/gallium/drivers/llvmpipe/lp_test_format.c | 2 +- src/gallium/drivers/llvmpipe/lp_test_main.c | 4 +- src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c | 8 +- 123 files changed, 11105 insertions(+), 16658 deletions(-) delete mode 100644 src/gallium/auxiliary/gallivm/gallivm.cpp delete mode 100644 src/gallium/auxiliary/gallivm/gallivm.h delete mode 100644 src/gallium/auxiliary/gallivm/gallivm_builtins.cpp delete mode 100644 src/gallium/auxiliary/gallivm/gallivm_cpu.cpp delete mode 100644 src/gallium/auxiliary/gallivm/gallivm_p.h delete mode 100644 src/gallium/auxiliary/gallivm/instructions.cpp delete mode 100644 src/gallium/auxiliary/gallivm/instructions.h delete mode 100644 src/gallium/auxiliary/gallivm/instructionssoa.cpp delete mode 100644 src/gallium/auxiliary/gallivm/instructionssoa.h delete mode 100644 src/gallium/auxiliary/gallivm/llvm_builtins.c delete mode 100644 src/gallium/auxiliary/gallivm/loweringpass.cpp delete mode 100644 src/gallium/auxiliary/gallivm/loweringpass.h create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_alpha.c create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_alpha.h create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_arit.c create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_arit.h create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_blend.h create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_blend_aos.c 
create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_blend_logicop.c create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_blend_soa.c create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_const.c create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_const.h create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_conv.c create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_conv.h create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_debug.c create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_debug.h create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_depth.c create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_depth.h create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_flow.c create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_flow.h create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_format.h create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_format_aos.c create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_format_query.c create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_format_soa.c create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_interp.c create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_interp.h create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_intr.c create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_intr.h create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_logic.c create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_logic.h create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_misc.cpp create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_misc.h create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_pack.c create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_pack.h create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_sample.c create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_sample.h create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_struct.c create mode 100644 
src/gallium/auxiliary/gallivm/lp_bld_struct.h create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_swizzle.c create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_swizzle.h create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_tgsi.h create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_type.c create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_type.h delete mode 100644 src/gallium/auxiliary/gallivm/soabuiltins.c delete mode 100644 src/gallium/auxiliary/gallivm/storage.cpp delete mode 100644 src/gallium/auxiliary/gallivm/storage.h delete mode 100644 src/gallium/auxiliary/gallivm/storagesoa.cpp delete mode 100644 src/gallium/auxiliary/gallivm/storagesoa.h delete mode 100644 src/gallium/auxiliary/gallivm/tgsitollvm.cpp delete mode 100644 src/gallium/auxiliary/gallivm/tgsitollvm.h delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_alpha.c delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_alpha.h delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_arit.c delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_arit.h delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_blend.h delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_blend_logicop.c delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_const.c delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_const.h delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_conv.c delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_conv.h delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_debug.c delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_debug.h delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_depth.c delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_depth.h delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_flow.c delete mode 100644 
src/gallium/drivers/llvmpipe/lp_bld_flow.h delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_format.h delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_format_aos.c delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_format_query.c delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_format_soa.c delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_interp.c delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_interp.h delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_intr.c delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_intr.h delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_logic.c delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_logic.h delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_misc.cpp delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_misc.h delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_pack.c delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_pack.h delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_sample.c delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_sample.h delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_struct.c delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_struct.h delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_swizzle.c delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_swizzle.h delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_tgsi.h delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_type.c delete mode 100644 src/gallium/drivers/llvmpipe/lp_bld_type.h (limited to 'src/gallium/auxiliary/SConscript') diff --git a/SConstruct b/SConstruct index 1c7550d7836..76ab9025e23 100644 --- a/SConstruct +++ b/SConstruct @@ -79,9 +79,10 @@ Help(opts.GenerateHelpText(env)) # replicate options values in local variables debug = env['debug'] dri = env['dri'] -llvm = env['llvm'] machine = env['machine'] platform = 
env['platform'] +drawllvm = 'llvmpipe' in env['drivers'] + # derived options x86 = machine == 'x86' @@ -94,7 +95,7 @@ Export([ 'x86', 'ppc', 'dri', - 'llvm', + 'drawllvm', 'platform', 'gcc', 'msvc', @@ -165,13 +166,12 @@ if dri: 'GLX_INDIRECT_RENDERING', ]) -# LLVM -if llvm: +# LLVM support in the Draw module +if drawllvm: # See also http://www.scons.org/wiki/UsingPkgConfig - env.ParseConfig('llvm-config --cflags --ldflags --libs backend bitreader engine instrumentation interpreter ipo') - env.Append(CPPDEFINES = ['MESA_LLVM']) - # Force C++ linkage - env['LINK'] = env['CXX'] + # currently --ldflags --libsdisabled since the driver will force the correct linkage + env.ParseConfig('llvm-config --cflags backend bitreader engine instrumentation interpreter ipo') + env.Append(CPPDEFINES = ['DRAW_LLVM']) # libGL if platform in ('linux', 'freebsd', 'darwin'): diff --git a/configs/linux-llvm b/configs/linux-llvm index 4bb5ff8868f..27e082ebf7e 100644 --- a/configs/linux-llvm +++ b/configs/linux-llvm @@ -12,7 +12,7 @@ GALLIUM_DRIVERS_DIRS += llvmpipe OPT_FLAGS = -O3 -ansi -pedantic ARCH_FLAGS = -m32 -mmmx -msse -msse2 -mstackrealign -DEFINES += -DNDEBUG -DGALLIUM_LLVMPIPE -DHAVE_UDIS86 +DEFINES += -DNDEBUG -DGALLIUM_LLVMPIPE -DDRAW_LLVM -DHAVE_UDIS86 # override -std=c99 CFLAGS += -std=gnu99 diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index 66cfe9d89dd..238daa25172 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -130,41 +130,40 @@ C_SOURCES = \ vl/vl_shader_build.c GALLIVM_SOURCES = \ - gallivm/gallivm.cpp \ - gallivm/gallivm_cpu.cpp \ - gallivm/instructions.cpp \ - gallivm/loweringpass.cpp \ - gallivm/tgsitollvm.cpp \ - gallivm/storage.cpp \ - gallivm/storagesoa.cpp \ - gallivm/instructionssoa.cpp + gallivm/lp_bld_alpha.c \ + gallivm/lp_bld_arit.c \ + gallivm/lp_bld_blend_aos.c \ + gallivm/lp_bld_blend_logicop.c \ + gallivm/lp_bld_blend_soa.c \ + gallivm/lp_bld_const.c \ + gallivm/lp_bld_conv.c \ + 
gallivm/lp_bld_debug.c \ + gallivm/lp_bld_depth.c \ + gallivm/lp_bld_flow.c \ + gallivm/lp_bld_format_aos.c \ + gallivm/lp_bld_format_query.c \ + gallivm/lp_bld_format_soa.c \ + gallivm/lp_bld_interp.c \ + gallivm/lp_bld_intr.c \ + gallivm/lp_bld_logic.c \ + gallivm/lp_bld_pack.c \ + gallivm/lp_bld_sample.c \ + gallivm/lp_bld_sample_soa.c \ + gallivm/lp_bld_struct.c \ + gallivm/lp_bld_swizzle.c \ + gallivm/lp_bld_tgsi_soa.c \ + gallivm/lp_bld_type.c -INC_SOURCES = \ - gallivm/gallivm_builtins.cpp \ - gallivm/gallivmsoabuiltins.cpp -# XXX: gallivm doesn't build correctly so disable for now -#ifeq ($(MESA_LLVM),1) -#DEFINES += -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS -#CPP_SOURCES += \ -# $(GALLIVM_SOURCES) -#endif +ifeq ($(MESA_LLVM),1) +C_SOURCES += \ + $(GALLIVM_SOURCES) +endif include ../Makefile.template -gallivm/gallivm_builtins.cpp: gallivm/llvm_builtins.c - clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp1.bin - (echo "static const unsigned char llvm_builtins_data[] = {"; od -txC temp1.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/,0x00};/") >$@ - rm temp1.bin - -gallivm/gallivmsoabuiltins.cpp: gallivm/soabuiltins.c - clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp2.bin - (echo "static const unsigned char soabuiltins_data[] = {"; od -txC temp2.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/,0x00};/") >$@ - rm temp2.bin - - indices/u_indices_gen.c: indices/u_indices_gen.py python $< > $@ diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index 255739b0a2a..e952e6affd7 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -166,16 +166,31 @@ source = [ 'vl/vl_shader_build.c', ] -if env['llvm']: +if drawllvm: source += [ - 'gallivm/gallivm.cpp', - 'gallivm/gallivm_cpu.cpp', - 'gallivm/instructions.cpp', - 'gallivm/loweringpass.cpp', - 'gallivm/tgsitollvm.cpp', - 
'gallivm/storage.cpp', - 'gallivm/storagesoa.cpp', - 'gallivm/instructionssoa.cpp', + 'gallivm/lp_bld_alpha.c', + 'gallivm/lp_bld_arit.c', + 'gallivm/lp_bld_blend_aos.c', + 'gallivm/lp_bld_blend_logicop.c', + 'gallivm/lp_bld_blend_soa.c', + 'gallivm/lp_bld_const.c', + 'gallivm/lp_bld_conv.c', + 'gallivm/lp_bld_debug.c', + 'gallivm/lp_bld_depth.c', + 'gallivm/lp_bld_flow.c', + 'gallivm/lp_bld_format_aos.c', + 'gallivm/lp_bld_format_query.c', + 'gallivm/lp_bld_format_soa.c', + 'gallivm/lp_bld_interp.c', + 'gallivm/lp_bld_intr.c', + 'gallivm/lp_bld_logic.c', + 'gallivm/lp_bld_pack.c', + 'gallivm/lp_bld_sample.c', + 'gallivm/lp_bld_sample_soa.c', + 'gallivm/lp_bld_struct.c', + 'gallivm/lp_bld_swizzle.c', + 'gallivm/lp_bld_tgsi_soa.c', + 'gallivm/lp_bld_type.c', ] gallium = env.ConvenienceLibrary( diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 69466d8749d..1e6e01af9e2 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -48,8 +48,6 @@ struct pipe_context; -struct gallivm_prog; -struct gallivm_cpu_engine; struct draw_vertex_shader; struct draw_context; struct draw_stage; @@ -193,10 +191,6 @@ struct draw_context uint num_samplers; struct tgsi_sampler **samplers; - /* This (and the tgsi_exec_machine struct) probably need to be moved somewhere private. 
- */ - struct gallivm_cpu_engine *engine; - /* Here's another one: */ struct aos_machine *aos_machine; diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index fd9166fda58..5f7a645f5d8 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -42,11 +42,8 @@ #ifdef MESA_LLVM -#include "gallivm/gallivm.h" - struct draw_llvm_vertex_shader { struct draw_vertex_shader base; - struct gallivm_prog *llvm_prog; struct tgsi_exec_machine *machine; }; @@ -58,8 +55,6 @@ vs_llvm_prepare( struct draw_vertex_shader *base, } - - static void vs_llvm_run_linear( struct draw_vertex_shader *base, const float (*input)[4], @@ -71,11 +66,6 @@ vs_llvm_run_linear( struct draw_vertex_shader *base, { struct draw_llvm_vertex_shader *shader = (struct draw_llvm_vertex_shader *)base; - - gallivm_cpu_vs_exec(shader->llvm_prog, shader->machine, - input, base->info.num_inputs, output, base->info.num_outputs, - (const float (*)[4])constants[0], - count, input_stride, output_stride); } @@ -122,27 +112,6 @@ draw_create_vs_llvm(struct draw_context *draw, vs->base.delete = vs_llvm_delete; vs->machine = draw->vs.machine; - { - struct gallivm_ir *ir = gallivm_ir_new(GALLIVM_VS); - gallivm_ir_set_layout(ir, GALLIVM_SOA); - gallivm_ir_set_components(ir, 4); - gallivm_ir_fill_from_tgsi(ir, vs->base.state.tokens); - vs->llvm_prog = gallivm_ir_compile(ir); - gallivm_ir_delete(ir); - } - - draw->vs.engine = gallivm_global_cpu_engine(); - - /* XXX: Why are there two versions of this? Shouldn't creating the - * engine be a separate operation to compiling a shader? 
- */ - if (!draw->vs.engine) { - draw->vs.engine = gallivm_cpu_engine_create(vs->llvm_prog); - } - else { - gallivm_cpu_jit_compile(draw->vs.engine, vs->llvm_prog); - } - return &vs->base; } diff --git a/src/gallium/auxiliary/gallivm/gallivm.cpp b/src/gallium/auxiliary/gallivm/gallivm.cpp deleted file mode 100644 index f4af5cc8ad5..00000000000 --- a/src/gallium/auxiliary/gallivm/gallivm.cpp +++ /dev/null @@ -1,332 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - /* - * Authors: - * Zack Rusin zack@tungstengraphics.com - */ -#ifdef MESA_LLVM - -#include "gallivm.h" -#include "gallivm_p.h" - -#include "instructions.h" -#include "loweringpass.h" -#include "storage.h" -#include "tgsitollvm.h" - -#include "pipe/p_context.h" -#include "pipe/p_shader_tokens.h" - -#include "tgsi/tgsi_exec.h" -#include "tgsi/tgsi_dump.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -static int GLOBAL_ID = 0; - -using namespace llvm; - -static inline -void AddStandardCompilePasses(PassManager &PM) -{ - PM.add(new LoweringPass()); - PM.add(createVerifierPass()); // Verify that input is correct - - PM.add(createLowerSetJmpPass()); // Lower llvm.setjmp/.longjmp - - //PM.add(createStripSymbolsPass(true)); - - PM.add(createRaiseAllocationsPass()); // call %malloc -> malloc inst - PM.add(createCFGSimplificationPass()); // Clean up disgusting code - PM.add(createPromoteMemoryToRegisterPass());// Kill useless allocas - PM.add(createGlobalOptimizerPass()); // Optimize out global vars - PM.add(createGlobalDCEPass()); // Remove unused fns and globs - PM.add(createIPConstantPropagationPass());// IP Constant Propagation - PM.add(createDeadArgEliminationPass()); // Dead argument elimination - PM.add(createInstructionCombiningPass()); // Clean up after IPCP & DAE - PM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE - - PM.add(createPruneEHPass()); // Remove dead EH info - - PM.add(createFunctionInliningPass()); // Inline small functions - PM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args - - PM.add(createTailDuplicationPass()); // Simplify cfg by copying code - PM.add(createInstructionCombiningPass()); // Cleanup for scalarrepl. 
- PM.add(createCFGSimplificationPass()); // Merge & remove BBs - PM.add(createScalarReplAggregatesPass()); // Break up aggregate allocas - PM.add(createInstructionCombiningPass()); // Combine silly seq's - PM.add(createCondPropagationPass()); // Propagate conditionals - - PM.add(createTailCallEliminationPass()); // Eliminate tail calls - PM.add(createCFGSimplificationPass()); // Merge & remove BBs - PM.add(createReassociatePass()); // Reassociate expressions - PM.add(createLoopRotatePass()); - PM.add(createLICMPass()); // Hoist loop invariants - PM.add(createLoopUnswitchPass()); // Unswitch loops. - PM.add(createLoopIndexSplitPass()); // Index split loops. - PM.add(createInstructionCombiningPass()); // Clean up after LICM/reassoc - PM.add(createIndVarSimplifyPass()); // Canonicalize indvars - PM.add(createLoopUnrollPass()); // Unroll small loops - PM.add(createInstructionCombiningPass()); // Clean up after the unroller - PM.add(createGVNPass()); // Remove redundancies - PM.add(createSCCPPass()); // Constant prop with SCCP - - // Run instcombine after redundancy elimination to exploit opportunities - // opened up by them. 
- PM.add(createInstructionCombiningPass()); - PM.add(createCondPropagationPass()); // Propagate conditionals - - PM.add(createDeadStoreEliminationPass()); // Delete dead stores - PM.add(createAggressiveDCEPass()); // SSA based 'Aggressive DCE' - PM.add(createCFGSimplificationPass()); // Merge & remove BBs - PM.add(createSimplifyLibCallsPass()); // Library Call Optimizations - PM.add(createDeadTypeEliminationPass()); // Eliminate dead types - PM.add(createConstantMergePass()); // Merge dup global constants -} - -void gallivm_prog_delete(struct gallivm_prog *prog) -{ - delete prog->module; - prog->module = 0; - prog->function = 0; - free(prog); -} - -static inline void -constant_interpolation(float (*inputs)[16][4], - const struct tgsi_interp_coef *coefs, - unsigned attrib, - unsigned chan) -{ - unsigned i; - - for (i = 0; i < QUAD_SIZE; ++i) { - inputs[i][attrib][chan] = coefs[attrib].a0[chan]; - } -} - -static inline void -linear_interpolation(float (*inputs)[16][4], - const struct tgsi_interp_coef *coefs, - unsigned attrib, - unsigned chan) -{ - unsigned i; - - for( i = 0; i < QUAD_SIZE; i++ ) { - const float x = inputs[i][0][0]; - const float y = inputs[i][0][1]; - - inputs[i][attrib][chan] = - coefs[attrib].a0[chan] + - coefs[attrib].dadx[chan] * x + - coefs[attrib].dady[chan] * y; - } -} - -static inline void -perspective_interpolation(float (*inputs)[16][4], - const struct tgsi_interp_coef *coefs, - unsigned attrib, - unsigned chan ) -{ - unsigned i; - - for( i = 0; i < QUAD_SIZE; i++ ) { - const float x = inputs[i][0][0]; - const float y = inputs[i][0][1]; - /* WPOS.w here is really 1/w */ - const float w = 1.0f / inputs[i][0][3]; - assert(inputs[i][0][3] != 0.0); - - inputs[i][attrib][chan] = - (coefs[attrib].a0[chan] + - coefs[attrib].dadx[chan] * x + - coefs[attrib].dady[chan] * y) * w; - } -} - -void gallivm_ir_dump(struct gallivm_ir *ir, const char *file_prefix) -{ - if (!ir || !ir->module) - return; - - if (file_prefix) { - std::ostringstream stream; - 
stream << file_prefix; - stream << ir->id; - stream << ".ll"; - std::string name = stream.str(); - std::ofstream out(name.c_str()); - if (!out) { - std::cerr<<"Can't open file : "<module); - out.close(); - } else { - const llvm::Module::FunctionListType &funcs = ir->module->getFunctionList(); - llvm::Module::FunctionListType::const_iterator itr; - std::cout<<"; ---------- Start shader "<id<id<num_interp; ++i) { - const gallivm_interpolate &interp = prog->interpolators[i]; - switch (interp.type) { - case TGSI_INTERPOLATE_CONSTANT: - constant_interpolation(inputs, coef, interp.attrib, interp.chan); - break; - - case TGSI_INTERPOLATE_LINEAR: - linear_interpolation(inputs, coef, interp.attrib, interp.chan); - break; - - case TGSI_INTERPOLATE_PERSPECTIVE: - perspective_interpolation(inputs, coef, interp.attrib, interp.chan); - break; - - default: - assert( 0 ); - } - } -} - - -struct gallivm_ir * gallivm_ir_new(enum gallivm_shader_type type) -{ - struct gallivm_ir *ir = - (struct gallivm_ir *)calloc(1, sizeof(struct gallivm_ir)); - ++GLOBAL_ID; - ir->id = GLOBAL_ID; - ir->type = type; - - return ir; -} - -void gallivm_ir_set_layout(struct gallivm_ir *ir, - enum gallivm_vector_layout layout) -{ - ir->layout = layout; -} - -void gallivm_ir_set_components(struct gallivm_ir *ir, int num) -{ - ir->num_components = num; -} - -void gallivm_ir_fill_from_tgsi(struct gallivm_ir *ir, - const struct tgsi_token *tokens) -{ - std::cout << "Creating llvm from: " <module = mod; - gallivm_ir_dump(ir, 0); -} - -void gallivm_ir_delete(struct gallivm_ir *ir) -{ - delete ir->module; - free(ir); -} - -struct gallivm_prog * gallivm_ir_compile(struct gallivm_ir *ir) -{ - struct gallivm_prog *prog = - (struct gallivm_prog *)calloc(1, sizeof(struct gallivm_prog)); - - std::cout << "Before optimizations:"<module->dump(); - std::cout<<"-------------------------------"<module); - llvm::Module *mod = llvm::CloneModule(ir->module); - prog->num_consts = ir->num_consts; - memcpy(prog->interpolators, 
ir->interpolators, sizeof(prog->interpolators)); - prog->num_interp = ir->num_interp; - - /* Run optimization passes over it */ - PassManager passes; - passes.add(new TargetData(mod)); - AddStandardCompilePasses(passes); - passes.run(*mod); - prog->module = mod; - - std::cout << "After optimizations:"<dump(); - - return prog; -} - -#endif /* MESA_LLVM */ diff --git a/src/gallium/auxiliary/gallivm/gallivm.h b/src/gallium/auxiliary/gallivm/gallivm.h deleted file mode 100644 index 36a64a77471..00000000000 --- a/src/gallium/auxiliary/gallivm/gallivm.h +++ /dev/null @@ -1,118 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - /* - * Authors: - * Zack Rusin zack@tungstengraphics.com - */ - -#ifndef GALLIVM_H -#define GALLIVM_H - -/* - LLVM representation consists of two stages - layout independent - intermediate representation gallivm_ir and driver specific - gallivm_prog. TGSI is first being translated into gallivm_ir - after that driver can set number of options on gallivm_ir and - have it compiled into gallivm_prog. gallivm_prog can be either - executed (assuming there's LLVM JIT backend for the current - target) or machine code generation can be done (assuming there's - a LLVM code generator for thecurrent target) - */ -#if defined __cplusplus -extern "C" { -#endif - -#include "pipe/p_state.h" - -#ifdef MESA_LLVM - -struct tgsi_token; - -struct gallivm_ir; -struct gallivm_prog; -struct gallivm_cpu_engine; -struct tgsi_interp_coef; -struct tgsi_sampler; -struct tgsi_exec_vector; - -enum gallivm_shader_type { - GALLIVM_VS, - GALLIVM_FS -}; - -enum gallivm_vector_layout { - GALLIVM_AOS, - GALLIVM_SOA -}; - -struct gallivm_ir *gallivm_ir_new(enum gallivm_shader_type type); -void gallivm_ir_set_layout(struct gallivm_ir *ir, - enum gallivm_vector_layout layout); -void gallivm_ir_set_components(struct gallivm_ir *ir, int num); -void gallivm_ir_fill_from_tgsi(struct gallivm_ir *ir, - const struct tgsi_token *tokens); -void gallivm_ir_delete(struct gallivm_ir *ir); - - -struct gallivm_prog *gallivm_ir_compile(struct gallivm_ir *ir); - -void gallivm_prog_inputs_interpolate(struct gallivm_prog *prog, - float (*inputs)[PIPE_MAX_SHADER_INPUTS][4], - const struct tgsi_interp_coef *coefs); -void gallivm_prog_dump(struct gallivm_prog *prog, const char *file_prefix); - - -struct gallivm_cpu_engine *gallivm_cpu_engine_create(struct gallivm_prog *prog); -struct gallivm_cpu_engine *gallivm_global_cpu_engine(); -int gallivm_cpu_vs_exec(struct gallivm_prog *prog, - struct tgsi_exec_machine *machine, - const float 
(*input)[4], - unsigned num_inputs, - float (*output)[4], - unsigned num_outputs, - const float (*constants)[4], - unsigned count, - unsigned input_stride, - unsigned output_stride); -int gallivm_cpu_fs_exec(struct gallivm_prog *prog, - float x, float y, - float (*dests)[PIPE_MAX_SHADER_INPUTS][4], - float (*inputs)[PIPE_MAX_SHADER_INPUTS][4], - float (*consts)[4], - struct tgsi_sampler *samplers); -void gallivm_cpu_jit_compile(struct gallivm_cpu_engine *ee, struct gallivm_prog *prog); -void gallivm_cpu_engine_delete(struct gallivm_cpu_engine *ee); - - -#endif /* MESA_LLVM */ - -#if defined __cplusplus -} -#endif - -#endif diff --git a/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp b/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp deleted file mode 100644 index 634bac01507..00000000000 --- a/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp +++ /dev/null @@ -1,140 +0,0 @@ -static const unsigned char llvm_builtins_data[] = { -0x42,0x43,0xc0,0xde,0x21,0x0c,0x00,0x00,0x27,0x02,0x00,0x00,0x01,0x10,0x00,0x00, -0x10,0x00,0x00,0x00,0x07,0x81,0x23,0x91,0x41,0xc8,0x04,0x49,0x06,0x10,0x32,0x39, -0x92,0x01,0x84,0x0c,0x25,0x05,0x08,0x19,0x1e,0x04,0x8b,0x62,0x80,0x14,0x45,0x02, -0x42,0x92,0x0b,0x42,0xa4,0x10,0x32,0x14,0x38,0x08,0x18,0x49,0x0a,0x32,0x44,0x24, -0x48,0x0a,0x90,0x21,0x23,0x44,0x72,0x80,0x8c,0x14,0x21,0x86,0x0a,0x8a,0x0a,0x64, -0x0c,0x1f,0x00,0x00,0x49,0x18,0x00,0x00,0x03,0x00,0x00,0x00,0x0b,0x84,0xff,0xff, -0xff,0xff,0x1f,0xc0,0x00,0x00,0x00,0x00,0x51,0x20,0x00,0x00,0x12,0x00,0x00,0x00, -0x32,0x22,0x48,0x09,0x20,0x65,0x82,0x84,0x00,0x26,0x45,0x48,0x05,0x09,0x26,0x45, -0xc6,0x05,0x42,0x52,0x26,0x08,0xae,0x19,0x80,0x61,0x04,0x02,0x98,0x23,0x00,0x83, -0x29,0x80,0x21,0x00,0xb2,0x73,0x04,0x01,0x51,0x8a,0xf4,0x08,0x92,0xa4,0x39,0x47, -0x80,0x50,0x2b,0x03,0x00,0xa0,0x08,0x21,0x5c,0x46,0x2b,0x44,0x08,0x21,0xd4,0x40, -0x14,0x01,0x80,0x11,0x80,0x22,0x88,0x00,0x13,0x30,0x7c,0xc0,0x03,0x3b,0xf8,0x05, 
-0x3b,0xa0,0x83,0x36,0xa8,0x07,0x77,0x58,0x07,0x77,0x78,0x87,0x7b,0x70,0x87,0x36, -0x60,0x87,0x74,0x70,0x87,0x7a,0xc0,0x87,0x36,0x38,0x07,0x77,0xa8,0x87,0x0d,0xf7, -0x50,0x0e,0x6d,0x00,0x0f,0x7a,0x60,0x07,0x74,0xa0,0x07,0x76,0x40,0x07,0x7a,0x60, -0x07,0x74,0xd0,0x06,0xe9,0x10,0x07,0x7a,0x80,0x07,0x7a,0x80,0x07,0x6d,0x90,0x0e, -0x78,0xa0,0x07,0x78,0xa0,0x07,0x78,0xd0,0x06,0xe9,0x10,0x07,0x76,0xa0,0x07,0x71, -0x60,0x07,0x7a,0x10,0x07,0x76,0xd0,0x06,0xe9,0x30,0x07,0x72,0xa0,0x07,0x73,0x20, -0x07,0x7a,0x30,0x07,0x72,0xd0,0x06,0xe9,0x60,0x07,0x74,0xa0,0x07,0x76,0x40,0x07, -0x7a,0x60,0x07,0x74,0xd0,0x06,0xe6,0x30,0x07,0x72,0xa0,0x07,0x73,0x20,0x07,0x7a, -0x30,0x07,0x72,0xd0,0x06,0xe6,0x60,0x07,0x74,0xa0,0x07,0x76,0x40,0x07,0x7a,0x60, -0x07,0x74,0xd0,0x06,0xf6,0x60,0x07,0x74,0xa0,0x07,0x76,0x40,0x07,0x7a,0x60,0x07, -0x74,0xd0,0x06,0xf6,0x10,0x07,0x72,0x80,0x07,0x7a,0x10,0x07,0x72,0x80,0x07,0x7a, -0x10,0x07,0x72,0x80,0x07,0x6d,0x10,0x0e,0x70,0xa0,0x07,0x70,0xa0,0x07,0x76,0x40, -0x07,0x6d,0x60,0x0e,0x78,0x00,0x07,0x7a,0x10,0x07,0x72,0x80,0x07,0x7a,0x10,0x07, -0x72,0x80,0x07,0x3a,0x0f,0x84,0x48,0x20,0x23,0x24,0x40,0x00,0x62,0x67,0x88,0x9f, -0x19,0x92,0x24,0x00,0x10,0x04,0x00,0x00,0x00,0x43,0x92,0x04,0x08,0x00,0x00,0x00, -0x00,0x60,0x48,0xa2,0x00,0x40,0x10,0x00,0x00,0x00,0x0c,0x49,0x16,0x00,0x08,0x02, -0x00,0x00,0x80,0x21,0x89,0x02,0x00,0x41,0x00,0x00,0x00,0x30,0x24,0x61,0x80,0x00, -0x00,0x00,0x00,0x00,0x86,0x24,0x07,0x10,0x00,0x00,0x00,0x00,0xc0,0x90,0x44,0x01, -0x80,0x20,0x00,0x00,0x00,0x18,0x92,0x1c,0x40,0x00,0x00,0x00,0x00,0x00,0x43,0x12, -0x05,0x00,0x82,0x00,0x00,0x00,0x60,0x48,0x52,0x00,0x40,0x10,0x00,0x00,0x00,0x64, -0x81,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x32,0x1e,0x98,0x10,0x19,0x11,0x4c,0x90, -0x8c,0x09,0x26,0x47,0xc6,0x04,0x43,0x8a,0x8a,0x59,0x8b,0x43,0x50,0xd2,0x09,0x02, -0x81,0xd2,0x73,0x50,0xc9,0x0c,0x2a,0x99,0x41,0x25,0x33,0xa8,0x64,0x56,0x28,0x66, -0x2d,0x0e,0x41,0xcf,0x2a,0x15,0x04,0x4a,0xcf,0x41,0x25,0x33,0xa8,0x64,0x06,0x95, 
-0xcc,0xa0,0x92,0x59,0x01,0x00,0x00,0x00,0x53,0x82,0x26,0x0c,0x04,0x00,0x00,0x00, -0x22,0x00,0x00,0x00,0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,0x05,0x00,0x00,0x00, -0x04,0xc6,0x08,0x40,0x10,0x04,0xe1,0x70,0x18,0x23,0x00,0x41,0x10,0x84,0xc3,0x60, -0x04,0x00,0x00,0x00,0xc3,0x0d,0xce,0x43,0x4c,0x37,0x3c,0x8e,0x34,0xdc,0x30,0x41, -0xc2,0x74,0x03,0x34,0x51,0xc3,0x0d,0x4d,0x44,0x4c,0x37,0x44,0x8d,0x35,0x56,0x01, -0x04,0xc3,0x55,0x21,0x16,0x0e,0x04,0x00,0x0f,0x00,0x00,0x00,0xd6,0x10,0x00,0xe6, -0x10,0x04,0x76,0x81,0x00,0x3e,0x30,0x0c,0x91,0x4f,0x1b,0x05,0x21,0x30,0x8f,0x6d, -0x13,0x48,0xe0,0x03,0xc3,0x10,0xf9,0xb4,0x55,0x20,0x81,0x0f,0x0c,0x43,0xe4,0xd7, -0x66,0x41,0x08,0xcc,0xa3,0x1f,0x40,0x41,0x34,0x53,0x84,0x99,0xc4,0x20,0x30,0x8f, -0x61,0x10,0x02,0xb0,0x2c,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00, -0x27,0x00,0x00,0x00,0x13,0x04,0x43,0x2c,0x10,0x00,0x00,0x00,0x08,0x00,0x00,0x00, -0x24,0x8a,0xa0,0x0c,0x46,0x00,0x4a,0x80,0xc2,0x1c,0x84,0x55,0x55,0xd6,0x1c,0x84, -0x45,0x51,0x16,0x81,0x19,0x80,0x11,0x80,0x31,0x02,0x10,0x04,0x41,0xfc,0x03,0x00, -0x63,0x08,0x0d,0x34,0xdc,0x70,0x55,0xc2,0x2c,0x43,0x20,0x60,0x73,0x0c,0xd3,0x15, -0x8d,0x21,0x34,0xd1,0x18,0x42,0xf3,0x8c,0x55,0x00,0x81,0xa0,0x6d,0x73,0x0c,0x19, -0xe7,0x60,0x87,0x52,0x38,0x10,0x00,0x00,0x10,0x00,0x00,0x00,0x27,0x50,0x20,0x05, -0xd1,0x0c,0x17,0x60,0x20,0xc5,0x74,0x10,0x8d,0x65,0x14,0x13,0xf3,0xd4,0xb4,0x6d, -0x14,0x13,0xf3,0xd4,0xb8,0x69,0x14,0x13,0xf3,0xd4,0xb6,0x75,0x14,0x13,0xf3,0xd4, -0xba,0x35,0x0c,0x13,0xf3,0xd8,0x05,0x31,0x31,0x8f,0x6e,0x1c,0x84,0x00,0x2c,0xcb, -0x01,0x14,0x44,0x33,0x45,0x98,0x61,0x0c,0x02,0xf3,0x00,0x00,0x00,0x00,0x00,0x00, -0x61,0x20,0x00,0x00,0x81,0x00,0x00,0x00,0x13,0x04,0x4d,0x2c,0x10,0x00,0x00,0x00, -0x04,0x00,0x00,0x00,0x24,0xca,0x60,0x04,0xa0,0x04,0x8a,0x80,0xc2,0x0c,0x00,0x91, -0x11,0x00,0x00,0x00,0x63,0x08,0x4d,0x64,0x16,0xc1,0xe1,0x86,0xab,0x22,0x66,0x19, -0x02,0x01,0x1b,0x43,0x70,0xa2,0x59,0x82,0x61,0x0c,0xe1,0x89,0x66,0x09,0x86,0x81, 
-0x0a,0x20,0x0b,0x34,0x61,0x8e,0x81,0xda,0xa2,0x31,0x84,0x46,0xb2,0x8e,0xe0,0x70, -0x83,0x57,0x11,0xb3,0x0c,0x44,0xf1,0x8d,0x21,0x38,0xd2,0x2c,0x81,0x31,0x86,0xf0, -0x48,0xb3,0x04,0xc6,0x40,0x05,0x00,0x06,0x44,0x18,0x14,0x73,0x0c,0x9c,0x18,0x48, -0x63,0x08,0xcd,0x64,0x64,0x40,0x70,0xb8,0xa1,0x0c,0x2a,0x62,0x96,0xe1,0x40,0xcc, -0x60,0x0c,0xc1,0x99,0x66,0x09,0x92,0x31,0x84,0x67,0x9a,0x25,0x48,0x06,0x2a,0x80, -0x33,0x38,0xd0,0x00,0x99,0x63,0x18,0x83,0x34,0x98,0xc6,0x10,0x1a,0xc8,0xd6,0x80, -0xe0,0x70,0x03,0x1b,0x54,0xc4,0x2c,0x83,0xb2,0xb4,0xc1,0x18,0x82,0x03,0xcd,0x12, -0x30,0x63,0x08,0x0f,0x34,0x4b,0xc0,0x0c,0x54,0x00,0x6e,0xa0,0xbc,0xc1,0x32,0xc7, -0xa0,0x06,0x70,0x00,0x61,0x1c,0x84,0x03,0x01,0x00,0x00,0x00,0x4e,0x00,0x00,0x00, -0x76,0x52,0x4c,0xcc,0x73,0xd3,0x24,0x05,0x64,0xec,0xcd,0x8d,0xcc,0xe5,0x87,0x46, -0xc6,0x50,0x8a,0x89,0x79,0xee,0xdb,0x54,0x8a,0x89,0x79,0xee,0xdd,0x1a,0x88,0x89, -0x79,0x68,0x73,0x20,0x26,0xe6,0xa9,0xed,0x81,0x98,0x98,0xc7,0x36,0x0b,0x62,0x62, -0x9e,0xdb,0x32,0x88,0x89,0x79,0x72,0xd3,0x20,0x26,0xe6,0xd9,0x8d,0x83,0x98,0x98, -0xa7,0xb7,0x95,0x62,0x62,0x9e,0xbb,0x27,0x2d,0x20,0x63,0x6f,0x6e,0x64,0x2e,0x3a, -0x34,0x35,0x56,0x62,0x08,0x4e,0x53,0xd9,0xba,0xb5,0x14,0x02,0xf3,0xe0,0xf5,0x25, -0x2c,0x82,0xd3,0x0c,0xbe,0xe0,0x34,0xd3,0x8d,0x9b,0x88,0x21,0x38,0xcd,0x60,0xd7, -0x24,0x01,0x63,0xec,0xcd,0x8d,0xcc,0x45,0x87,0x44,0x80,0x8c,0xbd,0xb9,0x91,0xb9, -0xfc,0xc4,0xd0,0x90,0x02,0x8c,0xb1,0x37,0x37,0x32,0x97,0x1f,0x73,0x29,0x26,0xe6, -0xc1,0x71,0x7b,0x29,0x26,0xe6,0xc1,0x77,0xfb,0x28,0x04,0xe6,0xa9,0x6f,0x52,0x01, -0x32,0xf6,0xe6,0x46,0xe6,0xa2,0x13,0x73,0x63,0x18,0x83,0xc0,0x3c,0xb6,0x41,0x08, -0x4e,0x33,0x58,0x47,0x31,0x31,0x4f,0x5d,0x1f,0xc3,0x22,0x38,0xcd,0xe0,0x0b,0x4e, -0x33,0xe1,0xbc,0xa5,0x18,0x82,0xd3,0x0c,0x77,0x6e,0x20,0xc5,0xc4,0x3c,0xb5,0x4e, -0x3a,0x40,0xc6,0xde,0xdc,0xc8,0x5c,0x7e,0x64,0x70,0x2c,0xa4,0x98,0x98,0xa7,0xee, -0x6f,0x20,0x11,0x9c,0x66,0xf0,0x05,0xa7,0x99,0xec,0x82,0x10,0x9c,0xa6,0x32,0x93, 
-0x42,0x60,0x1e,0x7b,0xb7,0x98,0x62,0x62,0x9e,0xbc,0x36,0x16,0x43,0x70,0x9a,0x0a, -0xa7,0x6d,0xa4,0x98,0x98,0xc7,0xbe,0x8d,0xa4,0x98,0x98,0xc7,0xce,0x0d,0xc6,0x10, -0x9c,0x66,0xc0,0x7b,0x12,0x02,0x32,0xf6,0xe6,0x46,0xe6,0xa2,0x33,0x13,0x73,0x06, -0x8b,0xe0,0x34,0x83,0x2f,0x38,0xcd,0x64,0xd3,0x07,0x50,0x10,0xcd,0x14,0x61,0xe6, -0x61,0x08,0x4e,0x53,0xd5,0x36,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00, -0x4a,0x00,0x00,0x00,0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,0x07,0x00,0x00,0x00, -0x24,0xca,0x60,0x04,0xa0,0x04,0x8a,0x80,0xc2,0x0c,0x00,0xb9,0x61,0x0c,0x04,0x10, -0x1e,0xe1,0x19,0xc6,0x40,0x02,0xe1,0x11,0x1e,0x00,0x00,0x00,0x63,0x08,0xcd,0x63, -0x15,0xc1,0x31,0x84,0x06,0xb2,0x8b,0xe0,0x18,0x42,0x13,0x59,0x46,0x70,0x0c,0xa1, -0x71,0x6c,0x23,0x38,0x16,0x02,0x04,0xc7,0x64,0x61,0x1a,0x37,0x16,0x01,0x04,0x48, -0x35,0xc7,0x20,0x79,0xcf,0x58,0x04,0x10,0x20,0xd5,0x1c,0xc3,0x07,0x06,0xd0,0x58, -0x04,0x10,0x20,0xd5,0x1c,0x43,0x18,0x88,0x41,0x34,0x16,0x01,0x04,0x48,0x35,0xc7, -0x30,0x06,0x64,0xe0,0x98,0x47,0xd0,0xc0,0x80,0xa0,0x89,0x01,0x41,0x23,0x03,0x82, -0x63,0x21,0x40,0x70,0x50,0x66,0x70,0x06,0x68,0x90,0x06,0x58,0x06,0xe1,0x40,0x00, -0x25,0x00,0x00,0x00,0x56,0x52,0x4c,0xcc,0x73,0xd3,0x56,0x41,0x4c,0xcc,0x53,0xdb, -0x05,0x31,0x31,0xcf,0x6d,0x19,0xc4,0xc4,0x3c,0xba,0x6d,0x10,0x13,0xf3,0xf4,0xd6, -0x41,0x08,0xc0,0xb2,0x18,0x46,0x21,0x38,0x4d,0x85,0x9b,0x46,0x21,0x38,0x4d,0xb5, -0x9b,0x8a,0x21,0x00,0xcb,0x82,0xdf,0x66,0x62,0x08,0x4e,0x53,0xdd,0xb7,0x9d,0x18, -0x82,0xd3,0x54,0xb7,0x6e,0x28,0x86,0xe0,0x34,0xd5,0xdd,0xdb,0x47,0x31,0x31,0x4f, -0x9d,0x9b,0x87,0x21,0x00,0xcb,0x52,0xdf,0x06,0x62,0x08,0xc0,0xb2,0xd4,0xbc,0x59, -0x10,0x82,0xd3,0x54,0x96,0x62,0x08,0x4e,0x53,0xe1,0xb6,0x85,0x14,0x13,0xf3,0xd8, -0xb4,0x8d,0x14,0x13,0xf3,0xd8,0xb9,0x89,0x18,0x02,0xb0,0x2c,0xf6,0x6d,0x24,0x86, -0x00,0x2c,0x8b,0xcd,0x1b,0x87,0x21,0x38,0x4d,0x55,0xd3,0xd6,0x30,0x54,0xc0,0x72, -0x00,0x05,0xd1,0x4c,0x11,0x06,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00, 
-0x19,0x00,0x00,0x00,0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,0x03,0x00,0x00,0x00, -0x24,0x4a,0x60,0x04,0x80,0xc2,0x0c,0x00,0x00,0x00,0x00,0x00,0x63,0x08,0xcd,0x33, -0x16,0x01,0x04,0x48,0x34,0xc7,0x00,0x49,0xcf,0x58,0x04,0x10,0x28,0xd1,0x1c,0xc3, -0x44,0x39,0x58,0x85,0x03,0x01,0x00,0x00,0x0a,0x00,0x00,0x00,0x26,0x41,0x08,0xc0, -0xb2,0x18,0x45,0x21,0x00,0xcb,0xb2,0x5b,0x04,0x31,0x31,0x8f,0x6d,0x13,0xc4,0xc4, -0x3c,0xb9,0x35,0x0c,0x15,0xb0,0x58,0x05,0x31,0x31,0x4f,0x7f,0x00,0x05,0xd1,0x4c, -0x11,0x06,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00,0x1b,0x00,0x00,0x00, -0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x24,0xca,0x60,0x04, -0xa0,0x04,0x8a,0x80,0xc2,0x0c,0x00,0x00,0x63,0x08,0xcd,0x33,0x16,0x01,0x04,0xca, -0x34,0xc7,0x20,0x51,0xcf,0x1c,0x43,0x45,0x41,0x73,0x0c,0x16,0x15,0xcd,0x31,0x5c, -0x94,0x83,0x58,0x38,0x10,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x76,0x51,0x4c,0xcc, -0x53,0xdb,0x86,0x51,0x4c,0xcc,0x53,0xe7,0x36,0x41,0x4c,0xcc,0x63,0x5b,0x05,0x31, -0x31,0x8f,0x6e,0x16,0xc4,0xc4,0x3c,0xbd,0x51,0x10,0x02,0xb0,0x2c,0xd6,0x30,0x54, -0xc0,0x72,0x00,0x05,0xd1,0x4c,0x11,0x06,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00, -0x2c,0x00,0x00,0x00,0x13,0x04,0x45,0x2c,0x10,0x00,0x00,0x00,0x03,0x00,0x00,0x00, -0x24,0xca,0xa0,0x04,0x46,0x00,0x8a,0x80,0xc0,0x08,0x00,0x00,0x63,0x08,0x0d,0x34, -0xdc,0x30,0x49,0xc4,0x2c,0x03,0x11,0x50,0x63,0x08,0xcd,0x33,0xdc,0x50,0x49,0xc4, -0x2c,0x03,0x21,0x58,0x63,0x08,0x4d,0x34,0xdc,0x70,0x49,0xc4,0x2c,0x03,0x31,0x60, -0x63,0x08,0x8d,0x33,0xdc,0x90,0x49,0x84,0x69,0x22,0x70,0xc3,0x27,0x1c,0x08,0x00, -0x17,0x00,0x00,0x00,0x96,0x51,0x4c,0xcc,0x53,0xdf,0x66,0x41,0x08,0xcc,0x83,0xdb, -0x04,0x31,0x31,0x4f,0x6d,0x15,0xc4,0xc4,0x3c,0xb7,0x61,0x10,0x02,0xf3,0xf0,0x76, -0x41,0x4c,0xcc,0xb3,0x1f,0x81,0x11,0x11,0x13,0x15,0x35,0x37,0x90,0x2c,0x4e,0xf4, -0x47,0x87,0x54,0xd7,0x17,0x70,0x2c,0x4e,0xf4,0x47,0x87,0x74,0x02,0xc8,0xe2,0x44, -0x7f,0x74,0x48,0xb9,0x69,0x14,0x02,0xf3,0xd4,0xb8,0x6d,0x18,0x11,0x31,0x55,0xc0, 
-0x62,0x0d,0x43,0x05,0x2c,0x07,0x50,0x10,0xcd,0x14,0x61,0x46,0x31,0x08,0xcc,0x03, -0x00,0x00,0x00,0x00,0x71,0x20,0x00,0x00,0x12,0x00,0x00,0x00,0x66,0x40,0x54,0x82, -0x23,0x19,0xc3,0xa0,0x20,0x8b,0x1d,0x18,0x4f,0x84,0x34,0x53,0x61,0x03,0xc4,0xe3, -0x58,0x85,0x05,0x14,0xbe,0x34,0x45,0xb5,0x21,0x10,0x82,0x23,0x15,0x46,0x30,0x2c, -0xc8,0x64,0x02,0x06,0xf0,0x3c,0x91,0x73,0x19,0x00,0xe1,0x4b,0x53,0x64,0x0a,0x84, -0x84,0x34,0x85,0x25,0x0c,0x92,0x20,0x59,0xc1,0x20,0x30,0x8f,0x2d,0x10,0x95,0x84, -0x34,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00}; diff --git a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp deleted file mode 100644 index 1bd00a0c2a6..00000000000 --- a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp +++ /dev/null @@ -1,243 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - /* - * Authors: - * Zack Rusin zack@tungstengraphics.com - */ -#ifdef MESA_LLVM - -#include "gallivm.h" -#include "gallivm_p.h" - -#include "instructions.h" -#include "loweringpass.h" -#include "storage.h" -#include "tgsitollvm.h" - -#include "pipe/p_context.h" -#include "pipe/p_shader_tokens.h" - -#include "tgsi/tgsi_exec.h" -#include "tgsi/tgsi_dump.h" - -#include "util/u_memory.h" -#include "util/u_math.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -struct gallivm_cpu_engine { - llvm::ExecutionEngine *engine; -}; - -static struct gallivm_cpu_engine *CPU = 0; - -typedef int (*fragment_shader_runner)(float x, float y, - float (*dests)[16][4], - float (*inputs)[16][4], - int num_attribs, - float (*consts)[4], int num_consts, - struct tgsi_sampler *samplers); - -int gallivm_cpu_fs_exec(struct gallivm_prog *prog, - float fx, float fy, - float (*dests)[16][4], - float (*inputs)[16][4], - float (*consts)[4], - struct tgsi_sampler *samplers) -{ - fragment_shader_runner runner = reinterpret_cast(prog->function); - assert(runner); - - return runner(fx, fy, dests, inputs, prog->num_interp, - consts, prog->num_consts, - samplers); -} - -static inline llvm::Function *func_for_shader(struct gallivm_prog *prog) -{ - llvm::Module *mod = prog->module; - llvm::Function *func = 0; - - switch (prog->type) { - case GALLIVM_VS: - func = mod->getFunction("vs_shader"); - break; - case GALLIVM_FS: - func = mod->getFunction("fs_shader"); 
- break; - default: - assert(!"Unknown shader type!"); - break; - } - return func; -} - -/*! - This function creates a CPU based execution engine for the given gallivm_prog. - gallivm_cpu_engine should be used as a singleton throughout the library. Before - executing gallivm_prog_exec one needs to call gallivm_cpu_jit_compile. - The gallivm_prog instance which is being passed to the constructor is being - automatically JIT compiled so one shouldn't call gallivm_cpu_jit_compile - with it again. - */ -struct gallivm_cpu_engine * gallivm_cpu_engine_create(struct gallivm_prog *prog) -{ - struct gallivm_cpu_engine *cpu = (struct gallivm_cpu_engine *) - calloc(1, sizeof(struct gallivm_cpu_engine)); - llvm::Module *mod = static_cast(prog->module); - llvm::ExistingModuleProvider *mp = new llvm::ExistingModuleProvider(mod); - llvm::ExecutionEngine *ee = llvm::ExecutionEngine::create(mp, false); - ee->DisableLazyCompilation(); - cpu->engine = ee; - - llvm::Function *func = func_for_shader(prog); - - prog->function = ee->getPointerToFunction(func); - CPU = cpu; - return cpu; -} - - -/*! - This function JIT compiles the given gallivm_prog with the given cpu based execution engine. - The reference to the generated machine code entry point will be stored - in the gallivm_prog program. After executing this function one can call gallivm_prog_exec - in order to execute the gallivm_prog on the CPU. - */ -void gallivm_cpu_jit_compile(struct gallivm_cpu_engine *cpu, struct gallivm_prog *prog) -{ - llvm::Module *mod = static_cast(prog->module); - llvm::ExistingModuleProvider *mp = new llvm::ExistingModuleProvider(mod); - llvm::ExecutionEngine *ee = cpu->engine; - assert(ee); - /*FIXME : why was this disabled ? we need it for pow/sqrt/... 
*/ - ee->DisableLazyCompilation(false); - ee->addModuleProvider(mp); - - llvm::Function *func = func_for_shader(prog); - prog->function = ee->getPointerToFunction(func); -} - -void gallivm_cpu_engine_delete(struct gallivm_cpu_engine *cpu) -{ - free(cpu); -} - -struct gallivm_cpu_engine * gallivm_global_cpu_engine() -{ - return CPU; -} - - -typedef void (*vertex_shader_runner)(void *ainputs, - void *dests, - float (*aconsts)[4]); - -#define MAX_TGSI_VERTICES 4 -/*! - This function is used to execute the gallivm_prog in software. Before calling - this function the gallivm_prog has to be JIT compiled with the gallivm_cpu_jit_compile - function. - */ -int gallivm_cpu_vs_exec(struct gallivm_prog *prog, - struct tgsi_exec_machine *machine, - const float (*input)[4], - unsigned num_inputs, - float (*output)[4], - unsigned num_outputs, - const float (*constants)[4], - unsigned count, - unsigned input_stride, - unsigned output_stride ) -{ - unsigned int i, j; - unsigned slot; - vertex_shader_runner runner = reinterpret_cast(prog->function); - assert(runner); - - for (i = 0; i < count; i += MAX_TGSI_VERTICES) { - unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i); - - /* Swizzle inputs. 
- */ - for (j = 0; j < max_vertices; j++) { - for (slot = 0; slot < num_inputs; slot++) { - machine->Inputs[slot].xyzw[0].f[j] = input[slot][0]; - machine->Inputs[slot].xyzw[1].f[j] = input[slot][1]; - machine->Inputs[slot].xyzw[2].f[j] = input[slot][2]; - machine->Inputs[slot].xyzw[3].f[j] = input[slot][3]; - } - - input = (const float (*)[4])((const char *)input + input_stride); - } - - /* run shader */ - runner(machine->Inputs, - machine->Outputs, - (float (*)[4]) constants); - - /* Unswizzle all output results - */ - for (j = 0; j < max_vertices; j++) { - for (slot = 0; slot < num_outputs; slot++) { - output[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; - output[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; - output[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; - output[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; - } - output = (float (*)[4])((char *)output + output_stride); - } - } - - return 0; -} - -#endif diff --git a/src/gallium/auxiliary/gallivm/gallivm_p.h b/src/gallium/auxiliary/gallivm/gallivm_p.h deleted file mode 100644 index d2c5852bdf7..00000000000 --- a/src/gallium/auxiliary/gallivm/gallivm_p.h +++ /dev/null @@ -1,110 +0,0 @@ -#ifndef GALLIVM_P_H -#define GALLIVM_P_H - -#ifdef MESA_LLVM - -#include "gallivm.h" -#include "pipe/p_shader_tokens.h" -#include "pipe/p_compiler.h" - -namespace llvm { - class Module; -} - -#if defined __cplusplus -extern "C" { -#endif - -enum gallivm_shader_type; -enum gallivm_vector_layout; - -struct gallivm_interpolate { - int attrib; - int chan; - int type; -}; - -struct gallivm_ir { - llvm::Module *module; - int id; - enum gallivm_shader_type type; - enum gallivm_vector_layout layout; - int num_components; - int num_consts; - - /* FIXME: this might not be enough for some shaders */ - struct gallivm_interpolate interpolators[32*4]; - int num_interp; -}; - -struct gallivm_prog { - llvm::Module *module; - void *function; - - int id; - enum gallivm_shader_type type; - - int num_consts; - - /* FIXME: this might 
not be enough for some shaders */ - struct gallivm_interpolate interpolators[32*4]; - int num_interp; -}; - -static INLINE void gallivm_swizzle_components(int swizzle, - int *xc, int *yc, - int *zc, int *wc) -{ - int x = swizzle / 1000; swizzle -= x * 1000; - int y = swizzle / 100; swizzle -= y * 100; - int z = swizzle / 10; swizzle -= z * 10; - int w = swizzle; - - if (xc) *xc = x; - if (yc) *yc = y; - if (zc) *zc = z; - if (wc) *wc = w; -} - -static INLINE boolean gallivm_is_swizzle(int swizzle) -{ - const int NO_SWIZZLE = TGSI_SWIZZLE_X * 1000 + TGSI_SWIZZLE_Y * 100 + - TGSI_SWIZZLE_Z * 10 + TGSI_SWIZZLE_W; - return swizzle != NO_SWIZZLE; -} - -static INLINE int gallivm_x_swizzle(int swizzle) -{ - int x; - gallivm_swizzle_components(swizzle, &x, 0, 0, 0); - return x; -} - -static INLINE int gallivm_y_swizzle(int swizzle) -{ - int y; - gallivm_swizzle_components(swizzle, 0, &y, 0, 0); - return y; -} - -static INLINE int gallivm_z_swizzle(int swizzle) -{ - int z; - gallivm_swizzle_components(swizzle, 0, 0, &z, 0); - return z; -} - -static INLINE int gallivm_w_swizzle(int swizzle) -{ - int w; - gallivm_swizzle_components(swizzle, 0, 0, 0, &w); - return w; -} - -#if defined __cplusplus -} -#endif - -#endif /* MESA_LLVM */ - -#endif diff --git a/src/gallium/auxiliary/gallivm/instructions.cpp b/src/gallium/auxiliary/gallivm/instructions.cpp deleted file mode 100644 index ee8162efce5..00000000000 --- a/src/gallium/auxiliary/gallivm/instructions.cpp +++ /dev/null @@ -1,1193 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - /* - * Authors: - * Zack Rusin zack@tungstengraphics.com - */ -#ifdef MESA_LLVM - -#include "instructions.h" - -#include "storage.h" - -#include "util/u_memory.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -using namespace llvm; - -#include "gallivm_builtins.cpp" - -#if 0 -llvm::Value *arrayFromChannels(std::vector &vals) -{ - VectorType *vectorType = VectorType::get(Type::FloatTy, 4); - ArrayType *vectorArray = ArrayType::get(vectorType, 4); -} -#endif - -static inline std::string createFuncName(int label) -{ - std::ostringstream stream; - stream << "function"; - stream << label; - return stream.str(); -} - -Instructions::Instructions(llvm::Module *mod, llvm::Function *func, llvm::BasicBlock *block, - Storage *storage) - : m_mod(mod), m_func(func), m_builder(block), m_idx(0), - m_storage(storage) -{ - m_floatVecType = VectorType::get(Type::FloatTy, 4); - - m_llvmFSqrt = 0; - m_llvmFAbs = 0; - m_llvmPow = 0; - m_llvmFloor = 0; - m_llvmFlog = 0; - m_llvmFexp = 0; - m_llvmLit = 0; - m_fmtPtr = 0; - - MemoryBuffer *buffer = MemoryBuffer::getMemBuffer( - (const char*)&llvm_builtins_data[0], - (const char*)&llvm_builtins_data[Elements(llvm_builtins_data)-1]); - m_mod = ParseBitcodeFile(buffer); -} - -llvm::BasicBlock * Instructions::currentBlock() const -{ - return m_builder.GetInsertBlock(); -} - -llvm::Value * Instructions::abs(llvm::Value *in) -{ - std::vector vec = extractVector(in); - Value *xabs = callFAbs(vec[0]); - Value *yabs = callFAbs(vec[1]); - Value *zabs = callFAbs(vec[2]); - Value *wabs = callFAbs(vec[3]); - return vectorFromVals(xabs, yabs, zabs, wabs); -} - -llvm::Value * Instructions::add(llvm::Value *in1, llvm::Value *in2) -{ - return m_builder.CreateAdd(in1, in2, name("add")); -} - -llvm::Value * Instructions::arl(llvm::Value *in) -{ - return floor(in); -} - -void 
Instructions::beginLoop() -{ - BasicBlock *begin = BasicBlock::Create(name("loop"), m_func,0); - BasicBlock *end = BasicBlock::Create(name("endloop"), m_func,0); - - m_builder.CreateBr(begin); - Loop loop; - loop.begin = begin; - loop.end = end; - m_builder.SetInsertPoint(begin); - m_loopStack.push(loop); -} - -void Instructions::bgnSub(unsigned label) -{ - llvm::Function *func = findFunction(label); - - Function::arg_iterator args = func->arg_begin(); - Value *ptr_INPUT = args++; - ptr_INPUT->setName("INPUT"); - m_storage->pushArguments(ptr_INPUT); - - llvm::BasicBlock *entry = BasicBlock::Create("entry", func, 0); - - m_func = func; - m_builder.SetInsertPoint(entry); -} - -void Instructions::brk() -{ - assert(!m_loopStack.empty()); - BasicBlock *unr = BasicBlock::Create(name("unreachable"), m_func,0); - m_builder.CreateBr(m_loopStack.top().end); - m_builder.SetInsertPoint(unr); -} - -void Instructions::cal(int label, llvm::Value *input) -{ - std::vector params; - params.push_back(input); - llvm::Function *func = findFunction(label); - - m_builder.CreateCall(func, params.begin(), params.end()); -} - -llvm::Value * Instructions::ceil(llvm::Value *in) -{ - std::vector vec = extractVector(in); - return vectorFromVals(callCeil(vec[0]), callCeil(vec[1]), - callCeil(vec[2]), callCeil(vec[3])); -} - -llvm::Value * Instructions::clamp(llvm::Value *in1) -{ - llvm::Value *zero = constVector(0.0f, 0.0f, 0.0f, 0.0f); - llvm::Value *one = constVector(1.0f, 1.0f, 1.0f, 1.0f); - return min( max(zero, in1), one); -} - -llvm::Value * Instructions::cmp(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3) -{ - llvm::Function *func = m_mod->getFunction("cmp"); - assert(func); - - std::vector params; - params.push_back(in1); - params.push_back(in2); - params.push_back(in3); - CallInst *call = m_builder.CreateCall(func, params.begin(), params.end(), name("cmpres")); - call->setTailCall(false); - return call; -} - -llvm::Value * Instructions::cnd(llvm::Value *in1, llvm::Value *in2, 
llvm::Value *in3) -{ - std::vector vec1 = extractVector(in1); - std::vector vec2 = extractVector(in2); - std::vector vec3 = extractVector(in3); - Constant *half = ConstantFP::get(APFloat(0.5f)); - - Value *xcmp = m_builder.CreateFCmpOGT(vec1[0], half, name("xcmp")); - Value *selx = m_builder.CreateSelect(xcmp, vec2[0], vec3[0], - name("selx")); - - Value *ycmp = m_builder.CreateFCmpOGT(vec1[1], half, name("ycmp")); - Value *sely = m_builder.CreateSelect(ycmp, vec2[1], vec3[1], - name("sely")); - - Value *zcmp = m_builder.CreateFCmpOGT(vec1[2], half, name("zcmp")); - Value *selz = m_builder.CreateSelect(zcmp, vec2[2], vec3[2], - name("selz")); - - Value *wcmp = m_builder.CreateFCmpOGT(vec1[3], half, name("wcmp")); - Value *selw = m_builder.CreateSelect(wcmp, vec2[3], vec3[3], - name("selw")); - - return vectorFromVals(selx, sely, selz, selw); -} - -llvm::Value * Instructions::cnd0(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3) -{ - std::vector vec1 = extractVector(in1); - std::vector vec2 = extractVector(in2); - std::vector vec3 = extractVector(in3); - Constant *zero = Constant::getNullValue(Type::FloatTy); - - Value *xcmp = m_builder.CreateFCmpOGE(vec1[0], zero, name("xcmp")); - Value *selx = m_builder.CreateSelect(xcmp, vec2[0], vec3[0], - name("selx")); - - Value *ycmp = m_builder.CreateFCmpOGE(vec1[1], zero, name("ycmp")); - Value *sely = m_builder.CreateSelect(ycmp, vec2[1], vec3[1], - name("sely")); - - Value *zcmp = m_builder.CreateFCmpOGE(vec1[2], zero, name("zcmp")); - Value *selz = m_builder.CreateSelect(zcmp, vec2[2], vec3[2], - name("selz")); - - Value *wcmp = m_builder.CreateFCmpOGE(vec1[3], zero, name("wcmp")); - Value *selw = m_builder.CreateSelect(wcmp, vec2[3], vec3[3], - name("selw")); - - return vectorFromVals(selx, sely, selz, selw); -} - -llvm::Value * Instructions::cos(llvm::Value *in) -{ -#if 0 - llvm::Function *func = m_mod->getFunction("vcos"); - assert(func); - - CallInst *call = m_builder.CreateCall(func, in, name("cosres")); - 
call->setTailCall(false); - return call; -#else - std::vector elems = extractVector(in); - Function *func = m_mod->getFunction("cosf"); - assert(func); - CallInst *cos = m_builder.CreateCall(func, elems[0], name("cosres")); - cos->setCallingConv(CallingConv::C); - cos->setTailCall(true); - return vectorFromVals(cos, cos, cos, cos); -#endif -} - -llvm::Value * Instructions::cross(llvm::Value *in1, llvm::Value *in2) -{ - Value *x1 = m_builder.CreateExtractElement(in1, - m_storage->constantInt(0), - name("x1")); - Value *y1 = m_builder.CreateExtractElement(in1, - m_storage->constantInt(1), - name("y1")); - Value *z1 = m_builder.CreateExtractElement(in1, - m_storage->constantInt(2), - name("z1")); - - Value *x2 = m_builder.CreateExtractElement(in2, - m_storage->constantInt(0), - name("x2")); - Value *y2 = m_builder.CreateExtractElement(in2, - m_storage->constantInt(1), - name("y2")); - Value *z2 = m_builder.CreateExtractElement(in2, - m_storage->constantInt(2), - name("z2")); - Value *y1z2 = mul(y1, z2); - Value *z1y2 = mul(z1, y2); - - Value *z1x2 = mul(z1, x2); - Value *x1z2 = mul(x1, z2); - - Value *x1y2 = mul(x1, y2); - Value *y1x2 = mul(y1, x2); - - return vectorFromVals(sub(y1z2, z1y2), sub(z1x2, x1z2), sub(x1y2, y1x2)); -} - -llvm::Value * Instructions::ddx(llvm::Value *in) -{ - // FIXME - assert(0); -} - -llvm::Value * Instructions::ddy(llvm::Value *in) -{ - // FIXME - assert(0); -} - -llvm::Value * Instructions::div(llvm::Value *in1, llvm::Value *in2) -{ - return m_builder.CreateFDiv(in1, in2, name("div")); -} - -llvm::Value * Instructions::dot2add(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3) -{ - Value *mulRes = mul(in1, in2); - Value *x = m_builder.CreateExtractElement(mulRes, - m_storage->constantInt(0), - name("extractx")); - Value *y = m_builder.CreateExtractElement(mulRes, - m_storage->constantInt(1), - name("extracty")); - Value *z = m_builder.CreateExtractElement(in3, - m_storage->constantInt(2), - name("extractz")); - Value *xy = 
m_builder.CreateAdd(x, y,name("xy")); - Value *dot2add = m_builder.CreateAdd(xy, z, name("dot2add")); - return vectorFromVals(dot2add, dot2add, dot2add, dot2add); -} - -llvm::Value * Instructions::dp2(llvm::Value *in1, llvm::Value *in2) -{ - Value *mulRes = mul(in1, in2); - Value *x = m_builder.CreateExtractElement(mulRes, - m_storage->constantInt(0), - name("extractx")); - Value *y = m_builder.CreateExtractElement(mulRes, - m_storage->constantInt(1), - name("extracty")); - Value *xy = m_builder.CreateAdd(x, y,name("xy")); - return vectorFromVals(xy, xy, xy, xy); -} - -llvm::Value * Instructions::dp3(llvm::Value *in1, llvm::Value *in2) -{ - Value *mulRes = mul(in1, in2); - Value *x = m_builder.CreateExtractElement(mulRes, - m_storage->constantInt(0), - name("extractx")); - Value *y = m_builder.CreateExtractElement(mulRes, - m_storage->constantInt(1), - name("extracty")); - Value *z = m_builder.CreateExtractElement(mulRes, - m_storage->constantInt(2), - name("extractz")); - Value *xy = m_builder.CreateAdd(x, y,name("xy")); - Value *dot3 = m_builder.CreateAdd(xy, z, name("dot3")); - return vectorFromVals(dot3, dot3, dot3, dot3); -} - -llvm::Value * Instructions::dp4(llvm::Value *in1, llvm::Value *in2) -{ - Value *mulRes = mul(in1, in2); - std::vector vec = extractVector(mulRes); - Value *xy = m_builder.CreateAdd(vec[0], vec[1], name("xy")); - Value *xyz = m_builder.CreateAdd(xy, vec[2], name("xyz")); - Value *dot4 = m_builder.CreateAdd(xyz, vec[3], name("dot4")); - return vectorFromVals(dot4, dot4, dot4, dot4); -} - -llvm::Value * Instructions::dph(llvm::Value *in1, llvm::Value *in2) -{ - Value *mulRes = mul(in1, in2); - std::vector vec1 = extractVector(mulRes); - Value *xy = m_builder.CreateAdd(vec1[0], vec1[1], name("xy")); - Value *xyz = m_builder.CreateAdd(xy, vec1[2], name("xyz")); - Value *dph = m_builder.CreateAdd(xyz, vec1[3], name("dph")); - return vectorFromVals(dph, dph, dph, dph); -} - -llvm::Value * Instructions::dst(llvm::Value *in1, llvm::Value *in2) 
-{ - Value *y1 = m_builder.CreateExtractElement(in1, - m_storage->constantInt(1), - name("y1")); - Value *z = m_builder.CreateExtractElement(in1, - m_storage->constantInt(2), - name("z")); - Value *y2 = m_builder.CreateExtractElement(in2, - m_storage->constantInt(1), - name("y2")); - Value *w = m_builder.CreateExtractElement(in2, - m_storage->constantInt(3), - name("w")); - Value *ry = m_builder.CreateMul(y1, y2, name("tyuy")); - return vectorFromVals(ConstantFP::get(APFloat(1.f)), - ry, z, w); -} - -void Instructions::elseop() -{ - assert(!m_ifStack.empty()); - BasicBlock *ifend = BasicBlock::Create(name("ifend"), m_func,0); - m_builder.CreateBr(ifend); - m_builder.SetInsertPoint(m_ifStack.top()); - currentBlock()->setName(name("ifelse")); - m_ifStack.pop(); - m_ifStack.push(ifend); -} - -void Instructions::endif() -{ - assert(!m_ifStack.empty()); - m_builder.CreateBr(m_ifStack.top()); - m_builder.SetInsertPoint(m_ifStack.top()); - m_ifStack.pop(); -} - -void Instructions::endLoop() -{ - assert(!m_loopStack.empty()); - Loop loop = m_loopStack.top(); - m_builder.CreateBr(loop.begin); - loop.end->moveAfter(currentBlock()); - m_builder.SetInsertPoint(loop.end); - m_loopStack.pop(); -} - -void Instructions::end() -{ - m_builder.CreateRetVoid(); -} - -void Instructions::endSub() -{ - m_func = 0; - m_builder.SetInsertPoint(0); -} - -llvm::Value * Instructions::exp(llvm::Value *in) -{ - std::vector vec = extractVector(in); - return vectorFromVals(callFExp(vec[0]), callFExp(vec[1]), - callFExp(vec[2]), callFExp(vec[3])); -} - -llvm::Value * Instructions::ex2(llvm::Value *in) -{ - llvm::Value *val = callPow(ConstantFP::get(APFloat(2.f)), - m_builder.CreateExtractElement( - in, m_storage->constantInt(0), - name("x1"))); - return vectorFromVals(val, val, val, val); -} - -llvm::Value * Instructions::floor(llvm::Value *in) -{ - std::vector vec = extractVector(in); - return vectorFromVals(callFloor(vec[0]), callFloor(vec[1]), - callFloor(vec[2]), callFloor(vec[3])); -} - 
-llvm::Value * Instructions::frc(llvm::Value *in) -{ - llvm::Value *flr = floor(in); - return sub(in, flr); -} - -void Instructions::ifop(llvm::Value *in) -{ - BasicBlock *ifthen = BasicBlock::Create(name("ifthen"), m_func,0); - BasicBlock *ifend = BasicBlock::Create(name("ifthenend"), m_func,0); - - //BasicBlock *yblock = new BasicBlock(name("yblock"), m_func,0); - //BasicBlock *zblock = new BasicBlock(name("zblock"), m_func,0); - //BasicBlock *wblock = new BasicBlock(name("wblock"), m_func,0); - - Constant *float0 = Constant::getNullValue(Type::FloatTy); - - Value *x = m_builder.CreateExtractElement(in, m_storage->constantInt(0), - name("extractx")); - Value *xcmp = m_builder.CreateFCmpUNE(x, float0, name("xcmp")); - m_builder.CreateCondBr(xcmp, ifthen, ifend); - //m_builder.SetInsertPoint(yblock); - - m_builder.SetInsertPoint(ifthen); - m_ifStack.push(ifend); -} - -llvm::Value * Instructions::kil(llvm::Value *in) -{ - llvm::Function *func = m_mod->getFunction("kil"); - assert(func); - - CallInst *call = m_builder.CreateCall(func, in, name("kilpres")); - call->setTailCall(false); - return call; -} - -llvm::Value * Instructions::lerp(llvm::Value *in1, llvm::Value *in2, - llvm::Value *in3) -{ - llvm::Value *m = mul(in1, in2); - llvm::Value *vec1 = constVector(1.f, 1.f, 1.f, 1.f); - llvm::Value *s = sub(vec1, in1); - return add(m, mul(s, in3)); -} - -llvm::Value * Instructions::lg2(llvm::Value *in) -{ - std::vector vec = extractVector(in); - llvm::Value *const_vec = constVector(1.442695f, 1.442695f, - 1.442695f, 1.442695f); - return mul(vectorFromVals(callFLog(vec[0]), callFLog(vec[1]), - callFLog(vec[2]), callFLog(vec[3])), const_vec); -} - -llvm::Value * Instructions::lit(llvm::Value *in) -{ - if (!m_llvmLit) { - m_llvmLit = m_mod->getFunction("lit"); - } - CallInst *call = m_builder.CreateCall(m_llvmLit, in, name("litres")); - call->setCallingConv(CallingConv::C); - call->setTailCall(false); - return call; -} - -llvm::Value * Instructions::log(llvm::Value *in) -{ 
- std::vector vec = extractVector(in); - return vectorFromVals(callFLog(vec[0]), callFLog(vec[1]), - callFLog(vec[2]), callFLog(vec[3])); -} - -llvm::Value * Instructions::madd(llvm::Value *in1, llvm::Value *in2, - llvm::Value *in3) -{ - Value *mulRes = mul(in1, in2); - return add(mulRes, in3); -} - -llvm::Value * Instructions::max(llvm::Value *in1, llvm::Value *in2) -{ - std::vector vec1 = extractVector(in1); - std::vector vec2 = extractVector(in2); - - Value *xcmp = m_builder.CreateFCmpOGT(vec1[0], vec2[0], - name("xcmp")); - Value *selx = m_builder.CreateSelect(xcmp, vec1[0], vec2[0], - name("selx")); - - Value *ycmp = m_builder.CreateFCmpOGT(vec1[1], vec2[1], - name("ycmp")); - Value *sely = m_builder.CreateSelect(ycmp, vec1[1], vec2[1], - name("sely")); - - Value *zcmp = m_builder.CreateFCmpOGT(vec1[2], vec2[2], - name("zcmp")); - Value *selz = m_builder.CreateSelect(zcmp, vec1[2], vec2[2], - name("selz")); - - Value *wcmp = m_builder.CreateFCmpOGT(vec1[3], vec2[3], - name("wcmp")); - Value *selw = m_builder.CreateSelect(wcmp, vec1[3], vec2[3], - name("selw")); - - return vectorFromVals(selx, sely, selz, selw); -} - -llvm::Value * Instructions::min(llvm::Value *in1, llvm::Value *in2) -{ - std::vector vec1 = extractVector(in1); - std::vector vec2 = extractVector(in2); - - Value *xcmp = m_builder.CreateFCmpOLT(vec1[0], vec2[0], name("xcmp")); - Value *selx = m_builder.CreateSelect(xcmp, vec1[0], vec2[0], - name("selx")); - - Value *ycmp = m_builder.CreateFCmpOLT(vec1[1], vec2[1], name("ycmp")); - Value *sely = m_builder.CreateSelect(ycmp, vec1[1], vec2[1], - name("sely")); - - Value *zcmp = m_builder.CreateFCmpOLT(vec1[2], vec2[2], name("zcmp")); - Value *selz = m_builder.CreateSelect(zcmp, vec1[2], vec2[2], - name("selz")); - - Value *wcmp = m_builder.CreateFCmpOLT(vec1[3], vec2[3], name("wcmp")); - Value *selw = m_builder.CreateSelect(wcmp, vec1[3], vec2[3], - name("selw")); - - return vectorFromVals(selx, sely, selz, selw); -} - -llvm::Value * 
Instructions::mul(llvm::Value *in1, llvm::Value *in2) -{ - return m_builder.CreateMul(in1, in2, name("mul")); -} - -llvm::Value * Instructions::neg(llvm::Value *in) -{ - Value *neg = m_builder.CreateNeg(in, name("neg")); - return neg; -} - -llvm::Value * Instructions::nrm(llvm::Value *in) -{ - llvm::Value *v = rsq(in); - return mul(v, in); -} - -llvm::Value * Instructions::pow(llvm::Value *in1, llvm::Value *in2) -{ - Value *x1 = m_builder.CreateExtractElement(in1, - m_storage->constantInt(0), - name("x1")); - Value *x2 = m_builder.CreateExtractElement(in2, - m_storage->constantInt(0), - name("x2")); - llvm::Value *val = callPow(x1, x2); - return vectorFromVals(val, val, val, val); -} - -llvm::Value * Instructions::rcp(llvm::Value *in1) -{ - Value *x1 = m_builder.CreateExtractElement(in1, - m_storage->constantInt(0), - name("x1")); - Value *res = m_builder.CreateFDiv(ConstantFP::get(APFloat(1.f)), - x1, name("rcp")); - return vectorFromVals(res, res, res, res); -} - -llvm::Value * Instructions::rsq(llvm::Value *in1) -{ - Value *x = m_builder.CreateExtractElement(in1, - m_storage->constantInt(0), - name("extractx")); - Value *abs = callFAbs(x); - Value *sqrt = callFSqrt(abs); - - Value *rsqrt = m_builder.CreateFDiv(ConstantFP::get(APFloat(1.f)), - sqrt, - name("rsqrt")); - return vectorFromVals(rsqrt, rsqrt, rsqrt, rsqrt); -} - -llvm::Value * Instructions::scs(llvm::Value *in) -{ - llvm::Function *func = m_mod->getFunction("scs"); - assert(func); - - CallInst *call = m_builder.CreateCall(func, in, name("scsres")); - call->setTailCall(false); - return call; -} - -llvm::Value * Instructions::seq(llvm::Value *in1, llvm::Value *in2) -{ - Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); - Constant *const0f = Constant::getNullValue(Type::FloatTy); - - std::vector vec1 = extractVector(in1); - std::vector vec2 = extractVector(in2); - - Value *xcmp = m_builder.CreateFCmpOEQ(vec1[0], vec2[0], name("xcmp")); - Value *x = m_builder.CreateSelect(xcmp, const1f, 
const0f, name("xsel")); - - Value *ycmp = m_builder.CreateFCmpOEQ(vec1[1], vec2[1], name("ycmp")); - Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel")); - - Value *zcmp = m_builder.CreateFCmpOEQ(vec1[2], vec2[2], name("zcmp")); - Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel")); - - Value *wcmp = m_builder.CreateFCmpOEQ(vec1[3], vec2[3], name("wcmp")); - Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel")); - - return vectorFromVals(x, y, z, w); -} - -llvm::Value * Instructions::sfl(llvm::Value *in1, llvm::Value *in2) -{ - Constant *const0f = Constant::getNullValue(Type::FloatTy); - - return vectorFromVals(const0f, const0f, const0f, const0f); -} - -llvm::Value * Instructions::sge(llvm::Value *in1, llvm::Value *in2) -{ - Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); - Constant *const0f = Constant::getNullValue(Type::FloatTy); - - std::vector vec1 = extractVector(in1); - std::vector vec2 = extractVector(in2); - - Value *xcmp = m_builder.CreateFCmpOGE(vec1[0], vec2[0], name("xcmp")); - Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel")); - - Value *ycmp = m_builder.CreateFCmpOGE(vec1[1], vec2[1], name("ycmp")); - Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel")); - - Value *zcmp = m_builder.CreateFCmpOGE(vec1[2], vec2[2], name("zcmp")); - Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel")); - - Value *wcmp = m_builder.CreateFCmpOGE(vec1[3], vec2[3], name("wcmp")); - Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel")); - - return vectorFromVals(x, y, z, w); -} - -llvm::Value * Instructions::sgt(llvm::Value *in1, llvm::Value *in2) -{ - Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); - Constant *const0f = Constant::getNullValue(Type::FloatTy); - - std::vector vec1 = extractVector(in1); - std::vector vec2 = extractVector(in2); - Value *xcmp = m_builder.CreateFCmpOGT(vec1[0], vec2[0], name("xcmp")); - 
Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel")); - - Value *ycmp = m_builder.CreateFCmpOGT(vec1[1], vec2[1], name("ycmp")); - Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel")); - - Value *zcmp = m_builder.CreateFCmpOGT(vec1[2], vec2[2], name("zcmp")); - Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel")); - - Value *wcmp = m_builder.CreateFCmpOGT(vec1[3], vec2[3], name("wcmp")); - Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel")); - - return vectorFromVals(x, y, z, w); -} - -llvm::Value * Instructions::sin(llvm::Value *in) -{ - llvm::Function *func = m_mod->getFunction("vsin"); - assert(func); - - CallInst *call = m_builder.CreateCall(func, in, name("sinres")); - call->setTailCall(false); - return call; -} - -llvm::Value * Instructions::sle(llvm::Value *in1, llvm::Value *in2) -{ - Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); - Constant *const0f = Constant::getNullValue(Type::FloatTy); - - std::vector vec1 = extractVector(in1); - std::vector vec2 = extractVector(in2); - - Value *xcmp = m_builder.CreateFCmpOLE(vec1[0], vec2[0], name("xcmp")); - Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel")); - - Value *ycmp = m_builder.CreateFCmpOLE(vec1[1], vec2[1], name("ycmp")); - Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel")); - - Value *zcmp = m_builder.CreateFCmpOLE(vec1[2], vec2[2], name("zcmp")); - Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel")); - - Value *wcmp = m_builder.CreateFCmpOLE(vec1[3], vec2[3], name("wcmp")); - Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel")); - - return vectorFromVals(x, y, z, w); -} - -llvm::Value * Instructions::slt(llvm::Value *in1, llvm::Value *in2) -{ - Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); - Constant *const0f = Constant::getNullValue(Type::FloatTy); - - std::vector vec1 = extractVector(in1); - std::vector vec2 = 
extractVector(in2); - - Value *xcmp = m_builder.CreateFCmpOLT(vec1[0], vec2[0], name("xcmp")); - Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel")); - - Value *ycmp = m_builder.CreateFCmpOLT(vec1[1], vec2[1], name("ycmp")); - Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel")); - - Value *zcmp = m_builder.CreateFCmpOLT(vec1[2], vec2[2], name("zcmp")); - Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel")); - - Value *wcmp = m_builder.CreateFCmpOLT(vec1[3], vec2[3], name("wcmp")); - Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel")); - - return vectorFromVals(x, y, z, w); -} - -llvm::Value * Instructions::sne(llvm::Value *in1, llvm::Value *in2) -{ - Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); - Constant *const0f = Constant::getNullValue(Type::FloatTy); - - std::vector vec1 = extractVector(in1); - std::vector vec2 = extractVector(in2); - - Value *xcmp = m_builder.CreateFCmpONE(vec1[0], vec2[0], name("xcmp")); - Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel")); - - Value *ycmp = m_builder.CreateFCmpONE(vec1[1], vec2[1], name("ycmp")); - Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel")); - - Value *zcmp = m_builder.CreateFCmpONE(vec1[2], vec2[2], name("zcmp")); - Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel")); - - Value *wcmp = m_builder.CreateFCmpONE(vec1[3], vec2[3], name("wcmp")); - Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel")); - - return vectorFromVals(x, y, z, w); -} - -llvm::Value * Instructions::str(llvm::Value *in1, llvm::Value *in2) -{ - Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); - - return vectorFromVals(const1f, const1f, const1f, const1f); -} - -llvm::Value * Instructions::sub(llvm::Value *in1, llvm::Value *in2) -{ - Value *res = m_builder.CreateSub(in1, in2, name("sub")); - return res; -} - -llvm::Value * Instructions::trunc(llvm::Value *in) -{ - 
std::vector vec = extractVector(in); - Value *icastx = m_builder.CreateFPToSI(vec[0], IntegerType::get(32), - name("ftoix")); - Value *icasty = m_builder.CreateFPToSI(vec[1], IntegerType::get(32), - name("ftoiy")); - Value *icastz = m_builder.CreateFPToSI(vec[2], IntegerType::get(32), - name("ftoiz")); - Value *icastw = m_builder.CreateFPToSI(vec[3], IntegerType::get(32), - name("ftoiw")); - Value *fx = m_builder.CreateSIToFP(icastx, Type::FloatTy, - name("fx")); - Value *fy = m_builder.CreateSIToFP(icasty, Type::FloatTy, - name("fy")); - Value *fz = m_builder.CreateSIToFP(icastz, Type::FloatTy, - name("fz")); - Value *fw = m_builder.CreateSIToFP(icastw, Type::FloatTy, - name("fw")); - return vectorFromVals(fx, fy, fz, fw); -} - -llvm::Value * Instructions::x2d(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3) -{ - std::vector vec1 = extractVector(in1); - std::vector vec2 = extractVector(in2); - std::vector vec3 = extractVector(in3); - - Value *x2x3 = m_builder.CreateMul( vec2[0], vec3[0], name("x2x3")); - Value *y2y3 = m_builder.CreateMul( vec2[1], vec3[1], name("y2y3")); - Value *x1px2x3 = m_builder.CreateAdd (vec1[0], x2x3, name("x1 + x2x3")); - Value *x1px2x3py2y3 = m_builder.CreateAdd (x1px2x3, y2y3, name("x1 + x2x3 + y2y3")); - - Value *x2z3 = m_builder.CreateMul( vec2[0], vec3[2], name("x2z3")); - Value *y2w3 = m_builder.CreateMul( vec2[1], vec3[3], name("y2w3")); - Value *y1px2z3 = m_builder.CreateAdd (vec1[1], x2z3, name("y1 + x2z3")); - Value *y1px2z3py2w3 = m_builder.CreateAdd (y1px2z3, y2w3, name("y1 + x2z3 + y2w3")); - - return vectorFromVals(x1px2x3py2y3, y1px2z3py2w3, x1px2x3py2y3, y1px2z3py2w3); -} - -void Instructions::printVector(llvm::Value *val) -{ - static const char *frmt = "Vector is [%f, %f, %f, %f]\x0A"; - - if (!m_fmtPtr) { - Constant *format = ConstantArray::get(frmt, true); - ArrayType *arrayTy = ArrayType::get(IntegerType::get(8), strlen(frmt) + 1); - GlobalVariable* globalFormat = new GlobalVariable( - /*Type=*/arrayTy, - 
/*isConstant=*/true, - /*Linkage=*/GlobalValue::InternalLinkage, - /*Initializer=*/0, // has initializer, specified below - /*Name=*/name(".str"), - m_mod); - globalFormat->setInitializer(format); - - Constant* const_int0 = Constant::getNullValue(IntegerType::get(32)); - std::vector const_ptr_21_indices; - const_ptr_21_indices.push_back(const_int0); - const_ptr_21_indices.push_back(const_int0); - m_fmtPtr = ConstantExpr::getGetElementPtr(globalFormat, - &const_ptr_21_indices[0], const_ptr_21_indices.size()); - } - - Function *func_printf = m_mod->getFunction("printf"); - if (!func_printf) - func_printf = declarePrintf(); - assert(func_printf); - std::vector vec = extractVector(val); - Value *dx = m_builder.CreateFPExt(vec[0], Type::DoubleTy, name("dx")); - Value *dy = m_builder.CreateFPExt(vec[1], Type::DoubleTy, name("dy")); - Value *dz = m_builder.CreateFPExt(vec[2], Type::DoubleTy, name("dz")); - Value *dw = m_builder.CreateFPExt(vec[3], Type::DoubleTy, name("dw")); - std::vector params; - params.push_back(m_fmtPtr); - params.push_back(dx); - params.push_back(dy); - params.push_back(dz); - params.push_back(dw); - CallInst *call = m_builder.CreateCall(func_printf, params.begin(), params.end(), - name("printf")); - call->setCallingConv(CallingConv::C); - call->setTailCall(true); -} - -const char * Instructions::name(const char *prefix) -{ - ++m_idx; - snprintf(m_name, 32, "%s%d", prefix, m_idx); - return m_name; -} - -llvm::Value * Instructions::callCeil(llvm::Value *val) -{ - if (!m_llvmCeil) { - // predeclare the intrinsic - std::vector ceilArgs; - ceilArgs.push_back(Type::FloatTy); - AttrListPtr ceilPal; - FunctionType* ceilType = FunctionType::get( - /*Result=*/Type::FloatTy, - /*Params=*/ceilArgs, - /*isVarArg=*/false); - m_llvmCeil = Function::Create( - /*Type=*/ceilType, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"ceilf", m_mod); - m_llvmCeil->setCallingConv(CallingConv::C); - m_llvmCeil->setAttributes(ceilPal); - } - CallInst *call = 
m_builder.CreateCall(m_llvmCeil, val, - name("ceilf")); - call->setCallingConv(CallingConv::C); - call->setTailCall(false); - return call; -} - -llvm::Value *Instructions::callFAbs(llvm::Value *val) -{ - if (!m_llvmFAbs) { - // predeclare the intrinsic - std::vector fabsArgs; - fabsArgs.push_back(Type::FloatTy); - AttrListPtr fabsPal; - FunctionType* fabsType = FunctionType::get( - /*Result=*/Type::FloatTy, - /*Params=*/fabsArgs, - /*isVarArg=*/false); - m_llvmFAbs = Function::Create( - /*Type=*/fabsType, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"fabs", m_mod); - m_llvmFAbs->setCallingConv(CallingConv::C); - m_llvmFAbs->setAttributes(fabsPal); - } - CallInst *call = m_builder.CreateCall(m_llvmFAbs, val, - name("fabs")); - call->setCallingConv(CallingConv::C); - call->setTailCall(false); - return call; -} - -llvm::Value * Instructions::callFExp(llvm::Value *val) -{ - if (!m_llvmFexp) { - // predeclare the intrinsic - std::vector fexpArgs; - fexpArgs.push_back(Type::FloatTy); - AttrListPtr fexpPal; - FunctionType* fexpType = FunctionType::get( - /*Result=*/Type::FloatTy, - /*Params=*/fexpArgs, - /*isVarArg=*/false); - m_llvmFexp = Function::Create( - /*Type=*/fexpType, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"expf", m_mod); - m_llvmFexp->setCallingConv(CallingConv::C); - m_llvmFexp->setAttributes(fexpPal); - } - CallInst *call = m_builder.CreateCall(m_llvmFexp, val, - name("expf")); - call->setCallingConv(CallingConv::C); - call->setTailCall(false); - return call; -} - -llvm::Value * Instructions::callFLog(llvm::Value *val) -{ - if (!m_llvmFlog) { - // predeclare the intrinsic - std::vector flogArgs; - flogArgs.push_back(Type::FloatTy); - AttrListPtr flogPal; - FunctionType* flogType = FunctionType::get( - /*Result=*/Type::FloatTy, - /*Params=*/flogArgs, - /*isVarArg=*/false); - m_llvmFlog = Function::Create( - /*Type=*/flogType, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"logf", m_mod); - 
m_llvmFlog->setCallingConv(CallingConv::C); - m_llvmFlog->setAttributes(flogPal); - } - CallInst *call = m_builder.CreateCall(m_llvmFlog, val, - name("logf")); - call->setCallingConv(CallingConv::C); - call->setTailCall(false); - return call; -} - -llvm::Value * Instructions::callFloor(llvm::Value *val) -{ - if (!m_llvmFloor) { - // predeclare the intrinsic - std::vector floorArgs; - floorArgs.push_back(Type::FloatTy); - AttrListPtr floorPal; - FunctionType* floorType = FunctionType::get( - /*Result=*/Type::FloatTy, - /*Params=*/floorArgs, - /*isVarArg=*/false); - m_llvmFloor = Function::Create( - /*Type=*/floorType, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"floorf", m_mod); - m_llvmFloor->setCallingConv(CallingConv::C); - m_llvmFloor->setAttributes(floorPal); - } - CallInst *call = m_builder.CreateCall(m_llvmFloor, val, - name("floorf")); - call->setCallingConv(CallingConv::C); - call->setTailCall(false); - return call; -} - -llvm::Value *Instructions::callFSqrt(llvm::Value *val) -{ - if (!m_llvmFSqrt) { - // predeclare the intrinsic - std::vector fsqrtArgs; - fsqrtArgs.push_back(Type::FloatTy); - AttrListPtr fsqrtPal; - FunctionType* fsqrtType = FunctionType::get( - /*Result=*/Type::FloatTy, - /*Params=*/fsqrtArgs, - /*isVarArg=*/false); - m_llvmFSqrt = Function::Create( - /*Type=*/fsqrtType, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"llvm.sqrt.f32", m_mod); - m_llvmFSqrt->setCallingConv(CallingConv::C); - m_llvmFSqrt->setAttributes(fsqrtPal); - } - CallInst *call = m_builder.CreateCall(m_llvmFSqrt, val, - name("sqrt")); - call->setCallingConv(CallingConv::C); - call->setTailCall(false); - return call; -} - -llvm::Value * Instructions::callPow(llvm::Value *val1, llvm::Value *val2) -{ - if (!m_llvmPow) { - // predeclare the intrinsic - std::vector powArgs; - powArgs.push_back(Type::FloatTy); - powArgs.push_back(Type::FloatTy); - AttrListPtr powPal; - FunctionType* powType = FunctionType::get( - /*Result=*/Type::FloatTy, - 
/*Params=*/powArgs, - /*isVarArg=*/false); - m_llvmPow = Function::Create( - /*Type=*/powType, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"llvm.pow.f32", m_mod); - m_llvmPow->setCallingConv(CallingConv::C); - m_llvmPow->setAttributes(powPal); - } - std::vector params; - params.push_back(val1); - params.push_back(val2); - CallInst *call = m_builder.CreateCall(m_llvmPow, params.begin(), params.end(), - name("pow")); - call->setCallingConv(CallingConv::C); - call->setTailCall(false); - return call; -} - -llvm::Value * Instructions::vectorFromVals(llvm::Value *x, llvm::Value *y, - llvm::Value *z, llvm::Value *w) -{ - Constant *const_vec = Constant::getNullValue(m_floatVecType); - Value *res = m_builder.CreateInsertElement(const_vec, x, - m_storage->constantInt(0), - name("vecx")); - res = m_builder.CreateInsertElement(res, y, m_storage->constantInt(1), - name("vecxy")); - res = m_builder.CreateInsertElement(res, z, m_storage->constantInt(2), - name("vecxyz")); - if (w) - res = m_builder.CreateInsertElement(res, w, m_storage->constantInt(3), - name("vecxyzw")); - return res; -} - -llvm::Value * Instructions::constVector(float x, float y, float z, float w) -{ - std::vector vec(4); - vec[0] = ConstantFP::get(APFloat(x)); - vec[1] = ConstantFP::get(APFloat(y)); - vec[2] = ConstantFP::get(APFloat(z)); - vec[3] = ConstantFP::get(APFloat(w)); - return ConstantVector::get(m_floatVecType, vec); -} - -llvm::Function * Instructions::declarePrintf() -{ - std::vector args; - AttrListPtr params; - FunctionType* funcTy = FunctionType::get( - /*Result=*/IntegerType::get(32), - /*Params=*/args, - /*isVarArg=*/true); - Function* func_printf = Function::Create( - /*Type=*/funcTy, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"printf", m_mod); - func_printf->setCallingConv(CallingConv::C); - func_printf->setAttributes(params); - return func_printf; -} - -llvm::Function * Instructions::declareFunc(int label) -{ - PointerType *vecPtr = 
PointerType::getUnqual(m_floatVecType); - std::vector args; - args.push_back(vecPtr); - args.push_back(vecPtr); - args.push_back(vecPtr); - args.push_back(vecPtr); - AttrListPtr params; - FunctionType *funcType = FunctionType::get( - /*Result=*/Type::VoidTy, - /*Params=*/args, - /*isVarArg=*/false); - std::string name = createFuncName(label); - Function *func = Function::Create( - /*Type=*/funcType, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/name.c_str(), m_mod); - func->setCallingConv(CallingConv::C); - func->setAttributes(params); - return func; -} - -llvm::Function * Instructions::findFunction(int label) -{ - llvm::Function *func = m_functions[label]; - if (!func) { - func = declareFunc(label); - m_functions[label] = func; - } - return func; -} - -std::vector Instructions::extractVector(llvm::Value *vec) -{ - std::vector elems(4); - elems[0] = m_builder.CreateExtractElement(vec, m_storage->constantInt(0), - name("x")); - elems[1] = m_builder.CreateExtractElement(vec, m_storage->constantInt(1), - name("y")); - elems[2] = m_builder.CreateExtractElement(vec, m_storage->constantInt(2), - name("z")); - elems[3] = m_builder.CreateExtractElement(vec, m_storage->constantInt(3), - name("w")); - return elems; -} - - -#endif //MESA_LLVM - - diff --git a/src/gallium/auxiliary/gallivm/instructions.h b/src/gallium/auxiliary/gallivm/instructions.h deleted file mode 100644 index e18571251ee..00000000000 --- a/src/gallium/auxiliary/gallivm/instructions.h +++ /dev/null @@ -1,175 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - /* - * Authors: - * Zack Rusin zack@tungstengraphics.com - */ - -#ifndef INSTRUCTIONS_H -#define INSTRUCTIONS_H - -#include -#include -#include -#include - -#include -#include - -namespace llvm { - class VectorType; - class Function; -} - -class Storage; - -class Instructions -{ -public: - Instructions(llvm::Module *mod, llvm::Function *func, llvm::BasicBlock *block, - Storage *storage); - - llvm::BasicBlock *currentBlock() const; - - llvm::Value *abs(llvm::Value *in1); - llvm::Value *add(llvm::Value *in1, llvm::Value *in2); - llvm::Value *arl(llvm::Value *in1); - void beginLoop(); - void bgnSub(unsigned); - void brk(); - void cal(int label, llvm::Value *input); - llvm::Value *ceil(llvm::Value *in); - llvm::Value *clamp(llvm::Value *in); - llvm::Value *cmp(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3); - llvm::Value *cnd(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3); - llvm::Value *cnd0(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3); - llvm::Value *cos(llvm::Value *in); - llvm::Value *cross(llvm::Value *in1, llvm::Value *in2); - llvm::Value *ddx(llvm::Value *in); - llvm::Value *ddy(llvm::Value *in); - llvm::Value *div(llvm::Value *in1, llvm::Value *in2); - llvm::Value *dot2add(llvm::Value *in, llvm::Value *in2, llvm::Value *in3); - llvm::Value *dp2(llvm::Value *in1, llvm::Value *in2); - llvm::Value *dp3(llvm::Value *in1, llvm::Value *in2); - llvm::Value *dp4(llvm::Value *in1, llvm::Value *in2); - llvm::Value *dph(llvm::Value *in1, llvm::Value *in2); - llvm::Value *dst(llvm::Value *in1, llvm::Value *in2); - void elseop(); - void endif(); - void endLoop(); - void end(); - void endSub(); - llvm::Value *exp(llvm::Value *in); - llvm::Value *ex2(llvm::Value *in); - llvm::Value *floor(llvm::Value *in); - llvm::Value *frc(llvm::Value *in); - void ifop(llvm::Value *in); - llvm::Value *kil(llvm::Value *in); - llvm::Value *lerp(llvm::Value *in1, llvm::Value *in2, - 
llvm::Value *in3); - llvm::Value *lg2(llvm::Value *in); - llvm::Value *lit(llvm::Value *in); - llvm::Value *log(llvm::Value *in); - llvm::Value *madd(llvm::Value *in1, llvm::Value *in2, - llvm::Value *in3); - llvm::Value *max(llvm::Value *in1, llvm::Value *in2); - llvm::Value *min(llvm::Value *in1, llvm::Value *in2); - llvm::Value *mul(llvm::Value *in1, llvm::Value *in2); - llvm::Value *neg(llvm::Value *in); - llvm::Value *nrm(llvm::Value *in); - llvm::Value *pow(llvm::Value *in1, llvm::Value *in2); - llvm::Value *rcp(llvm::Value *in); - llvm::Value *rsq(llvm::Value *in); - llvm::Value *scs(llvm::Value *in); - llvm::Value *seq(llvm::Value *in1, llvm::Value *in2); - llvm::Value *sfl(llvm::Value *in1, llvm::Value *in2); - llvm::Value *sge(llvm::Value *in1, llvm::Value *in2); - llvm::Value *sgt(llvm::Value *in1, llvm::Value *in2); - llvm::Value *sin(llvm::Value *in); - llvm::Value *sle(llvm::Value *in1, llvm::Value *in2); - llvm::Value *slt(llvm::Value *in1, llvm::Value *in2); - llvm::Value *sne(llvm::Value *in1, llvm::Value *in2); - llvm::Value *str(llvm::Value *in1, llvm::Value *in2); - llvm::Value *sub(llvm::Value *in1, llvm::Value *in2); - llvm::Value *trunc(llvm::Value *in); - llvm::Value *x2d(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3); - - void printVector(llvm::Value *val); -private: - const char *name(const char *prefix); - - llvm::Value *callCeil(llvm::Value *val); - llvm::Value *callFAbs(llvm::Value *val); - llvm::Value *callFExp(llvm::Value *val); - llvm::Value *callFLog(llvm::Value *val); - llvm::Value *callFloor(llvm::Value *val); - llvm::Value *callFSqrt(llvm::Value *val); - llvm::Value *callPow(llvm::Value *val1, llvm::Value *val2); - - llvm::Value *vectorFromVals(llvm::Value *x, llvm::Value *y, - llvm::Value *z, llvm::Value *w=0); - - llvm::Value *constVector(float x, float y, float z, float w); - - llvm::Function *declarePrintf(); - llvm::Function *declareFunc(int label); - - llvm::Function *findFunction(int label); - - std::vector 
extractVector(llvm::Value *vec); -private: - llvm::Module *m_mod; - llvm::Function *m_func; - char m_name[32]; - llvm::IRBuilder<> m_builder; - int m_idx; - - llvm::VectorType *m_floatVecType; - - llvm::Function *m_llvmCeil; - llvm::Function *m_llvmFSqrt; - llvm::Function *m_llvmFAbs; - llvm::Function *m_llvmPow; - llvm::Function *m_llvmFloor; - llvm::Function *m_llvmFlog; - llvm::Function *m_llvmFexp; - llvm::Function *m_llvmLit; - - llvm::Constant *m_fmtPtr; - - std::stack m_ifStack; - struct Loop { - llvm::BasicBlock *begin; - llvm::BasicBlock *end; - }; - std::stack m_loopStack; - std::map m_functions; - Storage *m_storage; -}; - -#endif diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp deleted file mode 100644 index 721b7d2d833..00000000000 --- a/src/gallium/auxiliary/gallivm/instructionssoa.cpp +++ /dev/null @@ -1,525 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include -#include "instructionssoa.h" - -#include "storagesoa.h" - -#include "pipe/p_shader_tokens.h" -#include "util/u_memory.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -#include - - -/* disable some warnings. this file is autogenerated */ -#if defined(__GNUC__) -#pragma GCC diagnostic ignored "-Wunused-variable" -#endif -using namespace llvm; -#include "gallivmsoabuiltins.cpp" -#if defined(__GNUC__) -#pragma GCC diagnostic warning "-Wunused-variable" -#endif - -InstructionsSoa::InstructionsSoa(llvm::Module *mod, llvm::Function *func, - llvm::BasicBlock *block, StorageSoa *storage) - : m_builder(block), - m_storage(storage), - m_idx(0) -{ - createFunctionMap(); - createBuiltins(); -} - -const char * InstructionsSoa::name(const char *prefix) const -{ - ++m_idx; - snprintf(m_name, 32, "%s%d", prefix, m_idx); - return m_name; -} - -llvm::Value * InstructionsSoa::vectorFromVals(llvm::Value *x, llvm::Value *y, - llvm::Value *z, llvm::Value *w) -{ - VectorType *vectorType = VectorType::get(Type::FloatTy, 4); - Constant *constVector = Constant::getNullValue(vectorType); - Value *res = m_builder.CreateInsertElement(constVector, x, - m_storage->constantInt(0), - name("vecx")); - res = m_builder.CreateInsertElement(res, y, m_storage->constantInt(1), - name("vecxy")); - res = m_builder.CreateInsertElement(res, z, m_storage->constantInt(2), - name("vecxyz")); - if (w) - res = m_builder.CreateInsertElement(res, w, m_storage->constantInt(3), - name("vecxyzw")); - return res; -} - -void InstructionsSoa::end() -{ - m_builder.CreateRetVoid(); -} - -std::vector 
InstructionsSoa::extractVector(llvm::Value *vector) -{ - std::vector res(4); - res[0] = m_builder.CreateExtractElement(vector, - m_storage->constantInt(0), - name("extract1X")); - res[1] = m_builder.CreateExtractElement(vector, - m_storage->constantInt(1), - name("extract2X")); - res[2] = m_builder.CreateExtractElement(vector, - m_storage->constantInt(2), - name("extract3X")); - res[3] = m_builder.CreateExtractElement(vector, - m_storage->constantInt(3), - name("extract4X")); - - return res; -} - -llvm::IRBuilder<>* InstructionsSoa::getIRBuilder() -{ - return &m_builder; -} - -void InstructionsSoa::createFunctionMap() -{ - m_functionsMap[TGSI_OPCODE_ABS] = "abs"; - m_functionsMap[TGSI_OPCODE_DP3] = "dp3"; - m_functionsMap[TGSI_OPCODE_DP4] = "dp4"; - m_functionsMap[TGSI_OPCODE_MIN] = "min"; - m_functionsMap[TGSI_OPCODE_MAX] = "max"; - m_functionsMap[TGSI_OPCODE_POW] = "pow"; - m_functionsMap[TGSI_OPCODE_LIT] = "lit"; - m_functionsMap[TGSI_OPCODE_RSQ] = "rsq"; - m_functionsMap[TGSI_OPCODE_SLT] = "slt"; -} - -void InstructionsSoa::createDependencies() -{ - { - std::vector powDeps(2); - powDeps[0] = "powf"; - powDeps[1] = "powvec"; - m_builtinDependencies["pow"] = powDeps; - } - { - std::vector absDeps(2); - absDeps[0] = "fabsf"; - absDeps[1] = "absvec"; - m_builtinDependencies["abs"] = absDeps; - } - { - std::vector maxDeps(1); - maxDeps[0] = "maxvec"; - m_builtinDependencies["max"] = maxDeps; - } - { - std::vector minDeps(1); - minDeps[0] = "minvec"; - m_builtinDependencies["min"] = minDeps; - } - { - std::vector litDeps(4); - litDeps[0] = "minvec"; - litDeps[1] = "maxvec"; - litDeps[2] = "powf"; - litDeps[3] = "powvec"; - m_builtinDependencies["lit"] = litDeps; - } - { - std::vector rsqDeps(4); - rsqDeps[0] = "sqrtf"; - rsqDeps[1] = "sqrtvec"; - rsqDeps[2] = "fabsf"; - rsqDeps[3] = "absvec"; - m_builtinDependencies["rsq"] = rsqDeps; - } -} - -llvm::Function * InstructionsSoa::function(int op) -{ - if (m_functions.find(op) != m_functions.end()) - return 
m_functions[op]; - - std::string name = m_functionsMap[op]; - - std::cout <<"For op = "< deps = m_builtinDependencies[name]; - for (unsigned int i = 0; i < deps.size(); ++i) { - llvm::Function *func = m_builtins->getFunction(deps[i]); - std::cout <<"\tinjecting dep = '"<getName()<<"'"<getFunction(name); - injectFunction(originalFunc, op); - return m_functions[op]; -} - -llvm::Module * InstructionsSoa::currentModule() const -{ - BasicBlock *block = m_builder.GetInsertBlock(); - if (!block || !block->getParent()) - return 0; - - return block->getParent()->getParent(); -} - -void InstructionsSoa::createBuiltins() -{ - std::string ErrMsg; - MemoryBuffer *buffer = MemoryBuffer::getMemBuffer( - (const char*)&soabuiltins_data[0], - (const char*)&soabuiltins_data[Elements(soabuiltins_data) - 1]); - m_builtins = ParseBitcodeFile(buffer, &ErrMsg); - std::cout<<"Builtins created at "< InstructionsSoa::abs(const std::vector in1) -{ - llvm::Function *func = function(TGSI_OPCODE_ABS); - return callBuiltin(func, in1); -} - -std::vector InstructionsSoa::add(const std::vector in1, - const std::vector in2) -{ - std::vector res(4); - - res[0] = m_builder.CreateAdd(in1[0], in2[0], name("addx")); - res[1] = m_builder.CreateAdd(in1[1], in2[1], name("addy")); - res[2] = m_builder.CreateAdd(in1[2], in2[2], name("addz")); - res[3] = m_builder.CreateAdd(in1[3], in2[3], name("addw")); - - return res; -} - -std::vector InstructionsSoa::arl(const std::vector in) -{ - std::vector res(4); - - //Extract x's - llvm::Value *x1 = m_builder.CreateExtractElement(in[0], - m_storage->constantInt(0), - name("extractX")); - //cast it to an unsigned int - x1 = m_builder.CreateFPToUI(x1, IntegerType::get(32), name("x1IntCast")); - - res[0] = x1;//vectorFromVals(x1, x2, x3, x4); - //only x is valid. 
the others shouldn't be necessary - /* - res[1] = Constant::getNullValue(m_floatVecType); - res[2] = Constant::getNullValue(m_floatVecType); - res[3] = Constant::getNullValue(m_floatVecType); - */ - - return res; -} - -std::vector InstructionsSoa::dp3(const std::vector in1, - const std::vector in2) -{ - llvm::Function *func = function(TGSI_OPCODE_DP3); - return callBuiltin(func, in1, in2); -} - -std::vector InstructionsSoa::lit(const std::vector in) -{ - llvm::Function *func = function(TGSI_OPCODE_LIT); - return callBuiltin(func, in); -} - -std::vector InstructionsSoa::madd(const std::vector in1, - const std::vector in2, - const std::vector in3) -{ - std::vector res = mul(in1, in2); - return add(res, in3); -} - -std::vector InstructionsSoa::max(const std::vector in1, - const std::vector in2) -{ - llvm::Function *func = function(TGSI_OPCODE_MAX); - return callBuiltin(func, in1, in2); -} - -std::vector InstructionsSoa::min(const std::vector in1, - const std::vector in2) -{ - llvm::Function *func = function(TGSI_OPCODE_MIN); - return callBuiltin(func, in1, in2); -} - -std::vector InstructionsSoa::mul(const std::vector in1, - const std::vector in2) -{ - std::vector res(4); - - res[0] = m_builder.CreateMul(in1[0], in2[0], name("mulx")); - res[1] = m_builder.CreateMul(in1[1], in2[1], name("muly")); - res[2] = m_builder.CreateMul(in1[2], in2[2], name("mulz")); - res[3] = m_builder.CreateMul(in1[3], in2[3], name("mulw")); - - return res; -} - -std::vector InstructionsSoa::pow(const std::vector in1, - const std::vector in2) -{ - llvm::Function *func = function(TGSI_OPCODE_POW); - return callBuiltin(func, in1, in2); -} - -std::vector InstructionsSoa::rsq(const std::vector in) -{ - llvm::Function *func = function(TGSI_OPCODE_RSQ); - return callBuiltin(func, in); -} - -std::vector InstructionsSoa::slt(const std::vector in1, - const std::vector in2) -{ - llvm::Function *func = function(TGSI_OPCODE_SLT); - return callBuiltin(func, in1, in2); -} - -std::vector 
InstructionsSoa::sub(const std::vector in1, - const std::vector in2) -{ - std::vector res(4); - - res[0] = m_builder.CreateSub(in1[0], in2[0], name("subx")); - res[1] = m_builder.CreateSub(in1[1], in2[1], name("suby")); - res[2] = m_builder.CreateSub(in1[2], in2[2], name("subz")); - res[3] = m_builder.CreateSub(in1[3], in2[3], name("subw")); - - return res; -} - -void checkFunction(Function *func) -{ - for (Function::const_iterator BI = func->begin(), BE = func->end(); - BI != BE; ++BI) { - const BasicBlock &BB = *BI; - for (BasicBlock::const_iterator II = BB.begin(), IE = BB.end(); - II != IE; ++II) { - const Instruction &I = *II; - std::cout<< "Instr = "<setOperand(op, V); - } - } - } -} - -llvm::Value * InstructionsSoa::allocaTemp() -{ - VectorType *vector = VectorType::get(Type::FloatTy, 4); - ArrayType *vecArray = ArrayType::get(vector, 4); - AllocaInst *alloca = new AllocaInst(vecArray, name("tmpRes"), - m_builder.GetInsertBlock()); - - std::vector indices; - indices.push_back(m_storage->constantInt(0)); - indices.push_back(m_storage->constantInt(0)); - GetElementPtrInst *getElem = GetElementPtrInst::Create(alloca, - indices.begin(), - indices.end(), - name("allocaPtr"), - m_builder.GetInsertBlock()); - return getElem; -} - -std::vector InstructionsSoa::allocaToResult(llvm::Value *allocaPtr) -{ - GetElementPtrInst *xElemPtr = GetElementPtrInst::Create(allocaPtr, - m_storage->constantInt(0), - name("xPtr"), - m_builder.GetInsertBlock()); - GetElementPtrInst *yElemPtr = GetElementPtrInst::Create(allocaPtr, - m_storage->constantInt(1), - name("yPtr"), - m_builder.GetInsertBlock()); - GetElementPtrInst *zElemPtr = GetElementPtrInst::Create(allocaPtr, - m_storage->constantInt(2), - name("zPtr"), - m_builder.GetInsertBlock()); - GetElementPtrInst *wElemPtr = GetElementPtrInst::Create(allocaPtr, - m_storage->constantInt(3), - name("wPtr"), - m_builder.GetInsertBlock()); - - std::vector res(4); - res[0] = new LoadInst(xElemPtr, name("xRes"), false, 
m_builder.GetInsertBlock()); - res[1] = new LoadInst(yElemPtr, name("yRes"), false, m_builder.GetInsertBlock()); - res[2] = new LoadInst(zElemPtr, name("zRes"), false, m_builder.GetInsertBlock()); - res[3] = new LoadInst(wElemPtr, name("wRes"), false, m_builder.GetInsertBlock()); - - return res; -} - -std::vector InstructionsSoa::dp4(const std::vector in1, - const std::vector in2) -{ - llvm::Function *func = function(TGSI_OPCODE_DP4); - return callBuiltin(func, in1, in2); -} - -std::vector InstructionsSoa::callBuiltin(llvm::Function *func, const std::vector in1) -{ - std::vector params; - - llvm::Value *allocaPtr = allocaTemp(); - params.push_back(allocaPtr); - params.push_back(in1[0]); - params.push_back(in1[1]); - params.push_back(in1[2]); - params.push_back(in1[3]); - CallInst *call = m_builder.CreateCall(func, params.begin(), params.end()); - call->setCallingConv(CallingConv::C); - call->setTailCall(false); - - return allocaToResult(allocaPtr); -} - -std::vector InstructionsSoa::callBuiltin(llvm::Function *func, const std::vector in1, - const std::vector in2) -{ - std::vector params; - - llvm::Value *allocaPtr = allocaTemp(); - params.push_back(allocaPtr); - params.push_back(in1[0]); - params.push_back(in1[1]); - params.push_back(in1[2]); - params.push_back(in1[3]); - params.push_back(in2[0]); - params.push_back(in2[1]); - params.push_back(in2[2]); - params.push_back(in2[3]); - CallInst *call = m_builder.CreateCall(func, params.begin(), params.end()); - call->setCallingConv(CallingConv::C); - call->setTailCall(false); - - return allocaToResult(allocaPtr); -} - -std::vector InstructionsSoa::callBuiltin(llvm::Function *func, const std::vector in1, - const std::vector in2, - const std::vector in3) -{ - std::vector params; - - llvm::Value *allocaPtr = allocaTemp(); - params.push_back(allocaPtr); - params.push_back(in1[0]); - params.push_back(in1[1]); - params.push_back(in1[2]); - params.push_back(in1[3]); - params.push_back(in2[0]); - params.push_back(in2[1]); - 
params.push_back(in2[2]); - params.push_back(in2[3]); - params.push_back(in3[0]); - params.push_back(in3[1]); - params.push_back(in3[2]); - params.push_back(in3[3]); - CallInst *call = m_builder.CreateCall(func, params.begin(), params.end()); - call->setCallingConv(CallingConv::C); - call->setTailCall(false); - - return allocaToResult(allocaPtr); -} - -void InstructionsSoa::injectFunction(llvm::Function *originalFunc, int op) -{ - assert(originalFunc); - std::cout << "injecting function originalFunc " <getName() <getFunction(originalFunc->getName()); - if (func) { - m_functions[op] = func; - return; - } - } - llvm::Function *func = 0; - if (originalFunc->isDeclaration()) { - func = Function::Create(originalFunc->getFunctionType(), GlobalValue::ExternalLinkage, - originalFunc->getName(), currentModule()); - func->setCallingConv(CallingConv::C); - const AttrListPtr pal; - func->setAttributes(pal); - currentModule()->dump(); - } else { - DenseMap val; - val[m_builtins->getFunction("fabsf")] = currentModule()->getFunction("fabsf"); - val[m_builtins->getFunction("powf")] = currentModule()->getFunction("powf"); - val[m_builtins->getFunction("sqrtf")] = currentModule()->getFunction("sqrtf"); - func = CloneFunction(originalFunc, val); -#if 0 - std::cout <<" replacing "<getFunction("powf") - <<", with " <getFunction("powf")<getFunctionList().push_back(func); - } - if (op != TGSI_OPCODE_LAST) { - m_functions[op] = func; - } -} - - diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.h b/src/gallium/auxiliary/gallivm/instructionssoa.h deleted file mode 100644 index d6831e0a6b9..00000000000 --- a/src/gallium/auxiliary/gallivm/instructionssoa.h +++ /dev/null @@ -1,116 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -#ifndef INSTRUCTIONSSOA_H -#define INSTRUCTIONSSOA_H - -#include -#include - -#include -#include - -namespace llvm { - class Module; - class Function; - class BasicBlock; - class Value; -} -class StorageSoa; - -class InstructionsSoa -{ -public: - InstructionsSoa(llvm::Module *mod, llvm::Function *func, - llvm::BasicBlock *block, StorageSoa *storage); - - std::vector abs(const std::vector in1); - std::vector arl(const std::vector in); - std::vector add(const std::vector in1, - const std::vector in2); - std::vector dp3(const std::vector in1, - const std::vector in2); - std::vector dp4(const std::vector in1, - const std::vector in2); - std::vector lit(const std::vector in); - std::vector madd(const std::vector in1, - const std::vector in2, - const std::vector in3); - std::vector max(const std::vector in1, - const std::vector in2); - std::vector min(const std::vector in1, - const std::vector in2); - std::vector mul(const std::vector in1, - const std::vector in2); - std::vector pow(const std::vector in1, - const std::vector in2); - std::vector rsq(const std::vector in1); - std::vector slt(const std::vector in1, - const std::vector in2); - std::vector sub(const std::vector in1, - const std::vector in2); - void end(); - - std::vector extractVector(llvm::Value *vector); - llvm::IRBuilder<>* getIRBuilder(); -private: - const char * name(const char *prefix) const; - llvm::Value *vectorFromVals(llvm::Value *x, llvm::Value *y, - llvm::Value *z, llvm::Value *w); - void createFunctionMap(); - void createBuiltins(); - void createDependencies(); - llvm::Function *function(int); - llvm::Module *currentModule() const; - llvm::Value *allocaTemp(); - std::vector allocaToResult(llvm::Value *allocaPtr); - std::vector callBuiltin(llvm::Function *func, - const std::vector in1); - std::vector callBuiltin(llvm::Function *func, - const std::vector in1, - const std::vector in2); - std::vector 
callBuiltin(llvm::Function *func, - const std::vector in1, - const std::vector in2, - const std::vector in3); - void injectFunction(llvm::Function *originalFunc, int op = TGSI_OPCODE_LAST); -private: - llvm::IRBuilder<> m_builder; - StorageSoa *m_storage; - - std::map m_functionsMap; - std::map m_functions; - llvm::Module *m_builtins; - std::map > m_builtinDependencies; - -private: - mutable int m_idx; - mutable char m_name[32]; -}; - - -#endif diff --git a/src/gallium/auxiliary/gallivm/llvm_builtins.c b/src/gallium/auxiliary/gallivm/llvm_builtins.c deleted file mode 100644 index d5a003a48b2..00000000000 --- a/src/gallium/auxiliary/gallivm/llvm_builtins.c +++ /dev/null @@ -1,114 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - /* - * Authors: - * Zack Rusin zack@tungstengraphics.com - */ -typedef __attribute__(( ext_vector_type(4) )) float float4; - -extern float powf(float a, float b); - -inline float approx(float a, float b) -{ - if (b < -128.0f) b = -128.0f; - if (b > 128.0f) b = 128.0f; - if (a < 0) a = 0; - return powf(a, b); -} - -inline float4 lit(float4 tmp) -{ - float4 result; - result.x = 1.0; - result.w = 1.0; - if (tmp.x > 0) { - result.y = tmp.x; - result.z = approx(tmp.y, tmp.w); - } else { - result.y = 0; - result.z = 0; - } - return result; -} - -inline float4 cmp(float4 tmp0, float4 tmp1, float4 tmp2) -{ - float4 result; - - result.x = (tmp0.x < 0.0) ? tmp1.x : tmp2.x; - result.y = (tmp0.y < 0.0) ? tmp1.y : tmp2.y; - result.z = (tmp0.z < 0.0) ? tmp1.z : tmp2.z; - result.w = (tmp0.w < 0.0) ? 
tmp1.w : tmp2.w; - - return result; -} - -extern float cosf(float val); -extern float sinf(float val); - -inline float4 vcos(float4 val) -{ - float4 result; - printf("VEC IN is %f %f %f %f\n", val.x, val.y, val.z, val.w); - result.x = cosf(val.x); - result.y = cosf(val.x); - result.z = cosf(val.x); - result.w = cosf(val.x); - printf("VEC OUT is %f %f %f %f\n", result.x, result.y, result.z, result.w); - return result; -} - -inline float4 scs(float4 val) -{ - float4 result; - float tmp = val.x; - result.x = cosf(tmp); - result.y = sinf(tmp); - return result; -} - - -inline float4 vsin(float4 val) -{ - float4 result; - float tmp = val.x; - float res = sinf(tmp); - result.x = res; - result.y = res; - result.z = res; - result.w = res; - return result; -} - -inline int kil(float4 val) -{ - if (val.x < 0 || val.y < 0 || val.z < 0 || val.w < 0) - return 1; - else - return 0; -} diff --git a/src/gallium/auxiliary/gallivm/loweringpass.cpp b/src/gallium/auxiliary/gallivm/loweringpass.cpp deleted file mode 100644 index 556dbec3661..00000000000 --- a/src/gallium/auxiliary/gallivm/loweringpass.cpp +++ /dev/null @@ -1,17 +0,0 @@ -#include "loweringpass.h" - -using namespace llvm; - -char LoweringPass::ID = 0; -RegisterPass X("lowering", "Lowering Pass"); - -LoweringPass::LoweringPass() - : ModulePass((intptr_t)&ID) -{ -} - -bool LoweringPass::runOnModule(Module &m) -{ - llvm::cerr << "Hello: " << m.getModuleIdentifier() << "\n"; - return false; -} diff --git a/src/gallium/auxiliary/gallivm/loweringpass.h b/src/gallium/auxiliary/gallivm/loweringpass.h deleted file mode 100644 index f62dcf6ba73..00000000000 --- a/src/gallium/auxiliary/gallivm/loweringpass.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef LOWERINGPASS_H -#define LOWERINGPASS_H - -#include "llvm/Pass.h" -#include "llvm/Module.h" - -struct LoweringPass : public llvm::ModulePass -{ - static char ID; - LoweringPass(); - - virtual bool runOnModule(llvm::Module &m); -}; - -#endif diff --git 
a/src/gallium/auxiliary/gallivm/lp_bld_alpha.c b/src/gallium/auxiliary/gallivm/lp_bld_alpha.c new file mode 100644 index 00000000000..7245730350c --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_alpha.c @@ -0,0 +1,63 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Alpha testing to LLVM IR translation. 
+ * + * @author Jose Fonseca + */ + +#include "pipe/p_state.h" + +#include "lp_bld_type.h" +#include "lp_bld_const.h" +#include "lp_bld_logic.h" +#include "lp_bld_flow.h" +#include "lp_bld_debug.h" +#include "lp_bld_alpha.h" + + +void +lp_build_alpha_test(LLVMBuilderRef builder, + const struct pipe_alpha_state *state, + struct lp_type type, + struct lp_build_mask_context *mask, + LLVMValueRef alpha, + LLVMValueRef ref) +{ + struct lp_build_context bld; + + lp_build_context_init(&bld, builder, type); + + if(state->enabled) { + LLVMValueRef test = lp_build_cmp(&bld, state->func, alpha, ref); + + lp_build_name(test, "alpha_mask"); + + lp_build_mask_update(mask, test); + } +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_alpha.h b/src/gallium/auxiliary/gallivm/lp_bld_alpha.h new file mode 100644 index 00000000000..634575670db --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_alpha.h @@ -0,0 +1,54 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Alpha testing to LLVM IR translation. + * + * @author Jose Fonseca + */ + +#ifndef LP_BLD_ALPHA_H +#define LP_BLD_ALPHA_H + + +#include + +struct pipe_alpha_state; +struct lp_type; +struct lp_build_mask_context; + + +void +lp_build_alpha_test(LLVMBuilderRef builder, + const struct pipe_alpha_state *state, + struct lp_type type, + struct lp_build_mask_context *mask, + LLVMValueRef alpha, + LLVMValueRef ref); + + +#endif /* !LP_BLD_ALPHA_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c new file mode 100644 index 00000000000..54b31befe6d --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -0,0 +1,1325 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * @file + * Helper + * + * LLVM IR doesn't support all basic arithmetic operations we care about (most + * notably min/max and saturated operations), and it is often necessary to + * resort machine-specific intrinsics directly. The functions here hide all + * these implementation details from the other modules. + * + * We also do simple expressions simplification here. Reasons are: + * - it is very easy given we have all necessary information readily available + * - LLVM optimization passes fail to simplify several vector expressions + * - We often know value constraints which the optimization passes have no way + * of knowing, such as when source arguments are known to be in [0, 1] range. + * + * @author Jose Fonseca + */ + + +#include "util/u_memory.h" +#include "util/u_debug.h" +#include "util/u_math.h" +#include "util/u_string.h" +#include "util/u_cpu_detect.h" + +#include "lp_bld_type.h" +#include "lp_bld_const.h" +#include "lp_bld_intr.h" +#include "lp_bld_logic.h" +#include "lp_bld_pack.h" +#include "lp_bld_debug.h" +#include "lp_bld_arit.h" + + +/** + * Generate min(a, b) + * No checks for special case values of a or b = 1 or 0 are done. 
+ */ +static LLVMValueRef +lp_build_min_simple(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef b) +{ + const struct lp_type type = bld->type; + const char *intrinsic = NULL; + LLVMValueRef cond; + + /* TODO: optimize the constant case */ + + if(type.width * type.length == 128) { + if(type.floating) { + if(type.width == 32 && util_cpu_caps.has_sse) + intrinsic = "llvm.x86.sse.min.ps"; + if(type.width == 64 && util_cpu_caps.has_sse2) + intrinsic = "llvm.x86.sse2.min.pd"; + } + else { + if(type.width == 8 && !type.sign && util_cpu_caps.has_sse2) + intrinsic = "llvm.x86.sse2.pminu.b"; + if(type.width == 8 && type.sign && util_cpu_caps.has_sse4_1) + intrinsic = "llvm.x86.sse41.pminsb"; + if(type.width == 16 && !type.sign && util_cpu_caps.has_sse4_1) + intrinsic = "llvm.x86.sse41.pminuw"; + if(type.width == 16 && type.sign && util_cpu_caps.has_sse2) + intrinsic = "llvm.x86.sse2.pmins.w"; + if(type.width == 32 && !type.sign && util_cpu_caps.has_sse4_1) + intrinsic = "llvm.x86.sse41.pminud"; + if(type.width == 32 && type.sign && util_cpu_caps.has_sse4_1) + intrinsic = "llvm.x86.sse41.pminsd"; + } + } + + if(intrinsic) + return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b); + + cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b); + return lp_build_select(bld, cond, a, b); +} + + +/** + * Generate max(a, b) + * No checks for special case values of a or b = 1 or 0 are done. 
+ */ +static LLVMValueRef +lp_build_max_simple(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef b) +{ + const struct lp_type type = bld->type; + const char *intrinsic = NULL; + LLVMValueRef cond; + + /* TODO: optimize the constant case */ + + if(type.width * type.length == 128) { + if(type.floating) { + if(type.width == 32 && util_cpu_caps.has_sse) + intrinsic = "llvm.x86.sse.max.ps"; + if(type.width == 64 && util_cpu_caps.has_sse2) + intrinsic = "llvm.x86.sse2.max.pd"; + } + else { + if(type.width == 8 && !type.sign && util_cpu_caps.has_sse2) + intrinsic = "llvm.x86.sse2.pmaxu.b"; + if(type.width == 8 && type.sign && util_cpu_caps.has_sse4_1) + intrinsic = "llvm.x86.sse41.pmaxsb"; + if(type.width == 16 && !type.sign && util_cpu_caps.has_sse4_1) + intrinsic = "llvm.x86.sse41.pmaxuw"; + if(type.width == 16 && type.sign && util_cpu_caps.has_sse2) + intrinsic = "llvm.x86.sse2.pmaxs.w"; + if(type.width == 32 && !type.sign && util_cpu_caps.has_sse4_1) + intrinsic = "llvm.x86.sse41.pmaxud"; + if(type.width == 32 && type.sign && util_cpu_caps.has_sse4_1) + intrinsic = "llvm.x86.sse41.pmaxsd"; + } + } + + if(intrinsic) + return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b); + + cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b); + return lp_build_select(bld, cond, a, b); +} + + +/** + * Generate 1 - a, or ~a depending on bld->type. 
+ */ +LLVMValueRef +lp_build_comp(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + + if(a == bld->one) + return bld->zero; + if(a == bld->zero) + return bld->one; + + if(type.norm && !type.floating && !type.fixed && !type.sign) { + if(LLVMIsConstant(a)) + return LLVMConstNot(a); + else + return LLVMBuildNot(bld->builder, a, ""); + } + + if(LLVMIsConstant(a)) + return LLVMConstSub(bld->one, a); + else + return LLVMBuildSub(bld->builder, bld->one, a, ""); +} + + +/** + * Generate a + b + */ +LLVMValueRef +lp_build_add(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef b) +{ + const struct lp_type type = bld->type; + LLVMValueRef res; + + if(a == bld->zero) + return b; + if(b == bld->zero) + return a; + if(a == bld->undef || b == bld->undef) + return bld->undef; + + if(bld->type.norm) { + const char *intrinsic = NULL; + + if(a == bld->one || b == bld->one) + return bld->one; + + if(util_cpu_caps.has_sse2 && + type.width * type.length == 128 && + !type.floating && !type.fixed) { + if(type.width == 8) + intrinsic = type.sign ? "llvm.x86.sse2.padds.b" : "llvm.x86.sse2.paddus.b"; + if(type.width == 16) + intrinsic = type.sign ? "llvm.x86.sse2.padds.w" : "llvm.x86.sse2.paddus.w"; + } + + if(intrinsic) + return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b); + } + + if(LLVMIsConstant(a) && LLVMIsConstant(b)) + res = LLVMConstAdd(a, b); + else + res = LLVMBuildAdd(bld->builder, a, b, ""); + + /* clamp to ceiling of 1.0 */ + if(bld->type.norm && (bld->type.floating || bld->type.fixed)) + res = lp_build_min_simple(bld, res, bld->one); + + /* XXX clamp to floor of -1 or 0??? 
*/ + + return res; +} + + +/** + * Generate a - b + */ +LLVMValueRef +lp_build_sub(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef b) +{ + const struct lp_type type = bld->type; + LLVMValueRef res; + + if(b == bld->zero) + return a; + if(a == bld->undef || b == bld->undef) + return bld->undef; + if(a == b) + return bld->zero; + + if(bld->type.norm) { + const char *intrinsic = NULL; + + if(b == bld->one) + return bld->zero; + + if(util_cpu_caps.has_sse2 && + type.width * type.length == 128 && + !type.floating && !type.fixed) { + if(type.width == 8) + intrinsic = type.sign ? "llvm.x86.sse2.psubs.b" : "llvm.x86.sse2.psubus.b"; + if(type.width == 16) + intrinsic = type.sign ? "llvm.x86.sse2.psubs.w" : "llvm.x86.sse2.psubus.w"; + } + + if(intrinsic) + return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b); + } + + if(LLVMIsConstant(a) && LLVMIsConstant(b)) + res = LLVMConstSub(a, b); + else + res = LLVMBuildSub(bld->builder, a, b, ""); + + if(bld->type.norm && (bld->type.floating || bld->type.fixed)) + res = lp_build_max_simple(bld, res, bld->zero); + + return res; +} + + +/** + * Normalized 8bit multiplication. + * + * - alpha plus one + * + * makes the following approximation to the division (Sree) + * + * a*b/255 ~= (a*(b + 1)) >> 8 + * + * which is the fastest method that satisfies the following OpenGL criteria + * + * 0*0 = 0 and 255*255 = 255 + * + * - geometric series + * + * takes the geometric series approximation to the division + * + * t/255 = (t >> 8) + (t >> 16) + (t >> 24) ..
+ * + * in this case just the first two terms to fit in 16bit arithmetic + * + * t/255 ~= (t + (t >> 8)) >> 8 + * + * note that just by itself it doesn't satisfy the OpenGL criteria, as + * 255*255 = 254, so the special case b = 255 must be accounted for or roundoff + * must be used + * + * - geometric series plus rounding + * + * when using a geometric series division instead of truncating the result + * use roundoff in the approximation (Jim Blinn) + * + * t/255 ~= (t + (t >> 8) + 0x80) >> 8 + * + * achieving the exact results + * + * @sa Alvy Ray Smith, Image Compositing Fundamentals, Tech Memo 4, Aug 15, 1995, + * ftp://ftp.alvyray.com/Acrobat/4_Comp.pdf + * @sa Michael Herf, The "double blend trick", May 2000, + * http://www.stereopsis.com/doubleblend.html + */ +static LLVMValueRef +lp_build_mul_u8n(LLVMBuilderRef builder, + struct lp_type i16_type, + LLVMValueRef a, LLVMValueRef b) +{ + LLVMValueRef c8; + LLVMValueRef ab; + + c8 = lp_build_int_const_scalar(i16_type, 8); + +#if 0 + + /* a*b/255 ~= (a*(b + 1)) >> 8 */ + b = LLVMBuildAdd(builder, b, lp_build_int_const_scalar(i16_type, 1), ""); + ab = LLVMBuildMul(builder, a, b, ""); + +#else + + /* ab/255 ~= (ab + (ab >> 8) + 0x80) >> 8 */ + ab = LLVMBuildMul(builder, a, b, ""); + ab = LLVMBuildAdd(builder, ab, LLVMBuildLShr(builder, ab, c8, ""), ""); + ab = LLVMBuildAdd(builder, ab, lp_build_int_const_scalar(i16_type, 0x80), ""); + +#endif + + ab = LLVMBuildLShr(builder, ab, c8, ""); + + return ab; +} + + +/** + * Generate a * b + */ +LLVMValueRef +lp_build_mul(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef b) +{ + const struct lp_type type = bld->type; + LLVMValueRef shift; + LLVMValueRef res; + + if(a == bld->zero) + return bld->zero; + if(a == bld->one) + return b; + if(b == bld->zero) + return bld->zero; + if(b == bld->one) + return a; + if(a == bld->undef || b == bld->undef) + return bld->undef; + + if(!type.floating && !type.fixed && type.norm) { + if(type.width == 8) { + struct lp_type
i16_type = lp_wider_type(type); + LLVMValueRef al, ah, bl, bh, abl, abh, ab; + + lp_build_unpack2(bld->builder, type, i16_type, a, &al, &ah); + lp_build_unpack2(bld->builder, type, i16_type, b, &bl, &bh); + + /* PMULLW, PSRLW, PADDW */ + abl = lp_build_mul_u8n(bld->builder, i16_type, al, bl); + abh = lp_build_mul_u8n(bld->builder, i16_type, ah, bh); + + ab = lp_build_pack2(bld->builder, i16_type, type, abl, abh); + + return ab; + } + + /* FIXME */ + assert(0); + } + + if(type.fixed) + shift = lp_build_int_const_scalar(type, type.width/2); + else + shift = NULL; + + if(LLVMIsConstant(a) && LLVMIsConstant(b)) { + res = LLVMConstMul(a, b); + if(shift) { + if(type.sign) + res = LLVMConstAShr(res, shift); + else + res = LLVMConstLShr(res, shift); + } + } + else { + res = LLVMBuildMul(bld->builder, a, b, ""); + if(shift) { + if(type.sign) + res = LLVMBuildAShr(bld->builder, res, shift, ""); + else + res = LLVMBuildLShr(bld->builder, res, shift, ""); + } + } + + return res; +} + + +/** + * Small vector x scale multiplication optimization. + */ +LLVMValueRef +lp_build_mul_imm(struct lp_build_context *bld, + LLVMValueRef a, + int b) +{ + LLVMValueRef factor; + + if(b == 0) + return bld->zero; + + if(b == 1) + return a; + + if(b == -1) + return LLVMBuildNeg(bld->builder, a, ""); + + if(b == 2 && bld->type.floating) + return lp_build_add(bld, a, a); + + if(util_is_pot(b)) { + unsigned shift = ffs(b) - 1; + + if(bld->type.floating) { +#if 0 + /* + * Power of two multiplication by directly manipulating the mantissa. + * + * XXX: This might not be always faster, it will introduce a small error + * for multiplication by zero, and it will produce wrong results + * for Inf and NaN. 
+ */ + unsigned mantissa = lp_mantissa(bld->type); + factor = lp_build_int_const_scalar(bld->type, (unsigned long long)shift << mantissa); + a = LLVMBuildBitCast(bld->builder, a, lp_build_int_vec_type(bld->type), ""); + a = LLVMBuildAdd(bld->builder, a, factor, ""); + a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(bld->type), ""); + return a; +#endif + } + else { + factor = lp_build_const_scalar(bld->type, shift); + return LLVMBuildShl(bld->builder, a, factor, ""); + } + } + + factor = lp_build_const_scalar(bld->type, (double)b); + return lp_build_mul(bld, a, factor); +} + + +/** + * Generate a / b + */ +LLVMValueRef +lp_build_div(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef b) +{ + const struct lp_type type = bld->type; + + if(a == bld->zero) + return bld->zero; + if(a == bld->one) + return lp_build_rcp(bld, b); + if(b == bld->zero) + return bld->undef; + if(b == bld->one) + return a; + if(a == bld->undef || b == bld->undef) + return bld->undef; + + if(LLVMIsConstant(a) && LLVMIsConstant(b)) + return LLVMConstFDiv(a, b); + + if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) + return lp_build_mul(bld, a, lp_build_rcp(bld, b)); + + return LLVMBuildFDiv(bld->builder, a, b, ""); +} + + +/** + * Linear interpolation. + * + * This also works for integer values with a few caveats. + * + * @sa http://www.stereopsis.com/doubleblend.html + */ +LLVMValueRef +lp_build_lerp(struct lp_build_context *bld, + LLVMValueRef x, + LLVMValueRef v0, + LLVMValueRef v1) +{ + LLVMValueRef delta; + LLVMValueRef res; + + delta = lp_build_sub(bld, v1, v0); + + res = lp_build_mul(bld, x, delta); + + res = lp_build_add(bld, v0, res); + + if(bld->type.fixed) + /* XXX: This step is necessary for lerping 8bit colors stored on 16bits, + * but it will be wrong for other uses. Basically we need a more + * powerful lp_type, capable of further distinguishing the values + * interpretation from the value storage. 
*/ + res = LLVMBuildAnd(bld->builder, res, lp_build_int_const_scalar(bld->type, (1 << bld->type.width/2) - 1), ""); + + return res; +} + + +LLVMValueRef +lp_build_lerp_2d(struct lp_build_context *bld, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef v00, + LLVMValueRef v01, + LLVMValueRef v10, + LLVMValueRef v11) +{ + LLVMValueRef v0 = lp_build_lerp(bld, x, v00, v01); + LLVMValueRef v1 = lp_build_lerp(bld, x, v10, v11); + return lp_build_lerp(bld, y, v0, v1); +} + + +/** + * Generate min(a, b) + * Do checks for special cases. + */ +LLVMValueRef +lp_build_min(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef b) +{ + if(a == bld->undef || b == bld->undef) + return bld->undef; + + if(a == b) + return a; + + if(bld->type.norm) { + if(a == bld->zero || b == bld->zero) + return bld->zero; + if(a == bld->one) + return b; + if(b == bld->one) + return a; + } + + return lp_build_min_simple(bld, a, b); +} + + +/** + * Generate max(a, b) + * Do checks for special cases. + */ +LLVMValueRef +lp_build_max(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef b) +{ + if(a == bld->undef || b == bld->undef) + return bld->undef; + + if(a == b) + return a; + + if(bld->type.norm) { + if(a == bld->one || b == bld->one) + return bld->one; + if(a == bld->zero) + return b; + if(b == bld->zero) + return a; + } + + return lp_build_max_simple(bld, a, b); +} + + +/** + * Generate abs(a) + */ +LLVMValueRef +lp_build_abs(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + LLVMTypeRef vec_type = lp_build_vec_type(type); + + if(!type.sign) + return a; + + if(type.floating) { + /* Mask out the sign bit */ + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + unsigned long long absMask = ~(1ULL << (type.width - 1)); + LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long) absMask)); + a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + a = LLVMBuildAnd(bld->builder, a, mask, ""); + a = 
LLVMBuildBitCast(bld->builder, a, vec_type, ""); + return a; + } + + if(type.width*type.length == 128 && util_cpu_caps.has_ssse3) { + switch(type.width) { + case 8: + return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.b.128", vec_type, a); + case 16: + return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.w.128", vec_type, a); + case 32: + return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.d.128", vec_type, a); + } + } + + return lp_build_max(bld, a, LLVMBuildNeg(bld->builder, a, "")); +} + + +LLVMValueRef +lp_build_sgn(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMValueRef cond; + LLVMValueRef res; + + /* Handle non-zero case: res becomes +1 or -1 according to the sign of a */ + if(!type.sign) { + /* if not zero then sign must be positive */ + res = bld->one; + } + else if(type.floating) { + /* Take the sign bit of a and OR it into the bit pattern of the constant 1 */ + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1)); + LLVMValueRef sign; + LLVMValueRef one; + sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + sign = LLVMBuildAnd(bld->builder, sign, mask, ""); + one = LLVMConstBitCast(bld->one, int_vec_type); + res = LLVMBuildOr(bld->builder, sign, one, ""); + res = LLVMBuildBitCast(bld->builder, res, vec_type, ""); + } + else + { + LLVMValueRef minus_one = lp_build_const_scalar(type, -1.0); + cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, bld->zero); + res = lp_build_select(bld, cond, bld->one, minus_one); + } + + /* Handle zero: force 0 where a == 0, otherwise keep the sign computed above */ + cond = lp_build_cmp(bld, PIPE_FUNC_EQUAL, a, bld->zero); + res = lp_build_select(bld, cond, bld->zero, res); + + return res; +} + + +enum lp_build_round_sse41_mode +{ + LP_BUILD_ROUND_SSE41_NEAREST = 0, + LP_BUILD_ROUND_SSE41_FLOOR = 1, + LP_BUILD_ROUND_SSE41_CEIL = 2, + LP_BUILD_ROUND_SSE41_TRUNCATE = 3 +}; + + +static INLINE LLVMValueRef
+lp_build_round_sse41(struct lp_build_context *bld, + LLVMValueRef a, + enum lp_build_round_sse41_mode mode) +{ + const struct lp_type type = bld->type; + LLVMTypeRef vec_type = lp_build_vec_type(type); + const char *intrinsic; + + assert(type.floating); + assert(type.width*type.length == 128); + assert(lp_check_value(type, a)); + assert(util_cpu_caps.has_sse4_1); + + switch(type.width) { + case 32: + intrinsic = "llvm.x86.sse41.round.ps"; + break; + case 64: + intrinsic = "llvm.x86.sse41.round.pd"; + break; + default: + assert(0); + return bld->undef; + } + + return lp_build_intrinsic_binary(bld->builder, intrinsic, vec_type, a, + LLVMConstInt(LLVMInt32Type(), mode, 0)); +} + + +LLVMValueRef +lp_build_trunc(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + + assert(type.floating); + assert(lp_check_value(type, a)); + + if(util_cpu_caps.has_sse4_1) + return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_TRUNCATE); + else { + LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMValueRef res; + res = LLVMBuildFPToSI(bld->builder, a, int_vec_type, ""); + res = LLVMBuildSIToFP(bld->builder, res, vec_type, ""); + return res; + } +} + + +LLVMValueRef +lp_build_round(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + + assert(type.floating); + assert(lp_check_value(type, a)); + + if(util_cpu_caps.has_sse4_1) + return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST); + else { + LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMValueRef res; + res = lp_build_iround(bld, a); + res = LLVMBuildSIToFP(bld->builder, res, vec_type, ""); + return res; + } +} + + +LLVMValueRef +lp_build_floor(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + + assert(type.floating); + + if(util_cpu_caps.has_sse4_1) + return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR); + else { + 
LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMValueRef res; + res = lp_build_ifloor(bld, a); + res = LLVMBuildSIToFP(bld->builder, res, vec_type, ""); + return res; + } +} + + +LLVMValueRef +lp_build_ceil(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + + assert(type.floating); + assert(lp_check_value(type, a)); + + if(util_cpu_caps.has_sse4_1) + return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL); + else { + LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMValueRef res; + res = lp_build_iceil(bld, a); + res = LLVMBuildSIToFP(bld->builder, res, vec_type, ""); + return res; + } +} + + +/** + * Convert to integer, through whichever rounding method that's fastest, + * typically truncating to zero. + */ +LLVMValueRef +lp_build_itrunc(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + + assert(type.floating); + assert(lp_check_value(type, a)); + + return LLVMBuildFPToSI(bld->builder, a, int_vec_type, ""); +} + + +LLVMValueRef +lp_build_iround(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMValueRef res; + + assert(type.floating); + assert(lp_check_value(type, a)); + + if(util_cpu_caps.has_sse4_1) { + res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST); + } + else { + LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1)); + LLVMValueRef sign; + LLVMValueRef half; + + /* get sign bit */ + sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + sign = LLVMBuildAnd(bld->builder, sign, mask, ""); + + /* sign * 0.5 */ + half = lp_build_const_scalar(type, 0.5); + half = LLVMBuildBitCast(bld->builder, half, int_vec_type, ""); + half = LLVMBuildOr(bld->builder, sign, half, ""); + half = 
LLVMBuildBitCast(bld->builder, half, vec_type, ""); + + res = LLVMBuildAdd(bld->builder, a, half, ""); + } + + res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, ""); + + return res; +} + + +/** + * Convert float[] to int[] with floor(). + */ +LLVMValueRef +lp_build_ifloor(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMValueRef res; + + assert(type.floating); + assert(lp_check_value(type, a)); + + if(util_cpu_caps.has_sse4_1) { + res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR); + } + else { + /* Take the sign bit and add it to 1 constant */ + LLVMTypeRef vec_type = lp_build_vec_type(type); + unsigned mantissa = lp_mantissa(type); + LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1)); + LLVMValueRef sign; + LLVMValueRef offset; + + /* sign = a < 0 ? ~0 : 0 */ + sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + sign = LLVMBuildAnd(bld->builder, sign, mask, ""); + sign = LLVMBuildAShr(bld->builder, sign, lp_build_int_const_scalar(type, type.width - 1), ""); + lp_build_name(sign, "floor.sign"); + + /* offset = -0.99999(9)f */ + offset = lp_build_const_scalar(type, -(double)(((unsigned long long)1 << mantissa) - 1)/((unsigned long long)1 << mantissa)); + offset = LLVMConstBitCast(offset, int_vec_type); + + /* offset = a < 0 ? 
-0.99999(9)f : 0.0f */ + offset = LLVMBuildAnd(bld->builder, offset, sign, ""); + offset = LLVMBuildBitCast(bld->builder, offset, vec_type, ""); + lp_build_name(offset, "floor.offset"); + + res = LLVMBuildAdd(bld->builder, a, offset, ""); + lp_build_name(res, "floor.res"); + } + + res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, ""); + lp_build_name(res, "floor"); + + return res; +} + + +LLVMValueRef +lp_build_iceil(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMValueRef res; + + assert(type.floating); + assert(lp_check_value(type, a)); + + if(util_cpu_caps.has_sse4_1) { + res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL); + } + else { + assert(0); + res = bld->undef; + } + + res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, ""); + + return res; +} + + +LLVMValueRef +lp_build_sqrt(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + LLVMTypeRef vec_type = lp_build_vec_type(type); + char intrinsic[32]; + + /* TODO: optimize the constant case */ + /* TODO: optimize the constant case */ + + assert(type.floating); + util_snprintf(intrinsic, sizeof intrinsic, "llvm.sqrt.v%uf%u", type.length, type.width); + + return lp_build_intrinsic_unary(bld->builder, intrinsic, vec_type, a); +} + + +LLVMValueRef +lp_build_rcp(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + + if(a == bld->zero) + return bld->undef; + if(a == bld->one) + return bld->one; + if(a == bld->undef) + return bld->undef; + + assert(type.floating); + + if(LLVMIsConstant(a)) + return LLVMConstFDiv(bld->one, a); + + if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) + /* FIXME: improve precision */ + return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a); + + return LLVMBuildFDiv(bld->builder, bld->one, a, ""); +} + + +/** + * Generate 
1/sqrt(a) + */ +LLVMValueRef +lp_build_rsqrt(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + + assert(type.floating); + + if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) + return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rsqrt.ps", lp_build_vec_type(type), a); + + return lp_build_rcp(bld, lp_build_sqrt(bld, a)); +} + + +/** + * Generate cos(a) + */ +LLVMValueRef +lp_build_cos(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + LLVMTypeRef vec_type = lp_build_vec_type(type); + char intrinsic[32]; + + /* TODO: optimize the constant case */ + + assert(type.floating); + util_snprintf(intrinsic, sizeof intrinsic, "llvm.cos.v%uf%u", type.length, type.width); + + return lp_build_intrinsic_unary(bld->builder, intrinsic, vec_type, a); +} + + +/** + * Generate sin(a) + */ +LLVMValueRef +lp_build_sin(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + LLVMTypeRef vec_type = lp_build_vec_type(type); + char intrinsic[32]; + + /* TODO: optimize the constant case */ + + assert(type.floating); + util_snprintf(intrinsic, sizeof intrinsic, "llvm.sin.v%uf%u", type.length, type.width); + + return lp_build_intrinsic_unary(bld->builder, intrinsic, vec_type, a); +} + + +/** + * Generate pow(x, y) + */ +LLVMValueRef +lp_build_pow(struct lp_build_context *bld, + LLVMValueRef x, + LLVMValueRef y) +{ + /* TODO: optimize the constant case */ + if(LLVMIsConstant(x) && LLVMIsConstant(y)) + debug_printf("%s: inefficient/imprecise constant arithmetic\n", + __FUNCTION__); + + return lp_build_exp2(bld, lp_build_mul(bld, lp_build_log2(bld, x), y)); +} + + +/** + * Generate exp(x) + */ +LLVMValueRef +lp_build_exp(struct lp_build_context *bld, + LLVMValueRef x) +{ + /* log2(e) = 1/log(2) */ + LLVMValueRef log2e = lp_build_const_scalar(bld->type, 1.4426950408889634); + + return lp_build_mul(bld, log2e, lp_build_exp2(bld, x)); +} + + +/** + * 
Generate log(x) + */ +LLVMValueRef +lp_build_log(struct lp_build_context *bld, + LLVMValueRef x) +{ + /* log(2) */ + LLVMValueRef log2 = lp_build_const_scalar(bld->type, 0.69314718055994529); + + return lp_build_mul(bld, log2, lp_build_exp2(bld, x)); +} + + +#define EXP_POLY_DEGREE 3 +#define LOG_POLY_DEGREE 5 + + +/** + * Generate polynomial. + * Ex: coeffs[0] + x * coeffs[1] + x^2 * coeffs[2]. + */ +static LLVMValueRef +lp_build_polynomial(struct lp_build_context *bld, + LLVMValueRef x, + const double *coeffs, + unsigned num_coeffs) +{ + const struct lp_type type = bld->type; + LLVMValueRef res = NULL; + unsigned i; + + /* TODO: optimize the constant case */ + if(LLVMIsConstant(x)) + debug_printf("%s: inefficient/imprecise constant arithmetic\n", + __FUNCTION__); + + for (i = num_coeffs; i--; ) { + LLVMValueRef coeff = lp_build_const_scalar(type, coeffs[i]); + if(res) + res = lp_build_add(bld, coeff, lp_build_mul(bld, x, res)); + else + res = coeff; + } + + if(res) + return res; + else + return bld->undef; +} + + +/** + * Minimax polynomial fit of 2**x, in range [-0.5, 0.5[ + */ +const double lp_build_exp2_polynomial[] = { +#if EXP_POLY_DEGREE == 5 + 9.9999994e-1, 6.9315308e-1, 2.4015361e-1, 5.5826318e-2, 8.9893397e-3, 1.8775767e-3 +#elif EXP_POLY_DEGREE == 4 + 1.0000026, 6.9300383e-1, 2.4144275e-1, 5.2011464e-2, 1.3534167e-2 +#elif EXP_POLY_DEGREE == 3 + 9.9992520e-1, 6.9583356e-1, 2.2606716e-1, 7.8024521e-2 +#elif EXP_POLY_DEGREE == 2 + 1.0017247, 6.5763628e-1, 3.3718944e-1 +#else +#error +#endif +}; + + +void +lp_build_exp2_approx(struct lp_build_context *bld, + LLVMValueRef x, + LLVMValueRef *p_exp2_int_part, + LLVMValueRef *p_frac_part, + LLVMValueRef *p_exp2) +{ + const struct lp_type type = bld->type; + LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMValueRef ipart = NULL; + LLVMValueRef fpart = NULL; + LLVMValueRef expipart = NULL; + LLVMValueRef expfpart = NULL; + LLVMValueRef res = NULL; + 
+ if(p_exp2_int_part || p_frac_part || p_exp2) { + /* TODO: optimize the constant case */ + if(LLVMIsConstant(x)) + debug_printf("%s: inefficient/imprecise constant arithmetic\n", + __FUNCTION__); + + assert(type.floating && type.width == 32); + + x = lp_build_min(bld, x, lp_build_const_scalar(type, 129.0)); + x = lp_build_max(bld, x, lp_build_const_scalar(type, -126.99999)); + + /* ipart = int(x - 0.5) */ + ipart = LLVMBuildSub(bld->builder, x, lp_build_const_scalar(type, 0.5f), ""); + ipart = LLVMBuildFPToSI(bld->builder, ipart, int_vec_type, ""); + + /* fpart = x - ipart */ + fpart = LLVMBuildSIToFP(bld->builder, ipart, vec_type, ""); + fpart = LLVMBuildSub(bld->builder, x, fpart, ""); + } + + if(p_exp2_int_part || p_exp2) { + /* expipart = (float) (1 << ipart) */ + expipart = LLVMBuildAdd(bld->builder, ipart, lp_build_int_const_scalar(type, 127), ""); + expipart = LLVMBuildShl(bld->builder, expipart, lp_build_int_const_scalar(type, 23), ""); + expipart = LLVMBuildBitCast(bld->builder, expipart, vec_type, ""); + } + + if(p_exp2) { + expfpart = lp_build_polynomial(bld, fpart, lp_build_exp2_polynomial, + Elements(lp_build_exp2_polynomial)); + + res = LLVMBuildMul(bld->builder, expipart, expfpart, ""); + } + + if(p_exp2_int_part) + *p_exp2_int_part = expipart; + + if(p_frac_part) + *p_frac_part = fpart; + + if(p_exp2) + *p_exp2 = res; +} + + +LLVMValueRef +lp_build_exp2(struct lp_build_context *bld, + LLVMValueRef x) +{ + LLVMValueRef res; + lp_build_exp2_approx(bld, x, NULL, NULL, &res); + return res; +} + + +/** + * Minimax polynomial fit of log2(x)/(x - 1), for x in range [1, 2[ + * These coefficients can be generate with + * http://www.boost.org/doc/libs/1_36_0/libs/math/doc/sf_and_dist/html/math_toolkit/toolkit/internals2/minimax.html + */ +const double lp_build_log2_polynomial[] = { +#if LOG_POLY_DEGREE == 6 + 3.11578814719469302614, -3.32419399085241980044, 2.59883907202499966007, -1.23152682416275988241, 0.318212422185251071475, -0.0344359067839062357313 
+#elif LOG_POLY_DEGREE == 5 + 2.8882704548164776201, -2.52074962577807006663, 1.48116647521213171641, -0.465725644288844778798, 0.0596515482674574969533 +#elif LOG_POLY_DEGREE == 4 + 2.61761038894603480148, -1.75647175389045657003, 0.688243882994381274313, -0.107254423828329604454 +#elif LOG_POLY_DEGREE == 3 + 2.28330284476918490682, -1.04913055217340124191, 0.204446009836232697516 +#else +#error +#endif +}; + + +/** + * See http://www.devmaster.net/forums/showthread.php?p=43580 + */ +void +lp_build_log2_approx(struct lp_build_context *bld, + LLVMValueRef x, + LLVMValueRef *p_exp, + LLVMValueRef *p_floor_log2, + LLVMValueRef *p_log2) +{ + const struct lp_type type = bld->type; + LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + + LLVMValueRef expmask = lp_build_int_const_scalar(type, 0x7f800000); + LLVMValueRef mantmask = lp_build_int_const_scalar(type, 0x007fffff); + LLVMValueRef one = LLVMConstBitCast(bld->one, int_vec_type); + + LLVMValueRef i = NULL; + LLVMValueRef exp = NULL; + LLVMValueRef mant = NULL; + LLVMValueRef logexp = NULL; + LLVMValueRef logmant = NULL; + LLVMValueRef res = NULL; + + if(p_exp || p_floor_log2 || p_log2) { + /* TODO: optimize the constant case */ + if(LLVMIsConstant(x)) + debug_printf("%s: inefficient/imprecise constant arithmetic\n", + __FUNCTION__); + + assert(type.floating && type.width == 32); + + i = LLVMBuildBitCast(bld->builder, x, int_vec_type, ""); + + /* exp = (float) exponent(x) */ + exp = LLVMBuildAnd(bld->builder, i, expmask, ""); + } + + if(p_floor_log2 || p_log2) { + logexp = LLVMBuildLShr(bld->builder, exp, lp_build_int_const_scalar(type, 23), ""); + logexp = LLVMBuildSub(bld->builder, logexp, lp_build_int_const_scalar(type, 127), ""); + logexp = LLVMBuildSIToFP(bld->builder, logexp, vec_type, ""); + } + + if(p_log2) { + /* mant = (float) mantissa(x) */ + mant = LLVMBuildAnd(bld->builder, i, mantmask, ""); + mant = LLVMBuildOr(bld->builder, mant, one, ""); + mant 
= LLVMBuildBitCast(bld->builder, mant, vec_type, ""); + + logmant = lp_build_polynomial(bld, mant, lp_build_log2_polynomial, + Elements(lp_build_log2_polynomial)); + + /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/ + logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildSub(bld->builder, mant, bld->one, ""), ""); + + res = LLVMBuildAdd(bld->builder, logmant, logexp, ""); + } + + if(p_exp) + *p_exp = exp; + + if(p_floor_log2) + *p_floor_log2 = logexp; + + if(p_log2) + *p_log2 = res; +} + + +LLVMValueRef +lp_build_log2(struct lp_build_context *bld, + LLVMValueRef x) +{ + LLVMValueRef res; + lp_build_log2_approx(bld, x, NULL, NULL, &res); + return res; +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h new file mode 100644 index 00000000000..62be4b9aee1 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h @@ -0,0 +1,203 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Helper arithmetic functions. + * + * @author Jose Fonseca + */ + + +#ifndef LP_BLD_ARIT_H +#define LP_BLD_ARIT_H + + +#include + + +struct lp_type; +struct lp_build_context; + + +/** + * Complement, i.e., 1 - a. + */ +LLVMValueRef +lp_build_comp(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_add(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef b); + +LLVMValueRef +lp_build_sub(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef b); + +LLVMValueRef +lp_build_mul(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef b); + +LLVMValueRef +lp_build_mul_imm(struct lp_build_context *bld, + LLVMValueRef a, + int b); + +LLVMValueRef +lp_build_div(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef b); + +LLVMValueRef +lp_build_lerp(struct lp_build_context *bld, + LLVMValueRef x, + LLVMValueRef v0, + LLVMValueRef v1); + +/** + * Bilinear interpolation. + * + * Values indices are in v_{yx}. 
+ */ +LLVMValueRef +lp_build_lerp_2d(struct lp_build_context *bld, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef v00, + LLVMValueRef v01, + LLVMValueRef v10, + LLVMValueRef v11); + +LLVMValueRef +lp_build_min(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef b); + +LLVMValueRef +lp_build_max(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef b); + +LLVMValueRef +lp_build_abs(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_sgn(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_round(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_floor(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_ceil(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_trunc(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_ifloor(struct lp_build_context *bld, + LLVMValueRef a); +LLVMValueRef +lp_build_iceil(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_iround(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_itrunc(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_sqrt(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_rcp(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_rsqrt(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_cos(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_sin(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_pow(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef b); + +LLVMValueRef +lp_build_exp(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_log(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_exp2(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_log2(struct lp_build_context *bld, + LLVMValueRef a); + 
+void +lp_build_exp2_approx(struct lp_build_context *bld, + LLVMValueRef x, + LLVMValueRef *p_exp2_int_part, + LLVMValueRef *p_frac_part, + LLVMValueRef *p_exp2); + +void +lp_build_log2_approx(struct lp_build_context *bld, + LLVMValueRef x, + LLVMValueRef *p_exp, + LLVMValueRef *p_floor_log2, + LLVMValueRef *p_log2); + +#endif /* !LP_BLD_ARIT_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_blend.h b/src/gallium/auxiliary/gallivm/lp_bld_blend.h new file mode 100644 index 00000000000..da272e549f3 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_blend.h @@ -0,0 +1,107 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef LP_BLD_BLEND_H +#define LP_BLD_BLEND_H + + +/** + * @file + * LLVM IR building helpers interfaces. + * + * We use LLVM-C bindings for now. They are not documented, but follow the C++ + * interfaces very closely, and appear to be complete enough for code + * generation. See + * http://npcontemplation.blogspot.com/2008/06/secret-of-llvm-c-bindings.html + * for a standalone example. + */ + +#include <llvm-c/Core.h> + +#include "pipe/p_format.h" + + +struct pipe_blend_state; +struct lp_type; +struct lp_build_context; + + +/** + * Whether the blending function is commutative or not. + */ +boolean +lp_build_blend_func_commutative(unsigned func); + + +/** + * Whether the blending functions are the reverse of each other. + */ +boolean +lp_build_blend_func_reverse(unsigned rgb_func, unsigned alpha_func); + + +LLVMValueRef +lp_build_blend_func(struct lp_build_context *bld, + unsigned func, + LLVMValueRef term1, + LLVMValueRef term2); + + +LLVMValueRef +lp_build_blend_aos(LLVMBuilderRef builder, + const struct pipe_blend_state *blend, + struct lp_type type, + LLVMValueRef src, + LLVMValueRef dst, + LLVMValueRef const_, + unsigned alpha_swizzle); + + +void +lp_build_blend_soa(LLVMBuilderRef builder, + const struct pipe_blend_state *blend, + struct lp_type type, + LLVMValueRef src[4], + LLVMValueRef dst[4], + LLVMValueRef const_[4], + LLVMValueRef res[4]); + + +/** + * Apply a logic op. + * + * src/dst parameters are packed values. It should work regardless of whether + * the inputs are scalars or a vector.
+ */ +LLVMValueRef +lp_build_logicop(LLVMBuilderRef builder, + unsigned logicop_func, + LLVMValueRef src, + LLVMValueRef dst); + + +#endif /* !LP_BLD_BLEND_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_blend_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_blend_aos.c new file mode 100644 index 00000000000..0215bb72ac6 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_blend_aos.c @@ -0,0 +1,360 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * @file + * Blend LLVM IR generation -- AoS layout. + * + * AoS blending is in general much slower than SoA, but there are some cases + * where it might be faster. 
In particular, if a pixel is rendered only once + * then the overhead of tiling and untiling will dominate over the speedup that + * SoA gives. So we might want to detect such cases and fallback to AoS in the + * future, but for now this function is here for historical/benchmarking + * purposes. + * + * Run lp_blend_test after any change to this file. + * + * @author Jose Fonseca + */ + + +#include "pipe/p_state.h" +#include "util/u_debug.h" + +#include "lp_bld_type.h" +#include "lp_bld_const.h" +#include "lp_bld_arit.h" +#include "lp_bld_logic.h" +#include "lp_bld_swizzle.h" +#include "lp_bld_blend.h" +#include "lp_bld_debug.h" + + +/** + * We may use the same values several times, so we keep them here to avoid + * recomputing them. Also reusing the values allows us to do simplifications + * that LLVM optimization passes wouldn't normally be able to do. + */ +struct lp_build_blend_aos_context +{ + struct lp_build_context base; + + LLVMValueRef src; + LLVMValueRef dst; + LLVMValueRef const_; + + LLVMValueRef inv_src; + LLVMValueRef inv_dst; + LLVMValueRef inv_const; + LLVMValueRef saturate; + + LLVMValueRef rgb_src_factor; + LLVMValueRef alpha_src_factor; + LLVMValueRef rgb_dst_factor; + LLVMValueRef alpha_dst_factor; +}; + + +static LLVMValueRef +lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld, + unsigned factor, + boolean alpha) +{ + switch (factor) { + case PIPE_BLENDFACTOR_ZERO: + return bld->base.zero; + case PIPE_BLENDFACTOR_ONE: + return bld->base.one; + case PIPE_BLENDFACTOR_SRC_COLOR: + case PIPE_BLENDFACTOR_SRC_ALPHA: + return bld->src; + case PIPE_BLENDFACTOR_DST_COLOR: + case PIPE_BLENDFACTOR_DST_ALPHA: + return bld->dst; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + if(alpha) + return bld->base.one; + else { + if(!bld->inv_dst) + bld->inv_dst = lp_build_comp(&bld->base, bld->dst); + if(!bld->saturate) + bld->saturate = lp_build_min(&bld->base, bld->src, bld->inv_dst); + return bld->saturate; + } + case PIPE_BLENDFACTOR_CONST_COLOR:
+ case PIPE_BLENDFACTOR_CONST_ALPHA: + return bld->const_; + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + /* TODO */ + assert(0); + return bld->base.zero; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + if(!bld->inv_src) + bld->inv_src = lp_build_comp(&bld->base, bld->src); + return bld->inv_src; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + if(!bld->inv_dst) + bld->inv_dst = lp_build_comp(&bld->base, bld->dst); + return bld->inv_dst; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + if(!bld->inv_const) + bld->inv_const = lp_build_comp(&bld->base, bld->const_); + return bld->inv_const; + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + /* TODO */ + assert(0); + return bld->base.zero; + default: + assert(0); + return bld->base.zero; + } +} + + +enum lp_build_blend_swizzle { + LP_BUILD_BLEND_SWIZZLE_RGBA = 0, + LP_BUILD_BLEND_SWIZZLE_AAAA = 1 +}; + + +/** + * How should we shuffle the base factor. 
+ */ +static enum lp_build_blend_swizzle +lp_build_blend_factor_swizzle(unsigned factor) +{ + switch (factor) { + case PIPE_BLENDFACTOR_ONE: + case PIPE_BLENDFACTOR_ZERO: + case PIPE_BLENDFACTOR_SRC_COLOR: + case PIPE_BLENDFACTOR_DST_COLOR: + case PIPE_BLENDFACTOR_CONST_COLOR: + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + case PIPE_BLENDFACTOR_INV_DST_COLOR: + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + return LP_BUILD_BLEND_SWIZZLE_RGBA; + case PIPE_BLENDFACTOR_SRC_ALPHA: + case PIPE_BLENDFACTOR_DST_ALPHA: + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + case PIPE_BLENDFACTOR_CONST_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + return LP_BUILD_BLEND_SWIZZLE_AAAA; + default: + assert(0); + return LP_BUILD_BLEND_SWIZZLE_RGBA; + } +} + + +static LLVMValueRef +lp_build_blend_swizzle(struct lp_build_blend_aos_context *bld, + LLVMValueRef rgb, + LLVMValueRef alpha, + enum lp_build_blend_swizzle rgb_swizzle, + unsigned alpha_swizzle) +{ + if(rgb == alpha) { + if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA) + return rgb; + if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA) + return lp_build_broadcast_aos(&bld->base, rgb, alpha_swizzle); + } + else { + if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA) { + boolean cond[4] = {0, 0, 0, 0}; + cond[alpha_swizzle] = 1; + return lp_build_select_aos(&bld->base, alpha, rgb, cond); + } + if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA) { + unsigned char swizzle[4]; + swizzle[0] = alpha_swizzle; + swizzle[1] = alpha_swizzle; + swizzle[2] = alpha_swizzle; + swizzle[3] = alpha_swizzle; + swizzle[alpha_swizzle] += 4; + return lp_build_swizzle2_aos(&bld->base, rgb, alpha, swizzle); + } + } + assert(0); + return bld->base.undef; +} + + +/** + * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendFuncSeparate.xml + */ +static 
LLVMValueRef +lp_build_blend_factor(struct lp_build_blend_aos_context *bld, + LLVMValueRef factor1, + unsigned rgb_factor, + unsigned alpha_factor, + unsigned alpha_swizzle) +{ + LLVMValueRef rgb_factor_; + LLVMValueRef alpha_factor_; + LLVMValueRef factor2; + enum lp_build_blend_swizzle rgb_swizzle; + + rgb_factor_ = lp_build_blend_factor_unswizzled(bld, rgb_factor, FALSE); + alpha_factor_ = lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE); + + rgb_swizzle = lp_build_blend_factor_swizzle(rgb_factor); + + factor2 = lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle); + + return lp_build_mul(&bld->base, factor1, factor2); +} + + +boolean +lp_build_blend_func_commutative(unsigned func) +{ + switch (func) { + case PIPE_BLEND_ADD: + case PIPE_BLEND_MIN: + case PIPE_BLEND_MAX: + return TRUE; + case PIPE_BLEND_SUBTRACT: + case PIPE_BLEND_REVERSE_SUBTRACT: + return FALSE; + default: + assert(0); + return TRUE; + } +} + + +boolean +lp_build_blend_func_reverse(unsigned rgb_func, unsigned alpha_func) +{ + if(rgb_func == alpha_func) + return FALSE; + if(rgb_func == PIPE_BLEND_SUBTRACT && alpha_func == PIPE_BLEND_REVERSE_SUBTRACT) + return TRUE; + if(rgb_func == PIPE_BLEND_REVERSE_SUBTRACT && alpha_func == PIPE_BLEND_SUBTRACT) + return TRUE; + return FALSE; +} + + +/** + * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendEquationSeparate.xml + */ +LLVMValueRef +lp_build_blend_func(struct lp_build_context *bld, + unsigned func, + LLVMValueRef term1, + LLVMValueRef term2) +{ + switch (func) { + case PIPE_BLEND_ADD: + return lp_build_add(bld, term1, term2); + break; + case PIPE_BLEND_SUBTRACT: + return lp_build_sub(bld, term1, term2); + case PIPE_BLEND_REVERSE_SUBTRACT: + return lp_build_sub(bld, term2, term1); + case PIPE_BLEND_MIN: + return lp_build_min(bld, term1, term2); + case PIPE_BLEND_MAX: + return lp_build_max(bld, term1, term2); + default: + assert(0); + return bld->zero; + } +} + + +LLVMValueRef 
+lp_build_blend_aos(LLVMBuilderRef builder, + const struct pipe_blend_state *blend, + struct lp_type type, + LLVMValueRef src, + LLVMValueRef dst, + LLVMValueRef const_, + unsigned alpha_swizzle) +{ + struct lp_build_blend_aos_context bld; + LLVMValueRef src_term; + LLVMValueRef dst_term; + + /* FIXME: independent per-render-target blending and partial color masks are not handled; only rt[0] with all four channels enabled is supported */ + assert(blend->independent_blend_enable == 0); + assert(blend->rt[0].colormask == 0xf); + + if(!blend->rt[0].blend_enable) + return src; + + /* It makes no sense to blend unless values are normalized */ + assert(type.norm); + + /* Setup build context */ + memset(&bld, 0, sizeof bld); + lp_build_context_init(&bld.base, builder, type); + bld.src = src; + bld.dst = dst; + bld.const_ = const_; + + /* TODO: There are still a few optimization opportunities here. For certain + * combinations it is possible to reorder the operations and thereby save + * some instructions. */ + + src_term = lp_build_blend_factor(&bld, src, blend->rt[0].rgb_src_factor, + blend->rt[0].alpha_src_factor, alpha_swizzle); + dst_term = lp_build_blend_factor(&bld, dst, blend->rt[0].rgb_dst_factor, + blend->rt[0].alpha_dst_factor, alpha_swizzle); + + lp_build_name(src_term, "src_term"); + lp_build_name(dst_term, "dst_term"); + + if(blend->rt[0].rgb_func == blend->rt[0].alpha_func) { + return lp_build_blend_func(&bld.base, blend->rt[0].rgb_func, src_term, dst_term); + } + else { + /* Separate RGB / A functions */ + + LLVMValueRef rgb; + LLVMValueRef alpha; + + rgb = lp_build_blend_func(&bld.base, blend->rt[0].rgb_func, src_term, dst_term); + alpha = lp_build_blend_func(&bld.base, blend->rt[0].alpha_func, src_term, dst_term); + + return lp_build_blend_swizzle(&bld, rgb, alpha, LP_BUILD_BLEND_SWIZZLE_RGBA, alpha_swizzle); + } +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_blend_logicop.c b/src/gallium/auxiliary/gallivm/lp_bld_blend_logicop.c new file mode 100644 index 00000000000..1eac0a5c891 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_blend_logicop.c @@ -0,0 +1,109 @@
+/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * @file + * Blend LLVM IR generation -- logic ops. 
+ * + * @author Jose Fonseca + */ + + +#include "pipe/p_state.h" +#include "util/u_debug.h" + +#include "lp_bld_blend.h" + + +LLVMValueRef +lp_build_logicop(LLVMBuilderRef builder, + unsigned logicop_func, + LLVMValueRef src, + LLVMValueRef dst) +{ + LLVMTypeRef type; + LLVMValueRef res; + + type = LLVMTypeOf(src); + + switch (logicop_func) { + case PIPE_LOGICOP_CLEAR: + res = LLVMConstNull(type); + break; + case PIPE_LOGICOP_NOR: + res = LLVMBuildNot(builder, LLVMBuildOr(builder, src, dst, ""), ""); + break; + case PIPE_LOGICOP_AND_INVERTED: + res = LLVMBuildAnd(builder, LLVMBuildNot(builder, src, ""), dst, ""); + break; + case PIPE_LOGICOP_COPY_INVERTED: + res = LLVMBuildNot(builder, src, ""); + break; + case PIPE_LOGICOP_AND_REVERSE: + res = LLVMBuildAnd(builder, src, LLVMBuildNot(builder, dst, ""), ""); + break; + case PIPE_LOGICOP_INVERT: + res = LLVMBuildNot(builder, dst, ""); + break; + case PIPE_LOGICOP_XOR: + res = LLVMBuildXor(builder, src, dst, ""); + break; + case PIPE_LOGICOP_NAND: + res = LLVMBuildNot(builder, LLVMBuildAnd(builder, src, dst, ""), ""); + break; + case PIPE_LOGICOP_AND: + res = LLVMBuildAnd(builder, src, dst, ""); + break; + case PIPE_LOGICOP_EQUIV: + res = LLVMBuildNot(builder, LLVMBuildXor(builder, src, dst, ""), ""); + break; + case PIPE_LOGICOP_NOOP: + res = dst; + break; + case PIPE_LOGICOP_OR_INVERTED: + res = LLVMBuildOr(builder, LLVMBuildNot(builder, src, ""), dst, ""); + break; + case PIPE_LOGICOP_COPY: + res = src; + break; + case PIPE_LOGICOP_OR_REVERSE: + res = LLVMBuildOr(builder, src, LLVMBuildNot(builder, dst, ""), ""); + break; + case PIPE_LOGICOP_OR: + res = LLVMBuildOr(builder, src, dst, ""); + break; + case PIPE_LOGICOP_SET: + res = LLVMConstAllOnes(type); + break; + default: + assert(0); + res = src; + } + + return res; +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_blend_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_blend_soa.c new file mode 100644 index 00000000000..6d5a45db7a3 --- /dev/null +++ 
b/src/gallium/auxiliary/gallivm/lp_bld_blend_soa.c @@ -0,0 +1,298 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * @file + * Blend LLVM IR generation -- SoA layout. + * + * Blending in SoA is much faster than AoS, especially when separate rgb/alpha + * factors/functions are used, since no channel masking/shuffling is necessary + * and we can achieve the full throughput of the SIMD operations. Furthermore + * the fragment shader output is also in SoA, so it fits nicely with the rest of + * the fragment pipeline. + * + * The drawback is that to be displayed the color buffer needs to be in AoS + * layout, so we need to tile/untile the color buffer before/after rendering. 
+ * A color buffer like + * + * R11 G11 B11 A11 R12 G12 B12 A12 R13 G13 B13 A13 R14 G14 B14 A14 ... + * R21 G21 B21 A21 R22 G22 B22 A22 R23 G23 B23 A23 R24 G24 B24 A24 ... + * + * R31 G31 B31 A31 R32 G32 B32 A32 R33 G33 B33 A33 R34 G34 B34 A34 ... + * R41 G41 B41 A41 R42 G42 B42 A42 R43 G43 B43 A43 R44 G44 B44 A44 ... + * + * ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... + * + * will actually be stored in memory as + * + * R11 R12 R21 R22 R13 R14 R23 R24 ... G11 G12 G21 G22 G13 G14 G23 G24 ... B11 B12 B21 B22 B13 B14 B23 B24 ... A11 A12 A21 A22 A13 A14 A23 A24 ... + * R31 R32 R41 R42 R33 R34 R43 R44 ... G31 G32 G41 G42 G33 G34 G43 G44 ... B31 B32 B41 B42 B33 B34 B43 B44 ... A31 A32 A41 A42 A33 A34 A43 A44 ... + * ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... + * + * NOTE: Run lp_blend_test after any change to this file. + * + * You can also run lp_blend_test to obtain AoS vs SoA benchmarks. Invoking it + * as: + * + * lp_blend_test -o blend.tsv + * + * will generate a tab-seperated-file with the test results and performance + * measurements. + * + * @author Jose Fonseca + */ + + +#include "pipe/p_state.h" +#include "util/u_debug.h" + +#include "lp_bld_type.h" +#include "lp_bld_arit.h" +#include "lp_bld_blend.h" + + +/** + * We may the same values several times, so we keep them here to avoid + * recomputing them. Also reusing the values allows us to do simplifications + * that LLVM optimization passes wouldn't normally be able to do. + */ +struct lp_build_blend_soa_context +{ + struct lp_build_context base; + + LLVMValueRef src[4]; + LLVMValueRef dst[4]; + LLVMValueRef con[4]; + + LLVMValueRef inv_src[4]; + LLVMValueRef inv_dst[4]; + LLVMValueRef inv_con[4]; + + LLVMValueRef src_alpha_saturate; + + /** + * We store all factors in a table in order to eliminate redundant + * multiplications later. 
+ */ + LLVMValueRef factor[2][2][4]; + + /** + * Table with all terms. + */ + LLVMValueRef term[2][4]; +}; + + +static LLVMValueRef +lp_build_blend_soa_factor(struct lp_build_blend_soa_context *bld, + unsigned factor, unsigned i) +{ + /* + * Compute src/first term RGB + */ + switch (factor) { + case PIPE_BLENDFACTOR_ONE: + return bld->base.one; + case PIPE_BLENDFACTOR_SRC_COLOR: + return bld->src[i]; + case PIPE_BLENDFACTOR_SRC_ALPHA: + return bld->src[3]; + case PIPE_BLENDFACTOR_DST_COLOR: + return bld->dst[i]; + case PIPE_BLENDFACTOR_DST_ALPHA: + return bld->dst[3]; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + if(i == 3) + return bld->base.one; + else { + if(!bld->inv_dst[3]) + bld->inv_dst[3] = lp_build_comp(&bld->base, bld->dst[3]); + if(!bld->src_alpha_saturate) + bld->src_alpha_saturate = lp_build_min(&bld->base, bld->src[3], bld->inv_dst[3]); + return bld->src_alpha_saturate; + } + case PIPE_BLENDFACTOR_CONST_COLOR: + return bld->con[i]; + case PIPE_BLENDFACTOR_CONST_ALPHA: + return bld->con[3]; + case PIPE_BLENDFACTOR_SRC1_COLOR: + /* TODO */ + assert(0); + return bld->base.zero; + case PIPE_BLENDFACTOR_SRC1_ALPHA: + /* TODO */ + assert(0); + return bld->base.zero; + case PIPE_BLENDFACTOR_ZERO: + return bld->base.zero; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + if(!bld->inv_src[i]) + bld->inv_src[i] = lp_build_comp(&bld->base, bld->src[i]); + return bld->inv_src[i]; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + if(!bld->inv_src[3]) + bld->inv_src[3] = lp_build_comp(&bld->base, bld->src[3]); + return bld->inv_src[3]; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + if(!bld->inv_dst[i]) + bld->inv_dst[i] = lp_build_comp(&bld->base, bld->dst[i]); + return bld->inv_dst[i]; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + if(!bld->inv_dst[3]) + bld->inv_dst[3] = lp_build_comp(&bld->base, bld->dst[3]); + return bld->inv_dst[3]; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + if(!bld->inv_con[i]) + bld->inv_con[i] = lp_build_comp(&bld->base, bld->con[i]); + return bld->inv_con[i]; + case 
PIPE_BLENDFACTOR_INV_CONST_ALPHA: + if(!bld->inv_con[3]) + bld->inv_con[3] = lp_build_comp(&bld->base, bld->con[3]); + return bld->inv_con[3]; + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + /* TODO */ + assert(0); + return bld->base.zero; + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + /* TODO */ + assert(0); + return bld->base.zero; + default: + assert(0); + return bld->base.zero; + } +} + + +/** + * Generate blend code in SOA mode. + * \param src src/fragment color + * \param dst dst/framebuffer color + * \param con constant blend color + * \param res the result/output + */ +void +lp_build_blend_soa(LLVMBuilderRef builder, + const struct pipe_blend_state *blend, + struct lp_type type, + LLVMValueRef src[4], + LLVMValueRef dst[4], + LLVMValueRef con[4], + LLVMValueRef res[4]) +{ + struct lp_build_blend_soa_context bld; + unsigned i, j, k; + + /* Setup build context */ + memset(&bld, 0, sizeof bld); + lp_build_context_init(&bld.base, builder, type); + for (i = 0; i < 4; ++i) { + bld.src[i] = src[i]; + bld.dst[i] = dst[i]; + bld.con[i] = con[i]; + } + + for (i = 0; i < 4; ++i) { + if (blend->rt[0].colormask & (1 << i)) { + if (blend->logicop_enable) { + if(!type.floating) { + res[i] = lp_build_logicop(builder, blend->logicop_func, src[i], dst[i]); + } + else + res[i] = dst[i]; + } + else if (blend->rt[0].blend_enable) { + unsigned src_factor = i < 3 ? blend->rt[0].rgb_src_factor : blend->rt[0].alpha_src_factor; + unsigned dst_factor = i < 3 ? blend->rt[0].rgb_dst_factor : blend->rt[0].alpha_dst_factor; + unsigned func = i < 3 ? blend->rt[0].rgb_func : blend->rt[0].alpha_func; + boolean func_commutative = lp_build_blend_func_commutative(func); + + /* It makes no sense to blend unless values are normalized */ + assert(type.norm); + + /* + * Compute src/dst factors. 
+ */ + + bld.factor[0][0][i] = src[i]; + bld.factor[0][1][i] = lp_build_blend_soa_factor(&bld, src_factor, i); + bld.factor[1][0][i] = dst[i]; + bld.factor[1][1][i] = lp_build_blend_soa_factor(&bld, dst_factor, i); + + /* + * Compute src/dst terms + */ + + for(k = 0; k < 2; ++k) { + /* See if this multiplication has been previously computed */ + for(j = 0; j < i; ++j) { + if((bld.factor[k][0][j] == bld.factor[k][0][i] && + bld.factor[k][1][j] == bld.factor[k][1][i]) || + (bld.factor[k][0][j] == bld.factor[k][1][i] && + bld.factor[k][1][j] == bld.factor[k][0][i])) + break; + } + + if(j < i) + bld.term[k][i] = bld.term[k][j]; + else + bld.term[k][i] = lp_build_mul(&bld.base, bld.factor[k][0][i], bld.factor[k][1][i]); + } + + /* + * Combine terms + */ + + /* See if this function has been previously applied */ + for(j = 0; j < i; ++j) { + unsigned prev_func = j < 3 ? blend->rt[0].rgb_func : blend->rt[0].alpha_func; + unsigned func_reverse = lp_build_blend_func_reverse(func, prev_func); + + if((!func_reverse && + bld.term[0][j] == bld.term[0][i] && + bld.term[1][j] == bld.term[1][i]) || + ((func_commutative || func_reverse) && + bld.term[0][j] == bld.term[1][i] && + bld.term[1][j] == bld.term[0][i])) + break; + } + + if(j < i) + res[i] = res[j]; + else + res[i] = lp_build_blend_func(&bld.base, func, bld.term[0][i], bld.term[1][i]); + } + else { + res[i] = src[i]; + } + } + else { + res[i] = dst[i]; + } + } +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.c b/src/gallium/auxiliary/gallivm/lp_bld_const.c new file mode 100644 index 00000000000..c8eaa8c3940 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_const.c @@ -0,0 +1,369 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * @file + * Helper functions for constant building. + * + * @author Jose Fonseca + */ + +#include + +#include "util/u_debug.h" + +#include "lp_bld_type.h" +#include "lp_bld_const.h" + + +unsigned +lp_mantissa(struct lp_type type) +{ + assert(type.floating); + + if(type.floating) { + switch(type.width) { + case 32: + return 23; + case 64: + return 53; + default: + assert(0); + return 0; + } + } + else { + if(type.sign) + return type.width - 1; + else + return type.width; + } +} + + +/** + * Shift of the unity. + * + * Same as lp_const_scale(), but in terms of shifts. + */ +unsigned +lp_const_shift(struct lp_type type) +{ + if(type.floating) + return 0; + else if(type.fixed) + return type.width/2; + else if(type.norm) + return type.sign ? 
type.width - 1 : type.width; + else + return 0; +} + + +unsigned +lp_const_offset(struct lp_type type) +{ + if(type.floating || type.fixed) + return 0; + else if(type.norm) + return 1; + else + return 0; +} + + +/** + * Scaling factor between the LLVM native value and its interpretation. + * + * This is 1.0 for all floating types and unnormalized integers, and something + * else for the fixed points types and normalized integers. + */ +double +lp_const_scale(struct lp_type type) +{ + unsigned long long llscale; + double dscale; + + llscale = (unsigned long long)1 << lp_const_shift(type); + llscale -= lp_const_offset(type); + dscale = (double)llscale; + assert((unsigned long long)dscale == llscale); + + return dscale; +} + + +/** + * Minimum value representable by the type. + */ +double +lp_const_min(struct lp_type type) +{ + unsigned bits; + + if(!type.sign) + return 0.0; + + if(type.norm) + return -1.0; + + if (type.floating) { + switch(type.width) { + case 32: + return -FLT_MAX; + case 64: + return -DBL_MAX; + default: + assert(0); + return 0.0; + } + } + + if(type.fixed) + /* FIXME: consider the fractional bits? */ + bits = type.width / 2 - 1; + else + bits = type.width - 1; + + return (double)-((long long)1 << bits); +} + + +/** + * Maximum value representable by the type. 
+ */ +double +lp_const_max(struct lp_type type) +{ + unsigned bits; + + if(type.norm) + return 1.0; + + if (type.floating) { + switch(type.width) { + case 32: + return FLT_MAX; + case 64: + return DBL_MAX; + default: + assert(0); + return 0.0; + } + } + + if(type.fixed) + bits = type.width / 2; + else + bits = type.width; + + if(type.sign) + bits -= 1; + + return (double)(((unsigned long long)1 << bits) - 1); +} + + +double +lp_const_eps(struct lp_type type) +{ + if (type.floating) { + switch(type.width) { + case 32: + return FLT_EPSILON; + case 64: + return DBL_EPSILON; + default: + assert(0); + return 0.0; + } + } + else { + double scale = lp_const_scale(type); + return 1.0/scale; + } +} + + +LLVMValueRef +lp_build_undef(struct lp_type type) +{ + LLVMTypeRef vec_type = lp_build_vec_type(type); + return LLVMGetUndef(vec_type); +} + + +LLVMValueRef +lp_build_zero(struct lp_type type) +{ + LLVMTypeRef vec_type = lp_build_vec_type(type); + return LLVMConstNull(vec_type); +} + + +LLVMValueRef +lp_build_one(struct lp_type type) +{ + LLVMTypeRef elem_type; + LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; + unsigned i; + + assert(type.length <= LP_MAX_VECTOR_LENGTH); + + elem_type = lp_build_elem_type(type); + + if(type.floating) + elems[0] = LLVMConstReal(elem_type, 1.0); + else if(type.fixed) + elems[0] = LLVMConstInt(elem_type, 1LL << (type.width/2), 0); + else if(!type.norm) + elems[0] = LLVMConstInt(elem_type, 1, 0); + else if(type.sign) + elems[0] = LLVMConstInt(elem_type, (1LL << (type.width - 1)) - 1, 0); + else { + /* special case' -- 1.0 for normalized types is more easily attained if + * we start with a vector consisting of all bits set */ + LLVMTypeRef vec_type = LLVMVectorType(elem_type, type.length); + LLVMValueRef vec = LLVMConstAllOnes(vec_type); + +#if 0 + if(type.sign) + /* TODO: Unfortunately this caused "Tried to create a shift operation + * on a non-integer type!" 
*/ + vec = LLVMConstLShr(vec, lp_build_int_const_scalar(type, 1)); +#endif + + return vec; + } + + for(i = 1; i < type.length; ++i) + elems[i] = elems[0]; + + return LLVMConstVector(elems, type.length); +} + + +LLVMValueRef +lp_build_const_scalar(struct lp_type type, + double val) +{ + LLVMTypeRef elem_type = lp_build_elem_type(type); + LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; + unsigned i; + + assert(type.length <= LP_MAX_VECTOR_LENGTH); + + if(type.floating) { + elems[0] = LLVMConstReal(elem_type, val); + } + else { + double dscale = lp_const_scale(type); + + elems[0] = LLVMConstInt(elem_type, val*dscale + 0.5, 0); + } + + for(i = 1; i < type.length; ++i) + elems[i] = elems[0]; + + return LLVMConstVector(elems, type.length); +} + + +LLVMValueRef +lp_build_int_const_scalar(struct lp_type type, + long long val) +{ + LLVMTypeRef elem_type = lp_build_int_elem_type(type); + LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; + unsigned i; + + assert(type.length <= LP_MAX_VECTOR_LENGTH); + + for(i = 0; i < type.length; ++i) + elems[i] = LLVMConstInt(elem_type, val, type.sign ? 
1 : 0); + + return LLVMConstVector(elems, type.length); +} + + +LLVMValueRef +lp_build_const_aos(struct lp_type type, + double r, double g, double b, double a, + const unsigned char *swizzle) +{ + const unsigned char default_swizzle[4] = {0, 1, 2, 3}; + LLVMTypeRef elem_type; + LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; + unsigned i; + + assert(type.length % 4 == 0); + assert(type.length <= LP_MAX_VECTOR_LENGTH); + + elem_type = lp_build_elem_type(type); + + if(swizzle == NULL) + swizzle = default_swizzle; + + if(type.floating) { + elems[swizzle[0]] = LLVMConstReal(elem_type, r); + elems[swizzle[1]] = LLVMConstReal(elem_type, g); + elems[swizzle[2]] = LLVMConstReal(elem_type, b); + elems[swizzle[3]] = LLVMConstReal(elem_type, a); + } + else { + double dscale = lp_const_scale(type); + + elems[swizzle[0]] = LLVMConstInt(elem_type, r*dscale + 0.5, 0); + elems[swizzle[1]] = LLVMConstInt(elem_type, g*dscale + 0.5, 0); + elems[swizzle[2]] = LLVMConstInt(elem_type, b*dscale + 0.5, 0); + elems[swizzle[3]] = LLVMConstInt(elem_type, a*dscale + 0.5, 0); + } + + for(i = 4; i < type.length; ++i) + elems[i] = elems[i % 4]; + + return LLVMConstVector(elems, type.length); +} + + +LLVMValueRef +lp_build_const_mask_aos(struct lp_type type, + const boolean cond[4]) +{ + LLVMTypeRef elem_type = LLVMIntType(type.width); + LLVMValueRef masks[LP_MAX_VECTOR_LENGTH]; + unsigned i, j; + + assert(type.length <= LP_MAX_VECTOR_LENGTH); + + for(j = 0; j < type.length; j += 4) + for(i = 0; i < 4; ++i) + masks[j + i] = LLVMConstInt(elem_type, cond[i] ? ~0 : 0, 0); + + return LLVMConstVector(masks, type.length); +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.h b/src/gallium/auxiliary/gallivm/lp_bld_const.h new file mode 100644 index 00000000000..cb8e1c7b006 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_const.h @@ -0,0 +1,108 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Helper functions for constant building. 
+ * + * @author Jose Fonseca + */ + + +#ifndef LP_BLD_CONST_H +#define LP_BLD_CONST_H + + +#include + +#include + + +struct lp_type; + + +unsigned +lp_mantissa(struct lp_type type); + + +unsigned +lp_const_shift(struct lp_type type); + + +unsigned +lp_const_offset(struct lp_type type); + + +double +lp_const_scale(struct lp_type type); + +double +lp_const_min(struct lp_type type); + + +double +lp_const_max(struct lp_type type); + + +double +lp_const_eps(struct lp_type type); + + +LLVMValueRef +lp_build_undef(struct lp_type type); + + +LLVMValueRef +lp_build_zero(struct lp_type type); + + +LLVMValueRef +lp_build_one(struct lp_type type); + + +LLVMValueRef +lp_build_const_scalar(struct lp_type type, + double val); + + +LLVMValueRef +lp_build_int_const_scalar(struct lp_type type, + long long val); + + +LLVMValueRef +lp_build_const_aos(struct lp_type type, + double r, double g, double b, double a, + const unsigned char *swizzle); + + +LLVMValueRef +lp_build_const_mask_aos(struct lp_type type, + const boolean cond[4]); + + +#endif /* !LP_BLD_CONST_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c new file mode 100644 index 00000000000..f77cf787213 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c @@ -0,0 +1,469 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * @file + * Helper functions for type conversions. + * + * We want to use the fastest type for a given computation whenever feasible. + * The other side of this is that we need to be able to convert between several + * types accurately and efficiently.
+ * + * Conversion between types of different bit width is quite complex since a + * + * To remember there are a few invariants in type conversions: + * + * - register width must remain constant: + * + * src_type.width * src_type.length == dst_type.width * dst_type.length + * + * - total number of elements must remain constant: + * + * src_type.length * num_srcs == dst_type.length * num_dsts + * + * It is not always possible to do the conversion both accurately and + * efficiently, usually due to lack of adequate machine instructions. In these + * cases it is important not to cut shortcuts here and sacrifice accuracy, as + * there this functions can be used anywhere. In the future we might have a + * precision parameter which can gauge the accuracy vs efficiency compromise, + * but for now if the data conversion between two stages happens to be the + * bottleneck, then most likely should just avoid converting at all and run + * both stages with the same type. + * + * Make sure to run lp_test_conv unit test after any change to this file. + * + * @author Jose Fonseca + */ + + +#include "util/u_debug.h" +#include "util/u_math.h" + +#include "lp_bld_type.h" +#include "lp_bld_const.h" +#include "lp_bld_arit.h" +#include "lp_bld_pack.h" +#include "lp_bld_conv.h" + + +/** + * Special case for converting clamped IEEE-754 floats to unsigned norms. + * + * The mathematical voodoo below may seem excessive but it is actually + * paramount we do it this way for several reasons. First, there is no single + * precision FP to unsigned integer conversion Intel SSE instruction. Second, + * secondly, even if there was, since the FP's mantissa takes only a fraction + * of register bits the typically scale and cast approach would require double + * precision for accurate results, and therefore half the throughput + * + * Although the result values can be scaled to an arbitrary bit width specified + * by dst_width, the actual result type will have the same width. 
+ */ +LLVMValueRef +lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, + struct lp_type src_type, + unsigned dst_width, + LLVMValueRef src) +{ + LLVMTypeRef int_vec_type = lp_build_int_vec_type(src_type); + LLVMValueRef res; + unsigned mantissa; + unsigned n; + unsigned long long ubound; + unsigned long long mask; + double scale; + double bias; + + assert(src_type.floating); + + mantissa = lp_mantissa(src_type); + + /* We cannot carry more bits than the mantissa */ + n = MIN2(mantissa, dst_width); + + /* This magic coefficients will make the desired result to appear in the + * lowest significant bits of the mantissa. + */ + ubound = ((unsigned long long)1 << n); + mask = ubound - 1; + scale = (double)mask/ubound; + bias = (double)((unsigned long long)1 << (mantissa - n)); + + res = LLVMBuildMul(builder, src, lp_build_const_scalar(src_type, scale), ""); + res = LLVMBuildAdd(builder, res, lp_build_const_scalar(src_type, bias), ""); + res = LLVMBuildBitCast(builder, res, int_vec_type, ""); + + if(dst_width > n) { + int shift = dst_width - n; + res = LLVMBuildShl(builder, res, lp_build_int_const_scalar(src_type, shift), ""); + + /* TODO: Fill in the empty lower bits for additional precision? */ + /* YES: this fixes progs/trivial/tri-z-eq.c. + * Otherwise vertex Z=1.0 values get converted to something like + * 0xfffffb00 and the test for equality with 0xffffffff fails. 
+ */ +#if 0 + { + LLVMValueRef msb; + msb = LLVMBuildLShr(builder, res, lp_build_int_const_scalar(src_type, dst_width - 1), ""); + msb = LLVMBuildShl(builder, msb, lp_build_int_const_scalar(src_type, shift), ""); + msb = LLVMBuildSub(builder, msb, lp_build_int_const_scalar(src_type, 1), ""); + res = LLVMBuildOr(builder, res, msb, ""); + } +#elif 0 + while(shift > 0) { + res = LLVMBuildOr(builder, res, LLVMBuildLShr(builder, res, lp_build_int_const_scalar(src_type, n), ""), ""); + shift -= n; + n *= 2; + } +#endif + } + else + res = LLVMBuildAnd(builder, res, lp_build_int_const_scalar(src_type, mask), ""); + + return res; +} + + +/** + * Inverse of lp_build_clamped_float_to_unsigned_norm above. + */ +LLVMValueRef +lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, + unsigned src_width, + struct lp_type dst_type, + LLVMValueRef src) +{ + LLVMTypeRef vec_type = lp_build_vec_type(dst_type); + LLVMTypeRef int_vec_type = lp_build_int_vec_type(dst_type); + LLVMValueRef bias_; + LLVMValueRef res; + unsigned mantissa; + unsigned n; + unsigned long long ubound; + unsigned long long mask; + double scale; + double bias; + + mantissa = lp_mantissa(dst_type); + + n = MIN2(mantissa, src_width); + + ubound = ((unsigned long long)1 << n); + mask = ubound - 1; + scale = (double)ubound/mask; + bias = (double)((unsigned long long)1 << (mantissa - n)); + + res = src; + + if(src_width > mantissa) { + int shift = src_width - mantissa; + res = LLVMBuildLShr(builder, res, lp_build_int_const_scalar(dst_type, shift), ""); + } + + bias_ = lp_build_const_scalar(dst_type, bias); + + res = LLVMBuildOr(builder, + res, + LLVMBuildBitCast(builder, bias_, int_vec_type, ""), ""); + + res = LLVMBuildBitCast(builder, res, vec_type, ""); + + res = LLVMBuildSub(builder, res, bias_, ""); + res = LLVMBuildMul(builder, res, lp_build_const_scalar(dst_type, scale), ""); + + return res; +} + + +/** + * Generic type conversion. 
+ * + * TODO: Take a precision argument, or even better, add a new precision member + * to the lp_type union. + */ +void +lp_build_conv(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + const LLVMValueRef *src, unsigned num_srcs, + LLVMValueRef *dst, unsigned num_dsts) +{ + struct lp_type tmp_type; + LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH]; + unsigned num_tmps; + unsigned i; + + /* Register width must remain constant */ + assert(src_type.width * src_type.length == dst_type.width * dst_type.length); + + /* We must not loose or gain channels. Only precision */ + assert(src_type.length * num_srcs == dst_type.length * num_dsts); + + assert(src_type.length <= LP_MAX_VECTOR_LENGTH); + assert(dst_type.length <= LP_MAX_VECTOR_LENGTH); + + tmp_type = src_type; + for(i = 0; i < num_srcs; ++i) + tmp[i] = src[i]; + num_tmps = num_srcs; + + /* + * Clamp if necessary + */ + + if(memcmp(&src_type, &dst_type, sizeof src_type) != 0) { + struct lp_build_context bld; + double src_min = lp_const_min(src_type); + double dst_min = lp_const_min(dst_type); + double src_max = lp_const_max(src_type); + double dst_max = lp_const_max(dst_type); + LLVMValueRef thres; + + lp_build_context_init(&bld, builder, tmp_type); + + if(src_min < dst_min) { + if(dst_min == 0.0) + thres = bld.zero; + else + thres = lp_build_const_scalar(src_type, dst_min); + for(i = 0; i < num_tmps; ++i) + tmp[i] = lp_build_max(&bld, tmp[i], thres); + } + + if(src_max > dst_max) { + if(dst_max == 1.0) + thres = bld.one; + else + thres = lp_build_const_scalar(src_type, dst_max); + for(i = 0; i < num_tmps; ++i) + tmp[i] = lp_build_min(&bld, tmp[i], thres); + } + } + + /* + * Scale to the narrowest range + */ + + if(dst_type.floating) { + /* Nothing to do */ + } + else if(tmp_type.floating) { + if(!dst_type.fixed && !dst_type.sign && dst_type.norm) { + for(i = 0; i < num_tmps; ++i) { + tmp[i] = lp_build_clamped_float_to_unsigned_norm(builder, + tmp_type, + dst_type.width, + tmp[i]); + } + 
tmp_type.floating = FALSE; + } + else { + double dst_scale = lp_const_scale(dst_type); + LLVMTypeRef tmp_vec_type; + + if (dst_scale != 1.0) { + LLVMValueRef scale = lp_build_const_scalar(tmp_type, dst_scale); + for(i = 0; i < num_tmps; ++i) + tmp[i] = LLVMBuildMul(builder, tmp[i], scale, ""); + } + + /* Use an equally sized integer for intermediate computations */ + tmp_type.floating = FALSE; + tmp_vec_type = lp_build_vec_type(tmp_type); + for(i = 0; i < num_tmps; ++i) { +#if 0 + if(dst_type.sign) + tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type, ""); + else + tmp[i] = LLVMBuildFPToUI(builder, tmp[i], tmp_vec_type, ""); +#else + /* FIXME: there is no SSE counterpart for LLVMBuildFPToUI */ + tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type, ""); +#endif + } + } + } + else { + unsigned src_shift = lp_const_shift(src_type); + unsigned dst_shift = lp_const_shift(dst_type); + + /* FIXME: compensate different offsets too */ + if(src_shift > dst_shift) { + LLVMValueRef shift = lp_build_int_const_scalar(tmp_type, src_shift - dst_shift); + for(i = 0; i < num_tmps; ++i) + if(src_type.sign) + tmp[i] = LLVMBuildAShr(builder, tmp[i], shift, ""); + else + tmp[i] = LLVMBuildLShr(builder, tmp[i], shift, ""); + } + } + + /* + * Truncate or expand bit width + */ + + assert(!tmp_type.floating || tmp_type.width == dst_type.width); + + if(tmp_type.width > dst_type.width) { + assert(num_dsts == 1); + tmp[0] = lp_build_pack(builder, tmp_type, dst_type, TRUE, tmp, num_tmps); + tmp_type.width = dst_type.width; + tmp_type.length = dst_type.length; + num_tmps = 1; + } + + if(tmp_type.width < dst_type.width) { + assert(num_tmps == 1); + lp_build_unpack(builder, tmp_type, dst_type, tmp[0], tmp, num_dsts); + tmp_type.width = dst_type.width; + tmp_type.length = dst_type.length; + num_tmps = num_dsts; + } + + assert(tmp_type.width == dst_type.width); + assert(tmp_type.length == dst_type.length); + assert(num_tmps == num_dsts); + + /* + * Scale to the widest range + */ + + 
if(src_type.floating) { + /* Nothing to do */ + } + else if(!src_type.floating && dst_type.floating) { + if(!src_type.fixed && !src_type.sign && src_type.norm) { + for(i = 0; i < num_tmps; ++i) { + tmp[i] = lp_build_unsigned_norm_to_float(builder, + src_type.width, + dst_type, + tmp[i]); + } + tmp_type.floating = TRUE; + } + else { + double src_scale = lp_const_scale(src_type); + LLVMTypeRef tmp_vec_type; + + /* Use an equally sized integer for intermediate computations */ + tmp_type.floating = TRUE; + tmp_type.sign = TRUE; + tmp_vec_type = lp_build_vec_type(tmp_type); + for(i = 0; i < num_tmps; ++i) { +#if 0 + if(dst_type.sign) + tmp[i] = LLVMBuildSIToFP(builder, tmp[i], tmp_vec_type, ""); + else + tmp[i] = LLVMBuildUIToFP(builder, tmp[i], tmp_vec_type, ""); +#else + /* FIXME: there is no SSE counterpart for LLVMBuildUIToFP */ + tmp[i] = LLVMBuildSIToFP(builder, tmp[i], tmp_vec_type, ""); +#endif + } + + if (src_scale != 1.0) { + LLVMValueRef scale = lp_build_const_scalar(tmp_type, 1.0/src_scale); + for(i = 0; i < num_tmps; ++i) + tmp[i] = LLVMBuildMul(builder, tmp[i], scale, ""); + } + } + } + else { + unsigned src_shift = lp_const_shift(src_type); + unsigned dst_shift = lp_const_shift(dst_type); + + /* FIXME: compensate different offsets too */ + if(src_shift < dst_shift) { + LLVMValueRef shift = lp_build_int_const_scalar(tmp_type, dst_shift - src_shift); + for(i = 0; i < num_tmps; ++i) + tmp[i] = LLVMBuildShl(builder, tmp[i], shift, ""); + } + } + + for(i = 0; i < num_dsts; ++i) + dst[i] = tmp[i]; +} + + +/** + * Bit mask conversion. + * + * This will convert the integer masks that match the given types. + * + * The mask values should 0 or -1, i.e., all bits either set to zero or one. + * Any other value will likely cause in unpredictable results. + * + * This is basically a very trimmed down version of lp_build_conv. 
+ */ +void +lp_build_conv_mask(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + const LLVMValueRef *src, unsigned num_srcs, + LLVMValueRef *dst, unsigned num_dsts) +{ + /* Register width must remain constant */ + assert(src_type.width * src_type.length == dst_type.width * dst_type.length); + + /* We must not loose or gain channels. Only precision */ + assert(src_type.length * num_srcs == dst_type.length * num_dsts); + + /* + * Drop + * + * We assume all values are 0 or -1 + */ + + src_type.floating = FALSE; + src_type.fixed = FALSE; + src_type.sign = TRUE; + src_type.norm = FALSE; + + dst_type.floating = FALSE; + dst_type.fixed = FALSE; + dst_type.sign = TRUE; + dst_type.norm = FALSE; + + /* + * Truncate or expand bit width + */ + + if(src_type.width > dst_type.width) { + assert(num_dsts == 1); + dst[0] = lp_build_pack(builder, src_type, dst_type, TRUE, src, num_srcs); + } + else if(src_type.width < dst_type.width) { + assert(num_srcs == 1); + lp_build_unpack(builder, src_type, dst_type, src[0], dst, num_dsts); + } + else { + assert(num_srcs == num_dsts); + memcpy(dst, src, num_dsts * sizeof *dst); + } +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.h b/src/gallium/auxiliary/gallivm/lp_bld_conv.h new file mode 100644 index 00000000000..948e68fae4f --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.h @@ -0,0 +1,73 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Helper functions for type conversions. 
+ * + * @author Jose Fonseca + */ + + +#ifndef LP_BLD_CONV_H +#define LP_BLD_CONV_H + + +#include + + +struct lp_type; + + +LLVMValueRef +lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, + struct lp_type src_type, + unsigned dst_width, + LLVMValueRef src); + +LLVMValueRef +lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, + unsigned src_width, + struct lp_type dst_type, + LLVMValueRef src); + + +void +lp_build_conv(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + const LLVMValueRef *srcs, unsigned num_srcs, + LLVMValueRef *dsts, unsigned num_dsts); + +void +lp_build_conv_mask(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + const LLVMValueRef *src, unsigned num_srcs, + LLVMValueRef *dst, unsigned num_dsts); + +#endif /* !LP_BLD_CONV_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.c b/src/gallium/auxiliary/gallivm/lp_bld_debug.c new file mode 100644 index 00000000000..39dfc51e503 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.c @@ -0,0 +1,132 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifdef HAVE_UDIS86 +#include +#endif + +#include "util/u_math.h" +#include "util/u_debug.h" +#include "lp_bld_debug.h" + + +/** + * Check alignment. + * + * It is important that this check is not implemented as a macro or inlined + * function, as the compiler assumptions in respect to alignment of global + * and stack variables would often make the check a no op, defeating the + * whole purpose of the exercise. + */ +boolean +lp_check_alignment(const void *ptr, unsigned alignment) +{ + assert(util_is_pot(alignment)); + return ((uintptr_t)ptr & (alignment - 1)) == 0; +} + + +void +lp_disassemble(const void* func) +{ +#ifdef HAVE_UDIS86 + ud_t ud_obj; + uint64_t max_jmp_pc; + + ud_init(&ud_obj); + + ud_set_input_buffer(&ud_obj, (void*)func, 0xffff); + + max_jmp_pc = (uint64_t) (uintptr_t) func; + ud_set_pc(&ud_obj, max_jmp_pc); + +#ifdef PIPE_ARCH_X86 + ud_set_mode(&ud_obj, 32); +#endif +#ifdef PIPE_ARCH_X86_64 + ud_set_mode(&ud_obj, 64); +#endif + + ud_set_syntax(&ud_obj, UD_SYN_ATT); + + while (ud_disassemble(&ud_obj)) { + +#ifdef PIPE_ARCH_X86 + debug_printf("0x%08lx:\t", (unsigned long)ud_insn_off(&ud_obj)); +#endif +#ifdef PIPE_ARCH_X86_64 + debug_printf("0x%016llx:\t", (unsigned long long)ud_insn_off(&ud_obj)); +#endif + +#if 0 + debug_printf("%-16s ", ud_insn_hex(&ud_obj)); +#endif + + debug_printf("%s\n", ud_insn_asm(&ud_obj)); + + if(ud_obj.mnemonic != UD_Icall) { + unsigned i; + for(i = 0; i 
< 3; ++i) { + const struct ud_operand *op = &ud_obj.operand[i]; + if (op->type == UD_OP_JIMM){ + uint64_t pc = ud_obj.pc; + + switch (op->size) { + case 8: + pc += op->lval.sbyte; + break; + case 16: + pc += op->lval.sword; + break; + case 32: + pc += op->lval.sdword; + break; + default: + break; + } + if(pc > max_jmp_pc) + max_jmp_pc = pc; + } + } + } + + if ((ud_insn_off(&ud_obj) >= max_jmp_pc && ud_obj.mnemonic == UD_Iret) || + ud_obj.mnemonic == UD_Iinvalid) + break; + } + +#if 0 + /* Print GDB command, useful to verify udis86 output */ + debug_printf("disassemble %p %p\n", func, (void*)(uintptr_t)ud_obj.pc); +#endif + + debug_printf("\n"); +#else + (void)func; +#endif +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.h b/src/gallium/auxiliary/gallivm/lp_bld_debug.h new file mode 100644 index 00000000000..583e6132b4b --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.h @@ -0,0 +1,64 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef LP_BLD_DEBUG_H +#define LP_BLD_DEBUG_H + + +#include + +#include "pipe/p_compiler.h" +#include "util/u_string.h" + + +static INLINE void +lp_build_name(LLVMValueRef val, const char *format, ...) +{ +#ifdef DEBUG + char name[32]; + va_list ap; + va_start(ap, format); + util_vsnprintf(name, sizeof name, format, ap); + va_end(ap); + LLVMSetValueName(val, name); +#else + (void)val; + (void)format; +#endif +} + + +boolean +lp_check_alignment(const void *ptr, unsigned alignment); + + +void +lp_disassemble(const void* func); + + +#endif /* !LP_BLD_DEBUG_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_depth.c b/src/gallium/auxiliary/gallivm/lp_bld_depth.c new file mode 100644 index 00000000000..d438c0e63d7 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_depth.c @@ -0,0 +1,213 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Depth/stencil testing to LLVM IR translation. + * + * To be done accurately/efficiently the depth/stencil test must be done with + * the same type/format of the depth/stencil buffer, which implies massaging + * the incoming depths to fit into place. Using a more straightforward + * type/format for depth/stencil values internally and only convert when + * flushing would avoid this, but it would most likely result in depth fighting + * artifacts. + * + * We are free to use a different pixel layout though. Since our basic + * processing unit is a quad (2x2 pixel block) we store the depth/stencil + * values tiled, a quad at time. That is, a depth buffer containing + * + * Z11 Z12 Z13 Z14 ... + * Z21 Z22 Z23 Z24 ... + * Z31 Z32 Z33 Z34 ... + * Z41 Z42 Z43 Z44 ... + * ... ... ... ... ... + * + * will actually be stored in memory as + * + * Z11 Z12 Z21 Z22 Z13 Z14 Z23 Z24 ... + * Z31 Z32 Z41 Z42 Z33 Z34 Z43 Z44 ... + * ... ... ... ... ... ... ... ... ... + * + * FIXME: Code generate stencil test + * + * @author Jose Fonseca + */ + +#include "pipe/p_state.h" +#include "util/u_format.h" + +#include "lp_bld_type.h" +#include "lp_bld_const.h" +#include "lp_bld_logic.h" +#include "lp_bld_flow.h" +#include "lp_bld_debug.h" +#include "lp_bld_depth.h" + + +/** + * Return a type appropriate for depth/stencil testing. 
+ */ +struct lp_type +lp_depth_type(const struct util_format_description *format_desc, + unsigned length) +{ + struct lp_type type; + unsigned swizzle; + + assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS); + assert(format_desc->block.width == 1); + assert(format_desc->block.height == 1); + + swizzle = format_desc->swizzle[0]; + assert(swizzle < 4); + + memset(&type, 0, sizeof type); + type.width = format_desc->block.bits; + + if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_FLOAT) { + type.floating = TRUE; + assert(swizzle == 0); + assert(format_desc->channel[swizzle].size == format_desc->block.bits); + } + else if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) { + assert(format_desc->block.bits <= 32); + if(format_desc->channel[swizzle].normalized) + type.norm = TRUE; + } + else + assert(0); + + assert(type.width <= length); + type.length = length / type.width; + + return type; +} + + +/** + * Depth test. + */ +void +lp_build_depth_test(LLVMBuilderRef builder, + const struct pipe_depth_state *state, + struct lp_type type, + const struct util_format_description *format_desc, + struct lp_build_mask_context *mask, + LLVMValueRef src, + LLVMValueRef dst_ptr) +{ + struct lp_build_context bld; + unsigned z_swizzle; + LLVMValueRef dst; + LLVMValueRef z_bitmask = NULL; + LLVMValueRef test; + + if(!state->enabled) + return; + + assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS); + assert(format_desc->block.width == 1); + assert(format_desc->block.height == 1); + + z_swizzle = format_desc->swizzle[0]; + if(z_swizzle == UTIL_FORMAT_SWIZZLE_NONE) + return; + + /* Sanity checking */ + assert(z_swizzle < 4); + assert(format_desc->block.bits == type.width); + if(type.floating) { + assert(z_swizzle == 0); + assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_FLOAT); + assert(format_desc->channel[z_swizzle].size == format_desc->block.bits); + } + else { + assert(format_desc->channel[z_swizzle].type == 
UTIL_FORMAT_TYPE_UNSIGNED); + assert(format_desc->channel[z_swizzle].normalized); + assert(!type.fixed); + assert(!type.sign); + assert(type.norm); + } + + /* Setup build context */ + lp_build_context_init(&bld, builder, type); + + dst = LLVMBuildLoad(builder, dst_ptr, ""); + + lp_build_name(dst, "zsbuf"); + + /* Align the source depth bits with the destination's, and mask out any + * stencil or padding bits from both */ + if(format_desc->channel[z_swizzle].size == format_desc->block.bits) { + assert(z_swizzle == 0); + /* nothing to do */ + } + else { + unsigned padding_left; + unsigned padding_right; + unsigned chan; + + assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH); + assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED); + assert(format_desc->channel[z_swizzle].size <= format_desc->block.bits); + assert(format_desc->channel[z_swizzle].normalized); + + padding_right = 0; + for(chan = 0; chan < z_swizzle; ++chan) + padding_right += format_desc->channel[chan].size; + padding_left = format_desc->block.bits - + (padding_right + format_desc->channel[z_swizzle].size); + + if(padding_left || padding_right) { + const unsigned long long mask_left = ((unsigned long long)1 << (format_desc->block.bits - padding_left)) - 1; + const unsigned long long mask_right = ((unsigned long long)1 << (padding_right)) - 1; + z_bitmask = lp_build_int_const_scalar(type, mask_left ^ mask_right); + } + + if(padding_left) + src = LLVMBuildLShr(builder, src, lp_build_int_const_scalar(type, padding_left), ""); + if(padding_right) + src = LLVMBuildAnd(builder, src, z_bitmask, ""); + if(padding_left || padding_right) + dst = LLVMBuildAnd(builder, dst, z_bitmask, ""); + } + + lp_build_name(dst, "zsbuf.z"); + + test = lp_build_cmp(&bld, state->func, src, dst); + lp_build_mask_update(mask, test); + + if(state->writemask) { + if(z_bitmask) + z_bitmask = LLVMBuildAnd(builder, mask->value, z_bitmask, ""); + else + z_bitmask = mask->value; + + dst = lp_build_select(&bld, 
z_bitmask, src, dst); + LLVMBuildStore(builder, dst, dst_ptr); + } +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_depth.h b/src/gallium/auxiliary/gallivm/lp_bld_depth.h new file mode 100644 index 00000000000..79d6981bb51 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_depth.h @@ -0,0 +1,63 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * Depth/stencil testing to LLVM IR translation. 
+ * + * @author Jose Fonseca + */ + +#ifndef LP_BLD_DEPTH_H +#define LP_BLD_DEPTH_H + + +#include + + +struct pipe_depth_state; +struct util_format_description; +struct lp_type; +struct lp_build_mask_context; + + +struct lp_type +lp_depth_type(const struct util_format_description *format_desc, + unsigned length); + + +void +lp_build_depth_test(LLVMBuilderRef builder, + const struct pipe_depth_state *state, + struct lp_type type, + const struct util_format_description *format_desc, + struct lp_build_mask_context *mask, + LLVMValueRef src, + LLVMValueRef dst_ptr); + + +#endif /* !LP_BLD_DEPTH_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.c b/src/gallium/auxiliary/gallivm/lp_bld_flow.c new file mode 100644 index 00000000000..bc831389085 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.c @@ -0,0 +1,757 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * LLVM control flow build helpers. + * + * @author Jose Fonseca + */ + +#include "util/u_debug.h" +#include "util/u_memory.h" + +#include "lp_bld_type.h" +#include "lp_bld_flow.h" + + +#define LP_BUILD_FLOW_MAX_VARIABLES 32 +#define LP_BUILD_FLOW_MAX_DEPTH 32 + +/** + * Enumeration of all possible flow constructs. + */ +enum lp_build_flow_construct_kind { + LP_BUILD_FLOW_SCOPE, + LP_BUILD_FLOW_SKIP, + LP_BUILD_FLOW_IF +}; + + +/** + * Variable declaration scope. + */ +struct lp_build_flow_scope +{ + /** Number of variables declared in this scope */ + unsigned num_variables; +}; + + +/** + * Early exit. Useful to skip to the end of a function or block when + * the execution mask becomes zero or when there is an error condition. + */ +struct lp_build_flow_skip +{ + /** Block to skip to */ + LLVMBasicBlockRef block; + + /** Number of variables declared at the beginning */ + unsigned num_variables; + + LLVMValueRef *phi; /**< array [num_variables] */ +}; + + +/** + * if/else/endif. + */ +struct lp_build_flow_if +{ + unsigned num_variables; + + LLVMValueRef *phi; /**< array [num_variables] */ + + LLVMValueRef condition; + LLVMBasicBlockRef entry_block, true_block, false_block, merge_block; +}; + + +/** + * Union of all possible flow constructs' data + */ +union lp_build_flow_construct_data +{ + struct lp_build_flow_scope scope; + struct lp_build_flow_skip skip; + struct lp_build_flow_if ifthen; +}; + + +/** + * Element of the flow construct stack. 
+ */ +struct lp_build_flow_construct +{ + enum lp_build_flow_construct_kind kind; + union lp_build_flow_construct_data data; +}; + + +/** + * All necessary data to generate LLVM control flow constructs. + * + * Besides keeping track of the control flow construct themselves we also + * need to keep track of variables in order to generate SSA Phi values. + */ +struct lp_build_flow_context +{ + LLVMBuilderRef builder; + + /** + * Control flow stack. + */ + struct lp_build_flow_construct constructs[LP_BUILD_FLOW_MAX_DEPTH]; + unsigned num_constructs; + + /** + * Variable stack + */ + LLVMValueRef *variables[LP_BUILD_FLOW_MAX_VARIABLES]; + unsigned num_variables; +}; + + +struct lp_build_flow_context * +lp_build_flow_create(LLVMBuilderRef builder) +{ + struct lp_build_flow_context *flow; + + flow = CALLOC_STRUCT(lp_build_flow_context); + if(!flow) + return NULL; + + flow->builder = builder; + + return flow; +} + + +void +lp_build_flow_destroy(struct lp_build_flow_context *flow) +{ + assert(flow->num_constructs == 0); + assert(flow->num_variables == 0); + FREE(flow); +} + + +/** + * Begin/push a new flow control construct, such as a loop, skip block + * or variable scope. + */ +static union lp_build_flow_construct_data * +lp_build_flow_push(struct lp_build_flow_context *flow, + enum lp_build_flow_construct_kind kind) +{ + assert(flow->num_constructs < LP_BUILD_FLOW_MAX_DEPTH); + if(flow->num_constructs >= LP_BUILD_FLOW_MAX_DEPTH) + return NULL; + + flow->constructs[flow->num_constructs].kind = kind; + return &flow->constructs[flow->num_constructs++].data; +} + + +/** + * Return the current/top flow control construct on the stack. 
+ * \param kind the expected type of the top-most construct + */ +static union lp_build_flow_construct_data * +lp_build_flow_peek(struct lp_build_flow_context *flow, + enum lp_build_flow_construct_kind kind) +{ + assert(flow->num_constructs); + if(!flow->num_constructs) + return NULL; + + assert(flow->constructs[flow->num_constructs - 1].kind == kind); + if(flow->constructs[flow->num_constructs - 1].kind != kind) + return NULL; + + return &flow->constructs[flow->num_constructs - 1].data; +} + + +/** + * End/pop the current/top flow control construct on the stack. + * \param kind the expected type of the top-most construct + */ +static union lp_build_flow_construct_data * +lp_build_flow_pop(struct lp_build_flow_context *flow, + enum lp_build_flow_construct_kind kind) +{ + assert(flow->num_constructs); + if(!flow->num_constructs) + return NULL; + + assert(flow->constructs[flow->num_constructs - 1].kind == kind); + if(flow->constructs[flow->num_constructs - 1].kind != kind) + return NULL; + + return &flow->constructs[--flow->num_constructs].data; +} + + +/** + * Begin a variable scope. + * + * + */ +void +lp_build_flow_scope_begin(struct lp_build_flow_context *flow) +{ + struct lp_build_flow_scope *scope; + + scope = &lp_build_flow_push(flow, LP_BUILD_FLOW_SCOPE)->scope; + if(!scope) + return; + + scope->num_variables = 0; +} + + +/** + * Declare a variable. + * + * A variable is a named entity which can have different LLVMValueRef's at + * different points of the program. This is relevant for control flow because + * when there are multiple branches to a same location we need to replace + * the variable's value with a Phi function as explained in + * http://en.wikipedia.org/wiki/Static_single_assignment_form . + * + * We keep track of variables by keeping around a pointer to where they're + * current. + * + * There are a few cautions to observe: + * + * - Variable's value must not be NULL. If there is no initial value then + * LLVMGetUndef() should be used. 
+ * + * - Variable's value must be kept up-to-date. If the variable is going to be + * modified by a function then a pointer should be passed so that its value + * is accurate. Failure to do this will cause some of the variables' + * transient values to be lost, leading to wrong results. + * + * - A program should be written from top to bottom, by always appending + * instructions to the bottom with a single LLVMBuilderRef. Inserting and/or + * modifying existing statements will most likely lead to wrong results. + * + */ +void +lp_build_flow_scope_declare(struct lp_build_flow_context *flow, + LLVMValueRef *variable) +{ + struct lp_build_flow_scope *scope; + + scope = &lp_build_flow_peek(flow, LP_BUILD_FLOW_SCOPE)->scope; + if(!scope) + return; + + assert(*variable); + if(!*variable) + return; + + assert(flow->num_variables < LP_BUILD_FLOW_MAX_VARIABLES); + if(flow->num_variables >= LP_BUILD_FLOW_MAX_VARIABLES) + return; + + flow->variables[flow->num_variables++] = variable; + ++scope->num_variables; +} + + +void +lp_build_flow_scope_end(struct lp_build_flow_context *flow) +{ + struct lp_build_flow_scope *scope; + + scope = &lp_build_flow_pop(flow, LP_BUILD_FLOW_SCOPE)->scope; + if(!scope) + return; + + assert(flow->num_variables >= scope->num_variables); + if(flow->num_variables < scope->num_variables) { + flow->num_variables = 0; + return; + } + + flow->num_variables -= scope->num_variables; +} + + +/** + * Note: this function has no dependencies on the flow code and could + * be used elsewhere. 
+ */ +static LLVMBasicBlockRef +lp_build_insert_new_block(LLVMBuilderRef builder, const char *name) +{ + LLVMBasicBlockRef current_block; + LLVMBasicBlockRef next_block; + LLVMBasicBlockRef new_block; + + /* get current basic block */ + current_block = LLVMGetInsertBlock(builder); + + /* check if there's another block after this one */ + next_block = LLVMGetNextBasicBlock(current_block); + if (next_block) { + /* insert the new block before the next block */ + new_block = LLVMInsertBasicBlock(next_block, name); + } + else { + /* append new block after current block */ + LLVMValueRef function = LLVMGetBasicBlockParent(current_block); + new_block = LLVMAppendBasicBlock(function, name); + } + + return new_block; +} + + +static LLVMBasicBlockRef +lp_build_flow_insert_block(struct lp_build_flow_context *flow) +{ + return lp_build_insert_new_block(flow->builder, ""); +} + + +/** + * Begin a "skip" block. Inside this block we can test a condition and + * skip to the end of the block if the condition is false. 
+ */ +void +lp_build_flow_skip_begin(struct lp_build_flow_context *flow) +{ + struct lp_build_flow_skip *skip; + LLVMBuilderRef builder; + unsigned i; + + skip = &lp_build_flow_push(flow, LP_BUILD_FLOW_SKIP)->skip; + if(!skip) + return; + + /* create new basic block */ + skip->block = lp_build_flow_insert_block(flow); + + skip->num_variables = flow->num_variables; + if(!skip->num_variables) { + skip->phi = NULL; + return; + } + + /* Allocate a Phi node for each variable in this skip scope */ + skip->phi = MALLOC(skip->num_variables * sizeof *skip->phi); + if(!skip->phi) { + skip->num_variables = 0; + return; + } + + builder = LLVMCreateBuilder(); + LLVMPositionBuilderAtEnd(builder, skip->block); + + /* create a Phi node for each variable */ + for(i = 0; i < skip->num_variables; ++i) + skip->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), ""); + + LLVMDisposeBuilder(builder); +} + + +/** + * Insert code to test a condition and branch to the end of the current + * skip block if the condition is true. 
+ */ +void +lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow, + LLVMValueRef cond) +{ + struct lp_build_flow_skip *skip; + LLVMBasicBlockRef current_block; + LLVMBasicBlockRef new_block; + unsigned i; + + skip = &lp_build_flow_peek(flow, LP_BUILD_FLOW_SKIP)->skip; + if(!skip) + return; + + current_block = LLVMGetInsertBlock(flow->builder); + + new_block = lp_build_flow_insert_block(flow); + + /* for each variable, update the Phi node with a (variable, block) pair */ + for(i = 0; i < skip->num_variables; ++i) { + assert(*flow->variables[i]); + LLVMAddIncoming(skip->phi[i], flow->variables[i], ¤t_block, 1); + } + + /* if cond is true, goto skip->block, else goto new_block */ + LLVMBuildCondBr(flow->builder, cond, skip->block, new_block); + + LLVMPositionBuilderAtEnd(flow->builder, new_block); +} + + +void +lp_build_flow_skip_end(struct lp_build_flow_context *flow) +{ + struct lp_build_flow_skip *skip; + LLVMBasicBlockRef current_block; + unsigned i; + + skip = &lp_build_flow_pop(flow, LP_BUILD_FLOW_SKIP)->skip; + if(!skip) + return; + + current_block = LLVMGetInsertBlock(flow->builder); + + /* add (variable, block) tuples to the phi nodes */ + for(i = 0; i < skip->num_variables; ++i) { + assert(*flow->variables[i]); + LLVMAddIncoming(skip->phi[i], flow->variables[i], ¤t_block, 1); + *flow->variables[i] = skip->phi[i]; + } + + /* goto block */ + LLVMBuildBr(flow->builder, skip->block); + LLVMPositionBuilderAtEnd(flow->builder, skip->block); + + FREE(skip->phi); +} + + +/** + * Check if the mask predicate is zero. If so, jump to the end of the block. 
+ */ +static void +lp_build_mask_check(struct lp_build_mask_context *mask) +{ + LLVMBuilderRef builder = mask->flow->builder; + LLVMValueRef cond; + + /* cond = (mask == 0) */ + cond = LLVMBuildICmp(builder, + LLVMIntEQ, + LLVMBuildBitCast(builder, mask->value, mask->reg_type, ""), + LLVMConstNull(mask->reg_type), + ""); + + /* if cond, goto end of block */ + lp_build_flow_skip_cond_break(mask->flow, cond); +} + + +/** + * Begin a section of code which is predicated on a mask. + * \param mask the mask context, initialized here + * \param flow the flow context + * \param type the type of the mask + * \param value storage for the mask + */ +void +lp_build_mask_begin(struct lp_build_mask_context *mask, + struct lp_build_flow_context *flow, + struct lp_type type, + LLVMValueRef value) +{ + memset(mask, 0, sizeof *mask); + + mask->flow = flow; + mask->reg_type = LLVMIntType(type.width * type.length); + mask->value = value; + + lp_build_flow_scope_begin(flow); + lp_build_flow_scope_declare(flow, &mask->value); + lp_build_flow_skip_begin(flow); + + lp_build_mask_check(mask); +} + + +/** + * Update boolean mask with given value (bitwise AND). + * Typically used to update the quad's pixel alive/killed mask + * after depth testing, alpha testing, TGSI_OPCODE_KIL, etc. + */ +void +lp_build_mask_update(struct lp_build_mask_context *mask, + LLVMValueRef value) +{ + mask->value = LLVMBuildAnd( mask->flow->builder, mask->value, value, ""); + + lp_build_mask_check(mask); +} + + +/** + * End section of code which is predicated on a mask. 
+ */ +LLVMValueRef +lp_build_mask_end(struct lp_build_mask_context *mask) +{ + lp_build_flow_skip_end(mask->flow); + lp_build_flow_scope_end(mask->flow); + return mask->value; +} + + + +void +lp_build_loop_begin(LLVMBuilderRef builder, + LLVMValueRef start, + struct lp_build_loop_state *state) +{ + LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); + LLVMValueRef function = LLVMGetBasicBlockParent(block); + + state->block = LLVMAppendBasicBlock(function, "loop"); + + LLVMBuildBr(builder, state->block); + + LLVMPositionBuilderAtEnd(builder, state->block); + + state->counter = LLVMBuildPhi(builder, LLVMTypeOf(start), ""); + + LLVMAddIncoming(state->counter, &start, &block, 1); + +} + + +void +lp_build_loop_end(LLVMBuilderRef builder, + LLVMValueRef end, + LLVMValueRef step, + struct lp_build_loop_state *state) +{ + LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); + LLVMValueRef function = LLVMGetBasicBlockParent(block); + LLVMValueRef next; + LLVMValueRef cond; + LLVMBasicBlockRef after_block; + + if (!step) + step = LLVMConstInt(LLVMTypeOf(end), 1, 0); + + next = LLVMBuildAdd(builder, state->counter, step, ""); + + cond = LLVMBuildICmp(builder, LLVMIntNE, next, end, ""); + + after_block = LLVMAppendBasicBlock(function, ""); + + LLVMBuildCondBr(builder, cond, after_block, state->block); + + LLVMAddIncoming(state->counter, &next, &block, 1); + + LLVMPositionBuilderAtEnd(builder, after_block); +} + + + +/* + Example of if/then/else building: + + int x; + if (cond) { + x = 1 + 2; + } + else { + x = 2 + 3; + } + + Is built with: + + LLVMValueRef x = LLVMGetUndef(); // or something else + + flow = lp_build_flow_create(builder); + + lp_build_flow_scope_begin(flow); + + // x needs a phi node + lp_build_flow_scope_declare(flow, &x); + + lp_build_if(ctx, flow, builder, cond); + x = LLVMAdd(1, 2); + lp_build_else(ctx); + x = LLVMAdd(2, 3); + lp_build_endif(ctx); + + lp_build_flow_scope_end(flow); + + lp_build_flow_destroy(flow); + */ + + + +/** + * Begin an 
if/else/endif construct. + */ +void +lp_build_if(struct lp_build_if_state *ctx, + struct lp_build_flow_context *flow, + LLVMBuilderRef builder, + LLVMValueRef condition) +{ + LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); + struct lp_build_flow_if *ifthen; + unsigned i; + + memset(ctx, 0, sizeof(*ctx)); + ctx->builder = builder; + ctx->flow = flow; + + /* push/create new scope */ + ifthen = &lp_build_flow_push(flow, LP_BUILD_FLOW_IF)->ifthen; + assert(ifthen); + + ifthen->num_variables = flow->num_variables; + ifthen->condition = condition; + ifthen->entry_block = block; + + /* create a Phi node for each variable in this flow scope */ + ifthen->phi = MALLOC(ifthen->num_variables * sizeof(*ifthen->phi)); + if (!ifthen->phi) { + ifthen->num_variables = 0; + return; + } + + /* create endif/merge basic block for the phi functions */ + ifthen->merge_block = lp_build_insert_new_block(builder, "endif-block"); + LLVMPositionBuilderAtEnd(builder, ifthen->merge_block); + + /* create a phi node for each variable */ + for (i = 0; i < flow->num_variables; i++) { + ifthen->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), ""); + + /* add add the initial value of the var from the entry block */ + LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->entry_block, 1); + } + + /* create/insert true_block before merge_block */ + ifthen->true_block = LLVMInsertBasicBlock(ifthen->merge_block, "if-true-block"); + + /* successive code goes into the true block */ + LLVMPositionBuilderAtEnd(builder, ifthen->true_block); +} + + +/** + * Begin else-part of a conditional + */ +void +lp_build_else(struct lp_build_if_state *ctx) +{ + struct lp_build_flow_context *flow = ctx->flow; + struct lp_build_flow_if *ifthen; + unsigned i; + + ifthen = &lp_build_flow_peek(flow, LP_BUILD_FLOW_IF)->ifthen; + assert(ifthen); + + /* for each variable, update the Phi node with a (variable, block) pair */ + LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); + for (i = 0; i 
< flow->num_variables; i++) { + assert(*flow->variables[i]); + LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->true_block, 1); + } + + /* create/insert false_block before the merge block */ + ifthen->false_block = LLVMInsertBasicBlock(ifthen->merge_block, "if-false-block"); + + /* successive code goes into the else block */ + LLVMPositionBuilderAtEnd(ctx->builder, ifthen->false_block); +} + + +/** + * End a conditional. + */ +void +lp_build_endif(struct lp_build_if_state *ctx) +{ + struct lp_build_flow_context *flow = ctx->flow; + struct lp_build_flow_if *ifthen; + unsigned i; + + ifthen = &lp_build_flow_pop(flow, LP_BUILD_FLOW_IF)->ifthen; + assert(ifthen); + + if (ifthen->false_block) { + LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); + /* for each variable, update the Phi node with a (variable, block) pair */ + for (i = 0; i < flow->num_variables; i++) { + assert(*flow->variables[i]); + LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->false_block, 1); + + /* replace the variable ref with the phi function */ + *flow->variables[i] = ifthen->phi[i]; + } + } + else { + /* no else clause */ + LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); + for (i = 0; i < flow->num_variables; i++) { + assert(*flow->variables[i]); + LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->true_block, 1); + + /* replace the variable ref with the phi function */ + *flow->variables[i] = ifthen->phi[i]; + } + } + + FREE(ifthen->phi); + + /*** + *** Now patch in the various branch instructions. 
+ ***/ + + /* Insert the conditional branch instruction at the end of entry_block */ + LLVMPositionBuilderAtEnd(ctx->builder, ifthen->entry_block); + if (ifthen->false_block) { + /* we have an else clause */ + LLVMBuildCondBr(ctx->builder, ifthen->condition, + ifthen->true_block, ifthen->false_block); + } + else { + /* no else clause */ + LLVMBuildCondBr(ctx->builder, ifthen->condition, + ifthen->true_block, ifthen->merge_block); + } + + /* Append an unconditional Br(anch) instruction on the true_block */ + LLVMPositionBuilderAtEnd(ctx->builder, ifthen->true_block); + LLVMBuildBr(ctx->builder, ifthen->merge_block); + if (ifthen->false_block) { + /* Append an unconditional Br(anch) instruction on the false_block */ + LLVMPositionBuilderAtEnd(ctx->builder, ifthen->false_block); + LLVMBuildBr(ctx->builder, ifthen->merge_block); + } + + + /* Resume building code at end of the ifthen->merge_block */ + LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.h b/src/gallium/auxiliary/gallivm/lp_bld_flow.h new file mode 100644 index 00000000000..4c225a0d4f9 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.h @@ -0,0 +1,151 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * LLVM control flow build helpers. + * + * @author Jose Fonseca + */ + +#ifndef LP_BLD_FLOW_H +#define LP_BLD_FLOW_H + + +#include + + +struct lp_type; + + +struct lp_build_flow_context; + + +struct lp_build_flow_context * +lp_build_flow_create(LLVMBuilderRef builder); + +void +lp_build_flow_destroy(struct lp_build_flow_context *flow); + +void +lp_build_flow_scope_begin(struct lp_build_flow_context *flow); + +void +lp_build_flow_scope_declare(struct lp_build_flow_context *flow, + LLVMValueRef *variable); + +void +lp_build_flow_scope_end(struct lp_build_flow_context *flow); + +void +lp_build_flow_skip_begin(struct lp_build_flow_context *flow); + +void +lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow, + LLVMValueRef cond); + +void +lp_build_flow_skip_end(struct lp_build_flow_context *flow); + + +struct lp_build_mask_context +{ + struct lp_build_flow_context *flow; + + LLVMTypeRef reg_type; + + LLVMValueRef value; +}; + + +void +lp_build_mask_begin(struct lp_build_mask_context *mask, + struct lp_build_flow_context *flow, + struct lp_type type, + LLVMValueRef value); + +/** + * Bitwise AND the mask with the given value, if a previous mask was set. + */ +void +lp_build_mask_update(struct lp_build_mask_context *mask, + LLVMValueRef value); + +LLVMValueRef +lp_build_mask_end(struct lp_build_mask_context *mask); + + +/** + * LLVM's IR doesn't represent for-loops directly. 
Furthermore it + * it requires creating code blocks, branches, phi variables, so it + * requires a fair amount of code. + * + * @sa http://www.llvm.org/docs/tutorial/LangImpl5.html#for + */ +struct lp_build_loop_state +{ + LLVMBasicBlockRef block; + LLVMValueRef counter; +}; + + +void +lp_build_loop_begin(LLVMBuilderRef builder, + LLVMValueRef start, + struct lp_build_loop_state *state); + + +void +lp_build_loop_end(LLVMBuilderRef builder, + LLVMValueRef end, + LLVMValueRef step, + struct lp_build_loop_state *state); + + + + +struct lp_build_if_state +{ + LLVMBuilderRef builder; + struct lp_build_flow_context *flow; +}; + + +void +lp_build_if(struct lp_build_if_state *ctx, + struct lp_build_flow_context *flow, + LLVMBuilderRef builder, + LLVMValueRef condition); + +void +lp_build_else(struct lp_build_if_state *ctx); + +void +lp_build_endif(struct lp_build_if_state *ctx); + + + +#endif /* !LP_BLD_FLOW_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format.h b/src/gallium/auxiliary/gallivm/lp_bld_format.h new file mode 100644 index 00000000000..970bee379f5 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_format.h @@ -0,0 +1,83 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef LP_BLD_FORMAT_H +#define LP_BLD_FORMAT_H + + +/** + * @file + * Pixel format helpers. + */ + +#include + +#include "pipe/p_format.h" + +struct util_format_description; +struct lp_type; + + +boolean +lp_format_is_rgba8(const struct util_format_description *desc); + + +void +lp_build_format_swizzle_soa(const struct util_format_description *format_desc, + struct lp_type type, + const LLVMValueRef *unswizzled, + LLVMValueRef *swizzled); + + +LLVMValueRef +lp_build_unpack_rgba_aos(LLVMBuilderRef builder, + const struct util_format_description *desc, + LLVMValueRef packed); + + +LLVMValueRef +lp_build_unpack_rgba8_aos(LLVMBuilderRef builder, + const struct util_format_description *desc, + struct lp_type type, + LLVMValueRef packed); + + +LLVMValueRef +lp_build_pack_rgba_aos(LLVMBuilderRef builder, + const struct util_format_description *desc, + LLVMValueRef rgba); + + +void +lp_build_unpack_rgba_soa(LLVMBuilderRef builder, + const struct util_format_description *format_desc, + struct lp_type type, + LLVMValueRef packed, + LLVMValueRef *rgba); + + +#endif /* !LP_BLD_FORMAT_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c new file mode 100644 index 00000000000..dfa080b8533 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c @@ -0,0 +1,383 @@ 
+/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * AoS pixel format manipulation. + * + * @author Jose Fonseca + */ + + +#include "util/u_cpu_detect.h" +#include "util/u_format.h" + +#include "lp_bld_type.h" +#include "lp_bld_const.h" +#include "lp_bld_swizzle.h" +#include "lp_bld_format.h" + + +/** + * Unpack a single pixel into its RGBA components. + * + * @param packed integer. + * + * @return RGBA in a 4 floats vector. + * + * XXX: This is mostly for reference and testing -- operating a single pixel at + * a time is rarely if ever needed. 
+ */ +LLVMValueRef +lp_build_unpack_rgba_aos(LLVMBuilderRef builder, + const struct util_format_description *desc, + LLVMValueRef packed) +{ + LLVMTypeRef type; + LLVMValueRef shifted, casted, scaled, masked; + LLVMValueRef shifts[4]; + LLVMValueRef masks[4]; + LLVMValueRef scales[4]; + LLVMValueRef swizzles[4]; + LLVMValueRef aux[4]; + bool normalized; + int empty_channel; + unsigned shift; + unsigned i; + + /* FIXME: Support more formats */ + assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH); + assert(desc->block.width == 1); + assert(desc->block.height == 1); + assert(desc->block.bits <= 32); + + type = LLVMIntType(desc->block.bits); + + /* Do the intermediate integer computations with 32bit integers since it + * matches floating point size */ + if (desc->block.bits < 32) + packed = LLVMBuildZExt(builder, packed, LLVMInt32Type(), ""); + + /* Broadcast the packed value to all four channels */ + packed = LLVMBuildInsertElement(builder, + LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)), + packed, + LLVMConstNull(LLVMInt32Type()), + ""); + packed = LLVMBuildShuffleVector(builder, + packed, + LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)), + LLVMConstNull(LLVMVectorType(LLVMInt32Type(), 4)), + ""); + + /* Initialize vector constants */ + normalized = FALSE; + empty_channel = -1; + shift = 0; + for (i = 0; i < 4; ++i) { + unsigned bits = desc->channel[i].size; + + if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) { + shifts[i] = LLVMGetUndef(LLVMInt32Type()); + masks[i] = LLVMConstNull(LLVMInt32Type()); + scales[i] = LLVMConstNull(LLVMFloatType()); + empty_channel = i; + } + else { + unsigned mask = (1 << bits) - 1; + + assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED); + assert(bits < 32); + + shifts[i] = LLVMConstInt(LLVMInt32Type(), shift, 0); + masks[i] = LLVMConstInt(LLVMInt32Type(), mask, 0); + + if (desc->channel[i].normalized) { + scales[i] = LLVMConstReal(LLVMFloatType(), 1.0/mask); + normalized = TRUE; + } + else + scales[i] = 
LLVMConstReal(LLVMFloatType(), 1.0); + } + + shift += bits; + } + + shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), ""); + masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), ""); + /* UIToFP can't be expressed in SSE2 */ + casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), ""); + + if (normalized) + scaled = LLVMBuildMul(builder, casted, LLVMConstVector(scales, 4), ""); + else + scaled = casted; + + for (i = 0; i < 4; ++i) + aux[i] = LLVMGetUndef(LLVMFloatType()); + + for (i = 0; i < 4; ++i) { + enum util_format_swizzle swizzle = desc->swizzle[i]; + + switch (swizzle) { + case UTIL_FORMAT_SWIZZLE_X: + case UTIL_FORMAT_SWIZZLE_Y: + case UTIL_FORMAT_SWIZZLE_Z: + case UTIL_FORMAT_SWIZZLE_W: + swizzles[i] = LLVMConstInt(LLVMInt32Type(), swizzle, 0); + break; + case UTIL_FORMAT_SWIZZLE_0: + assert(empty_channel >= 0); + swizzles[i] = LLVMConstInt(LLVMInt32Type(), empty_channel, 0); + break; + case UTIL_FORMAT_SWIZZLE_1: + swizzles[i] = LLVMConstInt(LLVMInt32Type(), 4, 0); + aux[0] = LLVMConstReal(LLVMFloatType(), 1.0); + break; + case UTIL_FORMAT_SWIZZLE_NONE: + swizzles[i] = LLVMGetUndef(LLVMFloatType()); + assert(0); + break; + } + } + + return LLVMBuildShuffleVector(builder, scaled, LLVMConstVector(aux, 4), LLVMConstVector(swizzles, 4), ""); +} + + +/** + * Take a vector with packed pixels and unpack into a rgba8 vector. + * + * Formats with bit depth smaller than 32bits are accepted, but they must be + * padded to 32bits. 
+ */ +LLVMValueRef +lp_build_unpack_rgba8_aos(LLVMBuilderRef builder, + const struct util_format_description *desc, + struct lp_type type, + LLVMValueRef packed) +{ + struct lp_build_context bld; + bool rgba8; + LLVMValueRef res; + unsigned i; + + lp_build_context_init(&bld, builder, type); + + /* FIXME: Support more formats */ + assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH); + assert(desc->block.width == 1); + assert(desc->block.height == 1); + assert(desc->block.bits <= 32); + + assert(!type.floating); + assert(!type.fixed); + assert(type.norm); + assert(type.width == 8); + assert(type.length % 4 == 0); + + rgba8 = TRUE; + for(i = 0; i < 4; ++i) { + assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED || + desc->channel[i].type == UTIL_FORMAT_TYPE_VOID); + if(desc->channel[0].size != 8) + rgba8 = FALSE; + } + + if(rgba8) { + /* + * The pixel is already in a rgba8 format variant. All it is necessary + * is to swizzle the channels. + */ + + unsigned char swizzles[4]; + boolean zeros[4]; /* bitwise AND mask */ + boolean ones[4]; /* bitwise OR mask */ + boolean swizzles_needed = FALSE; + boolean zeros_needed = FALSE; + boolean ones_needed = FALSE; + + for(i = 0; i < 4; ++i) { + enum util_format_swizzle swizzle = desc->swizzle[i]; + + /* Initialize with the no-op case */ + swizzles[i] = util_cpu_caps.little_endian ? 
3 - i : i; + zeros[i] = TRUE; + ones[i] = FALSE; + + switch (swizzle) { + case UTIL_FORMAT_SWIZZLE_X: + case UTIL_FORMAT_SWIZZLE_Y: + case UTIL_FORMAT_SWIZZLE_Z: + case UTIL_FORMAT_SWIZZLE_W: + if(swizzle != swizzles[i]) { + swizzles[i] = swizzle; + swizzles_needed = TRUE; + } + break; + case UTIL_FORMAT_SWIZZLE_0: + zeros[i] = FALSE; + zeros_needed = TRUE; + break; + case UTIL_FORMAT_SWIZZLE_1: + ones[i] = TRUE; + ones_needed = TRUE; + break; + case UTIL_FORMAT_SWIZZLE_NONE: + assert(0); + break; + } + } + + res = packed; + + if(swizzles_needed) + res = lp_build_swizzle1_aos(&bld, res, swizzles); + + if(zeros_needed) { + /* Mask out zero channels */ + LLVMValueRef mask = lp_build_const_mask_aos(type, zeros); + res = LLVMBuildAnd(builder, res, mask, ""); + } + + if(ones_needed) { + /* Or one channels */ + LLVMValueRef mask = lp_build_const_mask_aos(type, ones); + res = LLVMBuildOr(builder, res, mask, ""); + } + } + else { + /* FIXME */ + assert(0); + res = lp_build_undef(type); + } + + return res; +} + + +/** + * Pack a single pixel. + * + * @param rgba 4 float vector with the unpacked components. + * + * XXX: This is mostly for reference and testing -- operating a single pixel at + * a time is rarely if ever needed. + */ +LLVMValueRef +lp_build_pack_rgba_aos(LLVMBuilderRef builder, + const struct util_format_description *desc, + LLVMValueRef rgba) +{ + LLVMTypeRef type; + LLVMValueRef packed = NULL; + LLVMValueRef swizzles[4]; + LLVMValueRef shifted, casted, scaled, unswizzled; + LLVMValueRef shifts[4]; + LLVMValueRef scales[4]; + bool normalized; + unsigned shift; + unsigned i, j; + + assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH); + assert(desc->block.width == 1); + assert(desc->block.height == 1); + + type = LLVMIntType(desc->block.bits); + + /* Unswizzle the color components into the source vector. 
*/ + for (i = 0; i < 4; ++i) { + for (j = 0; j < 4; ++j) { + if (desc->swizzle[j] == i) + break; + } + if (j < 4) + swizzles[i] = LLVMConstInt(LLVMInt32Type(), j, 0); + else + swizzles[i] = LLVMGetUndef(LLVMInt32Type()); + } + + unswizzled = LLVMBuildShuffleVector(builder, rgba, + LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4)), + LLVMConstVector(swizzles, 4), ""); + + normalized = FALSE; + shift = 0; + for (i = 0; i < 4; ++i) { + unsigned bits = desc->channel[i].size; + + if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) { + shifts[i] = LLVMGetUndef(LLVMInt32Type()); + scales[i] = LLVMGetUndef(LLVMFloatType()); + } + else { + unsigned mask = (1 << bits) - 1; + + assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED); + assert(bits < 32); + + shifts[i] = LLVMConstInt(LLVMInt32Type(), shift, 0); + + if (desc->channel[i].normalized) { + scales[i] = LLVMConstReal(LLVMFloatType(), mask); + normalized = TRUE; + } + else + scales[i] = LLVMConstReal(LLVMFloatType(), 1.0); + } + + shift += bits; + } + + if (normalized) + scaled = LLVMBuildMul(builder, unswizzled, LLVMConstVector(scales, 4), ""); + else + scaled = unswizzled; + + casted = LLVMBuildFPToSI(builder, scaled, LLVMVectorType(LLVMInt32Type(), 4), ""); + + shifted = LLVMBuildShl(builder, casted, LLVMConstVector(shifts, 4), ""); + + /* Bitwise or all components */ + for (i = 0; i < 4; ++i) { + if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) { + LLVMValueRef component = LLVMBuildExtractElement(builder, shifted, LLVMConstInt(LLVMInt32Type(), i, 0), ""); + if (packed) + packed = LLVMBuildOr(builder, packed, component, ""); + else + packed = component; + } + } + + if (!packed) + packed = LLVMGetUndef(LLVMInt32Type()); + + if (desc->block.bits < 32) + packed = LLVMBuildTrunc(builder, packed, type, ""); + + return packed; +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_query.c b/src/gallium/auxiliary/gallivm/lp_bld_format_query.c new file mode 100644 index 00000000000..f3832d07ff9 --- /dev/null 
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_query.c @@ -0,0 +1,72 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Utility functions to make assertions about formats. + * + * This module centralizes most of the logic used when determining what algorithm + * is most suitable (i.e., most efficient yet correct) for a given format. + * + * It might be possible to move some of these functions to u_format module, + * but since tiny differences in the format may render it more/less + * appropriate to a given algorithm it is impossible to make any long term + * guarantee about the semantics of these functions.
+ * + * @author Jose Fonseca + */ + + +#include "util/u_format.h" + +#include "lp_bld_format.h" + + +/** + * Whether this format is a 4 rgba8 variant + */ +boolean +lp_format_is_rgba8(const struct util_format_description *desc) +{ + unsigned chan; + + if(desc->block.width != 1 || + desc->block.height != 1 || + desc->block.bits != 32) + return FALSE; + + for(chan = 0; chan < 4; ++chan) { + if(desc->channel[chan].type != UTIL_FORMAT_TYPE_UNSIGNED && + desc->channel[chan].type != UTIL_FORMAT_TYPE_SIGNED && + desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID) + return FALSE; + if(desc->channel[chan].size != 8) + return FALSE; + } + + return TRUE; +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c new file mode 100644 index 00000000000..64151d169da --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c @@ -0,0 +1,149 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "util/u_format.h" + +#include "lp_bld_type.h" +#include "lp_bld_const.h" +#include "lp_bld_conv.h" +#include "lp_bld_format.h" + + +static LLVMValueRef +lp_build_format_swizzle_chan_soa(struct lp_type type, + const LLVMValueRef *unswizzled, + enum util_format_swizzle swizzle) +{ + switch (swizzle) { + case UTIL_FORMAT_SWIZZLE_X: + case UTIL_FORMAT_SWIZZLE_Y: + case UTIL_FORMAT_SWIZZLE_Z: + case UTIL_FORMAT_SWIZZLE_W: + return unswizzled[swizzle]; + case UTIL_FORMAT_SWIZZLE_0: + return lp_build_zero(type); + case UTIL_FORMAT_SWIZZLE_1: + return lp_build_one(type); + case UTIL_FORMAT_SWIZZLE_NONE: + return lp_build_undef(type); + default: + assert(0); + return lp_build_undef(type); + } +} + + +void +lp_build_format_swizzle_soa(const struct util_format_description *format_desc, + struct lp_type type, + const LLVMValueRef *unswizzled, + LLVMValueRef *swizzled) +{ + if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { + enum util_format_swizzle swizzle = format_desc->swizzle[0]; + LLVMValueRef depth = lp_build_format_swizzle_chan_soa(type, unswizzled, swizzle); + swizzled[2] = swizzled[1] = swizzled[0] = depth; + swizzled[3] = lp_build_one(type); + } + else { + unsigned chan; + for (chan = 0; chan < 4; ++chan) { + enum util_format_swizzle swizzle = format_desc->swizzle[chan]; + swizzled[chan] = lp_build_format_swizzle_chan_soa(type, unswizzled, swizzle); + } + } +} + + +void +lp_build_unpack_rgba_soa(LLVMBuilderRef builder, + const struct util_format_description *format_desc, + struct lp_type type, + LLVMValueRef packed, + LLVMValueRef *rgba) +{ + LLVMValueRef inputs[4]; + unsigned start; + unsigned chan; 
+ + /* FIXME: Support more formats */ + assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH || + (format_desc->layout == UTIL_FORMAT_LAYOUT_ARRAY && + format_desc->block.bits == format_desc->channel[0].size)); + assert(format_desc->block.width == 1); + assert(format_desc->block.height == 1); + assert(format_desc->block.bits <= 32); + + /* Decode the input vector components */ + start = 0; + for (chan = 0; chan < 4; ++chan) { + unsigned width = format_desc->channel[chan].size; + unsigned stop = start + width; + LLVMValueRef input; + + input = packed; + + switch(format_desc->channel[chan].type) { + case UTIL_FORMAT_TYPE_VOID: + input = NULL; + break; + + case UTIL_FORMAT_TYPE_UNSIGNED: + if(type.floating) { + if(start) + input = LLVMBuildLShr(builder, input, lp_build_int_const_scalar(type, start), ""); + if(stop < format_desc->block.bits) { + unsigned mask = ((unsigned long long)1 << width) - 1; + input = LLVMBuildAnd(builder, input, lp_build_int_const_scalar(type, mask), ""); + } + + if(format_desc->channel[chan].normalized) + input = lp_build_unsigned_norm_to_float(builder, width, type, input); + else + input = LLVMBuildFPToSI(builder, input, lp_build_vec_type(type), ""); + } + else { + /* FIXME */ + assert(0); + input = lp_build_undef(type); + } + break; + + default: + /* fall through */ + input = lp_build_undef(type); + break; + } + + inputs[chan] = input; + + start = stop; + } + + lp_build_format_swizzle_soa(format_desc, type, inputs, rgba); +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_interp.c b/src/gallium/auxiliary/gallivm/lp_bld_interp.c new file mode 100644 index 00000000000..a6acaead887 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_interp.c @@ -0,0 +1,407 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Position and shader input interpolation. + * + * @author Jose Fonseca + */ + +#include "pipe/p_shader_tokens.h" +#include "util/u_debug.h" +#include "util/u_memory.h" +#include "util/u_math.h" +#include "tgsi/tgsi_parse.h" +#include "lp_bld_debug.h" +#include "lp_bld_const.h" +#include "lp_bld_arit.h" +#include "lp_bld_swizzle.h" +#include "lp_bld_interp.h" + + +/* + * The shader JIT function operates on blocks of quads. + * Each block has 2x2 quads and each quad has 2x2 pixels. 
+ *
+ * We iterate over the quads in order 0, 1, 2, 3:
+ *
+ *   #################
+ *   #   |   #   |   #
+ *   #---0---#---1---#
+ *   #   |   #   |   #
+ *   #################
+ *   #   |   #   |   #
+ *   #---2---#---3---#
+ *   #   |   #   |   #
+ *   #################
+ *
+ * Within each quad, we have four pixels which are represented in SOA
+ * order:
+ *
+ *   #########
+ *   # 0 | 1 #
+ *   #---+---#
+ *   # 2 | 3 #
+ *   #########
+ *
+ * So the green channel (for example) of the four pixels is stored in
+ * a single vector register: {g0, g1, g2, g3}.
+ */
+
+
+/**
+ * Attach a readable name to an LLVM value.
+ *
+ * Attribute slot 0 is named "pos"; any other slot N is named "input<N-1>".
+ * The channel index selects one of x/y/z/w and the suffix is appended
+ * verbatim.
+ */
+static void
+attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix)
+{
+   if(attrib == 0)
+      lp_build_name(val, "pos.%c%s", "xyzw"[chan], suffix);
+   else
+      lp_build_name(val, "input%u.%c%s", attrib - 1, "xyzw"[chan], suffix);
+}
+
+
+/**
+ * Initialize the bld->a0, dadx, dady fields.  This involves fetching
+ * those values from the arrays which are passed into the JIT function.
+ *
+ * Only the channels enabled in bld->mask[attrib] are loaded.  Each scalar
+ * is read from element (attrib * NUM_CHANNELS + chan) of the respective
+ * array and broadcast with lp_build_broadcast_scalar().  For constant
+ * interpolation only a0 is loaded; dadx/dady are stored as NULL.
+ */
+static void
+coeffs_init(struct lp_build_interp_soa_context *bld,
+            LLVMValueRef a0_ptr,
+            LLVMValueRef dadx_ptr,
+            LLVMValueRef dady_ptr)
+{
+   LLVMBuilderRef builder = bld->base.builder;
+   unsigned attrib;
+   unsigned chan;
+
+   for(attrib = 0; attrib < bld->num_attribs; ++attrib) {
+      unsigned mask = bld->mask[attrib];
+      unsigned mode = bld->mode[attrib];
+      for(chan = 0; chan < NUM_CHANNELS; ++chan) {
+         if(mask & (1 << chan)) {
+            /* Flat index of this (attrib, chan) pair in the coefficient arrays */
+            LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), attrib*NUM_CHANNELS + chan, 0);
+            LLVMValueRef a0 = NULL;
+            LLVMValueRef dadx = NULL;
+            LLVMValueRef dady = NULL;
+
+            /*
+             * The cases deliberately cascade: perspective and linear need
+             * the gradients and then, like constant, the a0 term.
+             */
+            switch( mode ) {
+            case TGSI_INTERPOLATE_PERSPECTIVE:
+               /* fall-through */
+
+            case TGSI_INTERPOLATE_LINEAR:
+               dadx = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dadx_ptr, &index, 1, ""), "");
+               dady = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dady_ptr, &index, 1, ""), "");
+               dadx = lp_build_broadcast_scalar(&bld->base, dadx);
+               dady = lp_build_broadcast_scalar(&bld->base, dady);
+               attrib_name(dadx, attrib, chan, ".dadx");
+               attrib_name(dady, attrib, chan, ".dady");
+               /* fall-through */
+
+            case TGSI_INTERPOLATE_CONSTANT:
+               a0 = LLVMBuildLoad(builder, LLVMBuildGEP(builder, a0_ptr, &index, 1, ""), "");
+               a0 = lp_build_broadcast_scalar(&bld->base, a0);
+               attrib_name(a0, attrib, chan, ".a0");
+               break;
+
+            default:
+               /* Unknown interpolation mode: all three values stay NULL */
+               assert(0);
+               break;
+            }
+
+            bld->a0  [attrib][chan] = a0;
+            bld->dadx[attrib][chan] = dadx;
+            bld->dady[attrib][chan] = dady;
+         }
+      }
+   }
+}
+
+
+/**
+ * Emit LLVM code to compute the fragment shader input attribute values.
+ * For example, for a color input, we'll compute red, green, blue and alpha
+ * values for the four pixels in a quad.
+ * Recall that we're operating on 4-element vectors so each arithmetic
+ * operation is operating on the four pixels in a quad.
+ *
+ * For every enabled channel this stores the value before the perspective
+ * divide in bld->attribs_pre and the final value in bld->attribs.
+ */
+static void
+attribs_init(struct lp_build_interp_soa_context *bld)
+{
+   LLVMValueRef x = bld->pos[0];
+   LLVMValueRef y = bld->pos[1];
+   /* 1/w, built lazily the first time a perspective attribute is seen */
+   LLVMValueRef oow = NULL;
+   unsigned attrib;
+   unsigned chan;
+
+   for(attrib = 0; attrib < bld->num_attribs; ++attrib) {
+      unsigned mask = bld->mask[attrib];
+      unsigned mode = bld->mode[attrib];
+      for(chan = 0; chan < NUM_CHANNELS; ++chan) {
+         if(mask & (1 << chan)) {
+            LLVMValueRef a0   = bld->a0  [attrib][chan];
+            LLVMValueRef dadx = bld->dadx[attrib][chan];
+            LLVMValueRef dady = bld->dady[attrib][chan];
+            LLVMValueRef res;
+
+            res = a0;
+
+            if (mode != TGSI_INTERPOLATE_CONSTANT) {
+               /* res = res + x * dadx */
+               res = lp_build_add(&bld->base, res, lp_build_mul(&bld->base, x, dadx));
+               /* res = res + y * dady */
+               res = lp_build_add(&bld->base, res, lp_build_mul(&bld->base, y, dady));
+            }
+
+            /* Keep the value of the attribute before perspective divide for faster updates */
+            bld->attribs_pre[attrib][chan] = res;
+
+            if (mode == TGSI_INTERPOLATE_PERSPECTIVE) {
+               /*
+                * pos[3] aliases attribs[0][3]; attribute 0 is visited first
+                * in this loop, so w is already available here.
+                */
+               LLVMValueRef w = bld->pos[3];
+               assert(attrib != 0);
+               if(!oow)
+                  oow = lp_build_rcp(&bld->base, w);
+               res = lp_build_mul(&bld->base, res, oow);
+            }
+
+            attrib_name(res, attrib, chan, "");
+
+            bld->attribs[attrib][chan] = res;
+         }
+      }
+   }
+}
+
+
+/**
+ * Increment the shader input attribute values.
+ * This is called when we move from one quad to the next.
+ *
+ * Attributes with constant interpolation are left untouched.  For the
+ * others the value is rebuilt from bld->attribs_pre (the pre-divide value
+ * stored by attribs_init) plus 2*dadx and/or 2*dady depending on
+ * quad_index, followed by the perspective divide where required.
+ */
+static void
+attribs_update(struct lp_build_interp_soa_context *bld, int quad_index)
+{
+   /* 1/w for the current quad, built lazily */
+   LLVMValueRef oow = NULL;
+   unsigned attrib;
+   unsigned chan;
+
+   assert(quad_index < 4);
+
+   for(attrib = 0; attrib < bld->num_attribs; ++attrib) {
+      unsigned mask = bld->mask[attrib];
+      unsigned mode = bld->mode[attrib];
+
+      if (mode != TGSI_INTERPOLATE_CONSTANT) {
+         for(chan = 0; chan < NUM_CHANNELS; ++chan) {
+            if(mask & (1 << chan)) {
+               LLVMValueRef dadx = bld->dadx[attrib][chan];
+               LLVMValueRef dady = bld->dady[attrib][chan];
+               LLVMValueRef res;
+
+               res = bld->attribs_pre[attrib][chan];
+
+               if (quad_index == 1 || quad_index == 3) {
+                  /* top-right or bottom-right quad */
+                  /* build res = res + dadx + dadx */
+                  res = lp_build_add(&bld->base, res, dadx);
+                  res = lp_build_add(&bld->base, res, dadx);
+               }
+
+               if (quad_index == 2 || quad_index == 3) {
+                  /* bottom-left or bottom-right quad */
+                  /* build res = res + dady + dady */
+                  res = lp_build_add(&bld->base, res, dady);
+                  res = lp_build_add(&bld->base, res, dady);
+               }
+
+               /*
+                * attribs_pre is not written back: the offsets above are
+                * chosen per quad_index relative to the stored base value.
+                */
+               //XXX bld->attribs_pre[attrib][chan] = res;
+
+               if (mode == TGSI_INTERPOLATE_PERSPECTIVE) {
+                  /* attribute 0 (position) was updated earlier in this loop */
+                  LLVMValueRef w = bld->pos[3];
+                  assert(attrib != 0);
+                  if(!oow)
+                     oow = lp_build_rcp(&bld->base, w);
+                  res = lp_build_mul(&bld->base, res, oow);
+               }
+
+               attrib_name(res, attrib, chan, "");
+
+               bld->attribs[attrib][chan] = res;
+            }
+         }
+      }
+   }
+}
+
+
+/**
+ * Generate the position vectors.
+ *
+ * Parameter x0, y0 are the integer values with the quad upper left coordinates.
+ *
+ * The values are named "pos.x"/"pos.y" and stored as channels 0 and 1 of
+ * attribute slot 0.
+ */
+static void
+pos_init(struct lp_build_interp_soa_context *bld,
+         LLVMValueRef x0,
+         LLVMValueRef y0)
+{
+   lp_build_name(x0, "pos.x");
+   lp_build_name(y0, "pos.y");
+
+   bld->attribs[0][0] = x0;
+   bld->attribs[0][1] = y0;
+}
+
+
+/**
+ * Update quad position values when moving to the next quad.
+ *
+ * The step is applied to the x/y values currently stored in attribute
+ * slot 0, i.e. it is relative to the previous call.
+ * NOTE(review): this appears to assume quad_index is visited in the
+ * order 1, 2, 3 -- confirm against the caller.
+ */
+static void
+pos_update(struct lp_build_interp_soa_context *bld, int quad_index)
+{
+   LLVMValueRef x = bld->attribs[0][0];
+   LLVMValueRef y = bld->attribs[0][1];
+   /* Distance between neighbouring quads, in pixels */
+   const int xstep = 2, ystep = 2;
+
+   if (quad_index == 1 || quad_index == 3) {
+      /* top-right or bottom-right quad in block */
+      /* build x += xstep */
+      x = lp_build_add(&bld->base, x,
+                       lp_build_const_scalar(bld->base.type, xstep));
+   }
+
+   if (quad_index == 2) {
+      /* bottom-left quad in block */
+      /* build y += ystep */
+      y = lp_build_add(&bld->base, y,
+                       lp_build_const_scalar(bld->base.type, ystep));
+      /* build x -= xstep */
+      x = lp_build_sub(&bld->base, x,
+                       lp_build_const_scalar(bld->base.type, xstep));
+   }
+
+   lp_build_name(x, "pos.x");
+   lp_build_name(y, "pos.y");
+
+   bld->attribs[0][0] = x;
+   bld->attribs[0][1] = y;
+}
+
+
+/**
+ * Initialize fragment shader input attribute info.
+ *
+ * Clears *bld, sets up slot 0 as the position (z and w channels, linear
+ * interpolation), then walks the TGSI tokens and records the usage mask
+ * and interpolation mode of every input declaration in slot 1 + index.
+ * Finally the coefficients, position and attribute values for the first
+ * quad are emitted.
+ *
+ * \param tokens     TGSI tokens of the fragment shader
+ * \param flatshade  when set, color inputs use constant interpolation
+ * \param a0_ptr, dadx_ptr, dady_ptr  coefficient arrays read by coeffs_init
+ * \param x0, y0     position of the first quad, see pos_init
+ */
+void
+lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
+                         const struct tgsi_token *tokens,
+                         boolean flatshade,
+                         LLVMBuilderRef builder,
+                         struct lp_type type,
+                         LLVMValueRef a0_ptr,
+                         LLVMValueRef dadx_ptr,
+                         LLVMValueRef dady_ptr,
+                         LLVMValueRef x0,
+                         LLVMValueRef y0)
+{
+   struct tgsi_parse_context parse;
+   struct tgsi_full_declaration *decl;
+
+   memset(bld, 0, sizeof *bld);
+
+   lp_build_context_init(&bld->base, builder, type);
+
+   /* For convenience */
+   bld->pos = bld->attribs[0];
+   bld->inputs = (const LLVMValueRef (*)[NUM_CHANNELS]) bld->attribs[1];
+
+   /* Position */
+   bld->num_attribs = 1;
+   bld->mask[0] = TGSI_WRITEMASK_ZW;
+   bld->mode[0] = TGSI_INTERPOLATE_LINEAR;
+
+   /* Inputs */
+   tgsi_parse_init( &parse, tokens );
+   while( !tgsi_parse_end_of_tokens( &parse ) ) {
+      tgsi_parse_token( &parse );
+
+      switch( parse.FullToken.Token.Type ) {
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         decl = &parse.FullToken.FullDeclaration;
+         if( decl->Declaration.File == TGSI_FILE_INPUT ) {
+            unsigned first, last, mask;
+            unsigned attrib;
+
+            first = decl->Range.First;
+            last = decl->Range.Last;
+            mask = decl->Declaration.UsageMask;
+
+            /*
+             * Shader input i lives in slot 1 + i (slot 0 is the position).
+             * NOTE(review): the range is not checked against the array
+             * size (1 + PIPE_MAX_SHADER_INPUTS) -- presumably guaranteed
+             * by the shader, confirm.
+             */
+            for( attrib = first; attrib <= last; ++attrib ) {
+               bld->mask[1 + attrib] = mask;
+
+               /* XXX: have mesa set INTERP_CONSTANT in the fragment
+                * shader.
+                */
+               if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
+                   flatshade)
+                  bld->mode[1 + attrib] = TGSI_INTERPOLATE_CONSTANT;
+               else
+                  bld->mode[1 + attrib] = decl->Declaration.Interpolate;
+            }
+
+            /* one slot for the position plus inputs 0..last */
+            bld->num_attribs = MAX2(bld->num_attribs, 1 + last + 1);
+         }
+         break;
+
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+      case TGSI_TOKEN_TYPE_IMMEDIATE:
+         break;
+
+      default:
+         assert( 0 );
+      }
+   }
+   tgsi_parse_free( &parse );
+
+   coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr);
+
+   pos_init(bld, x0, y0);
+
+   attribs_init(bld);
+}
+
+
+/**
+ * Advance the position and inputs to the given quad within the block.
+ *
+ * The position is updated first because the perspective divide performed
+ * for the inputs reads the freshly interpolated w.
+ */
+void
+lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld,
+                           int quad_index)
+{
+   assert(quad_index < 4);
+
+   pos_update(bld, quad_index);
+
+   attribs_update(bld, quad_index);
+}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_interp.h b/src/gallium/auxiliary/gallivm/lp_bld_interp.h
new file mode 100644
index 00000000000..ca958cdf343
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_interp.h
@@ -0,0 +1,96 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Position and shader input interpolation.
+ *
+ * Special attention is given to the interpolation of side by side quads.
+ * Multiplications are made only for the first quad. Interpolation of
+ * inputs for subsequent quads is done exclusively with additions, and
+ * a perspective divide if necessary.
+ *
+ * @author Jose Fonseca
+ */
+
+#ifndef LP_BLD_INTERP_H
+#define LP_BLD_INTERP_H
+
+
+/* NOTE(review): the include target was lost when this page was extracted;
+ * presumably <llvm-c/Core.h> (LLVMValueRef etc.) -- confirm. */
+#include
+
+#include "tgsi/tgsi_exec.h"
+
+#include "lp_bld_type.h"
+
+
+struct tgsi_token;
+
+
+/**
+ * State for interpolating the position and the fragment shader inputs.
+ *
+ * Every per-attribute array has 1 + PIPE_MAX_SHADER_INPUTS slots: slot 0
+ * holds the position and slot 1 + i holds shader input i.
+ */
+struct lp_build_interp_soa_context
+{
+   struct lp_build_context base;
+
+   /* Number of slots in use, including the position slot */
+   unsigned num_attribs;
+   /* Per-slot channel mask; bit (1 << chan) set means the channel is used */
+   unsigned mask[1 + PIPE_MAX_SHADER_INPUTS];
+   /* Per-slot interpolation mode, one of TGSI_INTERPOLATE_x */
+   unsigned mode[1 + PIPE_MAX_SHADER_INPUTS];
+
+   /* Interpolation coefficients, broadcast to vectors (NULL when unused) */
+   LLVMValueRef a0  [1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+   LLVMValueRef dadx[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+   LLVMValueRef dady[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+
+   /* Attribute values before perspective divide */
+   LLVMValueRef attribs_pre[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+
+   /* Final attribute values for the current quad */
+   LLVMValueRef attribs[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+
+   /*
+    * Convenience pointers. Callers may access these.
+    * pos aliases attribs[0] and inputs aliases attribs[1] onwards.
+    */
+   const LLVMValueRef *pos;
+   const LLVMValueRef (*inputs)[NUM_CHANNELS];
+};
+
+
+void
+lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
+                         const struct tgsi_token *tokens,
+                         boolean flatshade,
+                         LLVMBuilderRef builder,
+                         struct lp_type type,
+                         LLVMValueRef a0_ptr,
+                         LLVMValueRef dadx_ptr,
+                         LLVMValueRef dady_ptr,
+                         LLVMValueRef x0,
+                         LLVMValueRef y0);
+
+void
+lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld,
+                           int quad_index);
+
+
+#endif /* LP_BLD_INTERP_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.c b/src/gallium/auxiliary/gallivm/lp_bld_intr.c
new file mode 100644
index 00000000000..9895749d568
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.c
@@ -0,0 +1,192 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * @file
+ * Helpers for emitting intrinsic calls.
+ *
+ * LLVM vanilla IR doesn't represent all basic arithmetic operations we care
+ * about, and it is often necessary to resort to target-specific intrinsics
+ * for performance or convenience.
+ *
+ * Ideally we would like to stay away from target specific intrinsics and
+ * move all the instruction selection logic into upstream LLVM where it belongs.
+ *
+ * These functions are also used for calling C functions provided by us from
+ * generated LLVM code.
+ *
+ * @author Jose Fonseca
+ */
+
+
+#include "util/u_debug.h"
+
+#include "lp_bld_intr.h"
+
+
+/**
+ * Declare an external function with C calling convention in the module.
+ *
+ * The name must not be declared yet (asserted).  When the name starts
+ * with "llvm." the function is additionally asserted to be recognized
+ * by LLVM as an intrinsic.
+ *
+ * \return the newly added function declaration
+ */
+LLVMValueRef
+lp_declare_intrinsic(LLVMModuleRef module,
+                     const char *name,
+                     LLVMTypeRef ret_type,
+                     LLVMTypeRef *arg_types,
+                     unsigned num_args)
+{
+   LLVMTypeRef function_type;
+   LLVMValueRef function;
+
+   assert(!LLVMGetNamedFunction(module, name));
+
+   function_type = LLVMFunctionType(ret_type, arg_types, num_args, 0);
+   function = LLVMAddFunction(module, name, function_type);
+
+   LLVMSetFunctionCallConv(function, LLVMCCallConv);
+   LLVMSetLinkage(function, LLVMExternalLinkage);
+
+   assert(LLVMIsDeclaration(function));
+
+   /* Character-wise test for the "llvm." prefix */
+   if(name[0] == 'l' &&
+      name[1] == 'l' &&
+      name[2] == 'v' &&
+      name[3] == 'm' &&
+      name[4] == '.')
+      assert(LLVMGetIntrinsicID(function));
+
+   return function;
+}
+
+
+/**
+ * Emit a call to the named function, declaring it on first use.
+ *
+ * The module is the one owning the builder's current insertion block.
+ * When the function is not declared yet its argument types are taken
+ * from the types of the actual arguments.
+ *
+ * \param num_args  at most LP_MAX_FUNC_ARGS (asserted when declaring)
+ * \return the call instruction's value
+ */
+LLVMValueRef
+lp_build_intrinsic(LLVMBuilderRef builder,
+                   const char *name,
+                   LLVMTypeRef ret_type,
+                   LLVMValueRef *args,
+                   unsigned num_args)
+{
+   LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
+   LLVMValueRef function;
+
+   function = LLVMGetNamedFunction(module, name);
+   if(!function) {
+      LLVMTypeRef arg_types[LP_MAX_FUNC_ARGS];
+      unsigned i;
+
+      assert(num_args <= LP_MAX_FUNC_ARGS);
+
+      for(i = 0; i < num_args; ++i) {
+         assert(args[i]);
+         arg_types[i] = LLVMTypeOf(args[i]);
+      }
+
+      function = lp_declare_intrinsic(module, name, ret_type, arg_types, num_args);
+   }
+
+   return LLVMBuildCall(builder, function, args, num_args, "");
+}
+
+
+/**
+ * Convenience wrapper of lp_build_intrinsic() for one argument.
+ */
+LLVMValueRef
+lp_build_intrinsic_unary(LLVMBuilderRef builder,
+                         const char *name,
+                         LLVMTypeRef ret_type,
+                         LLVMValueRef a)
+{
+   return lp_build_intrinsic(builder, name, ret_type, &a, 1);
+}
+
+
+/**
+ * Convenience wrapper of lp_build_intrinsic() for two arguments.
+ */
+LLVMValueRef
+lp_build_intrinsic_binary(LLVMBuilderRef builder,
+                          const char *name,
+                          LLVMTypeRef ret_type,
+                          LLVMValueRef a,
+                          LLVMValueRef b)
+{
+   LLVMValueRef args[2];
+
+   args[0] = a;
+   args[1] = b;
+
+   return lp_build_intrinsic(builder, name, ret_type, args, 2);
+}
+
+
+/**
+ * Apply a scalar function to every element of the vector arguments.
+ *
+ * For each element index of the vector type ret_type, the corresponding
+ * element is extracted from every argument, the named function is called
+ * on those scalars and the result is inserted into the returned vector.
+ *
+ * \param ret_type  vector type of the result
+ * \param num_args  at most LP_MAX_FUNC_ARGS (asserted)
+ */
+LLVMValueRef
+lp_build_intrinsic_map(LLVMBuilderRef builder,
+                       const char *name,
+                       LLVMTypeRef ret_type,
+                       LLVMValueRef *args,
+                       unsigned num_args)
+{
+   LLVMTypeRef ret_elem_type = LLVMGetElementType(ret_type);
+   unsigned n = LLVMGetVectorSize(ret_type);
+   unsigned i, j;
+   LLVMValueRef res;
+
+   assert(num_args <= LP_MAX_FUNC_ARGS);
+
+   /* Start from an undefined vector and fill it element by element */
+   res = LLVMGetUndef(ret_type);
+   for(i = 0; i < n; ++i) {
+      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+      LLVMValueRef arg_elems[LP_MAX_FUNC_ARGS];
+      LLVMValueRef res_elem;
+      for(j = 0; j < num_args; ++j)
+         arg_elems[j] = LLVMBuildExtractElement(builder, args[j], index, "");
+      res_elem = lp_build_intrinsic(builder, name, ret_elem_type, arg_elems, num_args);
+      res = LLVMBuildInsertElement(builder, res, res_elem, index, "");
+   }
+
+   return res;
+}
+
+
+/**
+ * Convenience wrapper of lp_build_intrinsic_map() for one argument.
+ */
+LLVMValueRef
+lp_build_intrinsic_map_unary(LLVMBuilderRef builder,
+                             const char *name,
+                             LLVMTypeRef ret_type,
+                             LLVMValueRef a)
+{
+   return lp_build_intrinsic_map(builder, name, ret_type, &a, 1);
+}
+
+
+/**
+ * Convenience wrapper of lp_build_intrinsic_map() for two arguments.
+ */
+LLVMValueRef
+lp_build_intrinsic_map_binary(LLVMBuilderRef builder,
+                              const char *name,
+                              LLVMTypeRef ret_type,
+                              LLVMValueRef a,
+                              LLVMValueRef b)
+{
+   LLVMValueRef args[2];
+
+   args[0] = a;
+   args[1] = b;
+
+   return lp_build_intrinsic_map(builder, name, ret_type, args, 2);
+}
+
+
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.h b/src/gallium/auxiliary/gallivm/lp_bld_intr.h
new file mode 100644
index 00000000000..f813f27074b
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.h
@@ -0,0 +1,102 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Helper functions for calling intrinsics.
+ *
+ * @author Jose Fonseca
+ */
+
+
+#ifndef LP_BLD_INTR_H
+#define LP_BLD_INTR_H
+
+
+/* NOTE(review): the include target was lost when this page was extracted;
+ * presumably <llvm-c/Core.h> -- confirm. */
+#include
+
+
+/**
+ * Max number of arguments in an intrinsic.
+ */ +#define LP_MAX_FUNC_ARGS 32 + + +LLVMValueRef +lp_declare_intrinsic(LLVMModuleRef module, + const char *name, + LLVMTypeRef ret_type, + LLVMTypeRef *arg_types, + unsigned num_args); + +LLVMValueRef +lp_build_intrinsic(LLVMBuilderRef builder, + const char *name, + LLVMTypeRef ret_type, + LLVMValueRef *args, + unsigned num_args); + + +LLVMValueRef +lp_build_intrinsic_unary(LLVMBuilderRef builder, + const char *name, + LLVMTypeRef ret_type, + LLVMValueRef a); + + +LLVMValueRef +lp_build_intrinsic_binary(LLVMBuilderRef builder, + const char *name, + LLVMTypeRef ret_type, + LLVMValueRef a, + LLVMValueRef b); + + +LLVMValueRef +lp_build_intrinsic_map(LLVMBuilderRef builder, + const char *name, + LLVMTypeRef ret_type, + LLVMValueRef *args, + unsigned num_args); + + +LLVMValueRef +lp_build_intrinsic_map_unary(LLVMBuilderRef builder, + const char *name, + LLVMTypeRef ret_type, + LLVMValueRef a); + + +LLVMValueRef +lp_build_intrinsic_map_binary(LLVMBuilderRef builder, + const char *name, + LLVMTypeRef ret_type, + LLVMValueRef a, + LLVMValueRef b); + + +#endif /* !LP_BLD_INTR_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c new file mode 100644 index 00000000000..d23de4f0ef8 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c @@ -0,0 +1,421 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Helper functions for logical operations. + * + * @author Jose Fonseca + */ + + +#include "util/u_cpu_detect.h" +#include "util/u_debug.h" + +#include "lp_bld_type.h" +#include "lp_bld_const.h" +#include "lp_bld_intr.h" +#include "lp_bld_logic.h" + + +/** + * Build code to compare two values 'a' and 'b' of 'type' using the given func. 
+ * \param func one of PIPE_FUNC_x + */ +LLVMValueRef +lp_build_compare(LLVMBuilderRef builder, + const struct lp_type type, + unsigned func, + LLVMValueRef a, + LLVMValueRef b) +{ + LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMValueRef zeros = LLVMConstNull(int_vec_type); + LLVMValueRef ones = LLVMConstAllOnes(int_vec_type); + LLVMValueRef cond; + LLVMValueRef res; + unsigned i; + + assert(func >= PIPE_FUNC_NEVER); + assert(func <= PIPE_FUNC_ALWAYS); + + if(func == PIPE_FUNC_NEVER) + return zeros; + if(func == PIPE_FUNC_ALWAYS) + return ones; + + /* TODO: optimize the constant case */ + + /* XXX: It is not clear if we should use the ordered or unordered operators */ + +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + if(type.width * type.length == 128) { + if(type.floating && util_cpu_caps.has_sse) { + /* float[4] comparison */ + LLVMValueRef args[3]; + unsigned cc; + boolean swap; + + swap = FALSE; + switch(func) { + case PIPE_FUNC_EQUAL: + cc = 0; + break; + case PIPE_FUNC_NOTEQUAL: + cc = 4; + break; + case PIPE_FUNC_LESS: + cc = 1; + break; + case PIPE_FUNC_LEQUAL: + cc = 2; + break; + case PIPE_FUNC_GREATER: + cc = 1; + swap = TRUE; + break; + case PIPE_FUNC_GEQUAL: + cc = 2; + swap = TRUE; + break; + default: + assert(0); + return lp_build_undef(type); + } + + if(swap) { + args[0] = b; + args[1] = a; + } + else { + args[0] = a; + args[1] = b; + } + + args[2] = LLVMConstInt(LLVMInt8Type(), cc, 0); + res = lp_build_intrinsic(builder, + "llvm.x86.sse.cmp.ps", + vec_type, + args, 3); + res = LLVMBuildBitCast(builder, res, int_vec_type, ""); + return res; + } + else if(util_cpu_caps.has_sse2) { + /* int[4] comparison */ + static const struct { + unsigned swap:1; + unsigned eq:1; + unsigned gt:1; + unsigned not:1; + } table[] = { + {0, 0, 0, 1}, /* PIPE_FUNC_NEVER */ + {1, 0, 1, 0}, /* PIPE_FUNC_LESS */ + {0, 1, 0, 0}, /* PIPE_FUNC_EQUAL */ + {0, 0, 1, 1}, /* PIPE_FUNC_LEQUAL */ + {0, 0, 
1, 0}, /* PIPE_FUNC_GREATER */ + {0, 1, 0, 1}, /* PIPE_FUNC_NOTEQUAL */ + {1, 0, 1, 1}, /* PIPE_FUNC_GEQUAL */ + {0, 0, 0, 0} /* PIPE_FUNC_ALWAYS */ + }; + const char *pcmpeq; + const char *pcmpgt; + LLVMValueRef args[2]; + LLVMValueRef res; + + switch (type.width) { + case 8: + pcmpeq = "llvm.x86.sse2.pcmpeq.b"; + pcmpgt = "llvm.x86.sse2.pcmpgt.b"; + break; + case 16: + pcmpeq = "llvm.x86.sse2.pcmpeq.w"; + pcmpgt = "llvm.x86.sse2.pcmpgt.w"; + break; + case 32: + pcmpeq = "llvm.x86.sse2.pcmpeq.d"; + pcmpgt = "llvm.x86.sse2.pcmpgt.d"; + break; + default: + assert(0); + return lp_build_undef(type); + } + + /* There are no signed byte and unsigned word/dword comparison + * instructions. So flip the sign bit so that the results match. + */ + if(table[func].gt && + ((type.width == 8 && type.sign) || + (type.width != 8 && !type.sign))) { + LLVMValueRef msb = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1)); + a = LLVMBuildXor(builder, a, msb, ""); + b = LLVMBuildXor(builder, b, msb, ""); + } + + if(table[func].swap) { + args[0] = b; + args[1] = a; + } + else { + args[0] = a; + args[1] = b; + } + + if(table[func].eq) + res = lp_build_intrinsic(builder, pcmpeq, vec_type, args, 2); + else if (table[func].gt) + res = lp_build_intrinsic(builder, pcmpgt, vec_type, args, 2); + else + res = LLVMConstNull(vec_type); + + if(table[func].not) + res = LLVMBuildNot(builder, res, ""); + + return res; + } + } +#endif + + if(type.floating) { + LLVMRealPredicate op; + switch(func) { + case PIPE_FUNC_NEVER: + op = LLVMRealPredicateFalse; + break; + case PIPE_FUNC_ALWAYS: + op = LLVMRealPredicateTrue; + break; + case PIPE_FUNC_EQUAL: + op = LLVMRealUEQ; + break; + case PIPE_FUNC_NOTEQUAL: + op = LLVMRealUNE; + break; + case PIPE_FUNC_LESS: + op = LLVMRealULT; + break; + case PIPE_FUNC_LEQUAL: + op = LLVMRealULE; + break; + case PIPE_FUNC_GREATER: + op = LLVMRealUGT; + break; + case PIPE_FUNC_GEQUAL: + op = LLVMRealUGE; + break; + default: + assert(0); + return 
lp_build_undef(type); + } + +#if 0 + /* XXX: Although valid IR, no LLVM target currently support this */ + cond = LLVMBuildFCmp(builder, op, a, b, ""); + res = LLVMBuildSelect(builder, cond, ones, zeros, ""); +#else + debug_printf("%s: warning: using slow element-wise vector comparison\n", + __FUNCTION__); + res = LLVMGetUndef(int_vec_type); + for(i = 0; i < type.length; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + cond = LLVMBuildFCmp(builder, op, + LLVMBuildExtractElement(builder, a, index, ""), + LLVMBuildExtractElement(builder, b, index, ""), + ""); + cond = LLVMBuildSelect(builder, cond, + LLVMConstExtractElement(ones, index), + LLVMConstExtractElement(zeros, index), + ""); + res = LLVMBuildInsertElement(builder, res, cond, index, ""); + } +#endif + } + else { + LLVMIntPredicate op; + switch(func) { + case PIPE_FUNC_EQUAL: + op = LLVMIntEQ; + break; + case PIPE_FUNC_NOTEQUAL: + op = LLVMIntNE; + break; + case PIPE_FUNC_LESS: + op = type.sign ? LLVMIntSLT : LLVMIntULT; + break; + case PIPE_FUNC_LEQUAL: + op = type.sign ? LLVMIntSLE : LLVMIntULE; + break; + case PIPE_FUNC_GREATER: + op = type.sign ? LLVMIntSGT : LLVMIntUGT; + break; + case PIPE_FUNC_GEQUAL: + op = type.sign ? 
LLVMIntSGE : LLVMIntUGE; + break; + default: + assert(0); + return lp_build_undef(type); + } + +#if 0 + /* XXX: Although valid IR, no LLVM target currently support this */ + cond = LLVMBuildICmp(builder, op, a, b, ""); + res = LLVMBuildSelect(builder, cond, ones, zeros, ""); +#else + debug_printf("%s: warning: using slow element-wise int vector comparison\n", + __FUNCTION__); + res = LLVMGetUndef(int_vec_type); + for(i = 0; i < type.length; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + cond = LLVMBuildICmp(builder, op, + LLVMBuildExtractElement(builder, a, index, ""), + LLVMBuildExtractElement(builder, b, index, ""), + ""); + cond = LLVMBuildSelect(builder, cond, + LLVMConstExtractElement(ones, index), + LLVMConstExtractElement(zeros, index), + ""); + res = LLVMBuildInsertElement(builder, res, cond, index, ""); + } +#endif + } + + return res; +} + + + +/** + * Build code to compare two values 'a' and 'b' using the given func. + * \param func one of PIPE_FUNC_x + */ +LLVMValueRef +lp_build_cmp(struct lp_build_context *bld, + unsigned func, + LLVMValueRef a, + LLVMValueRef b) +{ + return lp_build_compare(bld->builder, bld->type, func, a, b); +} + + +LLVMValueRef +lp_build_select(struct lp_build_context *bld, + LLVMValueRef mask, + LLVMValueRef a, + LLVMValueRef b) +{ + struct lp_type type = bld->type; + LLVMValueRef res; + + if(a == b) + return a; + + if(type.floating) { + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + b = LLVMBuildBitCast(bld->builder, b, int_vec_type, ""); + } + + a = LLVMBuildAnd(bld->builder, a, mask, ""); + + /* This often gets translated to PANDN, but sometimes the NOT is + * pre-computed and stored in another constant. The best strategy depends + * on available registers, so it is not a big deal -- hopefully LLVM does + * the right decision attending the rest of the program. 
+ */ + b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), ""); + + res = LLVMBuildOr(bld->builder, a, b, ""); + + if(type.floating) { + LLVMTypeRef vec_type = lp_build_vec_type(type); + res = LLVMBuildBitCast(bld->builder, res, vec_type, ""); + } + + return res; +} + + +LLVMValueRef +lp_build_select_aos(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef b, + const boolean cond[4]) +{ + const struct lp_type type = bld->type; + const unsigned n = type.length; + unsigned i, j; + + if(a == b) + return a; + if(cond[0] && cond[1] && cond[2] && cond[3]) + return a; + if(!cond[0] && !cond[1] && !cond[2] && !cond[3]) + return b; + if(a == bld->undef || b == bld->undef) + return bld->undef; + + /* + * There are three major ways of accomplishing this: + * - with a shuffle, + * - with a select, + * - or with a bit mask. + * + * Select isn't supported for vector types yet. + * The flip between these is empirical and might need to be. + */ + if (n <= 4) { + /* + * Shuffle. + */ + LLVMTypeRef elem_type = LLVMInt32Type(); + LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; + + for(j = 0; j < n; j += 4) + for(i = 0; i < 4; ++i) + shuffles[j + i] = LLVMConstInt(elem_type, (cond[i] ? 0 : n) + j + i, 0); + + return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), ""); + } + else { +#if 0 + /* XXX: Unfortunately select of vectors do not work */ + /* Use a select */ + LLVMTypeRef elem_type = LLVMInt1Type(); + LLVMValueRef cond[LP_MAX_VECTOR_LENGTH]; + + for(j = 0; j < n; j += 4) + for(i = 0; i < 4; ++i) + cond[j + i] = LLVMConstInt(elem_type, cond[i] ? 
1 : 0, 0); + + return LLVMBuildSelect(bld->builder, LLVMConstVector(cond, n), a, b, ""); +#else + LLVMValueRef mask = lp_build_const_mask_aos(type, cond); + return lp_build_select(bld, mask, a, b); +#endif + } +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.h b/src/gallium/auxiliary/gallivm/lp_bld_logic.h new file mode 100644 index 00000000000..40d64eb2c19 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.h @@ -0,0 +1,80 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Helper functions for logical operations. 
+ * + * @author Jose Fonseca + */ + + +#ifndef LP_BLD_LOGIC_H +#define LP_BLD_LOGIC_H + + +#include + +#include "pipe/p_defines.h" /* For PIPE_FUNC_xxx */ + + +struct lp_type; +struct lp_build_context; + + +LLVMValueRef +lp_build_compare(LLVMBuilderRef builder, + const struct lp_type type, + unsigned func, + LLVMValueRef a, + LLVMValueRef b); + + +/** + * @param func is one of PIPE_FUNC_xxx + */ +LLVMValueRef +lp_build_cmp(struct lp_build_context *bld, + unsigned func, + LLVMValueRef a, + LLVMValueRef b); + + +LLVMValueRef +lp_build_select(struct lp_build_context *bld, + LLVMValueRef mask, + LLVMValueRef a, + LLVMValueRef b); + +LLVMValueRef +lp_build_select_aos(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef b, + const boolean cond[4]); + + +#endif /* !LP_BLD_LOGIC_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp new file mode 100644 index 00000000000..6e79438ead0 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp @@ -0,0 +1,75 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "pipe/p_config.h"
+
+#include "lp_bld_misc.h"
+
+
+/*
+ * Definitions of LLVMLinkInJIT() and LLVMInitializeNativeTarget(), compiled
+ * only when LLVM_NATIVE_ARCH is not defined. The matching declarations in
+ * lp_bld_misc.h sit under the same guard.
+ */
+#ifndef LLVM_NATIVE_ARCH
+
+namespace llvm {
+   extern void LinkInJIT();
+}
+
+
+/** C-callable wrapper around llvm::LinkInJIT(). */
+void
+LLVMLinkInJIT(void)
+{
+   llvm::LinkInJIT();
+}
+
+
+extern "C" int X86TargetMachineModule;
+
+
+/**
+ * On x86 and x86-64 builds store 1 into X86TargetMachineModule; on every
+ * other architecture do nothing.
+ *
+ * NOTE(review): the store presumably exists to force the X86 target to be
+ * linked in -- confirm against the LLVM version in use.
+ *
+ * @return always 0
+ */
+int
+LLVMInitializeNativeTarget(void)
+{
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+   X86TargetMachineModule = 1;
+#endif
+   return 0;
+}
+
+
+#endif /* !LLVM_NATIVE_ARCH */
+
+
+/*
+ * Hack to allow the linking of release LLVM static libraries on a debug build.
+ *
+ * See also:
+ * - http://social.msdn.microsoft.com/Forums/en-US/vclanguage/thread/7234ea2b-0042-42ed-b4e2-5d8644dfb57d
+ */
+#if defined(_MSC_VER) && defined(_DEBUG)
+#include <crtdefs.h> /* For _CRTIMP */
+extern "C" {
+   _CRTIMP void __cdecl _invalid_parameter_noinfo(void) {}
+}
+#endif
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.h b/src/gallium/auxiliary/gallivm/lp_bld_misc.h
new file mode 100644
index 00000000000..0e787e0b9cb
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.h
@@ -0,0 +1,56 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef LP_BLD_MISC_H +#define LP_BLD_MISC_H + + +#include "llvm/Config/config.h" + +#ifdef __cplusplus +extern "C" { +#endif + + +#ifndef LLVM_NATIVE_ARCH + +void +LLVMLinkInJIT(void); + +int +LLVMInitializeNativeTarget(void); + +#endif /* !LLVM_NATIVE_ARCH */ + + +#ifdef __cplusplus +} +#endif + + +#endif /* !LP_BLD_MISC_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c new file mode 100644 index 00000000000..bc360ad77ad --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c @@ -0,0 +1,418 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * @file
+ * Helper functions for packing/unpacking.
+ *
+ * Pack/unpacking is necessary for conversion between types of different
+ * bit width.
+ *
+ * They are also commonly used when a computation needs higher
+ * precision for the intermediate values.
For example, if one needs the
+ * function:
+ *
+ *   c = compute(a, b);
+ *
+ * to use more precision for intermediate results then one should implement it
+ * as:
+ *
+ *   LLVMValueRef
+ *   compute(LLVMBuilderRef builder, struct lp_type type, LLVMValueRef a, LLVMValueRef b)
+ *   {
+ *      struct lp_type wide_type = lp_wider_type(type);
+ *      LLVMValueRef al, ah, bl, bh, cl, ch, c;
+ *
+ *      lp_build_unpack2(builder, type, wide_type, a, &al, &ah);
+ *      lp_build_unpack2(builder, type, wide_type, b, &bl, &bh);
+ *
+ *      cl = compute_half(al, bl);
+ *      ch = compute_half(ah, bh);
+ *
+ *      c = lp_build_pack2(builder, wide_type, type, cl, ch);
+ *
+ *      return c;
+ *   }
+ *
+ * where compute_half() would do the computation for half the elements with
+ * twice the precision.
+ *
+ * @author Jose Fonseca
+ */
+
+
+#include "util/u_debug.h"
+#include "util/u_math.h"
+#include "util/u_cpu_detect.h"
+
+#include "lp_bld_type.h"
+#include "lp_bld_const.h"
+#include "lp_bld_intr.h"
+#include "lp_bld_arit.h"
+#include "lp_bld_pack.h"
+
+
+/**
+ * Build the constant shuffle mask that interleaves two n-element vectors the
+ * way the PUNPCKLxx and PUNPCKHxx instructions do.
+ *
+ * Mask lanes 2*k and 2*k + 1 select element j of the first and of the second
+ * shuffle operand respectively (indices j and n + j), with j = lo_hi*n/2 + k.
+ *
+ * @param n      number of lanes in the mask
+ * @param lo_hi  0 to interleave the low halves, 1 for the high halves
+ */
+static LLVMValueRef
+lp_build_const_unpack_shuffle(unsigned n, unsigned lo_hi)
+{
+   LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
+   const unsigned first = lo_hi*n/2;
+   unsigned pair;
+
+   assert(n <= LP_MAX_VECTOR_LENGTH);
+   assert(lo_hi < 2);
+
+   /* TODO: cache results in a static table */
+
+   for(pair = 0; 2*pair < n; ++pair) {
+      const unsigned src_index = first + pair;
+
+      elems[2*pair + 0] = LLVMConstInt(LLVMInt32Type(), 0 + src_index, 0);
+      elems[2*pair + 1] = LLVMConstInt(LLVMInt32Type(), n + src_index, 0);
+   }
+
+   return LLVMConstVector(elems, n);
+}
+
+
+/**
+ * Build shuffle vectors that match PACKxx instructions.
+ */
+static LLVMValueRef
+lp_build_const_pack_shuffle(unsigned n)
+{
+   LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
+   /*
+    * The mask is applied to wide vectors that were bitcast to the narrow
+    * element type. The least significant half of wide element i is then
+    * narrow element 2*i on little endian hosts and 2*i + 1 on big endian
+    * ones; lp_build_unpack2 makes the same distinction when widening.
+    */
+   const unsigned lsb = util_cpu_caps.little_endian ? 0 : 1;
+   unsigned i;
+
+   assert(n <= LP_MAX_VECTOR_LENGTH);
+
+   /* TODO: cache results in a static table */
+
+   for(i = 0; i < n; ++i)
+      elems[i] = LLVMConstInt(LLVMInt32Type(), 2*i + lsb, 0);
+
+   return LLVMConstVector(elems, n);
+}
+
+
+/**
+ * Interleave vector elements.
+ *
+ * Matches the PUNPCKLxx and PUNPCKHxx SSE instructions.
+ *
+ * @param type   type of a and b; only its length is used here
+ * @param lo_hi  0 to interleave the low halves of a and b, 1 for the high
+ *               halves
+ */
+LLVMValueRef
+lp_build_interleave2(LLVMBuilderRef builder,
+                     struct lp_type type,
+                     LLVMValueRef a,
+                     LLVMValueRef b,
+                     unsigned lo_hi)
+{
+   LLVMValueRef shuffle;
+
+   shuffle = lp_build_const_unpack_shuffle(type.length, lo_hi);
+
+   return LLVMBuildShuffleVector(builder, a, b, shuffle, "");
+}
+
+
+/**
+ * Double the bit width.
+ *
+ * This will only change the number of bits the values are represented with,
+ * not the values themselves.
+ *
+ * @param src_type  integer type of src
+ * @param dst_type  integer type twice as wide and half as long as src_type
+ * @param dst_lo    receives the widened low half of src
+ * @param dst_hi    receives the widened high half of src
+ */
+void
+lp_build_unpack2(LLVMBuilderRef builder,
+                 struct lp_type src_type,
+                 struct lp_type dst_type,
+                 LLVMValueRef src,
+                 LLVMValueRef *dst_lo,
+                 LLVMValueRef *dst_hi)
+{
+   LLVMValueRef msb;
+   LLVMTypeRef dst_vec_type;
+
+   assert(!src_type.floating);
+   assert(!dst_type.floating);
+   assert(dst_type.width == src_type.width * 2);
+   assert(dst_type.length * 2 == src_type.length);
+
+   if(dst_type.sign && src_type.sign) {
+      /* Replicate the sign bit in the most significant bits */
+      msb = LLVMBuildAShr(builder, src, lp_build_int_const_scalar(src_type, src_type.width - 1), "");
+   }
+   else
+      /* Most significant bits always zero */
+      msb = lp_build_zero(src_type);
+
+   /* Interleave bits: the operand order decides which half of each widened
+    * element receives the original value */
+   if(util_cpu_caps.little_endian) {
+      *dst_lo = lp_build_interleave2(builder, src_type, src, msb, 0);
+      *dst_hi = lp_build_interleave2(builder, src_type, src, msb, 1);
+   }
+   else {
+      *dst_lo = lp_build_interleave2(builder, src_type, msb, src, 0);
+      *dst_hi = lp_build_interleave2(builder, src_type, msb, src, 1);
+   }
+
+   /* Cast the result into the new type (twice as wide) */
+
+   dst_vec_type = lp_build_vec_type(dst_type);
+
+   *dst_lo = LLVMBuildBitCast(builder, *dst_lo, dst_vec_type, "");
+   *dst_hi = LLVMBuildBitCast(builder, *dst_hi, dst_vec_type, "");
+}
+
+
+/**
+ * Expand the bit width.
+ *
+ * This will only change the number of bits the values are represented with,
+ * not the values themselves.
+ *
+ * @param dst       array of num_dsts values; it doubles as scratch space
+ *                  while the width is doubled step by step
+ * @param num_dsts  number of result vectors
+ */
+void
+lp_build_unpack(LLVMBuilderRef builder,
+                struct lp_type src_type,
+                struct lp_type dst_type,
+                LLVMValueRef src,
+                LLVMValueRef *dst, unsigned num_dsts)
+{
+   unsigned num_tmps;
+   unsigned i;
+
+   /* Register width must remain constant */
+   assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
+
+   /* We must not lose or gain channels. Only precision */
+   assert(src_type.length == dst_type.length * num_dsts);
+
+   num_tmps = 1;
+   dst[0] = src;
+
+   while(src_type.width < dst_type.width) {
+      struct lp_type tmp_type = src_type;
+
+      tmp_type.width *= 2;
+      tmp_type.length /= 2;
+
+      /* Walk backwards so dst[i] is read before dst[2*i] and dst[2*i + 1]
+       * are overwritten */
+      for(i = num_tmps; i--; ) {
+         lp_build_unpack2(builder, src_type, tmp_type, dst[i], &dst[2*i + 0], &dst[2*i + 1]);
+      }
+
+      src_type = tmp_type;
+
+      num_tmps *= 2;
+   }
+
+   assert(num_tmps == num_dsts);
+}
+
+
+/**
+ * Non-interleaved pack.
+ *
+ * This will move values as
+ *
+ *   lo =   __ l0 __ l1 __ l2 __..  __ ln
+ *   hi =   __ h0 __ h1 __ h2 __..  __ hn
+ *   res =  l0 l1 l2 .. ln h0 h1 h2 .. hn
+ *
+ * This will only change the number of bits the values are represented, not the
+ * values themselves.
+ *
+ * It is assumed the values are already clamped into the destination type range.
+ * Values outside that range will produce undefined results. Use
+ * lp_build_packs2 instead.
+ */
+LLVMValueRef
+lp_build_pack2(LLVMBuilderRef builder,
+               struct lp_type src_type,
+               struct lp_type dst_type,
+               LLVMValueRef lo,
+               LLVMValueRef hi)
+{
+   LLVMTypeRef src_vec_type = lp_build_vec_type(src_type);
+   LLVMTypeRef dst_vec_type = lp_build_vec_type(dst_type);
+   LLVMValueRef shuffle;
+   LLVMValueRef res = NULL;
+
+   assert(!src_type.floating);
+   assert(!dst_type.floating);
+   assert(src_type.width == dst_type.width * 2);
+   assert(src_type.length * 2 == dst_type.length);
+
+   /* Prefer the SSE pack instructions for 128 bit vectors. res stays NULL
+    * when no suitable instruction exists, and the generic shuffle below is
+    * used instead. */
+   if(util_cpu_caps.has_sse2 && src_type.width * src_type.length == 128) {
+      switch(src_type.width) {
+      case 32:
+         if(dst_type.sign) {
+            res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128", src_vec_type, lo, hi);
+         }
+         else if(util_cpu_caps.has_sse4_1) {
+            /* PACKUSDW is the only intrinsic with a consistent signature */
+            return lp_build_intrinsic_binary(builder, "llvm.x86.sse41.packusdw", dst_vec_type, lo, hi);
+         }
+         /* else: no unsigned 32 -> 16 pack before SSE4.1, use the shuffle */
+         break;
+
+      case 16:
+         if(dst_type.sign)
+            res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packsswb.128", src_vec_type, lo, hi);
+         else
+            res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packuswb.128", src_vec_type, lo, hi);
+         break;
+
+      default:
+         /* No pack instruction for this width, use the shuffle */
+         break;
+      }
+
+      if(res) {
+         /* These intrinsics are declared as returning the source vector type */
+         res = LLVMBuildBitCast(builder, res, dst_vec_type, "");
+         return res;
+      }
+   }
+
+   /* Generic path: view both inputs as vectors of the narrow type and keep
+    * one narrow element out of every pair. */
+   lo = LLVMBuildBitCast(builder, lo, dst_vec_type, "");
+   hi = LLVMBuildBitCast(builder, hi, dst_vec_type, "");
+
+   shuffle = lp_build_const_pack_shuffle(dst_type.length);
+
+   res = LLVMBuildShuffleVector(builder, lo, hi, shuffle, "");
+
+   return res;
+}
+
+
+
+/**
+ * Non-interleaved pack and saturate.
+ *
+ * Same as lp_build_pack2 but will saturate values so that they fit into the
+ * destination type.
+ */
+LLVMValueRef
+lp_build_packs2(LLVMBuilderRef builder,
+                struct lp_type src_type,
+                struct lp_type dst_type,
+                LLVMValueRef lo,
+                LLVMValueRef hi)
+{
+   boolean clamp;
+
+   assert(!src_type.floating);
+   assert(!dst_type.floating);
+   assert(src_type.sign == dst_type.sign);
+   assert(src_type.width == dst_type.width * 2);
+   assert(src_type.length * 2 == dst_type.length);
+
+   clamp = TRUE;
+
+   /* All X86 SSE non-interleaved pack instructions take signed inputs and
+    * saturate them, so no need to clamp for those cases. */
+   if(util_cpu_caps.has_sse2 &&
+      src_type.width * src_type.length == 128 &&
+      src_type.sign)
+      clamp = FALSE;
+
+   if(clamp) {
+      struct lp_build_context bld;
+      /* A signed destination has one bit less available for the magnitude */
+      unsigned dst_bits = dst_type.sign ? dst_type.width - 1 : dst_type.width;
+      /* Largest value representable in the destination type */
+      LLVMValueRef dst_max = lp_build_int_const_scalar(src_type, ((unsigned long long)1 << dst_bits) - 1);
+      lp_build_context_init(&bld, builder, src_type);
+      lo = lp_build_min(&bld, lo, dst_max);
+      hi = lp_build_min(&bld, hi, dst_max);
+      /* FIXME: What about lower bound? */
+   }
+
+   return lp_build_pack2(builder, src_type, dst_type, lo, hi);
+}
+
+
+/**
+ * Truncate the bit width.
+ *
+ * The sources are packed pairwise, halving the element width at every step,
+ * until dst_type's width is reached.
+ *
+ * @param clamped   TRUE selects lp_build_pack2 for every step, FALSE selects
+ *                  the saturating lp_build_packs2
+ * @param src       array of num_srcs vectors of src_type
+ * @param num_srcs  number of source vectors; they are copied into a local
+ *                  array of LP_MAX_VECTOR_LENGTH entries
+ *
+ * TODO: Handle saturation consistently.
+ */
+LLVMValueRef
+lp_build_pack(LLVMBuilderRef builder,
+              struct lp_type src_type,
+              struct lp_type dst_type,
+              boolean clamped,
+              const LLVMValueRef *src, unsigned num_srcs)
+{
+   LLVMValueRef (*pack2)(LLVMBuilderRef builder,
+                         struct lp_type src_type,
+                         struct lp_type dst_type,
+                         LLVMValueRef lo,
+                         LLVMValueRef hi);
+   LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
+   unsigned i;
+
+
+   /* Register width must remain constant */
+   assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
+
+   /* We must not lose or gain channels. Only precision */
+   assert(src_type.length * num_srcs == dst_type.length);
+
+   if(clamped)
+      pack2 = &lp_build_pack2;
+   else
+      pack2 = &lp_build_packs2;
+
+   for(i = 0; i < num_srcs; ++i)
+      tmp[i] = src[i];
+
+   while(src_type.width > dst_type.width) {
+      struct lp_type tmp_type = src_type;
+
+      tmp_type.width /= 2;
+      tmp_type.length *= 2;
+
+      /* Take in consideration the sign changes only in the last step */
+      if(tmp_type.width == dst_type.width)
+         tmp_type.sign = dst_type.sign;
+
+      num_srcs /= 2;
+
+      /* Each pass packs neighbouring pairs, halving the number of vectors */
+      for(i = 0; i < num_srcs; ++i)
+         tmp[i] = pack2(builder, src_type, tmp_type, tmp[2*i + 0], tmp[2*i + 1]);
+
+      src_type = tmp_type;
+   }
+
+   assert(num_srcs == 1);
+
+   return tmp[0];
+}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.h b/src/gallium/auxiliary/gallivm/lp_bld_pack.h
new file mode 100644
index 00000000000..fb2a34984a4
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.h
@@ -0,0 +1,95 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Helper functions for packing/unpacking conversions.
+ *
+ * @author Jose Fonseca
+ */
+
+
+#ifndef LP_BLD_PACK_H
+#define LP_BLD_PACK_H
+
+
+#include <llvm-c/Core.h> /* For LLVMValueRef and LLVMBuilderRef */
+
+
+struct lp_type;
+
+
+LLVMValueRef
+lp_build_interleave2(LLVMBuilderRef builder,
+                     struct lp_type type,
+                     LLVMValueRef a,
+                     LLVMValueRef b,
+                     unsigned lo_hi);
+
+
+void
+lp_build_unpack2(LLVMBuilderRef builder,
+                 struct lp_type src_type,
+                 struct lp_type dst_type,
+                 LLVMValueRef src,
+                 LLVMValueRef *dst_lo,
+                 LLVMValueRef *dst_hi);
+
+
+void
+lp_build_unpack(LLVMBuilderRef builder,
+                struct lp_type src_type,
+                struct lp_type dst_type,
+                LLVMValueRef src,
+                LLVMValueRef *dst, unsigned num_dsts);
+
+
+LLVMValueRef
+lp_build_packs2(LLVMBuilderRef builder,
+                struct lp_type src_type,
+                struct lp_type dst_type,
+                LLVMValueRef lo,
+                LLVMValueRef hi);
+
+
+LLVMValueRef
+lp_build_pack2(LLVMBuilderRef builder,
+               struct lp_type src_type,
+               struct lp_type dst_type,
+               LLVMValueRef lo,
+               LLVMValueRef hi);
+
+
+LLVMValueRef
+lp_build_pack(LLVMBuilderRef builder,
+              struct lp_type src_type,
+              struct lp_type dst_type,
+              boolean clamped,
+              const LLVMValueRef *src, unsigned num_srcs);
+
+
+#endif /* !LP_BLD_PACK_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
new file mode 100644
index 00000000000..9003e108c1c
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -0,0 +1,190 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Texture sampling -- common code. 
+ *
+ * @author Jose Fonseca
+ */
+
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "util/u_format.h"
+#include "util/u_math.h"
+#include "lp_bld_debug.h"
+#include "lp_bld_const.h"
+#include "lp_bld_arit.h"
+#include "lp_bld_type.h"
+#include "lp_bld_format.h"
+#include "lp_bld_sample.h"
+
+
+/**
+ * Fill in the sampler static state from a texture and a sampler.
+ *
+ * The state is always zeroed first; it is left all zeros when either
+ * texture or sampler is NULL.
+ */
+void
+lp_sampler_static_state(struct lp_sampler_static_state *state,
+                        const struct pipe_texture *texture,
+                        const struct pipe_sampler_state *sampler)
+{
+   memset(state, 0, sizeof *state);
+
+   if(!texture || !sampler)
+      return;
+
+   /* Bits taken from the texture */
+   state->format            = texture->format;
+   state->target            = texture->target;
+   state->pot_width         = util_is_pot(texture->width0);
+   state->pot_height        = util_is_pot(texture->height0);
+   state->pot_depth         = util_is_pot(texture->depth0);
+
+   /* Bits taken from the sampler */
+   state->wrap_s            = sampler->wrap_s;
+   state->wrap_t            = sampler->wrap_t;
+   state->wrap_r            = sampler->wrap_r;
+   state->min_img_filter    = sampler->min_img_filter;
+   state->min_mip_filter    = sampler->min_mip_filter;
+   state->mag_img_filter    = sampler->mag_img_filter;
+   state->compare_mode      = sampler->compare_mode;
+   /* The compare function is only recorded when comparison is enabled;
+    * otherwise it keeps the zero written by memset */
+   if(sampler->compare_mode != PIPE_TEX_COMPARE_NONE)
+      state->compare_func   = sampler->compare_func;
+   state->normalized_coords = sampler->normalized_coords;
+   state->prefilter         = sampler->prefilter;
+}
+
+
+/**
+ * Gather elements from scatter positions in memory into a single vector.
+ *
+ * @param src_width src element width
+ * @param dst_width result element width (source will be expanded to fit)
+ * @param length length of the offsets,
+ * @param base_ptr base pointer, should be a i8 pointer type.
+ * @param offsets vector with offsets
+ */
+LLVMValueRef
+lp_build_gather(LLVMBuilderRef builder,
+                unsigned length,
+                unsigned src_width,
+                unsigned dst_width,
+                LLVMValueRef base_ptr,
+                LLVMValueRef offsets)
+{
+   LLVMTypeRef load_ptr_type = LLVMPointerType(LLVMIntType(src_width), 0);
+   LLVMTypeRef elem_type = LLVMIntType(dst_width);
+   LLVMValueRef gathered = LLVMGetUndef(LLVMVectorType(elem_type, length));
+   unsigned lane;
+
+   /* One scalar load per lane, inserted into the result vector */
+   for(lane = 0; lane < length; ++lane) {
+      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), lane, 0);
+      LLVMValueRef offset = LLVMBuildExtractElement(builder, offsets, index, "");
+      LLVMValueRef ptr = LLVMBuildGEP(builder, base_ptr, &offset, 1, "");
+      LLVMValueRef value;
+
+      /* Reinterpret the computed address as a pointer to a src_width integer */
+      ptr = LLVMBuildBitCast(builder, ptr, load_ptr_type, "");
+      value = LLVMBuildLoad(builder, ptr, "");
+
+      /* Bring the loaded value to the result element width */
+      assert(src_width <= dst_width);
+      if(src_width > dst_width)
+         value = LLVMBuildTrunc(builder, value, elem_type, "");
+      else if(src_width < dst_width)
+         value = LLVMBuildZExt(builder, value, elem_type, "");
+
+      gathered = LLVMBuildInsertElement(builder, gathered, value, index, "");
+   }
+
+   return gathered;
+}
+
+
+/**
+ * Compute the offset of a pixel.
+ *
+ * x, y, y_stride are vectors
+ *
+ * The returned offset is in bytes: the per-pixel step is
+ * format_desc->block.bits/8. data_ptr is accepted but not used here.
+ *
+ * NOTE(review): bld->one is used both as a bit mask and as a shift count
+ * below, so bld is expected to be an integer build context -- confirm
+ * against the callers.
+ */
+LLVMValueRef
+lp_build_sample_offset(struct lp_build_context *bld,
+                       const struct util_format_description *format_desc,
+                       LLVMValueRef x,
+                       LLVMValueRef y,
+                       LLVMValueRef y_stride,
+                       LLVMValueRef data_ptr)
+{
+   LLVMValueRef x_stride;
+   LLVMValueRef offset;
+
+   /* Bytes per pixel */
+   x_stride = lp_build_const_scalar(bld->type, format_desc->block.bits/8);
+
+   if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
+      /*
+       * Depth/stencil formats are addressed in 2x2 pixel tiles: the low bit
+       * of x and y locates the pixel inside its tile, the remaining bits
+       * locate the tile.
+       */
+      LLVMValueRef x_lo, x_hi;
+      LLVMValueRef y_lo, y_hi;
+      LLVMValueRef x_stride_lo, x_stride_hi;
+      LLVMValueRef y_stride_lo, y_stride_hi;
+      LLVMValueRef x_offset_lo, x_offset_hi;
+      LLVMValueRef y_offset_lo, y_offset_hi;
+      LLVMValueRef offset_lo, offset_hi;
+
+      /* Position inside the tile (0 or 1) */
+      x_lo = LLVMBuildAnd(bld->builder, x, bld->one, "");
+      y_lo = LLVMBuildAnd(bld->builder, y, bld->one, "");
+
+      /* Tile coordinates */
+      x_hi = LLVMBuildLShr(bld->builder, x, bld->one, "");
+      y_hi = LLVMBuildLShr(bld->builder, y, bld->one, "");
+
+      /* Inside a tile: one pixel per x step, two pixels per y step */
+      x_stride_lo = x_stride;
+      y_stride_lo = lp_build_const_scalar(bld->type, 2*format_desc->block.bits/8);
+
+      /* Between tiles: four pixels per x step, two rows (2*y_stride) per y step */
+      x_stride_hi = lp_build_const_scalar(bld->type, 4*format_desc->block.bits/8);
+      y_stride_hi = LLVMBuildShl(bld->builder, y_stride, bld->one, "");
+
+      x_offset_lo = lp_build_mul(bld, x_lo, x_stride_lo);
+      y_offset_lo = lp_build_mul(bld, y_lo, y_stride_lo);
+      offset_lo = lp_build_add(bld, x_offset_lo, y_offset_lo);
+
+      x_offset_hi = lp_build_mul(bld, x_hi, x_stride_hi);
+      y_offset_hi = lp_build_mul(bld, y_hi, y_stride_hi);
+      offset_hi = lp_build_add(bld, x_offset_hi, y_offset_hi);
+
+      offset = lp_build_add(bld, offset_hi, offset_lo);
+   }
+   else {
+      /* Linear layout: offset = x * bytes_per_pixel + y * y_stride */
+      LLVMValueRef x_offset;
+      LLVMValueRef y_offset;
+
+      x_offset = lp_build_mul(bld, x, x_stride);
+      y_offset = lp_build_mul(bld, y, y_stride);
+
+      offset = lp_build_add(bld, x_offset, y_offset);
+   }
+
+   return offset;
+}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
new file mode 100644
index 00000000000..8cb8210ca76
--- /dev/null
+++
b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
@@ -0,0 +1,155 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Texture sampling.
+ *
+ * @author Jose Fonseca
+ */
+
+#ifndef LP_BLD_SAMPLE_H
+#define LP_BLD_SAMPLE_H
+
+
+#include <llvm-c/Core.h> /* For LLVMValueRef and LLVMBuilderRef */
+
+struct pipe_texture;
+struct pipe_sampler_state;
+struct util_format_description;
+struct lp_type;
+struct lp_build_context;
+
+
+/**
+ * Sampler static state.
+ *
+ * These are the bits of state from pipe_texture and pipe_sampler_state that
+ * are embedded in the generated code.
+ */
+struct lp_sampler_static_state
+{
+   /* pipe_texture's state */
+   enum pipe_format format;
+   unsigned target:2;
+   unsigned pot_width:1;   /* width is a power of two */
+   unsigned pot_height:1;  /* height is a power of two */
+   unsigned pot_depth:1;   /* depth is a power of two */
+
+   /* pipe_sampler_state's state */
+   unsigned wrap_s:3;
+   unsigned wrap_t:3;
+   unsigned wrap_r:3;
+   unsigned min_img_filter:2;
+   unsigned min_mip_filter:2;
+   unsigned mag_img_filter:2;
+   unsigned compare_mode:1;
+   unsigned compare_func:3;
+   unsigned normalized_coords:1;
+   unsigned prefilter:4;
+};
+
+
+/**
+ * Sampler dynamic state.
+ *
+ * These are the bits of state from pipe_texture and pipe_sampler_state that
+ * are computed in runtime.
+ *
+ * These are obtained through callbacks, as we don't want to tie the texture
+ * sampling code generation logic to any particular texture layout or pipe
+ * driver.
+ *
+ * Every callback receives this state object, the builder to emit code with
+ * and the index of the texture unit being sampled.
+ */
+struct lp_sampler_dynamic_state
+{
+
+   /** Obtain the base texture width. */
+   LLVMValueRef
+   (*width)( struct lp_sampler_dynamic_state *state,
+             LLVMBuilderRef builder,
+             unsigned unit);
+
+   /** Obtain the base texture height. */
+   LLVMValueRef
+   (*height)( struct lp_sampler_dynamic_state *state,
+              LLVMBuilderRef builder,
+              unsigned unit);
+
+   /* NOTE(review): presumably the row stride of the texture in bytes --
+    * confirm against the driver implementing this callback. */
+   LLVMValueRef
+   (*stride)( struct lp_sampler_dynamic_state *state,
+              LLVMBuilderRef builder,
+              unsigned unit);
+
+   /* NOTE(review): presumably the base address of the texel data --
+    * confirm against the driver implementing this callback. */
+   LLVMValueRef
+   (*data_ptr)( struct lp_sampler_dynamic_state *state,
+                LLVMBuilderRef builder,
+                unsigned unit);
+
+};
+
+
+/**
+ * Derive the sampler static state.
+ */ +void +lp_sampler_static_state(struct lp_sampler_static_state *state, + const struct pipe_texture *texture, + const struct pipe_sampler_state *sampler); + + +LLVMValueRef +lp_build_gather(LLVMBuilderRef builder, + unsigned length, + unsigned src_width, + unsigned dst_width, + LLVMValueRef base_ptr, + LLVMValueRef offsets); + + +LLVMValueRef +lp_build_sample_offset(struct lp_build_context *bld, + const struct util_format_description *format_desc, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef y_stride, + LLVMValueRef data_ptr); + + +void +lp_build_sample_soa(LLVMBuilderRef builder, + const struct lp_sampler_static_state *static_state, + struct lp_sampler_dynamic_state *dynamic_state, + struct lp_type fp_type, + unsigned unit, + unsigned num_coords, + const LLVMValueRef *coords, + LLVMValueRef lodbias, + LLVMValueRef *texel); + + + +#endif /* LP_BLD_SAMPLE_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c new file mode 100644 index 00000000000..854dd0b28c2 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -0,0 +1,598 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Texture sampling -- SoA. + * + * @author Jose Fonseca + */ + +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "util/u_debug.h" +#include "util/u_debug_dump.h" +#include "util/u_memory.h" +#include "util/u_math.h" +#include "util/u_format.h" +#include "util/u_cpu_detect.h" +#include "lp_bld_debug.h" +#include "lp_bld_type.h" +#include "lp_bld_const.h" +#include "lp_bld_conv.h" +#include "lp_bld_arit.h" +#include "lp_bld_logic.h" +#include "lp_bld_swizzle.h" +#include "lp_bld_pack.h" +#include "lp_bld_format.h" +#include "lp_bld_sample.h" + + +/** + * Keep all information for sampling code generation in a single place. 
+ */ +struct lp_build_sample_context +{ + LLVMBuilderRef builder; + + const struct lp_sampler_static_state *static_state; + + struct lp_sampler_dynamic_state *dynamic_state; + + const struct util_format_description *format_desc; + + /** Incoming coordinates type and build context */ + struct lp_type coord_type; + struct lp_build_context coord_bld; + + /** Integer coordinates */ + struct lp_type int_coord_type; + struct lp_build_context int_coord_bld; + + /** Output texels type and build context */ + struct lp_type texel_type; + struct lp_build_context texel_bld; +}; + + +static void +lp_build_sample_texel_soa(struct lp_build_sample_context *bld, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef y_stride, + LLVMValueRef data_ptr, + LLVMValueRef *texel) +{ + LLVMValueRef offset; + LLVMValueRef packed; + + offset = lp_build_sample_offset(&bld->int_coord_bld, + bld->format_desc, + x, y, y_stride, + data_ptr); + + assert(bld->format_desc->block.width == 1); + assert(bld->format_desc->block.height == 1); + assert(bld->format_desc->block.bits <= bld->texel_type.width); + + packed = lp_build_gather(bld->builder, + bld->texel_type.length, + bld->format_desc->block.bits, + bld->texel_type.width, + data_ptr, offset); + + lp_build_unpack_rgba_soa(bld->builder, + bld->format_desc, + bld->texel_type, + packed, texel); +} + + +static LLVMValueRef +lp_build_sample_packed(struct lp_build_sample_context *bld, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef y_stride, + LLVMValueRef data_ptr) +{ + LLVMValueRef offset; + + offset = lp_build_sample_offset(&bld->int_coord_bld, + bld->format_desc, + x, y, y_stride, + data_ptr); + + assert(bld->format_desc->block.width == 1); + assert(bld->format_desc->block.height == 1); + assert(bld->format_desc->block.bits <= bld->texel_type.width); + + return lp_build_gather(bld->builder, + bld->texel_type.length, + bld->format_desc->block.bits, + bld->texel_type.width, + data_ptr, offset); +} + + +static LLVMValueRef +lp_build_sample_wrap(struct 
lp_build_sample_context *bld, + LLVMValueRef coord, + LLVMValueRef length, + boolean is_pot, + unsigned wrap_mode) +{ + struct lp_build_context *int_coord_bld = &bld->int_coord_bld; + LLVMValueRef length_minus_one; + + length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); + + switch(wrap_mode) { + case PIPE_TEX_WRAP_REPEAT: + if(is_pot) + coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, ""); + else + /* Signed remainder won't give the right results for negative + * dividends but unsigned remainder does.*/ + coord = LLVMBuildURem(bld->builder, coord, length, ""); + break; + + case PIPE_TEX_WRAP_CLAMP: + coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero); + coord = lp_build_min(int_coord_bld, coord, length_minus_one); + break; + + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + case PIPE_TEX_WRAP_MIRROR_REPEAT: + case PIPE_TEX_WRAP_MIRROR_CLAMP: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + /* FIXME */ + _debug_printf("llvmpipe: failed to translate texture wrap mode %s\n", + debug_dump_tex_wrap(wrap_mode, TRUE)); + coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero); + coord = lp_build_min(int_coord_bld, coord, length_minus_one); + break; + + default: + assert(0); + } + + return coord; +} + + +static void +lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef width, + LLVMValueRef height, + LLVMValueRef stride, + LLVMValueRef data_ptr, + LLVMValueRef *texel) +{ + LLVMValueRef x; + LLVMValueRef y; + + x = lp_build_ifloor(&bld->coord_bld, s); + y = lp_build_ifloor(&bld->coord_bld, t); + lp_build_name(x, "tex.x.floor"); + lp_build_name(y, "tex.y.floor"); + + x = lp_build_sample_wrap(bld, x, width, bld->static_state->pot_width, bld->static_state->wrap_s); + y = lp_build_sample_wrap(bld, y, height, bld->static_state->pot_height, bld->static_state->wrap_t); + lp_build_name(x, 
"tex.x.wrapped"); + lp_build_name(y, "tex.y.wrapped"); + + lp_build_sample_texel_soa(bld, x, y, stride, data_ptr, texel); +} + + +static void +lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef width, + LLVMValueRef height, + LLVMValueRef stride, + LLVMValueRef data_ptr, + LLVMValueRef *texel) +{ + LLVMValueRef half; + LLVMValueRef s_ipart; + LLVMValueRef t_ipart; + LLVMValueRef s_fpart; + LLVMValueRef t_fpart; + LLVMValueRef x0, x1; + LLVMValueRef y0, y1; + LLVMValueRef neighbors[2][2][4]; + unsigned chan; + + half = lp_build_const_scalar(bld->coord_type, 0.5); + s = lp_build_sub(&bld->coord_bld, s, half); + t = lp_build_sub(&bld->coord_bld, t, half); + + s_ipart = lp_build_floor(&bld->coord_bld, s); + t_ipart = lp_build_floor(&bld->coord_bld, t); + + s_fpart = lp_build_sub(&bld->coord_bld, s, s_ipart); + t_fpart = lp_build_sub(&bld->coord_bld, t, t_ipart); + + x0 = lp_build_itrunc(&bld->coord_bld, s_ipart); + y0 = lp_build_itrunc(&bld->coord_bld, t_ipart); + + x0 = lp_build_sample_wrap(bld, x0, width, bld->static_state->pot_width, bld->static_state->wrap_s); + y0 = lp_build_sample_wrap(bld, y0, height, bld->static_state->pot_height, bld->static_state->wrap_t); + + x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one); + y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one); + + x1 = lp_build_sample_wrap(bld, x1, width, bld->static_state->pot_width, bld->static_state->wrap_s); + y1 = lp_build_sample_wrap(bld, y1, height, bld->static_state->pot_height, bld->static_state->wrap_t); + + lp_build_sample_texel_soa(bld, x0, y0, stride, data_ptr, neighbors[0][0]); + lp_build_sample_texel_soa(bld, x1, y0, stride, data_ptr, neighbors[0][1]); + lp_build_sample_texel_soa(bld, x0, y1, stride, data_ptr, neighbors[1][0]); + lp_build_sample_texel_soa(bld, x1, y1, stride, data_ptr, neighbors[1][1]); + + /* TODO: Don't interpolate missing channels */ + for(chan = 0; chan < 4; ++chan) { + 
texel[chan] = lp_build_lerp_2d(&bld->texel_bld, + s_fpart, t_fpart, + neighbors[0][0][chan], + neighbors[0][1][chan], + neighbors[1][0][chan], + neighbors[1][1][chan]); + } +} + + +static void +lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder, + struct lp_type dst_type, + LLVMValueRef packed, + LLVMValueRef *rgba) +{ + LLVMValueRef mask = lp_build_int_const_scalar(dst_type, 0xff); + unsigned chan; + + /* Decode the input vector components */ + for (chan = 0; chan < 4; ++chan) { + unsigned start = chan*8; + unsigned stop = start + 8; + LLVMValueRef input; + + input = packed; + + if(start) + input = LLVMBuildLShr(builder, input, lp_build_int_const_scalar(dst_type, start), ""); + + if(stop < 32) + input = LLVMBuildAnd(builder, input, mask, ""); + + input = lp_build_unsigned_norm_to_float(builder, 8, dst_type, input); + + rgba[chan] = input; + } +} + + +static void +lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef width, + LLVMValueRef height, + LLVMValueRef stride, + LLVMValueRef data_ptr, + LLVMValueRef *texel) +{ + LLVMBuilderRef builder = bld->builder; + struct lp_build_context i32, h16, u8n; + LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type; + LLVMValueRef i32_c8, i32_c128, i32_c255; + LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi; + LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi; + LLVMValueRef x0, x1; + LLVMValueRef y0, y1; + LLVMValueRef neighbors[2][2]; + LLVMValueRef neighbors_lo[2][2]; + LLVMValueRef neighbors_hi[2][2]; + LLVMValueRef packed, packed_lo, packed_hi; + LLVMValueRef unswizzled[4]; + + lp_build_context_init(&i32, builder, lp_type_int(32)); + lp_build_context_init(&h16, builder, lp_type_ufixed(16)); + lp_build_context_init(&u8n, builder, lp_type_unorm(8)); + + i32_vec_type = lp_build_vec_type(i32.type); + h16_vec_type = lp_build_vec_type(h16.type); + u8n_vec_type = lp_build_vec_type(u8n.type); + + s = lp_build_mul_imm(&bld->coord_bld, s, 256); + t = 
lp_build_mul_imm(&bld->coord_bld, t, 256); + + s = LLVMBuildFPToSI(builder, s, i32_vec_type, ""); + t = LLVMBuildFPToSI(builder, t, i32_vec_type, ""); + + i32_c128 = lp_build_int_const_scalar(i32.type, -128); + s = LLVMBuildAdd(builder, s, i32_c128, ""); + t = LLVMBuildAdd(builder, t, i32_c128, ""); + + i32_c8 = lp_build_int_const_scalar(i32.type, 8); + s_ipart = LLVMBuildAShr(builder, s, i32_c8, ""); + t_ipart = LLVMBuildAShr(builder, t, i32_c8, ""); + + i32_c255 = lp_build_int_const_scalar(i32.type, 255); + s_fpart = LLVMBuildAnd(builder, s, i32_c255, ""); + t_fpart = LLVMBuildAnd(builder, t, i32_c255, ""); + + x0 = s_ipart; + y0 = t_ipart; + + x0 = lp_build_sample_wrap(bld, x0, width, bld->static_state->pot_width, bld->static_state->wrap_s); + y0 = lp_build_sample_wrap(bld, y0, height, bld->static_state->pot_height, bld->static_state->wrap_t); + + x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one); + y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one); + + x1 = lp_build_sample_wrap(bld, x1, width, bld->static_state->pot_width, bld->static_state->wrap_s); + y1 = lp_build_sample_wrap(bld, y1, height, bld->static_state->pot_height, bld->static_state->wrap_t); + + /* + * Transform 4 x i32 in + * + * s_fpart = {s0, s1, s2, s3} + * + * into 8 x i16 + * + * s_fpart = {00, s0, 00, s1, 00, s2, 00, s3} + * + * into two 8 x i16 + * + * s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1} + * s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3} + * + * and likewise for t_fpart. There is no risk of loosing precision here + * since the fractional parts only use the lower 8bits. 
+ */ + + s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, ""); + t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, ""); + + { + LLVMTypeRef elem_type = LLVMInt32Type(); + LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH]; + LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH]; + LLVMValueRef shuffle_lo; + LLVMValueRef shuffle_hi; + unsigned i, j; + + for(j = 0; j < h16.type.length; j += 4) { + unsigned subindex = util_cpu_caps.little_endian ? 0 : 1; + LLVMValueRef index; + + index = LLVMConstInt(elem_type, j/2 + subindex, 0); + for(i = 0; i < 4; ++i) + shuffles_lo[j + i] = index; + + index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0); + for(i = 0; i < 4; ++i) + shuffles_hi[j + i] = index; + } + + shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length); + shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length); + + s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_lo, ""); + t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_lo, ""); + s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_hi, ""); + t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, ""); + } + + /* + * Fetch the pixels as 4 x 32bit (rgba order might differ): + * + * rgba0 rgba1 rgba2 rgba3 + * + * bit cast them into 16 x u8 + * + * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3 + * + * unpack them into two 8 x i16: + * + * r0 g0 b0 a0 r1 g1 b1 a1 + * r2 g2 b2 a2 r3 g3 b3 a3 + * + * The higher 8 bits of the resulting elements will be zero. 
+ */ + + neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_ptr); + neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_ptr); + neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_ptr); + neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_ptr); + + neighbors[0][0] = LLVMBuildBitCast(builder, neighbors[0][0], u8n_vec_type, ""); + neighbors[0][1] = LLVMBuildBitCast(builder, neighbors[0][1], u8n_vec_type, ""); + neighbors[1][0] = LLVMBuildBitCast(builder, neighbors[1][0], u8n_vec_type, ""); + neighbors[1][1] = LLVMBuildBitCast(builder, neighbors[1][1], u8n_vec_type, ""); + + lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][0], &neighbors_lo[0][0], &neighbors_hi[0][0]); + lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][1], &neighbors_lo[0][1], &neighbors_hi[0][1]); + lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][0], &neighbors_lo[1][0], &neighbors_hi[1][0]); + lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][1], &neighbors_lo[1][1], &neighbors_hi[1][1]); + + /* + * Linear interpolate with 8.8 fixed point. + */ + + packed_lo = lp_build_lerp_2d(&h16, + s_fpart_lo, t_fpart_lo, + neighbors_lo[0][0], + neighbors_lo[0][1], + neighbors_lo[1][0], + neighbors_lo[1][1]); + + packed_hi = lp_build_lerp_2d(&h16, + s_fpart_hi, t_fpart_hi, + neighbors_hi[0][0], + neighbors_hi[0][1], + neighbors_hi[1][0], + neighbors_hi[1][1]); + + packed = lp_build_pack2(builder, h16.type, u8n.type, packed_lo, packed_hi); + + /* + * Convert to SoA and swizzle. 
+ */ + + packed = LLVMBuildBitCast(builder, packed, i32_vec_type, ""); + + lp_build_rgba8_to_f32_soa(bld->builder, + bld->texel_type, + packed, unswizzled); + + lp_build_format_swizzle_soa(bld->format_desc, + bld->texel_type, unswizzled, + texel); +} + + +static void +lp_build_sample_compare(struct lp_build_sample_context *bld, + LLVMValueRef p, + LLVMValueRef *texel) +{ + struct lp_build_context *texel_bld = &bld->texel_bld; + LLVMValueRef res; + unsigned chan; + + if(bld->static_state->compare_mode == PIPE_TEX_COMPARE_NONE) + return; + + /* TODO: Compare before swizzling, to avoid redundant computations */ + res = NULL; + for(chan = 0; chan < 4; ++chan) { + LLVMValueRef cmp; + cmp = lp_build_cmp(texel_bld, bld->static_state->compare_func, p, texel[chan]); + cmp = lp_build_select(texel_bld, cmp, texel_bld->one, texel_bld->zero); + + if(res) + res = lp_build_add(texel_bld, res, cmp); + else + res = cmp; + } + + assert(res); + res = lp_build_mul(texel_bld, res, lp_build_const_scalar(texel_bld->type, 0.25)); + + /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */ + for(chan = 0; chan < 3; ++chan) + texel[chan] = res; + texel[3] = texel_bld->one; +} + + +void +lp_build_sample_soa(LLVMBuilderRef builder, + const struct lp_sampler_static_state *static_state, + struct lp_sampler_dynamic_state *dynamic_state, + struct lp_type type, + unsigned unit, + unsigned num_coords, + const LLVMValueRef *coords, + LLVMValueRef lodbias, + LLVMValueRef *texel) +{ + struct lp_build_sample_context bld; + LLVMValueRef width; + LLVMValueRef height; + LLVMValueRef stride; + LLVMValueRef data_ptr; + LLVMValueRef s; + LLVMValueRef t; + LLVMValueRef p; + + /* Setup our build context */ + memset(&bld, 0, sizeof bld); + bld.builder = builder; + bld.static_state = static_state; + bld.dynamic_state = dynamic_state; + bld.format_desc = util_format_description(static_state->format); + bld.coord_type = type; + bld.int_coord_type = lp_int_type(type); + bld.texel_type = type; + 
lp_build_context_init(&bld.coord_bld, builder, bld.coord_type); + lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type); + lp_build_context_init(&bld.texel_bld, builder, bld.texel_type); + + /* Get the dynamic state */ + width = dynamic_state->width(dynamic_state, builder, unit); + height = dynamic_state->height(dynamic_state, builder, unit); + stride = dynamic_state->stride(dynamic_state, builder, unit); + data_ptr = dynamic_state->data_ptr(dynamic_state, builder, unit); + + s = coords[0]; + t = coords[1]; + p = coords[2]; + + width = lp_build_broadcast_scalar(&bld.int_coord_bld, width); + height = lp_build_broadcast_scalar(&bld.int_coord_bld, height); + stride = lp_build_broadcast_scalar(&bld.int_coord_bld, stride); + + if(static_state->target == PIPE_TEXTURE_1D) + t = bld.coord_bld.zero; + + if(static_state->normalized_coords) { + LLVMTypeRef coord_vec_type = lp_build_vec_type(bld.coord_type); + LLVMValueRef fp_width = LLVMBuildSIToFP(builder, width, coord_vec_type, ""); + LLVMValueRef fp_height = LLVMBuildSIToFP(builder, height, coord_vec_type, ""); + s = lp_build_mul(&bld.coord_bld, s, fp_width); + t = lp_build_mul(&bld.coord_bld, t, fp_height); + } + + switch (static_state->min_img_filter) { + case PIPE_TEX_FILTER_NEAREST: + lp_build_sample_2d_nearest_soa(&bld, s, t, width, height, stride, data_ptr, texel); + break; + case PIPE_TEX_FILTER_LINEAR: + if(lp_format_is_rgba8(bld.format_desc)) + lp_build_sample_2d_linear_aos(&bld, s, t, width, height, stride, data_ptr, texel); + else + lp_build_sample_2d_linear_soa(&bld, s, t, width, height, stride, data_ptr, texel); + break; + default: + assert(0); + } + + /* FIXME: respect static_state->min_mip_filter */; + /* FIXME: respect static_state->mag_img_filter */; + /* FIXME: respect static_state->prefilter */; + + lp_build_sample_compare(&bld, p, texel); +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_struct.c b/src/gallium/auxiliary/gallivm/lp_bld_struct.c new file mode 100644 index 
00000000000..3998ac374fe --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_struct.c @@ -0,0 +1,72 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * @file + * Helper functions for manipulation structures. 
+ * + * @author Jose Fonseca + */ + + +#include "util/u_debug.h" +#include "util/u_memory.h" + +#include "lp_bld_debug.h" +#include "lp_bld_struct.h" + + +LLVMValueRef +lp_build_struct_get_ptr(LLVMBuilderRef builder, + LLVMValueRef ptr, + unsigned member, + const char *name) +{ + LLVMValueRef indices[2]; + LLVMValueRef member_ptr; + indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); + indices[1] = LLVMConstInt(LLVMInt32Type(), member, 0); + member_ptr = LLVMBuildGEP(builder, ptr, indices, Elements(indices), ""); + lp_build_name(member_ptr, "%s.%s_ptr", LLVMGetValueName(ptr), name); + return member_ptr; +} + + +LLVMValueRef +lp_build_struct_get(LLVMBuilderRef builder, + LLVMValueRef ptr, + unsigned member, + const char *name) +{ + LLVMValueRef member_ptr; + LLVMValueRef res; + member_ptr = lp_build_struct_get_ptr(builder, ptr, member, name); + res = LLVMBuildLoad(builder, member_ptr, ""); + lp_build_name(res, "%s.%s", LLVMGetValueName(ptr), name); + return res; +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_struct.h b/src/gallium/auxiliary/gallivm/lp_bld_struct.h new file mode 100644 index 00000000000..740392f5611 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_struct.h @@ -0,0 +1,75 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Helper functions for manipulating structures. + * + * @author Jose Fonseca + */ + + +#ifndef LP_BLD_STRUCT_H +#define LP_BLD_STRUCT_H + + +#include <llvm-c/Core.h> +#include <llvm-c/Target.h> + +#include "util/u_debug.h" +#include "util/u_memory.h" + + +#define LP_CHECK_STRUCT_SIZE(_ctype, _ltarget, _ltype) \ + assert(LLVMABISizeOfType(_ltarget, _ltype) == \ + sizeof(_ctype)) + +#define LP_CHECK_MEMBER_OFFSET(_ctype, _cmember, _ltarget, _ltype, _lindex) \ + assert(LLVMOffsetOfElement(_ltarget, _ltype, _lindex) == \ + offsetof(_ctype, _cmember)) + + +/** + * Get value pointer to a structure member. + */ +LLVMValueRef +lp_build_struct_get_ptr(LLVMBuilderRef builder, + LLVMValueRef ptr, + unsigned member, + const char *name); + +/** + * Get the value of a structure member. + */ +LLVMValueRef +lp_build_struct_get(LLVMBuilderRef builder, + LLVMValueRef ptr, + unsigned member, + const char *name); + + +#endif /* !LP_BLD_STRUCT_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c new file mode 100644 index 00000000000..64e81f7b1fe --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c @@ -0,0 +1,239 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Helper functions for swizzling/shuffling. 
+ * + * @author Jose Fonseca + */ + + +#include "util/u_debug.h" + +#include "lp_bld_type.h" +#include "lp_bld_const.h" +#include "lp_bld_logic.h" +#include "lp_bld_swizzle.h" + + +LLVMValueRef +lp_build_broadcast(LLVMBuilderRef builder, + LLVMTypeRef vec_type, + LLVMValueRef scalar) +{ + const unsigned n = LLVMGetVectorSize(vec_type); + LLVMValueRef res; + unsigned i; + + res = LLVMGetUndef(vec_type); + for(i = 0; i < n; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + res = LLVMBuildInsertElement(builder, res, scalar, index, ""); + } + + return res; +} + + +LLVMValueRef +lp_build_broadcast_scalar(struct lp_build_context *bld, + LLVMValueRef scalar) +{ + const struct lp_type type = bld->type; + LLVMValueRef res; + unsigned i; + + res = bld->undef; + for(i = 0; i < type.length; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + res = LLVMBuildInsertElement(bld->builder, res, scalar, index, ""); + } + + return res; +} + + +LLVMValueRef +lp_build_broadcast_aos(struct lp_build_context *bld, + LLVMValueRef a, + unsigned channel) +{ + const struct lp_type type = bld->type; + const unsigned n = type.length; + unsigned i, j; + + if(a == bld->undef || a == bld->zero || a == bld->one) + return a; + + /* XXX: SSE3 has PSHUFB which should be better than bitmasks, but forcing + * using shuffles here actually causes worst results. More investigation is + * needed. */ + if (n <= 4) { + /* + * Shuffle. + */ + LLVMTypeRef elem_type = LLVMInt32Type(); + LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; + + for(j = 0; j < n; j += 4) + for(i = 0; i < 4; ++i) + shuffles[j + i] = LLVMConstInt(elem_type, j + channel, 0); + + return LLVMBuildShuffleVector(bld->builder, a, bld->undef, LLVMConstVector(shuffles, n), ""); + } + else { + /* + * Bit mask and recursive shifts + * + * XYZW XYZW .... XYZW <= input + * 0Y00 0Y00 .... 0Y00 + * YY00 YY00 .... YY00 + * YYYY YYYY .... 
YYYY <= output + */ + struct lp_type type4 = type; + const char shifts[4][2] = { + { 1, 2}, + {-1, 2}, + { 1, -2}, + {-1, -2} + }; + boolean cond[4]; + unsigned i; + + memset(cond, 0, sizeof cond); + cond[channel] = 1; + + a = LLVMBuildAnd(bld->builder, a, lp_build_const_mask_aos(type, cond), ""); + + type4.width *= 4; + type4.length /= 4; + + a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(type4), ""); + + for(i = 0; i < 2; ++i) { + LLVMValueRef tmp = NULL; + int shift = shifts[channel][i]; + +#ifdef PIPE_ARCH_LITTLE_ENDIAN + shift = -shift; +#endif + + if(shift > 0) + tmp = LLVMBuildLShr(bld->builder, a, lp_build_int_const_scalar(type4, shift*type.width), ""); + if(shift < 0) + tmp = LLVMBuildShl(bld->builder, a, lp_build_int_const_scalar(type4, -shift*type.width), ""); + + assert(tmp); + if(tmp) + a = LLVMBuildOr(bld->builder, a, tmp, ""); + } + + return LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(type), ""); + } +} + + +LLVMValueRef +lp_build_swizzle1_aos(struct lp_build_context *bld, + LLVMValueRef a, + const unsigned char swizzle[4]) +{ + const unsigned n = bld->type.length; + unsigned i, j; + + if(a == bld->undef || a == bld->zero || a == bld->one) + return a; + + if(swizzle[0] == swizzle[1] && swizzle[1] == swizzle[2] && swizzle[2] == swizzle[3]) + return lp_build_broadcast_aos(bld, a, swizzle[0]); + + { + /* + * Shuffle. 
+ */ + LLVMTypeRef elem_type = LLVMInt32Type(); + LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; + + for(j = 0; j < n; j += 4) + for(i = 0; i < 4; ++i) + shuffles[j + i] = LLVMConstInt(elem_type, j + swizzle[i], 0); + + return LLVMBuildShuffleVector(bld->builder, a, bld->undef, LLVMConstVector(shuffles, n), ""); + } +} + + +LLVMValueRef +lp_build_swizzle2_aos(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef b, + const unsigned char swizzle[4]) +{ + const unsigned n = bld->type.length; + unsigned i, j; + + if(swizzle[0] < 4 && swizzle[1] < 4 && swizzle[2] < 4 && swizzle[3] < 4) + return lp_build_swizzle1_aos(bld, a, swizzle); + + if(a == b) { + unsigned char swizzle1[4]; + swizzle1[0] = swizzle[0] % 4; + swizzle1[1] = swizzle[1] % 4; + swizzle1[2] = swizzle[2] % 4; + swizzle1[3] = swizzle[3] % 4; + return lp_build_swizzle1_aos(bld, a, swizzle1); + } + + if(swizzle[0] % 4 == 0 && + swizzle[1] % 4 == 1 && + swizzle[2] % 4 == 2 && + swizzle[3] % 4 == 3) { + boolean cond[4]; + cond[0] = swizzle[0] / 4; + cond[1] = swizzle[1] / 4; + cond[2] = swizzle[2] / 4; + cond[3] = swizzle[3] / 4; + return lp_build_select_aos(bld, a, b, cond); + } + + { + /* + * Shuffle. + */ + LLVMTypeRef elem_type = LLVMInt32Type(); + LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; + + for(j = 0; j < n; j += 4) + for(i = 0; i < 4; ++i) + shuffles[j + i] = LLVMConstInt(elem_type, j + (swizzle[i] % 4) + (swizzle[i] / 4 * n), 0); + + return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), ""); + } +} + + diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h new file mode 100644 index 00000000000..b9472127a63 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h @@ -0,0 +1,91 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Helper functions for swizzling/shuffling. + * + * @author Jose Fonseca + */ + + +#ifndef LP_BLD_SWIZZLE_H +#define LP_BLD_SWIZZLE_H + + +#include <llvm-c/Core.h> + + +struct lp_type; +struct lp_build_context; + + +LLVMValueRef +lp_build_broadcast(LLVMBuilderRef builder, + LLVMTypeRef vec_type, + LLVMValueRef scalar); + + +LLVMValueRef +lp_build_broadcast_scalar(struct lp_build_context *bld, + LLVMValueRef scalar); + + +/** + * Broadcast one channel of a vector composed of arrays of XYZW structures into + * all four channels. + */ +LLVMValueRef +lp_build_broadcast_aos(struct lp_build_context *bld, + LLVMValueRef a, + unsigned channel); + + +/** + * Swizzle a vector consisting of an array of XYZW structs. + * + * @param swizzle is in the [0,4[ range. 
+ */ +LLVMValueRef +lp_build_swizzle1_aos(struct lp_build_context *bld, + LLVMValueRef a, + const unsigned char swizzle[4]); + + +/** + * Swizzle two vectors consisting of an array of XYZW structs. + * + * @param swizzle is in the [0,8[ range. Values in the [4,8[ range refer to b. + */ +LLVMValueRef +lp_build_swizzle2_aos(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef b, + const unsigned char swizzle[4]); + + +#endif /* !LP_BLD_SWIZZLE_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h new file mode 100644 index 00000000000..eddb7a83fa2 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h @@ -0,0 +1,84 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. 
+ * + **************************************************************************/ + +/** + * @file + * TGSI to LLVM IR translation. + * + * @author Jose Fonseca + */ + +#ifndef LP_BLD_TGSI_H +#define LP_BLD_TGSI_H + +#include + + +struct tgsi_token; +struct lp_type; +struct lp_build_context; +struct lp_build_mask_context; + + +/** + * Sampler code generation interface. + * + * Although texture sampling is a requirement for TGSI translation, it is + * a very different problem with several different approaches to it. This + * structure establishes an interface for texture sampling code generation, so + * that we can easily use different texture sampling strategies. + */ +struct lp_build_sampler_soa +{ + void + (*destroy)( struct lp_build_sampler_soa *sampler ); + + void + (*emit_fetch_texel)( struct lp_build_sampler_soa *sampler, + LLVMBuilderRef builder, + struct lp_type type, + unsigned unit, + unsigned num_coords, + const LLVMValueRef *coords, + LLVMValueRef lodbias, + LLVMValueRef *texel); +}; + + +void +lp_build_tgsi_soa(LLVMBuilderRef builder, + const struct tgsi_token *tokens, + struct lp_type type, + struct lp_build_mask_context *mask, + LLVMValueRef consts_ptr, + const LLVMValueRef *pos, + const LLVMValueRef (*inputs)[4], + LLVMValueRef (*outputs)[4], + struct lp_build_sampler_soa *sampler); + + +#endif /* LP_BLD_TGSI_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c new file mode 100644 index 00000000000..85e3b1bdd42 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -0,0 +1,1467 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * TGSI to LLVM IR translation -- SoA. + * + * @author Jose Fonseca + * + * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell, + * Brian Paul, and others. 
+ */
+
+#include "pipe/p_config.h"
+#include "pipe/p_shader_tokens.h"
+#include "util/u_debug.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "tgsi/tgsi_info.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+#include "tgsi/tgsi_exec.h"
+#include "lp_bld_type.h"
+#include "lp_bld_const.h"
+#include "lp_bld_arit.h"
+#include "lp_bld_logic.h"
+#include "lp_bld_swizzle.h"
+#include "lp_bld_flow.h"
+#include "lp_bld_tgsi.h"
+
+/* Capacities of the temps[] and immediates[] tables in
+ * struct lp_build_tgsi_soa_context. */
+#define LP_MAX_TEMPS 256
+#define LP_MAX_IMMEDIATES 256
+
+/*
+ * Channel iteration helpers.
+ *
+ * FOR_EACH_CHANNEL runs CHAN over 0 .. NUM_CHANNELS-1.
+ * IS_DST0_CHANNEL_ENABLED tests bit CHAN of the write mask of Dst[0].
+ * FOR_EACH_DST0_ENABLED_CHANNEL combines both, so the statement that
+ * follows it runs only for the write-enabled channels.
+ *
+ * The IF_/FOR_EACH_DST0 forms expand to a bare `if`, so an `else` placed
+ * after them would bind to that hidden `if`.
+ */
+#define FOR_EACH_CHANNEL( CHAN )\
+   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)
+
+#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
+   ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))
+
+#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
+   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))
+
+#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
+   FOR_EACH_CHANNEL( CHAN )\
+      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )
+
+#define CHAN_X 0   /* channel indices, used as the last subscript of the value tables */
+#define CHAN_Y 1
+#define CHAN_Z 2
+#define CHAN_W 3
+
+#define QUAD_TOP_LEFT     0   /* element positions used by the swizzle_* tables below */
+#define QUAD_TOP_RIGHT    1
+#define QUAD_BOTTOM_LEFT  2
+#define QUAD_BOTTOM_RIGHT 3
+
+/**
+ * State carried through the translation of one TGSI shader.
+ */
+struct lp_build_tgsi_soa_context
+{
+   struct lp_build_context base;   /* builder, type and the zero/one/undef constants */
+
+   LLVMValueRef consts_ptr;        /* base pointer that TGSI_FILE_CONSTANT fetches index from */
+   const LLVMValueRef *pos;
+   const LLVMValueRef (*inputs)[NUM_CHANNELS];   /* read by emit_fetch for TGSI_FILE_INPUT */
+   LLVMValueRef (*outputs)[NUM_CHANNELS];        /* written by emit_store for TGSI_FILE_OUTPUT */
+
+   struct lp_build_sampler_soa *sampler;         /* texture fetch code generator used by emit_tex */
+
+   LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS];
+   LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];   /* a NULL entry is fetched as undef */
+
+   struct lp_build_mask_context *mask;           /* updated by emit_kil */
+};
+
+/*
+ * Swizzles used by emit_ddx / emit_ddy.  Each table names, for every one of
+ * the four QUAD_* positions, the position to read instead: swizzle_left
+ * selects the left element of the same row, swizzle_top the top element of
+ * the same column, and so on.
+ * NOTE(review): assumes lp_build_swizzle1_aos computes
+ * result[i] = src[swizzle[i]] -- confirm in lp_bld_swizzle.c.
+ */
+static const unsigned char
+swizzle_left[4] = {
+   QUAD_TOP_LEFT,     QUAD_TOP_LEFT,
+   QUAD_BOTTOM_LEFT,  QUAD_BOTTOM_LEFT
+};
+
+static const unsigned char
+swizzle_right[4] = {
+   QUAD_TOP_RIGHT,    QUAD_TOP_RIGHT,
+   QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT
+};
+
+static const unsigned char
+swizzle_top[4] = {
+   QUAD_TOP_LEFT,     QUAD_TOP_RIGHT,
+   QUAD_TOP_LEFT,     QUAD_TOP_RIGHT
+};
+
+static const unsigned char
+swizzle_bottom[4] = {
+   QUAD_BOTTOM_LEFT,  QUAD_BOTTOM_RIGHT,
+   QUAD_BOTTOM_LEFT,  QUAD_BOTTOM_RIGHT
+};
+
+/**
+ * Horizontal difference of src: the swizzle_right selection minus the
+ * swizzle_left selection.
+ */
+static LLVMValueRef
+emit_ddx(struct lp_build_tgsi_soa_context *bld,
+         LLVMValueRef src)
+{
+   LLVMValueRef src_left  = lp_build_swizzle1_aos(&bld->base, src, swizzle_left);
+   LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right);
+   return lp_build_sub(&bld->base, src_right, src_left);
+}
+
+/**
+ * Vertical difference of src: the swizzle_top selection minus the
+ * swizzle_bottom selection.
+ */
+static LLVMValueRef
+emit_ddy(struct lp_build_tgsi_soa_context *bld,
+         LLVMValueRef src)
+{
+   LLVMValueRef src_top    = lp_build_swizzle1_aos(&bld->base, src, swizzle_top);
+   LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom);
+   return lp_build_sub(&bld->base, src_top, src_bottom);
+}
+
+
+/**
+ * Register fetch.
+ *
+ * Returns the value of channel chan_index of source operand `index` of
+ * inst, with the operand's swizzle and sign mode applied.
+ *
+ * Constants are loaded from consts_ptr at element Register.Index*4 + swizzle
+ * and broadcast; immediates, inputs and temporaries are read from the
+ * context tables.  A temporary with no recorded value yields
+ * bld->base.undef; an unhandled register file or swizzle asserts and also
+ * yields bld->base.undef.  These early returns skip the sign-mode step.
+ */
+static LLVMValueRef
+emit_fetch(
+   struct lp_build_tgsi_soa_context *bld,
+   const struct tgsi_full_instruction *inst,
+   unsigned index,
+   const unsigned chan_index )
+{
+   const struct tgsi_full_src_register *reg = &inst->Src[index];
+   unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
+   LLVMValueRef res;
+
+   switch (swizzle) {
+   case TGSI_SWIZZLE_X:
+   case TGSI_SWIZZLE_Y:
+   case TGSI_SWIZZLE_Z:
+   case TGSI_SWIZZLE_W:
+
+      switch (reg->Register.File) {
+      case TGSI_FILE_CONSTANT: {
+         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0); /* shadows the `index` parameter; reg was resolved above */
+         LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
+         LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
+         res = lp_build_broadcast_scalar(&bld->base, scalar);
+         break;
+      }
+
+      case TGSI_FILE_IMMEDIATE:
+         res = bld->immediates[reg->Register.Index][swizzle];
+         assert(res);
+         break;
+
+      case TGSI_FILE_INPUT:
+         res = bld->inputs[reg->Register.Index][swizzle];
+         assert(res);
+         break;
+
+      case TGSI_FILE_TEMPORARY:
+         res = bld->temps[reg->Register.Index][swizzle];
+         if(!res)   /* no value recorded for this temporary channel */
+            return bld->base.undef;
+         break;
+
+      default:
+         assert( 0 );
+         return bld->base.undef;
+      }
+      break;
+
+   default:
+      assert( 0 );
+      return bld->base.undef;
+   }
+
+   switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
+   case TGSI_UTIL_SIGN_CLEAR:
+      res = lp_build_abs( &bld->base, res );
+      break;
+
+   case TGSI_UTIL_SIGN_SET:
+      /* TODO: Use bitwise OR for floating point */
+      res = lp_build_abs( &bld->base, res );
+      res = LLVMBuildNeg( bld->base.builder, res, "" );
+      break;
+
+   case TGSI_UTIL_SIGN_TOGGLE:
+      res = LLVMBuildNeg( bld->base.builder, res, "" );
+      break;
+
+   case TGSI_UTIL_SIGN_KEEP:
+      break;
+   }
+
+   return res;
+}
+
+
+/**
+ * Register fetch with derivatives.
+ *
+ * Fetches the operand once with emit_fetch and stores the value and/or its
+ * emit_ddx / emit_ddy differences through res, ddx and ddy.  Any of the
+ * three output pointers may be NULL, in which case that result is skipped.
+ */
+static void
+emit_fetch_deriv(
+   struct lp_build_tgsi_soa_context *bld,
+   const struct tgsi_full_instruction *inst,
+   unsigned index,
+   const unsigned chan_index,
+   LLVMValueRef *res,
+   LLVMValueRef *ddx,
+   LLVMValueRef *ddy)
+{
+   LLVMValueRef src;
+
+   src = emit_fetch(bld, inst, index, chan_index);
+
+   if(res)
+      *res = src;
+
+   /* TODO: use interpolation coeffs for inputs */
+
+   if(ddx)
+      *ddx = emit_ddx(bld, src);
+
+   if(ddy)
+      *ddy = emit_ddy(bld, src);
+}
+
+
+/**
+ * Register store.
+ */ +static void +emit_store( + struct lp_build_tgsi_soa_context *bld, + const struct tgsi_full_instruction *inst, + unsigned index, + unsigned chan_index, + LLVMValueRef value) +{ + const struct tgsi_full_dst_register *reg = &inst->Dst[index]; + + switch( inst->Instruction.Saturate ) { + case TGSI_SAT_NONE: + break; + + case TGSI_SAT_ZERO_ONE: + value = lp_build_max(&bld->base, value, bld->base.zero); + value = lp_build_min(&bld->base, value, bld->base.one); + break; + + case TGSI_SAT_MINUS_PLUS_ONE: + value = lp_build_max(&bld->base, value, lp_build_const_scalar(bld->base.type, -1.0)); + value = lp_build_min(&bld->base, value, bld->base.one); + break; + + default: + assert(0); + } + + switch( reg->Register.File ) { + case TGSI_FILE_OUTPUT: + bld->outputs[reg->Register.Index][chan_index] = value; + break; + + case TGSI_FILE_TEMPORARY: + bld->temps[reg->Register.Index][chan_index] = value; + break; + + case TGSI_FILE_ADDRESS: + /* FIXME */ + assert(0); + break; + + default: + assert( 0 ); + } +} + + +/** + * High-level instruction translators. 
+ */
+
+/**
+ * Emit a texture fetch.
+ *
+ * The sampler unit is taken from Src[1].Register.Index and the coordinates
+ * from Src[0]; the texture target decides how many coordinates are used
+ * (1, 2 or 3), the remaining slots are filled with undef.
+ *
+ * @param apply_lodbias  pass Src[0].w to the sampler as lodbias instead of zero
+ * @param projected      multiply every coordinate by 1/Src[0].w first
+ * @param texel          forwarded to the sampler's emit_fetch_texel, which
+ *                       returns the result through it
+ *
+ * An unknown texture target asserts and emits nothing.
+ */
+static void
+emit_tex( struct lp_build_tgsi_soa_context *bld,
+          const struct tgsi_full_instruction *inst,
+          boolean apply_lodbias,
+          boolean projected,
+          LLVMValueRef *texel)
+{
+   const uint unit = inst->Src[1].Register.Index;
+   LLVMValueRef lodbias;
+   LLVMValueRef oow = NULL;   /* 1/w, only computed when projected */
+   LLVMValueRef coords[3];
+   unsigned num_coords;
+   unsigned i;
+
+   switch (inst->Texture.Texture) {
+   case TGSI_TEXTURE_1D:
+      num_coords = 1;
+      break;
+   case TGSI_TEXTURE_2D:
+   case TGSI_TEXTURE_RECT:
+      num_coords = 2;
+      break;
+   case TGSI_TEXTURE_SHADOW1D:
+   case TGSI_TEXTURE_SHADOW2D:
+   case TGSI_TEXTURE_SHADOWRECT:
+   case TGSI_TEXTURE_3D:
+   case TGSI_TEXTURE_CUBE:
+      num_coords = 3;
+      break;
+   default:
+      assert(0);
+      return;
+   }
+
+   if(apply_lodbias)
+      lodbias = emit_fetch( bld, inst, 0, 3 );
+   else
+      lodbias = bld->base.zero;
+
+   if (projected) {
+      oow = emit_fetch( bld, inst, 0, 3 );
+      oow = lp_build_rcp(&bld->base, oow);
+   }
+
+   for (i = 0; i < num_coords; i++) {
+      coords[i] = emit_fetch( bld, inst, 0, i );
+      if (projected)
+         coords[i] = lp_build_mul(&bld->base, coords[i], oow);
+   }
+   for (i = num_coords; i < 3; i++) {   /* unused coordinate slots */
+      coords[i] = bld->base.undef;
+   }
+
+   bld->sampler->emit_fetch_texel(bld->sampler,
+                                  bld->base.builder,
+                                  bld->base.type,
+                                  unit, num_coords, coords, lodbias,
+                                  texel);
+}
+
+/**
+ * Conditional kill.
+ *
+ * Fetches every distinct (post-swizzle) component of Src[0] once, compares
+ * each against zero with PIPE_FUNC_GEQUAL, ANDs the comparison results
+ * together and hands the combined value to lp_build_mask_update.
+ * NOTE(review): presumably a set bit means "keep" in the execution mask --
+ * confirm against lp_bld_flow.c.
+ */
+static void
+emit_kil(
+   struct lp_build_tgsi_soa_context *bld,
+   const struct tgsi_full_instruction *inst )
+{
+   const struct tgsi_full_src_register *reg = &inst->Src[0];
+   LLVMValueRef terms[NUM_CHANNELS];
+   LLVMValueRef mask;
+   unsigned chan_index;
+
+   memset(&terms, 0, sizeof terms);   /* NULL marks a component not fetched yet */
+
+   FOR_EACH_CHANNEL( chan_index ) {
+      unsigned swizzle;
+
+      /* Unswizzle channel */
+      swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
+
+      /* Check that the component has not already been tested. */
+      assert(swizzle < NUM_CHANNELS);
+      if( !terms[swizzle] )
+         /* TODO: change the comparison operator instead of setting the sign */
+         terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
+   }
+
+   mask = NULL;
+   FOR_EACH_CHANNEL( chan_index ) {
+      if(terms[chan_index]) {
+         LLVMValueRef chan_mask;
+
+         chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
+
+         if(mask)
+            mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
+         else
+            mask = chan_mask;
+      }
+   }
+
+   if(mask)
+      lp_build_mask_update(bld->mask, mask);
+}
+
+
+/**
+ * Check if inst src/dest regs use indirect addressing into temporary
+ * register file.
+ *
+ * Returns TRUE as soon as one source or destination operand is both in
+ * TGSI_FILE_TEMPORARY and flagged Indirect, FALSE otherwise.
+ */
+static boolean
+indirect_temp_reference(const struct tgsi_full_instruction *inst)
+{
+   uint i;
+   for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
+      const struct tgsi_full_src_register *reg = &inst->Src[i];
+      if (reg->Register.File == TGSI_FILE_TEMPORARY &&
+          reg->Register.Indirect)
+         return TRUE;
+   }
+   for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
+      const struct tgsi_full_dst_register *reg = &inst->Dst[i];
+      if (reg->Register.File == TGSI_FILE_TEMPORARY &&
+          reg->Register.Indirect)
+         return TRUE;
+   }
+   return FALSE;
+}
+
+
+static int
+emit_instruction(
+   struct lp_build_tgsi_soa_context *bld,
+   const struct tgsi_full_instruction *inst,
+   const struct tgsi_opcode_info *info)
+{
+   unsigned chan_index;
+   LLVMValueRef src0, src1, src2;
+   LLVMValueRef tmp0, tmp1, tmp2;
+   LLVMValueRef tmp3 = NULL;
+   LLVMValueRef tmp4 = NULL;
+   LLVMValueRef tmp5 = NULL;
+   LLVMValueRef tmp6 = NULL;
+   LLVMValueRef tmp7 = NULL;
+   LLVMValueRef res;
+   LLVMValueRef dst0[NUM_CHANNELS];
+
+   /* we can't handle indirect addressing into temp register file yet */
+   if (indirect_temp_reference(inst))
+      return FALSE;
+
+   assert(info->num_dst <= 1);
+   if(info->num_dst) {
+      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+         dst0[chan_index] = bld->base.undef;
+      }
+   }
+
+   switch (inst->Instruction.Opcode) {
+#if 0
+   case
TGSI_OPCODE_ARL: + /* FIXME */ + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + tmp0 = emit_fetch( bld, inst, 0, chan_index ); + emit_flr(bld, 0, 0); + emit_f2it( bld, 0 ); + dst0[chan_index] = tmp0; + } + break; +#endif + + case TGSI_OPCODE_MOV: + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index ); + } + break; + + case TGSI_OPCODE_LIT: + if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) { + dst0[CHAN_X] = bld->base.one; + } + if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) { + src0 = emit_fetch( bld, inst, 0, CHAN_X ); + dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero); + } + if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { + /* XMM[1] = SrcReg[0].yyyy */ + tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); + /* XMM[1] = max(XMM[1], 0) */ + tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero); + /* XMM[2] = SrcReg[0].wwww */ + tmp2 = emit_fetch( bld, inst, 0, CHAN_W ); + tmp1 = lp_build_pow( &bld->base, tmp1, tmp2); + tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); + tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero); + dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero); + } + if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) { + dst0[CHAN_W] = bld->base.one; + } + break; + + case TGSI_OPCODE_RCP: + /* TGSI_OPCODE_RECIP */ + src0 = emit_fetch( bld, inst, 0, CHAN_X ); + res = lp_build_rcp(&bld->base, src0); + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + dst0[chan_index] = res; + } + break; + + case TGSI_OPCODE_RSQ: + /* TGSI_OPCODE_RECIPSQRT */ + src0 = emit_fetch( bld, inst, 0, CHAN_X ); + src0 = lp_build_abs(&bld->base, src0); + res = lp_build_rsqrt(&bld->base, src0); + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + dst0[chan_index] = res; + } + break; + + case TGSI_OPCODE_EXP: + if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || + IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || + IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) { + LLVMValueRef *p_exp2_int_part 
= NULL; + LLVMValueRef *p_frac_part = NULL; + LLVMValueRef *p_exp2 = NULL; + + src0 = emit_fetch( bld, inst, 0, CHAN_X ); + + if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) + p_exp2_int_part = &tmp0; + if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) + p_frac_part = &tmp1; + if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) + p_exp2 = &tmp2; + + lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2); + + if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) + dst0[CHAN_X] = tmp0; + if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) + dst0[CHAN_Y] = tmp1; + if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) + dst0[CHAN_Z] = tmp2; + } + /* dst.w = 1.0 */ + if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) { + dst0[CHAN_W] = bld->base.one; + } + break; + + case TGSI_OPCODE_LOG: + if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || + IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || + IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) { + LLVMValueRef *p_floor_log2 = NULL; + LLVMValueRef *p_exp = NULL; + LLVMValueRef *p_log2 = NULL; + + src0 = emit_fetch( bld, inst, 0, CHAN_X ); + src0 = lp_build_abs( &bld->base, src0 ); + + if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) + p_floor_log2 = &tmp0; + if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) + p_exp = &tmp1; + if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) + p_log2 = &tmp2; + + lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2); + + /* dst.x = floor(lg2(abs(src.x))) */ + if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) + dst0[CHAN_X] = tmp0; + /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */ + if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) { + dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1); + } + /* dst.z = lg2(abs(src.x)) */ + if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) + dst0[CHAN_Z] = tmp2; + } + /* dst.w = 1.0 */ + if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) { + dst0[CHAN_W] = bld->base.one; + } + break; + + case TGSI_OPCODE_MUL: + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + src0 = emit_fetch( bld, inst, 0, chan_index ); + 
src1 = emit_fetch( bld, inst, 1, chan_index ); + dst0[chan_index] = lp_build_mul(&bld->base, src0, src1); + } + break; + + case TGSI_OPCODE_ADD: + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + src0 = emit_fetch( bld, inst, 0, chan_index ); + src1 = emit_fetch( bld, inst, 1, chan_index ); + dst0[chan_index] = lp_build_add(&bld->base, src0, src1); + } + break; + + case TGSI_OPCODE_DP3: + /* TGSI_OPCODE_DOT3 */ + tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); + tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); + tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); + tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); + tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); + tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); + tmp0 = lp_build_add( &bld->base, tmp0, tmp1); + tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); + tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); + tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); + tmp0 = lp_build_add( &bld->base, tmp0, tmp1); + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + dst0[chan_index] = tmp0; + } + break; + + case TGSI_OPCODE_DP4: + /* TGSI_OPCODE_DOT4 */ + tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); + tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); + tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); + tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); + tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); + tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); + tmp0 = lp_build_add( &bld->base, tmp0, tmp1); + tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); + tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); + tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); + tmp0 = lp_build_add( &bld->base, tmp0, tmp1); + tmp1 = emit_fetch( bld, inst, 0, CHAN_W ); + tmp2 = emit_fetch( bld, inst, 1, CHAN_W ); + tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); + tmp0 = lp_build_add( &bld->base, tmp0, tmp1); + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + dst0[chan_index] = tmp0; + } + break; + + case TGSI_OPCODE_DST: + IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { + dst0[CHAN_X] = bld->base.one; + } + 
IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { + tmp0 = emit_fetch( bld, inst, 0, CHAN_Y ); + tmp1 = emit_fetch( bld, inst, 1, CHAN_Y ); + dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1); + } + IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { + dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z ); + } + IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { + dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W ); + } + break; + + case TGSI_OPCODE_MIN: + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + src0 = emit_fetch( bld, inst, 0, chan_index ); + src1 = emit_fetch( bld, inst, 1, chan_index ); + dst0[chan_index] = lp_build_min( &bld->base, src0, src1 ); + } + break; + + case TGSI_OPCODE_MAX: + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + src0 = emit_fetch( bld, inst, 0, chan_index ); + src1 = emit_fetch( bld, inst, 1, chan_index ); + dst0[chan_index] = lp_build_max( &bld->base, src0, src1 ); + } + break; + + case TGSI_OPCODE_SLT: + /* TGSI_OPCODE_SETLT */ + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + src0 = emit_fetch( bld, inst, 0, chan_index ); + src1 = emit_fetch( bld, inst, 1, chan_index ); + tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 ); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); + } + break; + + case TGSI_OPCODE_SGE: + /* TGSI_OPCODE_SETGE */ + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + src0 = emit_fetch( bld, inst, 0, chan_index ); + src1 = emit_fetch( bld, inst, 1, chan_index ); + tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 ); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); + } + break; + + case TGSI_OPCODE_MAD: + /* TGSI_OPCODE_MADD */ + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + tmp0 = emit_fetch( bld, inst, 0, chan_index ); + tmp1 = emit_fetch( bld, inst, 1, chan_index ); + tmp2 = emit_fetch( bld, inst, 2, chan_index ); + tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); + tmp0 = lp_build_add( &bld->base, 
tmp0, tmp2); + dst0[chan_index] = tmp0; + } + break; + + case TGSI_OPCODE_SUB: + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + tmp0 = emit_fetch( bld, inst, 0, chan_index ); + tmp1 = emit_fetch( bld, inst, 1, chan_index ); + dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1); + } + break; + + case TGSI_OPCODE_LRP: + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + src0 = emit_fetch( bld, inst, 0, chan_index ); + src1 = emit_fetch( bld, inst, 1, chan_index ); + src2 = emit_fetch( bld, inst, 2, chan_index ); + tmp0 = lp_build_sub( &bld->base, src1, src2 ); + tmp0 = lp_build_mul( &bld->base, src0, tmp0 ); + dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 ); + } + break; + + case TGSI_OPCODE_CND: + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + src0 = emit_fetch( bld, inst, 0, chan_index ); + src1 = emit_fetch( bld, inst, 1, chan_index ); + src2 = emit_fetch( bld, inst, 2, chan_index ); + tmp1 = lp_build_const_scalar(bld->base.type, 0.5); + tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 ); + } + break; + + case TGSI_OPCODE_DP2A: + tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */ + tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */ + tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */ + tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */ + tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */ + tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */ + tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ + tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */ + tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ + } + break; + + case TGSI_OPCODE_FRC: + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + src0 = 
emit_fetch( bld, inst, 0, chan_index ); + tmp0 = lp_build_floor(&bld->base, src0); + tmp0 = lp_build_sub(&bld->base, src0, tmp0); + dst0[chan_index] = tmp0; + } + break; + + case TGSI_OPCODE_CLAMP: + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + tmp0 = emit_fetch( bld, inst, 0, chan_index ); + src1 = emit_fetch( bld, inst, 1, chan_index ); + src2 = emit_fetch( bld, inst, 2, chan_index ); + tmp0 = lp_build_max(&bld->base, tmp0, src1); + tmp0 = lp_build_min(&bld->base, tmp0, src2); + dst0[chan_index] = tmp0; + } + break; + + case TGSI_OPCODE_FLR: + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + tmp0 = emit_fetch( bld, inst, 0, chan_index ); + dst0[chan_index] = lp_build_floor(&bld->base, tmp0); + } + break; + + case TGSI_OPCODE_ROUND: + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + tmp0 = emit_fetch( bld, inst, 0, chan_index ); + dst0[chan_index] = lp_build_round(&bld->base, tmp0); + } + break; + + case TGSI_OPCODE_EX2: { + tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); + tmp0 = lp_build_exp2( &bld->base, tmp0); + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + dst0[chan_index] = tmp0; + } + break; + } + + case TGSI_OPCODE_LG2: + tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); + tmp0 = lp_build_log2( &bld->base, tmp0); + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + dst0[chan_index] = tmp0; + } + break; + + case TGSI_OPCODE_POW: + src0 = emit_fetch( bld, inst, 0, CHAN_X ); + src1 = emit_fetch( bld, inst, 1, CHAN_X ); + res = lp_build_pow( &bld->base, src0, src1 ); + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + dst0[chan_index] = res; + } + break; + + case TGSI_OPCODE_XPD: + if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || + IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) { + tmp1 = emit_fetch( bld, inst, 1, CHAN_Z ); + tmp3 = emit_fetch( bld, inst, 0, CHAN_Z ); + } + if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || + IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { + tmp0 = emit_fetch( bld, inst, 0, CHAN_Y ); + tmp4 = emit_fetch( bld, inst, 1, 
CHAN_Y ); + } + IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { + tmp2 = tmp0; + tmp2 = lp_build_mul( &bld->base, tmp2, tmp1); + tmp5 = tmp3; + tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); + tmp2 = lp_build_sub( &bld->base, tmp2, tmp5); + dst0[CHAN_X] = tmp2; + } + if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || + IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { + tmp2 = emit_fetch( bld, inst, 1, CHAN_X ); + tmp5 = emit_fetch( bld, inst, 0, CHAN_X ); + } + IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { + tmp3 = lp_build_mul( &bld->base, tmp3, tmp2); + tmp1 = lp_build_mul( &bld->base, tmp1, tmp5); + tmp3 = lp_build_sub( &bld->base, tmp3, tmp1); + dst0[CHAN_Y] = tmp3; + } + IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { + tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); + tmp0 = lp_build_mul( &bld->base, tmp0, tmp2); + tmp5 = lp_build_sub( &bld->base, tmp5, tmp0); + dst0[CHAN_Z] = tmp5; + } + IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { + dst0[CHAN_W] = bld->base.one; + } + break; + + case TGSI_OPCODE_ABS: + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + tmp0 = emit_fetch( bld, inst, 0, chan_index ); + dst0[chan_index] = lp_build_abs( &bld->base, tmp0 ); + } + break; + + case TGSI_OPCODE_RCC: + /* deprecated? 
*/ + assert(0); + return 0; + + case TGSI_OPCODE_DPH: + tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); + tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); + tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); + tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); + tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); + tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); + tmp0 = lp_build_add( &bld->base, tmp0, tmp1); + tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); + tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); + tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); + tmp0 = lp_build_add( &bld->base, tmp0, tmp1); + tmp1 = emit_fetch( bld, inst, 1, CHAN_W ); + tmp0 = lp_build_add( &bld->base, tmp0, tmp1); + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + dst0[chan_index] = tmp0; + } + break; + + case TGSI_OPCODE_COS: + tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); + tmp0 = lp_build_cos( &bld->base, tmp0 ); + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + dst0[chan_index] = tmp0; + } + break; + + case TGSI_OPCODE_DDX: + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL); + } + break; + + case TGSI_OPCODE_DDY: + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]); + } + break; + + case TGSI_OPCODE_KILP: + /* predicated kill */ + /* FIXME */ + return 0; + break; + + case TGSI_OPCODE_KIL: + /* conditional kill */ + emit_kil( bld, inst ); + break; + + case TGSI_OPCODE_PK2H: + return 0; + break; + + case TGSI_OPCODE_PK2US: + return 0; + break; + + case TGSI_OPCODE_PK4B: + return 0; + break; + + case TGSI_OPCODE_PK4UB: + return 0; + break; + + case TGSI_OPCODE_RFL: + return 0; + break; + + case TGSI_OPCODE_SEQ: + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + src0 = emit_fetch( bld, inst, 0, chan_index ); + src1 = emit_fetch( bld, inst, 1, chan_index ); + tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 ); + dst0[chan_index] = lp_build_select( 
&bld->base, tmp0, bld->base.one, bld->base.zero ); + } + break; + + case TGSI_OPCODE_SFL: + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + dst0[chan_index] = bld->base.zero; + } + break; + + case TGSI_OPCODE_SGT: + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + src0 = emit_fetch( bld, inst, 0, chan_index ); + src1 = emit_fetch( bld, inst, 1, chan_index ); + tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 ); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); + } + break; + + case TGSI_OPCODE_SIN: + tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); + tmp0 = lp_build_sin( &bld->base, tmp0 ); + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + dst0[chan_index] = tmp0; + } + break; + + case TGSI_OPCODE_SLE: + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + src0 = emit_fetch( bld, inst, 0, chan_index ); + src1 = emit_fetch( bld, inst, 1, chan_index ); + tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 ); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); + } + break; + + case TGSI_OPCODE_SNE: + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + src0 = emit_fetch( bld, inst, 0, chan_index ); + src1 = emit_fetch( bld, inst, 1, chan_index ); + tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 ); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); + } + break; + + case TGSI_OPCODE_STR: + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + dst0[chan_index] = bld->base.one; + } + break; + + case TGSI_OPCODE_TEX: + emit_tex( bld, inst, FALSE, FALSE, dst0 ); + break; + + case TGSI_OPCODE_TXD: + /* FIXME */ + return 0; + break; + + case TGSI_OPCODE_UP2H: + /* deprecated */ + assert (0); + return 0; + break; + + case TGSI_OPCODE_UP2US: + /* deprecated */ + assert(0); + return 0; + break; + + case TGSI_OPCODE_UP4B: + /* deprecated */ + assert(0); + return 0; + break; + + case TGSI_OPCODE_UP4UB: + /* 
deprecated */ + assert(0); + return 0; + break; + + case TGSI_OPCODE_X2D: + /* deprecated? */ + assert(0); + return 0; + break; + + case TGSI_OPCODE_ARA: + /* deprecated */ + assert(0); + return 0; + break; + +#if 0 + case TGSI_OPCODE_ARR: + /* FIXME */ + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + tmp0 = emit_fetch( bld, inst, 0, chan_index ); + emit_rnd( bld, 0, 0 ); + emit_f2it( bld, 0 ); + dst0[chan_index] = tmp0; + } + break; +#endif + + case TGSI_OPCODE_BRA: + /* deprecated */ + assert(0); + return 0; + break; + + case TGSI_OPCODE_CAL: + /* FIXME */ + return 0; + break; + + case TGSI_OPCODE_RET: + /* FIXME */ + return 0; + break; + + case TGSI_OPCODE_END: + break; + + case TGSI_OPCODE_SSG: + /* TGSI_OPCODE_SGN */ + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + tmp0 = emit_fetch( bld, inst, 0, chan_index ); + dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 ); + } + break; + + case TGSI_OPCODE_CMP: + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + src0 = emit_fetch( bld, inst, 0, chan_index ); + src1 = emit_fetch( bld, inst, 1, chan_index ); + src2 = emit_fetch( bld, inst, 2, chan_index ); + tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero ); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2); + } + break; + + case TGSI_OPCODE_SCS: + IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { + tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); + dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 ); + } + IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { + tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); + dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 ); + } + IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { + dst0[CHAN_Z] = bld->base.zero; + } + IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { + dst0[CHAN_W] = bld->base.one; + } + break; + + case TGSI_OPCODE_TXB: + emit_tex( bld, inst, TRUE, FALSE, dst0 ); + break; + + case TGSI_OPCODE_NRM: + /* fall-through */ + case TGSI_OPCODE_NRM4: + /* 3 or 4-component normalization */ + { + uint dims = 
(inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4; + + if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) || + IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) || + IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) || + (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) { + + /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */ + + /* xmm4 = src.x */ + /* xmm0 = src.x * src.x */ + tmp0 = emit_fetch(bld, inst, 0, CHAN_X); + if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) { + tmp4 = tmp0; + } + tmp0 = lp_build_mul( &bld->base, tmp0, tmp0); + + /* xmm5 = src.y */ + /* xmm0 = xmm0 + src.y * src.y */ + tmp1 = emit_fetch(bld, inst, 0, CHAN_Y); + if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) { + tmp5 = tmp1; + } + tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); + tmp0 = lp_build_add( &bld->base, tmp0, tmp1); + + /* xmm6 = src.z */ + /* xmm0 = xmm0 + src.z * src.z */ + tmp1 = emit_fetch(bld, inst, 0, CHAN_Z); + if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) { + tmp6 = tmp1; + } + tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); + tmp0 = lp_build_add( &bld->base, tmp0, tmp1); + + if (dims == 4) { + /* xmm7 = src.w */ + /* xmm0 = xmm0 + src.w * src.w */ + tmp1 = emit_fetch(bld, inst, 0, CHAN_W); + if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) { + tmp7 = tmp1; + } + tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); + tmp0 = lp_build_add( &bld->base, tmp0, tmp1); + } + + /* xmm1 = 1 / sqrt(xmm0) */ + tmp1 = lp_build_rsqrt( &bld->base, tmp0); + + /* dst.x = xmm1 * src.x */ + if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) { + dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1); + } + + /* dst.y = xmm1 * src.y */ + if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) { + dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1); + } + + /* dst.z = xmm1 * src.z */ + if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) { + dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1); + } + + /* dst.w = xmm1 * src.w */ + if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) { + dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1); + } + } + + /* 
dst.w = 1.0 */ + if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) { + dst0[CHAN_W] = bld->base.one; + } + } + break; + + case TGSI_OPCODE_DIV: + /* deprecated */ + assert( 0 ); + return 0; + break; + + case TGSI_OPCODE_DP2: + tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */ + tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */ + tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */ + tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */ + tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */ + tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */ + tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ + } + break; + + case TGSI_OPCODE_TXL: + emit_tex( bld, inst, TRUE, FALSE, dst0 ); + break; + + case TGSI_OPCODE_TXP: + emit_tex( bld, inst, FALSE, TRUE, dst0 ); + break; + + case TGSI_OPCODE_BRK: + /* FIXME */ + return 0; + break; + + case TGSI_OPCODE_IF: + /* FIXME */ + return 0; + break; + + case TGSI_OPCODE_BGNFOR: + /* deprecated */ + assert(0); + return 0; + break; + + case TGSI_OPCODE_REP: + /* deprecated */ + assert(0); + return 0; + break; + + case TGSI_OPCODE_ELSE: + /* FIXME */ + return 0; + break; + + case TGSI_OPCODE_ENDIF: + /* FIXME */ + return 0; + break; + + case TGSI_OPCODE_ENDFOR: + /* deprecated */ + assert(0); + return 0; + break; + + case TGSI_OPCODE_ENDREP: + /* deprecated */ + assert(0); + return 0; + break; + + case TGSI_OPCODE_PUSHA: + /* deprecated? */ + assert(0); + return 0; + break; + + case TGSI_OPCODE_POPA: + /* deprecated? */ + assert(0); + return 0; + break; + + case TGSI_OPCODE_CEIL: + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + tmp0 = emit_fetch( bld, inst, 0, chan_index ); + dst0[chan_index] = lp_build_ceil(&bld->base, tmp0); + } + break; + + case TGSI_OPCODE_I2F: + /* deprecated? 
*/ + assert(0); + return 0; + break; + + case TGSI_OPCODE_NOT: + /* deprecated? */ + assert(0); + return 0; + break; + + case TGSI_OPCODE_TRUNC: + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + tmp0 = emit_fetch( bld, inst, 0, chan_index ); + dst0[chan_index] = lp_build_trunc(&bld->base, tmp0); + } + break; + + case TGSI_OPCODE_SHL: + /* deprecated? */ + assert(0); + return 0; + break; + + case TGSI_OPCODE_ISHR: + /* deprecated? */ + assert(0); + return 0; + break; + + case TGSI_OPCODE_AND: + /* deprecated? */ + assert(0); + return 0; + break; + + case TGSI_OPCODE_OR: + /* deprecated? */ + assert(0); + return 0; + break; + + case TGSI_OPCODE_MOD: + /* deprecated? */ + assert(0); + return 0; + break; + + case TGSI_OPCODE_XOR: + /* deprecated? */ + assert(0); + return 0; + break; + + case TGSI_OPCODE_SAD: + /* deprecated? */ + assert(0); + return 0; + break; + + case TGSI_OPCODE_TXF: + /* deprecated? */ + assert(0); + return 0; + break; + + case TGSI_OPCODE_TXQ: + /* deprecated? */ + assert(0); + return 0; + break; + + case TGSI_OPCODE_CONT: + /* deprecated? 
*/ + assert(0); + return 0; + break; + + case TGSI_OPCODE_EMIT: + return 0; + break; + + case TGSI_OPCODE_ENDPRIM: + return 0; + break; + + case TGSI_OPCODE_NOP: + break; + + default: + return 0; + } + + if(info->num_dst) { + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + emit_store( bld, inst, 0, chan_index, dst0[chan_index]); + } + } + + return 1; +} + + +void +lp_build_tgsi_soa(LLVMBuilderRef builder, + const struct tgsi_token *tokens, + struct lp_type type, + struct lp_build_mask_context *mask, + LLVMValueRef consts_ptr, + const LLVMValueRef *pos, + const LLVMValueRef (*inputs)[NUM_CHANNELS], + LLVMValueRef (*outputs)[NUM_CHANNELS], + struct lp_build_sampler_soa *sampler) +{ + struct lp_build_tgsi_soa_context bld; + struct tgsi_parse_context parse; + uint num_immediates = 0; + unsigned i; + + /* Setup build context */ + memset(&bld, 0, sizeof bld); + lp_build_context_init(&bld.base, builder, type); + bld.mask = mask; + bld.pos = pos; + bld.inputs = inputs; + bld.outputs = outputs; + bld.consts_ptr = consts_ptr; + bld.sampler = sampler; + + tgsi_parse_init( &parse, tokens ); + + while( !tgsi_parse_end_of_tokens( &parse ) ) { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + /* Inputs already interpolated */ + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode; + const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode); + if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, info )) + _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n", + info ? 
info->mnemonic : ""); + } + + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + /* simply copy the immediate values into the next immediates[] slot */ + { + const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; + assert(size <= 4); + assert(num_immediates < LP_MAX_IMMEDIATES); + for( i = 0; i < size; ++i ) + bld.immediates[num_immediates][i] = + lp_build_const_scalar(type, parse.FullToken.FullImmediate.u[i].Float); + for( i = size; i < 4; ++i ) + bld.immediates[num_immediates][i] = bld.base.undef; + num_immediates++; + } + break; + + default: + assert( 0 ); + } + } + + tgsi_parse_free( &parse ); +} + diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.c b/src/gallium/auxiliary/gallivm/lp_bld_type.c new file mode 100644 index 00000000000..8270cd057f6 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_type.c @@ -0,0 +1,222 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "util/u_debug.h" + +#include "lp_bld_type.h" +#include "lp_bld_const.h" + + +LLVMTypeRef +lp_build_elem_type(struct lp_type type) +{ + if (type.floating) { + switch(type.width) { + case 32: + return LLVMFloatType(); + break; + case 64: + return LLVMDoubleType(); + break; + default: + assert(0); + return LLVMFloatType(); + } + } + else { + return LLVMIntType(type.width); + } +} + + +LLVMTypeRef +lp_build_vec_type(struct lp_type type) +{ + LLVMTypeRef elem_type = lp_build_elem_type(type); + return LLVMVectorType(elem_type, type.length); +} + + +/** + * This function is a mirror of lp_build_elem_type() above. + * + * XXX: I'm not sure if it wouldn't be easier/efficient to just recreate the + * type and check for identity. 
+ */ +boolean +lp_check_elem_type(struct lp_type type, LLVMTypeRef elem_type) +{ + LLVMTypeKind elem_kind; + + assert(elem_type); + if(!elem_type) + return FALSE; + + elem_kind = LLVMGetTypeKind(elem_type); + + if (type.floating) { + switch(type.width) { + case 32: + if(elem_kind != LLVMFloatTypeKind) + return FALSE; + break; + case 64: + if(elem_kind != LLVMDoubleTypeKind) + return FALSE; + break; + default: + assert(0); + return FALSE; + } + } + else { + if(elem_kind != LLVMIntegerTypeKind) + return FALSE; + + if(LLVMGetIntTypeWidth(elem_type) != type.width) + return FALSE; + } + + return TRUE; +} + + +boolean +lp_check_vec_type(struct lp_type type, LLVMTypeRef vec_type) +{ + LLVMTypeRef elem_type; + + assert(vec_type); + if(!vec_type) + return FALSE; + + if(LLVMGetTypeKind(vec_type) != LLVMVectorTypeKind) + return FALSE; + + if(LLVMGetVectorSize(vec_type) != type.length) + return FALSE; + + elem_type = LLVMGetElementType(vec_type); + + return lp_check_elem_type(type, elem_type); +} + + +boolean +lp_check_value(struct lp_type type, LLVMValueRef val) +{ + LLVMTypeRef vec_type; + + assert(val); + if(!val) + return FALSE; + + vec_type = LLVMTypeOf(val); + + return lp_check_vec_type(type, vec_type); +} + + +LLVMTypeRef +lp_build_int_elem_type(struct lp_type type) +{ + return LLVMIntType(type.width); +} + + +LLVMTypeRef +lp_build_int_vec_type(struct lp_type type) +{ + LLVMTypeRef elem_type = lp_build_int_elem_type(type); + return LLVMVectorType(elem_type, type.length); +} + + +/** + * Build int32[4] vector type + */ +LLVMTypeRef +lp_build_int32_vec4_type(void) +{ + struct lp_type t; + LLVMTypeRef type; + + memset(&t, 0, sizeof(t)); + t.floating = FALSE; /* floating point values */ + t.sign = TRUE; /* values are signed */ + t.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */ + t.width = 32; /* 32-bit int */ + t.length = 4; /* 4 elements per vector */ + + type = lp_build_int_elem_type(t); + return LLVMVectorType(type, t.length); +} + + +struct lp_type 
+lp_int_type(struct lp_type type) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.width = type.width; + res_type.length = type.length; + + return res_type; +} + + +/** + * Return the type with twice the bit width (hence half the number of elements). + */ +struct lp_type +lp_wider_type(struct lp_type type) +{ + struct lp_type res_type; + + memcpy(&res_type, &type, sizeof res_type); + res_type.width *= 2; + res_type.length /= 2; + + assert(res_type.length); + + return res_type; +} + + +void +lp_build_context_init(struct lp_build_context *bld, + LLVMBuilderRef builder, + struct lp_type type) +{ + bld->builder = builder; + bld->type = type; + bld->undef = lp_build_undef(type); + bld->zero = lp_build_zero(type); + bld->one = lp_build_one(type); +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.h b/src/gallium/auxiliary/gallivm/lp_bld_type.h new file mode 100644 index 00000000000..62ee05be4df --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_type.h @@ -0,0 +1,273 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Convenient representation of SIMD types. + * + * @author Jose Fonseca + */ + + +#ifndef LP_BLD_TYPE_H +#define LP_BLD_TYPE_H + + +#include + +#include + + +/** + * Native SIMD register width. + * + * 128 for all architectures we care about. + */ +#define LP_NATIVE_VECTOR_WIDTH 128 + +/** + * Several functions can only cope with vectors of length up to this value. + * You may need to increase that value if you want to represent bigger vectors. + */ +#define LP_MAX_VECTOR_LENGTH 16 + + +/** + * The LLVM type system can't conveniently express all the things we care about + * on the types used for intermediate computations, such as signed vs unsigned, + * normalized values, or fixed point. + */ +struct lp_type { + /** + * Floating-point. Cannot be used with fixed. Integer numbers are + * represented by this zero. + */ + unsigned floating:1; + + /** + * Fixed-point. Cannot be used with floating. Integer numbers are + * represented by this zero. + */ + unsigned fixed:1; + + /** + * Whether it can represent negative values or not. + * + * If this is not set for floating point, it means that all values are + * assumed to be positive. + */ + unsigned sign:1; + + /** + * Whether values are normalized to fit [0, 1] interval, or [-1, 1] + * interval for signed types. + * + * For integer types it means the representable integer range should be + * interpreted as the interval above. 
+ * + * For floating and fixed point formats it means the values should be + * clamped to the interval above. + */ + unsigned norm:1; + + /** + * Element width. + * + * For fixed point values, the fixed point is assumed to be at half the + * width. + */ + unsigned width:14; + + /** + * Vector length. + * + * width*length should be a power of two greater or equal to eight. + * + * @sa LP_MAX_VECTOR_LENGTH + */ + unsigned length:14; +}; + + +/** + * We need most of the information here in order to correctly and efficiently + * translate an arithmetic operation into LLVM IR. Putting it here avoids the + * trouble of passing it as parameters. + */ +struct lp_build_context +{ + LLVMBuilderRef builder; + + /** + * This not only describes the input/output LLVM types, but also whether + * to normalize/clamp the results. + */ + struct lp_type type; + + /** Same as lp_build_undef(type) */ + LLVMValueRef undef; + + /** Same as lp_build_zero(type) */ + LLVMValueRef zero; + + /** Same as lp_build_one(type) */ + LLVMValueRef one; +}; + + +static INLINE struct lp_type +lp_type_float(unsigned width) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.floating = TRUE; + res_type.sign = TRUE; + res_type.width = width; + res_type.length = LP_NATIVE_VECTOR_WIDTH / width; + + return res_type; +} + + +static INLINE struct lp_type +lp_type_int(unsigned width) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.sign = TRUE; + res_type.width = width; + res_type.length = LP_NATIVE_VECTOR_WIDTH / width; + + return res_type; +} + + +static INLINE struct lp_type +lp_type_uint(unsigned width) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.width = width; + res_type.length = LP_NATIVE_VECTOR_WIDTH / width; + + return res_type; +} + + +static INLINE struct lp_type +lp_type_unorm(unsigned width) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.norm = TRUE; + 
res_type.width = width; + res_type.length = LP_NATIVE_VECTOR_WIDTH / width; + + return res_type; +} + + +static INLINE struct lp_type +lp_type_fixed(unsigned width) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.sign = TRUE; + res_type.fixed = TRUE; + res_type.width = width; + res_type.length = LP_NATIVE_VECTOR_WIDTH / width; + + return res_type; +} + + +static INLINE struct lp_type +lp_type_ufixed(unsigned width) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.fixed = TRUE; + res_type.width = width; + res_type.length = LP_NATIVE_VECTOR_WIDTH / width; + + return res_type; +} + + +LLVMTypeRef +lp_build_elem_type(struct lp_type type); + + +LLVMTypeRef +lp_build_vec_type(struct lp_type type); + + +boolean +lp_check_elem_type(struct lp_type type, LLVMTypeRef elem_type); + + +boolean +lp_check_vec_type(struct lp_type type, LLVMTypeRef vec_type); + + +boolean +lp_check_value(struct lp_type type, LLVMValueRef val); + + +LLVMTypeRef +lp_build_int_elem_type(struct lp_type type); + + +LLVMTypeRef +lp_build_int_vec_type(struct lp_type type); + + +LLVMTypeRef +lp_build_int32_vec4_type(void); + + +struct lp_type +lp_int_type(struct lp_type type); + + +struct lp_type +lp_wider_type(struct lp_type type); + + +void +lp_build_context_init(struct lp_build_context *bld, + LLVMBuilderRef builder, + struct lp_type type); + + +#endif /* !LP_BLD_TYPE_H */ diff --git a/src/gallium/auxiliary/gallivm/soabuiltins.c b/src/gallium/auxiliary/gallivm/soabuiltins.c deleted file mode 100644 index cb85e1734ec..00000000000 --- a/src/gallium/auxiliary/gallivm/soabuiltins.c +++ /dev/null @@ -1,210 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - /* - * This file is compiled with clang into the LLVM bitcode - * - * Authors: - * Zack Rusin zack@tungstengraphics.com - */ -typedef __attribute__(( ext_vector_type(4) )) float float4; - - -extern float fabsf(float val); - -/* helpers */ - -float4 absvec(float4 vec) -{ - float4 res; - res.x = fabsf(vec.x); - res.y = fabsf(vec.y); - res.z = fabsf(vec.z); - res.w = fabsf(vec.w); - - return res; -} - -float4 maxvec(float4 a, float4 b) -{ - return (float4){(a.x > b.x) ? a.x : b.x, - (a.y > b.y) ? a.y : b.y, - (a.z > b.z) ? a.z : b.z, - (a.w > b.w) ? a.w : b.w}; -} - -float4 minvec(float4 a, float4 b) -{ - return (float4){(a.x < b.x) ? a.x : b.x, - (a.y < b.y) ? a.y : b.y, - (a.z < b.z) ? a.z : b.z, - (a.w < b.w) ? 
a.w : b.w}; -} - -extern float powf(float num, float p); -extern float sqrtf(float x); - -float4 powvec(float4 vec, float4 q) -{ - float4 p; - p.x = powf(vec.x, q.x); - p.y = powf(vec.y, q.y); - p.z = powf(vec.z, q.z); - p.w = powf(vec.w, q.w); - return p; -} - -float4 sqrtvec(float4 vec) -{ - float4 p; - p.x = sqrtf(vec.x); - p.y = sqrtf(vec.y); - p.z = sqrtf(vec.z); - p.w = sqrtf(vec.w); - return p; -} - -float4 sltvec(float4 v1, float4 v2) -{ - float4 p; - p.x = (v1.x < v2.x) ? 1.0 : 0.0; - p.y = (v1.y < v2.y) ? 1.0 : 0.0; - p.z = (v1.z < v2.z) ? 1.0 : 0.0; - p.w = (v1.w < v2.w) ? 1.0 : 0.0; - return p; -} - - -/* instructions */ - -void abs(float4 *res, - float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w) -{ - res[0] = absvec(tmp0x); - res[1] = absvec(tmp0y); - res[2] = absvec(tmp0z); - res[3] = absvec(tmp0w); -} - -void dp3(float4 *res, - float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, - float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) -{ - float4 dot = (tmp0x * tmp1x) + (tmp0y * tmp1y) + - (tmp0z * tmp1z); - - res[0] = dot; - res[1] = dot; - res[2] = dot; - res[3] = dot; -} - -void dp4(float4 *res, - float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, - float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) -{ - float4 dot = (tmp0x * tmp1x) + (tmp0y * tmp1y) + - (tmp0z * tmp1z) + (tmp0w * tmp1w); - - res[0] = dot; - res[1] = dot; - res[2] = dot; - res[3] = dot; -} - -void lit(float4 *res, - float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w) -{ - const float4 zerovec = (float4) {0.0, 0.0, 0.0, 0.0}; - const float4 min128 = (float4) {-128.f, -128.f, -128.f, -128.f}; - const float4 plus128 = (float4) {128.f, 128.f, 128.f, 128.f}; - - res[0] = (float4){1.0, 1.0, 1.0, 1.0}; - if (tmp0x.x > 0) { - float4 tmpy = maxvec(tmp0y, zerovec); - float4 tmpw = minvec(tmp0w, plus128); - tmpw = maxvec(tmpw, min128); - res[1] = tmp0x; - res[2] = powvec(tmpy, tmpw); - } else { - res[1] = zerovec; - res[2] = zerovec; - } - res[3] = (float4){1.0, 1.0, 
1.0, 1.0}; -} - -void min(float4 *res, - float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, - float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) -{ - res[0] = minvec(tmp0x, tmp1x); - res[1] = minvec(tmp0y, tmp1y); - res[2] = minvec(tmp0z, tmp1z); - res[3] = minvec(tmp0w, tmp1w); -} - - -void max(float4 *res, - float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, - float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) -{ - res[0] = maxvec(tmp0x, tmp1x); - res[1] = maxvec(tmp0y, tmp1y); - res[2] = maxvec(tmp0z, tmp1z); - res[3] = maxvec(tmp0w, tmp1w); -} - -void pow(float4 *res, - float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, - float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) -{ - res[0] = powvec(tmp0x, tmp1x); - res[1] = res[0]; - res[2] = res[0]; - res[3] = res[0]; -} - -void rsq(float4 *res, - float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w) -{ - const float4 onevec = (float4) {1., 1., 1., 1.}; - res[0] = onevec/sqrtvec(absvec(tmp0x)); - res[1] = onevec/sqrtvec(absvec(tmp0y)); - res[2] = onevec/sqrtvec(absvec(tmp0z)); - res[3] = onevec/sqrtvec(absvec(tmp0w)); -} - -void slt(float4 *res, - float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, - float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) -{ - res[0] = sltvec(tmp0x, tmp1x); - res[1] = sltvec(tmp0y, tmp1y); - res[2] = sltvec(tmp0z, tmp1z); - res[3] = sltvec(tmp0w, tmp1w); -} - diff --git a/src/gallium/auxiliary/gallivm/storage.cpp b/src/gallium/auxiliary/gallivm/storage.cpp deleted file mode 100644 index 73df24c9769..00000000000 --- a/src/gallium/auxiliary/gallivm/storage.cpp +++ /dev/null @@ -1,364 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - /* - * Authors: - * Zack Rusin zack@tungstengraphics.com - */ -#ifdef MESA_LLVM - -#include "storage.h" - -#include "gallivm_p.h" - -#include "pipe/p_shader_tokens.h" -#include -#include -#include - -#include -#include -#include -#include -#include - -using namespace llvm; - -Storage::Storage(llvm::BasicBlock *block, llvm::Value *input) - : m_block(block), - m_INPUT(input), - m_addrs(32), - m_idx(0) -{ - m_floatVecType = VectorType::get(Type::FloatTy, 4); - m_intVecType = VectorType::get(IntegerType::get(32), 4); - - m_undefFloatVec = UndefValue::get(m_floatVecType); - m_undefIntVec = UndefValue::get(m_intVecType); - m_extSwizzleVec = 0; - - m_numConsts = 0; -} - -//can only build vectors with all members in the [0, 9] range -llvm::Constant *Storage::shuffleMask(int vec) -{ - if (!m_extSwizzleVec) { - std::vector elems; - elems.push_back(ConstantFP::get(APFloat(0.f))); - elems.push_back(ConstantFP::get(APFloat(1.f))); - elems.push_back(ConstantFP::get(APFloat(0.f))); - elems.push_back(ConstantFP::get(APFloat(1.f))); - m_extSwizzleVec = ConstantVector::get(m_floatVecType, elems); - } - - if (m_intVecs.find(vec) != m_intVecs.end()) { - return m_intVecs[vec]; - } - int origVec = vec; - Constant* const_vec = 0; - if (origVec == 0) { - const_vec = Constant::getNullValue(m_intVecType); - } else { - int x = gallivm_x_swizzle(vec); - int y = gallivm_y_swizzle(vec); - int z = gallivm_z_swizzle(vec); - int w = gallivm_w_swizzle(vec); - std::vector elems; - elems.push_back(constantInt(x)); - elems.push_back(constantInt(y)); - elems.push_back(constantInt(z)); - elems.push_back(constantInt(w)); - const_vec = ConstantVector::get(m_intVecType, elems); - } - - m_intVecs[origVec] = const_vec; - return const_vec; -} - -llvm::ConstantInt *Storage::constantInt(int idx) -{ - if (m_constInts.find(idx) != m_constInts.end()) { - return m_constInts[idx]; - } - ConstantInt *const_int = 
ConstantInt::get(APInt(32, idx)); - m_constInts[idx] = const_int; - return const_int; -} - -llvm::Value *Storage::inputElement(int idx, llvm::Value *indIdx) -{ - Value *val = element(InputsArg, idx, indIdx); - LoadInst *load = new LoadInst(val, name("input"), false, m_block); - load->setAlignment(8); - - return load; -} - -llvm::Value *Storage::constElement(int idx, llvm::Value *indIdx) -{ - m_numConsts = ((idx + 1) > m_numConsts) ? (idx + 1) : m_numConsts; - - Value *elem = element(ConstsArg, idx, indIdx); - LoadInst *load = new LoadInst(elem, name("const"), false, m_block); - load->setAlignment(8); - return load; -} - -llvm::Value *Storage::shuffleVector(llvm::Value *vec, int shuffle) -{ - Constant *mask = shuffleMask(shuffle); - ShuffleVectorInst *res = - new ShuffleVectorInst(vec, m_extSwizzleVec, mask, - name("shuffle"), m_block); - return res; -} - - -llvm::Value *Storage::tempElement(int idx, llvm::Value *indIdx) -{ - Value *elem = element(TempsArg, idx, indIdx); - - LoadInst *load = new LoadInst(elem, name("temp"), false, m_block); - load->setAlignment(8); - - return load; -} - -void Storage::setTempElement(int idx, llvm::Value *val, int mask) -{ - if (mask != TGSI_WRITEMASK_XYZW) { - llvm::Value *templ = 0; - if (m_tempWriteMap[idx]) - templ = tempElement(idx); - val = maskWrite(val, mask, templ); - } - Value *elem = element(TempsArg, idx); - StoreInst *st = new StoreInst(val, elem, false, m_block); - st->setAlignment(8); - m_tempWriteMap[idx] = true; -} - -void Storage::setOutputElement(int dstIdx, llvm::Value *val, int mask) -{ - if (mask != TGSI_WRITEMASK_XYZW) { - llvm::Value *templ = 0; - if (m_destWriteMap[dstIdx]) - templ = outputElement(dstIdx); - val = maskWrite(val, mask, templ); - } - - Value *elem = element(DestsArg, dstIdx); - StoreInst *st = new StoreInst(val, elem, false, m_block); - st->setAlignment(8); - m_destWriteMap[dstIdx] = true; -} - -llvm::Value *Storage::maskWrite(llvm::Value *src, int mask, llvm::Value *templ) -{ - llvm::Value 
*dst = templ; - if (!dst) - dst = Constant::getNullValue(m_floatVecType); - if ((mask & TGSI_WRITEMASK_X)) { - llvm::Value *x = new ExtractElementInst(src, unsigned(0), - name("x"), m_block); - dst = InsertElementInst::Create(dst, x, unsigned(0), - name("dstx"), m_block); - } - if ((mask & TGSI_WRITEMASK_Y)) { - llvm::Value *y = new ExtractElementInst(src, unsigned(1), - name("y"), m_block); - dst = InsertElementInst::Create(dst, y, unsigned(1), - name("dsty"), m_block); - } - if ((mask & TGSI_WRITEMASK_Z)) { - llvm::Value *z = new ExtractElementInst(src, unsigned(2), - name("z"), m_block); - dst = InsertElementInst::Create(dst, z, unsigned(2), - name("dstz"), m_block); - } - if ((mask & TGSI_WRITEMASK_W)) { - llvm::Value *w = new ExtractElementInst(src, unsigned(3), - name("w"), m_block); - dst = InsertElementInst::Create(dst, w, unsigned(3), - name("dstw"), m_block); - } - return dst; -} - -const char * Storage::name(const char *prefix) -{ - ++m_idx; - snprintf(m_name, 32, "%s%d", prefix, m_idx); - return m_name; -} - -int Storage::numConsts() const -{ - return m_numConsts; -} - -llvm::Value * Storage::addrElement(int idx) const -{ - Value *ret = m_addrs[idx]; - if (!ret) - return m_undefFloatVec; - return ret; -} - -void Storage::setAddrElement(int idx, llvm::Value *val, int mask) -{ - if (mask != TGSI_WRITEMASK_XYZW) { - llvm::Value *templ = m_addrs[idx]; - val = maskWrite(val, mask, templ); - } - m_addrs[idx] = val; -} - -llvm::Value * Storage::extractIndex(llvm::Value *vec) -{ - llvm::Value *x = new ExtractElementInst(vec, unsigned(0), - name("x"), m_block); - return new FPToSIInst(x, IntegerType::get(32), name("intidx"), m_block); -} - -void Storage::setCurrentBlock(llvm::BasicBlock *block) -{ - m_block = block; -} - -llvm::Value * Storage::outputElement(int idx, llvm::Value *indIdx) -{ - Value *elem = element(DestsArg, idx, indIdx); - LoadInst *load = new LoadInst(elem, name("output"), false, m_block); - load->setAlignment(8); - - return load; -} - 
-llvm::Value * Storage::inputPtr() const -{ - return m_INPUT; -} - -void Storage::pushArguments(llvm::Value *input) -{ - m_argStack.push(m_INPUT); - - m_INPUT = input; -} - -void Storage::popArguments() -{ - m_INPUT = m_argStack.top(); - m_argStack.pop(); -} - -void Storage::pushTemps() -{ - m_extSwizzleVec = 0; -} - -void Storage::popTemps() -{ -} - -llvm::Value * Storage::immediateElement(int idx) -{ - return m_immediates[idx]; -} - -void Storage::addImmediate(float *val) -{ - std::vector vec(4); - vec[0] = ConstantFP::get(APFloat(val[0])); - vec[1] = ConstantFP::get(APFloat(val[1])); - vec[2] = ConstantFP::get(APFloat(val[2])); - vec[3] = ConstantFP::get(APFloat(val[3])); - m_immediates.push_back(ConstantVector::get(m_floatVecType, vec)); -} - - -llvm::Value * Storage::elemPtr(Args arg) -{ - std::vector indices; - indices.push_back(constantInt(0)); - indices.push_back(constantInt(static_cast(arg))); - GetElementPtrInst *getElem = GetElementPtrInst::Create(m_INPUT, - indices.begin(), - indices.end(), - name("input_ptr"), - m_block); - return new LoadInst(getElem, name("input_field"), false, m_block); -} - -llvm::Value * Storage::elemIdx(llvm::Value *ptr, int idx, - llvm::Value *indIdx ) -{ - GetElementPtrInst *getElem = 0; - - if (indIdx) { - getElem = GetElementPtrInst::Create(ptr, - BinaryOperator::Create(Instruction::Add, - indIdx, - constantInt(idx), - name("add"), - m_block), - name("field"), - m_block); - } else { - getElem = GetElementPtrInst::Create(ptr, - constantInt(idx), - name("field"), - m_block); - } - return getElem; -} - -llvm::Value * Storage::element(Args arg, int idx, llvm::Value *indIdx ) -{ - Value *val = elemPtr(arg); - return elemIdx(val, idx, indIdx); -} - -void Storage::setKilElement(llvm::Value *val) -{ - std::vector indices; - indices.push_back(constantInt(0)); - indices.push_back(constantInt(static_cast(KilArg))); - GetElementPtrInst *elem = GetElementPtrInst::Create(m_INPUT, - indices.begin(), - indices.end(), - name("kil_ptr"), - 
m_block); - StoreInst *st = new StoreInst(val, elem, false, m_block); - st->setAlignment(8); -} - -#endif //MESA_LLVM - - diff --git a/src/gallium/auxiliary/gallivm/storage.h b/src/gallium/auxiliary/gallivm/storage.h deleted file mode 100644 index 8574f7554e3..00000000000 --- a/src/gallium/auxiliary/gallivm/storage.h +++ /dev/null @@ -1,133 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - /* - * Authors: - * Zack Rusin zack@tungstengraphics.com - */ - -#ifndef STORAGE_H -#define STORAGE_H - -#include -#include -#include -#include - -namespace llvm { - class BasicBlock; - class Constant; - class ConstantInt; - class LoadInst; - class Value; - class VectorType; -} - -class Storage -{ -public: - Storage(llvm::BasicBlock *block, - llvm::Value *input); - - llvm::Value *inputPtr() const; - - void setCurrentBlock(llvm::BasicBlock *block); - - llvm::ConstantInt *constantInt(int); - llvm::Constant *shuffleMask(int vec); - llvm::Value *inputElement(int idx, llvm::Value *indIdx =0); - llvm::Value *constElement(int idx, llvm::Value *indIdx =0); - llvm::Value *outputElement(int idx, llvm::Value *indIdx =0); - llvm::Value *tempElement(int idx, llvm::Value *indIdx =0); - llvm::Value *immediateElement(int idx); - - void setOutputElement(int dstIdx, llvm::Value *val, int mask); - void setTempElement(int idx, llvm::Value *val, int mask); - - llvm::Value *addrElement(int idx) const; - void setAddrElement(int idx, llvm::Value *val, int mask); - - void setKilElement(llvm::Value *val); - - llvm::Value *shuffleVector(llvm::Value *vec, int shuffle); - - llvm::Value *extractIndex(llvm::Value *vec); - - int numConsts() const; - - void pushArguments(llvm::Value *input); - void popArguments(); - void pushTemps(); - void popTemps(); - - void addImmediate(float *val); - -private: - llvm::Value *maskWrite(llvm::Value *src, int mask, llvm::Value *templ); - const char *name(const char *prefix); - - enum Args { - DestsArg = 0, - InputsArg = 1, - TempsArg = 2, - ConstsArg = 3, - KilArg = 4 - }; - llvm::Value *elemPtr(Args arg); - llvm::Value *elemIdx(llvm::Value *ptr, int idx, - llvm::Value *indIdx = 0); - llvm::Value *element(Args arg, int idx, llvm::Value *indIdx = 0); - -private: - llvm::BasicBlock *m_block; - llvm::Value *m_INPUT; - - std::map m_constInts; - std::map m_intVecs; - std::vector 
m_addrs; - std::vector m_immediates; - - llvm::VectorType *m_floatVecType; - llvm::VectorType *m_intVecType; - - char m_name[32]; - int m_idx; - - int m_numConsts; - - std::map m_destWriteMap; - std::map m_tempWriteMap; - - llvm::Value *m_undefFloatVec; - llvm::Value *m_undefIntVec; - llvm::Value *m_extSwizzleVec; - - std::stack m_argStack; - std::stack > m_tempStack; -}; - -#endif diff --git a/src/gallium/auxiliary/gallivm/storagesoa.cpp b/src/gallium/auxiliary/gallivm/storagesoa.cpp deleted file mode 100644 index 4984ce985c6..00000000000 --- a/src/gallium/auxiliary/gallivm/storagesoa.cpp +++ /dev/null @@ -1,438 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -#include "storagesoa.h" - -#include "gallivm_p.h" - -#include "pipe/p_shader_tokens.h" -#include "util/u_debug.h" - -#include -#include -#include - -#include -#include -#include -#include -#include - -using namespace llvm; - - -StorageSoa::StorageSoa(llvm::BasicBlock *block, - llvm::Value *input, - llvm::Value *output, - llvm::Value *consts) - : m_block(block), - m_input(input), - m_output(output), - m_consts(consts), - m_immediates(0), - m_idx(0) -{ -} - -void StorageSoa::addImmediate(float *vec) -{ - std::vector vals(4); - vals[0] = vec[0]; - vals[1] = vec[1]; - vals[2] = vec[2]; - vals[3] = vec[3]; - m_immediatesToFlush.push_back(vals); -} - -void StorageSoa::declareImmediates() -{ - if (m_immediatesToFlush.empty()) - return; - - VectorType *vectorType = VectorType::get(Type::FloatTy, 4); - ArrayType *vectorChannels = ArrayType::get(vectorType, 4); - ArrayType *arrayType = ArrayType::get(vectorChannels, m_immediatesToFlush.size()); - - m_immediates = new GlobalVariable( - /*Type=*/arrayType, - /*isConstant=*/false, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Initializer=*/0, // has initializer, specified below - /*Name=*/name("immediates"), - currentModule()); - - std::vector arrayVals; - for (unsigned int i = 0; i < m_immediatesToFlush.size(); ++i) { - std::vector vec = m_immediatesToFlush[i]; - std::vector vals(4); - std::vector channelArray; - - vals[0] = vec[0]; vals[1] = vec[1]; vals[2] = vec[2]; vals[3] = vec[3]; - llvm::Constant *xChannel = createConstGlobalVector(vals); - - vals[0] = vec[1]; vals[1] = vec[1]; vals[2] = vec[1]; vals[3] = vec[1]; - llvm::Constant *yChannel = createConstGlobalVector(vals); - - vals[0] = vec[2]; vals[1] = vec[2]; vals[2] = vec[2]; vals[3] = vec[2]; - llvm::Constant *zChannel = createConstGlobalVector(vals); - - vals[0] = vec[3]; vals[1] = vec[3]; vals[2] = vec[3]; vals[3] = vec[3]; - llvm::Constant *wChannel = 
createConstGlobalVector(vals); - channelArray.push_back(xChannel); - channelArray.push_back(yChannel); - channelArray.push_back(zChannel); - channelArray.push_back(wChannel); - Constant *constChannels = ConstantArray::get(vectorChannels, - channelArray); - arrayVals.push_back(constChannels); - } - Constant *constArray = ConstantArray::get(arrayType, arrayVals); - m_immediates->setInitializer(constArray); - - m_immediatesToFlush.clear(); -} - -llvm::Value *StorageSoa::addrElement(int idx) const -{ - std::map::const_iterator itr = m_addresses.find(idx); - if (itr == m_addresses.end()) { - debug_printf("Trying to access invalid shader 'address'\n"); - return 0; - } - llvm::Value * res = (*itr).second; - - res = new LoadInst(res, name("addr"), false, m_block); - - return res; -} - -std::vector StorageSoa::inputElement(llvm::Value *idx) -{ - std::vector res(4); - - res[0] = element(m_input, idx, 0); - res[1] = element(m_input, idx, 1); - res[2] = element(m_input, idx, 2); - res[3] = element(m_input, idx, 3); - - return res; -} - -llvm::Value* StorageSoa::unpackConstElement(llvm::IRBuilder<>* m_builder, llvm::Value* vector, int cc) -{ - std::vector x(4); - x[0] = m_builder->CreateExtractElement(vector, - constantInt(cc), - name("x")); - - VectorType *vectorType = VectorType::get(Type::FloatTy, 4); - Constant *constVector = Constant::getNullValue(vectorType); - Value *res = m_builder->CreateInsertElement(constVector, x[0], - constantInt(0), - name("vecx")); - res = m_builder->CreateInsertElement(res, x[0], constantInt(1), - name("vecxx")); - res = m_builder->CreateInsertElement(res, x[0], constantInt(2), - name("vecxxx")); - res = m_builder->CreateInsertElement(res, x[0], constantInt(3), - name("vecxxxx")); - return res; -} - -std::vector StorageSoa::constElement(llvm::IRBuilder<>* m_builder, llvm::Value *idx) -{ - llvm::Value* res; - std::vector res2(4); - llvm::Value *xChannel; - - xChannel = elementPointer(m_consts, idx, 0); - - res = alignedArrayLoad(xChannel); - - 
res2[0]=unpackConstElement(m_builder, res,0); - res2[1]=unpackConstElement(m_builder, res,1); - res2[2]=unpackConstElement(m_builder, res,2); - res2[3]=unpackConstElement(m_builder, res,3); - - return res2; -} - -std::vector StorageSoa::outputElement(llvm::Value *idx) -{ - std::vector res(4); - - res[0] = element(m_output, idx, 0); - res[1] = element(m_output, idx, 1); - res[2] = element(m_output, idx, 2); - res[3] = element(m_output, idx, 3); - - return res; -} - -std::vector StorageSoa::tempElement(llvm::IRBuilder<>* m_builder, int idx) -{ - std::vector res(4); - llvm::Value *temp = m_temps[idx]; - - res[0] = element(temp, constantInt(0), 0); - res[1] = element(temp, constantInt(0), 1); - res[2] = element(temp, constantInt(0), 2); - res[3] = element(temp, constantInt(0), 3); - - return res; -} - -std::vector StorageSoa::immediateElement(llvm::Value *idx) -{ - std::vector res(4); - - res[0] = element(m_immediates, idx, 0); - res[1] = element(m_immediates, idx, 1); - res[2] = element(m_immediates, idx, 2); - res[3] = element(m_immediates, idx, 3); - - return res; -} - -llvm::Value * StorageSoa::elementPointer(llvm::Value *ptr, llvm::Value *index, - int channel) const -{ - std::vector indices; - if (m_immediates == ptr) - indices.push_back(constantInt(0)); - indices.push_back(index); - indices.push_back(constantInt(channel)); - - GetElementPtrInst *getElem = GetElementPtrInst::Create(ptr, - indices.begin(), - indices.end(), - name("ptr"), - m_block); - return getElem; -} - -llvm::Value * StorageSoa::element(llvm::Value *ptr, llvm::Value *index, - int channel) const -{ - llvm::Value *res = elementPointer(ptr, index, channel); - LoadInst *load = new LoadInst(res, name("element"), false, m_block); - //load->setAlignment(8); - return load; -} - -const char * StorageSoa::name(const char *prefix) const -{ - ++m_idx; - snprintf(m_name, 32, "%s%d", prefix, m_idx); - return m_name; -} - -llvm::ConstantInt * StorageSoa::constantInt(int idx) const -{ - if 
(m_constInts.find(idx) != m_constInts.end()) { - return m_constInts[idx]; - } - ConstantInt *constInt = ConstantInt::get(APInt(32, idx)); - m_constInts[idx] = constInt; - return constInt; -} - -llvm::Value *StorageSoa::alignedArrayLoad(llvm::Value *val) -{ - VectorType *vectorType = VectorType::get(Type::FloatTy, 4); - PointerType *vectorPtr = PointerType::get(vectorType, 0); - - CastInst *cast = new BitCastInst(val, vectorPtr, name("toVector"), m_block); - LoadInst *load = new LoadInst(cast, name("alignLoad"), false, m_block); - load->setAlignment(8); - return load; -} - -llvm::Module * StorageSoa::currentModule() const -{ - if (!m_block || !m_block->getParent()) - return 0; - - return m_block->getParent()->getParent(); -} - -llvm::Constant * StorageSoa::createConstGlobalFloat(const float val) -{ - Constant*c = ConstantFP::get(APFloat(val)); - return c; -} - -llvm::Constant * StorageSoa::createConstGlobalVector(const std::vector &vec) -{ - VectorType *vectorType = VectorType::get(Type::FloatTy, 4); - std::vector immValues; - ConstantFP *constx = ConstantFP::get(APFloat(vec[0])); - ConstantFP *consty = ConstantFP::get(APFloat(vec[1])); - ConstantFP *constz = ConstantFP::get(APFloat(vec[2])); - ConstantFP *constw = ConstantFP::get(APFloat(vec[3])); - immValues.push_back(constx); - immValues.push_back(consty); - immValues.push_back(constz); - immValues.push_back(constw); - Constant *constVector = ConstantVector::get(vectorType, immValues); - - return constVector; -} - -std::vector StorageSoa::load(enum tgsi_file_type type, int idx, int swizzle, - llvm::IRBuilder<>* m_builder,llvm::Value *indIdx) -{ - std::vector val(4); - - //if we have an indirect index, always use that - // if not use the integer offset to create one - llvm::Value *realIndex = 0; - if (indIdx) - realIndex = indIdx; - else - realIndex = constantInt(idx); - debug_printf("XXXXXXXXX realIdx = %p, indIdx = %p\n", realIndex, indIdx); - - switch(type) { - case TGSI_FILE_INPUT: - val = 
inputElement(realIndex); - break; - case TGSI_FILE_OUTPUT: - val = outputElement(realIndex); - break; - case TGSI_FILE_TEMPORARY: - val = tempElement(m_builder, idx); - break; - case TGSI_FILE_CONSTANT: - val = constElement(m_builder, realIndex); - break; - case TGSI_FILE_IMMEDIATE: - val = immediateElement(realIndex); - break; - case TGSI_FILE_ADDRESS: - debug_printf("Address not handled in the load phase!\n"); - assert(0); - break; - default: - debug_printf("Unknown load!\n"); - assert(0); - break; - } - if (!gallivm_is_swizzle(swizzle)) - return val; - - std::vector res(4); - - res[0] = val[gallivm_x_swizzle(swizzle)]; - res[1] = val[gallivm_y_swizzle(swizzle)]; - res[2] = val[gallivm_z_swizzle(swizzle)]; - res[3] = val[gallivm_w_swizzle(swizzle)]; - return res; -} - -llvm::Value * StorageSoa::allocaTemp(llvm::IRBuilder<>* m_builder) -{ - VectorType *vector = VectorType::get(Type::FloatTy, 4); - ArrayType *vecArray = ArrayType::get(vector, 4); - AllocaInst *alloca = new AllocaInst(vecArray, "temp", - m_builder->GetInsertBlock()); - - return alloca; -} - - -void StorageSoa::store(enum tgsi_file_type type, int idx, const std::vector &val, - int mask, llvm::IRBuilder<>* m_builder) -{ - llvm::Value *out = 0; - llvm::Value *realIndex = 0; - switch(type) { - case TGSI_FILE_OUTPUT: - out = m_output; - realIndex = constantInt(idx); - break; - case TGSI_FILE_TEMPORARY: - // if that temp doesn't already exist, alloca it - if (m_temps.find(idx) == m_temps.end()) - m_temps[idx] = allocaTemp(m_builder); - - out = m_temps[idx]; - - realIndex = constantInt(0); - break; - case TGSI_FILE_INPUT: - out = m_input; - realIndex = constantInt(idx); - break; - case TGSI_FILE_ADDRESS: { - llvm::Value *addr = m_addresses[idx]; - if (!addr) { - addAddress(idx); - addr = m_addresses[idx]; - assert(addr); - } - new StoreInst(val[0], addr, false, m_block); - return; - break; - } - default: - debug_printf("Can't save output of this type: %d !\n", type); - assert(0); - break; - } - if ((mask & 
TGSI_WRITEMASK_X)) { - llvm::Value *xChannel = elementPointer(out, realIndex, 0); - new StoreInst(val[0], xChannel, false, m_block); - } - if ((mask & TGSI_WRITEMASK_Y)) { - llvm::Value *yChannel = elementPointer(out, realIndex, 1); - new StoreInst(val[1], yChannel, false, m_block); - } - if ((mask & TGSI_WRITEMASK_Z)) { - llvm::Value *zChannel = elementPointer(out, realIndex, 2); - new StoreInst(val[2], zChannel, false, m_block); - } - if ((mask & TGSI_WRITEMASK_W)) { - llvm::Value *wChannel = elementPointer(out, realIndex, 3); - new StoreInst(val[3], wChannel, false, m_block); - } -} - -void StorageSoa::addAddress(int idx) -{ - GlobalVariable *val = new GlobalVariable( - /*Type=*/IntegerType::get(32), - /*isConstant=*/false, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Initializer=*/0, // has initializer, specified below - /*Name=*/name("address"), - currentModule()); - val->setInitializer(Constant::getNullValue(IntegerType::get(32))); - - debug_printf("adding to %d\n", idx); - m_addresses[idx] = val; -} diff --git a/src/gallium/auxiliary/gallivm/storagesoa.h b/src/gallium/auxiliary/gallivm/storagesoa.h deleted file mode 100644 index 56886f85e7a..00000000000 --- a/src/gallium/auxiliary/gallivm/storagesoa.h +++ /dev/null @@ -1,107 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -#ifndef STORAGESOA_H -#define STORAGESOA_H - -#include -#include - -#include -#include -#include - -namespace llvm { - class BasicBlock; - class Constant; - class ConstantInt; - class GlobalVariable; - class LoadInst; - class Value; - class VectorType; - class Module; -} - -class StorageSoa -{ -public: - StorageSoa(llvm::BasicBlock *block, - llvm::Value *input, - llvm::Value *output, - llvm::Value *consts); - - - std::vector load(enum tgsi_file_type type, int idx, int swizzle, - llvm::IRBuilder<>* m_builder, llvm::Value *indIdx =0); - void store(enum tgsi_file_type type, int idx, const std::vector &val, - int mask, llvm::IRBuilder<>* m_builder); - - void addImmediate(float *vec); - void declareImmediates(); - - void addAddress(int idx); - - llvm::Value * addrElement(int idx) const; - - llvm::ConstantInt *constantInt(int) const; -private: - llvm::Value *elementPointer(llvm::Value *ptr, llvm::Value *indIdx, - int channel) const; - llvm::Value *element(llvm::Value *ptr, llvm::Value *idx, - int channel) const; - const char *name(const char *prefix) const; - llvm::Value *alignedArrayLoad(llvm::Value *val); - llvm::Module *currentModule() const; - llvm::Constant *createConstGlobalFloat(const float val); - llvm::Constant *createConstGlobalVector(const std::vector &vec); - - std::vector inputElement(llvm::Value *indIdx); - llvm::Value* unpackConstElement(llvm::IRBuilder<>* m_builder, llvm::Value *indIdx, int cc); - std::vector constElement(llvm::IRBuilder<>* m_builder, llvm::Value *indIdx); - std::vector outputElement(llvm::Value *indIdx); - std::vector tempElement(llvm::IRBuilder<>* m_builder, int idx); - std::vector immediateElement(llvm::Value *indIdx); -private: - llvm::BasicBlock *m_block; - - llvm::Value *m_input; - llvm::Value *m_output; - llvm::Value *m_consts; - std::map m_temps; - llvm::GlobalVariable *m_immediates; - - std::map m_addresses; - - std::vector > m_immediatesToFlush; 
- llvm::Value * allocaTemp(llvm::IRBuilder<>* m_builder); - - mutable std::map m_constInts; - mutable char m_name[32]; - mutable int m_idx; -}; - -#endif diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp deleted file mode 100644 index 8f7d3b71004..00000000000 --- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp +++ /dev/null @@ -1,1136 +0,0 @@ -#include "tgsitollvm.h" - -#include "gallivm.h" -#include "gallivm_p.h" - -#include "storage.h" -#include "instructions.h" -#include "storagesoa.h" -#include "instructionssoa.h" - -#include "pipe/p_shader_tokens.h" - -#include "tgsi/tgsi_parse.h" -#include "tgsi/tgsi_exec.h" -#include "tgsi/tgsi_util.h" -#include "tgsi/tgsi_build.h" -#include "tgsi/tgsi_dump.h" - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -#include -#include -#include - -using namespace llvm; - -static inline FunctionType *vertexShaderFunctionType() -{ - //Function takes three arguments, - // the calling code has to make sure the types it will - // pass are castable to the following: - // [4 x <4 x float>] inputs, - // [4 x <4 x float>] output, - // [4 x [1 x float]] consts, - - std::vector funcArgs; - VectorType *vectorType = VectorType::get(Type::FloatTy, 4); - ArrayType *vectorArray = ArrayType::get(vectorType, 4); - PointerType *vectorArrayPtr = PointerType::get(vectorArray, 0); - - ArrayType *floatArray = ArrayType::get(Type::FloatTy, 4); - ArrayType *constsArray = ArrayType::get(floatArray, 1); - PointerType *constsArrayPtr = PointerType::get(constsArray, 0); - - funcArgs.push_back(vectorArrayPtr);//inputs - funcArgs.push_back(vectorArrayPtr);//output - funcArgs.push_back(constsArrayPtr);//consts - - FunctionType *functionType = FunctionType::get( - /*Result=*/Type::VoidTy, - /*Params=*/funcArgs, - /*isVarArg=*/false); - - return 
functionType; -} - -static inline void -add_interpolator(struct gallivm_ir *ir, - struct gallivm_interpolate *interp) -{ - ir->interpolators[ir->num_interp] = *interp; - ++ir->num_interp; -} - -static void -translate_declaration(struct gallivm_ir *prog, - llvm::Module *module, - Storage *storage, - struct tgsi_full_declaration *decl, - struct tgsi_full_declaration *fd) -{ - if (decl->Declaration.File == TGSI_FILE_INPUT) { - unsigned first, last, mask; - uint interp_method; - - first = decl->Range.First; - last = decl->Range.Last; - mask = decl->Declaration.UsageMask; - - /* Do not touch WPOS.xy */ - if (first == 0) { - mask &= ~TGSI_WRITEMASK_XY; - if (mask == TGSI_WRITEMASK_NONE) { - first++; - if (first > last) { - return; - } - } - } - - interp_method = decl->Declaration.Interpolate; - - if (mask == TGSI_WRITEMASK_XYZW) { - unsigned i, j; - - for (i = first; i <= last; i++) { - for (j = 0; j < NUM_CHANNELS; j++) { - //interp( mach, i, j ); - struct gallivm_interpolate interp; - interp.type = interp_method; - interp.attrib = i; - interp.chan = j; - add_interpolator(prog, &interp); - } - } - } else { - unsigned i, j; - for( j = 0; j < NUM_CHANNELS; j++ ) { - if( mask & (1 << j) ) { - for( i = first; i <= last; i++ ) { - struct gallivm_interpolate interp; - interp.type = interp_method; - interp.attrib = i; - interp.chan = j; - add_interpolator(prog, &interp); - } - } - } - } - } -} - -static void -translate_declarationir(struct gallivm_ir *, - llvm::Module *, - StorageSoa *storage, - struct tgsi_full_declaration *decl, - struct tgsi_full_declaration *) -{ - if (decl->Declaration.File == TGSI_FILE_ADDRESS) { - int idx = decl->Range.First; - storage->addAddress(idx); - } -} - -static void -translate_immediate(Storage *storage, - struct tgsi_full_immediate *imm) -{ - float vec[4]; - int i; - assert( imm->Immediate.NrTokens <= 4 + 1 ); - for (i = 0; i < imm->Immediate.NrTokens - 1; ++i) { - switch (imm->Immediate.DataType) { - case TGSI_IMM_FLOAT32: - vec[i] = 
imm->u[i].Float; - break; - default: - assert(0); - } - } - storage->addImmediate(vec); -} - - -static void -translate_immediateir(StorageSoa *storage, - struct tgsi_full_immediate *imm) -{ - float vec[4]; - int i; - assert( imm->Immediate.NrTokens <= 4 + 1 ); - for (i = 0; i < imm->Immediate.NrTokens - 1; ++i) { - switch (imm->Immediate.DataType) { - case TGSI_IMM_FLOAT32: - vec[i] = imm->u[i].Float; - break; - default: - assert(0); - } - } - storage->addImmediate(vec); -} - -static inline int -swizzleInt(struct tgsi_full_src_register *src) -{ - int swizzle = 0; - int start = 1000; - - for (int k = 0; k < 4; ++k) { - swizzle += tgsi_util_get_full_src_register_extswizzle(src, k) * start; - start /= 10; - } - return swizzle; -} - -static inline llvm::Value * -swizzleVector(llvm::Value *val, struct tgsi_full_src_register *src, - Storage *storage) -{ - int swizzle = swizzleInt(src); - - if (gallivm_is_swizzle(swizzle)) { - /*fprintf(stderr, "XXXXXXXX swizzle = %d\n", swizzle);*/ - val = storage->shuffleVector(val, swizzle); - } - return val; -} - -static void -translate_instruction(llvm::Module *module, - Storage *storage, - Instructions *instr, - struct tgsi_full_instruction *inst, - struct tgsi_full_instruction *fi, - unsigned instno) -{ - llvm::Value *inputs[4]; - inputs[0] = 0; - inputs[1] = 0; - inputs[2] = 0; - inputs[3] = 0; - - for (int i = 0; i < inst->Instruction.NumSrcRegs; ++i) { - struct tgsi_full_src_register *src = &inst->Src[i]; - llvm::Value *val = 0; - llvm::Value *indIdx = 0; - - if (src->Register.Indirect) { - indIdx = storage->addrElement(src->Indirect.Index); - indIdx = storage->extractIndex(indIdx); - } - if (src->Register.File == TGSI_FILE_CONSTANT) { - val = storage->constElement(src->Register.Index, indIdx); - } else if (src->Register.File == TGSI_FILE_INPUT) { - val = storage->inputElement(src->Register.Index, indIdx); - } else if (src->Register.File == TGSI_FILE_TEMPORARY) { - val = storage->tempElement(src->Register.Index); - } else if 
(src->Register.File == TGSI_FILE_OUTPUT) { - val = storage->outputElement(src->Register.Index, indIdx); - } else if (src->Register.File == TGSI_FILE_IMMEDIATE) { - val = storage->immediateElement(src->Register.Index); - } else { - fprintf(stderr, "ERROR: not supported llvm source %d\n", src->Register.File); - return; - } - - inputs[i] = swizzleVector(val, src, storage); - } - - /*if (inputs[0]) - instr->printVector(inputs[0]); - if (inputs[1]) - instr->printVector(inputs[1]);*/ - llvm::Value *out = 0; - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_ARL: { - out = instr->arl(inputs[0]); - } - break; - case TGSI_OPCODE_MOV: { - out = inputs[0]; - } - break; - case TGSI_OPCODE_LIT: { - out = instr->lit(inputs[0]); - } - break; - case TGSI_OPCODE_RCP: { - out = instr->rcp(inputs[0]); - } - break; - case TGSI_OPCODE_RSQ: { - out = instr->rsq(inputs[0]); - } - break; - case TGSI_OPCODE_EXP: { - out = instr->exp(inputs[0]); - } - break; - case TGSI_OPCODE_LOG: { - out = instr->log(inputs[0]); - } - break; - case TGSI_OPCODE_MUL: { - out = instr->mul(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_ADD: { - out = instr->add(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_DP3: { - out = instr->dp3(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_DP4: { - out = instr->dp4(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_DST: { - out = instr->dst(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_MIN: { - out = instr->min(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_MAX: { - out = instr->max(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_SLT: { - out = instr->slt(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_SGE: { - out = instr->sge(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_MAD: { - out = instr->madd(inputs[0], inputs[1], inputs[2]); - } - break; - case TGSI_OPCODE_SUB: { - out = instr->sub(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_LRP: { - out = instr->lerp(inputs[0], inputs[1], 
inputs[2]); - } - break; - case TGSI_OPCODE_CND: { - out = instr->cnd(inputs[0], inputs[1], inputs[2]); - } - break; - case TGSI_OPCODE_CND0: { - out = instr->cnd0(inputs[0], inputs[1], inputs[2]); - } - break; - case TGSI_OPCODE_DP2A: { - out = instr->dot2add(inputs[0], inputs[1], inputs[2]); - } - break; - case TGSI_OPCODE_FRC: { - out = instr->frc(inputs[0]); - } - break; - case TGSI_OPCODE_CLAMP: { - out = instr->clamp(inputs[0]); - } - break; - case TGSI_OPCODE_FLR: { - out = instr->floor(inputs[0]); - } - break; - case TGSI_OPCODE_ROUND: - break; - case TGSI_OPCODE_EX2: { - out = instr->ex2(inputs[0]); - } - break; - case TGSI_OPCODE_LG2: { - out = instr->lg2(inputs[0]); - } - break; - case TGSI_OPCODE_POW: { - out = instr->pow(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_XPD: { - out = instr->cross(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_ABS: { - out = instr->abs(inputs[0]); - } - break; - case TGSI_OPCODE_RCC: - break; - case TGSI_OPCODE_DPH: { - out = instr->dph(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_COS: { - out = instr->cos(inputs[0]); - } - break; - case TGSI_OPCODE_DDX: { - out = instr->ddx(inputs[0]); - } - break; - case TGSI_OPCODE_DDY: { - out = instr->ddy(inputs[0]); - } - break; - case TGSI_OPCODE_KILP: - break; - case TGSI_OPCODE_PK2H: - break; - case TGSI_OPCODE_PK2US: - break; - case TGSI_OPCODE_PK4B: - break; - case TGSI_OPCODE_PK4UB: - break; - case TGSI_OPCODE_RFL: - break; - case TGSI_OPCODE_SEQ: { - out = instr->seq(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_SFL: { - out = instr->sfl(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_SGT: { - out = instr->sgt(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_SIN: { - out = instr->sin(inputs[0]); - } - break; - case TGSI_OPCODE_SLE: { - out = instr->sle(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_SNE: { - out = instr->sne(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_STR: { - out = instr->str(inputs[0], 
inputs[1]); - } - break; - case TGSI_OPCODE_TEX: - break; - case TGSI_OPCODE_TXD: - break; - case TGSI_OPCODE_UP2H: - break; - case TGSI_OPCODE_UP2US: - break; - case TGSI_OPCODE_UP4B: - break; - case TGSI_OPCODE_UP4UB: - break; - case TGSI_OPCODE_X2D: { - out = instr->x2d(inputs[0], inputs[1], inputs[2]); - } - break; - case TGSI_OPCODE_ARA: - break; - case TGSI_OPCODE_ARR: - break; - case TGSI_OPCODE_BRA: - break; - case TGSI_OPCODE_CAL: { - instr->cal(inst->InstructionExtLabel.Label, storage->inputPtr()); - return; - } - break; - case TGSI_OPCODE_RET: { - instr->end(); - return; - } - break; - case TGSI_OPCODE_SSG: - break; - case TGSI_OPCODE_CMP: { - out = instr->cmp(inputs[0], inputs[1], inputs[2]); - } - break; - case TGSI_OPCODE_SCS: { - out = instr->scs(inputs[0]); - } - break; - case TGSI_OPCODE_TXB: - break; - case TGSI_OPCODE_NRM4: - case TGSI_OPCODE_NRM: { - out = instr->nrm(inputs[0]); - } - break; - case TGSI_OPCODE_DIV: { - out = instr->div(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_DP2: { - out = instr->dp2(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_TXL: - break; - case TGSI_OPCODE_BRK: { - instr->brk(); - return; - } - break; - case TGSI_OPCODE_IF: { - instr->ifop(inputs[0]); - storage->setCurrentBlock(instr->currentBlock()); - return; //just update the state - } - break; - case TGSI_OPCODE_BGNFOR: - break; - case TGSI_OPCODE_REP: - break; - case TGSI_OPCODE_ELSE: { - instr->elseop(); - storage->setCurrentBlock(instr->currentBlock()); - return; //only state update - } - break; - case TGSI_OPCODE_ENDIF: { - instr->endif(); - storage->setCurrentBlock(instr->currentBlock()); - return; //just update the state - } - break; - case TGSI_OPCODE_ENDFOR: - break; - case TGSI_OPCODE_ENDREP: - break; - case TGSI_OPCODE_PUSHA: - break; - case TGSI_OPCODE_POPA: - break; - case TGSI_OPCODE_CEIL: - break; - case TGSI_OPCODE_I2F: - break; - case TGSI_OPCODE_NOT: - break; - case TGSI_OPCODE_TRUNC: { - out = instr->trunc(inputs[0]); - } - 
break; - case TGSI_OPCODE_SHL: - break; - case TGSI_OPCODE_ISHR: - break; - case TGSI_OPCODE_AND: - break; - case TGSI_OPCODE_OR: - break; - case TGSI_OPCODE_MOD: - break; - case TGSI_OPCODE_XOR: - break; - case TGSI_OPCODE_SAD: - break; - case TGSI_OPCODE_TXF: - break; - case TGSI_OPCODE_TXQ: - break; - case TGSI_OPCODE_CONT: - break; - case TGSI_OPCODE_EMIT: - break; - case TGSI_OPCODE_ENDPRIM: - break; - case TGSI_OPCODE_BGNLOOP: { - instr->beginLoop(); - storage->setCurrentBlock(instr->currentBlock()); - return; - } - break; - case TGSI_OPCODE_BGNSUB: { - instr->bgnSub(instno); - storage->setCurrentBlock(instr->currentBlock()); - storage->pushTemps(); - return; - } - break; - case TGSI_OPCODE_ENDLOOP: { - instr->endLoop(); - storage->setCurrentBlock(instr->currentBlock()); - return; - } - break; - case TGSI_OPCODE_ENDSUB: { - instr->endSub(); - storage->setCurrentBlock(instr->currentBlock()); - storage->popArguments(); - storage->popTemps(); - return; - } - break; - case TGSI_OPCODE_NOISE1: - break; - case TGSI_OPCODE_NOISE2: - break; - case TGSI_OPCODE_NOISE3: - break; - case TGSI_OPCODE_NOISE4: - break; - case TGSI_OPCODE_NOP: - break; - case TGSI_OPCODE_CALLNZ: - break; - case TGSI_OPCODE_IFC: - break; - case TGSI_OPCODE_BREAKC: - break; - case TGSI_OPCODE_KIL: { - out = instr->kil(inputs[0]); - storage->setKilElement(out); - return; - } - break; - case TGSI_OPCODE_END: - instr->end(); - return; - break; - default: - fprintf(stderr, "ERROR: Unknown opcode %d\n", - inst->Instruction.Opcode); - assert(0); - break; - } - - if (!out) { - fprintf(stderr, "ERROR: unsupported opcode %d\n", - inst->Instruction.Opcode); - assert(!"Unsupported opcode"); - } - - /* # not sure if we need this */ - switch( inst->Instruction.Saturate ) { - case TGSI_SAT_NONE: - break; - case TGSI_SAT_ZERO_ONE: - /*TXT( "_SAT" );*/ - break; - case TGSI_SAT_MINUS_PLUS_ONE: - /*TXT( "_SAT[-1,1]" );*/ - break; - default: - assert( 0 ); - } - - /* store results */ - for (int i = 0; i < 
inst->Instruction.NumDstRegs; ++i) { - struct tgsi_full_dst_register *dst = &inst->Dst[i]; - - if (dst->Register.File == TGSI_FILE_OUTPUT) { - storage->setOutputElement(dst->Register.Index, out, dst->Register.WriteMask); - } else if (dst->Register.File == TGSI_FILE_TEMPORARY) { - storage->setTempElement(dst->Register.Index, out, dst->Register.WriteMask); - } else if (dst->Register.File == TGSI_FILE_ADDRESS) { - storage->setAddrElement(dst->Register.Index, out, dst->Register.WriteMask); - } else { - fprintf(stderr, "ERROR: unsupported LLVM destination!"); - assert(!"wrong destination"); - } - } -} - - -static void -translate_instructionir(llvm::Module *module, - StorageSoa *storage, - InstructionsSoa *instr, - struct tgsi_full_instruction *inst, - struct tgsi_full_instruction *fi, - unsigned instno) -{ - std::vector< std::vector > inputs(inst->Instruction.NumSrcRegs); - - for (int i = 0; i < inst->Instruction.NumSrcRegs; ++i) { - struct tgsi_full_src_register *src = &inst->Src[i]; - std::vector val; - llvm::Value *indIdx = 0; - int swizzle = swizzleInt(src); - - if (src->Register.Indirect) { - indIdx = storage->addrElement(src->Indirect.Index); - } - val = storage->load((enum tgsi_file_type)src->Register.File, - src->Register.Index, swizzle, instr->getIRBuilder(), indIdx); - - inputs[i] = val; - } - - std::vector out(4); - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_ARL: { - out = instr->arl(inputs[0]); - } - break; - case TGSI_OPCODE_MOV: { - out = inputs[0]; - } - break; - case TGSI_OPCODE_LIT: { - out = instr->lit(inputs[0]); - } - break; - case TGSI_OPCODE_RCP: { - } - break; - case TGSI_OPCODE_RSQ: { - out = instr->rsq(inputs[0]); - } - break; - case TGSI_OPCODE_EXP: - break; - case TGSI_OPCODE_LOG: - break; - case TGSI_OPCODE_MUL: { - out = instr->mul(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_ADD: { - out = instr->add(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_DP3: { - out = instr->dp3(inputs[0], inputs[1]); - } - break; - 
case TGSI_OPCODE_DP4: { - out = instr->dp4(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_DST: { - } - break; - case TGSI_OPCODE_MIN: { - out = instr->min(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_MAX: { - out = instr->max(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_SLT: { - out = instr->slt(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_SGE: { - } - break; - case TGSI_OPCODE_MAD: { - out = instr->madd(inputs[0], inputs[1], inputs[2]); - } - break; - case TGSI_OPCODE_SUB: { - out = instr->sub(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_LRP: { - } - break; - case TGSI_OPCODE_CND: - break; - case TGSI_OPCODE_CND0: - break; - case TGSI_OPCODE_DP2A: - break; - case TGSI_OPCODE_FRC: { - } - break; - case TGSI_OPCODE_CLAMP: - break; - case TGSI_OPCODE_FLR: { - } - break; - case TGSI_OPCODE_ROUND: - break; - case TGSI_OPCODE_EX2: { - } - break; - case TGSI_OPCODE_LG2: { - } - break; - case TGSI_OPCODE_POW: { - out = instr->pow(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_XPD: { - } - break; - case TGSI_OPCODE_ABS: { - out = instr->abs(inputs[0]); - } - break; - case TGSI_OPCODE_RCC: - break; - case TGSI_OPCODE_DPH: { - } - break; - case TGSI_OPCODE_COS: { - } - break; - case TGSI_OPCODE_DDX: - break; - case TGSI_OPCODE_DDY: - break; - case TGSI_OPCODE_KILP: - break; - case TGSI_OPCODE_PK2H: - break; - case TGSI_OPCODE_PK2US: - break; - case TGSI_OPCODE_PK4B: - break; - case TGSI_OPCODE_PK4UB: - break; - case TGSI_OPCODE_RFL: - break; - case TGSI_OPCODE_SEQ: - break; - case TGSI_OPCODE_SFL: - break; - case TGSI_OPCODE_SGT: { - } - break; - case TGSI_OPCODE_SIN: { - } - break; - case TGSI_OPCODE_SLE: - break; - case TGSI_OPCODE_SNE: - break; - case TGSI_OPCODE_STR: - break; - case TGSI_OPCODE_TEX: - break; - case TGSI_OPCODE_TXD: - break; - case TGSI_OPCODE_UP2H: - break; - case TGSI_OPCODE_UP2US: - break; - case TGSI_OPCODE_UP4B: - break; - case TGSI_OPCODE_UP4UB: - break; - case TGSI_OPCODE_X2D: - break; - case 
TGSI_OPCODE_ARA: - break; - case TGSI_OPCODE_ARR: - break; - case TGSI_OPCODE_BRA: - break; - case TGSI_OPCODE_CAL: { - } - break; - case TGSI_OPCODE_RET: { - } - break; - case TGSI_OPCODE_SSG: - break; - case TGSI_OPCODE_CMP: { - } - break; - case TGSI_OPCODE_SCS: { - } - break; - case TGSI_OPCODE_TXB: - break; - case TGSI_OPCODE_NRM: - break; - case TGSI_OPCODE_DIV: - break; - case TGSI_OPCODE_DP2: - break; - case TGSI_OPCODE_TXL: - break; - case TGSI_OPCODE_BRK: { - } - break; - case TGSI_OPCODE_IF: { - } - break; - case TGSI_OPCODE_BGNFOR: - break; - case TGSI_OPCODE_REP: - break; - case TGSI_OPCODE_ELSE: { - } - break; - case TGSI_OPCODE_ENDIF: { - } - break; - case TGSI_OPCODE_ENDFOR: - break; - case TGSI_OPCODE_ENDREP: - break; - case TGSI_OPCODE_PUSHA: - break; - case TGSI_OPCODE_POPA: - break; - case TGSI_OPCODE_CEIL: - break; - case TGSI_OPCODE_I2F: - break; - case TGSI_OPCODE_NOT: - break; - case TGSI_OPCODE_TRUNC: { - } - break; - case TGSI_OPCODE_SHL: - break; - case TGSI_OPCODE_ISHR: - break; - case TGSI_OPCODE_AND: - break; - case TGSI_OPCODE_OR: - break; - case TGSI_OPCODE_MOD: - break; - case TGSI_OPCODE_XOR: - break; - case TGSI_OPCODE_SAD: - break; - case TGSI_OPCODE_TXF: - break; - case TGSI_OPCODE_TXQ: - break; - case TGSI_OPCODE_CONT: - break; - case TGSI_OPCODE_EMIT: - break; - case TGSI_OPCODE_ENDPRIM: - break; - case TGSI_OPCODE_BGNLOOP: { - } - break; - case TGSI_OPCODE_BGNSUB: { - } - break; - case TGSI_OPCODE_ENDLOOP: { - } - break; - case TGSI_OPCODE_ENDSUB: { - } - break; - case TGSI_OPCODE_NOISE1: - break; - case TGSI_OPCODE_NOISE2: - break; - case TGSI_OPCODE_NOISE3: - break; - case TGSI_OPCODE_NOISE4: - break; - case TGSI_OPCODE_NOP: - break; - case TGSI_OPCODE_NRM4: - break; - case TGSI_OPCODE_CALLNZ: - break; - case TGSI_OPCODE_IFC: - break; - case TGSI_OPCODE_BREAKC: - break; - case TGSI_OPCODE_KIL: { - } - break; - case TGSI_OPCODE_END: - instr->end(); - return; - break; - default: - fprintf(stderr, "ERROR: Unknown opcode %d\n", 
- inst->Instruction.Opcode); - assert(0); - break; - } - - if (!out[0]) { - fprintf(stderr, "ERROR: unsupported opcode %d\n", - inst->Instruction.Opcode); - assert(!"Unsupported opcode"); - } - - /* store results */ - for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) { - struct tgsi_full_dst_register *dst = &inst->Dst[i]; - storage->store((enum tgsi_file_type)dst->Register.File, - dst->Register.Index, out, dst->Register.WriteMask, - instr->getIRBuilder() ); - } -} - -llvm::Module * -tgsi_to_llvm(struct gallivm_ir *ir, const struct tgsi_token *tokens) -{ - llvm::Module *mod = new Module("shader"); - struct tgsi_parse_context parse; - struct tgsi_full_instruction fi; - struct tgsi_full_declaration fd; - unsigned instno = 0; - Function* shader = mod->getFunction("execute_shader"); - std::ostringstream stream; - if (ir->type == GALLIVM_VS) { - stream << "vs_shader"; - } else { - stream << "fs_shader"; - } - stream << ir->id; - std::string func_name = stream.str(); - shader->setName(func_name.c_str()); - - Function::arg_iterator args = shader->arg_begin(); - Value *ptr_INPUT = args++; - ptr_INPUT->setName("input"); - - BasicBlock *label_entry = BasicBlock::Create("entry", shader, 0); - - tgsi_parse_init(&parse, tokens); - - fi = tgsi_default_full_instruction(); - fd = tgsi_default_full_declaration(); - Storage storage(label_entry, ptr_INPUT); - Instructions instr(mod, shader, label_entry, &storage); - while(!tgsi_parse_end_of_tokens(&parse)) { - tgsi_parse_token(&parse); - - switch (parse.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_DECLARATION: - translate_declaration(ir, mod, &storage, - &parse.FullToken.FullDeclaration, - &fd); - break; - - case TGSI_TOKEN_TYPE_IMMEDIATE: - translate_immediate(&storage, - &parse.FullToken.FullImmediate); - break; - - case TGSI_TOKEN_TYPE_INSTRUCTION: - translate_instruction(mod, &storage, &instr, - &parse.FullToken.FullInstruction, - &fi, instno); - ++instno; - break; - - default: - assert(0); - } - } - - 
tgsi_parse_free(&parse); - - ir->num_consts = storage.numConsts(); - return mod; -} - -llvm::Module * tgsi_to_llvmir(struct gallivm_ir *ir, - const struct tgsi_token *tokens) -{ - llvm::Module *mod = new Module("shader"); - struct tgsi_parse_context parse; - struct tgsi_full_instruction fi; - struct tgsi_full_declaration fd; - unsigned instno = 0; - std::ostringstream stream; - if (ir->type == GALLIVM_VS) { - stream << "vs_shader"; - } else { - stream << "fs_shader"; - } - //stream << ir->id; - std::string func_name = stream.str(); - Function *shader = llvm::cast(mod->getOrInsertFunction( - func_name.c_str(), - vertexShaderFunctionType())); - - Function::arg_iterator args = shader->arg_begin(); - Value *input = args++; - input->setName("inputs"); - Value *output = args++; - output->setName("outputs"); - Value *consts = args++; - consts->setName("consts"); - - BasicBlock *label_entry = BasicBlock::Create("entry", shader, 0); - - tgsi_parse_init(&parse, tokens); - - fi = tgsi_default_full_instruction(); - fd = tgsi_default_full_declaration(); - - StorageSoa storage(label_entry, input, output, consts); - InstructionsSoa instr(mod, shader, label_entry, &storage); - - while(!tgsi_parse_end_of_tokens(&parse)) { - tgsi_parse_token(&parse); - - switch (parse.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_DECLARATION: - translate_declarationir(ir, mod, &storage, - &parse.FullToken.FullDeclaration, - &fd); - break; - - case TGSI_TOKEN_TYPE_IMMEDIATE: - translate_immediateir(&storage, - &parse.FullToken.FullImmediate); - break; - - case TGSI_TOKEN_TYPE_INSTRUCTION: - storage.declareImmediates(); - translate_instructionir(mod, &storage, &instr, - &parse.FullToken.FullInstruction, - &fi, instno); - ++instno; - break; - - default: - assert(0); - } - } - - tgsi_parse_free(&parse); - - return mod; -} diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.h b/src/gallium/auxiliary/gallivm/tgsitollvm.h deleted file mode 100644 index 7ada04d6299..00000000000 --- 
a/src/gallium/auxiliary/gallivm/tgsitollvm.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef TGSITOLLVM_H -#define TGSITOLLVM_H - - -namespace llvm { - class Module; -} - -struct gallivm_ir; -struct tgsi_token; - - -llvm::Module * tgsi_to_llvm(struct gallivm_ir *ir, - const struct tgsi_token *tokens); - - -llvm::Module * tgsi_to_llvmir(struct gallivm_ir *ir, - const struct tgsi_token *tokens); - -#endif diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 840cb0950ec..a39283e5e86 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -21,30 +21,6 @@ env.CodeGenerate( llvmpipe = env.ConvenienceLibrary( target = 'llvmpipe', source = [ - 'lp_bld_alpha.c', - 'lp_bld_arit.c', - 'lp_bld_blend_aos.c', - 'lp_bld_blend_logicop.c', - 'lp_bld_blend_soa.c', - 'lp_bld_const.c', - 'lp_bld_conv.c', - 'lp_bld_debug.c', - 'lp_bld_depth.c', - 'lp_bld_flow.c', - 'lp_bld_format_aos.c', - 'lp_bld_format_query.c', - 'lp_bld_format_soa.c', - 'lp_bld_interp.c', - 'lp_bld_intr.c', - 'lp_bld_logic.c', - 'lp_bld_misc.cpp', - 'lp_bld_pack.c', - 'lp_bld_sample.c', - 'lp_bld_sample_soa.c', - 'lp_bld_struct.c', - 'lp_bld_swizzle.c', - 'lp_bld_tgsi_soa.c', - 'lp_bld_type.c', 'lp_buffer.c', 'lp_clear.c', 'lp_context.c', diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c deleted file mode 100644 index 7245730350c..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c +++ /dev/null @@ -1,63 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * Alpha testing to LLVM IR translation. 
- * - * @author Jose Fonseca - */ - -#include "pipe/p_state.h" - -#include "lp_bld_type.h" -#include "lp_bld_const.h" -#include "lp_bld_logic.h" -#include "lp_bld_flow.h" -#include "lp_bld_debug.h" -#include "lp_bld_alpha.h" - - -void -lp_build_alpha_test(LLVMBuilderRef builder, - const struct pipe_alpha_state *state, - struct lp_type type, - struct lp_build_mask_context *mask, - LLVMValueRef alpha, - LLVMValueRef ref) -{ - struct lp_build_context bld; - - lp_build_context_init(&bld, builder, type); - - if(state->enabled) { - LLVMValueRef test = lp_build_cmp(&bld, state->func, alpha, ref); - - lp_build_name(test, "alpha_mask"); - - lp_build_mask_update(mask, test); - } -} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h deleted file mode 100644 index 634575670db..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h +++ /dev/null @@ -1,54 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * Alpha testing to LLVM IR translation. - * - * @author Jose Fonseca - */ - -#ifndef LP_BLD_ALPHA_H -#define LP_BLD_ALPHA_H - - -#include - -struct pipe_alpha_state; -struct lp_type; -struct lp_build_mask_context; - - -void -lp_build_alpha_test(LLVMBuilderRef builder, - const struct pipe_alpha_state *state, - struct lp_type type, - struct lp_build_mask_context *mask, - LLVMValueRef alpha, - LLVMValueRef ref); - - -#endif /* !LP_BLD_ALPHA_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c deleted file mode 100644 index 54b31befe6d..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c +++ /dev/null @@ -1,1325 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/** - * @file - * Helper - * - * LLVM IR doesn't support all basic arithmetic operations we care about (most - * notably min/max and saturated operations), and it is often necessary to - * resort machine-specific intrinsics directly. The functions here hide all - * these implementation details from the other modules. - * - * We also do simple expressions simplification here. Reasons are: - * - it is very easy given we have all necessary information readily available - * - LLVM optimization passes fail to simplify several vector expressions - * - We often know value constraints which the optimization passes have no way - * of knowing, such as when source arguments are known to be in [0, 1] range. - * - * @author Jose Fonseca - */ - - -#include "util/u_memory.h" -#include "util/u_debug.h" -#include "util/u_math.h" -#include "util/u_string.h" -#include "util/u_cpu_detect.h" - -#include "lp_bld_type.h" -#include "lp_bld_const.h" -#include "lp_bld_intr.h" -#include "lp_bld_logic.h" -#include "lp_bld_pack.h" -#include "lp_bld_debug.h" -#include "lp_bld_arit.h" - - -/** - * Generate min(a, b) - * No checks for special case values of a or b = 1 or 0 are done. 
- */ -static LLVMValueRef -lp_build_min_simple(struct lp_build_context *bld, - LLVMValueRef a, - LLVMValueRef b) -{ - const struct lp_type type = bld->type; - const char *intrinsic = NULL; - LLVMValueRef cond; - - /* TODO: optimize the constant case */ - - if(type.width * type.length == 128) { - if(type.floating) { - if(type.width == 32 && util_cpu_caps.has_sse) - intrinsic = "llvm.x86.sse.min.ps"; - if(type.width == 64 && util_cpu_caps.has_sse2) - intrinsic = "llvm.x86.sse2.min.pd"; - } - else { - if(type.width == 8 && !type.sign && util_cpu_caps.has_sse2) - intrinsic = "llvm.x86.sse2.pminu.b"; - if(type.width == 8 && type.sign && util_cpu_caps.has_sse4_1) - intrinsic = "llvm.x86.sse41.pminsb"; - if(type.width == 16 && !type.sign && util_cpu_caps.has_sse4_1) - intrinsic = "llvm.x86.sse41.pminuw"; - if(type.width == 16 && type.sign && util_cpu_caps.has_sse2) - intrinsic = "llvm.x86.sse2.pmins.w"; - if(type.width == 32 && !type.sign && util_cpu_caps.has_sse4_1) - intrinsic = "llvm.x86.sse41.pminud"; - if(type.width == 32 && type.sign && util_cpu_caps.has_sse4_1) - intrinsic = "llvm.x86.sse41.pminsd"; - } - } - - if(intrinsic) - return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b); - - cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b); - return lp_build_select(bld, cond, a, b); -} - - -/** - * Generate max(a, b) - * No checks for special case values of a or b = 1 or 0 are done. 
- */ -static LLVMValueRef -lp_build_max_simple(struct lp_build_context *bld, - LLVMValueRef a, - LLVMValueRef b) -{ - const struct lp_type type = bld->type; - const char *intrinsic = NULL; - LLVMValueRef cond; - - /* TODO: optimize the constant case */ - - if(type.width * type.length == 128) { - if(type.floating) { - if(type.width == 32 && util_cpu_caps.has_sse) - intrinsic = "llvm.x86.sse.max.ps"; - if(type.width == 64 && util_cpu_caps.has_sse2) - intrinsic = "llvm.x86.sse2.max.pd"; - } - else { - if(type.width == 8 && !type.sign && util_cpu_caps.has_sse2) - intrinsic = "llvm.x86.sse2.pmaxu.b"; - if(type.width == 8 && type.sign && util_cpu_caps.has_sse4_1) - intrinsic = "llvm.x86.sse41.pmaxsb"; - if(type.width == 16 && !type.sign && util_cpu_caps.has_sse4_1) - intrinsic = "llvm.x86.sse41.pmaxuw"; - if(type.width == 16 && type.sign && util_cpu_caps.has_sse2) - intrinsic = "llvm.x86.sse2.pmaxs.w"; - if(type.width == 32 && !type.sign && util_cpu_caps.has_sse4_1) - intrinsic = "llvm.x86.sse41.pmaxud"; - if(type.width == 32 && type.sign && util_cpu_caps.has_sse4_1) - intrinsic = "llvm.x86.sse41.pmaxsd"; - } - } - - if(intrinsic) - return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b); - - cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b); - return lp_build_select(bld, cond, a, b); -} - - -/** - * Generate 1 - a, or ~a depending on bld->type. 
- */ -LLVMValueRef -lp_build_comp(struct lp_build_context *bld, - LLVMValueRef a) -{ - const struct lp_type type = bld->type; - - if(a == bld->one) - return bld->zero; - if(a == bld->zero) - return bld->one; - - if(type.norm && !type.floating && !type.fixed && !type.sign) { - if(LLVMIsConstant(a)) - return LLVMConstNot(a); - else - return LLVMBuildNot(bld->builder, a, ""); - } - - if(LLVMIsConstant(a)) - return LLVMConstSub(bld->one, a); - else - return LLVMBuildSub(bld->builder, bld->one, a, ""); -} - - -/** - * Generate a + b - */ -LLVMValueRef -lp_build_add(struct lp_build_context *bld, - LLVMValueRef a, - LLVMValueRef b) -{ - const struct lp_type type = bld->type; - LLVMValueRef res; - - if(a == bld->zero) - return b; - if(b == bld->zero) - return a; - if(a == bld->undef || b == bld->undef) - return bld->undef; - - if(bld->type.norm) { - const char *intrinsic = NULL; - - if(a == bld->one || b == bld->one) - return bld->one; - - if(util_cpu_caps.has_sse2 && - type.width * type.length == 128 && - !type.floating && !type.fixed) { - if(type.width == 8) - intrinsic = type.sign ? "llvm.x86.sse2.padds.b" : "llvm.x86.sse2.paddus.b"; - if(type.width == 16) - intrinsic = type.sign ? "llvm.x86.sse2.padds.w" : "llvm.x86.sse2.paddus.w"; - } - - if(intrinsic) - return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b); - } - - if(LLVMIsConstant(a) && LLVMIsConstant(b)) - res = LLVMConstAdd(a, b); - else - res = LLVMBuildAdd(bld->builder, a, b, ""); - - /* clamp to ceiling of 1.0 */ - if(bld->type.norm && (bld->type.floating || bld->type.fixed)) - res = lp_build_min_simple(bld, res, bld->one); - - /* XXX clamp to floor of -1 or 0??? 
*/ - - return res; -} - - -/** - * Generate a - b - */ -LLVMValueRef -lp_build_sub(struct lp_build_context *bld, - LLVMValueRef a, - LLVMValueRef b) -{ - const struct lp_type type = bld->type; - LLVMValueRef res; - - if(b == bld->zero) - return a; - if(a == bld->undef || b == bld->undef) - return bld->undef; - if(a == b) - return bld->zero; - - if(bld->type.norm) { - const char *intrinsic = NULL; - - if(b == bld->one) - return bld->zero; - - if(util_cpu_caps.has_sse2 && - type.width * type.length == 128 && - !type.floating && !type.fixed) { - if(type.width == 8) - intrinsic = type.sign ? "llvm.x86.sse2.psubs.b" : "llvm.x86.sse2.psubus.b"; - if(type.width == 16) - intrinsic = type.sign ? "llvm.x86.sse2.psubs.w" : "llvm.x86.sse2.psubus.w"; - } - - if(intrinsic) - return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b); - } - - if(LLVMIsConstant(a) && LLVMIsConstant(b)) - res = LLVMConstSub(a, b); - else - res = LLVMBuildSub(bld->builder, a, b, ""); - - if(bld->type.norm && (bld->type.floating || bld->type.fixed)) - res = lp_build_max_simple(bld, res, bld->zero); - - return res; -} - - -/** - * Normalized 8bit multiplication. - * - * - alpha plus one - * - * makes the following approximation to the division (Sree) - * - * a*b/255 ~= (a*(b + 1)) >> 256 - * - * which is the fastest method that satisfies the following OpenGL criteria - * - * 0*0 = 0 and 255*255 = 255 - * - * - geometric series - * - * takes the geometric series approximation to the division - * - * t/255 = (t >> 8) + (t >> 16) + (t >> 24) .. 
- * - * in this case just the first two terms to fit in 16bit arithmetic - * - * t/255 ~= (t + (t >> 8)) >> 8 - * - * note that just by itself it doesn't satisfies the OpenGL criteria, as - * 255*255 = 254, so the special case b = 255 must be accounted or roundoff - * must be used - * - * - geometric series plus rounding - * - * when using a geometric series division instead of truncating the result - * use roundoff in the approximation (Jim Blinn) - * - * t/255 ~= (t + (t >> 8) + 0x80) >> 8 - * - * achieving the exact results - * - * @sa Alvy Ray Smith, Image Compositing Fundamentals, Tech Memo 4, Aug 15, 1995, - * ftp://ftp.alvyray.com/Acrobat/4_Comp.pdf - * @sa Michael Herf, The "double blend trick", May 2000, - * http://www.stereopsis.com/doubleblend.html - */ -static LLVMValueRef -lp_build_mul_u8n(LLVMBuilderRef builder, - struct lp_type i16_type, - LLVMValueRef a, LLVMValueRef b) -{ - LLVMValueRef c8; - LLVMValueRef ab; - - c8 = lp_build_int_const_scalar(i16_type, 8); - -#if 0 - - /* a*b/255 ~= (a*(b + 1)) >> 256 */ - b = LLVMBuildAdd(builder, b, lp_build_int_const_scalar(i16_type, 1), ""); - ab = LLVMBuildMul(builder, a, b, ""); - -#else - - /* ab/255 ~= (ab + (ab >> 8) + 0x80) >> 8 */ - ab = LLVMBuildMul(builder, a, b, ""); - ab = LLVMBuildAdd(builder, ab, LLVMBuildLShr(builder, ab, c8, ""), ""); - ab = LLVMBuildAdd(builder, ab, lp_build_int_const_scalar(i16_type, 0x80), ""); - -#endif - - ab = LLVMBuildLShr(builder, ab, c8, ""); - - return ab; -} - - -/** - * Generate a * b - */ -LLVMValueRef -lp_build_mul(struct lp_build_context *bld, - LLVMValueRef a, - LLVMValueRef b) -{ - const struct lp_type type = bld->type; - LLVMValueRef shift; - LLVMValueRef res; - - if(a == bld->zero) - return bld->zero; - if(a == bld->one) - return b; - if(b == bld->zero) - return bld->zero; - if(b == bld->one) - return a; - if(a == bld->undef || b == bld->undef) - return bld->undef; - - if(!type.floating && !type.fixed && type.norm) { - if(type.width == 8) { - struct lp_type 
i16_type = lp_wider_type(type); - LLVMValueRef al, ah, bl, bh, abl, abh, ab; - - lp_build_unpack2(bld->builder, type, i16_type, a, &al, &ah); - lp_build_unpack2(bld->builder, type, i16_type, b, &bl, &bh); - - /* PMULLW, PSRLW, PADDW */ - abl = lp_build_mul_u8n(bld->builder, i16_type, al, bl); - abh = lp_build_mul_u8n(bld->builder, i16_type, ah, bh); - - ab = lp_build_pack2(bld->builder, i16_type, type, abl, abh); - - return ab; - } - - /* FIXME */ - assert(0); - } - - if(type.fixed) - shift = lp_build_int_const_scalar(type, type.width/2); - else - shift = NULL; - - if(LLVMIsConstant(a) && LLVMIsConstant(b)) { - res = LLVMConstMul(a, b); - if(shift) { - if(type.sign) - res = LLVMConstAShr(res, shift); - else - res = LLVMConstLShr(res, shift); - } - } - else { - res = LLVMBuildMul(bld->builder, a, b, ""); - if(shift) { - if(type.sign) - res = LLVMBuildAShr(bld->builder, res, shift, ""); - else - res = LLVMBuildLShr(bld->builder, res, shift, ""); - } - } - - return res; -} - - -/** - * Small vector x scale multiplication optimization. - */ -LLVMValueRef -lp_build_mul_imm(struct lp_build_context *bld, - LLVMValueRef a, - int b) -{ - LLVMValueRef factor; - - if(b == 0) - return bld->zero; - - if(b == 1) - return a; - - if(b == -1) - return LLVMBuildNeg(bld->builder, a, ""); - - if(b == 2 && bld->type.floating) - return lp_build_add(bld, a, a); - - if(util_is_pot(b)) { - unsigned shift = ffs(b) - 1; - - if(bld->type.floating) { -#if 0 - /* - * Power of two multiplication by directly manipulating the mantissa. - * - * XXX: This might not be always faster, it will introduce a small error - * for multiplication by zero, and it will produce wrong results - * for Inf and NaN. 
- */ - unsigned mantissa = lp_mantissa(bld->type); - factor = lp_build_int_const_scalar(bld->type, (unsigned long long)shift << mantissa); - a = LLVMBuildBitCast(bld->builder, a, lp_build_int_vec_type(bld->type), ""); - a = LLVMBuildAdd(bld->builder, a, factor, ""); - a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(bld->type), ""); - return a; -#endif - } - else { - factor = lp_build_const_scalar(bld->type, shift); - return LLVMBuildShl(bld->builder, a, factor, ""); - } - } - - factor = lp_build_const_scalar(bld->type, (double)b); - return lp_build_mul(bld, a, factor); -} - - -/** - * Generate a / b - */ -LLVMValueRef -lp_build_div(struct lp_build_context *bld, - LLVMValueRef a, - LLVMValueRef b) -{ - const struct lp_type type = bld->type; - - if(a == bld->zero) - return bld->zero; - if(a == bld->one) - return lp_build_rcp(bld, b); - if(b == bld->zero) - return bld->undef; - if(b == bld->one) - return a; - if(a == bld->undef || b == bld->undef) - return bld->undef; - - if(LLVMIsConstant(a) && LLVMIsConstant(b)) - return LLVMConstFDiv(a, b); - - if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) - return lp_build_mul(bld, a, lp_build_rcp(bld, b)); - - return LLVMBuildFDiv(bld->builder, a, b, ""); -} - - -/** - * Linear interpolation. - * - * This also works for integer values with a few caveats. - * - * @sa http://www.stereopsis.com/doubleblend.html - */ -LLVMValueRef -lp_build_lerp(struct lp_build_context *bld, - LLVMValueRef x, - LLVMValueRef v0, - LLVMValueRef v1) -{ - LLVMValueRef delta; - LLVMValueRef res; - - delta = lp_build_sub(bld, v1, v0); - - res = lp_build_mul(bld, x, delta); - - res = lp_build_add(bld, v0, res); - - if(bld->type.fixed) - /* XXX: This step is necessary for lerping 8bit colors stored on 16bits, - * but it will be wrong for other uses. Basically we need a more - * powerful lp_type, capable of further distinguishing the values - * interpretation from the value storage. 
*/ - res = LLVMBuildAnd(bld->builder, res, lp_build_int_const_scalar(bld->type, (1 << bld->type.width/2) - 1), ""); - - return res; -} - - -LLVMValueRef -lp_build_lerp_2d(struct lp_build_context *bld, - LLVMValueRef x, - LLVMValueRef y, - LLVMValueRef v00, - LLVMValueRef v01, - LLVMValueRef v10, - LLVMValueRef v11) -{ - LLVMValueRef v0 = lp_build_lerp(bld, x, v00, v01); - LLVMValueRef v1 = lp_build_lerp(bld, x, v10, v11); - return lp_build_lerp(bld, y, v0, v1); -} - - -/** - * Generate min(a, b) - * Do checks for special cases. - */ -LLVMValueRef -lp_build_min(struct lp_build_context *bld, - LLVMValueRef a, - LLVMValueRef b) -{ - if(a == bld->undef || b == bld->undef) - return bld->undef; - - if(a == b) - return a; - - if(bld->type.norm) { - if(a == bld->zero || b == bld->zero) - return bld->zero; - if(a == bld->one) - return b; - if(b == bld->one) - return a; - } - - return lp_build_min_simple(bld, a, b); -} - - -/** - * Generate max(a, b) - * Do checks for special cases. - */ -LLVMValueRef -lp_build_max(struct lp_build_context *bld, - LLVMValueRef a, - LLVMValueRef b) -{ - if(a == bld->undef || b == bld->undef) - return bld->undef; - - if(a == b) - return a; - - if(bld->type.norm) { - if(a == bld->one || b == bld->one) - return bld->one; - if(a == bld->zero) - return b; - if(b == bld->zero) - return a; - } - - return lp_build_max_simple(bld, a, b); -} - - -/** - * Generate abs(a) - */ -LLVMValueRef -lp_build_abs(struct lp_build_context *bld, - LLVMValueRef a) -{ - const struct lp_type type = bld->type; - LLVMTypeRef vec_type = lp_build_vec_type(type); - - if(!type.sign) - return a; - - if(type.floating) { - /* Mask out the sign bit */ - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); - unsigned long long absMask = ~(1ULL << (type.width - 1)); - LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long) absMask)); - a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); - a = LLVMBuildAnd(bld->builder, a, mask, ""); - a = 
LLVMBuildBitCast(bld->builder, a, vec_type, ""); - return a; - } - - if(type.width*type.length == 128 && util_cpu_caps.has_ssse3) { - switch(type.width) { - case 8: - return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.b.128", vec_type, a); - case 16: - return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.w.128", vec_type, a); - case 32: - return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.d.128", vec_type, a); - } - } - - return lp_build_max(bld, a, LLVMBuildNeg(bld->builder, a, "")); -} - - -LLVMValueRef -lp_build_sgn(struct lp_build_context *bld, - LLVMValueRef a) -{ - const struct lp_type type = bld->type; - LLVMTypeRef vec_type = lp_build_vec_type(type); - LLVMValueRef cond; - LLVMValueRef res; - - /* Handle non-zero case */ - if(!type.sign) { - /* if not zero then sign must be positive */ - res = bld->one; - } - else if(type.floating) { - /* Take the sign bit and add it to 1 constant */ - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); - LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1)); - LLVMValueRef sign; - LLVMValueRef one; - sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); - sign = LLVMBuildAnd(bld->builder, sign, mask, ""); - one = LLVMConstBitCast(bld->one, int_vec_type); - res = LLVMBuildOr(bld->builder, sign, one, ""); - res = LLVMBuildBitCast(bld->builder, res, vec_type, ""); - } - else - { - LLVMValueRef minus_one = lp_build_const_scalar(type, -1.0); - cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, bld->zero); - res = lp_build_select(bld, cond, bld->one, minus_one); - } - - /* Handle zero */ - cond = lp_build_cmp(bld, PIPE_FUNC_EQUAL, a, bld->zero); - res = lp_build_select(bld, cond, bld->zero, bld->one); - - return res; -} - - -enum lp_build_round_sse41_mode -{ - LP_BUILD_ROUND_SSE41_NEAREST = 0, - LP_BUILD_ROUND_SSE41_FLOOR = 1, - LP_BUILD_ROUND_SSE41_CEIL = 2, - LP_BUILD_ROUND_SSE41_TRUNCATE = 3 -}; - - -static INLINE LLVMValueRef 
-lp_build_round_sse41(struct lp_build_context *bld, - LLVMValueRef a, - enum lp_build_round_sse41_mode mode) -{ - const struct lp_type type = bld->type; - LLVMTypeRef vec_type = lp_build_vec_type(type); - const char *intrinsic; - - assert(type.floating); - assert(type.width*type.length == 128); - assert(lp_check_value(type, a)); - assert(util_cpu_caps.has_sse4_1); - - switch(type.width) { - case 32: - intrinsic = "llvm.x86.sse41.round.ps"; - break; - case 64: - intrinsic = "llvm.x86.sse41.round.pd"; - break; - default: - assert(0); - return bld->undef; - } - - return lp_build_intrinsic_binary(bld->builder, intrinsic, vec_type, a, - LLVMConstInt(LLVMInt32Type(), mode, 0)); -} - - -LLVMValueRef -lp_build_trunc(struct lp_build_context *bld, - LLVMValueRef a) -{ - const struct lp_type type = bld->type; - - assert(type.floating); - assert(lp_check_value(type, a)); - - if(util_cpu_caps.has_sse4_1) - return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_TRUNCATE); - else { - LLVMTypeRef vec_type = lp_build_vec_type(type); - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); - LLVMValueRef res; - res = LLVMBuildFPToSI(bld->builder, a, int_vec_type, ""); - res = LLVMBuildSIToFP(bld->builder, res, vec_type, ""); - return res; - } -} - - -LLVMValueRef -lp_build_round(struct lp_build_context *bld, - LLVMValueRef a) -{ - const struct lp_type type = bld->type; - - assert(type.floating); - assert(lp_check_value(type, a)); - - if(util_cpu_caps.has_sse4_1) - return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST); - else { - LLVMTypeRef vec_type = lp_build_vec_type(type); - LLVMValueRef res; - res = lp_build_iround(bld, a); - res = LLVMBuildSIToFP(bld->builder, res, vec_type, ""); - return res; - } -} - - -LLVMValueRef -lp_build_floor(struct lp_build_context *bld, - LLVMValueRef a) -{ - const struct lp_type type = bld->type; - - assert(type.floating); - - if(util_cpu_caps.has_sse4_1) - return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR); - else { - 
LLVMTypeRef vec_type = lp_build_vec_type(type); - LLVMValueRef res; - res = lp_build_ifloor(bld, a); - res = LLVMBuildSIToFP(bld->builder, res, vec_type, ""); - return res; - } -} - - -LLVMValueRef -lp_build_ceil(struct lp_build_context *bld, - LLVMValueRef a) -{ - const struct lp_type type = bld->type; - - assert(type.floating); - assert(lp_check_value(type, a)); - - if(util_cpu_caps.has_sse4_1) - return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL); - else { - LLVMTypeRef vec_type = lp_build_vec_type(type); - LLVMValueRef res; - res = lp_build_iceil(bld, a); - res = LLVMBuildSIToFP(bld->builder, res, vec_type, ""); - return res; - } -} - - -/** - * Convert to integer, through whichever rounding method that's fastest, - * typically truncating to zero. - */ -LLVMValueRef -lp_build_itrunc(struct lp_build_context *bld, - LLVMValueRef a) -{ - const struct lp_type type = bld->type; - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); - - assert(type.floating); - assert(lp_check_value(type, a)); - - return LLVMBuildFPToSI(bld->builder, a, int_vec_type, ""); -} - - -LLVMValueRef -lp_build_iround(struct lp_build_context *bld, - LLVMValueRef a) -{ - const struct lp_type type = bld->type; - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); - LLVMValueRef res; - - assert(type.floating); - assert(lp_check_value(type, a)); - - if(util_cpu_caps.has_sse4_1) { - res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST); - } - else { - LLVMTypeRef vec_type = lp_build_vec_type(type); - LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1)); - LLVMValueRef sign; - LLVMValueRef half; - - /* get sign bit */ - sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); - sign = LLVMBuildAnd(bld->builder, sign, mask, ""); - - /* sign * 0.5 */ - half = lp_build_const_scalar(type, 0.5); - half = LLVMBuildBitCast(bld->builder, half, int_vec_type, ""); - half = LLVMBuildOr(bld->builder, sign, half, ""); - half = 
LLVMBuildBitCast(bld->builder, half, vec_type, ""); - - res = LLVMBuildAdd(bld->builder, a, half, ""); - } - - res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, ""); - - return res; -} - - -/** - * Convert float[] to int[] with floor(). - */ -LLVMValueRef -lp_build_ifloor(struct lp_build_context *bld, - LLVMValueRef a) -{ - const struct lp_type type = bld->type; - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); - LLVMValueRef res; - - assert(type.floating); - assert(lp_check_value(type, a)); - - if(util_cpu_caps.has_sse4_1) { - res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR); - } - else { - /* Take the sign bit and add it to 1 constant */ - LLVMTypeRef vec_type = lp_build_vec_type(type); - unsigned mantissa = lp_mantissa(type); - LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1)); - LLVMValueRef sign; - LLVMValueRef offset; - - /* sign = a < 0 ? ~0 : 0 */ - sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); - sign = LLVMBuildAnd(bld->builder, sign, mask, ""); - sign = LLVMBuildAShr(bld->builder, sign, lp_build_int_const_scalar(type, type.width - 1), ""); - lp_build_name(sign, "floor.sign"); - - /* offset = -0.99999(9)f */ - offset = lp_build_const_scalar(type, -(double)(((unsigned long long)1 << mantissa) - 1)/((unsigned long long)1 << mantissa)); - offset = LLVMConstBitCast(offset, int_vec_type); - - /* offset = a < 0 ? 
-0.99999(9)f : 0.0f */ - offset = LLVMBuildAnd(bld->builder, offset, sign, ""); - offset = LLVMBuildBitCast(bld->builder, offset, vec_type, ""); - lp_build_name(offset, "floor.offset"); - - res = LLVMBuildAdd(bld->builder, a, offset, ""); - lp_build_name(res, "floor.res"); - } - - res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, ""); - lp_build_name(res, "floor"); - - return res; -} - - -LLVMValueRef -lp_build_iceil(struct lp_build_context *bld, - LLVMValueRef a) -{ - const struct lp_type type = bld->type; - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); - LLVMValueRef res; - - assert(type.floating); - assert(lp_check_value(type, a)); - - if(util_cpu_caps.has_sse4_1) { - res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL); - } - else { - assert(0); - res = bld->undef; - } - - res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, ""); - - return res; -} - - -LLVMValueRef -lp_build_sqrt(struct lp_build_context *bld, - LLVMValueRef a) -{ - const struct lp_type type = bld->type; - LLVMTypeRef vec_type = lp_build_vec_type(type); - char intrinsic[32]; - - /* TODO: optimize the constant case */ - /* TODO: optimize the constant case */ - - assert(type.floating); - util_snprintf(intrinsic, sizeof intrinsic, "llvm.sqrt.v%uf%u", type.length, type.width); - - return lp_build_intrinsic_unary(bld->builder, intrinsic, vec_type, a); -} - - -LLVMValueRef -lp_build_rcp(struct lp_build_context *bld, - LLVMValueRef a) -{ - const struct lp_type type = bld->type; - - if(a == bld->zero) - return bld->undef; - if(a == bld->one) - return bld->one; - if(a == bld->undef) - return bld->undef; - - assert(type.floating); - - if(LLVMIsConstant(a)) - return LLVMConstFDiv(bld->one, a); - - if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) - /* FIXME: improve precision */ - return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a); - - return LLVMBuildFDiv(bld->builder, bld->one, a, ""); -} - - -/** - * Generate 
1/sqrt(a) - */ -LLVMValueRef -lp_build_rsqrt(struct lp_build_context *bld, - LLVMValueRef a) -{ - const struct lp_type type = bld->type; - - assert(type.floating); - - if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) - return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rsqrt.ps", lp_build_vec_type(type), a); - - return lp_build_rcp(bld, lp_build_sqrt(bld, a)); -} - - -/** - * Generate cos(a) - */ -LLVMValueRef -lp_build_cos(struct lp_build_context *bld, - LLVMValueRef a) -{ - const struct lp_type type = bld->type; - LLVMTypeRef vec_type = lp_build_vec_type(type); - char intrinsic[32]; - - /* TODO: optimize the constant case */ - - assert(type.floating); - util_snprintf(intrinsic, sizeof intrinsic, "llvm.cos.v%uf%u", type.length, type.width); - - return lp_build_intrinsic_unary(bld->builder, intrinsic, vec_type, a); -} - - -/** - * Generate sin(a) - */ -LLVMValueRef -lp_build_sin(struct lp_build_context *bld, - LLVMValueRef a) -{ - const struct lp_type type = bld->type; - LLVMTypeRef vec_type = lp_build_vec_type(type); - char intrinsic[32]; - - /* TODO: optimize the constant case */ - - assert(type.floating); - util_snprintf(intrinsic, sizeof intrinsic, "llvm.sin.v%uf%u", type.length, type.width); - - return lp_build_intrinsic_unary(bld->builder, intrinsic, vec_type, a); -} - - -/** - * Generate pow(x, y) - */ -LLVMValueRef -lp_build_pow(struct lp_build_context *bld, - LLVMValueRef x, - LLVMValueRef y) -{ - /* TODO: optimize the constant case */ - if(LLVMIsConstant(x) && LLVMIsConstant(y)) - debug_printf("%s: inefficient/imprecise constant arithmetic\n", - __FUNCTION__); - - return lp_build_exp2(bld, lp_build_mul(bld, lp_build_log2(bld, x), y)); -} - - -/** - * Generate exp(x) - */ -LLVMValueRef -lp_build_exp(struct lp_build_context *bld, - LLVMValueRef x) -{ - /* log2(e) = 1/log(2) */ - LLVMValueRef log2e = lp_build_const_scalar(bld->type, 1.4426950408889634); - - return lp_build_mul(bld, log2e, lp_build_exp2(bld, x)); -} - - -/** - * 
Generate log(x) - */ -LLVMValueRef -lp_build_log(struct lp_build_context *bld, - LLVMValueRef x) -{ - /* log(2) */ - LLVMValueRef log2 = lp_build_const_scalar(bld->type, 0.69314718055994529); - - return lp_build_mul(bld, log2, lp_build_exp2(bld, x)); -} - - -#define EXP_POLY_DEGREE 3 -#define LOG_POLY_DEGREE 5 - - -/** - * Generate polynomial. - * Ex: coeffs[0] + x * coeffs[1] + x^2 * coeffs[2]. - */ -static LLVMValueRef -lp_build_polynomial(struct lp_build_context *bld, - LLVMValueRef x, - const double *coeffs, - unsigned num_coeffs) -{ - const struct lp_type type = bld->type; - LLVMValueRef res = NULL; - unsigned i; - - /* TODO: optimize the constant case */ - if(LLVMIsConstant(x)) - debug_printf("%s: inefficient/imprecise constant arithmetic\n", - __FUNCTION__); - - for (i = num_coeffs; i--; ) { - LLVMValueRef coeff = lp_build_const_scalar(type, coeffs[i]); - if(res) - res = lp_build_add(bld, coeff, lp_build_mul(bld, x, res)); - else - res = coeff; - } - - if(res) - return res; - else - return bld->undef; -} - - -/** - * Minimax polynomial fit of 2**x, in range [-0.5, 0.5[ - */ -const double lp_build_exp2_polynomial[] = { -#if EXP_POLY_DEGREE == 5 - 9.9999994e-1, 6.9315308e-1, 2.4015361e-1, 5.5826318e-2, 8.9893397e-3, 1.8775767e-3 -#elif EXP_POLY_DEGREE == 4 - 1.0000026, 6.9300383e-1, 2.4144275e-1, 5.2011464e-2, 1.3534167e-2 -#elif EXP_POLY_DEGREE == 3 - 9.9992520e-1, 6.9583356e-1, 2.2606716e-1, 7.8024521e-2 -#elif EXP_POLY_DEGREE == 2 - 1.0017247, 6.5763628e-1, 3.3718944e-1 -#else -#error -#endif -}; - - -void -lp_build_exp2_approx(struct lp_build_context *bld, - LLVMValueRef x, - LLVMValueRef *p_exp2_int_part, - LLVMValueRef *p_frac_part, - LLVMValueRef *p_exp2) -{ - const struct lp_type type = bld->type; - LLVMTypeRef vec_type = lp_build_vec_type(type); - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); - LLVMValueRef ipart = NULL; - LLVMValueRef fpart = NULL; - LLVMValueRef expipart = NULL; - LLVMValueRef expfpart = NULL; - LLVMValueRef res = NULL; - 
- if(p_exp2_int_part || p_frac_part || p_exp2) { - /* TODO: optimize the constant case */ - if(LLVMIsConstant(x)) - debug_printf("%s: inefficient/imprecise constant arithmetic\n", - __FUNCTION__); - - assert(type.floating && type.width == 32); - - x = lp_build_min(bld, x, lp_build_const_scalar(type, 129.0)); - x = lp_build_max(bld, x, lp_build_const_scalar(type, -126.99999)); - - /* ipart = int(x - 0.5) */ - ipart = LLVMBuildSub(bld->builder, x, lp_build_const_scalar(type, 0.5f), ""); - ipart = LLVMBuildFPToSI(bld->builder, ipart, int_vec_type, ""); - - /* fpart = x - ipart */ - fpart = LLVMBuildSIToFP(bld->builder, ipart, vec_type, ""); - fpart = LLVMBuildSub(bld->builder, x, fpart, ""); - } - - if(p_exp2_int_part || p_exp2) { - /* expipart = (float) (1 << ipart) */ - expipart = LLVMBuildAdd(bld->builder, ipart, lp_build_int_const_scalar(type, 127), ""); - expipart = LLVMBuildShl(bld->builder, expipart, lp_build_int_const_scalar(type, 23), ""); - expipart = LLVMBuildBitCast(bld->builder, expipart, vec_type, ""); - } - - if(p_exp2) { - expfpart = lp_build_polynomial(bld, fpart, lp_build_exp2_polynomial, - Elements(lp_build_exp2_polynomial)); - - res = LLVMBuildMul(bld->builder, expipart, expfpart, ""); - } - - if(p_exp2_int_part) - *p_exp2_int_part = expipart; - - if(p_frac_part) - *p_frac_part = fpart; - - if(p_exp2) - *p_exp2 = res; -} - - -LLVMValueRef -lp_build_exp2(struct lp_build_context *bld, - LLVMValueRef x) -{ - LLVMValueRef res; - lp_build_exp2_approx(bld, x, NULL, NULL, &res); - return res; -} - - -/** - * Minimax polynomial fit of log2(x)/(x - 1), for x in range [1, 2[ - * These coefficients can be generate with - * http://www.boost.org/doc/libs/1_36_0/libs/math/doc/sf_and_dist/html/math_toolkit/toolkit/internals2/minimax.html - */ -const double lp_build_log2_polynomial[] = { -#if LOG_POLY_DEGREE == 6 - 3.11578814719469302614, -3.32419399085241980044, 2.59883907202499966007, -1.23152682416275988241, 0.318212422185251071475, -0.0344359067839062357313 
-#elif LOG_POLY_DEGREE == 5 - 2.8882704548164776201, -2.52074962577807006663, 1.48116647521213171641, -0.465725644288844778798, 0.0596515482674574969533 -#elif LOG_POLY_DEGREE == 4 - 2.61761038894603480148, -1.75647175389045657003, 0.688243882994381274313, -0.107254423828329604454 -#elif LOG_POLY_DEGREE == 3 - 2.28330284476918490682, -1.04913055217340124191, 0.204446009836232697516 -#else -#error -#endif -}; - - -/** - * See http://www.devmaster.net/forums/showthread.php?p=43580 - */ -void -lp_build_log2_approx(struct lp_build_context *bld, - LLVMValueRef x, - LLVMValueRef *p_exp, - LLVMValueRef *p_floor_log2, - LLVMValueRef *p_log2) -{ - const struct lp_type type = bld->type; - LLVMTypeRef vec_type = lp_build_vec_type(type); - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); - - LLVMValueRef expmask = lp_build_int_const_scalar(type, 0x7f800000); - LLVMValueRef mantmask = lp_build_int_const_scalar(type, 0x007fffff); - LLVMValueRef one = LLVMConstBitCast(bld->one, int_vec_type); - - LLVMValueRef i = NULL; - LLVMValueRef exp = NULL; - LLVMValueRef mant = NULL; - LLVMValueRef logexp = NULL; - LLVMValueRef logmant = NULL; - LLVMValueRef res = NULL; - - if(p_exp || p_floor_log2 || p_log2) { - /* TODO: optimize the constant case */ - if(LLVMIsConstant(x)) - debug_printf("%s: inefficient/imprecise constant arithmetic\n", - __FUNCTION__); - - assert(type.floating && type.width == 32); - - i = LLVMBuildBitCast(bld->builder, x, int_vec_type, ""); - - /* exp = (float) exponent(x) */ - exp = LLVMBuildAnd(bld->builder, i, expmask, ""); - } - - if(p_floor_log2 || p_log2) { - logexp = LLVMBuildLShr(bld->builder, exp, lp_build_int_const_scalar(type, 23), ""); - logexp = LLVMBuildSub(bld->builder, logexp, lp_build_int_const_scalar(type, 127), ""); - logexp = LLVMBuildSIToFP(bld->builder, logexp, vec_type, ""); - } - - if(p_log2) { - /* mant = (float) mantissa(x) */ - mant = LLVMBuildAnd(bld->builder, i, mantmask, ""); - mant = LLVMBuildOr(bld->builder, mant, one, ""); - mant 
= LLVMBuildBitCast(bld->builder, mant, vec_type, ""); - - logmant = lp_build_polynomial(bld, mant, lp_build_log2_polynomial, - Elements(lp_build_log2_polynomial)); - - /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/ - logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildSub(bld->builder, mant, bld->one, ""), ""); - - res = LLVMBuildAdd(bld->builder, logmant, logexp, ""); - } - - if(p_exp) - *p_exp = exp; - - if(p_floor_log2) - *p_floor_log2 = logexp; - - if(p_log2) - *p_log2 = res; -} - - -LLVMValueRef -lp_build_log2(struct lp_build_context *bld, - LLVMValueRef x) -{ - LLVMValueRef res; - lp_build_log2_approx(bld, x, NULL, NULL, &res); - return res; -} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.h b/src/gallium/drivers/llvmpipe/lp_bld_arit.h deleted file mode 100644 index 62be4b9aee1..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.h +++ /dev/null @@ -1,203 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * Helper arithmetic functions. - * - * @author Jose Fonseca - */ - - -#ifndef LP_BLD_ARIT_H -#define LP_BLD_ARIT_H - - -#include - - -struct lp_type; -struct lp_build_context; - - -/** - * Complement, i.e., 1 - a. - */ -LLVMValueRef -lp_build_comp(struct lp_build_context *bld, - LLVMValueRef a); - -LLVMValueRef -lp_build_add(struct lp_build_context *bld, - LLVMValueRef a, - LLVMValueRef b); - -LLVMValueRef -lp_build_sub(struct lp_build_context *bld, - LLVMValueRef a, - LLVMValueRef b); - -LLVMValueRef -lp_build_mul(struct lp_build_context *bld, - LLVMValueRef a, - LLVMValueRef b); - -LLVMValueRef -lp_build_mul_imm(struct lp_build_context *bld, - LLVMValueRef a, - int b); - -LLVMValueRef -lp_build_div(struct lp_build_context *bld, - LLVMValueRef a, - LLVMValueRef b); - -LLVMValueRef -lp_build_lerp(struct lp_build_context *bld, - LLVMValueRef x, - LLVMValueRef v0, - LLVMValueRef v1); - -/** - * Bilinear interpolation. - * - * Values indices are in v_{yx}. 
- */ -LLVMValueRef -lp_build_lerp_2d(struct lp_build_context *bld, - LLVMValueRef x, - LLVMValueRef y, - LLVMValueRef v00, - LLVMValueRef v01, - LLVMValueRef v10, - LLVMValueRef v11); - -LLVMValueRef -lp_build_min(struct lp_build_context *bld, - LLVMValueRef a, - LLVMValueRef b); - -LLVMValueRef -lp_build_max(struct lp_build_context *bld, - LLVMValueRef a, - LLVMValueRef b); - -LLVMValueRef -lp_build_abs(struct lp_build_context *bld, - LLVMValueRef a); - -LLVMValueRef -lp_build_sgn(struct lp_build_context *bld, - LLVMValueRef a); - -LLVMValueRef -lp_build_round(struct lp_build_context *bld, - LLVMValueRef a); - -LLVMValueRef -lp_build_floor(struct lp_build_context *bld, - LLVMValueRef a); - -LLVMValueRef -lp_build_ceil(struct lp_build_context *bld, - LLVMValueRef a); - -LLVMValueRef -lp_build_trunc(struct lp_build_context *bld, - LLVMValueRef a); - -LLVMValueRef -lp_build_ifloor(struct lp_build_context *bld, - LLVMValueRef a); -LLVMValueRef -lp_build_iceil(struct lp_build_context *bld, - LLVMValueRef a); - -LLVMValueRef -lp_build_iround(struct lp_build_context *bld, - LLVMValueRef a); - -LLVMValueRef -lp_build_itrunc(struct lp_build_context *bld, - LLVMValueRef a); - -LLVMValueRef -lp_build_sqrt(struct lp_build_context *bld, - LLVMValueRef a); - -LLVMValueRef -lp_build_rcp(struct lp_build_context *bld, - LLVMValueRef a); - -LLVMValueRef -lp_build_rsqrt(struct lp_build_context *bld, - LLVMValueRef a); - -LLVMValueRef -lp_build_cos(struct lp_build_context *bld, - LLVMValueRef a); - -LLVMValueRef -lp_build_sin(struct lp_build_context *bld, - LLVMValueRef a); - -LLVMValueRef -lp_build_pow(struct lp_build_context *bld, - LLVMValueRef a, - LLVMValueRef b); - -LLVMValueRef -lp_build_exp(struct lp_build_context *bld, - LLVMValueRef a); - -LLVMValueRef -lp_build_log(struct lp_build_context *bld, - LLVMValueRef a); - -LLVMValueRef -lp_build_exp2(struct lp_build_context *bld, - LLVMValueRef a); - -LLVMValueRef -lp_build_log2(struct lp_build_context *bld, - LLVMValueRef a); - 
-void -lp_build_exp2_approx(struct lp_build_context *bld, - LLVMValueRef x, - LLVMValueRef *p_exp2_int_part, - LLVMValueRef *p_frac_part, - LLVMValueRef *p_exp2); - -void -lp_build_log2_approx(struct lp_build_context *bld, - LLVMValueRef x, - LLVMValueRef *p_exp, - LLVMValueRef *p_floor_log2, - LLVMValueRef *p_log2); - -#endif /* !LP_BLD_ARIT_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend.h b/src/gallium/drivers/llvmpipe/lp_bld_blend.h deleted file mode 100644 index da272e549f3..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend.h +++ /dev/null @@ -1,107 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -#ifndef LP_BLD_BLEND_H -#define LP_BLD_BLEND_H - - -/** - * @file - * LLVM IR building helpers interfaces. - * - * We use LLVM-C bindings for now. They are not documented, but follow the C++ - * interfaces very closely, and appear to be complete enough for code - * genration. See - * http://npcontemplation.blogspot.com/2008/06/secret-of-llvm-c-bindings.html - * for a standalone example. - */ - -#include - -#include "pipe/p_format.h" - - -struct pipe_blend_state; -struct lp_type; -struct lp_build_context; - - -/** - * Whether the blending function is commutative or not. - */ -boolean -lp_build_blend_func_commutative(unsigned func); - - -/** - * Whether the blending functions are the reverse of each other. - */ -boolean -lp_build_blend_func_reverse(unsigned rgb_func, unsigned alpha_func); - - -LLVMValueRef -lp_build_blend_func(struct lp_build_context *bld, - unsigned func, - LLVMValueRef term1, - LLVMValueRef term2); - - -LLVMValueRef -lp_build_blend_aos(LLVMBuilderRef builder, - const struct pipe_blend_state *blend, - struct lp_type type, - LLVMValueRef src, - LLVMValueRef dst, - LLVMValueRef const_, - unsigned alpha_swizzle); - - -void -lp_build_blend_soa(LLVMBuilderRef builder, - const struct pipe_blend_state *blend, - struct lp_type type, - LLVMValueRef src[4], - LLVMValueRef dst[4], - LLVMValueRef const_[4], - LLVMValueRef res[4]); - - -/** - * Apply a logic op. - * - * src/dst parameters are packed values. It should work regardless the inputs - * are scalars, or a vector. 
- */ -LLVMValueRef -lp_build_logicop(LLVMBuilderRef builder, - unsigned logicop_func, - LLVMValueRef src, - LLVMValueRef dst); - - -#endif /* !LP_BLD_BLEND_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c deleted file mode 100644 index 0215bb72ac6..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c +++ /dev/null @@ -1,360 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/** - * @file - * Blend LLVM IR generation -- AoS layout. - * - * AoS blending is in general much slower than SoA, but there are some cases - * where it might be faster. 
In particular, if a pixel is rendered only once - * then the overhead of tiling and untiling will dominate over the speedup that - * SoA gives. So we might want to detect such cases and fallback to AoS in the - * future, but for now this function is here for historical/benchmarking - * purposes. - * - * Run lp_blend_test after any change to this file. - * - * @author Jose Fonseca - */ - - -#include "pipe/p_state.h" -#include "util/u_debug.h" - -#include "lp_bld_type.h" -#include "lp_bld_const.h" -#include "lp_bld_arit.h" -#include "lp_bld_logic.h" -#include "lp_bld_swizzle.h" -#include "lp_bld_blend.h" -#include "lp_bld_debug.h" - - -/** - * We may the same values several times, so we keep them here to avoid - * recomputing them. Also reusing the values allows us to do simplifications - * that LLVM optimization passes wouldn't normally be able to do. - */ -struct lp_build_blend_aos_context -{ - struct lp_build_context base; - - LLVMValueRef src; - LLVMValueRef dst; - LLVMValueRef const_; - - LLVMValueRef inv_src; - LLVMValueRef inv_dst; - LLVMValueRef inv_const; - LLVMValueRef saturate; - - LLVMValueRef rgb_src_factor; - LLVMValueRef alpha_src_factor; - LLVMValueRef rgb_dst_factor; - LLVMValueRef alpha_dst_factor; -}; - - -static LLVMValueRef -lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld, - unsigned factor, - boolean alpha) -{ - switch (factor) { - case PIPE_BLENDFACTOR_ZERO: - return bld->base.zero; - case PIPE_BLENDFACTOR_ONE: - return bld->base.one; - case PIPE_BLENDFACTOR_SRC_COLOR: - case PIPE_BLENDFACTOR_SRC_ALPHA: - return bld->src; - case PIPE_BLENDFACTOR_DST_COLOR: - case PIPE_BLENDFACTOR_DST_ALPHA: - return bld->dst; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - if(alpha) - return bld->base.one; - else { - if(!bld->inv_dst) - bld->inv_dst = lp_build_comp(&bld->base, bld->dst); - if(!bld->saturate) - bld->saturate = lp_build_min(&bld->base, bld->src, bld->inv_dst); - return bld->saturate; - } - case PIPE_BLENDFACTOR_CONST_COLOR: 
- case PIPE_BLENDFACTOR_CONST_ALPHA: - return bld->const_; - case PIPE_BLENDFACTOR_SRC1_COLOR: - case PIPE_BLENDFACTOR_SRC1_ALPHA: - /* TODO */ - assert(0); - return bld->base.zero; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - if(!bld->inv_src) - bld->inv_src = lp_build_comp(&bld->base, bld->src); - return bld->inv_src; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - if(!bld->inv_dst) - bld->inv_dst = lp_build_comp(&bld->base, bld->dst); - return bld->inv_dst; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - if(!bld->inv_const) - bld->inv_const = lp_build_comp(&bld->base, bld->const_); - return bld->inv_const; - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - /* TODO */ - assert(0); - return bld->base.zero; - default: - assert(0); - return bld->base.zero; - } -} - - -enum lp_build_blend_swizzle { - LP_BUILD_BLEND_SWIZZLE_RGBA = 0, - LP_BUILD_BLEND_SWIZZLE_AAAA = 1 -}; - - -/** - * How should we shuffle the base factor. 
- */ -static enum lp_build_blend_swizzle -lp_build_blend_factor_swizzle(unsigned factor) -{ - switch (factor) { - case PIPE_BLENDFACTOR_ONE: - case PIPE_BLENDFACTOR_ZERO: - case PIPE_BLENDFACTOR_SRC_COLOR: - case PIPE_BLENDFACTOR_DST_COLOR: - case PIPE_BLENDFACTOR_CONST_COLOR: - case PIPE_BLENDFACTOR_SRC1_COLOR: - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - case PIPE_BLENDFACTOR_INV_DST_COLOR: - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - return LP_BUILD_BLEND_SWIZZLE_RGBA; - case PIPE_BLENDFACTOR_SRC_ALPHA: - case PIPE_BLENDFACTOR_DST_ALPHA: - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - case PIPE_BLENDFACTOR_SRC1_ALPHA: - case PIPE_BLENDFACTOR_CONST_ALPHA: - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - return LP_BUILD_BLEND_SWIZZLE_AAAA; - default: - assert(0); - return LP_BUILD_BLEND_SWIZZLE_RGBA; - } -} - - -static LLVMValueRef -lp_build_blend_swizzle(struct lp_build_blend_aos_context *bld, - LLVMValueRef rgb, - LLVMValueRef alpha, - enum lp_build_blend_swizzle rgb_swizzle, - unsigned alpha_swizzle) -{ - if(rgb == alpha) { - if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA) - return rgb; - if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA) - return lp_build_broadcast_aos(&bld->base, rgb, alpha_swizzle); - } - else { - if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA) { - boolean cond[4] = {0, 0, 0, 0}; - cond[alpha_swizzle] = 1; - return lp_build_select_aos(&bld->base, alpha, rgb, cond); - } - if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA) { - unsigned char swizzle[4]; - swizzle[0] = alpha_swizzle; - swizzle[1] = alpha_swizzle; - swizzle[2] = alpha_swizzle; - swizzle[3] = alpha_swizzle; - swizzle[alpha_swizzle] += 4; - return lp_build_swizzle2_aos(&bld->base, rgb, alpha, swizzle); - } - } - assert(0); - return bld->base.undef; -} - - -/** - * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendFuncSeparate.xml - */ -static 
LLVMValueRef -lp_build_blend_factor(struct lp_build_blend_aos_context *bld, - LLVMValueRef factor1, - unsigned rgb_factor, - unsigned alpha_factor, - unsigned alpha_swizzle) -{ - LLVMValueRef rgb_factor_; - LLVMValueRef alpha_factor_; - LLVMValueRef factor2; - enum lp_build_blend_swizzle rgb_swizzle; - - rgb_factor_ = lp_build_blend_factor_unswizzled(bld, rgb_factor, FALSE); - alpha_factor_ = lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE); - - rgb_swizzle = lp_build_blend_factor_swizzle(rgb_factor); - - factor2 = lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle); - - return lp_build_mul(&bld->base, factor1, factor2); -} - - -boolean -lp_build_blend_func_commutative(unsigned func) -{ - switch (func) { - case PIPE_BLEND_ADD: - case PIPE_BLEND_MIN: - case PIPE_BLEND_MAX: - return TRUE; - case PIPE_BLEND_SUBTRACT: - case PIPE_BLEND_REVERSE_SUBTRACT: - return FALSE; - default: - assert(0); - return TRUE; - } -} - - -boolean -lp_build_blend_func_reverse(unsigned rgb_func, unsigned alpha_func) -{ - if(rgb_func == alpha_func) - return FALSE; - if(rgb_func == PIPE_BLEND_SUBTRACT && alpha_func == PIPE_BLEND_REVERSE_SUBTRACT) - return TRUE; - if(rgb_func == PIPE_BLEND_REVERSE_SUBTRACT && alpha_func == PIPE_BLEND_SUBTRACT) - return TRUE; - return FALSE; -} - - -/** - * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendEquationSeparate.xml - */ -LLVMValueRef -lp_build_blend_func(struct lp_build_context *bld, - unsigned func, - LLVMValueRef term1, - LLVMValueRef term2) -{ - switch (func) { - case PIPE_BLEND_ADD: - return lp_build_add(bld, term1, term2); - break; - case PIPE_BLEND_SUBTRACT: - return lp_build_sub(bld, term1, term2); - case PIPE_BLEND_REVERSE_SUBTRACT: - return lp_build_sub(bld, term2, term1); - case PIPE_BLEND_MIN: - return lp_build_min(bld, term1, term2); - case PIPE_BLEND_MAX: - return lp_build_max(bld, term1, term2); - default: - assert(0); - return bld->zero; - } -} - - -LLVMValueRef 
-lp_build_blend_aos(LLVMBuilderRef builder, - const struct pipe_blend_state *blend, - struct lp_type type, - LLVMValueRef src, - LLVMValueRef dst, - LLVMValueRef const_, - unsigned alpha_swizzle) -{ - struct lp_build_blend_aos_context bld; - LLVMValueRef src_term; - LLVMValueRef dst_term; - - /* FIXME */ - assert(blend->independent_blend_enable == 0); - assert(blend->rt[0].colormask == 0xf); - - if(!blend->rt[0].blend_enable) - return src; - - /* It makes no sense to blend unless values are normalized */ - assert(type.norm); - - /* Setup build context */ - memset(&bld, 0, sizeof bld); - lp_build_context_init(&bld.base, builder, type); - bld.src = src; - bld.dst = dst; - bld.const_ = const_; - - /* TODO: There are still a few optimization opportunities here. For certain - * combinations it is possible to reorder the operations and therefore saving - * some instructions. */ - - src_term = lp_build_blend_factor(&bld, src, blend->rt[0].rgb_src_factor, - blend->rt[0].alpha_src_factor, alpha_swizzle); - dst_term = lp_build_blend_factor(&bld, dst, blend->rt[0].rgb_dst_factor, - blend->rt[0].alpha_dst_factor, alpha_swizzle); - - lp_build_name(src_term, "src_term"); - lp_build_name(dst_term, "dst_term"); - - if(blend->rt[0].rgb_func == blend->rt[0].alpha_func) { - return lp_build_blend_func(&bld.base, blend->rt[0].rgb_func, src_term, dst_term); - } - else { - /* Seperate RGB / A functions */ - - LLVMValueRef rgb; - LLVMValueRef alpha; - - rgb = lp_build_blend_func(&bld.base, blend->rt[0].rgb_func, src_term, dst_term); - alpha = lp_build_blend_func(&bld.base, blend->rt[0].alpha_func, src_term, dst_term); - - return lp_build_blend_swizzle(&bld, rgb, alpha, LP_BUILD_BLEND_SWIZZLE_RGBA, alpha_swizzle); - } -} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_logicop.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_logicop.c deleted file mode 100644 index 1eac0a5c891..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend_logicop.c +++ /dev/null @@ -1,109 +0,0 @@ 
-/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/** - * @file - * Blend LLVM IR generation -- logic ops. 
- * - * @author Jose Fonseca - */ - - -#include "pipe/p_state.h" -#include "util/u_debug.h" - -#include "lp_bld_blend.h" - - -LLVMValueRef -lp_build_logicop(LLVMBuilderRef builder, - unsigned logicop_func, - LLVMValueRef src, - LLVMValueRef dst) -{ - LLVMTypeRef type; - LLVMValueRef res; - - type = LLVMTypeOf(src); - - switch (logicop_func) { - case PIPE_LOGICOP_CLEAR: - res = LLVMConstNull(type); - break; - case PIPE_LOGICOP_NOR: - res = LLVMBuildNot(builder, LLVMBuildOr(builder, src, dst, ""), ""); - break; - case PIPE_LOGICOP_AND_INVERTED: - res = LLVMBuildAnd(builder, LLVMBuildNot(builder, src, ""), dst, ""); - break; - case PIPE_LOGICOP_COPY_INVERTED: - res = LLVMBuildNot(builder, src, ""); - break; - case PIPE_LOGICOP_AND_REVERSE: - res = LLVMBuildAnd(builder, src, LLVMBuildNot(builder, dst, ""), ""); - break; - case PIPE_LOGICOP_INVERT: - res = LLVMBuildNot(builder, dst, ""); - break; - case PIPE_LOGICOP_XOR: - res = LLVMBuildXor(builder, src, dst, ""); - break; - case PIPE_LOGICOP_NAND: - res = LLVMBuildNot(builder, LLVMBuildAnd(builder, src, dst, ""), ""); - break; - case PIPE_LOGICOP_AND: - res = LLVMBuildAnd(builder, src, dst, ""); - break; - case PIPE_LOGICOP_EQUIV: - res = LLVMBuildNot(builder, LLVMBuildXor(builder, src, dst, ""), ""); - break; - case PIPE_LOGICOP_NOOP: - res = dst; - break; - case PIPE_LOGICOP_OR_INVERTED: - res = LLVMBuildOr(builder, LLVMBuildNot(builder, src, ""), dst, ""); - break; - case PIPE_LOGICOP_COPY: - res = src; - break; - case PIPE_LOGICOP_OR_REVERSE: - res = LLVMBuildOr(builder, src, LLVMBuildNot(builder, dst, ""), ""); - break; - case PIPE_LOGICOP_OR: - res = LLVMBuildOr(builder, src, dst, ""); - break; - case PIPE_LOGICOP_SET: - res = LLVMConstAllOnes(type); - break; - default: - assert(0); - res = src; - } - - return res; -} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c deleted file mode 100644 index 6d5a45db7a3..00000000000 --- 
a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c +++ /dev/null @@ -1,298 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/** - * @file - * Blend LLVM IR generation -- SoA layout. - * - * Blending in SoA is much faster than AoS, especially when separate rgb/alpha - * factors/functions are used, since no channel masking/shuffling is necessary - * and we can achieve the full throughput of the SIMD operations. Furthermore - * the fragment shader output is also in SoA, so it fits nicely with the rest of - * the fragment pipeline. 
- * - * The drawback is that to be displayed the color buffer needs to be in AoS - * layout, so we need to tile/untile the color buffer before/after rendering. - * A color buffer like - * - * R11 G11 B11 A11 R12 G12 B12 A12 R13 G13 B13 A13 R14 G14 B14 A14 ... - * R21 G21 B21 A21 R22 G22 B22 A22 R23 G23 B23 A23 R24 G24 B24 A24 ... - * - * R31 G31 B31 A31 R32 G32 B32 A32 R33 G33 B33 A33 R34 G34 B34 A34 ... - * R41 G41 B41 A41 R42 G42 B42 A42 R43 G43 B43 A43 R44 G44 B44 A44 ... - * - * ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... - * - * will actually be stored in memory as - * - * R11 R12 R21 R22 R13 R14 R23 R24 ... G11 G12 G21 G22 G13 G14 G23 G24 ... B11 B12 B21 B22 B13 B14 B23 B24 ... A11 A12 A21 A22 A13 A14 A23 A24 ... - * R31 R32 R41 R42 R33 R34 R43 R44 ... G31 G32 G41 G42 G33 G34 G43 G44 ... B31 B32 B41 B42 B33 B34 B43 B44 ... A31 A32 A41 A42 A33 A34 A43 A44 ... - * ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... - * - * NOTE: Run lp_blend_test after any change to this file. - * - * You can also run lp_blend_test to obtain AoS vs SoA benchmarks. Invoking it - * as: - * - * lp_blend_test -o blend.tsv - * - * will generate a tab-seperated-file with the test results and performance - * measurements. - * - * @author Jose Fonseca - */ - - -#include "pipe/p_state.h" -#include "util/u_debug.h" - -#include "lp_bld_type.h" -#include "lp_bld_arit.h" -#include "lp_bld_blend.h" - - -/** - * We may the same values several times, so we keep them here to avoid - * recomputing them. Also reusing the values allows us to do simplifications - * that LLVM optimization passes wouldn't normally be able to do. 
- */ -struct lp_build_blend_soa_context -{ - struct lp_build_context base; - - LLVMValueRef src[4]; - LLVMValueRef dst[4]; - LLVMValueRef con[4]; - - LLVMValueRef inv_src[4]; - LLVMValueRef inv_dst[4]; - LLVMValueRef inv_con[4]; - - LLVMValueRef src_alpha_saturate; - - /** - * We store all factors in a table in order to eliminate redundant - * multiplications later. - */ - LLVMValueRef factor[2][2][4]; - - /** - * Table with all terms. - */ - LLVMValueRef term[2][4]; -}; - - -static LLVMValueRef -lp_build_blend_soa_factor(struct lp_build_blend_soa_context *bld, - unsigned factor, unsigned i) -{ - /* - * Compute src/first term RGB - */ - switch (factor) { - case PIPE_BLENDFACTOR_ONE: - return bld->base.one; - case PIPE_BLENDFACTOR_SRC_COLOR: - return bld->src[i]; - case PIPE_BLENDFACTOR_SRC_ALPHA: - return bld->src[3]; - case PIPE_BLENDFACTOR_DST_COLOR: - return bld->dst[i]; - case PIPE_BLENDFACTOR_DST_ALPHA: - return bld->dst[3]; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - if(i == 3) - return bld->base.one; - else { - if(!bld->inv_dst[3]) - bld->inv_dst[3] = lp_build_comp(&bld->base, bld->dst[3]); - if(!bld->src_alpha_saturate) - bld->src_alpha_saturate = lp_build_min(&bld->base, bld->src[3], bld->inv_dst[3]); - return bld->src_alpha_saturate; - } - case PIPE_BLENDFACTOR_CONST_COLOR: - return bld->con[i]; - case PIPE_BLENDFACTOR_CONST_ALPHA: - return bld->con[3]; - case PIPE_BLENDFACTOR_SRC1_COLOR: - /* TODO */ - assert(0); - return bld->base.zero; - case PIPE_BLENDFACTOR_SRC1_ALPHA: - /* TODO */ - assert(0); - return bld->base.zero; - case PIPE_BLENDFACTOR_ZERO: - return bld->base.zero; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - if(!bld->inv_src[i]) - bld->inv_src[i] = lp_build_comp(&bld->base, bld->src[i]); - return bld->inv_src[i]; - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - if(!bld->inv_src[3]) - bld->inv_src[3] = lp_build_comp(&bld->base, bld->src[3]); - return bld->inv_src[3]; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - if(!bld->inv_dst[i]) - bld->inv_dst[i] = 
lp_build_comp(&bld->base, bld->dst[i]); - return bld->inv_dst[i]; - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - if(!bld->inv_dst[3]) - bld->inv_dst[3] = lp_build_comp(&bld->base, bld->dst[3]); - return bld->inv_dst[3]; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - if(!bld->inv_con[i]) - bld->inv_con[i] = lp_build_comp(&bld->base, bld->con[i]); - return bld->inv_con[i]; - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - if(!bld->inv_con[3]) - bld->inv_con[3] = lp_build_comp(&bld->base, bld->con[3]); - return bld->inv_con[3]; - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - /* TODO */ - assert(0); - return bld->base.zero; - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - /* TODO */ - assert(0); - return bld->base.zero; - default: - assert(0); - return bld->base.zero; - } -} - - -/** - * Generate blend code in SOA mode. - * \param src src/fragment color - * \param dst dst/framebuffer color - * \param con constant blend color - * \param res the result/output - */ -void -lp_build_blend_soa(LLVMBuilderRef builder, - const struct pipe_blend_state *blend, - struct lp_type type, - LLVMValueRef src[4], - LLVMValueRef dst[4], - LLVMValueRef con[4], - LLVMValueRef res[4]) -{ - struct lp_build_blend_soa_context bld; - unsigned i, j, k; - - /* Setup build context */ - memset(&bld, 0, sizeof bld); - lp_build_context_init(&bld.base, builder, type); - for (i = 0; i < 4; ++i) { - bld.src[i] = src[i]; - bld.dst[i] = dst[i]; - bld.con[i] = con[i]; - } - - for (i = 0; i < 4; ++i) { - if (blend->rt[0].colormask & (1 << i)) { - if (blend->logicop_enable) { - if(!type.floating) { - res[i] = lp_build_logicop(builder, blend->logicop_func, src[i], dst[i]); - } - else - res[i] = dst[i]; - } - else if (blend->rt[0].blend_enable) { - unsigned src_factor = i < 3 ? blend->rt[0].rgb_src_factor : blend->rt[0].alpha_src_factor; - unsigned dst_factor = i < 3 ? blend->rt[0].rgb_dst_factor : blend->rt[0].alpha_dst_factor; - unsigned func = i < 3 ? 
blend->rt[0].rgb_func : blend->rt[0].alpha_func; - boolean func_commutative = lp_build_blend_func_commutative(func); - - /* It makes no sense to blend unless values are normalized */ - assert(type.norm); - - /* - * Compute src/dst factors. - */ - - bld.factor[0][0][i] = src[i]; - bld.factor[0][1][i] = lp_build_blend_soa_factor(&bld, src_factor, i); - bld.factor[1][0][i] = dst[i]; - bld.factor[1][1][i] = lp_build_blend_soa_factor(&bld, dst_factor, i); - - /* - * Compute src/dst terms - */ - - for(k = 0; k < 2; ++k) { - /* See if this multiplication has been previously computed */ - for(j = 0; j < i; ++j) { - if((bld.factor[k][0][j] == bld.factor[k][0][i] && - bld.factor[k][1][j] == bld.factor[k][1][i]) || - (bld.factor[k][0][j] == bld.factor[k][1][i] && - bld.factor[k][1][j] == bld.factor[k][0][i])) - break; - } - - if(j < i) - bld.term[k][i] = bld.term[k][j]; - else - bld.term[k][i] = lp_build_mul(&bld.base, bld.factor[k][0][i], bld.factor[k][1][i]); - } - - /* - * Combine terms - */ - - /* See if this function has been previously applied */ - for(j = 0; j < i; ++j) { - unsigned prev_func = j < 3 ? 
blend->rt[0].rgb_func : blend->rt[0].alpha_func; - unsigned func_reverse = lp_build_blend_func_reverse(func, prev_func); - - if((!func_reverse && - bld.term[0][j] == bld.term[0][i] && - bld.term[1][j] == bld.term[1][i]) || - ((func_commutative || func_reverse) && - bld.term[0][j] == bld.term[1][i] && - bld.term[1][j] == bld.term[0][i])) - break; - } - - if(j < i) - res[i] = res[j]; - else - res[i] = lp_build_blend_func(&bld.base, func, bld.term[0][i], bld.term[1][i]); - } - else { - res[i] = src[i]; - } - } - else { - res[i] = dst[i]; - } - } -} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_const.c b/src/gallium/drivers/llvmpipe/lp_bld_const.c deleted file mode 100644 index c8eaa8c3940..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_bld_const.c +++ /dev/null @@ -1,369 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/** - * @file - * Helper functions for constant building. - * - * @author Jose Fonseca - */ - -#include - -#include "util/u_debug.h" - -#include "lp_bld_type.h" -#include "lp_bld_const.h" - - -unsigned -lp_mantissa(struct lp_type type) -{ - assert(type.floating); - - if(type.floating) { - switch(type.width) { - case 32: - return 23; - case 64: - return 53; - default: - assert(0); - return 0; - } - } - else { - if(type.sign) - return type.width - 1; - else - return type.width; - } -} - - -/** - * Shift of the unity. - * - * Same as lp_const_scale(), but in terms of shifts. - */ -unsigned -lp_const_shift(struct lp_type type) -{ - if(type.floating) - return 0; - else if(type.fixed) - return type.width/2; - else if(type.norm) - return type.sign ? type.width - 1 : type.width; - else - return 0; -} - - -unsigned -lp_const_offset(struct lp_type type) -{ - if(type.floating || type.fixed) - return 0; - else if(type.norm) - return 1; - else - return 0; -} - - -/** - * Scaling factor between the LLVM native value and its interpretation. - * - * This is 1.0 for all floating types and unnormalized integers, and something - * else for the fixed points types and normalized integers. - */ -double -lp_const_scale(struct lp_type type) -{ - unsigned long long llscale; - double dscale; - - llscale = (unsigned long long)1 << lp_const_shift(type); - llscale -= lp_const_offset(type); - dscale = (double)llscale; - assert((unsigned long long)dscale == llscale); - - return dscale; -} - - -/** - * Minimum value representable by the type. 
- */ -double -lp_const_min(struct lp_type type) -{ - unsigned bits; - - if(!type.sign) - return 0.0; - - if(type.norm) - return -1.0; - - if (type.floating) { - switch(type.width) { - case 32: - return -FLT_MAX; - case 64: - return -DBL_MAX; - default: - assert(0); - return 0.0; - } - } - - if(type.fixed) - /* FIXME: consider the fractional bits? */ - bits = type.width / 2 - 1; - else - bits = type.width - 1; - - return (double)-((long long)1 << bits); -} - - -/** - * Maximum value representable by the type. - */ -double -lp_const_max(struct lp_type type) -{ - unsigned bits; - - if(type.norm) - return 1.0; - - if (type.floating) { - switch(type.width) { - case 32: - return FLT_MAX; - case 64: - return DBL_MAX; - default: - assert(0); - return 0.0; - } - } - - if(type.fixed) - bits = type.width / 2; - else - bits = type.width; - - if(type.sign) - bits -= 1; - - return (double)(((unsigned long long)1 << bits) - 1); -} - - -double -lp_const_eps(struct lp_type type) -{ - if (type.floating) { - switch(type.width) { - case 32: - return FLT_EPSILON; - case 64: - return DBL_EPSILON; - default: - assert(0); - return 0.0; - } - } - else { - double scale = lp_const_scale(type); - return 1.0/scale; - } -} - - -LLVMValueRef -lp_build_undef(struct lp_type type) -{ - LLVMTypeRef vec_type = lp_build_vec_type(type); - return LLVMGetUndef(vec_type); -} - - -LLVMValueRef -lp_build_zero(struct lp_type type) -{ - LLVMTypeRef vec_type = lp_build_vec_type(type); - return LLVMConstNull(vec_type); -} - - -LLVMValueRef -lp_build_one(struct lp_type type) -{ - LLVMTypeRef elem_type; - LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; - unsigned i; - - assert(type.length <= LP_MAX_VECTOR_LENGTH); - - elem_type = lp_build_elem_type(type); - - if(type.floating) - elems[0] = LLVMConstReal(elem_type, 1.0); - else if(type.fixed) - elems[0] = LLVMConstInt(elem_type, 1LL << (type.width/2), 0); - else if(!type.norm) - elems[0] = LLVMConstInt(elem_type, 1, 0); - else if(type.sign) - elems[0] = 
LLVMConstInt(elem_type, (1LL << (type.width - 1)) - 1, 0); - else { - /* special case' -- 1.0 for normalized types is more easily attained if - * we start with a vector consisting of all bits set */ - LLVMTypeRef vec_type = LLVMVectorType(elem_type, type.length); - LLVMValueRef vec = LLVMConstAllOnes(vec_type); - -#if 0 - if(type.sign) - /* TODO: Unfortunately this caused "Tried to create a shift operation - * on a non-integer type!" */ - vec = LLVMConstLShr(vec, lp_build_int_const_scalar(type, 1)); -#endif - - return vec; - } - - for(i = 1; i < type.length; ++i) - elems[i] = elems[0]; - - return LLVMConstVector(elems, type.length); -} - - -LLVMValueRef -lp_build_const_scalar(struct lp_type type, - double val) -{ - LLVMTypeRef elem_type = lp_build_elem_type(type); - LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; - unsigned i; - - assert(type.length <= LP_MAX_VECTOR_LENGTH); - - if(type.floating) { - elems[0] = LLVMConstReal(elem_type, val); - } - else { - double dscale = lp_const_scale(type); - - elems[0] = LLVMConstInt(elem_type, val*dscale + 0.5, 0); - } - - for(i = 1; i < type.length; ++i) - elems[i] = elems[0]; - - return LLVMConstVector(elems, type.length); -} - - -LLVMValueRef -lp_build_int_const_scalar(struct lp_type type, - long long val) -{ - LLVMTypeRef elem_type = lp_build_int_elem_type(type); - LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; - unsigned i; - - assert(type.length <= LP_MAX_VECTOR_LENGTH); - - for(i = 0; i < type.length; ++i) - elems[i] = LLVMConstInt(elem_type, val, type.sign ? 
1 : 0); - - return LLVMConstVector(elems, type.length); -} - - -LLVMValueRef -lp_build_const_aos(struct lp_type type, - double r, double g, double b, double a, - const unsigned char *swizzle) -{ - const unsigned char default_swizzle[4] = {0, 1, 2, 3}; - LLVMTypeRef elem_type; - LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; - unsigned i; - - assert(type.length % 4 == 0); - assert(type.length <= LP_MAX_VECTOR_LENGTH); - - elem_type = lp_build_elem_type(type); - - if(swizzle == NULL) - swizzle = default_swizzle; - - if(type.floating) { - elems[swizzle[0]] = LLVMConstReal(elem_type, r); - elems[swizzle[1]] = LLVMConstReal(elem_type, g); - elems[swizzle[2]] = LLVMConstReal(elem_type, b); - elems[swizzle[3]] = LLVMConstReal(elem_type, a); - } - else { - double dscale = lp_const_scale(type); - - elems[swizzle[0]] = LLVMConstInt(elem_type, r*dscale + 0.5, 0); - elems[swizzle[1]] = LLVMConstInt(elem_type, g*dscale + 0.5, 0); - elems[swizzle[2]] = LLVMConstInt(elem_type, b*dscale + 0.5, 0); - elems[swizzle[3]] = LLVMConstInt(elem_type, a*dscale + 0.5, 0); - } - - for(i = 4; i < type.length; ++i) - elems[i] = elems[i % 4]; - - return LLVMConstVector(elems, type.length); -} - - -LLVMValueRef -lp_build_const_mask_aos(struct lp_type type, - const boolean cond[4]) -{ - LLVMTypeRef elem_type = LLVMIntType(type.width); - LLVMValueRef masks[LP_MAX_VECTOR_LENGTH]; - unsigned i, j; - - assert(type.length <= LP_MAX_VECTOR_LENGTH); - - for(j = 0; j < type.length; j += 4) - for(i = 0; i < 4; ++i) - masks[j + i] = LLVMConstInt(elem_type, cond[i] ? ~0 : 0, 0); - - return LLVMConstVector(masks, type.length); -} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_const.h b/src/gallium/drivers/llvmpipe/lp_bld_const.h deleted file mode 100644 index cb8e1c7b006..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_bld_const.h +++ /dev/null @@ -1,108 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * Helper functions for constant building. 
- * - * @author Jose Fonseca - */ - - -#ifndef LP_BLD_CONST_H -#define LP_BLD_CONST_H - - -#include - -#include - - -struct lp_type; - - -unsigned -lp_mantissa(struct lp_type type); - - -unsigned -lp_const_shift(struct lp_type type); - - -unsigned -lp_const_offset(struct lp_type type); - - -double -lp_const_scale(struct lp_type type); - -double -lp_const_min(struct lp_type type); - - -double -lp_const_max(struct lp_type type); - - -double -lp_const_eps(struct lp_type type); - - -LLVMValueRef -lp_build_undef(struct lp_type type); - - -LLVMValueRef -lp_build_zero(struct lp_type type); - - -LLVMValueRef -lp_build_one(struct lp_type type); - - -LLVMValueRef -lp_build_const_scalar(struct lp_type type, - double val); - - -LLVMValueRef -lp_build_int_const_scalar(struct lp_type type, - long long val); - - -LLVMValueRef -lp_build_const_aos(struct lp_type type, - double r, double g, double b, double a, - const unsigned char *swizzle); - - -LLVMValueRef -lp_build_const_mask_aos(struct lp_type type, - const boolean cond[4]); - - -#endif /* !LP_BLD_CONST_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.c b/src/gallium/drivers/llvmpipe/lp_bld_conv.c deleted file mode 100644 index f77cf787213..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_bld_conv.c +++ /dev/null @@ -1,469 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/** - * @file - * Helper functions for type conversions. - * - * We want to use the fastest type for a given computation whenever feasible. - * The other side of this is that we need to be able convert between several - * types accurately and efficiently. 
- * - * Conversion between types of different bit width is quite complex since a - * - * To remember there are a few invariants in type conversions: - * - * - register width must remain constant: - * - * src_type.width * src_type.length == dst_type.width * dst_type.length - * - * - total number of elements must remain constant: - * - * src_type.length * num_srcs == dst_type.length * num_dsts - * - * It is not always possible to do the conversion both accurately and - * efficiently, usually due to lack of adequate machine instructions. In these - * cases it is important not to cut shortcuts here and sacrifice accuracy, as - * there this functions can be used anywhere. In the future we might have a - * precision parameter which can gauge the accuracy vs efficiency compromise, - * but for now if the data conversion between two stages happens to be the - * bottleneck, then most likely should just avoid converting at all and run - * both stages with the same type. - * - * Make sure to run lp_test_conv unit test after any change to this file. - * - * @author Jose Fonseca - */ - - -#include "util/u_debug.h" -#include "util/u_math.h" - -#include "lp_bld_type.h" -#include "lp_bld_const.h" -#include "lp_bld_arit.h" -#include "lp_bld_pack.h" -#include "lp_bld_conv.h" - - -/** - * Special case for converting clamped IEEE-754 floats to unsigned norms. - * - * The mathematical voodoo below may seem excessive but it is actually - * paramount we do it this way for several reasons. First, there is no single - * precision FP to unsigned integer conversion Intel SSE instruction. Second, - * secondly, even if there was, since the FP's mantissa takes only a fraction - * of register bits the typically scale and cast approach would require double - * precision for accurate results, and therefore half the throughput - * - * Although the result values can be scaled to an arbitrary bit width specified - * by dst_width, the actual result type will have the same width. 
- */ -LLVMValueRef -lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, - struct lp_type src_type, - unsigned dst_width, - LLVMValueRef src) -{ - LLVMTypeRef int_vec_type = lp_build_int_vec_type(src_type); - LLVMValueRef res; - unsigned mantissa; - unsigned n; - unsigned long long ubound; - unsigned long long mask; - double scale; - double bias; - - assert(src_type.floating); - - mantissa = lp_mantissa(src_type); - - /* We cannot carry more bits than the mantissa */ - n = MIN2(mantissa, dst_width); - - /* This magic coefficients will make the desired result to appear in the - * lowest significant bits of the mantissa. - */ - ubound = ((unsigned long long)1 << n); - mask = ubound - 1; - scale = (double)mask/ubound; - bias = (double)((unsigned long long)1 << (mantissa - n)); - - res = LLVMBuildMul(builder, src, lp_build_const_scalar(src_type, scale), ""); - res = LLVMBuildAdd(builder, res, lp_build_const_scalar(src_type, bias), ""); - res = LLVMBuildBitCast(builder, res, int_vec_type, ""); - - if(dst_width > n) { - int shift = dst_width - n; - res = LLVMBuildShl(builder, res, lp_build_int_const_scalar(src_type, shift), ""); - - /* TODO: Fill in the empty lower bits for additional precision? */ - /* YES: this fixes progs/trivial/tri-z-eq.c. - * Otherwise vertex Z=1.0 values get converted to something like - * 0xfffffb00 and the test for equality with 0xffffffff fails. 
- */ -#if 0 - { - LLVMValueRef msb; - msb = LLVMBuildLShr(builder, res, lp_build_int_const_scalar(src_type, dst_width - 1), ""); - msb = LLVMBuildShl(builder, msb, lp_build_int_const_scalar(src_type, shift), ""); - msb = LLVMBuildSub(builder, msb, lp_build_int_const_scalar(src_type, 1), ""); - res = LLVMBuildOr(builder, res, msb, ""); - } -#elif 0 - while(shift > 0) { - res = LLVMBuildOr(builder, res, LLVMBuildLShr(builder, res, lp_build_int_const_scalar(src_type, n), ""), ""); - shift -= n; - n *= 2; - } -#endif - } - else - res = LLVMBuildAnd(builder, res, lp_build_int_const_scalar(src_type, mask), ""); - - return res; -} - - -/** - * Inverse of lp_build_clamped_float_to_unsigned_norm above. - */ -LLVMValueRef -lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, - unsigned src_width, - struct lp_type dst_type, - LLVMValueRef src) -{ - LLVMTypeRef vec_type = lp_build_vec_type(dst_type); - LLVMTypeRef int_vec_type = lp_build_int_vec_type(dst_type); - LLVMValueRef bias_; - LLVMValueRef res; - unsigned mantissa; - unsigned n; - unsigned long long ubound; - unsigned long long mask; - double scale; - double bias; - - mantissa = lp_mantissa(dst_type); - - n = MIN2(mantissa, src_width); - - ubound = ((unsigned long long)1 << n); - mask = ubound - 1; - scale = (double)ubound/mask; - bias = (double)((unsigned long long)1 << (mantissa - n)); - - res = src; - - if(src_width > mantissa) { - int shift = src_width - mantissa; - res = LLVMBuildLShr(builder, res, lp_build_int_const_scalar(dst_type, shift), ""); - } - - bias_ = lp_build_const_scalar(dst_type, bias); - - res = LLVMBuildOr(builder, - res, - LLVMBuildBitCast(builder, bias_, int_vec_type, ""), ""); - - res = LLVMBuildBitCast(builder, res, vec_type, ""); - - res = LLVMBuildSub(builder, res, bias_, ""); - res = LLVMBuildMul(builder, res, lp_build_const_scalar(dst_type, scale), ""); - - return res; -} - - -/** - * Generic type conversion. 
- * - * TODO: Take a precision argument, or even better, add a new precision member - * to the lp_type union. - */ -void -lp_build_conv(LLVMBuilderRef builder, - struct lp_type src_type, - struct lp_type dst_type, - const LLVMValueRef *src, unsigned num_srcs, - LLVMValueRef *dst, unsigned num_dsts) -{ - struct lp_type tmp_type; - LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH]; - unsigned num_tmps; - unsigned i; - - /* Register width must remain constant */ - assert(src_type.width * src_type.length == dst_type.width * dst_type.length); - - /* We must not loose or gain channels. Only precision */ - assert(src_type.length * num_srcs == dst_type.length * num_dsts); - - assert(src_type.length <= LP_MAX_VECTOR_LENGTH); - assert(dst_type.length <= LP_MAX_VECTOR_LENGTH); - - tmp_type = src_type; - for(i = 0; i < num_srcs; ++i) - tmp[i] = src[i]; - num_tmps = num_srcs; - - /* - * Clamp if necessary - */ - - if(memcmp(&src_type, &dst_type, sizeof src_type) != 0) { - struct lp_build_context bld; - double src_min = lp_const_min(src_type); - double dst_min = lp_const_min(dst_type); - double src_max = lp_const_max(src_type); - double dst_max = lp_const_max(dst_type); - LLVMValueRef thres; - - lp_build_context_init(&bld, builder, tmp_type); - - if(src_min < dst_min) { - if(dst_min == 0.0) - thres = bld.zero; - else - thres = lp_build_const_scalar(src_type, dst_min); - for(i = 0; i < num_tmps; ++i) - tmp[i] = lp_build_max(&bld, tmp[i], thres); - } - - if(src_max > dst_max) { - if(dst_max == 1.0) - thres = bld.one; - else - thres = lp_build_const_scalar(src_type, dst_max); - for(i = 0; i < num_tmps; ++i) - tmp[i] = lp_build_min(&bld, tmp[i], thres); - } - } - - /* - * Scale to the narrowest range - */ - - if(dst_type.floating) { - /* Nothing to do */ - } - else if(tmp_type.floating) { - if(!dst_type.fixed && !dst_type.sign && dst_type.norm) { - for(i = 0; i < num_tmps; ++i) { - tmp[i] = lp_build_clamped_float_to_unsigned_norm(builder, - tmp_type, - dst_type.width, - tmp[i]); - } - 
tmp_type.floating = FALSE; - } - else { - double dst_scale = lp_const_scale(dst_type); - LLVMTypeRef tmp_vec_type; - - if (dst_scale != 1.0) { - LLVMValueRef scale = lp_build_const_scalar(tmp_type, dst_scale); - for(i = 0; i < num_tmps; ++i) - tmp[i] = LLVMBuildMul(builder, tmp[i], scale, ""); - } - - /* Use an equally sized integer for intermediate computations */ - tmp_type.floating = FALSE; - tmp_vec_type = lp_build_vec_type(tmp_type); - for(i = 0; i < num_tmps; ++i) { -#if 0 - if(dst_type.sign) - tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type, ""); - else - tmp[i] = LLVMBuildFPToUI(builder, tmp[i], tmp_vec_type, ""); -#else - /* FIXME: there is no SSE counterpart for LLVMBuildFPToUI */ - tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type, ""); -#endif - } - } - } - else { - unsigned src_shift = lp_const_shift(src_type); - unsigned dst_shift = lp_const_shift(dst_type); - - /* FIXME: compensate different offsets too */ - if(src_shift > dst_shift) { - LLVMValueRef shift = lp_build_int_const_scalar(tmp_type, src_shift - dst_shift); - for(i = 0; i < num_tmps; ++i) - if(src_type.sign) - tmp[i] = LLVMBuildAShr(builder, tmp[i], shift, ""); - else - tmp[i] = LLVMBuildLShr(builder, tmp[i], shift, ""); - } - } - - /* - * Truncate or expand bit width - */ - - assert(!tmp_type.floating || tmp_type.width == dst_type.width); - - if(tmp_type.width > dst_type.width) { - assert(num_dsts == 1); - tmp[0] = lp_build_pack(builder, tmp_type, dst_type, TRUE, tmp, num_tmps); - tmp_type.width = dst_type.width; - tmp_type.length = dst_type.length; - num_tmps = 1; - } - - if(tmp_type.width < dst_type.width) { - assert(num_tmps == 1); - lp_build_unpack(builder, tmp_type, dst_type, tmp[0], tmp, num_dsts); - tmp_type.width = dst_type.width; - tmp_type.length = dst_type.length; - num_tmps = num_dsts; - } - - assert(tmp_type.width == dst_type.width); - assert(tmp_type.length == dst_type.length); - assert(num_tmps == num_dsts); - - /* - * Scale to the widest range - */ - - 
if(src_type.floating) { - /* Nothing to do */ - } - else if(!src_type.floating && dst_type.floating) { - if(!src_type.fixed && !src_type.sign && src_type.norm) { - for(i = 0; i < num_tmps; ++i) { - tmp[i] = lp_build_unsigned_norm_to_float(builder, - src_type.width, - dst_type, - tmp[i]); - } - tmp_type.floating = TRUE; - } - else { - double src_scale = lp_const_scale(src_type); - LLVMTypeRef tmp_vec_type; - - /* Use an equally sized integer for intermediate computations */ - tmp_type.floating = TRUE; - tmp_type.sign = TRUE; - tmp_vec_type = lp_build_vec_type(tmp_type); - for(i = 0; i < num_tmps; ++i) { -#if 0 - if(dst_type.sign) - tmp[i] = LLVMBuildSIToFP(builder, tmp[i], tmp_vec_type, ""); - else - tmp[i] = LLVMBuildUIToFP(builder, tmp[i], tmp_vec_type, ""); -#else - /* FIXME: there is no SSE counterpart for LLVMBuildUIToFP */ - tmp[i] = LLVMBuildSIToFP(builder, tmp[i], tmp_vec_type, ""); -#endif - } - - if (src_scale != 1.0) { - LLVMValueRef scale = lp_build_const_scalar(tmp_type, 1.0/src_scale); - for(i = 0; i < num_tmps; ++i) - tmp[i] = LLVMBuildMul(builder, tmp[i], scale, ""); - } - } - } - else { - unsigned src_shift = lp_const_shift(src_type); - unsigned dst_shift = lp_const_shift(dst_type); - - /* FIXME: compensate different offsets too */ - if(src_shift < dst_shift) { - LLVMValueRef shift = lp_build_int_const_scalar(tmp_type, dst_shift - src_shift); - for(i = 0; i < num_tmps; ++i) - tmp[i] = LLVMBuildShl(builder, tmp[i], shift, ""); - } - } - - for(i = 0; i < num_dsts; ++i) - dst[i] = tmp[i]; -} - - -/** - * Bit mask conversion. - * - * This will convert the integer masks that match the given types. - * - * The mask values should 0 or -1, i.e., all bits either set to zero or one. - * Any other value will likely cause in unpredictable results. - * - * This is basically a very trimmed down version of lp_build_conv. 
- */ -void -lp_build_conv_mask(LLVMBuilderRef builder, - struct lp_type src_type, - struct lp_type dst_type, - const LLVMValueRef *src, unsigned num_srcs, - LLVMValueRef *dst, unsigned num_dsts) -{ - /* Register width must remain constant */ - assert(src_type.width * src_type.length == dst_type.width * dst_type.length); - - /* We must not loose or gain channels. Only precision */ - assert(src_type.length * num_srcs == dst_type.length * num_dsts); - - /* - * Drop - * - * We assume all values are 0 or -1 - */ - - src_type.floating = FALSE; - src_type.fixed = FALSE; - src_type.sign = TRUE; - src_type.norm = FALSE; - - dst_type.floating = FALSE; - dst_type.fixed = FALSE; - dst_type.sign = TRUE; - dst_type.norm = FALSE; - - /* - * Truncate or expand bit width - */ - - if(src_type.width > dst_type.width) { - assert(num_dsts == 1); - dst[0] = lp_build_pack(builder, src_type, dst_type, TRUE, src, num_srcs); - } - else if(src_type.width < dst_type.width) { - assert(num_srcs == 1); - lp_build_unpack(builder, src_type, dst_type, src[0], dst, num_dsts); - } - else { - assert(num_srcs == num_dsts); - memcpy(dst, src, num_dsts * sizeof *dst); - } -} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.h b/src/gallium/drivers/llvmpipe/lp_bld_conv.h deleted file mode 100644 index 948e68fae4f..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_bld_conv.h +++ /dev/null @@ -1,73 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * Helper functions for type conversions. 
- * - * @author Jose Fonseca - */ - - -#ifndef LP_BLD_CONV_H -#define LP_BLD_CONV_H - - -#include - - -struct lp_type; - - -LLVMValueRef -lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, - struct lp_type src_type, - unsigned dst_width, - LLVMValueRef src); - -LLVMValueRef -lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, - unsigned src_width, - struct lp_type dst_type, - LLVMValueRef src); - - -void -lp_build_conv(LLVMBuilderRef builder, - struct lp_type src_type, - struct lp_type dst_type, - const LLVMValueRef *srcs, unsigned num_srcs, - LLVMValueRef *dsts, unsigned num_dsts); - -void -lp_build_conv_mask(LLVMBuilderRef builder, - struct lp_type src_type, - struct lp_type dst_type, - const LLVMValueRef *src, unsigned num_srcs, - LLVMValueRef *dst, unsigned num_dsts); - -#endif /* !LP_BLD_CONV_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_debug.c b/src/gallium/drivers/llvmpipe/lp_bld_debug.c deleted file mode 100644 index 39dfc51e503..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_bld_debug.c +++ /dev/null @@ -1,132 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#ifdef HAVE_UDIS86 -#include -#endif - -#include "util/u_math.h" -#include "util/u_debug.h" -#include "lp_bld_debug.h" - - -/** - * Check alignment. - * - * It is important that this check is not implemented as a macro or inlined - * function, as the compiler assumptions in respect to alignment of global - * and stack variables would often make the check a no op, defeating the - * whole purpose of the exercise. - */ -boolean -lp_check_alignment(const void *ptr, unsigned alignment) -{ - assert(util_is_pot(alignment)); - return ((uintptr_t)ptr & (alignment - 1)) == 0; -} - - -void -lp_disassemble(const void* func) -{ -#ifdef HAVE_UDIS86 - ud_t ud_obj; - uint64_t max_jmp_pc; - - ud_init(&ud_obj); - - ud_set_input_buffer(&ud_obj, (void*)func, 0xffff); - - max_jmp_pc = (uint64_t) (uintptr_t) func; - ud_set_pc(&ud_obj, max_jmp_pc); - -#ifdef PIPE_ARCH_X86 - ud_set_mode(&ud_obj, 32); -#endif -#ifdef PIPE_ARCH_X86_64 - ud_set_mode(&ud_obj, 64); -#endif - - ud_set_syntax(&ud_obj, UD_SYN_ATT); - - while (ud_disassemble(&ud_obj)) { - -#ifdef PIPE_ARCH_X86 - debug_printf("0x%08lx:\t", (unsigned long)ud_insn_off(&ud_obj)); -#endif -#ifdef PIPE_ARCH_X86_64 - debug_printf("0x%016llx:\t", (unsigned long long)ud_insn_off(&ud_obj)); -#endif - -#if 0 - debug_printf("%-16s ", ud_insn_hex(&ud_obj)); -#endif - - debug_printf("%s\n", ud_insn_asm(&ud_obj)); - - if(ud_obj.mnemonic != UD_Icall) { - unsigned i; - for(i = 0; i 
< 3; ++i) { - const struct ud_operand *op = &ud_obj.operand[i]; - if (op->type == UD_OP_JIMM){ - uint64_t pc = ud_obj.pc; - - switch (op->size) { - case 8: - pc += op->lval.sbyte; - break; - case 16: - pc += op->lval.sword; - break; - case 32: - pc += op->lval.sdword; - break; - default: - break; - } - if(pc > max_jmp_pc) - max_jmp_pc = pc; - } - } - } - - if ((ud_insn_off(&ud_obj) >= max_jmp_pc && ud_obj.mnemonic == UD_Iret) || - ud_obj.mnemonic == UD_Iinvalid) - break; - } - -#if 0 - /* Print GDB command, useful to verify udis86 output */ - debug_printf("disassemble %p %p\n", func, (void*)(uintptr_t)ud_obj.pc); -#endif - - debug_printf("\n"); -#else - (void)func; -#endif -} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_debug.h b/src/gallium/drivers/llvmpipe/lp_bld_debug.h deleted file mode 100644 index 583e6132b4b..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_bld_debug.h +++ /dev/null @@ -1,64 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#ifndef LP_BLD_DEBUG_H -#define LP_BLD_DEBUG_H - - -#include - -#include "pipe/p_compiler.h" -#include "util/u_string.h" - - -static INLINE void -lp_build_name(LLVMValueRef val, const char *format, ...) -{ -#ifdef DEBUG - char name[32]; - va_list ap; - va_start(ap, format); - util_vsnprintf(name, sizeof name, format, ap); - va_end(ap); - LLVMSetValueName(val, name); -#else - (void)val; - (void)format; -#endif -} - - -boolean -lp_check_alignment(const void *ptr, unsigned alignment); - - -void -lp_disassemble(const void* func); - - -#endif /* !LP_BLD_DEBUG_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c deleted file mode 100644 index d438c0e63d7..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ /dev/null @@ -1,213 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * Depth/stencil testing to LLVM IR translation. - * - * To be done accurately/efficiently the depth/stencil test must be done with - * the same type/format of the depth/stencil buffer, which implies massaging - * the incoming depths to fit into place. Using a more straightforward - * type/format for depth/stencil values internally and only convert when - * flushing would avoid this, but it would most likely result in depth fighting - * artifacts. - * - * We are free to use a different pixel layout though. Since our basic - * processing unit is a quad (2x2 pixel block) we store the depth/stencil - * values tiled, a quad at time. That is, a depth buffer containing - * - * Z11 Z12 Z13 Z14 ... - * Z21 Z22 Z23 Z24 ... - * Z31 Z32 Z33 Z34 ... - * Z41 Z42 Z43 Z44 ... - * ... ... ... ... ... - * - * will actually be stored in memory as - * - * Z11 Z12 Z21 Z22 Z13 Z14 Z23 Z24 ... - * Z31 Z32 Z41 Z42 Z33 Z34 Z43 Z44 ... - * ... ... ... ... ... ... ... ... ... - * - * FIXME: Code generate stencil test - * - * @author Jose Fonseca - */ - -#include "pipe/p_state.h" -#include "util/u_format.h" - -#include "lp_bld_type.h" -#include "lp_bld_const.h" -#include "lp_bld_logic.h" -#include "lp_bld_flow.h" -#include "lp_bld_debug.h" -#include "lp_bld_depth.h" - - -/** - * Return a type appropriate for depth/stencil testing. 
- */ -struct lp_type -lp_depth_type(const struct util_format_description *format_desc, - unsigned length) -{ - struct lp_type type; - unsigned swizzle; - - assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS); - assert(format_desc->block.width == 1); - assert(format_desc->block.height == 1); - - swizzle = format_desc->swizzle[0]; - assert(swizzle < 4); - - memset(&type, 0, sizeof type); - type.width = format_desc->block.bits; - - if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_FLOAT) { - type.floating = TRUE; - assert(swizzle == 0); - assert(format_desc->channel[swizzle].size == format_desc->block.bits); - } - else if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) { - assert(format_desc->block.bits <= 32); - if(format_desc->channel[swizzle].normalized) - type.norm = TRUE; - } - else - assert(0); - - assert(type.width <= length); - type.length = length / type.width; - - return type; -} - - -/** - * Depth test. - */ -void -lp_build_depth_test(LLVMBuilderRef builder, - const struct pipe_depth_state *state, - struct lp_type type, - const struct util_format_description *format_desc, - struct lp_build_mask_context *mask, - LLVMValueRef src, - LLVMValueRef dst_ptr) -{ - struct lp_build_context bld; - unsigned z_swizzle; - LLVMValueRef dst; - LLVMValueRef z_bitmask = NULL; - LLVMValueRef test; - - if(!state->enabled) - return; - - assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS); - assert(format_desc->block.width == 1); - assert(format_desc->block.height == 1); - - z_swizzle = format_desc->swizzle[0]; - if(z_swizzle == UTIL_FORMAT_SWIZZLE_NONE) - return; - - /* Sanity checking */ - assert(z_swizzle < 4); - assert(format_desc->block.bits == type.width); - if(type.floating) { - assert(z_swizzle == 0); - assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_FLOAT); - assert(format_desc->channel[z_swizzle].size == format_desc->block.bits); - } - else { - assert(format_desc->channel[z_swizzle].type == 
UTIL_FORMAT_TYPE_UNSIGNED); - assert(format_desc->channel[z_swizzle].normalized); - assert(!type.fixed); - assert(!type.sign); - assert(type.norm); - } - - /* Setup build context */ - lp_build_context_init(&bld, builder, type); - - dst = LLVMBuildLoad(builder, dst_ptr, ""); - - lp_build_name(dst, "zsbuf"); - - /* Align the source depth bits with the destination's, and mask out any - * stencil or padding bits from both */ - if(format_desc->channel[z_swizzle].size == format_desc->block.bits) { - assert(z_swizzle == 0); - /* nothing to do */ - } - else { - unsigned padding_left; - unsigned padding_right; - unsigned chan; - - assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH); - assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED); - assert(format_desc->channel[z_swizzle].size <= format_desc->block.bits); - assert(format_desc->channel[z_swizzle].normalized); - - padding_right = 0; - for(chan = 0; chan < z_swizzle; ++chan) - padding_right += format_desc->channel[chan].size; - padding_left = format_desc->block.bits - - (padding_right + format_desc->channel[z_swizzle].size); - - if(padding_left || padding_right) { - const unsigned long long mask_left = ((unsigned long long)1 << (format_desc->block.bits - padding_left)) - 1; - const unsigned long long mask_right = ((unsigned long long)1 << (padding_right)) - 1; - z_bitmask = lp_build_int_const_scalar(type, mask_left ^ mask_right); - } - - if(padding_left) - src = LLVMBuildLShr(builder, src, lp_build_int_const_scalar(type, padding_left), ""); - if(padding_right) - src = LLVMBuildAnd(builder, src, z_bitmask, ""); - if(padding_left || padding_right) - dst = LLVMBuildAnd(builder, dst, z_bitmask, ""); - } - - lp_build_name(dst, "zsbuf.z"); - - test = lp_build_cmp(&bld, state->func, src, dst); - lp_build_mask_update(mask, test); - - if(state->writemask) { - if(z_bitmask) - z_bitmask = LLVMBuildAnd(builder, mask->value, z_bitmask, ""); - else - z_bitmask = mask->value; - - dst = lp_build_select(&bld, 
z_bitmask, src, dst); - LLVMBuildStore(builder, dst, dst_ptr); - } -} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.h b/src/gallium/drivers/llvmpipe/lp_bld_depth.h deleted file mode 100644 index 79d6981bb51..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.h +++ /dev/null @@ -1,63 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/** - * Depth/stencil testing to LLVM IR translation. 
- * - * @author Jose Fonseca - */ - -#ifndef LP_BLD_DEPTH_H -#define LP_BLD_DEPTH_H - - -#include - - -struct pipe_depth_state; -struct util_format_description; -struct lp_type; -struct lp_build_mask_context; - - -struct lp_type -lp_depth_type(const struct util_format_description *format_desc, - unsigned length); - - -void -lp_build_depth_test(LLVMBuilderRef builder, - const struct pipe_depth_state *state, - struct lp_type type, - const struct util_format_description *format_desc, - struct lp_build_mask_context *mask, - LLVMValueRef src, - LLVMValueRef dst_ptr); - - -#endif /* !LP_BLD_DEPTH_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c deleted file mode 100644 index bc831389085..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c +++ /dev/null @@ -1,757 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * LLVM control flow build helpers. - * - * @author Jose Fonseca - */ - -#include "util/u_debug.h" -#include "util/u_memory.h" - -#include "lp_bld_type.h" -#include "lp_bld_flow.h" - - -#define LP_BUILD_FLOW_MAX_VARIABLES 32 -#define LP_BUILD_FLOW_MAX_DEPTH 32 - -/** - * Enumeration of all possible flow constructs. - */ -enum lp_build_flow_construct_kind { - LP_BUILD_FLOW_SCOPE, - LP_BUILD_FLOW_SKIP, - LP_BUILD_FLOW_IF -}; - - -/** - * Variable declaration scope. - */ -struct lp_build_flow_scope -{ - /** Number of variables declared in this scope */ - unsigned num_variables; -}; - - -/** - * Early exit. Useful to skip to the end of a function or block when - * the execution mask becomes zero or when there is an error condition. - */ -struct lp_build_flow_skip -{ - /** Block to skip to */ - LLVMBasicBlockRef block; - - /** Number of variables declared at the beginning */ - unsigned num_variables; - - LLVMValueRef *phi; /**< array [num_variables] */ -}; - - -/** - * if/else/endif. - */ -struct lp_build_flow_if -{ - unsigned num_variables; - - LLVMValueRef *phi; /**< array [num_variables] */ - - LLVMValueRef condition; - LLVMBasicBlockRef entry_block, true_block, false_block, merge_block; -}; - - -/** - * Union of all possible flow constructs' data - */ -union lp_build_flow_construct_data -{ - struct lp_build_flow_scope scope; - struct lp_build_flow_skip skip; - struct lp_build_flow_if ifthen; -}; - - -/** - * Element of the flow construct stack. 
- */ -struct lp_build_flow_construct -{ - enum lp_build_flow_construct_kind kind; - union lp_build_flow_construct_data data; -}; - - -/** - * All necessary data to generate LLVM control flow constructs. - * - * Besides keeping track of the control flow construct themselves we also - * need to keep track of variables in order to generate SSA Phi values. - */ -struct lp_build_flow_context -{ - LLVMBuilderRef builder; - - /** - * Control flow stack. - */ - struct lp_build_flow_construct constructs[LP_BUILD_FLOW_MAX_DEPTH]; - unsigned num_constructs; - - /** - * Variable stack - */ - LLVMValueRef *variables[LP_BUILD_FLOW_MAX_VARIABLES]; - unsigned num_variables; -}; - - -struct lp_build_flow_context * -lp_build_flow_create(LLVMBuilderRef builder) -{ - struct lp_build_flow_context *flow; - - flow = CALLOC_STRUCT(lp_build_flow_context); - if(!flow) - return NULL; - - flow->builder = builder; - - return flow; -} - - -void -lp_build_flow_destroy(struct lp_build_flow_context *flow) -{ - assert(flow->num_constructs == 0); - assert(flow->num_variables == 0); - FREE(flow); -} - - -/** - * Begin/push a new flow control construct, such as a loop, skip block - * or variable scope. - */ -static union lp_build_flow_construct_data * -lp_build_flow_push(struct lp_build_flow_context *flow, - enum lp_build_flow_construct_kind kind) -{ - assert(flow->num_constructs < LP_BUILD_FLOW_MAX_DEPTH); - if(flow->num_constructs >= LP_BUILD_FLOW_MAX_DEPTH) - return NULL; - - flow->constructs[flow->num_constructs].kind = kind; - return &flow->constructs[flow->num_constructs++].data; -} - - -/** - * Return the current/top flow control construct on the stack. 
- * \param kind the expected type of the top-most construct - */ -static union lp_build_flow_construct_data * -lp_build_flow_peek(struct lp_build_flow_context *flow, - enum lp_build_flow_construct_kind kind) -{ - assert(flow->num_constructs); - if(!flow->num_constructs) - return NULL; - - assert(flow->constructs[flow->num_constructs - 1].kind == kind); - if(flow->constructs[flow->num_constructs - 1].kind != kind) - return NULL; - - return &flow->constructs[flow->num_constructs - 1].data; -} - - -/** - * End/pop the current/top flow control construct on the stack. - * \param kind the expected type of the top-most construct - */ -static union lp_build_flow_construct_data * -lp_build_flow_pop(struct lp_build_flow_context *flow, - enum lp_build_flow_construct_kind kind) -{ - assert(flow->num_constructs); - if(!flow->num_constructs) - return NULL; - - assert(flow->constructs[flow->num_constructs - 1].kind == kind); - if(flow->constructs[flow->num_constructs - 1].kind != kind) - return NULL; - - return &flow->constructs[--flow->num_constructs].data; -} - - -/** - * Begin a variable scope. - * - * - */ -void -lp_build_flow_scope_begin(struct lp_build_flow_context *flow) -{ - struct lp_build_flow_scope *scope; - - scope = &lp_build_flow_push(flow, LP_BUILD_FLOW_SCOPE)->scope; - if(!scope) - return; - - scope->num_variables = 0; -} - - -/** - * Declare a variable. - * - * A variable is a named entity which can have different LLVMValueRef's at - * different points of the program. This is relevant for control flow because - * when there are multiple branches to a same location we need to replace - * the variable's value with a Phi function as explained in - * http://en.wikipedia.org/wiki/Static_single_assignment_form . - * - * We keep track of variables by keeping around a pointer to where they're - * current. - * - * There are a few cautions to observe: - * - * - Variable's value must not be NULL. If there is no initial value then - * LLVMGetUndef() should be used. 
- * - * - Variable's value must be kept up-to-date. If the variable is going to be - * modified by a function then a pointer should be passed so that its value - * is accurate. Failure to do this will cause some of the variables' - * transient values to be lost, leading to wrong results. - * - * - A program should be written from top to bottom, by always appending - * instructions to the bottom with a single LLVMBuilderRef. Inserting and/or - * modifying existing statements will most likely lead to wrong results. - * - */ -void -lp_build_flow_scope_declare(struct lp_build_flow_context *flow, - LLVMValueRef *variable) -{ - struct lp_build_flow_scope *scope; - - scope = &lp_build_flow_peek(flow, LP_BUILD_FLOW_SCOPE)->scope; - if(!scope) - return; - - assert(*variable); - if(!*variable) - return; - - assert(flow->num_variables < LP_BUILD_FLOW_MAX_VARIABLES); - if(flow->num_variables >= LP_BUILD_FLOW_MAX_VARIABLES) - return; - - flow->variables[flow->num_variables++] = variable; - ++scope->num_variables; -} - - -void -lp_build_flow_scope_end(struct lp_build_flow_context *flow) -{ - struct lp_build_flow_scope *scope; - - scope = &lp_build_flow_pop(flow, LP_BUILD_FLOW_SCOPE)->scope; - if(!scope) - return; - - assert(flow->num_variables >= scope->num_variables); - if(flow->num_variables < scope->num_variables) { - flow->num_variables = 0; - return; - } - - flow->num_variables -= scope->num_variables; -} - - -/** - * Note: this function has no dependencies on the flow code and could - * be used elsewhere. 
- */ -static LLVMBasicBlockRef -lp_build_insert_new_block(LLVMBuilderRef builder, const char *name) -{ - LLVMBasicBlockRef current_block; - LLVMBasicBlockRef next_block; - LLVMBasicBlockRef new_block; - - /* get current basic block */ - current_block = LLVMGetInsertBlock(builder); - - /* check if there's another block after this one */ - next_block = LLVMGetNextBasicBlock(current_block); - if (next_block) { - /* insert the new block before the next block */ - new_block = LLVMInsertBasicBlock(next_block, name); - } - else { - /* append new block after current block */ - LLVMValueRef function = LLVMGetBasicBlockParent(current_block); - new_block = LLVMAppendBasicBlock(function, name); - } - - return new_block; -} - - -static LLVMBasicBlockRef -lp_build_flow_insert_block(struct lp_build_flow_context *flow) -{ - return lp_build_insert_new_block(flow->builder, ""); -} - - -/** - * Begin a "skip" block. Inside this block we can test a condition and - * skip to the end of the block if the condition is false. 
- */ -void -lp_build_flow_skip_begin(struct lp_build_flow_context *flow) -{ - struct lp_build_flow_skip *skip; - LLVMBuilderRef builder; - unsigned i; - - skip = &lp_build_flow_push(flow, LP_BUILD_FLOW_SKIP)->skip; - if(!skip) - return; - - /* create new basic block */ - skip->block = lp_build_flow_insert_block(flow); - - skip->num_variables = flow->num_variables; - if(!skip->num_variables) { - skip->phi = NULL; - return; - } - - /* Allocate a Phi node for each variable in this skip scope */ - skip->phi = MALLOC(skip->num_variables * sizeof *skip->phi); - if(!skip->phi) { - skip->num_variables = 0; - return; - } - - builder = LLVMCreateBuilder(); - LLVMPositionBuilderAtEnd(builder, skip->block); - - /* create a Phi node for each variable */ - for(i = 0; i < skip->num_variables; ++i) - skip->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), ""); - - LLVMDisposeBuilder(builder); -} - - -/** - * Insert code to test a condition and branch to the end of the current - * skip block if the condition is true. 
- */ -void -lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow, - LLVMValueRef cond) -{ - struct lp_build_flow_skip *skip; - LLVMBasicBlockRef current_block; - LLVMBasicBlockRef new_block; - unsigned i; - - skip = &lp_build_flow_peek(flow, LP_BUILD_FLOW_SKIP)->skip; - if(!skip) - return; - - current_block = LLVMGetInsertBlock(flow->builder); - - new_block = lp_build_flow_insert_block(flow); - - /* for each variable, update the Phi node with a (variable, block) pair */ - for(i = 0; i < skip->num_variables; ++i) { - assert(*flow->variables[i]); - LLVMAddIncoming(skip->phi[i], flow->variables[i], &current_block, 1); - } - - /* if cond is true, goto skip->block, else goto new_block */ - LLVMBuildCondBr(flow->builder, cond, skip->block, new_block); - - LLVMPositionBuilderAtEnd(flow->builder, new_block); -} - - -void -lp_build_flow_skip_end(struct lp_build_flow_context *flow) -{ - struct lp_build_flow_skip *skip; - LLVMBasicBlockRef current_block; - unsigned i; - - skip = &lp_build_flow_pop(flow, LP_BUILD_FLOW_SKIP)->skip; - if(!skip) - return; - - current_block = LLVMGetInsertBlock(flow->builder); - - /* add (variable, block) tuples to the phi nodes */ - for(i = 0; i < skip->num_variables; ++i) { - assert(*flow->variables[i]); - LLVMAddIncoming(skip->phi[i], flow->variables[i], &current_block, 1); - *flow->variables[i] = skip->phi[i]; - } - - /* goto block */ - LLVMBuildBr(flow->builder, skip->block); - LLVMPositionBuilderAtEnd(flow->builder, skip->block); - - FREE(skip->phi); -} - - -/** - * Check if the mask predicate is zero. If so, jump to the end of the block.
- */ -static void -lp_build_mask_check(struct lp_build_mask_context *mask) -{ - LLVMBuilderRef builder = mask->flow->builder; - LLVMValueRef cond; - - /* cond = (mask == 0) */ - cond = LLVMBuildICmp(builder, - LLVMIntEQ, - LLVMBuildBitCast(builder, mask->value, mask->reg_type, ""), - LLVMConstNull(mask->reg_type), - ""); - - /* if cond, goto end of block */ - lp_build_flow_skip_cond_break(mask->flow, cond); -} - - -/** - * Begin a section of code which is predicated on a mask. - * \param mask the mask context, initialized here - * \param flow the flow context - * \param type the type of the mask - * \param value storage for the mask - */ -void -lp_build_mask_begin(struct lp_build_mask_context *mask, - struct lp_build_flow_context *flow, - struct lp_type type, - LLVMValueRef value) -{ - memset(mask, 0, sizeof *mask); - - mask->flow = flow; - mask->reg_type = LLVMIntType(type.width * type.length); - mask->value = value; - - lp_build_flow_scope_begin(flow); - lp_build_flow_scope_declare(flow, &mask->value); - lp_build_flow_skip_begin(flow); - - lp_build_mask_check(mask); -} - - -/** - * Update boolean mask with given value (bitwise AND). - * Typically used to update the quad's pixel alive/killed mask - * after depth testing, alpha testing, TGSI_OPCODE_KIL, etc. - */ -void -lp_build_mask_update(struct lp_build_mask_context *mask, - LLVMValueRef value) -{ - mask->value = LLVMBuildAnd( mask->flow->builder, mask->value, value, ""); - - lp_build_mask_check(mask); -} - - -/** - * End section of code which is predicated on a mask. 
- */ -LLVMValueRef -lp_build_mask_end(struct lp_build_mask_context *mask) -{ - lp_build_flow_skip_end(mask->flow); - lp_build_flow_scope_end(mask->flow); - return mask->value; -} - - - -void -lp_build_loop_begin(LLVMBuilderRef builder, - LLVMValueRef start, - struct lp_build_loop_state *state) -{ - LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); - LLVMValueRef function = LLVMGetBasicBlockParent(block); - - state->block = LLVMAppendBasicBlock(function, "loop"); - - LLVMBuildBr(builder, state->block); - - LLVMPositionBuilderAtEnd(builder, state->block); - - state->counter = LLVMBuildPhi(builder, LLVMTypeOf(start), ""); - - LLVMAddIncoming(state->counter, &start, &block, 1); - -} - - -void -lp_build_loop_end(LLVMBuilderRef builder, - LLVMValueRef end, - LLVMValueRef step, - struct lp_build_loop_state *state) -{ - LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); - LLVMValueRef function = LLVMGetBasicBlockParent(block); - LLVMValueRef next; - LLVMValueRef cond; - LLVMBasicBlockRef after_block; - - if (!step) - step = LLVMConstInt(LLVMTypeOf(end), 1, 0); - - next = LLVMBuildAdd(builder, state->counter, step, ""); - - cond = LLVMBuildICmp(builder, LLVMIntNE, next, end, ""); - - after_block = LLVMAppendBasicBlock(function, ""); - - LLVMBuildCondBr(builder, cond, after_block, state->block); - - LLVMAddIncoming(state->counter, &next, &block, 1); - - LLVMPositionBuilderAtEnd(builder, after_block); -} - - - -/* - Example of if/then/else building: - - int x; - if (cond) { - x = 1 + 2; - } - else { - x = 2 + 3; - } - - Is built with: - - LLVMValueRef x = LLVMGetUndef(); // or something else - - flow = lp_build_flow_create(builder); - - lp_build_flow_scope_begin(flow); - - // x needs a phi node - lp_build_flow_scope_declare(flow, &x); - - lp_build_if(ctx, flow, builder, cond); - x = LLVMAdd(1, 2); - lp_build_else(ctx); - x = LLVMAdd(2, 3); - lp_build_endif(ctx); - - lp_build_flow_scope_end(flow); - - lp_build_flow_destroy(flow); - */ - - - -/** - * Begin an 
if/else/endif construct. - */ -void -lp_build_if(struct lp_build_if_state *ctx, - struct lp_build_flow_context *flow, - LLVMBuilderRef builder, - LLVMValueRef condition) -{ - LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); - struct lp_build_flow_if *ifthen; - unsigned i; - - memset(ctx, 0, sizeof(*ctx)); - ctx->builder = builder; - ctx->flow = flow; - - /* push/create new scope */ - ifthen = &lp_build_flow_push(flow, LP_BUILD_FLOW_IF)->ifthen; - assert(ifthen); - - ifthen->num_variables = flow->num_variables; - ifthen->condition = condition; - ifthen->entry_block = block; - - /* create a Phi node for each variable in this flow scope */ - ifthen->phi = MALLOC(ifthen->num_variables * sizeof(*ifthen->phi)); - if (!ifthen->phi) { - ifthen->num_variables = 0; - return; - } - - /* create endif/merge basic block for the phi functions */ - ifthen->merge_block = lp_build_insert_new_block(builder, "endif-block"); - LLVMPositionBuilderAtEnd(builder, ifthen->merge_block); - - /* create a phi node for each variable */ - for (i = 0; i < flow->num_variables; i++) { - ifthen->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), ""); - - /* add add the initial value of the var from the entry block */ - LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->entry_block, 1); - } - - /* create/insert true_block before merge_block */ - ifthen->true_block = LLVMInsertBasicBlock(ifthen->merge_block, "if-true-block"); - - /* successive code goes into the true block */ - LLVMPositionBuilderAtEnd(builder, ifthen->true_block); -} - - -/** - * Begin else-part of a conditional - */ -void -lp_build_else(struct lp_build_if_state *ctx) -{ - struct lp_build_flow_context *flow = ctx->flow; - struct lp_build_flow_if *ifthen; - unsigned i; - - ifthen = &lp_build_flow_peek(flow, LP_BUILD_FLOW_IF)->ifthen; - assert(ifthen); - - /* for each variable, update the Phi node with a (variable, block) pair */ - LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); - for (i = 0; i 
< flow->num_variables; i++) { - assert(*flow->variables[i]); - LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->true_block, 1); - } - - /* create/insert false_block before the merge block */ - ifthen->false_block = LLVMInsertBasicBlock(ifthen->merge_block, "if-false-block"); - - /* successive code goes into the else block */ - LLVMPositionBuilderAtEnd(ctx->builder, ifthen->false_block); -} - - -/** - * End a conditional. - */ -void -lp_build_endif(struct lp_build_if_state *ctx) -{ - struct lp_build_flow_context *flow = ctx->flow; - struct lp_build_flow_if *ifthen; - unsigned i; - - ifthen = &lp_build_flow_pop(flow, LP_BUILD_FLOW_IF)->ifthen; - assert(ifthen); - - if (ifthen->false_block) { - LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); - /* for each variable, update the Phi node with a (variable, block) pair */ - for (i = 0; i < flow->num_variables; i++) { - assert(*flow->variables[i]); - LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->false_block, 1); - - /* replace the variable ref with the phi function */ - *flow->variables[i] = ifthen->phi[i]; - } - } - else { - /* no else clause */ - LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); - for (i = 0; i < flow->num_variables; i++) { - assert(*flow->variables[i]); - LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->true_block, 1); - - /* replace the variable ref with the phi function */ - *flow->variables[i] = ifthen->phi[i]; - } - } - - FREE(ifthen->phi); - - /*** - *** Now patch in the various branch instructions. 
- ***/ - - /* Insert the conditional branch instruction at the end of entry_block */ - LLVMPositionBuilderAtEnd(ctx->builder, ifthen->entry_block); - if (ifthen->false_block) { - /* we have an else clause */ - LLVMBuildCondBr(ctx->builder, ifthen->condition, - ifthen->true_block, ifthen->false_block); - } - else { - /* no else clause */ - LLVMBuildCondBr(ctx->builder, ifthen->condition, - ifthen->true_block, ifthen->merge_block); - } - - /* Append an unconditional Br(anch) instruction on the true_block */ - LLVMPositionBuilderAtEnd(ctx->builder, ifthen->true_block); - LLVMBuildBr(ctx->builder, ifthen->merge_block); - if (ifthen->false_block) { - /* Append an unconditional Br(anch) instruction on the false_block */ - LLVMPositionBuilderAtEnd(ctx->builder, ifthen->false_block); - LLVMBuildBr(ctx->builder, ifthen->merge_block); - } - - - /* Resume building code at end of the ifthen->merge_block */ - LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); -} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.h b/src/gallium/drivers/llvmpipe/lp_bld_flow.h deleted file mode 100644 index 4c225a0d4f9..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.h +++ /dev/null @@ -1,151 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * LLVM control flow build helpers. - * - * @author Jose Fonseca - */ - -#ifndef LP_BLD_FLOW_H -#define LP_BLD_FLOW_H - - -#include <llvm-c/Core.h> - - -struct lp_type; - - -struct lp_build_flow_context; - - -struct lp_build_flow_context * -lp_build_flow_create(LLVMBuilderRef builder); - -void -lp_build_flow_destroy(struct lp_build_flow_context *flow); - -void -lp_build_flow_scope_begin(struct lp_build_flow_context *flow); - -void -lp_build_flow_scope_declare(struct lp_build_flow_context *flow, - LLVMValueRef *variable); - -void -lp_build_flow_scope_end(struct lp_build_flow_context *flow); - -void -lp_build_flow_skip_begin(struct lp_build_flow_context *flow); - -void -lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow, - LLVMValueRef cond); - -void -lp_build_flow_skip_end(struct lp_build_flow_context *flow); - - -struct lp_build_mask_context -{ - struct lp_build_flow_context *flow; - - LLVMTypeRef reg_type; - - LLVMValueRef value; -}; - - -void -lp_build_mask_begin(struct lp_build_mask_context *mask, - struct lp_build_flow_context *flow, - struct lp_type type, - LLVMValueRef value); - -/** - * Bitwise AND the mask with the given value, if a previous mask was set. - */ -void -lp_build_mask_update(struct lp_build_mask_context *mask, - LLVMValueRef value); - -LLVMValueRef -lp_build_mask_end(struct lp_build_mask_context *mask); - - -/** - * LLVM's IR doesn't represent for-loops directly.
Furthermore it - * it requires creating code blocks, branches, phi variables, so it - * requires a fair amount of code. - * - * @sa http://www.llvm.org/docs/tutorial/LangImpl5.html#for - */ -struct lp_build_loop_state -{ - LLVMBasicBlockRef block; - LLVMValueRef counter; -}; - - -void -lp_build_loop_begin(LLVMBuilderRef builder, - LLVMValueRef start, - struct lp_build_loop_state *state); - - -void -lp_build_loop_end(LLVMBuilderRef builder, - LLVMValueRef end, - LLVMValueRef step, - struct lp_build_loop_state *state); - - - - -struct lp_build_if_state -{ - LLVMBuilderRef builder; - struct lp_build_flow_context *flow; -}; - - -void -lp_build_if(struct lp_build_if_state *ctx, - struct lp_build_flow_context *flow, - LLVMBuilderRef builder, - LLVMValueRef condition); - -void -lp_build_else(struct lp_build_if_state *ctx); - -void -lp_build_endif(struct lp_build_if_state *ctx); - - - -#endif /* !LP_BLD_FLOW_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format.h b/src/gallium/drivers/llvmpipe/lp_bld_format.h deleted file mode 100644 index 970bee379f5..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_bld_format.h +++ /dev/null @@ -1,83 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef LP_BLD_FORMAT_H -#define LP_BLD_FORMAT_H - - -/** - * @file - * Pixel format helpers. - */ - -#include <llvm-c/Core.h> - -#include "pipe/p_format.h" - -struct util_format_description; -struct lp_type; - - -boolean -lp_format_is_rgba8(const struct util_format_description *desc); - - -void -lp_build_format_swizzle_soa(const struct util_format_description *format_desc, - struct lp_type type, - const LLVMValueRef *unswizzled, - LLVMValueRef *swizzled); - - -LLVMValueRef -lp_build_unpack_rgba_aos(LLVMBuilderRef builder, - const struct util_format_description *desc, - LLVMValueRef packed); - - -LLVMValueRef -lp_build_unpack_rgba8_aos(LLVMBuilderRef builder, - const struct util_format_description *desc, - struct lp_type type, - LLVMValueRef packed); - - -LLVMValueRef -lp_build_pack_rgba_aos(LLVMBuilderRef builder, - const struct util_format_description *desc, - LLVMValueRef rgba); - - -void -lp_build_unpack_rgba_soa(LLVMBuilderRef builder, - const struct util_format_description *format_desc, - struct lp_type type, - LLVMValueRef packed, - LLVMValueRef *rgba); - - -#endif /* !LP_BLD_FORMAT_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c deleted file mode 100644 index dfa080b8533..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c +++ /dev/null @@ -1,383 +0,0 @@
-/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * AoS pixel format manipulation. - * - * @author Jose Fonseca - */ - - -#include "util/u_cpu_detect.h" -#include "util/u_format.h" - -#include "lp_bld_type.h" -#include "lp_bld_const.h" -#include "lp_bld_swizzle.h" -#include "lp_bld_format.h" - - -/** - * Unpack a single pixel into its RGBA components. - * - * @param packed integer. - * - * @return RGBA in a 4 floats vector. - * - * XXX: This is mostly for reference and testing -- operating a single pixel at - * a time is rarely if ever needed. 
- */ -LLVMValueRef -lp_build_unpack_rgba_aos(LLVMBuilderRef builder, - const struct util_format_description *desc, - LLVMValueRef packed) -{ - LLVMTypeRef type; - LLVMValueRef shifted, casted, scaled, masked; - LLVMValueRef shifts[4]; - LLVMValueRef masks[4]; - LLVMValueRef scales[4]; - LLVMValueRef swizzles[4]; - LLVMValueRef aux[4]; - bool normalized; - int empty_channel; - unsigned shift; - unsigned i; - - /* FIXME: Support more formats */ - assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH); - assert(desc->block.width == 1); - assert(desc->block.height == 1); - assert(desc->block.bits <= 32); - - type = LLVMIntType(desc->block.bits); - - /* Do the intermediate integer computations with 32bit integers since it - * matches floating point size */ - if (desc->block.bits < 32) - packed = LLVMBuildZExt(builder, packed, LLVMInt32Type(), ""); - - /* Broadcast the packed value to all four channels */ - packed = LLVMBuildInsertElement(builder, - LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)), - packed, - LLVMConstNull(LLVMInt32Type()), - ""); - packed = LLVMBuildShuffleVector(builder, - packed, - LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)), - LLVMConstNull(LLVMVectorType(LLVMInt32Type(), 4)), - ""); - - /* Initialize vector constants */ - normalized = FALSE; - empty_channel = -1; - shift = 0; - for (i = 0; i < 4; ++i) { - unsigned bits = desc->channel[i].size; - - if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) { - shifts[i] = LLVMGetUndef(LLVMInt32Type()); - masks[i] = LLVMConstNull(LLVMInt32Type()); - scales[i] = LLVMConstNull(LLVMFloatType()); - empty_channel = i; - } - else { - unsigned mask = (1 << bits) - 1; - - assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED); - assert(bits < 32); - - shifts[i] = LLVMConstInt(LLVMInt32Type(), shift, 0); - masks[i] = LLVMConstInt(LLVMInt32Type(), mask, 0); - - if (desc->channel[i].normalized) { - scales[i] = LLVMConstReal(LLVMFloatType(), 1.0/mask); - normalized = TRUE; - } - else - scales[i] = 
LLVMConstReal(LLVMFloatType(), 1.0); - } - - shift += bits; - } - - shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), ""); - masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), ""); - /* UIToFP can't be expressed in SSE2 */ - casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), ""); - - if (normalized) - scaled = LLVMBuildMul(builder, casted, LLVMConstVector(scales, 4), ""); - else - scaled = casted; - - for (i = 0; i < 4; ++i) - aux[i] = LLVMGetUndef(LLVMFloatType()); - - for (i = 0; i < 4; ++i) { - enum util_format_swizzle swizzle = desc->swizzle[i]; - - switch (swizzle) { - case UTIL_FORMAT_SWIZZLE_X: - case UTIL_FORMAT_SWIZZLE_Y: - case UTIL_FORMAT_SWIZZLE_Z: - case UTIL_FORMAT_SWIZZLE_W: - swizzles[i] = LLVMConstInt(LLVMInt32Type(), swizzle, 0); - break; - case UTIL_FORMAT_SWIZZLE_0: - assert(empty_channel >= 0); - swizzles[i] = LLVMConstInt(LLVMInt32Type(), empty_channel, 0); - break; - case UTIL_FORMAT_SWIZZLE_1: - swizzles[i] = LLVMConstInt(LLVMInt32Type(), 4, 0); - aux[0] = LLVMConstReal(LLVMFloatType(), 1.0); - break; - case UTIL_FORMAT_SWIZZLE_NONE: - swizzles[i] = LLVMGetUndef(LLVMFloatType()); - assert(0); - break; - } - } - - return LLVMBuildShuffleVector(builder, scaled, LLVMConstVector(aux, 4), LLVMConstVector(swizzles, 4), ""); -} - - -/** - * Take a vector with packed pixels and unpack into a rgba8 vector. - * - * Formats with bit depth smaller than 32bits are accepted, but they must be - * padded to 32bits. 
- */ -LLVMValueRef -lp_build_unpack_rgba8_aos(LLVMBuilderRef builder, - const struct util_format_description *desc, - struct lp_type type, - LLVMValueRef packed) -{ - struct lp_build_context bld; - bool rgba8; - LLVMValueRef res; - unsigned i; - - lp_build_context_init(&bld, builder, type); - - /* FIXME: Support more formats */ - assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH); - assert(desc->block.width == 1); - assert(desc->block.height == 1); - assert(desc->block.bits <= 32); - - assert(!type.floating); - assert(!type.fixed); - assert(type.norm); - assert(type.width == 8); - assert(type.length % 4 == 0); - - rgba8 = TRUE; - for(i = 0; i < 4; ++i) { - assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED || - desc->channel[i].type == UTIL_FORMAT_TYPE_VOID); - if(desc->channel[0].size != 8) - rgba8 = FALSE; - } - - if(rgba8) { - /* - * The pixel is already in a rgba8 format variant. All it is necessary - * is to swizzle the channels. - */ - - unsigned char swizzles[4]; - boolean zeros[4]; /* bitwise AND mask */ - boolean ones[4]; /* bitwise OR mask */ - boolean swizzles_needed = FALSE; - boolean zeros_needed = FALSE; - boolean ones_needed = FALSE; - - for(i = 0; i < 4; ++i) { - enum util_format_swizzle swizzle = desc->swizzle[i]; - - /* Initialize with the no-op case */ - swizzles[i] = util_cpu_caps.little_endian ? 
3 - i : i; - zeros[i] = TRUE; - ones[i] = FALSE; - - switch (swizzle) { - case UTIL_FORMAT_SWIZZLE_X: - case UTIL_FORMAT_SWIZZLE_Y: - case UTIL_FORMAT_SWIZZLE_Z: - case UTIL_FORMAT_SWIZZLE_W: - if(swizzle != swizzles[i]) { - swizzles[i] = swizzle; - swizzles_needed = TRUE; - } - break; - case UTIL_FORMAT_SWIZZLE_0: - zeros[i] = FALSE; - zeros_needed = TRUE; - break; - case UTIL_FORMAT_SWIZZLE_1: - ones[i] = TRUE; - ones_needed = TRUE; - break; - case UTIL_FORMAT_SWIZZLE_NONE: - assert(0); - break; - } - } - - res = packed; - - if(swizzles_needed) - res = lp_build_swizzle1_aos(&bld, res, swizzles); - - if(zeros_needed) { - /* Mask out zero channels */ - LLVMValueRef mask = lp_build_const_mask_aos(type, zeros); - res = LLVMBuildAnd(builder, res, mask, ""); - } - - if(ones_needed) { - /* Or one channels */ - LLVMValueRef mask = lp_build_const_mask_aos(type, ones); - res = LLVMBuildOr(builder, res, mask, ""); - } - } - else { - /* FIXME */ - assert(0); - res = lp_build_undef(type); - } - - return res; -} - - -/** - * Pack a single pixel. - * - * @param rgba 4 float vector with the unpacked components. - * - * XXX: This is mostly for reference and testing -- operating a single pixel at - * a time is rarely if ever needed. - */ -LLVMValueRef -lp_build_pack_rgba_aos(LLVMBuilderRef builder, - const struct util_format_description *desc, - LLVMValueRef rgba) -{ - LLVMTypeRef type; - LLVMValueRef packed = NULL; - LLVMValueRef swizzles[4]; - LLVMValueRef shifted, casted, scaled, unswizzled; - LLVMValueRef shifts[4]; - LLVMValueRef scales[4]; - bool normalized; - unsigned shift; - unsigned i, j; - - assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH); - assert(desc->block.width == 1); - assert(desc->block.height == 1); - - type = LLVMIntType(desc->block.bits); - - /* Unswizzle the color components into the source vector. 
*/ - for (i = 0; i < 4; ++i) { - for (j = 0; j < 4; ++j) { - if (desc->swizzle[j] == i) - break; - } - if (j < 4) - swizzles[i] = LLVMConstInt(LLVMInt32Type(), j, 0); - else - swizzles[i] = LLVMGetUndef(LLVMInt32Type()); - } - - unswizzled = LLVMBuildShuffleVector(builder, rgba, - LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4)), - LLVMConstVector(swizzles, 4), ""); - - normalized = FALSE; - shift = 0; - for (i = 0; i < 4; ++i) { - unsigned bits = desc->channel[i].size; - - if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) { - shifts[i] = LLVMGetUndef(LLVMInt32Type()); - scales[i] = LLVMGetUndef(LLVMFloatType()); - } - else { - unsigned mask = (1 << bits) - 1; - - assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED); - assert(bits < 32); - - shifts[i] = LLVMConstInt(LLVMInt32Type(), shift, 0); - - if (desc->channel[i].normalized) { - scales[i] = LLVMConstReal(LLVMFloatType(), mask); - normalized = TRUE; - } - else - scales[i] = LLVMConstReal(LLVMFloatType(), 1.0); - } - - shift += bits; - } - - if (normalized) - scaled = LLVMBuildMul(builder, unswizzled, LLVMConstVector(scales, 4), ""); - else - scaled = unswizzled; - - casted = LLVMBuildFPToSI(builder, scaled, LLVMVectorType(LLVMInt32Type(), 4), ""); - - shifted = LLVMBuildShl(builder, casted, LLVMConstVector(shifts, 4), ""); - - /* Bitwise or all components */ - for (i = 0; i < 4; ++i) { - if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) { - LLVMValueRef component = LLVMBuildExtractElement(builder, shifted, LLVMConstInt(LLVMInt32Type(), i, 0), ""); - if (packed) - packed = LLVMBuildOr(builder, packed, component, ""); - else - packed = component; - } - } - - if (!packed) - packed = LLVMGetUndef(LLVMInt32Type()); - - if (desc->block.bits < 32) - packed = LLVMBuildTrunc(builder, packed, type, ""); - - return packed; -} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_query.c b/src/gallium/drivers/llvmpipe/lp_bld_format_query.c deleted file mode 100644 index f3832d07ff9..00000000000 --- 
a/src/gallium/drivers/llvmpipe/lp_bld_format_query.c +++ /dev/null @@ -1,72 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * Utility functions to make assertions about formats. - * - * This module centralizes most of logic used when determining what algorithm - * is most suitable (i.e., most efficient yet correct) for a given format. - * - * It might be possible to move some of these functions to u_format module, - * but since tiny differences in the format my render it more/less - * appropriate to a given algorithm it is impossible to make any long term - * guarantee about the semantics of these functions. 
- * - * @author Jose Fonseca - */ - - -#include "util/u_format.h" - -#include "lp_bld_format.h" - - -/** - * Whether this format is a 4 rgba8 variant - */ -boolean -lp_format_is_rgba8(const struct util_format_description *desc) -{ - unsigned chan; - - if(desc->block.width != 1 || - desc->block.height != 1 || - desc->block.bits != 32) - return FALSE; - - for(chan = 0; chan < 4; ++chan) { - if(desc->channel[chan].type != UTIL_FORMAT_TYPE_UNSIGNED && - desc->channel[chan].type != UTIL_FORMAT_TYPE_SIGNED && - desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID) - return FALSE; - if(desc->channel[chan].size != 8) - return FALSE; - } - - return TRUE; -} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c deleted file mode 100644 index 64151d169da..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c +++ /dev/null @@ -1,149 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include "util/u_format.h" - -#include "lp_bld_type.h" -#include "lp_bld_const.h" -#include "lp_bld_conv.h" -#include "lp_bld_format.h" - - -static LLVMValueRef -lp_build_format_swizzle_chan_soa(struct lp_type type, - const LLVMValueRef *unswizzled, - enum util_format_swizzle swizzle) -{ - switch (swizzle) { - case UTIL_FORMAT_SWIZZLE_X: - case UTIL_FORMAT_SWIZZLE_Y: - case UTIL_FORMAT_SWIZZLE_Z: - case UTIL_FORMAT_SWIZZLE_W: - return unswizzled[swizzle]; - case UTIL_FORMAT_SWIZZLE_0: - return lp_build_zero(type); - case UTIL_FORMAT_SWIZZLE_1: - return lp_build_one(type); - case UTIL_FORMAT_SWIZZLE_NONE: - return lp_build_undef(type); - default: - assert(0); - return lp_build_undef(type); - } -} - - -void -lp_build_format_swizzle_soa(const struct util_format_description *format_desc, - struct lp_type type, - const LLVMValueRef *unswizzled, - LLVMValueRef *swizzled) -{ - if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { - enum util_format_swizzle swizzle = format_desc->swizzle[0]; - LLVMValueRef depth = lp_build_format_swizzle_chan_soa(type, unswizzled, swizzle); - swizzled[2] = swizzled[1] = swizzled[0] = depth; - swizzled[3] = lp_build_one(type); - } - else { - unsigned chan; - for (chan = 0; chan < 4; ++chan) { - enum util_format_swizzle swizzle = format_desc->swizzle[chan]; - swizzled[chan] = lp_build_format_swizzle_chan_soa(type, unswizzled, swizzle); - } - } -} - - -void -lp_build_unpack_rgba_soa(LLVMBuilderRef builder, - const struct util_format_description *format_desc, - struct lp_type type, - LLVMValueRef packed, - LLVMValueRef *rgba) -{ - LLVMValueRef inputs[4]; - unsigned start; - unsigned chan; 
- - /* FIXME: Support more formats */ - assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH || - (format_desc->layout == UTIL_FORMAT_LAYOUT_ARRAY && - format_desc->block.bits == format_desc->channel[0].size)); - assert(format_desc->block.width == 1); - assert(format_desc->block.height == 1); - assert(format_desc->block.bits <= 32); - - /* Decode the input vector components */ - start = 0; - for (chan = 0; chan < 4; ++chan) { - unsigned width = format_desc->channel[chan].size; - unsigned stop = start + width; - LLVMValueRef input; - - input = packed; - - switch(format_desc->channel[chan].type) { - case UTIL_FORMAT_TYPE_VOID: - input = NULL; - break; - - case UTIL_FORMAT_TYPE_UNSIGNED: - if(type.floating) { - if(start) - input = LLVMBuildLShr(builder, input, lp_build_int_const_scalar(type, start), ""); - if(stop < format_desc->block.bits) { - unsigned mask = ((unsigned long long)1 << width) - 1; - input = LLVMBuildAnd(builder, input, lp_build_int_const_scalar(type, mask), ""); - } - - if(format_desc->channel[chan].normalized) - input = lp_build_unsigned_norm_to_float(builder, width, type, input); - else - input = LLVMBuildFPToSI(builder, input, lp_build_vec_type(type), ""); - } - else { - /* FIXME */ - assert(0); - input = lp_build_undef(type); - } - break; - - default: - /* fall through */ - input = lp_build_undef(type); - break; - } - - inputs[chan] = input; - - start = stop; - } - - lp_build_format_swizzle_soa(format_desc, type, inputs, rgba); -} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c deleted file mode 100644 index a6acaead887..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c +++ /dev/null @@ -1,407 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * Position and shader input interpolation. - * - * @author Jose Fonseca - */ - -#include "pipe/p_shader_tokens.h" -#include "util/u_debug.h" -#include "util/u_memory.h" -#include "util/u_math.h" -#include "tgsi/tgsi_parse.h" -#include "lp_bld_debug.h" -#include "lp_bld_const.h" -#include "lp_bld_arit.h" -#include "lp_bld_swizzle.h" -#include "lp_bld_interp.h" - - -/* - * The shader JIT function operates on blocks of quads. - * Each block has 2x2 quads and each quad has 2x2 pixels. 
- * - * We iterate over the quads in order 0, 1, 2, 3: - * - * ################# - * # | # | # - * #---0---#---1---# - * # | # | # - * ################# - * # | # | # - * #---2---#---3---# - * # | # | # - * ################# - * - * Within each quad, we have four pixels which are represented in SOA - * order: - * - * ######### - * # 0 | 1 # - * #---+---# - * # 2 | 3 # - * ######### - * - * So the green channel (for example) of the four pixels is stored in - * a single vector register: {g0, g1, g2, g3}. - */ - - -static void -attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix) -{ - if(attrib == 0) - lp_build_name(val, "pos.%c%s", "xyzw"[chan], suffix); - else - lp_build_name(val, "input%u.%c%s", attrib - 1, "xyzw"[chan], suffix); -} - - -/** - * Initialize the bld->a0, dadx, dady fields. This involves fetching - * those values from the arrays which are passed into the JIT function. - */ -static void -coeffs_init(struct lp_build_interp_soa_context *bld, - LLVMValueRef a0_ptr, - LLVMValueRef dadx_ptr, - LLVMValueRef dady_ptr) -{ - LLVMBuilderRef builder = bld->base.builder; - unsigned attrib; - unsigned chan; - - for(attrib = 0; attrib < bld->num_attribs; ++attrib) { - unsigned mask = bld->mask[attrib]; - unsigned mode = bld->mode[attrib]; - for(chan = 0; chan < NUM_CHANNELS; ++chan) { - if(mask & (1 << chan)) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), attrib*NUM_CHANNELS + chan, 0); - LLVMValueRef a0 = NULL; - LLVMValueRef dadx = NULL; - LLVMValueRef dady = NULL; - - switch( mode ) { - case TGSI_INTERPOLATE_PERSPECTIVE: - /* fall-through */ - - case TGSI_INTERPOLATE_LINEAR: - dadx = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dadx_ptr, &index, 1, ""), ""); - dady = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dady_ptr, &index, 1, ""), ""); - dadx = lp_build_broadcast_scalar(&bld->base, dadx); - dady = lp_build_broadcast_scalar(&bld->base, dady); - attrib_name(dadx, attrib, chan, ".dadx"); - attrib_name(dady, attrib, chan, 
".dady"); - /* fall-through */ - - case TGSI_INTERPOLATE_CONSTANT: - a0 = LLVMBuildLoad(builder, LLVMBuildGEP(builder, a0_ptr, &index, 1, ""), ""); - a0 = lp_build_broadcast_scalar(&bld->base, a0); - attrib_name(a0, attrib, chan, ".a0"); - break; - - default: - assert(0); - break; - } - - bld->a0 [attrib][chan] = a0; - bld->dadx[attrib][chan] = dadx; - bld->dady[attrib][chan] = dady; - } - } - } -} - - -/** - * Emit LLVM code to compute the fragment shader input attribute values. - * For example, for a color input, we'll compute red, green, blue and alpha - * values for the four pixels in a quad. - * Recall that we're operating on 4-element vectors so each arithmetic - * operation is operating on the four pixels in a quad. - */ -static void -attribs_init(struct lp_build_interp_soa_context *bld) -{ - LLVMValueRef x = bld->pos[0]; - LLVMValueRef y = bld->pos[1]; - LLVMValueRef oow = NULL; - unsigned attrib; - unsigned chan; - - for(attrib = 0; attrib < bld->num_attribs; ++attrib) { - unsigned mask = bld->mask[attrib]; - unsigned mode = bld->mode[attrib]; - for(chan = 0; chan < NUM_CHANNELS; ++chan) { - if(mask & (1 << chan)) { - LLVMValueRef a0 = bld->a0 [attrib][chan]; - LLVMValueRef dadx = bld->dadx[attrib][chan]; - LLVMValueRef dady = bld->dady[attrib][chan]; - LLVMValueRef res; - - res = a0; - - if (mode != TGSI_INTERPOLATE_CONSTANT) { - /* res = res + x * dadx */ - res = lp_build_add(&bld->base, res, lp_build_mul(&bld->base, x, dadx)); - /* res = res + y * dady */ - res = lp_build_add(&bld->base, res, lp_build_mul(&bld->base, y, dady)); - } - - /* Keep the value of the attribue before perspective divide for faster updates */ - bld->attribs_pre[attrib][chan] = res; - - if (mode == TGSI_INTERPOLATE_PERSPECTIVE) { - LLVMValueRef w = bld->pos[3]; - assert(attrib != 0); - if(!oow) - oow = lp_build_rcp(&bld->base, w); - res = lp_build_mul(&bld->base, res, oow); - } - - attrib_name(res, attrib, chan, ""); - - bld->attribs[attrib][chan] = res; - } - } - } -} - - -/** - 
* Increment the shader input attribute values. - * This is called when we move from one quad to the next. - */ -static void -attribs_update(struct lp_build_interp_soa_context *bld, int quad_index) -{ - LLVMValueRef oow = NULL; - unsigned attrib; - unsigned chan; - - assert(quad_index < 4); - - for(attrib = 0; attrib < bld->num_attribs; ++attrib) { - unsigned mask = bld->mask[attrib]; - unsigned mode = bld->mode[attrib]; - - if (mode != TGSI_INTERPOLATE_CONSTANT) { - for(chan = 0; chan < NUM_CHANNELS; ++chan) { - if(mask & (1 << chan)) { - LLVMValueRef dadx = bld->dadx[attrib][chan]; - LLVMValueRef dady = bld->dady[attrib][chan]; - LLVMValueRef res; - - res = bld->attribs_pre[attrib][chan]; - - if (quad_index == 1 || quad_index == 3) { - /* top-right or bottom-right quad */ - /* build res = res + dadx + dadx */ - res = lp_build_add(&bld->base, res, dadx); - res = lp_build_add(&bld->base, res, dadx); - } - - if (quad_index == 2 || quad_index == 3) { - /* bottom-left or bottom-right quad */ - /* build res = res + dady + dady */ - res = lp_build_add(&bld->base, res, dady); - res = lp_build_add(&bld->base, res, dady); - } - - //XXX bld->attribs_pre[attrib][chan] = res; - - if (mode == TGSI_INTERPOLATE_PERSPECTIVE) { - LLVMValueRef w = bld->pos[3]; - assert(attrib != 0); - if(!oow) - oow = lp_build_rcp(&bld->base, w); - res = lp_build_mul(&bld->base, res, oow); - } - - attrib_name(res, attrib, chan, ""); - - bld->attribs[attrib][chan] = res; - } - } - } - } -} - - -/** - * Generate the position vectors. - * - * Parameter x0, y0 are the integer values with the quad upper left coordinates. - */ -static void -pos_init(struct lp_build_interp_soa_context *bld, - LLVMValueRef x0, - LLVMValueRef y0) -{ - lp_build_name(x0, "pos.x"); - lp_build_name(y0, "pos.y"); - - bld->attribs[0][0] = x0; - bld->attribs[0][1] = y0; -} - - -/** - * Update quad position values when moving to the next quad. 
- */ -static void -pos_update(struct lp_build_interp_soa_context *bld, int quad_index) -{ - LLVMValueRef x = bld->attribs[0][0]; - LLVMValueRef y = bld->attribs[0][1]; - const int xstep = 2, ystep = 2; - - if (quad_index == 1 || quad_index == 3) { - /* top-right or bottom-right quad in block */ - /* build x += xstep */ - x = lp_build_add(&bld->base, x, - lp_build_const_scalar(bld->base.type, xstep)); - } - - if (quad_index == 2) { - /* bottom-left quad in block */ - /* build y += ystep */ - y = lp_build_add(&bld->base, y, - lp_build_const_scalar(bld->base.type, ystep)); - /* build x -= xstep */ - x = lp_build_sub(&bld->base, x, - lp_build_const_scalar(bld->base.type, xstep)); - } - - lp_build_name(x, "pos.x"); - lp_build_name(y, "pos.y"); - - bld->attribs[0][0] = x; - bld->attribs[0][1] = y; -} - - -/** - * Initialize fragment shader input attribute info. - */ -void -lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, - const struct tgsi_token *tokens, - boolean flatshade, - LLVMBuilderRef builder, - struct lp_type type, - LLVMValueRef a0_ptr, - LLVMValueRef dadx_ptr, - LLVMValueRef dady_ptr, - LLVMValueRef x0, - LLVMValueRef y0) -{ - struct tgsi_parse_context parse; - struct tgsi_full_declaration *decl; - - memset(bld, 0, sizeof *bld); - - lp_build_context_init(&bld->base, builder, type); - - /* For convenience */ - bld->pos = bld->attribs[0]; - bld->inputs = (const LLVMValueRef (*)[NUM_CHANNELS]) bld->attribs[1]; - - /* Position */ - bld->num_attribs = 1; - bld->mask[0] = TGSI_WRITEMASK_ZW; - bld->mode[0] = TGSI_INTERPOLATE_LINEAR; - - /* Inputs */ - tgsi_parse_init( &parse, tokens ); - while( !tgsi_parse_end_of_tokens( &parse ) ) { - tgsi_parse_token( &parse ); - - switch( parse.FullToken.Token.Type ) { - case TGSI_TOKEN_TYPE_DECLARATION: - decl = &parse.FullToken.FullDeclaration; - if( decl->Declaration.File == TGSI_FILE_INPUT ) { - unsigned first, last, mask; - unsigned attrib; - - first = decl->Range.First; - last = decl->Range.Last; - mask = 
decl->Declaration.UsageMask; - - for( attrib = first; attrib <= last; ++attrib ) { - bld->mask[1 + attrib] = mask; - - /* XXX: have mesa set INTERP_CONSTANT in the fragment - * shader. - */ - if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR && - flatshade) - bld->mode[1 + attrib] = TGSI_INTERPOLATE_CONSTANT; - else - bld->mode[1 + attrib] = decl->Declaration.Interpolate; - } - - bld->num_attribs = MAX2(bld->num_attribs, 1 + last + 1); - } - break; - - case TGSI_TOKEN_TYPE_INSTRUCTION: - case TGSI_TOKEN_TYPE_IMMEDIATE: - break; - - default: - assert( 0 ); - } - } - tgsi_parse_free( &parse ); - - coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr); - - pos_init(bld, x0, y0); - - attribs_init(bld); -} - - -/** - * Advance the position and inputs to the given quad within the block. - */ -void -lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld, - int quad_index) -{ - assert(quad_index < 4); - - pos_update(bld, quad_index); - - attribs_update(bld, quad_index); -} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h deleted file mode 100644 index ca958cdf343..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.h +++ /dev/null @@ -1,96 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * Position and shader input interpolation. - * - * Special attention is given to the interpolation of side by side quads. - * Multiplications are made only for the first quad. Interpolation of - * inputs for posterior quads are done exclusively with additions, and - * perspective divide if necessary. 
- * - * @author Jose Fonseca - */ - -#ifndef LP_BLD_INTERP_H -#define LP_BLD_INTERP_H - - -#include - -#include "tgsi/tgsi_exec.h" - -#include "lp_bld_type.h" - - -struct tgsi_token; - - -struct lp_build_interp_soa_context -{ - struct lp_build_context base; - - unsigned num_attribs; - unsigned mask[1 + PIPE_MAX_SHADER_INPUTS]; - unsigned mode[1 + PIPE_MAX_SHADER_INPUTS]; - - LLVMValueRef a0 [1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; - LLVMValueRef dadx[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; - LLVMValueRef dady[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; - - /* Attribute values before perspective divide */ - LLVMValueRef attribs_pre[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; - - LLVMValueRef attribs[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; - - /* - * Convenience pointers. Callers may access this one. - */ - const LLVMValueRef *pos; - const LLVMValueRef (*inputs)[NUM_CHANNELS]; -}; - - -void -lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, - const struct tgsi_token *tokens, - boolean flatshade, - LLVMBuilderRef builder, - struct lp_type type, - LLVMValueRef a0_ptr, - LLVMValueRef dadx_ptr, - LLVMValueRef dady_ptr, - LLVMValueRef x0, - LLVMValueRef y0); - -void -lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld, - int quad_index); - - -#endif /* LP_BLD_INTERP_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_intr.c b/src/gallium/drivers/llvmpipe/lp_bld_intr.c deleted file mode 100644 index 9895749d568..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_bld_intr.c +++ /dev/null @@ -1,192 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/** - * @file - * Helpers for emiting intrinsic calls. - * - * LLVM vanilla IR doesn't represent all basic arithmetic operations we care - * about, and it is often necessary to resort target-specific intrinsics for - * performance, convenience. - * - * Ideally we would like to stay away from target specific intrinsics and - * move all the instruction selection logic into upstream LLVM where it belongs. - * - * These functions are also used for calling C functions provided by us from - * generated LLVM code. 
- * - * @author Jose Fonseca - */ - - -#include "util/u_debug.h" - -#include "lp_bld_intr.h" - - -LLVMValueRef -lp_declare_intrinsic(LLVMModuleRef module, - const char *name, - LLVMTypeRef ret_type, - LLVMTypeRef *arg_types, - unsigned num_args) -{ - LLVMTypeRef function_type; - LLVMValueRef function; - - assert(!LLVMGetNamedFunction(module, name)); - - function_type = LLVMFunctionType(ret_type, arg_types, num_args, 0); - function = LLVMAddFunction(module, name, function_type); - - LLVMSetFunctionCallConv(function, LLVMCCallConv); - LLVMSetLinkage(function, LLVMExternalLinkage); - - assert(LLVMIsDeclaration(function)); - - if(name[0] == 'l' && - name[1] == 'l' && - name[2] == 'v' && - name[3] == 'm' && - name[4] == '.') - assert(LLVMGetIntrinsicID(function)); - - return function; -} - - -LLVMValueRef -lp_build_intrinsic(LLVMBuilderRef builder, - const char *name, - LLVMTypeRef ret_type, - LLVMValueRef *args, - unsigned num_args) -{ - LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder))); - LLVMValueRef function; - - function = LLVMGetNamedFunction(module, name); - if(!function) { - LLVMTypeRef arg_types[LP_MAX_FUNC_ARGS]; - unsigned i; - - assert(num_args <= LP_MAX_FUNC_ARGS); - - for(i = 0; i < num_args; ++i) { - assert(args[i]); - arg_types[i] = LLVMTypeOf(args[i]); - } - - function = lp_declare_intrinsic(module, name, ret_type, arg_types, num_args); - } - - return LLVMBuildCall(builder, function, args, num_args, ""); -} - - -LLVMValueRef -lp_build_intrinsic_unary(LLVMBuilderRef builder, - const char *name, - LLVMTypeRef ret_type, - LLVMValueRef a) -{ - return lp_build_intrinsic(builder, name, ret_type, &a, 1); -} - - -LLVMValueRef -lp_build_intrinsic_binary(LLVMBuilderRef builder, - const char *name, - LLVMTypeRef ret_type, - LLVMValueRef a, - LLVMValueRef b) -{ - LLVMValueRef args[2]; - - args[0] = a; - args[1] = b; - - return lp_build_intrinsic(builder, name, ret_type, args, 2); -} - - -LLVMValueRef 
-lp_build_intrinsic_map(LLVMBuilderRef builder, - const char *name, - LLVMTypeRef ret_type, - LLVMValueRef *args, - unsigned num_args) -{ - LLVMTypeRef ret_elem_type = LLVMGetElementType(ret_type); - unsigned n = LLVMGetVectorSize(ret_type); - unsigned i, j; - LLVMValueRef res; - - assert(num_args <= LP_MAX_FUNC_ARGS); - - res = LLVMGetUndef(ret_type); - for(i = 0; i < n; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - LLVMValueRef arg_elems[LP_MAX_FUNC_ARGS]; - LLVMValueRef res_elem; - for(j = 0; j < num_args; ++j) - arg_elems[j] = LLVMBuildExtractElement(builder, args[j], index, ""); - res_elem = lp_build_intrinsic(builder, name, ret_elem_type, arg_elems, num_args); - res = LLVMBuildInsertElement(builder, res, res_elem, index, ""); - } - - return res; -} - - -LLVMValueRef -lp_build_intrinsic_map_unary(LLVMBuilderRef builder, - const char *name, - LLVMTypeRef ret_type, - LLVMValueRef a) -{ - return lp_build_intrinsic_map(builder, name, ret_type, &a, 1); -} - - -LLVMValueRef -lp_build_intrinsic_map_binary(LLVMBuilderRef builder, - const char *name, - LLVMTypeRef ret_type, - LLVMValueRef a, - LLVMValueRef b) -{ - LLVMValueRef args[2]; - - args[0] = a; - args[1] = b; - - return lp_build_intrinsic_map(builder, name, ret_type, args, 2); -} - - diff --git a/src/gallium/drivers/llvmpipe/lp_bld_intr.h b/src/gallium/drivers/llvmpipe/lp_bld_intr.h deleted file mode 100644 index f813f27074b..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_bld_intr.h +++ /dev/null @@ -1,102 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * Helper functions for calling intrinsics. - * - * @author Jose Fonseca - */ - - -#ifndef LP_BLD_INTR_H -#define LP_BLD_INTR_H - - -#include - - -/** - * Max number of arguments in an intrinsic. 
- */ -#define LP_MAX_FUNC_ARGS 32 - - -LLVMValueRef -lp_declare_intrinsic(LLVMModuleRef module, - const char *name, - LLVMTypeRef ret_type, - LLVMTypeRef *arg_types, - unsigned num_args); - -LLVMValueRef -lp_build_intrinsic(LLVMBuilderRef builder, - const char *name, - LLVMTypeRef ret_type, - LLVMValueRef *args, - unsigned num_args); - - -LLVMValueRef -lp_build_intrinsic_unary(LLVMBuilderRef builder, - const char *name, - LLVMTypeRef ret_type, - LLVMValueRef a); - - -LLVMValueRef -lp_build_intrinsic_binary(LLVMBuilderRef builder, - const char *name, - LLVMTypeRef ret_type, - LLVMValueRef a, - LLVMValueRef b); - - -LLVMValueRef -lp_build_intrinsic_map(LLVMBuilderRef builder, - const char *name, - LLVMTypeRef ret_type, - LLVMValueRef *args, - unsigned num_args); - - -LLVMValueRef -lp_build_intrinsic_map_unary(LLVMBuilderRef builder, - const char *name, - LLVMTypeRef ret_type, - LLVMValueRef a); - - -LLVMValueRef -lp_build_intrinsic_map_binary(LLVMBuilderRef builder, - const char *name, - LLVMTypeRef ret_type, - LLVMValueRef a, - LLVMValueRef b); - - -#endif /* !LP_BLD_INTR_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.c b/src/gallium/drivers/llvmpipe/lp_bld_logic.c deleted file mode 100644 index d23de4f0ef8..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_bld_logic.c +++ /dev/null @@ -1,421 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * Helper functions for logical operations. - * - * @author Jose Fonseca - */ - - -#include "util/u_cpu_detect.h" -#include "util/u_debug.h" - -#include "lp_bld_type.h" -#include "lp_bld_const.h" -#include "lp_bld_intr.h" -#include "lp_bld_logic.h" - - -/** - * Build code to compare two values 'a' and 'b' of 'type' using the given func. 
- * \param func one of PIPE_FUNC_x - */ -LLVMValueRef -lp_build_compare(LLVMBuilderRef builder, - const struct lp_type type, - unsigned func, - LLVMValueRef a, - LLVMValueRef b) -{ - LLVMTypeRef vec_type = lp_build_vec_type(type); - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); - LLVMValueRef zeros = LLVMConstNull(int_vec_type); - LLVMValueRef ones = LLVMConstAllOnes(int_vec_type); - LLVMValueRef cond; - LLVMValueRef res; - unsigned i; - - assert(func >= PIPE_FUNC_NEVER); - assert(func <= PIPE_FUNC_ALWAYS); - - if(func == PIPE_FUNC_NEVER) - return zeros; - if(func == PIPE_FUNC_ALWAYS) - return ones; - - /* TODO: optimize the constant case */ - - /* XXX: It is not clear if we should use the ordered or unordered operators */ - -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - if(type.width * type.length == 128) { - if(type.floating && util_cpu_caps.has_sse) { - /* float[4] comparison */ - LLVMValueRef args[3]; - unsigned cc; - boolean swap; - - swap = FALSE; - switch(func) { - case PIPE_FUNC_EQUAL: - cc = 0; - break; - case PIPE_FUNC_NOTEQUAL: - cc = 4; - break; - case PIPE_FUNC_LESS: - cc = 1; - break; - case PIPE_FUNC_LEQUAL: - cc = 2; - break; - case PIPE_FUNC_GREATER: - cc = 1; - swap = TRUE; - break; - case PIPE_FUNC_GEQUAL: - cc = 2; - swap = TRUE; - break; - default: - assert(0); - return lp_build_undef(type); - } - - if(swap) { - args[0] = b; - args[1] = a; - } - else { - args[0] = a; - args[1] = b; - } - - args[2] = LLVMConstInt(LLVMInt8Type(), cc, 0); - res = lp_build_intrinsic(builder, - "llvm.x86.sse.cmp.ps", - vec_type, - args, 3); - res = LLVMBuildBitCast(builder, res, int_vec_type, ""); - return res; - } - else if(util_cpu_caps.has_sse2) { - /* int[4] comparison */ - static const struct { - unsigned swap:1; - unsigned eq:1; - unsigned gt:1; - unsigned not:1; - } table[] = { - {0, 0, 0, 1}, /* PIPE_FUNC_NEVER */ - {1, 0, 1, 0}, /* PIPE_FUNC_LESS */ - {0, 1, 0, 0}, /* PIPE_FUNC_EQUAL */ - {0, 0, 1, 1}, /* PIPE_FUNC_LEQUAL */ - {0, 0, 
1, 0}, /* PIPE_FUNC_GREATER */ - {0, 1, 0, 1}, /* PIPE_FUNC_NOTEQUAL */ - {1, 0, 1, 1}, /* PIPE_FUNC_GEQUAL */ - {0, 0, 0, 0} /* PIPE_FUNC_ALWAYS */ - }; - const char *pcmpeq; - const char *pcmpgt; - LLVMValueRef args[2]; - LLVMValueRef res; - - switch (type.width) { - case 8: - pcmpeq = "llvm.x86.sse2.pcmpeq.b"; - pcmpgt = "llvm.x86.sse2.pcmpgt.b"; - break; - case 16: - pcmpeq = "llvm.x86.sse2.pcmpeq.w"; - pcmpgt = "llvm.x86.sse2.pcmpgt.w"; - break; - case 32: - pcmpeq = "llvm.x86.sse2.pcmpeq.d"; - pcmpgt = "llvm.x86.sse2.pcmpgt.d"; - break; - default: - assert(0); - return lp_build_undef(type); - } - - /* There are no signed byte and unsigned word/dword comparison - * instructions. So flip the sign bit so that the results match. - */ - if(table[func].gt && - ((type.width == 8 && type.sign) || - (type.width != 8 && !type.sign))) { - LLVMValueRef msb = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1)); - a = LLVMBuildXor(builder, a, msb, ""); - b = LLVMBuildXor(builder, b, msb, ""); - } - - if(table[func].swap) { - args[0] = b; - args[1] = a; - } - else { - args[0] = a; - args[1] = b; - } - - if(table[func].eq) - res = lp_build_intrinsic(builder, pcmpeq, vec_type, args, 2); - else if (table[func].gt) - res = lp_build_intrinsic(builder, pcmpgt, vec_type, args, 2); - else - res = LLVMConstNull(vec_type); - - if(table[func].not) - res = LLVMBuildNot(builder, res, ""); - - return res; - } - } -#endif - - if(type.floating) { - LLVMRealPredicate op; - switch(func) { - case PIPE_FUNC_NEVER: - op = LLVMRealPredicateFalse; - break; - case PIPE_FUNC_ALWAYS: - op = LLVMRealPredicateTrue; - break; - case PIPE_FUNC_EQUAL: - op = LLVMRealUEQ; - break; - case PIPE_FUNC_NOTEQUAL: - op = LLVMRealUNE; - break; - case PIPE_FUNC_LESS: - op = LLVMRealULT; - break; - case PIPE_FUNC_LEQUAL: - op = LLVMRealULE; - break; - case PIPE_FUNC_GREATER: - op = LLVMRealUGT; - break; - case PIPE_FUNC_GEQUAL: - op = LLVMRealUGE; - break; - default: - assert(0); - return 
lp_build_undef(type); - } - -#if 0 - /* XXX: Although valid IR, no LLVM target currently support this */ - cond = LLVMBuildFCmp(builder, op, a, b, ""); - res = LLVMBuildSelect(builder, cond, ones, zeros, ""); -#else - debug_printf("%s: warning: using slow element-wise vector comparison\n", - __FUNCTION__); - res = LLVMGetUndef(int_vec_type); - for(i = 0; i < type.length; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - cond = LLVMBuildFCmp(builder, op, - LLVMBuildExtractElement(builder, a, index, ""), - LLVMBuildExtractElement(builder, b, index, ""), - ""); - cond = LLVMBuildSelect(builder, cond, - LLVMConstExtractElement(ones, index), - LLVMConstExtractElement(zeros, index), - ""); - res = LLVMBuildInsertElement(builder, res, cond, index, ""); - } -#endif - } - else { - LLVMIntPredicate op; - switch(func) { - case PIPE_FUNC_EQUAL: - op = LLVMIntEQ; - break; - case PIPE_FUNC_NOTEQUAL: - op = LLVMIntNE; - break; - case PIPE_FUNC_LESS: - op = type.sign ? LLVMIntSLT : LLVMIntULT; - break; - case PIPE_FUNC_LEQUAL: - op = type.sign ? LLVMIntSLE : LLVMIntULE; - break; - case PIPE_FUNC_GREATER: - op = type.sign ? LLVMIntSGT : LLVMIntUGT; - break; - case PIPE_FUNC_GEQUAL: - op = type.sign ? 
LLVMIntSGE : LLVMIntUGE; - break; - default: - assert(0); - return lp_build_undef(type); - } - -#if 0 - /* XXX: Although valid IR, no LLVM target currently support this */ - cond = LLVMBuildICmp(builder, op, a, b, ""); - res = LLVMBuildSelect(builder, cond, ones, zeros, ""); -#else - debug_printf("%s: warning: using slow element-wise int vector comparison\n", - __FUNCTION__); - res = LLVMGetUndef(int_vec_type); - for(i = 0; i < type.length; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - cond = LLVMBuildICmp(builder, op, - LLVMBuildExtractElement(builder, a, index, ""), - LLVMBuildExtractElement(builder, b, index, ""), - ""); - cond = LLVMBuildSelect(builder, cond, - LLVMConstExtractElement(ones, index), - LLVMConstExtractElement(zeros, index), - ""); - res = LLVMBuildInsertElement(builder, res, cond, index, ""); - } -#endif - } - - return res; -} - - - -/** - * Build code to compare two values 'a' and 'b' using the given func. - * \param func one of PIPE_FUNC_x - */ -LLVMValueRef -lp_build_cmp(struct lp_build_context *bld, - unsigned func, - LLVMValueRef a, - LLVMValueRef b) -{ - return lp_build_compare(bld->builder, bld->type, func, a, b); -} - - -LLVMValueRef -lp_build_select(struct lp_build_context *bld, - LLVMValueRef mask, - LLVMValueRef a, - LLVMValueRef b) -{ - struct lp_type type = bld->type; - LLVMValueRef res; - - if(a == b) - return a; - - if(type.floating) { - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); - a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); - b = LLVMBuildBitCast(bld->builder, b, int_vec_type, ""); - } - - a = LLVMBuildAnd(bld->builder, a, mask, ""); - - /* This often gets translated to PANDN, but sometimes the NOT is - * pre-computed and stored in another constant. The best strategy depends - * on available registers, so it is not a big deal -- hopefully LLVM does - * the right decision attending the rest of the program. 
- */ - b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), ""); - - res = LLVMBuildOr(bld->builder, a, b, ""); - - if(type.floating) { - LLVMTypeRef vec_type = lp_build_vec_type(type); - res = LLVMBuildBitCast(bld->builder, res, vec_type, ""); - } - - return res; -} - - -LLVMValueRef -lp_build_select_aos(struct lp_build_context *bld, - LLVMValueRef a, - LLVMValueRef b, - const boolean cond[4]) -{ - const struct lp_type type = bld->type; - const unsigned n = type.length; - unsigned i, j; - - if(a == b) - return a; - if(cond[0] && cond[1] && cond[2] && cond[3]) - return a; - if(!cond[0] && !cond[1] && !cond[2] && !cond[3]) - return b; - if(a == bld->undef || b == bld->undef) - return bld->undef; - - /* - * There are three major ways of accomplishing this: - * - with a shuffle, - * - with a select, - * - or with a bit mask. - * - * Select isn't supported for vector types yet. - * The flip between these is empirical and might need to be. - */ - if (n <= 4) { - /* - * Shuffle. - */ - LLVMTypeRef elem_type = LLVMInt32Type(); - LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; - - for(j = 0; j < n; j += 4) - for(i = 0; i < 4; ++i) - shuffles[j + i] = LLVMConstInt(elem_type, (cond[i] ? 0 : n) + j + i, 0); - - return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), ""); - } - else { -#if 0 - /* XXX: Unfortunately select of vectors do not work */ - /* Use a select */ - LLVMTypeRef elem_type = LLVMInt1Type(); - LLVMValueRef cond[LP_MAX_VECTOR_LENGTH]; - - for(j = 0; j < n; j += 4) - for(i = 0; i < 4; ++i) - cond[j + i] = LLVMConstInt(elem_type, cond[i] ? 
1 : 0, 0); - - return LLVMBuildSelect(bld->builder, LLVMConstVector(cond, n), a, b, ""); -#else - LLVMValueRef mask = lp_build_const_mask_aos(type, cond); - return lp_build_select(bld, mask, a, b); -#endif - } -} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.h b/src/gallium/drivers/llvmpipe/lp_bld_logic.h deleted file mode 100644 index 40d64eb2c19..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_bld_logic.h +++ /dev/null @@ -1,80 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * Helper functions for logical operations. 
- * - * @author Jose Fonseca - */ - - -#ifndef LP_BLD_LOGIC_H -#define LP_BLD_LOGIC_H - - -#include - -#include "pipe/p_defines.h" /* For PIPE_FUNC_xxx */ - - -struct lp_type; -struct lp_build_context; - - -LLVMValueRef -lp_build_compare(LLVMBuilderRef builder, - const struct lp_type type, - unsigned func, - LLVMValueRef a, - LLVMValueRef b); - - -/** - * @param func is one of PIPE_FUNC_xxx - */ -LLVMValueRef -lp_build_cmp(struct lp_build_context *bld, - unsigned func, - LLVMValueRef a, - LLVMValueRef b); - - -LLVMValueRef -lp_build_select(struct lp_build_context *bld, - LLVMValueRef mask, - LLVMValueRef a, - LLVMValueRef b); - -LLVMValueRef -lp_build_select_aos(struct lp_build_context *bld, - LLVMValueRef a, - LLVMValueRef b, - const boolean cond[4]); - - -#endif /* !LP_BLD_LOGIC_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp b/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp deleted file mode 100644 index 6e79438ead0..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp +++ /dev/null @@ -1,75 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include "pipe/p_config.h" - -#include "lp_bld_misc.h" - - -#ifndef LLVM_NATIVE_ARCH - -namespace llvm { - extern void LinkInJIT(); -} - - -void -LLVMLinkInJIT(void) -{ - llvm::LinkInJIT(); -} - - -extern "C" int X86TargetMachineModule; - - -int -LLVMInitializeNativeTarget(void) -{ -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - X86TargetMachineModule = 1; -#endif - return 0; -} - - -#endif - - -/* - * Hack to allow the linking of release LLVM static libraries on a debug build. - * - * See also: - * - http://social.msdn.microsoft.com/Forums/en-US/vclanguage/thread/7234ea2b-0042-42ed-b4e2-5d8644dfb57d - */ -#if defined(_MSC_VER) && defined(_DEBUG) -#include -extern "C" { - _CRTIMP void __cdecl _invalid_parameter_noinfo(void) {} -} -#endif diff --git a/src/gallium/drivers/llvmpipe/lp_bld_misc.h b/src/gallium/drivers/llvmpipe/lp_bld_misc.h deleted file mode 100644 index 0e787e0b9cb..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_bld_misc.h +++ /dev/null @@ -1,56 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#ifndef LP_BLD_MISC_H -#define LP_BLD_MISC_H - - -#include "llvm/Config/config.h" - -#ifdef __cplusplus -extern "C" { -#endif - - -#ifndef LLVM_NATIVE_ARCH - -void -LLVMLinkInJIT(void); - -int -LLVMInitializeNativeTarget(void); - -#endif /* !LLVM_NATIVE_ARCH */ - - -#ifdef __cplusplus -} -#endif - - -#endif /* !LP_BLD_MISC_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_pack.c b/src/gallium/drivers/llvmpipe/lp_bld_pack.c deleted file mode 100644 index bc360ad77ad..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_bld_pack.c +++ /dev/null @@ -1,418 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/** - * @file - * Helper functions for packing/unpacking. - * - * Pack/unpacking is necessary for conversion between types of different - * bit width. - * - * They are also commonly used when an computation needs higher - * precision for the intermediate values. 
For example, if one needs the - * function: - * - * c = compute(a, b); - * - * to use more precision for intermediate results then one should implement it - * as: - * - * LLVMValueRef - * compute(LLVMBuilderRef builder struct lp_type type, LLVMValueRef a, LLVMValueRef b) - * { - * struct lp_type wide_type = lp_wider_type(type); - * LLVMValueRef al, ah, bl, bh, cl, ch, c; - * - * lp_build_unpack2(builder, type, wide_type, a, &al, &ah); - * lp_build_unpack2(builder, type, wide_type, b, &bl, &bh); - * - * cl = compute_half(al, bl); - * ch = compute_half(ah, bh); - * - * c = lp_build_pack2(bld->builder, wide_type, type, cl, ch); - * - * return c; - * } - * - * where compute_half() would do the computation for half the elements with - * twice the precision. - * - * @author Jose Fonseca - */ - - -#include "util/u_debug.h" -#include "util/u_math.h" -#include "util/u_cpu_detect.h" - -#include "lp_bld_type.h" -#include "lp_bld_const.h" -#include "lp_bld_intr.h" -#include "lp_bld_arit.h" -#include "lp_bld_pack.h" - - -/** - * Build shuffle vectors that match PUNPCKLxx and PUNPCKHxx instructions. - */ -static LLVMValueRef -lp_build_const_unpack_shuffle(unsigned n, unsigned lo_hi) -{ - LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; - unsigned i, j; - - assert(n <= LP_MAX_VECTOR_LENGTH); - assert(lo_hi < 2); - - /* TODO: cache results in a static table */ - - for(i = 0, j = lo_hi*n/2; i < n; i += 2, ++j) { - elems[i + 0] = LLVMConstInt(LLVMInt32Type(), 0 + j, 0); - elems[i + 1] = LLVMConstInt(LLVMInt32Type(), n + j, 0); - } - - return LLVMConstVector(elems, n); -} - - -/** - * Build shuffle vectors that match PACKxx instructions. 
- */ -static LLVMValueRef -lp_build_const_pack_shuffle(unsigned n) -{ - LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; - unsigned i; - - assert(n <= LP_MAX_VECTOR_LENGTH); - - /* TODO: cache results in a static table */ - - for(i = 0; i < n; ++i) - elems[i] = LLVMConstInt(LLVMInt32Type(), 2*i, 0); - - return LLVMConstVector(elems, n); -} - - -/** - * Interleave vector elements. - * - * Matches the PUNPCKLxx and PUNPCKHxx SSE instructions. - */ -LLVMValueRef -lp_build_interleave2(LLVMBuilderRef builder, - struct lp_type type, - LLVMValueRef a, - LLVMValueRef b, - unsigned lo_hi) -{ - LLVMValueRef shuffle; - - shuffle = lp_build_const_unpack_shuffle(type.length, lo_hi); - - return LLVMBuildShuffleVector(builder, a, b, shuffle, ""); -} - - -/** - * Double the bit width. - * - * This will only change the number of bits the values are represented, not the - * values themselves. - */ -void -lp_build_unpack2(LLVMBuilderRef builder, - struct lp_type src_type, - struct lp_type dst_type, - LLVMValueRef src, - LLVMValueRef *dst_lo, - LLVMValueRef *dst_hi) -{ - LLVMValueRef msb; - LLVMTypeRef dst_vec_type; - - assert(!src_type.floating); - assert(!dst_type.floating); - assert(dst_type.width == src_type.width * 2); - assert(dst_type.length * 2 == src_type.length); - - if(dst_type.sign && src_type.sign) { - /* Replicate the sign bit in the most significant bits */ - msb = LLVMBuildAShr(builder, src, lp_build_int_const_scalar(src_type, src_type.width - 1), ""); - } - else - /* Most significant bits always zero */ - msb = lp_build_zero(src_type); - - /* Interleave bits */ - if(util_cpu_caps.little_endian) { - *dst_lo = lp_build_interleave2(builder, src_type, src, msb, 0); - *dst_hi = lp_build_interleave2(builder, src_type, src, msb, 1); - } - else { - *dst_lo = lp_build_interleave2(builder, src_type, msb, src, 0); - *dst_hi = lp_build_interleave2(builder, src_type, msb, src, 1); - } - - /* Cast the result into the new type (twice as wide) */ - - dst_vec_type = 
lp_build_vec_type(dst_type); - - *dst_lo = LLVMBuildBitCast(builder, *dst_lo, dst_vec_type, ""); - *dst_hi = LLVMBuildBitCast(builder, *dst_hi, dst_vec_type, ""); -} - - -/** - * Expand the bit width. - * - * This will only change the number of bits the values are represented, not the - * values themselves. - */ -void -lp_build_unpack(LLVMBuilderRef builder, - struct lp_type src_type, - struct lp_type dst_type, - LLVMValueRef src, - LLVMValueRef *dst, unsigned num_dsts) -{ - unsigned num_tmps; - unsigned i; - - /* Register width must remain constant */ - assert(src_type.width * src_type.length == dst_type.width * dst_type.length); - - /* We must not loose or gain channels. Only precision */ - assert(src_type.length == dst_type.length * num_dsts); - - num_tmps = 1; - dst[0] = src; - - while(src_type.width < dst_type.width) { - struct lp_type tmp_type = src_type; - - tmp_type.width *= 2; - tmp_type.length /= 2; - - for(i = num_tmps; i--; ) { - lp_build_unpack2(builder, src_type, tmp_type, dst[i], &dst[2*i + 0], &dst[2*i + 1]); - } - - src_type = tmp_type; - - num_tmps *= 2; - } - - assert(num_tmps == num_dsts); -} - - -/** - * Non-interleaved pack. - * - * This will move values as - * - * lo = __ l0 __ l1 __ l2 __.. __ ln - * hi = __ h0 __ h1 __ h2 __.. __ hn - * res = l0 l1 l2 .. ln h0 h1 h2 .. hn - * - * This will only change the number of bits the values are represented, not the - * values themselves. - * - * It is assumed the values are already clamped into the destination type range. - * Values outside that range will produce undefined results. Use - * lp_build_packs2 instead. 
- */ -LLVMValueRef -lp_build_pack2(LLVMBuilderRef builder, - struct lp_type src_type, - struct lp_type dst_type, - LLVMValueRef lo, - LLVMValueRef hi) -{ - LLVMTypeRef src_vec_type = lp_build_vec_type(src_type); - LLVMTypeRef dst_vec_type = lp_build_vec_type(dst_type); - LLVMValueRef shuffle; - LLVMValueRef res; - - dst_vec_type = lp_build_vec_type(dst_type); - - assert(!src_type.floating); - assert(!dst_type.floating); - assert(src_type.width == dst_type.width * 2); - assert(src_type.length * 2 == dst_type.length); - - if(util_cpu_caps.has_sse2 && src_type.width * src_type.length == 128) { - switch(src_type.width) { - case 32: - if(dst_type.sign) { - res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128", src_vec_type, lo, hi); - } - else { - if (util_cpu_caps.has_sse4_1) { - /* PACKUSDW is the only instrinsic with a consistent signature */ - return lp_build_intrinsic_binary(builder, "llvm.x86.sse41.packusdw", dst_vec_type, lo, hi); - } - else { - assert(0); - return LLVMGetUndef(dst_vec_type); - } - } - break; - - case 16: - if(dst_type.sign) - res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packsswb.128", src_vec_type, lo, hi); - else - res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packuswb.128", src_vec_type, lo, hi); - break; - - default: - assert(0); - return LLVMGetUndef(dst_vec_type); - break; - } - - res = LLVMBuildBitCast(builder, res, dst_vec_type, ""); - return res; - } - - lo = LLVMBuildBitCast(builder, lo, dst_vec_type, ""); - hi = LLVMBuildBitCast(builder, hi, dst_vec_type, ""); - - shuffle = lp_build_const_pack_shuffle(dst_type.length); - - res = LLVMBuildShuffleVector(builder, lo, hi, shuffle, ""); - - return res; -} - - - -/** - * Non-interleaved pack and saturate. - * - * Same as lp_build_pack2 but will saturate values so that they fit into the - * destination type. 
- */ -LLVMValueRef -lp_build_packs2(LLVMBuilderRef builder, - struct lp_type src_type, - struct lp_type dst_type, - LLVMValueRef lo, - LLVMValueRef hi) -{ - boolean clamp; - - assert(!src_type.floating); - assert(!dst_type.floating); - assert(src_type.sign == dst_type.sign); - assert(src_type.width == dst_type.width * 2); - assert(src_type.length * 2 == dst_type.length); - - clamp = TRUE; - - /* All X86 SSE non-interleaved pack instructions take signed inputs and - * saturate them, so no need to clamp for those cases. */ - if(util_cpu_caps.has_sse2 && - src_type.width * src_type.length == 128 && - src_type.sign) - clamp = FALSE; - - if(clamp) { - struct lp_build_context bld; - unsigned dst_bits = dst_type.sign ? dst_type.width - 1 : dst_type.width; - LLVMValueRef dst_max = lp_build_int_const_scalar(src_type, ((unsigned long long)1 << dst_bits) - 1); - lp_build_context_init(&bld, builder, src_type); - lo = lp_build_min(&bld, lo, dst_max); - hi = lp_build_min(&bld, hi, dst_max); - /* FIXME: What about lower bound? */ - } - - return lp_build_pack2(builder, src_type, dst_type, lo, hi); -} - - -/** - * Truncate the bit width. - * - * TODO: Handle saturation consistently. - */ -LLVMValueRef -lp_build_pack(LLVMBuilderRef builder, - struct lp_type src_type, - struct lp_type dst_type, - boolean clamped, - const LLVMValueRef *src, unsigned num_srcs) -{ - LLVMValueRef (*pack2)(LLVMBuilderRef builder, - struct lp_type src_type, - struct lp_type dst_type, - LLVMValueRef lo, - LLVMValueRef hi); - LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH]; - unsigned i; - - - /* Register width must remain constant */ - assert(src_type.width * src_type.length == dst_type.width * dst_type.length); - - /* We must not loose or gain channels. 
Only precision */ - assert(src_type.length * num_srcs == dst_type.length); - - if(clamped) - pack2 = &lp_build_pack2; - else - pack2 = &lp_build_packs2; - - for(i = 0; i < num_srcs; ++i) - tmp[i] = src[i]; - - while(src_type.width > dst_type.width) { - struct lp_type tmp_type = src_type; - - tmp_type.width /= 2; - tmp_type.length *= 2; - - /* Take in consideration the sign changes only in the last step */ - if(tmp_type.width == dst_type.width) - tmp_type.sign = dst_type.sign; - - num_srcs /= 2; - - for(i = 0; i < num_srcs; ++i) - tmp[i] = pack2(builder, src_type, tmp_type, tmp[2*i + 0], tmp[2*i + 1]); - - src_type = tmp_type; - } - - assert(num_srcs == 1); - - return tmp[0]; -} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_pack.h b/src/gallium/drivers/llvmpipe/lp_bld_pack.h deleted file mode 100644 index fb2a34984a4..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_bld_pack.h +++ /dev/null @@ -1,95 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * Helper functions for packing/unpacking conversions. - * - * @author Jose Fonseca - */ - - -#ifndef LP_BLD_PACK_H -#define LP_BLD_PACK_H - - -#include - - -struct lp_type; - - -LLVMValueRef -lp_build_interleave2(LLVMBuilderRef builder, - struct lp_type type, - LLVMValueRef a, - LLVMValueRef b, - unsigned lo_hi); - - -void -lp_build_unpack2(LLVMBuilderRef builder, - struct lp_type src_type, - struct lp_type dst_type, - LLVMValueRef src, - LLVMValueRef *dst_lo, - LLVMValueRef *dst_hi); - - -void -lp_build_unpack(LLVMBuilderRef builder, - struct lp_type src_type, - struct lp_type dst_type, - LLVMValueRef src, - LLVMValueRef *dst, unsigned num_dsts); - - -LLVMValueRef -lp_build_packs2(LLVMBuilderRef builder, - struct lp_type src_type, - struct lp_type dst_type, - LLVMValueRef lo, - LLVMValueRef hi); - - -LLVMValueRef -lp_build_pack2(LLVMBuilderRef builder, - struct lp_type src_type, - struct lp_type dst_type, - LLVMValueRef lo, - LLVMValueRef hi); - - -LLVMValueRef -lp_build_pack(LLVMBuilderRef builder, - struct lp_type src_type, - struct lp_type dst_type, - boolean clamped, - const LLVMValueRef *src, unsigned num_srcs); - - -#endif /* !LP_BLD_PACK_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.c b/src/gallium/drivers/llvmpipe/lp_bld_sample.c deleted file mode 100644 index 9003e108c1c..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample.c +++ /dev/null @@ -1,190 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * Texture sampling -- common code. 
- * - * @author Jose Fonseca - */ - -#include "pipe/p_defines.h" -#include "pipe/p_state.h" -#include "util/u_format.h" -#include "util/u_math.h" -#include "lp_bld_debug.h" -#include "lp_bld_const.h" -#include "lp_bld_arit.h" -#include "lp_bld_type.h" -#include "lp_bld_format.h" -#include "lp_bld_sample.h" - - -void -lp_sampler_static_state(struct lp_sampler_static_state *state, - const struct pipe_texture *texture, - const struct pipe_sampler_state *sampler) -{ - memset(state, 0, sizeof *state); - - if(!texture) - return; - - if(!sampler) - return; - - state->format = texture->format; - state->target = texture->target; - state->pot_width = util_is_pot(texture->width0); - state->pot_height = util_is_pot(texture->height0); - state->pot_depth = util_is_pot(texture->depth0); - - state->wrap_s = sampler->wrap_s; - state->wrap_t = sampler->wrap_t; - state->wrap_r = sampler->wrap_r; - state->min_img_filter = sampler->min_img_filter; - state->min_mip_filter = sampler->min_mip_filter; - state->mag_img_filter = sampler->mag_img_filter; - state->compare_mode = sampler->compare_mode; - if(sampler->compare_mode != PIPE_TEX_COMPARE_NONE) { - state->compare_func = sampler->compare_func; - } - state->normalized_coords = sampler->normalized_coords; - state->prefilter = sampler->prefilter; -} - - -/** - * Gather elements from scatter positions in memory into a single vector. - * - * @param src_width src element width - * @param dst_width result element width (source will be expanded to fit) - * @param length length of the offsets, - * @param base_ptr base pointer, should be a i8 pointer type. 
- * @param offsets vector with offsets - */ -LLVMValueRef -lp_build_gather(LLVMBuilderRef builder, - unsigned length, - unsigned src_width, - unsigned dst_width, - LLVMValueRef base_ptr, - LLVMValueRef offsets) -{ - LLVMTypeRef src_type = LLVMIntType(src_width); - LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0); - LLVMTypeRef dst_elem_type = LLVMIntType(dst_width); - LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length); - LLVMValueRef res; - unsigned i; - - res = LLVMGetUndef(dst_vec_type); - for(i = 0; i < length; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - LLVMValueRef elem_offset; - LLVMValueRef elem_ptr; - LLVMValueRef elem; - - elem_offset = LLVMBuildExtractElement(builder, offsets, index, ""); - elem_ptr = LLVMBuildGEP(builder, base_ptr, &elem_offset, 1, ""); - elem_ptr = LLVMBuildBitCast(builder, elem_ptr, src_ptr_type, ""); - elem = LLVMBuildLoad(builder, elem_ptr, ""); - - assert(src_width <= dst_width); - if(src_width > dst_width) - elem = LLVMBuildTrunc(builder, elem, dst_elem_type, ""); - if(src_width < dst_width) - elem = LLVMBuildZExt(builder, elem, dst_elem_type, ""); - - res = LLVMBuildInsertElement(builder, res, elem, index, ""); - } - - return res; -} - - -/** - * Compute the offset of a pixel. 
- * - * x, y, y_stride are vectors - */ -LLVMValueRef -lp_build_sample_offset(struct lp_build_context *bld, - const struct util_format_description *format_desc, - LLVMValueRef x, - LLVMValueRef y, - LLVMValueRef y_stride, - LLVMValueRef data_ptr) -{ - LLVMValueRef x_stride; - LLVMValueRef offset; - - x_stride = lp_build_const_scalar(bld->type, format_desc->block.bits/8); - - if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { - LLVMValueRef x_lo, x_hi; - LLVMValueRef y_lo, y_hi; - LLVMValueRef x_stride_lo, x_stride_hi; - LLVMValueRef y_stride_lo, y_stride_hi; - LLVMValueRef x_offset_lo, x_offset_hi; - LLVMValueRef y_offset_lo, y_offset_hi; - LLVMValueRef offset_lo, offset_hi; - - x_lo = LLVMBuildAnd(bld->builder, x, bld->one, ""); - y_lo = LLVMBuildAnd(bld->builder, y, bld->one, ""); - - x_hi = LLVMBuildLShr(bld->builder, x, bld->one, ""); - y_hi = LLVMBuildLShr(bld->builder, y, bld->one, ""); - - x_stride_lo = x_stride; - y_stride_lo = lp_build_const_scalar(bld->type, 2*format_desc->block.bits/8); - - x_stride_hi = lp_build_const_scalar(bld->type, 4*format_desc->block.bits/8); - y_stride_hi = LLVMBuildShl(bld->builder, y_stride, bld->one, ""); - - x_offset_lo = lp_build_mul(bld, x_lo, x_stride_lo); - y_offset_lo = lp_build_mul(bld, y_lo, y_stride_lo); - offset_lo = lp_build_add(bld, x_offset_lo, y_offset_lo); - - x_offset_hi = lp_build_mul(bld, x_hi, x_stride_hi); - y_offset_hi = lp_build_mul(bld, y_hi, y_stride_hi); - offset_hi = lp_build_add(bld, x_offset_hi, y_offset_hi); - - offset = lp_build_add(bld, offset_hi, offset_lo); - } - else { - LLVMValueRef x_offset; - LLVMValueRef y_offset; - - x_offset = lp_build_mul(bld, x, x_stride); - y_offset = lp_build_mul(bld, y, y_stride); - - offset = lp_build_add(bld, x_offset, y_offset); - } - - return offset; -} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.h b/src/gallium/drivers/llvmpipe/lp_bld_sample.h deleted file mode 100644 index 8cb8210ca76..00000000000 --- 
a/src/gallium/drivers/llvmpipe/lp_bld_sample.h +++ /dev/null @@ -1,155 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * Texture sampling. - * - * @author Jose Fonseca - */ - -#ifndef LP_BLD_SAMPLE_H -#define LP_BLD_SAMPLE_H - - -#include - -struct pipe_texture; -struct pipe_sampler_state; -struct util_format_description; -struct lp_type; -struct lp_build_context; - - -/** - * Sampler static state. - * - * These are the bits of state from pipe_texture and pipe_sampler_state that - * are embedded in the generated code. 
- */ -struct lp_sampler_static_state -{ - /* pipe_texture's state */ - enum pipe_format format; - unsigned target:2; - unsigned pot_width:1; - unsigned pot_height:1; - unsigned pot_depth:1; - - /* pipe_sampler_state's state */ - unsigned wrap_s:3; - unsigned wrap_t:3; - unsigned wrap_r:3; - unsigned min_img_filter:2; - unsigned min_mip_filter:2; - unsigned mag_img_filter:2; - unsigned compare_mode:1; - unsigned compare_func:3; - unsigned normalized_coords:1; - unsigned prefilter:4; -}; - - -/** - * Sampler dynamic state. - * - * These are the bits of state from pipe_texture and pipe_sampler_state that - * are computed in runtime. - * - * There are obtained through callbacks, as we don't want to tie the texture - * sampling code generation logic to any particular texture layout or pipe - * driver. - */ -struct lp_sampler_dynamic_state -{ - - /** Obtain the base texture width. */ - LLVMValueRef - (*width)( struct lp_sampler_dynamic_state *state, - LLVMBuilderRef builder, - unsigned unit); - - /** Obtain the base texture height. */ - LLVMValueRef - (*height)( struct lp_sampler_dynamic_state *state, - LLVMBuilderRef builder, - unsigned unit); - - LLVMValueRef - (*stride)( struct lp_sampler_dynamic_state *state, - LLVMBuilderRef builder, - unsigned unit); - - LLVMValueRef - (*data_ptr)( struct lp_sampler_dynamic_state *state, - LLVMBuilderRef builder, - unsigned unit); - -}; - - -/** - * Derive the sampler static state. 
- */ -void -lp_sampler_static_state(struct lp_sampler_static_state *state, - const struct pipe_texture *texture, - const struct pipe_sampler_state *sampler); - - -LLVMValueRef -lp_build_gather(LLVMBuilderRef builder, - unsigned length, - unsigned src_width, - unsigned dst_width, - LLVMValueRef base_ptr, - LLVMValueRef offsets); - - -LLVMValueRef -lp_build_sample_offset(struct lp_build_context *bld, - const struct util_format_description *format_desc, - LLVMValueRef x, - LLVMValueRef y, - LLVMValueRef y_stride, - LLVMValueRef data_ptr); - - -void -lp_build_sample_soa(LLVMBuilderRef builder, - const struct lp_sampler_static_state *static_state, - struct lp_sampler_dynamic_state *dynamic_state, - struct lp_type fp_type, - unsigned unit, - unsigned num_coords, - const LLVMValueRef *coords, - LLVMValueRef lodbias, - LLVMValueRef *texel); - - - -#endif /* LP_BLD_SAMPLE_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c deleted file mode 100644 index 854dd0b28c2..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c +++ /dev/null @@ -1,598 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * Texture sampling -- SoA. - * - * @author Jose Fonseca - */ - -#include "pipe/p_defines.h" -#include "pipe/p_state.h" -#include "util/u_debug.h" -#include "util/u_debug_dump.h" -#include "util/u_memory.h" -#include "util/u_math.h" -#include "util/u_format.h" -#include "util/u_cpu_detect.h" -#include "lp_bld_debug.h" -#include "lp_bld_type.h" -#include "lp_bld_const.h" -#include "lp_bld_conv.h" -#include "lp_bld_arit.h" -#include "lp_bld_logic.h" -#include "lp_bld_swizzle.h" -#include "lp_bld_pack.h" -#include "lp_bld_format.h" -#include "lp_bld_sample.h" - - -/** - * Keep all information for sampling code generation in a single place. 
- */ -struct lp_build_sample_context -{ - LLVMBuilderRef builder; - - const struct lp_sampler_static_state *static_state; - - struct lp_sampler_dynamic_state *dynamic_state; - - const struct util_format_description *format_desc; - - /** Incoming coordinates type and build context */ - struct lp_type coord_type; - struct lp_build_context coord_bld; - - /** Integer coordinates */ - struct lp_type int_coord_type; - struct lp_build_context int_coord_bld; - - /** Output texels type and build context */ - struct lp_type texel_type; - struct lp_build_context texel_bld; -}; - - -static void -lp_build_sample_texel_soa(struct lp_build_sample_context *bld, - LLVMValueRef x, - LLVMValueRef y, - LLVMValueRef y_stride, - LLVMValueRef data_ptr, - LLVMValueRef *texel) -{ - LLVMValueRef offset; - LLVMValueRef packed; - - offset = lp_build_sample_offset(&bld->int_coord_bld, - bld->format_desc, - x, y, y_stride, - data_ptr); - - assert(bld->format_desc->block.width == 1); - assert(bld->format_desc->block.height == 1); - assert(bld->format_desc->block.bits <= bld->texel_type.width); - - packed = lp_build_gather(bld->builder, - bld->texel_type.length, - bld->format_desc->block.bits, - bld->texel_type.width, - data_ptr, offset); - - lp_build_unpack_rgba_soa(bld->builder, - bld->format_desc, - bld->texel_type, - packed, texel); -} - - -static LLVMValueRef -lp_build_sample_packed(struct lp_build_sample_context *bld, - LLVMValueRef x, - LLVMValueRef y, - LLVMValueRef y_stride, - LLVMValueRef data_ptr) -{ - LLVMValueRef offset; - - offset = lp_build_sample_offset(&bld->int_coord_bld, - bld->format_desc, - x, y, y_stride, - data_ptr); - - assert(bld->format_desc->block.width == 1); - assert(bld->format_desc->block.height == 1); - assert(bld->format_desc->block.bits <= bld->texel_type.width); - - return lp_build_gather(bld->builder, - bld->texel_type.length, - bld->format_desc->block.bits, - bld->texel_type.width, - data_ptr, offset); -} - - -static LLVMValueRef -lp_build_sample_wrap(struct 
lp_build_sample_context *bld, - LLVMValueRef coord, - LLVMValueRef length, - boolean is_pot, - unsigned wrap_mode) -{ - struct lp_build_context *int_coord_bld = &bld->int_coord_bld; - LLVMValueRef length_minus_one; - - length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); - - switch(wrap_mode) { - case PIPE_TEX_WRAP_REPEAT: - if(is_pot) - coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, ""); - else - /* Signed remainder won't give the right results for negative - * dividends but unsigned remainder does.*/ - coord = LLVMBuildURem(bld->builder, coord, length, ""); - break; - - case PIPE_TEX_WRAP_CLAMP: - coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero); - coord = lp_build_min(int_coord_bld, coord, length_minus_one); - break; - - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - case PIPE_TEX_WRAP_MIRROR_REPEAT: - case PIPE_TEX_WRAP_MIRROR_CLAMP: - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - /* FIXME */ - _debug_printf("llvmpipe: failed to translate texture wrap mode %s\n", - debug_dump_tex_wrap(wrap_mode, TRUE)); - coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero); - coord = lp_build_min(int_coord_bld, coord, length_minus_one); - break; - - default: - assert(0); - } - - return coord; -} - - -static void -lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld, - LLVMValueRef s, - LLVMValueRef t, - LLVMValueRef width, - LLVMValueRef height, - LLVMValueRef stride, - LLVMValueRef data_ptr, - LLVMValueRef *texel) -{ - LLVMValueRef x; - LLVMValueRef y; - - x = lp_build_ifloor(&bld->coord_bld, s); - y = lp_build_ifloor(&bld->coord_bld, t); - lp_build_name(x, "tex.x.floor"); - lp_build_name(y, "tex.y.floor"); - - x = lp_build_sample_wrap(bld, x, width, bld->static_state->pot_width, bld->static_state->wrap_s); - y = lp_build_sample_wrap(bld, y, height, bld->static_state->pot_height, bld->static_state->wrap_t); - lp_build_name(x, 
"tex.x.wrapped"); - lp_build_name(y, "tex.y.wrapped"); - - lp_build_sample_texel_soa(bld, x, y, stride, data_ptr, texel); -} - - -static void -lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld, - LLVMValueRef s, - LLVMValueRef t, - LLVMValueRef width, - LLVMValueRef height, - LLVMValueRef stride, - LLVMValueRef data_ptr, - LLVMValueRef *texel) -{ - LLVMValueRef half; - LLVMValueRef s_ipart; - LLVMValueRef t_ipart; - LLVMValueRef s_fpart; - LLVMValueRef t_fpart; - LLVMValueRef x0, x1; - LLVMValueRef y0, y1; - LLVMValueRef neighbors[2][2][4]; - unsigned chan; - - half = lp_build_const_scalar(bld->coord_type, 0.5); - s = lp_build_sub(&bld->coord_bld, s, half); - t = lp_build_sub(&bld->coord_bld, t, half); - - s_ipart = lp_build_floor(&bld->coord_bld, s); - t_ipart = lp_build_floor(&bld->coord_bld, t); - - s_fpart = lp_build_sub(&bld->coord_bld, s, s_ipart); - t_fpart = lp_build_sub(&bld->coord_bld, t, t_ipart); - - x0 = lp_build_itrunc(&bld->coord_bld, s_ipart); - y0 = lp_build_itrunc(&bld->coord_bld, t_ipart); - - x0 = lp_build_sample_wrap(bld, x0, width, bld->static_state->pot_width, bld->static_state->wrap_s); - y0 = lp_build_sample_wrap(bld, y0, height, bld->static_state->pot_height, bld->static_state->wrap_t); - - x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one); - y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one); - - x1 = lp_build_sample_wrap(bld, x1, width, bld->static_state->pot_width, bld->static_state->wrap_s); - y1 = lp_build_sample_wrap(bld, y1, height, bld->static_state->pot_height, bld->static_state->wrap_t); - - lp_build_sample_texel_soa(bld, x0, y0, stride, data_ptr, neighbors[0][0]); - lp_build_sample_texel_soa(bld, x1, y0, stride, data_ptr, neighbors[0][1]); - lp_build_sample_texel_soa(bld, x0, y1, stride, data_ptr, neighbors[1][0]); - lp_build_sample_texel_soa(bld, x1, y1, stride, data_ptr, neighbors[1][1]); - - /* TODO: Don't interpolate missing channels */ - for(chan = 0; chan < 4; ++chan) { - 
texel[chan] = lp_build_lerp_2d(&bld->texel_bld, - s_fpart, t_fpart, - neighbors[0][0][chan], - neighbors[0][1][chan], - neighbors[1][0][chan], - neighbors[1][1][chan]); - } -} - - -static void -lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder, - struct lp_type dst_type, - LLVMValueRef packed, - LLVMValueRef *rgba) -{ - LLVMValueRef mask = lp_build_int_const_scalar(dst_type, 0xff); - unsigned chan; - - /* Decode the input vector components */ - for (chan = 0; chan < 4; ++chan) { - unsigned start = chan*8; - unsigned stop = start + 8; - LLVMValueRef input; - - input = packed; - - if(start) - input = LLVMBuildLShr(builder, input, lp_build_int_const_scalar(dst_type, start), ""); - - if(stop < 32) - input = LLVMBuildAnd(builder, input, mask, ""); - - input = lp_build_unsigned_norm_to_float(builder, 8, dst_type, input); - - rgba[chan] = input; - } -} - - -static void -lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, - LLVMValueRef s, - LLVMValueRef t, - LLVMValueRef width, - LLVMValueRef height, - LLVMValueRef stride, - LLVMValueRef data_ptr, - LLVMValueRef *texel) -{ - LLVMBuilderRef builder = bld->builder; - struct lp_build_context i32, h16, u8n; - LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type; - LLVMValueRef i32_c8, i32_c128, i32_c255; - LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi; - LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi; - LLVMValueRef x0, x1; - LLVMValueRef y0, y1; - LLVMValueRef neighbors[2][2]; - LLVMValueRef neighbors_lo[2][2]; - LLVMValueRef neighbors_hi[2][2]; - LLVMValueRef packed, packed_lo, packed_hi; - LLVMValueRef unswizzled[4]; - - lp_build_context_init(&i32, builder, lp_type_int(32)); - lp_build_context_init(&h16, builder, lp_type_ufixed(16)); - lp_build_context_init(&u8n, builder, lp_type_unorm(8)); - - i32_vec_type = lp_build_vec_type(i32.type); - h16_vec_type = lp_build_vec_type(h16.type); - u8n_vec_type = lp_build_vec_type(u8n.type); - - s = lp_build_mul_imm(&bld->coord_bld, s, 256); - t = 
lp_build_mul_imm(&bld->coord_bld, t, 256); - - s = LLVMBuildFPToSI(builder, s, i32_vec_type, ""); - t = LLVMBuildFPToSI(builder, t, i32_vec_type, ""); - - i32_c128 = lp_build_int_const_scalar(i32.type, -128); - s = LLVMBuildAdd(builder, s, i32_c128, ""); - t = LLVMBuildAdd(builder, t, i32_c128, ""); - - i32_c8 = lp_build_int_const_scalar(i32.type, 8); - s_ipart = LLVMBuildAShr(builder, s, i32_c8, ""); - t_ipart = LLVMBuildAShr(builder, t, i32_c8, ""); - - i32_c255 = lp_build_int_const_scalar(i32.type, 255); - s_fpart = LLVMBuildAnd(builder, s, i32_c255, ""); - t_fpart = LLVMBuildAnd(builder, t, i32_c255, ""); - - x0 = s_ipart; - y0 = t_ipart; - - x0 = lp_build_sample_wrap(bld, x0, width, bld->static_state->pot_width, bld->static_state->wrap_s); - y0 = lp_build_sample_wrap(bld, y0, height, bld->static_state->pot_height, bld->static_state->wrap_t); - - x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one); - y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one); - - x1 = lp_build_sample_wrap(bld, x1, width, bld->static_state->pot_width, bld->static_state->wrap_s); - y1 = lp_build_sample_wrap(bld, y1, height, bld->static_state->pot_height, bld->static_state->wrap_t); - - /* - * Transform 4 x i32 in - * - * s_fpart = {s0, s1, s2, s3} - * - * into 8 x i16 - * - * s_fpart = {00, s0, 00, s1, 00, s2, 00, s3} - * - * into two 8 x i16 - * - * s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1} - * s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3} - * - * and likewise for t_fpart. There is no risk of loosing precision here - * since the fractional parts only use the lower 8bits. 
- */ - - s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, ""); - t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, ""); - - { - LLVMTypeRef elem_type = LLVMInt32Type(); - LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH]; - LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH]; - LLVMValueRef shuffle_lo; - LLVMValueRef shuffle_hi; - unsigned i, j; - - for(j = 0; j < h16.type.length; j += 4) { - unsigned subindex = util_cpu_caps.little_endian ? 0 : 1; - LLVMValueRef index; - - index = LLVMConstInt(elem_type, j/2 + subindex, 0); - for(i = 0; i < 4; ++i) - shuffles_lo[j + i] = index; - - index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0); - for(i = 0; i < 4; ++i) - shuffles_hi[j + i] = index; - } - - shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length); - shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length); - - s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_lo, ""); - t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_lo, ""); - s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_hi, ""); - t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, ""); - } - - /* - * Fetch the pixels as 4 x 32bit (rgba order might differ): - * - * rgba0 rgba1 rgba2 rgba3 - * - * bit cast them into 16 x u8 - * - * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3 - * - * unpack them into two 8 x i16: - * - * r0 g0 b0 a0 r1 g1 b1 a1 - * r2 g2 b2 a2 r3 g3 b3 a3 - * - * The higher 8 bits of the resulting elements will be zero. 
- */ - - neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_ptr); - neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_ptr); - neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_ptr); - neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_ptr); - - neighbors[0][0] = LLVMBuildBitCast(builder, neighbors[0][0], u8n_vec_type, ""); - neighbors[0][1] = LLVMBuildBitCast(builder, neighbors[0][1], u8n_vec_type, ""); - neighbors[1][0] = LLVMBuildBitCast(builder, neighbors[1][0], u8n_vec_type, ""); - neighbors[1][1] = LLVMBuildBitCast(builder, neighbors[1][1], u8n_vec_type, ""); - - lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][0], &neighbors_lo[0][0], &neighbors_hi[0][0]); - lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][1], &neighbors_lo[0][1], &neighbors_hi[0][1]); - lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][0], &neighbors_lo[1][0], &neighbors_hi[1][0]); - lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][1], &neighbors_lo[1][1], &neighbors_hi[1][1]); - - /* - * Linear interpolate with 8.8 fixed point. - */ - - packed_lo = lp_build_lerp_2d(&h16, - s_fpart_lo, t_fpart_lo, - neighbors_lo[0][0], - neighbors_lo[0][1], - neighbors_lo[1][0], - neighbors_lo[1][1]); - - packed_hi = lp_build_lerp_2d(&h16, - s_fpart_hi, t_fpart_hi, - neighbors_hi[0][0], - neighbors_hi[0][1], - neighbors_hi[1][0], - neighbors_hi[1][1]); - - packed = lp_build_pack2(builder, h16.type, u8n.type, packed_lo, packed_hi); - - /* - * Convert to SoA and swizzle. 
- */ - - packed = LLVMBuildBitCast(builder, packed, i32_vec_type, ""); - - lp_build_rgba8_to_f32_soa(bld->builder, - bld->texel_type, - packed, unswizzled); - - lp_build_format_swizzle_soa(bld->format_desc, - bld->texel_type, unswizzled, - texel); -} - - -static void -lp_build_sample_compare(struct lp_build_sample_context *bld, - LLVMValueRef p, - LLVMValueRef *texel) -{ - struct lp_build_context *texel_bld = &bld->texel_bld; - LLVMValueRef res; - unsigned chan; - - if(bld->static_state->compare_mode == PIPE_TEX_COMPARE_NONE) - return; - - /* TODO: Compare before swizzling, to avoid redundant computations */ - res = NULL; - for(chan = 0; chan < 4; ++chan) { - LLVMValueRef cmp; - cmp = lp_build_cmp(texel_bld, bld->static_state->compare_func, p, texel[chan]); - cmp = lp_build_select(texel_bld, cmp, texel_bld->one, texel_bld->zero); - - if(res) - res = lp_build_add(texel_bld, res, cmp); - else - res = cmp; - } - - assert(res); - res = lp_build_mul(texel_bld, res, lp_build_const_scalar(texel_bld->type, 0.25)); - - /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */ - for(chan = 0; chan < 3; ++chan) - texel[chan] = res; - texel[3] = texel_bld->one; -} - - -void -lp_build_sample_soa(LLVMBuilderRef builder, - const struct lp_sampler_static_state *static_state, - struct lp_sampler_dynamic_state *dynamic_state, - struct lp_type type, - unsigned unit, - unsigned num_coords, - const LLVMValueRef *coords, - LLVMValueRef lodbias, - LLVMValueRef *texel) -{ - struct lp_build_sample_context bld; - LLVMValueRef width; - LLVMValueRef height; - LLVMValueRef stride; - LLVMValueRef data_ptr; - LLVMValueRef s; - LLVMValueRef t; - LLVMValueRef p; - - /* Setup our build context */ - memset(&bld, 0, sizeof bld); - bld.builder = builder; - bld.static_state = static_state; - bld.dynamic_state = dynamic_state; - bld.format_desc = util_format_description(static_state->format); - bld.coord_type = type; - bld.int_coord_type = lp_int_type(type); - bld.texel_type = type; - 
lp_build_context_init(&bld.coord_bld, builder, bld.coord_type); - lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type); - lp_build_context_init(&bld.texel_bld, builder, bld.texel_type); - - /* Get the dynamic state */ - width = dynamic_state->width(dynamic_state, builder, unit); - height = dynamic_state->height(dynamic_state, builder, unit); - stride = dynamic_state->stride(dynamic_state, builder, unit); - data_ptr = dynamic_state->data_ptr(dynamic_state, builder, unit); - - s = coords[0]; - t = coords[1]; - p = coords[2]; - - width = lp_build_broadcast_scalar(&bld.int_coord_bld, width); - height = lp_build_broadcast_scalar(&bld.int_coord_bld, height); - stride = lp_build_broadcast_scalar(&bld.int_coord_bld, stride); - - if(static_state->target == PIPE_TEXTURE_1D) - t = bld.coord_bld.zero; - - if(static_state->normalized_coords) { - LLVMTypeRef coord_vec_type = lp_build_vec_type(bld.coord_type); - LLVMValueRef fp_width = LLVMBuildSIToFP(builder, width, coord_vec_type, ""); - LLVMValueRef fp_height = LLVMBuildSIToFP(builder, height, coord_vec_type, ""); - s = lp_build_mul(&bld.coord_bld, s, fp_width); - t = lp_build_mul(&bld.coord_bld, t, fp_height); - } - - switch (static_state->min_img_filter) { - case PIPE_TEX_FILTER_NEAREST: - lp_build_sample_2d_nearest_soa(&bld, s, t, width, height, stride, data_ptr, texel); - break; - case PIPE_TEX_FILTER_LINEAR: - if(lp_format_is_rgba8(bld.format_desc)) - lp_build_sample_2d_linear_aos(&bld, s, t, width, height, stride, data_ptr, texel); - else - lp_build_sample_2d_linear_soa(&bld, s, t, width, height, stride, data_ptr, texel); - break; - default: - assert(0); - } - - /* FIXME: respect static_state->min_mip_filter */; - /* FIXME: respect static_state->mag_img_filter */; - /* FIXME: respect static_state->prefilter */; - - lp_build_sample_compare(&bld, p, texel); -} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_struct.c b/src/gallium/drivers/llvmpipe/lp_bld_struct.c deleted file mode 100644 index 
3998ac374fe..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_bld_struct.c +++ /dev/null @@ -1,72 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/** - * @file - * Helper functions for manipulation structures. 
- * - * @author Jose Fonseca - */ - - -#include "util/u_debug.h" -#include "util/u_memory.h" - -#include "lp_bld_debug.h" -#include "lp_bld_struct.h" - - -LLVMValueRef -lp_build_struct_get_ptr(LLVMBuilderRef builder, - LLVMValueRef ptr, - unsigned member, - const char *name) -{ - LLVMValueRef indices[2]; - LLVMValueRef member_ptr; - indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); - indices[1] = LLVMConstInt(LLVMInt32Type(), member, 0); - member_ptr = LLVMBuildGEP(builder, ptr, indices, Elements(indices), ""); - lp_build_name(member_ptr, "%s.%s_ptr", LLVMGetValueName(ptr), name); - return member_ptr; -} - - -LLVMValueRef -lp_build_struct_get(LLVMBuilderRef builder, - LLVMValueRef ptr, - unsigned member, - const char *name) -{ - LLVMValueRef member_ptr; - LLVMValueRef res; - member_ptr = lp_build_struct_get_ptr(builder, ptr, member, name); - res = LLVMBuildLoad(builder, member_ptr, ""); - lp_build_name(res, "%s.%s", LLVMGetValueName(ptr), name); - return res; -} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_struct.h b/src/gallium/drivers/llvmpipe/lp_bld_struct.h deleted file mode 100644 index 740392f5611..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_bld_struct.h +++ /dev/null @@ -1,75 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * Helper functions for type conversions. - * - * @author Jose Fonseca - */ - - -#ifndef LP_BLD_STRUCT_H -#define LP_BLD_STRUCT_H - - -#include -#include - -#include "util/u_debug.h" -#include "util/u_memory.h" - - -#define LP_CHECK_STRUCT_SIZE(_ctype, _ltarget, _ltype) \ - assert(LLVMABISizeOfType(_ltarget, _ltype) == \ - sizeof(_ctype)) - -#define LP_CHECK_MEMBER_OFFSET(_ctype, _cmember, _ltarget, _ltype, _lindex) \ - assert(LLVMOffsetOfElement(_ltarget, _ltype, _lindex) == \ - offsetof(_ctype, _cmember)) - - -/** - * Get value pointer to a structure member. - */ -LLVMValueRef -lp_build_struct_get_ptr(LLVMBuilderRef builder, - LLVMValueRef ptr, - unsigned member, - const char *name); - -/** - * Get the value of a structure member. - */ -LLVMValueRef -lp_build_struct_get(LLVMBuilderRef builder, - LLVMValueRef ptr, - unsigned member, - const char *name); - - -#endif /* !LP_BLD_STRUCT_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c deleted file mode 100644 index 64e81f7b1fe..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c +++ /dev/null @@ -1,239 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * Helper functions for swizzling/shuffling. 
- * - * @author Jose Fonseca - */ - - -#include "util/u_debug.h" - -#include "lp_bld_type.h" -#include "lp_bld_const.h" -#include "lp_bld_logic.h" -#include "lp_bld_swizzle.h" - - -LLVMValueRef -lp_build_broadcast(LLVMBuilderRef builder, - LLVMTypeRef vec_type, - LLVMValueRef scalar) -{ - const unsigned n = LLVMGetVectorSize(vec_type); - LLVMValueRef res; - unsigned i; - - res = LLVMGetUndef(vec_type); - for(i = 0; i < n; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - res = LLVMBuildInsertElement(builder, res, scalar, index, ""); - } - - return res; -} - - -LLVMValueRef -lp_build_broadcast_scalar(struct lp_build_context *bld, - LLVMValueRef scalar) -{ - const struct lp_type type = bld->type; - LLVMValueRef res; - unsigned i; - - res = bld->undef; - for(i = 0; i < type.length; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - res = LLVMBuildInsertElement(bld->builder, res, scalar, index, ""); - } - - return res; -} - - -LLVMValueRef -lp_build_broadcast_aos(struct lp_build_context *bld, - LLVMValueRef a, - unsigned channel) -{ - const struct lp_type type = bld->type; - const unsigned n = type.length; - unsigned i, j; - - if(a == bld->undef || a == bld->zero || a == bld->one) - return a; - - /* XXX: SSE3 has PSHUFB which should be better than bitmasks, but forcing - * using shuffles here actually causes worst results. More investigation is - * needed. */ - if (n <= 4) { - /* - * Shuffle. - */ - LLVMTypeRef elem_type = LLVMInt32Type(); - LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; - - for(j = 0; j < n; j += 4) - for(i = 0; i < 4; ++i) - shuffles[j + i] = LLVMConstInt(elem_type, j + channel, 0); - - return LLVMBuildShuffleVector(bld->builder, a, bld->undef, LLVMConstVector(shuffles, n), ""); - } - else { - /* - * Bit mask and recursive shifts - * - * XYZW XYZW .... XYZW <= input - * 0Y00 0Y00 .... 0Y00 - * YY00 YY00 .... YY00 - * YYYY YYYY .... 
YYYY <= output - */ - struct lp_type type4 = type; - const char shifts[4][2] = { - { 1, 2}, - {-1, 2}, - { 1, -2}, - {-1, -2} - }; - boolean cond[4]; - unsigned i; - - memset(cond, 0, sizeof cond); - cond[channel] = 1; - - a = LLVMBuildAnd(bld->builder, a, lp_build_const_mask_aos(type, cond), ""); - - type4.width *= 4; - type4.length /= 4; - - a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(type4), ""); - - for(i = 0; i < 2; ++i) { - LLVMValueRef tmp = NULL; - int shift = shifts[channel][i]; - -#ifdef PIPE_ARCH_LITTLE_ENDIAN - shift = -shift; -#endif - - if(shift > 0) - tmp = LLVMBuildLShr(bld->builder, a, lp_build_int_const_scalar(type4, shift*type.width), ""); - if(shift < 0) - tmp = LLVMBuildShl(bld->builder, a, lp_build_int_const_scalar(type4, -shift*type.width), ""); - - assert(tmp); - if(tmp) - a = LLVMBuildOr(bld->builder, a, tmp, ""); - } - - return LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(type), ""); - } -} - - -LLVMValueRef -lp_build_swizzle1_aos(struct lp_build_context *bld, - LLVMValueRef a, - const unsigned char swizzle[4]) -{ - const unsigned n = bld->type.length; - unsigned i, j; - - if(a == bld->undef || a == bld->zero || a == bld->one) - return a; - - if(swizzle[0] == swizzle[1] && swizzle[1] == swizzle[2] && swizzle[2] == swizzle[3]) - return lp_build_broadcast_aos(bld, a, swizzle[0]); - - { - /* - * Shuffle. 
- */ - LLVMTypeRef elem_type = LLVMInt32Type(); - LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; - - for(j = 0; j < n; j += 4) - for(i = 0; i < 4; ++i) - shuffles[j + i] = LLVMConstInt(elem_type, j + swizzle[i], 0); - - return LLVMBuildShuffleVector(bld->builder, a, bld->undef, LLVMConstVector(shuffles, n), ""); - } -} - - -LLVMValueRef -lp_build_swizzle2_aos(struct lp_build_context *bld, - LLVMValueRef a, - LLVMValueRef b, - const unsigned char swizzle[4]) -{ - const unsigned n = bld->type.length; - unsigned i, j; - - if(swizzle[0] < 4 && swizzle[1] < 4 && swizzle[2] < 4 && swizzle[3] < 4) - return lp_build_swizzle1_aos(bld, a, swizzle); - - if(a == b) { - unsigned char swizzle1[4]; - swizzle1[0] = swizzle[0] % 4; - swizzle1[1] = swizzle[1] % 4; - swizzle1[2] = swizzle[2] % 4; - swizzle1[3] = swizzle[3] % 4; - return lp_build_swizzle1_aos(bld, a, swizzle1); - } - - if(swizzle[0] % 4 == 0 && - swizzle[1] % 4 == 1 && - swizzle[2] % 4 == 2 && - swizzle[3] % 4 == 3) { - boolean cond[4]; - cond[0] = swizzle[0] / 4; - cond[1] = swizzle[1] / 4; - cond[2] = swizzle[2] / 4; - cond[3] = swizzle[3] / 4; - return lp_build_select_aos(bld, a, b, cond); - } - - { - /* - * Shuffle. - */ - LLVMTypeRef elem_type = LLVMInt32Type(); - LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; - - for(j = 0; j < n; j += 4) - for(i = 0; i < 4; ++i) - shuffles[j + i] = LLVMConstInt(elem_type, j + (swizzle[i] % 4) + (swizzle[i] / 4 * n), 0); - - return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), ""); - } -} - - diff --git a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h deleted file mode 100644 index b9472127a63..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h +++ /dev/null @@ -1,91 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * Helper functions for swizzling/shuffling. - * - * @author Jose Fonseca - */ - - -#ifndef LP_BLD_SWIZZLE_H -#define LP_BLD_SWIZZLE_H - - -#include - - -struct lp_type; -struct lp_build_context; - - -LLVMValueRef -lp_build_broadcast(LLVMBuilderRef builder, - LLVMTypeRef vec_type, - LLVMValueRef scalar); - - -LLVMValueRef -lp_build_broadcast_scalar(struct lp_build_context *bld, - LLVMValueRef scalar); - - -/** - * Broadcast one channel of a vector composed of arrays of XYZW structures into - * all four channel. - */ -LLVMValueRef -lp_build_broadcast_aos(struct lp_build_context *bld, - LLVMValueRef a, - unsigned channel); - - -/** - * Swizzle a vector consisting of an array of XYZW structs. - * - * @param swizzle is the in [0,4[ range. 
- */ -LLVMValueRef -lp_build_swizzle1_aos(struct lp_build_context *bld, - LLVMValueRef a, - const unsigned char swizzle[4]); - - -/** - * Swizzle two vector consisting of an array of XYZW structs. - * - * @param swizzle is the in [0,8[ range. Values in [4,8[ range refer to b. - */ -LLVMValueRef -lp_build_swizzle2_aos(struct lp_build_context *bld, - LLVMValueRef a, - LLVMValueRef b, - const unsigned char swizzle[4]); - - -#endif /* !LP_BLD_SWIZZLE_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h b/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h deleted file mode 100644 index eddb7a83fa2..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h +++ /dev/null @@ -1,84 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -/** - * @file - * TGSI to LLVM IR translation. - * - * @author Jose Fonseca - */ - -#ifndef LP_BLD_TGSI_H -#define LP_BLD_TGSI_H - -#include - - -struct tgsi_token; -struct lp_type; -struct lp_build_context; -struct lp_build_mask_context; - - -/** - * Sampler code generation interface. - * - * Although texture sampling is a requirement for TGSI translation, it is - * a very different problem with several different approaches to it. This - * structure establishes an interface for texture sampling code generation, so - * that we can easily use different texture sampling strategies. - */ -struct lp_build_sampler_soa -{ - void - (*destroy)( struct lp_build_sampler_soa *sampler ); - - void - (*emit_fetch_texel)( struct lp_build_sampler_soa *sampler, - LLVMBuilderRef builder, - struct lp_type type, - unsigned unit, - unsigned num_coords, - const LLVMValueRef *coords, - LLVMValueRef lodbias, - LLVMValueRef *texel); -}; - - -void -lp_build_tgsi_soa(LLVMBuilderRef builder, - const struct tgsi_token *tokens, - struct lp_type type, - struct lp_build_mask_context *mask, - LLVMValueRef consts_ptr, - const LLVMValueRef *pos, - const LLVMValueRef (*inputs)[4], - LLVMValueRef (*outputs)[4], - struct lp_build_sampler_soa *sampler); - - -#endif /* LP_BLD_TGSI_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c deleted file mode 100644 index 85e3b1bdd42..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c +++ /dev/null @@ -1,1467 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * TGSI to LLVM IR translation -- SoA. - * - * @author Jose Fonseca - * - * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell, - * Brian Paul, and others. 
- */ - -#include "pipe/p_config.h" -#include "pipe/p_shader_tokens.h" -#include "util/u_debug.h" -#include "util/u_math.h" -#include "util/u_memory.h" -#include "tgsi/tgsi_info.h" -#include "tgsi/tgsi_parse.h" -#include "tgsi/tgsi_util.h" -#include "tgsi/tgsi_exec.h" -#include "lp_bld_type.h" -#include "lp_bld_const.h" -#include "lp_bld_arit.h" -#include "lp_bld_logic.h" -#include "lp_bld_swizzle.h" -#include "lp_bld_flow.h" -#include "lp_bld_tgsi.h" - - -#define LP_MAX_TEMPS 256 -#define LP_MAX_IMMEDIATES 256 - - -#define FOR_EACH_CHANNEL( CHAN )\ - for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) - -#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ - ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN))) - -#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ - if (IS_DST0_CHANNEL_ENABLED( INST, CHAN )) - -#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\ - FOR_EACH_CHANNEL( CHAN )\ - IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) - -#define CHAN_X 0 -#define CHAN_Y 1 -#define CHAN_Z 2 -#define CHAN_W 3 - -#define QUAD_TOP_LEFT 0 -#define QUAD_TOP_RIGHT 1 -#define QUAD_BOTTOM_LEFT 2 -#define QUAD_BOTTOM_RIGHT 3 - - -struct lp_build_tgsi_soa_context -{ - struct lp_build_context base; - - LLVMValueRef consts_ptr; - const LLVMValueRef *pos; - const LLVMValueRef (*inputs)[NUM_CHANNELS]; - LLVMValueRef (*outputs)[NUM_CHANNELS]; - - struct lp_build_sampler_soa *sampler; - - LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS]; - LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS]; - - struct lp_build_mask_context *mask; -}; - - -static const unsigned char -swizzle_left[4] = { - QUAD_TOP_LEFT, QUAD_TOP_LEFT, - QUAD_BOTTOM_LEFT, QUAD_BOTTOM_LEFT -}; - -static const unsigned char -swizzle_right[4] = { - QUAD_TOP_RIGHT, QUAD_TOP_RIGHT, - QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT -}; - -static const unsigned char -swizzle_top[4] = { - QUAD_TOP_LEFT, QUAD_TOP_RIGHT, - QUAD_TOP_LEFT, QUAD_TOP_RIGHT -}; - -static const unsigned char -swizzle_bottom[4] = { - QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT, - 
QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT -}; - - -static LLVMValueRef -emit_ddx(struct lp_build_tgsi_soa_context *bld, - LLVMValueRef src) -{ - LLVMValueRef src_left = lp_build_swizzle1_aos(&bld->base, src, swizzle_left); - LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right); - return lp_build_sub(&bld->base, src_right, src_left); -} - - -static LLVMValueRef -emit_ddy(struct lp_build_tgsi_soa_context *bld, - LLVMValueRef src) -{ - LLVMValueRef src_top = lp_build_swizzle1_aos(&bld->base, src, swizzle_top); - LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom); - return lp_build_sub(&bld->base, src_top, src_bottom); -} - - -/** - * Register fetch. - */ -static LLVMValueRef -emit_fetch( - struct lp_build_tgsi_soa_context *bld, - const struct tgsi_full_instruction *inst, - unsigned index, - const unsigned chan_index ) -{ - const struct tgsi_full_src_register *reg = &inst->Src[index]; - unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); - LLVMValueRef res; - - switch (swizzle) { - case TGSI_SWIZZLE_X: - case TGSI_SWIZZLE_Y: - case TGSI_SWIZZLE_Z: - case TGSI_SWIZZLE_W: - - switch (reg->Register.File) { - case TGSI_FILE_CONSTANT: { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0); - LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, ""); - LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, ""); - res = lp_build_broadcast_scalar(&bld->base, scalar); - break; - } - - case TGSI_FILE_IMMEDIATE: - res = bld->immediates[reg->Register.Index][swizzle]; - assert(res); - break; - - case TGSI_FILE_INPUT: - res = bld->inputs[reg->Register.Index][swizzle]; - assert(res); - break; - - case TGSI_FILE_TEMPORARY: - res = bld->temps[reg->Register.Index][swizzle]; - if(!res) - return bld->base.undef; - break; - - default: - assert( 0 ); - return bld->base.undef; - } - break; - - default: - assert( 0 ); - return 
bld->base.undef; - } - - switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) { - case TGSI_UTIL_SIGN_CLEAR: - res = lp_build_abs( &bld->base, res ); - break; - - case TGSI_UTIL_SIGN_SET: - /* TODO: Use bitwese OR for floating point */ - res = lp_build_abs( &bld->base, res ); - res = LLVMBuildNeg( bld->base.builder, res, "" ); - break; - - case TGSI_UTIL_SIGN_TOGGLE: - res = LLVMBuildNeg( bld->base.builder, res, "" ); - break; - - case TGSI_UTIL_SIGN_KEEP: - break; - } - - return res; -} - - -/** - * Register fetch with derivatives. - */ -static void -emit_fetch_deriv( - struct lp_build_tgsi_soa_context *bld, - const struct tgsi_full_instruction *inst, - unsigned index, - const unsigned chan_index, - LLVMValueRef *res, - LLVMValueRef *ddx, - LLVMValueRef *ddy) -{ - LLVMValueRef src; - - src = emit_fetch(bld, inst, index, chan_index); - - if(res) - *res = src; - - /* TODO: use interpolation coeffs for inputs */ - - if(ddx) - *ddx = emit_ddx(bld, src); - - if(ddy) - *ddy = emit_ddy(bld, src); -} - - -/** - * Register store. 
- */ -static void -emit_store( - struct lp_build_tgsi_soa_context *bld, - const struct tgsi_full_instruction *inst, - unsigned index, - unsigned chan_index, - LLVMValueRef value) -{ - const struct tgsi_full_dst_register *reg = &inst->Dst[index]; - - switch( inst->Instruction.Saturate ) { - case TGSI_SAT_NONE: - break; - - case TGSI_SAT_ZERO_ONE: - value = lp_build_max(&bld->base, value, bld->base.zero); - value = lp_build_min(&bld->base, value, bld->base.one); - break; - - case TGSI_SAT_MINUS_PLUS_ONE: - value = lp_build_max(&bld->base, value, lp_build_const_scalar(bld->base.type, -1.0)); - value = lp_build_min(&bld->base, value, bld->base.one); - break; - - default: - assert(0); - } - - switch( reg->Register.File ) { - case TGSI_FILE_OUTPUT: - bld->outputs[reg->Register.Index][chan_index] = value; - break; - - case TGSI_FILE_TEMPORARY: - bld->temps[reg->Register.Index][chan_index] = value; - break; - - case TGSI_FILE_ADDRESS: - /* FIXME */ - assert(0); - break; - - default: - assert( 0 ); - } -} - - -/** - * High-level instruction translators. 
- */ - - -static void -emit_tex( struct lp_build_tgsi_soa_context *bld, - const struct tgsi_full_instruction *inst, - boolean apply_lodbias, - boolean projected, - LLVMValueRef *texel) -{ - const uint unit = inst->Src[1].Register.Index; - LLVMValueRef lodbias; - LLVMValueRef oow = NULL; - LLVMValueRef coords[3]; - unsigned num_coords; - unsigned i; - - switch (inst->Texture.Texture) { - case TGSI_TEXTURE_1D: - num_coords = 1; - break; - case TGSI_TEXTURE_2D: - case TGSI_TEXTURE_RECT: - num_coords = 2; - break; - case TGSI_TEXTURE_SHADOW1D: - case TGSI_TEXTURE_SHADOW2D: - case TGSI_TEXTURE_SHADOWRECT: - case TGSI_TEXTURE_3D: - case TGSI_TEXTURE_CUBE: - num_coords = 3; - break; - default: - assert(0); - return; - } - - if(apply_lodbias) - lodbias = emit_fetch( bld, inst, 0, 3 ); - else - lodbias = bld->base.zero; - - if (projected) { - oow = emit_fetch( bld, inst, 0, 3 ); - oow = lp_build_rcp(&bld->base, oow); - } - - for (i = 0; i < num_coords; i++) { - coords[i] = emit_fetch( bld, inst, 0, i ); - if (projected) - coords[i] = lp_build_mul(&bld->base, coords[i], oow); - } - for (i = num_coords; i < 3; i++) { - coords[i] = bld->base.undef; - } - - bld->sampler->emit_fetch_texel(bld->sampler, - bld->base.builder, - bld->base.type, - unit, num_coords, coords, lodbias, - texel); -} - - -static void -emit_kil( - struct lp_build_tgsi_soa_context *bld, - const struct tgsi_full_instruction *inst ) -{ - const struct tgsi_full_src_register *reg = &inst->Src[0]; - LLVMValueRef terms[NUM_CHANNELS]; - LLVMValueRef mask; - unsigned chan_index; - - memset(&terms, 0, sizeof terms); - - FOR_EACH_CHANNEL( chan_index ) { - unsigned swizzle; - - /* Unswizzle channel */ - swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); - - /* Check if the component has not been already tested. 
*/ - assert(swizzle < NUM_CHANNELS); - if( !terms[swizzle] ) - /* TODO: change the comparison operator instead of setting the sign */ - terms[swizzle] = emit_fetch(bld, inst, 0, chan_index ); - } - - mask = NULL; - FOR_EACH_CHANNEL( chan_index ) { - if(terms[chan_index]) { - LLVMValueRef chan_mask; - - chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero); - - if(mask) - mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, ""); - else - mask = chan_mask; - } - } - - if(mask) - lp_build_mask_update(bld->mask, mask); -} - - -/** - * Check if inst src/dest regs use indirect addressing into temporary - * register file. - */ -static boolean -indirect_temp_reference(const struct tgsi_full_instruction *inst) -{ - uint i; - for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *reg = &inst->Src[i]; - if (reg->Register.File == TGSI_FILE_TEMPORARY && - reg->Register.Indirect) - return TRUE; - } - for (i = 0; i < inst->Instruction.NumDstRegs; i++) { - const struct tgsi_full_dst_register *reg = &inst->Dst[i]; - if (reg->Register.File == TGSI_FILE_TEMPORARY && - reg->Register.Indirect) - return TRUE; - } - return FALSE; -} - - -static int -emit_instruction( - struct lp_build_tgsi_soa_context *bld, - const struct tgsi_full_instruction *inst, - const struct tgsi_opcode_info *info) -{ - unsigned chan_index; - LLVMValueRef src0, src1, src2; - LLVMValueRef tmp0, tmp1, tmp2; - LLVMValueRef tmp3 = NULL; - LLVMValueRef tmp4 = NULL; - LLVMValueRef tmp5 = NULL; - LLVMValueRef tmp6 = NULL; - LLVMValueRef tmp7 = NULL; - LLVMValueRef res; - LLVMValueRef dst0[NUM_CHANNELS]; - - /* we can't handle indirect addressing into temp register file yet */ - if (indirect_temp_reference(inst)) - return FALSE; - - assert(info->num_dst <= 1); - if(info->num_dst) { - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - dst0[chan_index] = bld->base.undef; - } - } - - switch (inst->Instruction.Opcode) { -#if 0 - case 
TGSI_OPCODE_ARL: - /* FIXME */ - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - tmp0 = emit_fetch( bld, inst, 0, chan_index ); - emit_flr(bld, 0, 0); - emit_f2it( bld, 0 ); - dst0[chan_index] = tmp0; - } - break; -#endif - - case TGSI_OPCODE_MOV: - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index ); - } - break; - - case TGSI_OPCODE_LIT: - if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) { - dst0[CHAN_X] = bld->base.one; - } - if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) { - src0 = emit_fetch( bld, inst, 0, CHAN_X ); - dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero); - } - if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { - /* XMM[1] = SrcReg[0].yyyy */ - tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); - /* XMM[1] = max(XMM[1], 0) */ - tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero); - /* XMM[2] = SrcReg[0].wwww */ - tmp2 = emit_fetch( bld, inst, 0, CHAN_W ); - tmp1 = lp_build_pow( &bld->base, tmp1, tmp2); - tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); - tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero); - dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero); - } - if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) { - dst0[CHAN_W] = bld->base.one; - } - break; - - case TGSI_OPCODE_RCP: - /* TGSI_OPCODE_RECIP */ - src0 = emit_fetch( bld, inst, 0, CHAN_X ); - res = lp_build_rcp(&bld->base, src0); - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - dst0[chan_index] = res; - } - break; - - case TGSI_OPCODE_RSQ: - /* TGSI_OPCODE_RECIPSQRT */ - src0 = emit_fetch( bld, inst, 0, CHAN_X ); - src0 = lp_build_abs(&bld->base, src0); - res = lp_build_rsqrt(&bld->base, src0); - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - dst0[chan_index] = res; - } - break; - - case TGSI_OPCODE_EXP: - if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || - IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || - IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) { - LLVMValueRef *p_exp2_int_part 
= NULL; - LLVMValueRef *p_frac_part = NULL; - LLVMValueRef *p_exp2 = NULL; - - src0 = emit_fetch( bld, inst, 0, CHAN_X ); - - if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) - p_exp2_int_part = &tmp0; - if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) - p_frac_part = &tmp1; - if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) - p_exp2 = &tmp2; - - lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2); - - if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) - dst0[CHAN_X] = tmp0; - if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) - dst0[CHAN_Y] = tmp1; - if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) - dst0[CHAN_Z] = tmp2; - } - /* dst.w = 1.0 */ - if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) { - dst0[CHAN_W] = bld->base.one; - } - break; - - case TGSI_OPCODE_LOG: - if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || - IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || - IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) { - LLVMValueRef *p_floor_log2 = NULL; - LLVMValueRef *p_exp = NULL; - LLVMValueRef *p_log2 = NULL; - - src0 = emit_fetch( bld, inst, 0, CHAN_X ); - src0 = lp_build_abs( &bld->base, src0 ); - - if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) - p_floor_log2 = &tmp0; - if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) - p_exp = &tmp1; - if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) - p_log2 = &tmp2; - - lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2); - - /* dst.x = floor(lg2(abs(src.x))) */ - if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) - dst0[CHAN_X] = tmp0; - /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */ - if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) { - dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1); - } - /* dst.z = lg2(abs(src.x)) */ - if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) - dst0[CHAN_Z] = tmp2; - } - /* dst.w = 1.0 */ - if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) { - dst0[CHAN_W] = bld->base.one; - } - break; - - case TGSI_OPCODE_MUL: - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - src0 = emit_fetch( bld, inst, 0, chan_index ); - 
src1 = emit_fetch( bld, inst, 1, chan_index ); - dst0[chan_index] = lp_build_mul(&bld->base, src0, src1); - } - break; - - case TGSI_OPCODE_ADD: - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - src0 = emit_fetch( bld, inst, 0, chan_index ); - src1 = emit_fetch( bld, inst, 1, chan_index ); - dst0[chan_index] = lp_build_add(&bld->base, src0, src1); - } - break; - - case TGSI_OPCODE_DP3: - /* TGSI_OPCODE_DOT3 */ - tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); - tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); - tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); - tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); - tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); - tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); - tmp0 = lp_build_add( &bld->base, tmp0, tmp1); - tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); - tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); - tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); - tmp0 = lp_build_add( &bld->base, tmp0, tmp1); - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - dst0[chan_index] = tmp0; - } - break; - - case TGSI_OPCODE_DP4: - /* TGSI_OPCODE_DOT4 */ - tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); - tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); - tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); - tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); - tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); - tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); - tmp0 = lp_build_add( &bld->base, tmp0, tmp1); - tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); - tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); - tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); - tmp0 = lp_build_add( &bld->base, tmp0, tmp1); - tmp1 = emit_fetch( bld, inst, 0, CHAN_W ); - tmp2 = emit_fetch( bld, inst, 1, CHAN_W ); - tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); - tmp0 = lp_build_add( &bld->base, tmp0, tmp1); - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - dst0[chan_index] = tmp0; - } - break; - - case TGSI_OPCODE_DST: - IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { - dst0[CHAN_X] = bld->base.one; - } - 
IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { - tmp0 = emit_fetch( bld, inst, 0, CHAN_Y ); - tmp1 = emit_fetch( bld, inst, 1, CHAN_Y ); - dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1); - } - IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { - dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z ); - } - IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { - dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W ); - } - break; - - case TGSI_OPCODE_MIN: - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - src0 = emit_fetch( bld, inst, 0, chan_index ); - src1 = emit_fetch( bld, inst, 1, chan_index ); - dst0[chan_index] = lp_build_min( &bld->base, src0, src1 ); - } - break; - - case TGSI_OPCODE_MAX: - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - src0 = emit_fetch( bld, inst, 0, chan_index ); - src1 = emit_fetch( bld, inst, 1, chan_index ); - dst0[chan_index] = lp_build_max( &bld->base, src0, src1 ); - } - break; - - case TGSI_OPCODE_SLT: - /* TGSI_OPCODE_SETLT */ - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - src0 = emit_fetch( bld, inst, 0, chan_index ); - src1 = emit_fetch( bld, inst, 1, chan_index ); - tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 ); - dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); - } - break; - - case TGSI_OPCODE_SGE: - /* TGSI_OPCODE_SETGE */ - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - src0 = emit_fetch( bld, inst, 0, chan_index ); - src1 = emit_fetch( bld, inst, 1, chan_index ); - tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 ); - dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); - } - break; - - case TGSI_OPCODE_MAD: - /* TGSI_OPCODE_MADD */ - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - tmp0 = emit_fetch( bld, inst, 0, chan_index ); - tmp1 = emit_fetch( bld, inst, 1, chan_index ); - tmp2 = emit_fetch( bld, inst, 2, chan_index ); - tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); - tmp0 = lp_build_add( &bld->base, 
tmp0, tmp2); - dst0[chan_index] = tmp0; - } - break; - - case TGSI_OPCODE_SUB: - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - tmp0 = emit_fetch( bld, inst, 0, chan_index ); - tmp1 = emit_fetch( bld, inst, 1, chan_index ); - dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1); - } - break; - - case TGSI_OPCODE_LRP: - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - src0 = emit_fetch( bld, inst, 0, chan_index ); - src1 = emit_fetch( bld, inst, 1, chan_index ); - src2 = emit_fetch( bld, inst, 2, chan_index ); - tmp0 = lp_build_sub( &bld->base, src1, src2 ); - tmp0 = lp_build_mul( &bld->base, src0, tmp0 ); - dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 ); - } - break; - - case TGSI_OPCODE_CND: - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - src0 = emit_fetch( bld, inst, 0, chan_index ); - src1 = emit_fetch( bld, inst, 1, chan_index ); - src2 = emit_fetch( bld, inst, 2, chan_index ); - tmp1 = lp_build_const_scalar(bld->base.type, 0.5); - tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1); - dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 ); - } - break; - - case TGSI_OPCODE_DP2A: - tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */ - tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */ - tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */ - tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */ - tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */ - tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */ - tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ - tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */ - tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ - } - break; - - case TGSI_OPCODE_FRC: - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - src0 = 
emit_fetch( bld, inst, 0, chan_index ); - tmp0 = lp_build_floor(&bld->base, src0); - tmp0 = lp_build_sub(&bld->base, src0, tmp0); - dst0[chan_index] = tmp0; - } - break; - - case TGSI_OPCODE_CLAMP: - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - tmp0 = emit_fetch( bld, inst, 0, chan_index ); - src1 = emit_fetch( bld, inst, 1, chan_index ); - src2 = emit_fetch( bld, inst, 2, chan_index ); - tmp0 = lp_build_max(&bld->base, tmp0, src1); - tmp0 = lp_build_min(&bld->base, tmp0, src2); - dst0[chan_index] = tmp0; - } - break; - - case TGSI_OPCODE_FLR: - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - tmp0 = emit_fetch( bld, inst, 0, chan_index ); - dst0[chan_index] = lp_build_floor(&bld->base, tmp0); - } - break; - - case TGSI_OPCODE_ROUND: - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - tmp0 = emit_fetch( bld, inst, 0, chan_index ); - dst0[chan_index] = lp_build_round(&bld->base, tmp0); - } - break; - - case TGSI_OPCODE_EX2: { - tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); - tmp0 = lp_build_exp2( &bld->base, tmp0); - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - dst0[chan_index] = tmp0; - } - break; - } - - case TGSI_OPCODE_LG2: - tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); - tmp0 = lp_build_log2( &bld->base, tmp0); - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - dst0[chan_index] = tmp0; - } - break; - - case TGSI_OPCODE_POW: - src0 = emit_fetch( bld, inst, 0, CHAN_X ); - src1 = emit_fetch( bld, inst, 1, CHAN_X ); - res = lp_build_pow( &bld->base, src0, src1 ); - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - dst0[chan_index] = res; - } - break; - - case TGSI_OPCODE_XPD: - if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || - IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) { - tmp1 = emit_fetch( bld, inst, 1, CHAN_Z ); - tmp3 = emit_fetch( bld, inst, 0, CHAN_Z ); - } - if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || - IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { - tmp0 = emit_fetch( bld, inst, 0, CHAN_Y ); - tmp4 = emit_fetch( bld, inst, 1, 
CHAN_Y ); - } - IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { - tmp2 = tmp0; - tmp2 = lp_build_mul( &bld->base, tmp2, tmp1); - tmp5 = tmp3; - tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); - tmp2 = lp_build_sub( &bld->base, tmp2, tmp5); - dst0[CHAN_X] = tmp2; - } - if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || - IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { - tmp2 = emit_fetch( bld, inst, 1, CHAN_X ); - tmp5 = emit_fetch( bld, inst, 0, CHAN_X ); - } - IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { - tmp3 = lp_build_mul( &bld->base, tmp3, tmp2); - tmp1 = lp_build_mul( &bld->base, tmp1, tmp5); - tmp3 = lp_build_sub( &bld->base, tmp3, tmp1); - dst0[CHAN_Y] = tmp3; - } - IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { - tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); - tmp0 = lp_build_mul( &bld->base, tmp0, tmp2); - tmp5 = lp_build_sub( &bld->base, tmp5, tmp0); - dst0[CHAN_Z] = tmp5; - } - IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { - dst0[CHAN_W] = bld->base.one; - } - break; - - case TGSI_OPCODE_ABS: - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - tmp0 = emit_fetch( bld, inst, 0, chan_index ); - dst0[chan_index] = lp_build_abs( &bld->base, tmp0 ); - } - break; - - case TGSI_OPCODE_RCC: - /* deprecated? 
*/ - assert(0); - return 0; - - case TGSI_OPCODE_DPH: - tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); - tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); - tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); - tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); - tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); - tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); - tmp0 = lp_build_add( &bld->base, tmp0, tmp1); - tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); - tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); - tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); - tmp0 = lp_build_add( &bld->base, tmp0, tmp1); - tmp1 = emit_fetch( bld, inst, 1, CHAN_W ); - tmp0 = lp_build_add( &bld->base, tmp0, tmp1); - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - dst0[chan_index] = tmp0; - } - break; - - case TGSI_OPCODE_COS: - tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); - tmp0 = lp_build_cos( &bld->base, tmp0 ); - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - dst0[chan_index] = tmp0; - } - break; - - case TGSI_OPCODE_DDX: - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL); - } - break; - - case TGSI_OPCODE_DDY: - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]); - } - break; - - case TGSI_OPCODE_KILP: - /* predicated kill */ - /* FIXME */ - return 0; - break; - - case TGSI_OPCODE_KIL: - /* conditional kill */ - emit_kil( bld, inst ); - break; - - case TGSI_OPCODE_PK2H: - return 0; - break; - - case TGSI_OPCODE_PK2US: - return 0; - break; - - case TGSI_OPCODE_PK4B: - return 0; - break; - - case TGSI_OPCODE_PK4UB: - return 0; - break; - - case TGSI_OPCODE_RFL: - return 0; - break; - - case TGSI_OPCODE_SEQ: - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - src0 = emit_fetch( bld, inst, 0, chan_index ); - src1 = emit_fetch( bld, inst, 1, chan_index ); - tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 ); - dst0[chan_index] = lp_build_select( 
&bld->base, tmp0, bld->base.one, bld->base.zero ); - } - break; - - case TGSI_OPCODE_SFL: - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - dst0[chan_index] = bld->base.zero; - } - break; - - case TGSI_OPCODE_SGT: - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - src0 = emit_fetch( bld, inst, 0, chan_index ); - src1 = emit_fetch( bld, inst, 1, chan_index ); - tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 ); - dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); - } - break; - - case TGSI_OPCODE_SIN: - tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); - tmp0 = lp_build_sin( &bld->base, tmp0 ); - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - dst0[chan_index] = tmp0; - } - break; - - case TGSI_OPCODE_SLE: - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - src0 = emit_fetch( bld, inst, 0, chan_index ); - src1 = emit_fetch( bld, inst, 1, chan_index ); - tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 ); - dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); - } - break; - - case TGSI_OPCODE_SNE: - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - src0 = emit_fetch( bld, inst, 0, chan_index ); - src1 = emit_fetch( bld, inst, 1, chan_index ); - tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 ); - dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); - } - break; - - case TGSI_OPCODE_STR: - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - dst0[chan_index] = bld->base.one; - } - break; - - case TGSI_OPCODE_TEX: - emit_tex( bld, inst, FALSE, FALSE, dst0 ); - break; - - case TGSI_OPCODE_TXD: - /* FIXME */ - return 0; - break; - - case TGSI_OPCODE_UP2H: - /* deprecated */ - assert (0); - return 0; - break; - - case TGSI_OPCODE_UP2US: - /* deprecated */ - assert(0); - return 0; - break; - - case TGSI_OPCODE_UP4B: - /* deprecated */ - assert(0); - return 0; - break; - - case TGSI_OPCODE_UP4UB: - /* 
deprecated */ - assert(0); - return 0; - break; - - case TGSI_OPCODE_X2D: - /* deprecated? */ - assert(0); - return 0; - break; - - case TGSI_OPCODE_ARA: - /* deprecated */ - assert(0); - return 0; - break; - -#if 0 - case TGSI_OPCODE_ARR: - /* FIXME */ - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - tmp0 = emit_fetch( bld, inst, 0, chan_index ); - emit_rnd( bld, 0, 0 ); - emit_f2it( bld, 0 ); - dst0[chan_index] = tmp0; - } - break; -#endif - - case TGSI_OPCODE_BRA: - /* deprecated */ - assert(0); - return 0; - break; - - case TGSI_OPCODE_CAL: - /* FIXME */ - return 0; - break; - - case TGSI_OPCODE_RET: - /* FIXME */ - return 0; - break; - - case TGSI_OPCODE_END: - break; - - case TGSI_OPCODE_SSG: - /* TGSI_OPCODE_SGN */ - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - tmp0 = emit_fetch( bld, inst, 0, chan_index ); - dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 ); - } - break; - - case TGSI_OPCODE_CMP: - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - src0 = emit_fetch( bld, inst, 0, chan_index ); - src1 = emit_fetch( bld, inst, 1, chan_index ); - src2 = emit_fetch( bld, inst, 2, chan_index ); - tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero ); - dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2); - } - break; - - case TGSI_OPCODE_SCS: - IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { - tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); - dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 ); - } - IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { - tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); - dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 ); - } - IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { - dst0[CHAN_Z] = bld->base.zero; - } - IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { - dst0[CHAN_W] = bld->base.one; - } - break; - - case TGSI_OPCODE_TXB: - emit_tex( bld, inst, TRUE, FALSE, dst0 ); - break; - - case TGSI_OPCODE_NRM: - /* fall-through */ - case TGSI_OPCODE_NRM4: - /* 3 or 4-component normalization */ - { - uint dims = 
(inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4; - - if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) || - IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) || - IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) || - (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) { - - /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */ - - /* xmm4 = src.x */ - /* xmm0 = src.x * src.x */ - tmp0 = emit_fetch(bld, inst, 0, CHAN_X); - if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) { - tmp4 = tmp0; - } - tmp0 = lp_build_mul( &bld->base, tmp0, tmp0); - - /* xmm5 = src.y */ - /* xmm0 = xmm0 + src.y * src.y */ - tmp1 = emit_fetch(bld, inst, 0, CHAN_Y); - if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) { - tmp5 = tmp1; - } - tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); - tmp0 = lp_build_add( &bld->base, tmp0, tmp1); - - /* xmm6 = src.z */ - /* xmm0 = xmm0 + src.z * src.z */ - tmp1 = emit_fetch(bld, inst, 0, CHAN_Z); - if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) { - tmp6 = tmp1; - } - tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); - tmp0 = lp_build_add( &bld->base, tmp0, tmp1); - - if (dims == 4) { - /* xmm7 = src.w */ - /* xmm0 = xmm0 + src.w * src.w */ - tmp1 = emit_fetch(bld, inst, 0, CHAN_W); - if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) { - tmp7 = tmp1; - } - tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); - tmp0 = lp_build_add( &bld->base, tmp0, tmp1); - } - - /* xmm1 = 1 / sqrt(xmm0) */ - tmp1 = lp_build_rsqrt( &bld->base, tmp0); - - /* dst.x = xmm1 * src.x */ - if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) { - dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1); - } - - /* dst.y = xmm1 * src.y */ - if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) { - dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1); - } - - /* dst.z = xmm1 * src.z */ - if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) { - dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1); - } - - /* dst.w = xmm1 * src.w */ - if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) { - dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1); - } - } - - /* 
dst.w = 1.0 */ - if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) { - dst0[CHAN_W] = bld->base.one; - } - } - break; - - case TGSI_OPCODE_DIV: - /* deprecated */ - assert( 0 ); - return 0; - break; - - case TGSI_OPCODE_DP2: - tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */ - tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */ - tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */ - tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */ - tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */ - tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */ - tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ - } - break; - - case TGSI_OPCODE_TXL: - emit_tex( bld, inst, TRUE, FALSE, dst0 ); - break; - - case TGSI_OPCODE_TXP: - emit_tex( bld, inst, FALSE, TRUE, dst0 ); - break; - - case TGSI_OPCODE_BRK: - /* FIXME */ - return 0; - break; - - case TGSI_OPCODE_IF: - /* FIXME */ - return 0; - break; - - case TGSI_OPCODE_BGNFOR: - /* deprecated */ - assert(0); - return 0; - break; - - case TGSI_OPCODE_REP: - /* deprecated */ - assert(0); - return 0; - break; - - case TGSI_OPCODE_ELSE: - /* FIXME */ - return 0; - break; - - case TGSI_OPCODE_ENDIF: - /* FIXME */ - return 0; - break; - - case TGSI_OPCODE_ENDFOR: - /* deprecated */ - assert(0); - return 0; - break; - - case TGSI_OPCODE_ENDREP: - /* deprecated */ - assert(0); - return 0; - break; - - case TGSI_OPCODE_PUSHA: - /* deprecated? */ - assert(0); - return 0; - break; - - case TGSI_OPCODE_POPA: - /* deprecated? */ - assert(0); - return 0; - break; - - case TGSI_OPCODE_CEIL: - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - tmp0 = emit_fetch( bld, inst, 0, chan_index ); - dst0[chan_index] = lp_build_ceil(&bld->base, tmp0); - } - break; - - case TGSI_OPCODE_I2F: - /* deprecated? 
*/ - assert(0); - return 0; - break; - - case TGSI_OPCODE_NOT: - /* deprecated? */ - assert(0); - return 0; - break; - - case TGSI_OPCODE_TRUNC: - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - tmp0 = emit_fetch( bld, inst, 0, chan_index ); - dst0[chan_index] = lp_build_trunc(&bld->base, tmp0); - } - break; - - case TGSI_OPCODE_SHL: - /* deprecated? */ - assert(0); - return 0; - break; - - case TGSI_OPCODE_ISHR: - /* deprecated? */ - assert(0); - return 0; - break; - - case TGSI_OPCODE_AND: - /* deprecated? */ - assert(0); - return 0; - break; - - case TGSI_OPCODE_OR: - /* deprecated? */ - assert(0); - return 0; - break; - - case TGSI_OPCODE_MOD: - /* deprecated? */ - assert(0); - return 0; - break; - - case TGSI_OPCODE_XOR: - /* deprecated? */ - assert(0); - return 0; - break; - - case TGSI_OPCODE_SAD: - /* deprecated? */ - assert(0); - return 0; - break; - - case TGSI_OPCODE_TXF: - /* deprecated? */ - assert(0); - return 0; - break; - - case TGSI_OPCODE_TXQ: - /* deprecated? */ - assert(0); - return 0; - break; - - case TGSI_OPCODE_CONT: - /* deprecated? 
*/ - assert(0); - return 0; - break; - - case TGSI_OPCODE_EMIT: - return 0; - break; - - case TGSI_OPCODE_ENDPRIM: - return 0; - break; - - case TGSI_OPCODE_NOP: - break; - - default: - return 0; - } - - if(info->num_dst) { - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, dst0[chan_index]); - } - } - - return 1; -} - - -void -lp_build_tgsi_soa(LLVMBuilderRef builder, - const struct tgsi_token *tokens, - struct lp_type type, - struct lp_build_mask_context *mask, - LLVMValueRef consts_ptr, - const LLVMValueRef *pos, - const LLVMValueRef (*inputs)[NUM_CHANNELS], - LLVMValueRef (*outputs)[NUM_CHANNELS], - struct lp_build_sampler_soa *sampler) -{ - struct lp_build_tgsi_soa_context bld; - struct tgsi_parse_context parse; - uint num_immediates = 0; - unsigned i; - - /* Setup build context */ - memset(&bld, 0, sizeof bld); - lp_build_context_init(&bld.base, builder, type); - bld.mask = mask; - bld.pos = pos; - bld.inputs = inputs; - bld.outputs = outputs; - bld.consts_ptr = consts_ptr; - bld.sampler = sampler; - - tgsi_parse_init( &parse, tokens ); - - while( !tgsi_parse_end_of_tokens( &parse ) ) { - tgsi_parse_token( &parse ); - - switch( parse.FullToken.Token.Type ) { - case TGSI_TOKEN_TYPE_DECLARATION: - /* Inputs already interpolated */ - break; - - case TGSI_TOKEN_TYPE_INSTRUCTION: - { - unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode; - const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode); - if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, info )) - _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n", - info ? 
info->mnemonic : ""); - } - - break; - - case TGSI_TOKEN_TYPE_IMMEDIATE: - /* simply copy the immediate values into the next immediates[] slot */ - { - const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; - assert(size <= 4); - assert(num_immediates < LP_MAX_IMMEDIATES); - for( i = 0; i < size; ++i ) - bld.immediates[num_immediates][i] = - lp_build_const_scalar(type, parse.FullToken.FullImmediate.u[i].Float); - for( i = size; i < 4; ++i ) - bld.immediates[num_immediates][i] = bld.base.undef; - num_immediates++; - } - break; - - default: - assert( 0 ); - } - } - - tgsi_parse_free( &parse ); -} - diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.c b/src/gallium/drivers/llvmpipe/lp_bld_type.c deleted file mode 100644 index 8270cd057f6..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_bld_type.c +++ /dev/null @@ -1,222 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include "util/u_debug.h" - -#include "lp_bld_type.h" -#include "lp_bld_const.h" - - -LLVMTypeRef -lp_build_elem_type(struct lp_type type) -{ - if (type.floating) { - switch(type.width) { - case 32: - return LLVMFloatType(); - break; - case 64: - return LLVMDoubleType(); - break; - default: - assert(0); - return LLVMFloatType(); - } - } - else { - return LLVMIntType(type.width); - } -} - - -LLVMTypeRef -lp_build_vec_type(struct lp_type type) -{ - LLVMTypeRef elem_type = lp_build_elem_type(type); - return LLVMVectorType(elem_type, type.length); -} - - -/** - * This function is a mirror of lp_build_elem_type() above. - * - * XXX: I'm not sure if it wouldn't be easier/efficient to just recreate the - * type and check for identity. 
- */ -boolean -lp_check_elem_type(struct lp_type type, LLVMTypeRef elem_type) -{ - LLVMTypeKind elem_kind; - - assert(elem_type); - if(!elem_type) - return FALSE; - - elem_kind = LLVMGetTypeKind(elem_type); - - if (type.floating) { - switch(type.width) { - case 32: - if(elem_kind != LLVMFloatTypeKind) - return FALSE; - break; - case 64: - if(elem_kind != LLVMDoubleTypeKind) - return FALSE; - break; - default: - assert(0); - return FALSE; - } - } - else { - if(elem_kind != LLVMIntegerTypeKind) - return FALSE; - - if(LLVMGetIntTypeWidth(elem_type) != type.width) - return FALSE; - } - - return TRUE; -} - - -boolean -lp_check_vec_type(struct lp_type type, LLVMTypeRef vec_type) -{ - LLVMTypeRef elem_type; - - assert(vec_type); - if(!vec_type) - return FALSE; - - if(LLVMGetTypeKind(vec_type) != LLVMVectorTypeKind) - return FALSE; - - if(LLVMGetVectorSize(vec_type) != type.length) - return FALSE; - - elem_type = LLVMGetElementType(vec_type); - - return lp_check_elem_type(type, elem_type); -} - - -boolean -lp_check_value(struct lp_type type, LLVMValueRef val) -{ - LLVMTypeRef vec_type; - - assert(val); - if(!val) - return FALSE; - - vec_type = LLVMTypeOf(val); - - return lp_check_vec_type(type, vec_type); -} - - -LLVMTypeRef -lp_build_int_elem_type(struct lp_type type) -{ - return LLVMIntType(type.width); -} - - -LLVMTypeRef -lp_build_int_vec_type(struct lp_type type) -{ - LLVMTypeRef elem_type = lp_build_int_elem_type(type); - return LLVMVectorType(elem_type, type.length); -} - - -/** - * Build int32[4] vector type - */ -LLVMTypeRef -lp_build_int32_vec4_type(void) -{ - struct lp_type t; - LLVMTypeRef type; - - memset(&t, 0, sizeof(t)); - t.floating = FALSE; /* floating point values */ - t.sign = TRUE; /* values are signed */ - t.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */ - t.width = 32; /* 32-bit int */ - t.length = 4; /* 4 elements per vector */ - - type = lp_build_int_elem_type(t); - return LLVMVectorType(type, t.length); -} - - -struct lp_type 
-lp_int_type(struct lp_type type) -{ - struct lp_type res_type; - - memset(&res_type, 0, sizeof res_type); - res_type.width = type.width; - res_type.length = type.length; - - return res_type; -} - - -/** - * Return the type with twice the bit width (hence half the number of elements). - */ -struct lp_type -lp_wider_type(struct lp_type type) -{ - struct lp_type res_type; - - memcpy(&res_type, &type, sizeof res_type); - res_type.width *= 2; - res_type.length /= 2; - - assert(res_type.length); - - return res_type; -} - - -void -lp_build_context_init(struct lp_build_context *bld, - LLVMBuilderRef builder, - struct lp_type type) -{ - bld->builder = builder; - bld->type = type; - bld->undef = lp_build_undef(type); - bld->zero = lp_build_zero(type); - bld->one = lp_build_one(type); -} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.h b/src/gallium/drivers/llvmpipe/lp_bld_type.h deleted file mode 100644 index 62ee05be4df..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_bld_type.h +++ /dev/null @@ -1,273 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * Convenient representation of SIMD types. - * - * @author Jose Fonseca - */ - - -#ifndef LP_BLD_TYPE_H -#define LP_BLD_TYPE_H - - -#include - -#include - - -/** - * Native SIMD register width. - * - * 128 for all architectures we care about. - */ -#define LP_NATIVE_VECTOR_WIDTH 128 - -/** - * Several functions can only cope with vectors of length up to this value. - * You may need to increase that value if you want to represent bigger vectors. - */ -#define LP_MAX_VECTOR_LENGTH 16 - - -/** - * The LLVM type system can't conveniently express all the things we care about - * on the types used for intermediate computations, such as signed vs unsigned, - * normalized values, or fixed point. - */ -struct lp_type { - /** - * Floating-point. Cannot be used with fixed. Integer numbers are - * represented by this zero. - */ - unsigned floating:1; - - /** - * Fixed-point. Cannot be used with floating. Integer numbers are - * represented by this zero. - */ - unsigned fixed:1; - - /** - * Whether it can represent negative values or not. - * - * If this is not set for floating point, it means that all values are - * assumed to be positive. - */ - unsigned sign:1; - - /** - * Whether values are normalized to fit [0, 1] interval, or [-1, 1] - * interval for signed types. - * - * For integer types it means the representable integer range should be - * interpreted as the interval above. 
- * - * For floating and fixed point formats it means the values should be - * clamped to the interval above. - */ - unsigned norm:1; - - /** - * Element width. - * - * For fixed point values, the fixed point is assumed to be at half the - * width. - */ - unsigned width:14; - - /** - * Vector length. - * - * width*length should be a power of two greater or equal to eight. - * - * @sa LP_MAX_VECTOR_LENGTH - */ - unsigned length:14; -}; - - -/** - * We need most of the information here in order to correctly and efficiently - * translate an arithmetic operation into LLVM IR. Putting it here avoids the - * trouble of passing it as parameters. - */ -struct lp_build_context -{ - LLVMBuilderRef builder; - - /** - * This not only describes the input/output LLVM types, but also whether - * to normalize/clamp the results. - */ - struct lp_type type; - - /** Same as lp_build_undef(type) */ - LLVMValueRef undef; - - /** Same as lp_build_zero(type) */ - LLVMValueRef zero; - - /** Same as lp_build_one(type) */ - LLVMValueRef one; -}; - - -static INLINE struct lp_type -lp_type_float(unsigned width) -{ - struct lp_type res_type; - - memset(&res_type, 0, sizeof res_type); - res_type.floating = TRUE; - res_type.sign = TRUE; - res_type.width = width; - res_type.length = LP_NATIVE_VECTOR_WIDTH / width; - - return res_type; -} - - -static INLINE struct lp_type -lp_type_int(unsigned width) -{ - struct lp_type res_type; - - memset(&res_type, 0, sizeof res_type); - res_type.sign = TRUE; - res_type.width = width; - res_type.length = LP_NATIVE_VECTOR_WIDTH / width; - - return res_type; -} - - -static INLINE struct lp_type -lp_type_uint(unsigned width) -{ - struct lp_type res_type; - - memset(&res_type, 0, sizeof res_type); - res_type.width = width; - res_type.length = LP_NATIVE_VECTOR_WIDTH / width; - - return res_type; -} - - -static INLINE struct lp_type -lp_type_unorm(unsigned width) -{ - struct lp_type res_type; - - memset(&res_type, 0, sizeof res_type); - res_type.norm = TRUE; - 
res_type.width = width; - res_type.length = LP_NATIVE_VECTOR_WIDTH / width; - - return res_type; -} - - -static INLINE struct lp_type -lp_type_fixed(unsigned width) -{ - struct lp_type res_type; - - memset(&res_type, 0, sizeof res_type); - res_type.sign = TRUE; - res_type.fixed = TRUE; - res_type.width = width; - res_type.length = LP_NATIVE_VECTOR_WIDTH / width; - - return res_type; -} - - -static INLINE struct lp_type -lp_type_ufixed(unsigned width) -{ - struct lp_type res_type; - - memset(&res_type, 0, sizeof res_type); - res_type.fixed = TRUE; - res_type.width = width; - res_type.length = LP_NATIVE_VECTOR_WIDTH / width; - - return res_type; -} - - -LLVMTypeRef -lp_build_elem_type(struct lp_type type); - - -LLVMTypeRef -lp_build_vec_type(struct lp_type type); - - -boolean -lp_check_elem_type(struct lp_type type, LLVMTypeRef elem_type); - - -boolean -lp_check_vec_type(struct lp_type type, LLVMTypeRef vec_type); - - -boolean -lp_check_value(struct lp_type type, LLVMValueRef val); - - -LLVMTypeRef -lp_build_int_elem_type(struct lp_type type); - - -LLVMTypeRef -lp_build_int_vec_type(struct lp_type type); - - -LLVMTypeRef -lp_build_int32_vec4_type(void); - - -struct lp_type -lp_int_type(struct lp_type type); - - -struct lp_type -lp_wider_type(struct lp_type type); - - -void -lp_build_context_init(struct lp_build_context *bld, - LLVMBuilderRef builder, - struct lp_type type); - - -#endif /* !LP_BLD_TYPE_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index 310fc2b8479..d3d7e26882d 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -39,7 +39,7 @@ #include "util/u_cpu_detect.h" #include "lp_debug.h" #include "lp_screen.h" -#include "lp_bld_intr.h" +#include "gallivm/lp_bld_intr.h" #include "lp_jit.h" diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index 9cbe1bd3b1b..8df3015d4b4 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ 
b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -36,7 +36,7 @@ #define LP_JIT_H -#include "lp_bld_struct.h" +#include "gallivm/lp_bld_struct.h" #include "pipe/p_state.h" diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index cee20d8c687..7855e03bed1 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -37,7 +37,7 @@ #include "lp_rast.h" #include "lp_rast_priv.h" #include "lp_tile_soa.h" -#include "lp_bld_debug.h" +#include "gallivm/lp_bld_debug.h" #include "lp_scene.h" diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index 4242653c909..8f68f12bed7 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -36,7 +36,7 @@ #include "pipe/p_state.h" #include "tgsi/tgsi_scan.h" #include "lp_jit.h" -#include "lp_bld_sample.h" /* for struct lp_sampler_static_state */ +#include "gallivm/lp_bld_sample.h" /* for struct lp_sampler_static_state */ #define LP_NEW_VIEWPORT 0x1 diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index c5f6df23a16..f37c21f3fe2 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -72,19 +72,19 @@ #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_scan.h" #include "tgsi/tgsi_parse.h" -#include "lp_bld_type.h" -#include "lp_bld_const.h" -#include "lp_bld_conv.h" -#include "lp_bld_intr.h" -#include "lp_bld_logic.h" -#include "lp_bld_depth.h" -#include "lp_bld_interp.h" -#include "lp_bld_tgsi.h" -#include "lp_bld_alpha.h" -#include "lp_bld_blend.h" -#include "lp_bld_swizzle.h" -#include "lp_bld_flow.h" -#include "lp_bld_debug.h" +#include "gallivm/lp_bld_type.h" +#include "gallivm/lp_bld_const.h" +#include "gallivm/lp_bld_conv.h" +#include "gallivm/lp_bld_intr.h" +#include "gallivm/lp_bld_logic.h" +#include "gallivm/lp_bld_depth.h" +#include "gallivm/lp_bld_interp.h" +#include 
"gallivm/lp_bld_tgsi.h" +#include "gallivm/lp_bld_alpha.h" +#include "gallivm/lp_bld_blend.h" +#include "gallivm/lp_bld_swizzle.h" +#include "gallivm/lp_bld_flow.h" +#include "gallivm/lp_bld_debug.h" #include "lp_buffer.h" #include "lp_context.h" #include "lp_debug.h" diff --git a/src/gallium/drivers/llvmpipe/lp_test.h b/src/gallium/drivers/llvmpipe/lp_test.h index 39d80726e65..ca0f737b295 100644 --- a/src/gallium/drivers/llvmpipe/lp_test.h +++ b/src/gallium/drivers/llvmpipe/lp_test.h @@ -53,7 +53,7 @@ #include "util/u_math.h" #include "util/u_debug_dump.h" -#include "lp_bld_type.h" +#include "gallivm/lp_bld_type.h" #define LP_TEST_NUM_SAMPLES 32 diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c index ee72f6ce4fc..e49b7055982 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c @@ -37,9 +37,9 @@ */ -#include "lp_bld_type.h" -#include "lp_bld_blend.h" -#include "lp_bld_debug.h" +#include "gallivm/lp_bld_type.h" +#include "gallivm/lp_bld_blend.h" +#include "gallivm/lp_bld_debug.h" #include "lp_test.h" diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c index c1abee424c9..958cc40538e 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c @@ -34,10 +34,10 @@ */ -#include "lp_bld_type.h" -#include "lp_bld_const.h" -#include "lp_bld_conv.h" -#include "lp_bld_debug.h" +#include "gallivm/lp_bld_type.h" +#include "gallivm/lp_bld_const.h" +#include "gallivm/lp_bld_conv.h" +#include "gallivm/lp_bld_debug.h" #include "lp_test.h" diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c index 2b258f1052e..48828bd0a0f 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_format.c +++ b/src/gallium/drivers/llvmpipe/lp_test_format.c @@ -38,7 +38,7 @@ #include "util/u_cpu_detect.h" #include "util/u_format.h" -#include 
"lp_bld_format.h" +#include "gallivm/lp_bld_format.h" #include "lp_test.h" diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c index 314544aa9a6..14ff00469b0 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_main.c +++ b/src/gallium/drivers/llvmpipe/lp_test_main.c @@ -36,8 +36,8 @@ #include "util/u_cpu_detect.h" -#include "lp_bld_const.h" -#include "lp_bld_misc.h" +#include "gallivm/lp_bld_const.h" +#include "gallivm/lp_bld_misc.h" #include "lp_test.h" diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c index 7f55f1ae83f..2533275dc18 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c @@ -42,10 +42,10 @@ #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" -#include "lp_bld_debug.h" -#include "lp_bld_type.h" -#include "lp_bld_sample.h" -#include "lp_bld_tgsi.h" +#include "gallivm/lp_bld_debug.h" +#include "gallivm/lp_bld_type.h" +#include "gallivm/lp_bld_sample.h" +#include "gallivm/lp_bld_tgsi.h" #include "lp_jit.h" #include "lp_tex_sample.h" -- cgit v1.2.3 From 38076c99505773de7c3e387ef47ec24548d07961 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 8 Feb 2010 17:51:48 -0700 Subject: gallivm: added lp_bld_misc.cpp to sources list --- src/gallium/auxiliary/SConscript | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/auxiliary/SConscript') diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index e952e6affd7..1bc1f6b9fa4 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -184,6 +184,7 @@ if drawllvm: 'gallivm/lp_bld_interp.c', 'gallivm/lp_bld_intr.c', 'gallivm/lp_bld_logic.c', + 'gallivm/lp_bld_misc.cpp', 'gallivm/lp_bld_pack.c', 'gallivm/lp_bld_sample.c', 'gallivm/lp_bld_sample_soa.c', -- cgit v1.2.3 From 5ee856e195ca3745d6460c0874c2c9463cf359df Mon Sep 17 00:00:00 2001 From: Michal Krol Date: 
Wed, 3 Feb 2010 11:19:04 +0100 Subject: tgsi: Remove tgsi_dump_c.[ch]. Little utility after development stabilisation, use tgsi_dump instead. --- src/gallium/auxiliary/SConscript | 1 - src/gallium/auxiliary/tgsi/tgsi_dump_c.c | 462 ------------------------------- src/gallium/auxiliary/tgsi/tgsi_dump_c.h | 49 ---- 3 files changed, 512 deletions(-) delete mode 100644 src/gallium/auxiliary/tgsi/tgsi_dump_c.c delete mode 100644 src/gallium/auxiliary/tgsi/tgsi_dump_c.h (limited to 'src/gallium/auxiliary/SConscript') diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index 1bc1f6b9fa4..9709344b546 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -109,7 +109,6 @@ source = [ 'rtasm/rtasm_ppc_spe.c', 'tgsi/tgsi_build.c', 'tgsi/tgsi_dump.c', - 'tgsi/tgsi_dump_c.c', 'tgsi/tgsi_exec.c', 'tgsi/tgsi_info.c', 'tgsi/tgsi_iterate.c', diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c deleted file mode 100644 index 47fd1dd590e..00000000000 --- a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c +++ /dev/null @@ -1,462 +0,0 @@ -/************************************************************************** - * - * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "util/u_debug.h" -#include "util/u_string.h" -#include "tgsi_dump_c.h" -#include "tgsi_build.h" -#include "tgsi_info.h" -#include "tgsi_parse.h" - -static void -dump_enum( - const unsigned e, - const char **enums, - const unsigned enums_count ) -{ - if (e >= enums_count) { - debug_printf( "%u", e ); - } - else { - debug_printf( "%s", enums[e] ); - } -} - -#define EOL() debug_printf( "\n" ) -#define TXT(S) debug_printf( "%s", S ) -#define CHR(C) debug_printf( "%c", C ) -#define UIX(I) debug_printf( "0x%x", I ) -#define UID(I) debug_printf( "%u", I ) -#define SID(I) debug_printf( "%d", I ) -#define FLT(F) debug_printf( "%10.4f", F ) -#define ENM(E,ENUMS) dump_enum( E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) ) - -static const char *TGSI_PROCESSOR_TYPES[] = -{ - "PROCESSOR_FRAGMENT", - "PROCESSOR_VERTEX", - "PROCESSOR_GEOMETRY" -}; - -static const char *TGSI_TOKEN_TYPES[] = -{ - "TOKEN_TYPE_DECLARATION", - "TOKEN_TYPE_IMMEDIATE", - "TOKEN_TYPE_INSTRUCTION" -}; - -static const char *TGSI_FILES[TGSI_FILE_COUNT] = -{ - "FILE_NULL", - "FILE_CONSTANT", - "FILE_INPUT", - "FILE_OUTPUT", - "FILE_TEMPORARY", - "FILE_SAMPLER", - "FILE_ADDRESS", - "FILE_IMMEDIATE", - "FILE_LOOP", - "FILE_PREDICATE" -}; - -static const char *TGSI_INTERPOLATES[] = -{ - "INTERPOLATE_CONSTANT", - "INTERPOLATE_LINEAR", - "INTERPOLATE_PERSPECTIVE" -}; - -static const char *TGSI_SEMANTICS[] = -{ - "SEMANTIC_POSITION", 
- "SEMANTIC_COLOR", - "SEMANTIC_BCOLOR", - "SEMANTIC_FOG", - "SEMANTIC_PSIZE", - "SEMANTIC_GENERIC", - "SEMANTIC_NORMAL" -}; - -static const char *TGSI_IMMS[] = -{ - "IMM_FLOAT32" -}; - -static const char *TGSI_SATS[] = -{ - "SAT_NONE", - "SAT_ZERO_ONE", - "SAT_MINUS_PLUS_ONE" -}; - -static const char *TGSI_SWIZZLES[] = -{ - "SWIZZLE_X", - "SWIZZLE_Y", - "SWIZZLE_Z", - "SWIZZLE_W" -}; - -static const char *TGSI_TEXTURES[] = -{ - "TEXTURE_UNKNOWN", - "TEXTURE_1D", - "TEXTURE_2D", - "TEXTURE_3D", - "TEXTURE_CUBE", - "TEXTURE_RECT", - "TEXTURE_SHADOW1D", - "TEXTURE_SHADOW2D", - "TEXTURE_SHADOWRECT" -}; - -static const char *TGSI_WRITEMASKS[] = -{ - "0", - "WRITEMASK_X", - "WRITEMASK_Y", - "WRITEMASK_XY", - "WRITEMASK_Z", - "WRITEMASK_XZ", - "WRITEMASK_YZ", - "WRITEMASK_XYZ", - "WRITEMASK_W", - "WRITEMASK_XW", - "WRITEMASK_YW", - "WRITEMASK_XYW", - "WRITEMASK_ZW", - "WRITEMASK_XZW", - "WRITEMASK_YZW", - "WRITEMASK_XYZW" -}; - -static void -dump_declaration_verbose( - struct tgsi_full_declaration *decl, - unsigned ignored, - unsigned deflt, - struct tgsi_full_declaration *fd ) -{ - TXT( "\nFile : " ); - ENM( decl->Declaration.File, TGSI_FILES ); - if( deflt || fd->Declaration.UsageMask != decl->Declaration.UsageMask ) { - TXT( "\nUsageMask : " ); - if( decl->Declaration.UsageMask & TGSI_WRITEMASK_X ) { - CHR( 'X' ); - } - if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Y ) { - CHR( 'Y' ); - } - if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Z ) { - CHR( 'Z' ); - } - if( decl->Declaration.UsageMask & TGSI_WRITEMASK_W ) { - CHR( 'W' ); - } - } - if( deflt || fd->Declaration.Interpolate != decl->Declaration.Interpolate ) { - TXT( "\nInterpolate: " ); - ENM( decl->Declaration.Interpolate, TGSI_INTERPOLATES ); - } - if( deflt || fd->Declaration.Semantic != decl->Declaration.Semantic ) { - TXT( "\nSemantic : " ); - UID( decl->Declaration.Semantic ); - } - if (deflt || fd->Declaration.Centroid != decl->Declaration.Centroid) { - TXT("\nCentroid : "); - 
UID(decl->Declaration.Centroid); - } - if (deflt || fd->Declaration.Invariant != decl->Declaration.Invariant) { - TXT("\nInvariant : "); - UID(decl->Declaration.Invariant); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX( decl->Declaration.Padding ); - } - - EOL(); - TXT( "\nFirst: " ); - UID( decl->Range.First ); - TXT( "\nLast : " ); - UID( decl->Range.Last ); - - if( decl->Declaration.Semantic ) { - EOL(); - TXT( "\nName : " ); - ENM( decl->Semantic.Name, TGSI_SEMANTICS ); - TXT( "\nIndex: " ); - UID( decl->Semantic.Index ); - if( ignored ) { - TXT( "\nPadding : " ); - UIX( decl->Semantic.Padding ); - } - } -} - -static void -dump_immediate_verbose( - struct tgsi_full_immediate *imm, - unsigned ignored ) -{ - unsigned i; - - TXT( "\nDataType : " ); - ENM( imm->Immediate.DataType, TGSI_IMMS ); - if( ignored ) { - TXT( "\nPadding : " ); - UIX( imm->Immediate.Padding ); - } - - assert( imm->Immediate.NrTokens <= 4 + 1 ); - for( i = 0; i < imm->Immediate.NrTokens - 1; i++ ) { - EOL(); - switch( imm->Immediate.DataType ) { - case TGSI_IMM_FLOAT32: - TXT( "\nFloat: " ); - FLT( imm->u[i].Float ); - break; - - default: - assert( 0 ); - } - } -} - -static void -dump_instruction_verbose( - struct tgsi_full_instruction *inst, - unsigned ignored, - unsigned deflt, - struct tgsi_full_instruction *fi ) -{ - unsigned i; - - TXT( "\nOpcode : OPCODE_" ); - TXT( tgsi_get_opcode_info( inst->Instruction.Opcode )->mnemonic ); - if( deflt || fi->Instruction.Saturate != inst->Instruction.Saturate ) { - TXT( "\nSaturate : " ); - ENM( inst->Instruction.Saturate, TGSI_SATS ); - } - if( deflt || fi->Instruction.NumDstRegs != inst->Instruction.NumDstRegs ) { - TXT( "\nNumDstRegs : " ); - UID( inst->Instruction.NumDstRegs ); - } - if( deflt || fi->Instruction.NumSrcRegs != inst->Instruction.NumSrcRegs ) { - TXT( "\nNumSrcRegs : " ); - UID( inst->Instruction.NumSrcRegs ); - } - if (deflt || fi->Instruction.Predicate != inst->Instruction.Predicate) { - TXT("\nPredicate : "); - 
UID(inst->Instruction.Predicate); - } - if (deflt || fi->Instruction.Label != inst->Instruction.Label) { - TXT("\nLabel : "); - UID(inst->Instruction.Label); - } - if (deflt || fi->Instruction.Texture != inst->Instruction.Texture) { - TXT("\nTexture : "); - UID(inst->Instruction.Texture); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX( inst->Instruction.Padding ); - } - - if (deflt || inst->Instruction.Label) { - EOL(); - if (deflt || fi->Label.Label != inst->Label.Label) { - TXT( "\nLabel : " ); - UID(inst->Label.Label); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX(inst->Label.Padding); - } - } - - if (deflt || inst->Instruction.Texture) { - EOL(); - if (deflt || fi->Texture.Texture != inst->Texture.Texture) { - TXT( "\nTexture : " ); - ENM(inst->Texture.Texture, TGSI_TEXTURES); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX(inst->Texture.Padding); - } - } - - for( i = 0; i < inst->Instruction.NumDstRegs; i++ ) { - struct tgsi_full_dst_register *dst = &inst->Dst[i]; - struct tgsi_full_dst_register *fd = &fi->Dst[i]; - - EOL(); - TXT( "\nFile : " ); - ENM( dst->Register.File, TGSI_FILES ); - if( deflt || fd->Register.WriteMask != dst->Register.WriteMask ) { - TXT( "\nWriteMask: " ); - ENM( dst->Register.WriteMask, TGSI_WRITEMASKS ); - } - if( ignored ) { - if( deflt || fd->Register.Indirect != dst->Register.Indirect ) { - TXT( "\nIndirect : " ); - UID( dst->Register.Indirect ); - } - if( deflt || fd->Register.Dimension != dst->Register.Dimension ) { - TXT( "\nDimension: " ); - UID( dst->Register.Dimension ); - } - } - if( deflt || fd->Register.Index != dst->Register.Index ) { - TXT( "\nIndex : " ); - SID( dst->Register.Index ); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX( dst->Register.Padding ); - } - } - - for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) { - struct tgsi_full_src_register *src = &inst->Src[i]; - struct tgsi_full_src_register *fs = &fi->Src[i]; - - EOL(); - TXT( "\nFile : "); - ENM( src->Register.File, TGSI_FILES ); 
- if( deflt || fs->Register.SwizzleX != src->Register.SwizzleX ) { - TXT( "\nSwizzleX : " ); - ENM( src->Register.SwizzleX, TGSI_SWIZZLES ); - } - if( deflt || fs->Register.SwizzleY != src->Register.SwizzleY ) { - TXT( "\nSwizzleY : " ); - ENM( src->Register.SwizzleY, TGSI_SWIZZLES ); - } - if( deflt || fs->Register.SwizzleZ != src->Register.SwizzleZ ) { - TXT( "\nSwizzleZ : " ); - ENM( src->Register.SwizzleZ, TGSI_SWIZZLES ); - } - if( deflt || fs->Register.SwizzleW != src->Register.SwizzleW ) { - TXT( "\nSwizzleW : " ); - ENM( src->Register.SwizzleW, TGSI_SWIZZLES ); - } - if (deflt || fs->Register.Absolute != src->Register.Absolute) { - TXT("\nAbsolute : "); - UID(src->Register.Absolute); - } - if( deflt || fs->Register.Negate != src->Register.Negate ) { - TXT( "\nNegate : " ); - UID( src->Register.Negate ); - } - if( ignored ) { - if( deflt || fs->Register.Indirect != src->Register.Indirect ) { - TXT( "\nIndirect : " ); - UID( src->Register.Indirect ); - } - if( deflt || fs->Register.Dimension != src->Register.Dimension ) { - TXT( "\nDimension: " ); - UID( src->Register.Dimension ); - } - } - if( deflt || fs->Register.Index != src->Register.Index ) { - TXT( "\nIndex : " ); - SID( src->Register.Index ); - } - } -} - -void -tgsi_dump_c( - const struct tgsi_token *tokens, - uint flags ) -{ - struct tgsi_parse_context parse; - struct tgsi_full_instruction fi; - struct tgsi_full_declaration fd; - uint ignored = flags & TGSI_DUMP_C_IGNORED; - uint deflt = flags & TGSI_DUMP_C_DEFAULT; - - tgsi_parse_init( &parse, tokens ); - - TXT( "tgsi-dump begin -----------------" ); - - TXT( "\nHeaderSize: " ); - UID( parse.FullHeader.Header.HeaderSize ); - TXT( "\nBodySize : " ); - UID( parse.FullHeader.Header.BodySize ); - TXT( "\nProcessor : " ); - ENM( parse.FullHeader.Processor.Processor, TGSI_PROCESSOR_TYPES ); - EOL(); - - fi = tgsi_default_full_instruction(); - fd = tgsi_default_full_declaration(); - - while( !tgsi_parse_end_of_tokens( &parse ) ) { - tgsi_parse_token( 
&parse ); - - TXT( "\nType : " ); - ENM( parse.FullToken.Token.Type, TGSI_TOKEN_TYPES ); - if( ignored ) { - TXT( "\nSize : " ); - UID( parse.FullToken.Token.NrTokens ); - } - - switch( parse.FullToken.Token.Type ) { - case TGSI_TOKEN_TYPE_DECLARATION: - dump_declaration_verbose( - &parse.FullToken.FullDeclaration, - ignored, - deflt, - &fd ); - break; - - case TGSI_TOKEN_TYPE_IMMEDIATE: - dump_immediate_verbose( - &parse.FullToken.FullImmediate, - ignored ); - break; - - case TGSI_TOKEN_TYPE_INSTRUCTION: - dump_instruction_verbose( - &parse.FullToken.FullInstruction, - ignored, - deflt, - &fi ); - break; - - default: - assert( 0 ); - } - - EOL(); - } - - TXT( "\ntgsi-dump end -------------------\n" ); - - tgsi_parse_free( &parse ); -} diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.h b/src/gallium/auxiliary/tgsi/tgsi_dump_c.h deleted file mode 100644 index d91cd35b3b7..00000000000 --- a/src/gallium/auxiliary/tgsi/tgsi_dump_c.h +++ /dev/null @@ -1,49 +0,0 @@ -/************************************************************************** - * - * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef TGSI_DUMP_C_H -#define TGSI_DUMP_C_H - -#include "pipe/p_shader_tokens.h" - -#if defined __cplusplus -extern "C" { -#endif - -#define TGSI_DUMP_C_IGNORED 1 -#define TGSI_DUMP_C_DEFAULT 2 - -void -tgsi_dump_c( - const struct tgsi_token *tokens, - uint flags ); - -#if defined __cplusplus -} -#endif - -#endif /* TGSI_DUMP_C_H */ -- cgit v1.2.3 From c16a8e0dd552dc7a5f95169da983e6164fc1492e Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 14 Feb 2010 15:20:34 +0000 Subject: util: Rename u_debug_dump.[ch] -> u_dump.[ch]. I have more plans for this than mere debugging -- it will be a helper to provide human readable representations of all gallium state for the python state tracker.
--- src/gallium/auxiliary/Makefile | 2 +- src/gallium/auxiliary/SConscript | 2 +- src/gallium/auxiliary/util/u_debug_dump.c | 267 ---------------------------- src/gallium/auxiliary/util/u_debug_dump.h | 77 -------- src/gallium/auxiliary/util/u_dump.h | 80 +++++++++ src/gallium/auxiliary/util/u_dump_defines.c | 264 +++++++++++++++++++++++++++ 6 files changed, 346 insertions(+), 346 deletions(-) delete mode 100644 src/gallium/auxiliary/util/u_debug_dump.c delete mode 100644 src/gallium/auxiliary/util/u_debug_dump.h create mode 100644 src/gallium/auxiliary/util/u_dump.h create mode 100644 src/gallium/auxiliary/util/u_dump_defines.c (limited to 'src/gallium/auxiliary/SConscript') diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index 507ca6e6aaa..3f96da61aec 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -91,9 +91,9 @@ C_SOURCES = \ translate/translate.c \ translate/translate_cache.c \ util/u_debug.c \ - util/u_debug_dump.c \ util/u_debug_symbol.c \ util/u_debug_stack.c \ + util/u_dump_defines.c \ util/u_bitmask.c \ util/u_blit.c \ util/u_blitter.c \ diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index 9709344b546..f226f46285f 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -131,10 +131,10 @@ source = [ 'util/u_cache.c', 'util/u_cpu_detect.c', 'util/u_debug.c', - 'util/u_debug_dump.c', 'util/u_debug_memory.c', 'util/u_debug_stack.c', 'util/u_debug_symbol.c', + 'util/u_dump_defines.c', 'util/u_dl.c', 'util/u_draw_quad.c', 'util/u_format.c', diff --git a/src/gallium/auxiliary/util/u_debug_dump.c b/src/gallium/auxiliary/util/u_debug_dump.c deleted file mode 100644 index 61624d05c0a..00000000000 --- a/src/gallium/auxiliary/util/u_debug_dump.c +++ /dev/null @@ -1,267 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - -#include "util/u_memory.h" -#include "util/u_debug.h" -#include "util/u_debug_dump.h" - - -#define DEBUG_DUMP_INVALID_NAME "" - - -#if 0 -static const char * -debug_dump_strip_prefix(const char *name, - const char *prefix) -{ - const char *stripped; - assert(name); - assert(prefix); - stripped = name; - while(*prefix) { - if(*stripped != *prefix) - return name; - - ++stripped; - ++prefix; - } - return stripped; -} -#endif - -static const char * -debug_dump_enum_continuous(unsigned value, - unsigned num_names, - const char **names) -{ - if (value >= num_names) - return DEBUG_DUMP_INVALID_NAME; - return names[value]; -} - - -#define DEFINE_DEBUG_DUMP_CONTINUOUS(_name) \ - const char * \ - debug_dump_##_name(unsigned value, boolean shortened) \ - { \ - if(shortened) \ - return debug_dump_enum_continuous(value, Elements(debug_dump_##_name##_short_names), debug_dump_##_name##_short_names); \ - else \ - return debug_dump_enum_continuous(value, Elements(debug_dump_##_name##_names), debug_dump_##_name##_names); \ - } - - -static const char * -debug_dump_blend_factor_names[] = { - DEBUG_DUMP_INVALID_NAME, /* 0x0 */ - "PIPE_BLENDFACTOR_ONE", - "PIPE_BLENDFACTOR_SRC_COLOR", - "PIPE_BLENDFACTOR_SRC_ALPHA", - "PIPE_BLENDFACTOR_DST_ALPHA", - "PIPE_BLENDFACTOR_DST_COLOR", - "PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE", - "PIPE_BLENDFACTOR_CONST_COLOR", - "PIPE_BLENDFACTOR_CONST_ALPHA", - "PIPE_BLENDFACTOR_SRC1_COLOR", - "PIPE_BLENDFACTOR_SRC1_ALPHA", - DEBUG_DUMP_INVALID_NAME, /* 0x0b */ - DEBUG_DUMP_INVALID_NAME, /* 0x0c */ - DEBUG_DUMP_INVALID_NAME, /* 0x0d */ - DEBUG_DUMP_INVALID_NAME, /* 0x0e */ - DEBUG_DUMP_INVALID_NAME, /* 0x0f */ - DEBUG_DUMP_INVALID_NAME, /* 0x10 */ - "PIPE_BLENDFACTOR_ZERO", - "PIPE_BLENDFACTOR_INV_SRC_COLOR", - "PIPE_BLENDFACTOR_INV_SRC_ALPHA", - "PIPE_BLENDFACTOR_INV_DST_ALPHA", - "PIPE_BLENDFACTOR_INV_DST_COLOR", - DEBUG_DUMP_INVALID_NAME, /* 0x16 */ - 
"PIPE_BLENDFACTOR_INV_CONST_COLOR", - "PIPE_BLENDFACTOR_INV_CONST_ALPHA", - "PIPE_BLENDFACTOR_INV_SRC1_COLOR", - "PIPE_BLENDFACTOR_INV_SRC1_ALPHA" -}; - -static const char * -debug_dump_blend_factor_short_names[] = { - DEBUG_DUMP_INVALID_NAME, /* 0x0 */ - "one", - "src_color", - "src_alpha", - "dst_alpha", - "dst_color", - "src_alpha_saturate", - "const_color", - "const_alpha", - "src1_color", - "src1_alpha", - DEBUG_DUMP_INVALID_NAME, /* 0x0b */ - DEBUG_DUMP_INVALID_NAME, /* 0x0c */ - DEBUG_DUMP_INVALID_NAME, /* 0x0d */ - DEBUG_DUMP_INVALID_NAME, /* 0x0e */ - DEBUG_DUMP_INVALID_NAME, /* 0x0f */ - DEBUG_DUMP_INVALID_NAME, /* 0x10 */ - "zero", - "inv_src_color", - "inv_src_alpha", - "inv_dst_alpha", - "inv_dst_color", - DEBUG_DUMP_INVALID_NAME, /* 0x16 */ - "inv_const_color", - "inv_const_alpha", - "inv_src1_color", - "inv_src1_alpha" -}; - -DEFINE_DEBUG_DUMP_CONTINUOUS(blend_factor) - - -static const char * -debug_dump_blend_func_names[] = { - "PIPE_BLEND_ADD", - "PIPE_BLEND_SUBTRACT", - "PIPE_BLEND_REVERSE_SUBTRACT", - "PIPE_BLEND_MIN", - "PIPE_BLEND_MAX" -}; - -static const char * -debug_dump_blend_func_short_names[] = { - "add", - "sub", - "rev_sub", - "min", - "max" -}; - -DEFINE_DEBUG_DUMP_CONTINUOUS(blend_func) - - -static const char * -debug_dump_func_names[] = { - "PIPE_FUNC_NEVER", - "PIPE_FUNC_LESS", - "PIPE_FUNC_EQUAL", - "PIPE_FUNC_LEQUAL", - "PIPE_FUNC_GREATER", - "PIPE_FUNC_NOTEQUAL", - "PIPE_FUNC_GEQUAL", - "PIPE_FUNC_ALWAYS" -}; - -static const char * -debug_dump_func_short_names[] = { - "never", - "less", - "equal", - "less_equal", - "greater", - "not_equal", - "greater_equal", - "always" -}; - -DEFINE_DEBUG_DUMP_CONTINUOUS(func) - - -static const char * -debug_dump_tex_target_names[] = { - "PIPE_TEXTURE_1D", - "PIPE_TEXTURE_2D", - "PIPE_TEXTURE_3D", - "PIPE_TEXTURE_CUBE" -}; - -static const char * -debug_dump_tex_target_short_names[] = { - "1d", - "2d", - "3d", - "cube" -}; - -DEFINE_DEBUG_DUMP_CONTINUOUS(tex_target) - - -static const char * 
-debug_dump_tex_wrap_names[] = { - "PIPE_TEX_WRAP_REPEAT", - "PIPE_TEX_WRAP_CLAMP", - "PIPE_TEX_WRAP_CLAMP_TO_EDGE", - "PIPE_TEX_WRAP_CLAMP_TO_BORDER", - "PIPE_TEX_WRAP_MIRROR_REPEAT", - "PIPE_TEX_WRAP_MIRROR_CLAMP", - "PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE", - "PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER" -}; - -static const char * -debug_dump_tex_wrap_short_names[] = { - "repeat", - "clamp", - "clamp_to_edge", - "clamp_to_border", - "mirror_repeat", - "mirror_clamp", - "mirror_clamp_to_edge", - "mirror_clamp_to_border" -}; - -DEFINE_DEBUG_DUMP_CONTINUOUS(tex_wrap) - - -static const char * -debug_dump_tex_mipfilter_names[] = { - "PIPE_TEX_MIPFILTER_NEAREST", - "PIPE_TEX_MIPFILTER_LINEAR", - "PIPE_TEX_MIPFILTER_NONE" -}; - -static const char * -debug_dump_tex_mipfilter_short_names[] = { - "nearest", - "linear", - "none" -}; - -DEFINE_DEBUG_DUMP_CONTINUOUS(tex_mipfilter) - - -static const char * -debug_dump_tex_filter_names[] = { - "PIPE_TEX_FILTER_NEAREST", - "PIPE_TEX_FILTER_LINEAR" -}; - -static const char * -debug_dump_tex_filter_short_names[] = { - "nearest", - "linear" -}; - -DEFINE_DEBUG_DUMP_CONTINUOUS(tex_filter) diff --git a/src/gallium/auxiliary/util/u_debug_dump.h b/src/gallium/auxiliary/util/u_debug_dump.h deleted file mode 100644 index 19b130ad183..00000000000 --- a/src/gallium/auxiliary/util/u_debug_dump.h +++ /dev/null @@ -1,77 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * Dump data in human/machine readable format. 
- * - * @author Jose Fonseca - */ - -#ifndef U_DEBUG_DUMP_H_ -#define U_DEBUG_DUMP_H_ - - -#include "pipe/p_compiler.h" -#include "pipe/p_state.h" - - -#ifdef __cplusplus -extern "C" { -#endif - - -const char * -debug_dump_blend_factor(unsigned value, boolean shortened); - -const char * -debug_dump_blend_func(unsigned value, boolean shortened); - -const char * -debug_dump_func(unsigned value, boolean shortened); - -const char * -debug_dump_tex_target(unsigned value, boolean shortened); - -const char * -debug_dump_tex_wrap(unsigned value, boolean shortened); - -const char * -debug_dump_tex_mipfilter(unsigned value, boolean shortened); - -const char * -debug_dump_tex_filter(unsigned value, boolean shortened); - - -/* FIXME: Move the other debug_dump_xxx functions out of u_debug.h into here. */ - - -#ifdef __cplusplus -} -#endif - -#endif /* U_DEBUG_H_ */ diff --git a/src/gallium/auxiliary/util/u_dump.h b/src/gallium/auxiliary/util/u_dump.h new file mode 100644 index 00000000000..ddccc5490c2 --- /dev/null +++ b/src/gallium/auxiliary/util/u_dump.h @@ -0,0 +1,80 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Dump data in human/machine readable format. + * + * @author Jose Fonseca + */ + +#ifndef U_DEBUG_DUMP_H_ +#define U_DEBUG_DUMP_H_ + + +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +#define UTIL_DUMP_INVALID_NAME "" + + +const char * +util_dump_blend_factor(unsigned value, boolean shortened); + +const char * +util_dump_blend_func(unsigned value, boolean shortened); + +const char * +util_dump_func(unsigned value, boolean shortened); + +const char * +util_dump_tex_target(unsigned value, boolean shortened); + +const char * +util_dump_tex_wrap(unsigned value, boolean shortened); + +const char * +util_dump_tex_mipfilter(unsigned value, boolean shortened); + +const char * +util_dump_tex_filter(unsigned value, boolean shortened); + + +/* FIXME: Move the other debug_dump_xxx functions out of u_debug.h into here. */ + + +#ifdef __cplusplus +} +#endif + +#endif /* U_DEBUG_H_ */ diff --git a/src/gallium/auxiliary/util/u_dump_defines.c b/src/gallium/auxiliary/util/u_dump_defines.c new file mode 100644 index 00000000000..96a22563473 --- /dev/null +++ b/src/gallium/auxiliary/util/u_dump_defines.c @@ -0,0 +1,264 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#include "util/u_memory.h" +#include "util/u_debug.h" +#include "util/u_dump.h" + + +#if 0 +static const char * +util_dump_strip_prefix(const char *name, + const char *prefix) +{ + const char *stripped; + assert(name); + assert(prefix); + stripped = name; + while(*prefix) { + if(*stripped != *prefix) + return name; + + ++stripped; + ++prefix; + } + return stripped; +} +#endif + +static const char * +util_dump_enum_continuous(unsigned value, + unsigned num_names, + const char **names) +{ + if (value >= num_names) + return UTIL_DUMP_INVALID_NAME; + return names[value]; +} + + +#define DEFINE_UTIL_DUMP_CONTINUOUS(_name) \ + const char * \ + util_dump_##_name(unsigned value, boolean shortened) \ + { \ + if(shortened) \ + return util_dump_enum_continuous(value, Elements(util_dump_##_name##_short_names), util_dump_##_name##_short_names); \ + else \ + return util_dump_enum_continuous(value, Elements(util_dump_##_name##_names), util_dump_##_name##_names); \ + } + + +static const char * +util_dump_blend_factor_names[] = { + UTIL_DUMP_INVALID_NAME, /* 0x0 */ + "PIPE_BLENDFACTOR_ONE", + "PIPE_BLENDFACTOR_SRC_COLOR", + "PIPE_BLENDFACTOR_SRC_ALPHA", + "PIPE_BLENDFACTOR_DST_ALPHA", + "PIPE_BLENDFACTOR_DST_COLOR", + "PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE", + "PIPE_BLENDFACTOR_CONST_COLOR", + "PIPE_BLENDFACTOR_CONST_ALPHA", + "PIPE_BLENDFACTOR_SRC1_COLOR", + "PIPE_BLENDFACTOR_SRC1_ALPHA", + UTIL_DUMP_INVALID_NAME, /* 0x0b */ + UTIL_DUMP_INVALID_NAME, /* 0x0c */ + UTIL_DUMP_INVALID_NAME, /* 0x0d */ + UTIL_DUMP_INVALID_NAME, /* 0x0e */ + UTIL_DUMP_INVALID_NAME, /* 0x0f */ + UTIL_DUMP_INVALID_NAME, /* 0x10 */ + "PIPE_BLENDFACTOR_ZERO", + "PIPE_BLENDFACTOR_INV_SRC_COLOR", + "PIPE_BLENDFACTOR_INV_SRC_ALPHA", + "PIPE_BLENDFACTOR_INV_DST_ALPHA", + "PIPE_BLENDFACTOR_INV_DST_COLOR", + UTIL_DUMP_INVALID_NAME, /* 0x16 */ + "PIPE_BLENDFACTOR_INV_CONST_COLOR", + "PIPE_BLENDFACTOR_INV_CONST_ALPHA", + 
"PIPE_BLENDFACTOR_INV_SRC1_COLOR", + "PIPE_BLENDFACTOR_INV_SRC1_ALPHA" +}; + +static const char * +util_dump_blend_factor_short_names[] = { + UTIL_DUMP_INVALID_NAME, /* 0x0 */ + "one", + "src_color", + "src_alpha", + "dst_alpha", + "dst_color", + "src_alpha_saturate", + "const_color", + "const_alpha", + "src1_color", + "src1_alpha", + UTIL_DUMP_INVALID_NAME, /* 0x0b */ + UTIL_DUMP_INVALID_NAME, /* 0x0c */ + UTIL_DUMP_INVALID_NAME, /* 0x0d */ + UTIL_DUMP_INVALID_NAME, /* 0x0e */ + UTIL_DUMP_INVALID_NAME, /* 0x0f */ + UTIL_DUMP_INVALID_NAME, /* 0x10 */ + "zero", + "inv_src_color", + "inv_src_alpha", + "inv_dst_alpha", + "inv_dst_color", + UTIL_DUMP_INVALID_NAME, /* 0x16 */ + "inv_const_color", + "inv_const_alpha", + "inv_src1_color", + "inv_src1_alpha" +}; + +DEFINE_UTIL_DUMP_CONTINUOUS(blend_factor) + + +static const char * +util_dump_blend_func_names[] = { + "PIPE_BLEND_ADD", + "PIPE_BLEND_SUBTRACT", + "PIPE_BLEND_REVERSE_SUBTRACT", + "PIPE_BLEND_MIN", + "PIPE_BLEND_MAX" +}; + +static const char * +util_dump_blend_func_short_names[] = { + "add", + "sub", + "rev_sub", + "min", + "max" +}; + +DEFINE_UTIL_DUMP_CONTINUOUS(blend_func) + + +static const char * +util_dump_func_names[] = { + "PIPE_FUNC_NEVER", + "PIPE_FUNC_LESS", + "PIPE_FUNC_EQUAL", + "PIPE_FUNC_LEQUAL", + "PIPE_FUNC_GREATER", + "PIPE_FUNC_NOTEQUAL", + "PIPE_FUNC_GEQUAL", + "PIPE_FUNC_ALWAYS" +}; + +static const char * +util_dump_func_short_names[] = { + "never", + "less", + "equal", + "less_equal", + "greater", + "not_equal", + "greater_equal", + "always" +}; + +DEFINE_UTIL_DUMP_CONTINUOUS(func) + + +static const char * +util_dump_tex_target_names[] = { + "PIPE_TEXTURE_1D", + "PIPE_TEXTURE_2D", + "PIPE_TEXTURE_3D", + "PIPE_TEXTURE_CUBE" +}; + +static const char * +util_dump_tex_target_short_names[] = { + "1d", + "2d", + "3d", + "cube" +}; + +DEFINE_UTIL_DUMP_CONTINUOUS(tex_target) + + +static const char * +util_dump_tex_wrap_names[] = { + "PIPE_TEX_WRAP_REPEAT", + "PIPE_TEX_WRAP_CLAMP", + 
"PIPE_TEX_WRAP_CLAMP_TO_EDGE", + "PIPE_TEX_WRAP_CLAMP_TO_BORDER", + "PIPE_TEX_WRAP_MIRROR_REPEAT", + "PIPE_TEX_WRAP_MIRROR_CLAMP", + "PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE", + "PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER" +}; + +static const char * +util_dump_tex_wrap_short_names[] = { + "repeat", + "clamp", + "clamp_to_edge", + "clamp_to_border", + "mirror_repeat", + "mirror_clamp", + "mirror_clamp_to_edge", + "mirror_clamp_to_border" +}; + +DEFINE_UTIL_DUMP_CONTINUOUS(tex_wrap) + + +static const char * +util_dump_tex_mipfilter_names[] = { + "PIPE_TEX_MIPFILTER_NEAREST", + "PIPE_TEX_MIPFILTER_LINEAR", + "PIPE_TEX_MIPFILTER_NONE" +}; + +static const char * +util_dump_tex_mipfilter_short_names[] = { + "nearest", + "linear", + "none" +}; + +DEFINE_UTIL_DUMP_CONTINUOUS(tex_mipfilter) + + +static const char * +util_dump_tex_filter_names[] = { + "PIPE_TEX_FILTER_NEAREST", + "PIPE_TEX_FILTER_LINEAR" +}; + +static const char * +util_dump_tex_filter_short_names[] = { + "nearest", + "linear" +}; + +DEFINE_UTIL_DUMP_CONTINUOUS(tex_filter) -- cgit v1.2.3 From 2b4575f16d24a212b9a43cbd4a9966b3668e4b32 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 14 Feb 2010 16:55:04 +0000 Subject: os: Make streams abstract. Also replace windows kernel stream with null implementation. It was severely limited and no easy means to test it now. 
--- src/gallium/auxiliary/Makefile | 2 +- src/gallium/auxiliary/SConscript | 2 +- src/gallium/auxiliary/os/os_stream.h | 55 ++++++-- src/gallium/auxiliary/os/os_stream_null.c | 72 ++++++++++ src/gallium/auxiliary/os/os_stream_stdc.c | 71 ++++++---- src/gallium/auxiliary/os/os_stream_wd.c | 222 ------------------------------ 6 files changed, 160 insertions(+), 264 deletions(-) create mode 100644 src/gallium/auxiliary/os/os_stream_null.c delete mode 100644 src/gallium/auxiliary/os/os_stream_wd.c (limited to 'src/gallium/auxiliary/SConscript') diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index 3f96da61aec..aea21233674 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -50,7 +50,7 @@ C_SOURCES = \ indices/u_unfilled_gen.c \ os/os_misc.c \ os/os_stream_stdc.c \ - os/os_stream_wd.c \ + os/os_stream_null.c \ os/os_time.c \ pipebuffer/pb_buffer_malloc.c \ pipebuffer/pb_bufmgr_alt.c \ diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index f226f46285f..c8a3ecf4f92 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -84,7 +84,7 @@ source = [ 'indices/u_unfilled_gen.c', 'os/os_misc.c', 'os/os_stream_stdc.c', - 'os/os_stream_wd.c', + 'os/os_stream_null.c', 'os/os_time.c', 'pipebuffer/pb_buffer_fenced.c', 'pipebuffer/pb_buffer_malloc.c', diff --git a/src/gallium/auxiliary/os/os_stream.h b/src/gallium/auxiliary/os/os_stream.h index bf30e6542d3..8232b0f1586 100644 --- a/src/gallium/auxiliary/os/os_stream.h +++ b/src/gallium/auxiliary/os/os_stream.h @@ -37,25 +37,54 @@ #include "pipe/p_compiler.h" -struct os_stream; - - /** - * Create a stream - * @param filename relative or absolute path (necessary for windows) - * @param optional maximum file size (0 for a growable size). + * OS stream (FILE, socket, etc) abstraction. 
*/ +struct os_stream +{ + void + (*close)(struct os_stream *stream); + + boolean + (*write)(struct os_stream *stream, const void *data, size_t size); + + void + (*flush)(struct os_stream *stream); +}; + + +static INLINE void +os_stream_close(struct os_stream *stream) +{ + stream->close(stream); +} + + +static INLINE boolean +os_stream_write(struct os_stream *stream, const void *data, size_t size) +{ + return stream->write(stream, data, size); +} + + +static INLINE void +os_stream_flush(struct os_stream *stream) +{ + stream->flush(stream); +} + + struct os_stream * -os_stream_create(const char *filename, size_t max_size); +os_file_stream_create(const char *filename); + -boolean -os_stream_write(struct os_stream *stream, const void *data, size_t size); +struct os_stream * +os_null_stream_create(void); -void -os_stream_flush(struct os_stream *stream); -void -os_stream_close(struct os_stream *stream); +#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) +#define os_file_stream_create(_filename) os_null_stream_create() +#endif #endif /* _OS_STREAM_H_ */ diff --git a/src/gallium/auxiliary/os/os_stream_null.c b/src/gallium/auxiliary/os/os_stream_null.c new file mode 100644 index 00000000000..b55ce9c926e --- /dev/null +++ b/src/gallium/auxiliary/os/os_stream_null.c @@ -0,0 +1,72 @@ +/************************************************************************** + * + * Copyright 2008-2010 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Stream implementation for the Windows Display driver. 
+ */ + +#include "os_memory.h" +#include "os_stream.h" + + +static void +os_null_stream_close(struct os_stream *stream) +{ + (void)stream; +} + + +static boolean +os_null_stream_write(struct os_stream *stream, const void *data, size_t size) +{ + (void)data; + (void)size; + return TRUE; +} + + +static void +os_null_stream_flush(struct os_stream *stream) +{ + (void)stream; +} + + +static struct os_stream +os_null_stream = { + &os_null_stream_close, + &os_null_stream_write, + &os_null_stream_flush +}; + + +struct os_stream * +os_null_stream_create() +{ + return &os_null_stream; +} diff --git a/src/gallium/auxiliary/os/os_stream_stdc.c b/src/gallium/auxiliary/os/os_stream_stdc.c index caa60c0b50f..9a62799c0fa 100644 --- a/src/gallium/auxiliary/os/os_stream_stdc.c +++ b/src/gallium/auxiliary/os/os_stream_stdc.c @@ -40,39 +40,40 @@ #include "os_stream.h" -struct os_stream +struct os_stdc_stream { + struct os_stream base; + FILE *file; }; -struct os_stream * -os_stream_create(const char *filename, size_t max_size) +static INLINE struct os_stdc_stream * +os_stdc_stream(struct os_stream *stream) { - struct os_stream *stream; - - (void)max_size; - - stream = (struct os_stream *)calloc(1, sizeof(struct os_stream)); + return (struct os_stdc_stream *)stream; +} + + +static void +os_stdc_stream_close(struct os_stream *_stream) +{ + struct os_stdc_stream *stream = os_stdc_stream(_stream); + if(!stream) - goto no_stream; - - stream->file = fopen(filename, "w"); - if(!stream->file) - goto no_file; - - return stream; + return; -no_file: + fclose(stream->file); + free(stream); -no_stream: - return NULL; } -boolean -os_stream_write(struct os_stream *stream, const void *data, size_t size) +static boolean +os_stdc_stream_write(struct os_stream *_stream, const void *data, size_t size) { + struct os_stdc_stream *stream = os_stdc_stream(_stream); + if(!stream) return FALSE; @@ -80,9 +81,11 @@ os_stream_write(struct os_stream *stream, const void *data, size_t size) } -void 
-os_stream_flush(struct os_stream *stream) +static void +os_stdc_stream_flush(struct os_stream *_stream) { + struct os_stdc_stream *stream = os_stdc_stream(_stream); + if(!stream) return; @@ -90,15 +93,29 @@ os_stream_flush(struct os_stream *stream) } -void -os_stream_close(struct os_stream *stream) +struct os_stream * +os_file_stream_create(const char *filename) { + struct os_stdc_stream *stream; + + stream = (struct os_stdc_stream *)calloc(1, sizeof(struct os_stream)); if(!stream) - return; - - fclose(stream->file); + goto no_stream; + stream->base.close = &os_stdc_stream_close; + stream->base.write = &os_stdc_stream_write; + stream->base.flush = &os_stdc_stream_flush; + + stream->file = fopen(filename, "w"); + if(!stream->file) + goto no_file; + + return &stream->base; + +no_file: free(stream); +no_stream: + return NULL; } diff --git a/src/gallium/auxiliary/os/os_stream_wd.c b/src/gallium/auxiliary/os/os_stream_wd.c deleted file mode 100644 index a64cbcab4cf..00000000000 --- a/src/gallium/auxiliary/os/os_stream_wd.c +++ /dev/null @@ -1,222 +0,0 @@ -/************************************************************************** - * - * Copyright 2008-2010 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * Stream implementation for the Windows Display driver. - */ - -#include "pipe/p_config.h" - -#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) - -#include -#include - -#include "os_memory.h" -#include "os_stream.h" - - -#define MAP_FILE_SIZE (4*1024*1024) - - -struct os_stream -{ - char filename[MAX_PATH + 1]; - WCHAR wFileName[MAX_PATH + 1]; - boolean growable; - size_t map_size; - ULONG_PTR iFile; - char *pMap; - size_t written; - unsigned suffix; -}; - - -static INLINE boolean -os_stream_map(struct os_stream *stream) -{ - ULONG BytesInUnicodeString; - static char filename[MAX_PATH + 1]; - unsigned filename_len; - - if(stream->growable) - filename_len = snprintf(filename, - sizeof(filename), - "%s.%04x", - stream->filename, - stream->suffix++); - else - filename_len = snprintf(filename, - sizeof(filename), - "%s", - stream->filename); - - EngMultiByteToUnicodeN( - stream->wFileName, - sizeof(stream->wFileName), - &BytesInUnicodeString, - filename, - filename_len); - - stream->pMap = EngMapFile(stream->wFileName, stream->map_size, &stream->iFile); - if(!stream->pMap) - return FALSE; - - memset(stream->pMap, 0, stream->map_size); - stream->written = 0; - - return TRUE; -} - - -static INLINE void -os_stream_unmap(struct os_stream *stream) -{ - EngUnmapFile(stream->iFile); - if(stream->written < stream->map_size) { - /* Truncate file size */ - stream->pMap = EngMapFile(stream->wFileName, 
stream->written, &stream->iFile); - if(stream->pMap) - EngUnmapFile(stream->iFile); - } - - stream->pMap = NULL; -} - - -static INLINE void -os_stream_full_qualified_filename(char *dst, size_t size, const char *src) -{ - boolean need_drive, need_root; - - if((('A' <= src[0] && src[0] <= 'Z') || ('a' <= src[0] && src[0] <= 'z')) && src[1] == ':') { - need_drive = FALSE; - need_root = src[2] == '\\' ? FALSE : TRUE; - } - else { - need_drive = TRUE; - need_root = src[0] == '\\' ? FALSE : TRUE; - } - - snprintf(dst, size, - "\\??\\%s%s%s", - need_drive ? "C:" : "", - need_root ? "\\" : "", - src); -} - - -struct os_stream * -os_stream_create(const char *filename, size_t max_size) -{ - struct os_stream *stream; - - stream = CALLOC_STRUCT(os_stream); - if(!stream) - goto error1; - - os_stream_full_qualified_filename(stream->filename, - sizeof(stream->filename), - filename); - - if(max_size) { - stream->growable = FALSE; - stream->map_size = max_size; - } - else { - stream->growable = TRUE; - stream->map_size = MAP_FILE_SIZE; - } - - if(!os_stream_map(stream)) - goto error2; - - return stream; - -error2: - FREE(stream); -error1: - return NULL; -} - - -static INLINE void -os_stream_copy(struct os_stream *stream, const char *data, size_t size) -{ - assert(stream->written + size <= stream->map_size); - memcpy(stream->pMap + stream->written, data, size); - stream->written += size; -} - - -boolean -os_stream_write(struct os_stream *stream, const void *data, size_t size) -{ - if(!stream) - return FALSE; - - if(!stream->pMap) - return FALSE; - - while(stream->written + size > stream->map_size) { - size_t step = stream->map_size - stream->written; - os_stream_copy(stream, data, step); - data = (const char *)data + step; - size -= step; - - os_stream_unmap(stream); - if(!stream->growable || !os_stream_map(stream)) - return FALSE; - } - - os_stream_copy(stream, data, size); - - return TRUE; -} - - -void -os_stream_flush(struct os_stream *stream) -{ - (void)stream; -} - - -void 
-os_stream_close(struct os_stream *stream) -{ - if(!stream) - return; - - os_stream_unmap(stream); - - FREE(stream); -} - - -#endif -- cgit v1.2.3 From 64606231b8101316e5ec51a0e71294c0a96b005f Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 14 Feb 2010 23:07:21 +0000 Subject: os: Add a growable string stream. --- src/gallium/auxiliary/Makefile | 1 + src/gallium/auxiliary/SConscript | 1 + src/gallium/auxiliary/os/os_stream.h | 11 ++ src/gallium/auxiliary/os/os_stream_str.c | 166 +++++++++++++++++++++++++++++++ 4 files changed, 179 insertions(+) create mode 100644 src/gallium/auxiliary/os/os_stream_str.c (limited to 'src/gallium/auxiliary/SConscript') diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index aea21233674..34f5df7764d 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -50,6 +50,7 @@ C_SOURCES = \ indices/u_unfilled_gen.c \ os/os_misc.c \ os/os_stream_stdc.c \ + os/os_stream_str.c \ os/os_stream_null.c \ os/os_time.c \ pipebuffer/pb_buffer_malloc.c \ diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index c8a3ecf4f92..680c4a1599c 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -84,6 +84,7 @@ source = [ 'indices/u_unfilled_gen.c', 'os/os_misc.c', 'os/os_stream_stdc.c', + 'os/os_stream_str.c', 'os/os_stream_null.c', 'os/os_time.c', 'pipebuffer/pb_buffer_fenced.c', diff --git a/src/gallium/auxiliary/os/os_stream.h b/src/gallium/auxiliary/os/os_stream.h index 2ce5b1885ea..3423b84d691 100644 --- a/src/gallium/auxiliary/os/os_stream.h +++ b/src/gallium/auxiliary/os/os_stream.h @@ -99,6 +99,17 @@ struct os_stream * os_null_stream_create(void); +struct os_stream * +os_str_stream_create(size_t initial_size); + + +const char * +os_str_stream_get(struct os_stream *stream); + +char * +os_str_stream_get_and_close(struct os_stream *stream); + + #if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) #define os_file_stream_create(_filename) 
os_null_stream_create() #endif diff --git a/src/gallium/auxiliary/os/os_stream_str.c b/src/gallium/auxiliary/os/os_stream_str.c new file mode 100644 index 00000000000..a2884798d63 --- /dev/null +++ b/src/gallium/auxiliary/os/os_stream_str.c @@ -0,0 +1,166 @@ +/************************************************************************** + * + * Copyright 2008-2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Stream implementation based on the Standard C Library. 
+ */ + +#include "pipe/p_config.h" + +#include "os_memory.h" +#include "os_stream.h" + + +struct os_str_stream +{ + struct os_stream base; + + char *str; + + size_t size; + size_t written; +}; + + +static INLINE struct os_str_stream * +os_str_stream(struct os_stream *stream) +{ + return (struct os_str_stream *)stream; +} + + +static void +os_str_stream_close(struct os_stream *_stream) +{ + struct os_str_stream *stream = os_str_stream(_stream); + + os_free(stream->str); + + os_free(stream); +} + + +static boolean +os_str_stream_write(struct os_stream *_stream, const void *data, size_t size) +{ + struct os_str_stream *stream = os_str_stream(_stream); + size_t minimum_size; + boolean ret = TRUE; + + minimum_size = stream->written + size + 1; + if (stream->size < minimum_size) { + size_t new_size = stream->size; + char * new_str; + + do { + new_size *= 2; + } while (new_size < minimum_size); + + new_str = os_realloc(stream->str, stream->size, new_size); + if (new_str) { + stream->str = new_str; + stream->size = new_size; + } + else { + size = stream->size - stream->written - 1; + ret = FALSE; + } + } + + memcpy(stream->str + stream->written, data, size); + stream->written += size; + + return ret; +} + + +static void +os_str_stream_flush(struct os_stream *stream) +{ + (void)stream; +} + + +struct os_stream * +os_str_stream_create(size_t size) +{ + struct os_str_stream *stream; + + stream = (struct os_str_stream *)os_calloc(1, sizeof(*stream)); + if(!stream) + goto no_stream; + + stream->base.close = &os_str_stream_close; + stream->base.write = &os_str_stream_write; + stream->base.flush = &os_str_stream_flush; + + stream->str = os_malloc(size); + if(!stream->str) + goto no_str; + + stream->size = size; + + return &stream->base; + +no_str: + os_free(stream); +no_stream: + return NULL; +} + + +const char * +os_str_stream_get(struct os_stream *_stream) +{ + struct os_str_stream *stream = os_str_stream(_stream); + + if (!stream) + return NULL; + + 
stream->str[stream->written] = 0; + return stream->str; +} + + +char * +os_str_stream_get_and_close(struct os_stream *_stream) +{ + struct os_str_stream *stream = os_str_stream(_stream); + char *str; + + if (!stream) + return NULL; + + str = stream->str; + + str[stream->written] = 0; + + os_free(stream); + + return str; +} -- cgit v1.2.3 From 165b824343e77775b416f8dec16f744ffaf39aea Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 14 Feb 2010 23:20:42 +0000 Subject: util: Helper functions to dump all state objects. --- src/gallium/auxiliary/Makefile | 1 + src/gallium/auxiliary/SConscript | 1 + src/gallium/auxiliary/util/u_dump.h | 88 ++++ src/gallium/auxiliary/util/u_dump_state.c | 699 ++++++++++++++++++++++++++++++ 4 files changed, 789 insertions(+) create mode 100644 src/gallium/auxiliary/util/u_dump_state.c (limited to 'src/gallium/auxiliary/SConscript') diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index 34f5df7764d..7e4335f29f4 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -95,6 +95,7 @@ C_SOURCES = \ util/u_debug_symbol.c \ util/u_debug_stack.c \ util/u_dump_defines.c \ + util/u_dump_state.c \ util/u_bitmask.c \ util/u_blit.c \ util/u_blitter.c \ diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index 680c4a1599c..e13ac9187cb 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -136,6 +136,7 @@ source = [ 'util/u_debug_stack.c', 'util/u_debug_symbol.c', 'util/u_dump_defines.c', + 'util/u_dump_state.c', 'util/u_dl.c', 'util/u_draw_quad.c', 'util/u_format.c', diff --git a/src/gallium/auxiliary/util/u_dump.h b/src/gallium/auxiliary/util/u_dump.h index ddccc5490c2..cad0b355585 100644 --- a/src/gallium/auxiliary/util/u_dump.h +++ b/src/gallium/auxiliary/util/u_dump.h @@ -48,6 +48,17 @@ extern "C" { #define UTIL_DUMP_INVALID_NAME "" +struct os_stream; + + +/* + * p_defines.h + * + * XXX: These functions don't really dump 
anything -- just translate into + * strings so a verb better than "dump" should be used instead, in order to + * free up the namespace to the true dumper functions. + */ + const char * util_dump_blend_factor(unsigned value, boolean shortened); @@ -70,6 +81,83 @@ const char * util_dump_tex_filter(unsigned value, boolean shortened); +/* + * p_state.h, through an os_stream + */ + +void +util_dump_template(struct os_stream *stream, + const struct pipe_texture *templat); + +void +util_dump_rasterizer_state(struct os_stream *stream, + const struct pipe_rasterizer_state *state); + +void +util_dump_poly_stipple(struct os_stream *stream, + const struct pipe_poly_stipple *state); + +void +util_dump_viewport_state(struct os_stream *stream, + const struct pipe_viewport_state *state); + +void +util_dump_scissor_state(struct os_stream *stream, + const struct pipe_scissor_state *state); + +void +util_dump_clip_state(struct os_stream *stream, + const struct pipe_clip_state *state); + +void +util_dump_shader_state(struct os_stream *stream, + const struct pipe_shader_state *state); + +void +util_dump_depth_stencil_alpha_state(struct os_stream *stream, + const struct pipe_depth_stencil_alpha_state *state); + +void +util_dump_rt_blend_state(struct os_stream *stream, + const struct pipe_rt_blend_state *state); + +void +util_dump_blend_state(struct os_stream *stream, + const struct pipe_blend_state *state); + +void +util_dump_blend_color(struct os_stream *stream, + const struct pipe_blend_color *state); + +void +util_dump_stencil_ref(struct os_stream *stream, + const struct pipe_stencil_ref *state); + +void +util_dump_framebuffer_state(struct os_stream *stream, + const struct pipe_framebuffer_state *state); + +void +util_dump_sampler_state(struct os_stream *stream, + const struct pipe_sampler_state *state); + +void +util_dump_surface(struct os_stream *stream, + const struct pipe_surface *state); + +void +util_dump_transfer(struct os_stream *stream, + const struct pipe_transfer *state); 
+ +void +util_dump_vertex_buffer(struct os_stream *stream, + const struct pipe_vertex_buffer *state); + +void +util_dump_vertex_element(struct os_stream *stream, + const struct pipe_vertex_element *state); + + /* FIXME: Move the other debug_dump_xxx functions out of u_debug.h into here. */ diff --git a/src/gallium/auxiliary/util/u_dump_state.c b/src/gallium/auxiliary/util/u_dump_state.c new file mode 100644 index 00000000000..5ba91369982 --- /dev/null +++ b/src/gallium/auxiliary/util/u_dump_state.c @@ -0,0 +1,699 @@ +/************************************************************************** + * + * Copyright 2008-2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#include "pipe/p_compiler.h" +#include "os/os_stream.h" +#include "util/u_memory.h" +#include "util/u_string.h" +#include "util/u_format.h" +#include "tgsi/tgsi_dump.h" + +#include "u_dump.h" + + +/* + * Dump primitives + */ + +static INLINE void +util_stream_writef(struct os_stream *stream, const char *format, ...) +{ + static char buf[1024]; + unsigned len; + va_list ap; + va_start(ap, format); + len = util_vsnprintf(buf, sizeof(buf), format, ap); + va_end(ap); + os_stream_write(stream, buf, len); +} + +static void +util_dump_bool(struct os_stream *stream, int value) +{ + util_stream_writef(stream, "%c", value ? '1' : '0'); +} + +static void +util_dump_int(struct os_stream *stream, long long int value) +{ + util_stream_writef(stream, "%lli", value); +} + +static void +util_dump_uint(struct os_stream *stream, long long unsigned value) +{ + util_stream_writef(stream, "%llu", value); +} + +static void +util_dump_float(struct os_stream *stream, double value) +{ + util_stream_writef(stream, "%g", value); +} + +static void +util_dump_string(struct os_stream *stream, const char *str) +{ + os_stream_write_str(stream, "\""); + os_stream_write_str(stream, str); + os_stream_write_str(stream, "\""); +} + +static void +util_dump_enum(struct os_stream *stream, const char *value) +{ + os_stream_write_str(stream, value); +} + +static void +util_dump_array_begin(struct os_stream *stream) +{ + os_stream_write_str(stream, "{"); +} + +static void +util_dump_array_end(struct os_stream *stream) +{ + os_stream_write_str(stream, "}"); +} + +static void +util_dump_elem_begin(struct os_stream *stream) +{ +} + +static void +util_dump_elem_end(struct os_stream *stream) +{ + os_stream_write_str(stream, ", "); +} + +static void +util_dump_struct_begin(struct os_stream *stream, const char *name) +{ + os_stream_write_str(stream, "{"); +} + +static void +util_dump_struct_end(struct os_stream *stream) +{ + 
os_stream_write_str(stream, "}"); +} + +static void +util_dump_member_begin(struct os_stream *stream, const char *name) +{ + util_stream_writef(stream, "%s = ", name); +} + +static void +util_dump_member_end(struct os_stream *stream) +{ + os_stream_write_str(stream, ", "); +} + +static void +util_dump_null(struct os_stream *stream) +{ + os_stream_write_str(stream, "NULL"); +} + +static void +util_dump_ptr(struct os_stream *stream, const void *value) +{ + if(value) + util_stream_writef(stream, "0x%08lx", (unsigned long)(uintptr_t)value); + else + util_dump_null(stream); +} + + +/* + * Code saving macros. + */ + +#define util_dump_arg(_stream, _type, _arg) \ + do { \ + util_dump_arg_begin(_stream, #_arg); \ + util_dump_##_type(_stream, _arg); \ + util_dump_arg_end(_stream); \ + } while(0) + +#define util_dump_ret(_stream, _type, _arg) \ + do { \ + util_dump_ret_begin(_stream); \ + util_dump_##_type(_stream, _arg); \ + util_dump_ret_end(_stream); \ + } while(0) + +#define util_dump_array(_stream, _type, _obj, _size) \ + do { \ + size_t idx; \ + util_dump_array_begin(_stream); \ + for(idx = 0; idx < (_size); ++idx) { \ + util_dump_elem_begin(_stream); \ + util_dump_##_type(_stream, (_obj)[idx]); \ + util_dump_elem_end(_stream); \ + } \ + util_dump_array_end(_stream); \ + } while(0) + +#define util_dump_struct_array(_stream, _type, _obj, _size) \ + do { \ + size_t idx; \ + util_dump_array_begin(_stream); \ + for(idx = 0; idx < (_size); ++idx) { \ + util_dump_elem_begin(_stream); \ + util_dump_##_type(_stream, &(_obj)[idx]); \ + util_dump_elem_end(_stream); \ + } \ + util_dump_array_end(_stream); \ + } while(0) + +#define util_dump_member(_stream, _type, _obj, _member) \ + do { \ + util_dump_member_begin(_stream, #_member); \ + util_dump_##_type(_stream, (_obj)->_member); \ + util_dump_member_end(_stream); \ + } while(0) + +#define util_dump_arg_array(_stream, _type, _arg, _size) \ + do { \ + util_dump_arg_begin(_stream, #_arg); \ + util_dump_array(_stream, _type, _arg, 
_size); \ + util_dump_arg_end(_stream); \ + } while(0) + +#define util_dump_member_array(_stream, _type, _obj, _member) \ + do { \ + util_dump_member_begin(_stream, #_member); \ + util_dump_array(_stream, _type, (_obj)->_member, sizeof((_obj)->_member)/sizeof((_obj)->_member[0])); \ + util_dump_member_end(_stream); \ + } while(0) + + + +/* + * Wrappers for enum -> string dumpers. + */ + + +static void +util_dump_format(struct os_stream *stream, enum pipe_format format) +{ + util_dump_enum(stream, util_format_name(format)); +} + + +static void +util_dump_enum_blend_factor(struct os_stream *stream, unsigned value) +{ + util_dump_enum(stream, util_dump_blend_factor(value, TRUE)); +} + +static void +util_dump_enum_blend_func(struct os_stream *stream, unsigned value) +{ + util_dump_enum(stream, util_dump_blend_func(value, TRUE)); +} + +static void +util_dump_enum_func(struct os_stream *stream, unsigned value) +{ + util_dump_enum(stream, util_dump_func(value, TRUE)); +} + + +/* + * Public functions + */ + + +void +util_dump_template(struct os_stream *stream, const struct pipe_texture *templat) +{ + if(!templat) { + util_dump_null(stream); + return; + } + + util_dump_struct_begin(stream, "pipe_texture"); + + util_dump_member(stream, int, templat, target); + util_dump_member(stream, format, templat, format); + + util_dump_member_begin(stream, "width"); + util_dump_uint(stream, templat->width0); + util_dump_member_end(stream); + + util_dump_member_begin(stream, "height"); + util_dump_uint(stream, templat->height0); + util_dump_member_end(stream); + + util_dump_member_begin(stream, "depth"); + util_dump_uint(stream, templat->depth0); + util_dump_member_end(stream); + + util_dump_member(stream, uint, templat, last_level); + util_dump_member(stream, uint, templat, tex_usage); + + util_dump_struct_end(stream); +} + + +void +util_dump_rasterizer_state(struct os_stream *stream, const struct pipe_rasterizer_state *state) +{ + if(!state) { + util_dump_null(stream); + return; + } + 
+ util_dump_struct_begin(stream, "pipe_rasterizer_state"); + + util_dump_member(stream, bool, state, flatshade); + util_dump_member(stream, bool, state, light_twoside); + util_dump_member(stream, uint, state, front_winding); + util_dump_member(stream, uint, state, cull_mode); + util_dump_member(stream, uint, state, fill_cw); + util_dump_member(stream, uint, state, fill_ccw); + util_dump_member(stream, bool, state, offset_cw); + util_dump_member(stream, bool, state, offset_ccw); + util_dump_member(stream, bool, state, scissor); + util_dump_member(stream, bool, state, poly_smooth); + util_dump_member(stream, bool, state, poly_stipple_enable); + util_dump_member(stream, bool, state, point_smooth); + util_dump_member(stream, uint, state, sprite_coord_enable); + util_dump_member(stream, bool, state, sprite_coord_mode); + util_dump_member(stream, bool, state, point_quad_rasterization); + util_dump_member(stream, bool, state, point_size_per_vertex); + util_dump_member(stream, bool, state, multisample); + util_dump_member(stream, bool, state, line_smooth); + util_dump_member(stream, bool, state, line_stipple_enable); + util_dump_member(stream, uint, state, line_stipple_factor); + util_dump_member(stream, uint, state, line_stipple_pattern); + util_dump_member(stream, bool, state, line_last_pixel); + util_dump_member(stream, bool, state, bypass_vs_clip_and_viewport); + util_dump_member(stream, bool, state, flatshade_first); + util_dump_member(stream, bool, state, gl_rasterization_rules); + + util_dump_member(stream, float, state, line_width); + util_dump_member(stream, float, state, point_size); + util_dump_member(stream, float, state, offset_units); + util_dump_member(stream, float, state, offset_scale); + + util_dump_struct_end(stream); +} + + +void +util_dump_poly_stipple(struct os_stream *stream, const struct pipe_poly_stipple *state) +{ + if(!state) { + util_dump_null(stream); + return; + } + + util_dump_struct_begin(stream, "pipe_poly_stipple"); + + 
util_dump_member_begin(stream, "stipple"); + util_dump_member_array(stream, uint, state, stipple); + util_dump_member_end(stream); + + util_dump_struct_end(stream); +} + + +void +util_dump_viewport_state(struct os_stream *stream, const struct pipe_viewport_state *state) +{ + if(!state) { + util_dump_null(stream); + return; + } + + util_dump_struct_begin(stream, "pipe_viewport_state"); + + util_dump_member_array(stream, float, state, scale); + util_dump_member_array(stream, float, state, translate); + + util_dump_struct_end(stream); +} + + +void +util_dump_scissor_state(struct os_stream *stream, const struct pipe_scissor_state *state) +{ + if(!state) { + util_dump_null(stream); + return; + } + + util_dump_struct_begin(stream, "pipe_scissor_state"); + + util_dump_member(stream, uint, state, minx); + util_dump_member(stream, uint, state, miny); + util_dump_member(stream, uint, state, maxx); + util_dump_member(stream, uint, state, maxy); + + util_dump_struct_end(stream); +} + + +void +util_dump_clip_state(struct os_stream *stream, const struct pipe_clip_state *state) +{ + unsigned i; + + if(!state) { + util_dump_null(stream); + return; + } + + util_dump_struct_begin(stream, "pipe_clip_state"); + + util_dump_member_begin(stream, "ucp"); + util_dump_array_begin(stream); + for(i = 0; i < PIPE_MAX_CLIP_PLANES; ++i) { + util_dump_elem_begin(stream); + util_dump_array(stream, float, state->ucp[i], 4); + util_dump_elem_end(stream); + } + util_dump_array_end(stream); + util_dump_member_end(stream); + + util_dump_member(stream, uint, state, nr); + + util_dump_struct_end(stream); +} + + +void +util_dump_shader_state(struct os_stream *stream, const struct pipe_shader_state *state) +{ + char str[8192]; + + if(!state) { + util_dump_null(stream); + return; + } + + tgsi_dump_str(state->tokens, 0, str, sizeof(str)); + + util_dump_struct_begin(stream, "pipe_shader_state"); + + util_dump_member_begin(stream, "tokens"); + util_dump_string(stream, str); + util_dump_member_end(stream); + + 
util_dump_struct_end(stream); +} + + +void +util_dump_depth_stencil_alpha_state(struct os_stream *stream, const struct pipe_depth_stencil_alpha_state *state) +{ + unsigned i; + + if(!state) { + util_dump_null(stream); + return; + } + + util_dump_struct_begin(stream, "pipe_depth_stencil_alpha_state"); + + util_dump_member_begin(stream, "depth"); + util_dump_struct_begin(stream, "pipe_depth_state"); + util_dump_member(stream, bool, &state->depth, enabled); + util_dump_member(stream, bool, &state->depth, writemask); + util_dump_member(stream, uint, &state->depth, func); + util_dump_struct_end(stream); + util_dump_member_end(stream); + + util_dump_member_begin(stream, "stencil"); + util_dump_array_begin(stream); + for(i = 0; i < Elements(state->stencil); ++i) { + util_dump_elem_begin(stream); + util_dump_struct_begin(stream, "pipe_stencil_state"); + util_dump_member(stream, bool, &state->stencil[i], enabled); + util_dump_member(stream, enum_func, &state->stencil[i], func); + util_dump_member(stream, uint, &state->stencil[i], fail_op); + util_dump_member(stream, uint, &state->stencil[i], zpass_op); + util_dump_member(stream, uint, &state->stencil[i], zfail_op); + util_dump_member(stream, uint, &state->stencil[i], valuemask); + util_dump_member(stream, uint, &state->stencil[i], writemask); + util_dump_struct_end(stream); + util_dump_elem_end(stream); + } + util_dump_array_end(stream); + util_dump_member_end(stream); + + util_dump_member_begin(stream, "alpha"); + util_dump_struct_begin(stream, "pipe_alpha_state"); + util_dump_member(stream, bool, &state->alpha, enabled); + util_dump_member(stream, enum_func, &state->alpha, func); + util_dump_member(stream, float, &state->alpha, ref_value); + util_dump_struct_end(stream); + util_dump_member_end(stream); + + util_dump_struct_end(stream); +} + +void +util_dump_rt_blend_state(struct os_stream *stream, const struct pipe_rt_blend_state *state) +{ + util_dump_struct_begin(stream, "pipe_rt_blend_state"); + + 
util_dump_member(stream, uint, state, blend_enable); + + util_dump_member(stream, enum_blend_func, state, rgb_func); + util_dump_member(stream, enum_blend_factor, state, rgb_src_factor); + util_dump_member(stream, enum_blend_factor, state, rgb_dst_factor); + + util_dump_member(stream, enum_blend_func, state, alpha_func); + util_dump_member(stream, enum_blend_factor, state, alpha_src_factor); + util_dump_member(stream, enum_blend_factor, state, alpha_dst_factor); + + util_dump_member(stream, uint, state, colormask); + + util_dump_struct_end(stream); +} + +void +util_dump_blend_state(struct os_stream *stream, const struct pipe_blend_state *state) +{ + unsigned valid_entries = 1; + + if(!state) { + util_dump_null(stream); + return; + } + + util_dump_struct_begin(stream, "pipe_blend_state"); + + util_dump_member(stream, bool, state, dither); + + util_dump_member(stream, bool, state, logicop_enable); + util_dump_member(stream, enum_func, state, logicop_func); + + util_dump_member(stream, bool, state, independent_blend_enable); + + util_dump_member_begin(stream, "rt"); + if (state->independent_blend_enable) + valid_entries = PIPE_MAX_COLOR_BUFS; + util_dump_struct_array(stream, rt_blend_state, state->rt, valid_entries); + util_dump_member_end(stream); + + util_dump_struct_end(stream); +} + + +void +util_dump_blend_color(struct os_stream *stream, const struct pipe_blend_color *state) +{ + if(!state) { + util_dump_null(stream); + return; + } + + util_dump_struct_begin(stream, "pipe_blend_color"); + + util_dump_member_array(stream, float, state, color); + + util_dump_struct_end(stream); +} + +void +util_dump_stencil_ref(struct os_stream *stream, const struct pipe_stencil_ref *state) +{ + if(!state) { + util_dump_null(stream); + return; + } + + util_dump_struct_begin(stream, "pipe_stencil_ref"); + + util_dump_member_array(stream, uint, state, ref_value); + + util_dump_struct_end(stream); +} + +void +util_dump_framebuffer_state(struct os_stream *stream, const struct 
pipe_framebuffer_state *state) +{ + util_dump_struct_begin(stream, "pipe_framebuffer_state"); + + util_dump_member(stream, uint, state, width); + util_dump_member(stream, uint, state, height); + util_dump_member(stream, uint, state, nr_cbufs); + util_dump_member_array(stream, ptr, state, cbufs); + util_dump_member(stream, ptr, state, zsbuf); + + util_dump_struct_end(stream); +} + + +void +util_dump_sampler_state(struct os_stream *stream, const struct pipe_sampler_state *state) +{ + if(!state) { + util_dump_null(stream); + return; + } + + util_dump_struct_begin(stream, "pipe_sampler_state"); + + util_dump_member(stream, uint, state, wrap_s); + util_dump_member(stream, uint, state, wrap_t); + util_dump_member(stream, uint, state, wrap_r); + util_dump_member(stream, uint, state, min_img_filter); + util_dump_member(stream, uint, state, min_mip_filter); + util_dump_member(stream, uint, state, mag_img_filter); + util_dump_member(stream, uint, state, compare_mode); + util_dump_member(stream, uint, state, compare_func); + util_dump_member(stream, bool, state, normalized_coords); + util_dump_member(stream, uint, state, max_anisotropy); + util_dump_member(stream, float, state, lod_bias); + util_dump_member(stream, float, state, min_lod); + util_dump_member(stream, float, state, max_lod); + util_dump_member_array(stream, float, state, border_color); + + util_dump_struct_end(stream); +} + + +void +util_dump_surface(struct os_stream *stream, const struct pipe_surface *state) +{ + if(!state) { + util_dump_null(stream); + return; + } + + util_dump_struct_begin(stream, "pipe_surface"); + + util_dump_member(stream, format, state, format); + util_dump_member(stream, uint, state, width); + util_dump_member(stream, uint, state, height); + + util_dump_member(stream, uint, state, layout); + util_dump_member(stream, uint, state, offset); + util_dump_member(stream, uint, state, usage); + + util_dump_member(stream, ptr, state, texture); + util_dump_member(stream, uint, state, face); + 
util_dump_member(stream, uint, state, level); + util_dump_member(stream, uint, state, zslice); + + util_dump_struct_end(stream); +} + + +void +util_dump_transfer(struct os_stream *stream, const struct pipe_transfer *state) +{ + if(!state) { + util_dump_null(stream); + return; + } + + util_dump_struct_begin(stream, "pipe_transfer"); + + util_dump_member(stream, uint, state, width); + util_dump_member(stream, uint, state, height); + + util_dump_member(stream, uint, state, stride); + util_dump_member(stream, uint, state, usage); + + util_dump_member(stream, ptr, state, texture); + util_dump_member(stream, uint, state, face); + util_dump_member(stream, uint, state, level); + util_dump_member(stream, uint, state, zslice); + + util_dump_struct_end(stream); +} + + +void +util_dump_vertex_buffer(struct os_stream *stream, const struct pipe_vertex_buffer *state) +{ + if(!state) { + util_dump_null(stream); + return; + } + + util_dump_struct_begin(stream, "pipe_vertex_buffer"); + + util_dump_member(stream, uint, state, stride); + util_dump_member(stream, uint, state, max_index); + util_dump_member(stream, uint, state, buffer_offset); + util_dump_member(stream, ptr, state, buffer); + + util_dump_struct_end(stream); +} + + +void +util_dump_vertex_element(struct os_stream *stream, const struct pipe_vertex_element *state) +{ + if(!state) { + util_dump_null(stream); + return; + } + + util_dump_struct_begin(stream, "pipe_vertex_element"); + + util_dump_member(stream, uint, state, src_offset); + + util_dump_member(stream, uint, state, vertex_buffer_index); + util_dump_member(stream, uint, state, nr_components); + + util_dump_member(stream, format, state, src_format); + + util_dump_struct_end(stream); +} -- cgit v1.2.3 From 270279ac997e2a7b4e57c122b8ae217195271f72 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 14 Feb 2010 23:53:42 +0000 Subject: os: A stream for debug logging. Just a wrapper around os_log_message. 
Although it would probably make more sense to be the other way around. Also some comment fixes. --- src/gallium/auxiliary/Makefile | 1 + src/gallium/auxiliary/SConscript | 1 + src/gallium/auxiliary/os/os_stream.h | 4 ++ src/gallium/auxiliary/os/os_stream_log.c | 81 +++++++++++++++++++++++++++++++ src/gallium/auxiliary/os/os_stream_null.c | 2 +- src/gallium/auxiliary/os/os_stream_str.c | 2 +- 6 files changed, 89 insertions(+), 2 deletions(-) create mode 100644 src/gallium/auxiliary/os/os_stream_log.c (limited to 'src/gallium/auxiliary/SConscript') diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index 7e4335f29f4..ff7695150e2 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -49,6 +49,7 @@ C_SOURCES = \ indices/u_indices_gen.c \ indices/u_unfilled_gen.c \ os/os_misc.c \ + os/os_stream_log.c \ os/os_stream_stdc.c \ os/os_stream_str.c \ os/os_stream_null.c \ diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index e13ac9187cb..b531ad2dbd9 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -83,6 +83,7 @@ source = [ 'indices/u_indices_gen.c', 'indices/u_unfilled_gen.c', 'os/os_misc.c', + 'os/os_stream_log.c', 'os/os_stream_stdc.c', 'os/os_stream_str.c', 'os/os_stream_null.c', diff --git a/src/gallium/auxiliary/os/os_stream.h b/src/gallium/auxiliary/os/os_stream.h index 3423b84d691..693a0621e2d 100644 --- a/src/gallium/auxiliary/os/os_stream.h +++ b/src/gallium/auxiliary/os/os_stream.h @@ -99,6 +99,10 @@ struct os_stream * os_null_stream_create(void); +extern struct os_stream * +os_log_stream; + + struct os_stream * os_str_stream_create(size_t initial_size); diff --git a/src/gallium/auxiliary/os/os_stream_log.c b/src/gallium/auxiliary/os/os_stream_log.c new file mode 100644 index 00000000000..7cc2028a22c --- /dev/null +++ b/src/gallium/auxiliary/os/os_stream_log.c @@ -0,0 +1,81 @@ 
+/************************************************************************** + * + * Copyright 2008-2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Debug logging stream implementation. 
+ */ + +#include "os_memory.h" +#include "os_misc.h" +#include "os_stream.h" + + +static void +os_log_stream_close(struct os_stream *stream) +{ + (void)stream; +} + + +static boolean +os_log_stream_write(struct os_stream *stream, const void *data, size_t size) +{ + char *str; + + str = os_malloc(size + 1); + if (!str) + return FALSE; + + memcpy(str, data, size); + str[size] = 0; + + os_log_message(str); + + os_free(str); + + return TRUE; +} + + +static void +os_log_stream_flush(struct os_stream *stream) +{ + (void)stream; +} + + +static struct os_stream +os_log_stream_struct = { + &os_log_stream_close, + &os_log_stream_write, + &os_log_stream_flush +}; + + +struct os_stream * +os_log_stream = &os_log_stream_struct; diff --git a/src/gallium/auxiliary/os/os_stream_null.c b/src/gallium/auxiliary/os/os_stream_null.c index b55ce9c926e..128c4e8f0e0 100644 --- a/src/gallium/auxiliary/os/os_stream_null.c +++ b/src/gallium/auxiliary/os/os_stream_null.c @@ -27,7 +27,7 @@ /** * @file - * Stream implementation for the Windows Display driver. + * Null stream implementation. */ #include "os_memory.h" diff --git a/src/gallium/auxiliary/os/os_stream_str.c b/src/gallium/auxiliary/os/os_stream_str.c index a2884798d63..b5c7270d2ae 100644 --- a/src/gallium/auxiliary/os/os_stream_str.c +++ b/src/gallium/auxiliary/os/os_stream_str.c @@ -27,7 +27,7 @@ /** * @file - * Stream implementation based on the Standard C Library. + * Malloc string stream implementation. */ #include "pipe/p_config.h" -- cgit v1.2.3 From 3c45c4bc44310c1af4f0c06d29eb0a9d39a38837 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 24 Feb 2010 15:11:28 +0000 Subject: util: Cope with the fact that formats in u_format.csv are not ordered. 
--- src/gallium/auxiliary/Makefile | 1 - src/gallium/auxiliary/SConscript | 1 - src/gallium/auxiliary/util/u_format.c | 45 ----------------- src/gallium/auxiliary/util/u_format_table.py | 75 +++++++++++++++++----------- 4 files changed, 47 insertions(+), 75 deletions(-) delete mode 100644 src/gallium/auxiliary/util/u_format.c (limited to 'src/gallium/auxiliary/SConscript') diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index 916f5f6c91c..2d41fb3dcae 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -105,7 +105,6 @@ C_SOURCES = \ util/u_cpu_detect.c \ util/u_dl.c \ util/u_draw_quad.c \ - util/u_format.c \ util/u_format_access.c \ util/u_format_table.c \ util/u_gen_mipmap.c \ diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index b531ad2dbd9..8d2859fa711 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -140,7 +140,6 @@ source = [ 'util/u_dump_state.c', 'util/u_dl.c', 'util/u_draw_quad.c', - 'util/u_format.c', 'util/u_format_access.c', 'util/u_format_table.c', 'util/u_gen_mipmap.c', diff --git a/src/gallium/auxiliary/util/u_format.c b/src/gallium/auxiliary/util/u_format.c deleted file mode 100644 index e0724a1a8be..00000000000 --- a/src/gallium/auxiliary/util/u_format.c +++ /dev/null @@ -1,45 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 Vmware, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - -#include "u_format.h" - - -const struct util_format_description * -util_format_description(enum pipe_format format) -{ - const struct util_format_description *desc; - - if (format >= PIPE_FORMAT_COUNT) { - return NULL; - } - - desc = &util_format_description_table[format]; - assert(desc->format == format); - - return desc; -} diff --git a/src/gallium/auxiliary/util/u_format_table.py b/src/gallium/auxiliary/util/u_format_table.py index 05e176d5675..3d0e3a03493 100755 --- a/src/gallium/auxiliary/util/u_format_table.py +++ b/src/gallium/auxiliary/util/u_format_table.py @@ -87,39 +87,39 @@ def write_format_table(formats): print '#include "u_format.h"' print print 'const struct util_format_description' - print 'util_format_description_table[] = ' - print "{" - print " {" - print " PIPE_FORMAT_NONE," - print " \"PIPE_FORMAT_NONE\"," - print " {0, 0, 0}," - print " 0," - print " 0," - print " 0," - print " 0," - print " {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}, {0, 0, 0}}," - print " {0, 0, 0, 0}," - print " 0" - print " }," + print 'util_format_none_description = {' + print " PIPE_FORMAT_NONE," + print " \"PIPE_FORMAT_NONE\"," + print " {0, 0, 0}," + print " 0," + print " 0," + print " 0," + print " 0," + print " {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}, {0, 0, 0}}," + print " {0, 0, 0, 0}," + print " 0" + print "};" + print for format in formats: + print 'const struct util_format_description' + print 'util_format_%s_description = {' % (format.short_name(),) + print " %s," % (format.name,) + print " \"%s\"," % (format.name,) + print " {%u, %u, %u},\t/* block */" % (format.block_width, format.block_height, format.block_size()) + print " %s," % (layout_map(format.layout),) + print " %u,\t/* nr_channels */" % (format.nr_channels(),) + print " %s,\t/* is_array */" % (bool_map(format.is_array()),) + print " %s,\t/* is_mixed */" % (bool_map(format.is_mixed()),) print " {" - print " %s," % (format.name,) - 
print " \"%s\"," % (format.name,) - print " {%u, %u, %u},\t/* block */" % (format.block_width, format.block_height, format.block_size()) - print " %s," % (layout_map(format.layout),) - print " %u,\t/* nr_channels */" % (format.nr_channels(),) - print " %s,\t/* is_array */" % (bool_map(format.is_array()),) - print " %s,\t/* is_mixed */" % (bool_map(format.is_mixed()),) - print " {" for i in range(4): type = format.in_types[i] if i < 3: sep = "," else: sep = "" - print " {%s, %s, %u}%s\t/* %s */" % (kind_map[type.kind], bool_map(type.norm), type.size, sep, "xyzw"[i]) - print " }," - print " {" + print " {%s, %s, %u}%s\t/* %s */" % (kind_map[type.kind], bool_map(type.norm), type.size, sep, "xyzw"[i]) + print " }," + print " {" for i in range(4): swizzle = format.out_swizzle[i] if i < 3: @@ -130,11 +130,30 @@ def write_format_table(formats): comment = colorspace_channels_map[format.colorspace][i] except (KeyError, IndexError): comment = 'ignored' - print " %s%s\t/* %s */" % (swizzle_map[swizzle], sep, comment) - print " }," - print " %s," % (colorspace_map(format.colorspace),) + print " %s%s\t/* %s */" % (swizzle_map[swizzle], sep, comment) print " }," + print " %s," % (colorspace_map(format.colorspace),) + print "};" + print + print "const struct util_format_description *" + print "util_format_description(enum pipe_format format)" + print "{" + print " if (format >= PIPE_FORMAT_COUNT) {" + print " return NULL;" + print " }" + print + print " switch (format) {" + print " case PIPE_FORMAT_NONE:" + print " return &util_format_none_description;" + for format in formats: + print " case %s:" % format.name + print " return &util_format_%s_description;" % (format.short_name(),) + print " default:" + print " assert(0);" + print " return NULL;" + print " }" print "};" + print def main(): -- cgit v1.2.3 From 9beb302212a2afac408016cbd7b93c8b859e4910 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 26 Feb 2010 16:45:22 +0000 Subject: util: Code generate functions to pack and 
unpack a single pixel. Should work correctly for all pixel formats except SRGB formats. Generated code made much simpler by defining the pixel format as a C structure. For example this is the generated structure for PIPE_FORMAT_B6UG5SR5S_NORM: union util_format_b6ug5sr5s_norm { uint16_t value; struct { int r:5; int g:5; unsigned b:6; } chan; }; Not used everywhere yet because it seems compiled code is slower than bitshift arithmetic for some mysterious reason. So we should generate bitshift arithmetic at least for the simple UNORM pixel formats. --- src/gallium/auxiliary/Makefile | 5 + src/gallium/auxiliary/SConscript | 7 + src/gallium/auxiliary/util/.gitignore | 1 + src/gallium/auxiliary/util/u_format_access.py | 163 +-------- src/gallium/auxiliary/util/u_format_pack.py | 484 ++++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/SConscript | 1 + src/gallium/drivers/llvmpipe/lp_tile_soa.py | 10 +- 7 files changed, 507 insertions(+), 164 deletions(-) create mode 100644 src/gallium/auxiliary/util/u_format_pack.py (limited to 'src/gallium/auxiliary/SConscript') diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index 2d41fb3dcae..39e328f5144 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -184,5 +184,10 @@ indices/u_unfilled_gen.c: indices/u_unfilled_gen.py util/u_format_table.c: util/u_format_table.py util/u_format_parse.py util/u_format.csv python util/u_format_table.py util/u_format.csv > $@ +util/u_format_pack.h: util/u_format_pack.py util/u_format_parse.py util/u_format.csv + python util/u_format_pack.py util/u_format.csv > $@ + util/u_format_access.c: util/u_format_access.py util/u_format_parse.py util/u_format.csv python util/u_format_access.py util/u_format.csv > $@ + +default: util/u_format_pack.h diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index 8d2859fa711..47de50bf3ec 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ 
-28,6 +28,13 @@ env.CodeGenerate( command = 'python $SCRIPT $SOURCE > $TARGET' ) +env.CodeGenerate( + target = File('util/u_format_pack.h').srcnode(), + script = 'util/u_format_pack.py', + source = ['util/u_format.csv'], + command = 'python $SCRIPT $SOURCE > $TARGET' +) + env.CodeGenerate( target = 'util/u_format_access.c', script = 'util/u_format_access.py', diff --git a/src/gallium/auxiliary/util/.gitignore b/src/gallium/auxiliary/util/.gitignore index 29c586c9b51..448d2f304fb 100644 --- a/src/gallium/auxiliary/util/.gitignore +++ b/src/gallium/auxiliary/util/.gitignore @@ -1,2 +1,3 @@ u_format_access.c u_format_table.c +u_format_pack.h diff --git a/src/gallium/auxiliary/util/u_format_access.py b/src/gallium/auxiliary/util/u_format_access.py index a7a91ea652e..1c9be1b538e 100644 --- a/src/gallium/auxiliary/util/u_format_access.py +++ b/src/gallium/auxiliary/util/u_format_access.py @@ -39,7 +39,7 @@ import sys -from u_format_parse import * +from u_format_pack import * def is_format_supported(format): @@ -94,160 +94,6 @@ def native_type(format): assert False -def intermediate_native_type(bits, sign): - '''Find a native type adequate to hold intermediate results of the request bit size.''' - - bytes = 4 # don't use anything smaller than 32bits - while bytes * 8 < bits: - bytes *= 2 - bits = bytes*8 - - if sign: - return 'int%u_t' % bits - else: - return 'uint%u_t' % bits - - -def get_one_shift(channel): - '''Get the number of the bit that matches unity for this channel.''' - if channel.type == 'FLOAT': - assert False - if not channel.norm: - return 0 - if channel.type == UNSIGNED: - return channel.size - if channel.type == SIGNED: - return channel.size - 1 - if channel.type == FIXED: - return channel.size / 2 - assert False - - -def get_one(channel): - '''Get the value of unity for this channel.''' - if channel.type == 'FLOAT' or not channel.norm: - return 1 - else: - return (1 << get_one_shift(channel)) - 1 - - -def generate_clamp(): - '''Code generate the clamping 
functions for each type. - - We don't use a macro so that arguments with side effects, - like *src_pixel++ are correctly handled. - ''' - - for suffix, native_type in [ - ('', 'double'), - ('f', 'float'), - ('ui', 'unsigned int'), - ('si', 'int'), - ]: - print 'static INLINE %s' % native_type - print 'clamp%s(%s value, %s lbound, %s ubound)' % (suffix, native_type, native_type, native_type) - print '{' - print ' if(value < lbound)' - print ' return lbound;' - print ' if(value > ubound)' - print ' return ubound;' - print ' return value;' - print '}' - print - - -def clamp_expr(src_channel, dst_channel, dst_native_type, value): - '''Generate the expression to clamp the value in the source type to the - destination type range.''' - - if src_channel == dst_channel: - return value - - # Pick the approriate clamp function - if src_channel.type == FLOAT: - if src_channel.size == 32: - func = 'clampf' - elif src_channel.size == 64: - func = 'clamp' - else: - assert False - elif src_channel.type == UNSIGNED: - func = 'clampui' - elif src_channel.type == SIGNED: - func = 'clampsi' - else: - assert False - - # Clamp floats to [-1, 1] or [0, 1] range - if src_channel.type == FLOAT and dst_channel.norm: - max = 1 - if src_channel.sign and dst_channel.sign: - min = -1 - else: - min = 0 - return '%s(%s, %s, %s)' % (func, value, min, max) - - # FIXME: Also clamp scaled values - - return value - - -def conversion_expr(src_channel, dst_channel, dst_native_type, value): - '''Generate the expression to convert a value between two types.''' - - if src_channel == dst_channel: - return value - - if src_channel.type == FLOAT and dst_channel.type == FLOAT: - return '(%s)%s' % (dst_native_type, value) - - if not src_channel.norm and not dst_channel.norm: - return '(%s)%s' % (dst_native_type, value) - - value = clamp_expr(src_channel, dst_channel, dst_native_type, value) - - if dst_channel.type == FLOAT: - if src_channel.norm: - one = get_one(src_channel) - if src_channel.size <= 23: - scale 
= '(1.0f/0x%x)' % one - else: - # bigger than single precision mantissa, use double - scale = '(1.0/0x%x)' % one - value = '(%s * %s)' % (value, scale) - return '(%s)%s' % (dst_native_type, value) - - if src_channel.type == FLOAT: - if dst_channel.norm: - dst_one = get_one(dst_channel) - if dst_channel.size <= 23: - scale = '0x%x' % dst_one - else: - # bigger than single precision mantissa, use double - scale = '(double)0x%x' % dst_one - value = '(%s * %s)' % (value, scale) - return '(%s)%s' % (dst_native_type, value) - - if src_channel.type == dst_channel.type: - src_one = get_one(src_channel) - dst_one = get_one(dst_channel) - - if src_one > dst_one and src_channel.norm and dst_channel.norm: - # We can just bitshift - src_shift = get_one_shift(src_channel) - dst_shift = get_one_shift(dst_channel) - value = '(%s >> %s)' % (value, src_shift - dst_shift) - else: - # We need to rescale using an intermediate type big enough to hold the multiplication of both - tmp_native_type = intermediate_native_type(src_channel.size + dst_channel.size, src_channel.sign and dst_channel.sign) - value = '(%s)%s' % (tmp_native_type, value) - value = '%s * 0x%x / 0x%x' % (value, dst_one, src_one) - value = '(%s)%s' % (dst_native_type, value) - return value - - assert False - - def generate_format_read(format, dst_channel, dst_native_type, dst_suffix): '''Generate the function to read pixels from a particular format''' @@ -302,9 +148,10 @@ def generate_format_read(format, dst_channel, dst_native_type, dst_suffix): for i in range(4): src_channel = format.channels[i] if names[i]: - value = '(*src_pixel++)' + value = 'src_pixel[%u]' % i value = conversion_expr(src_channel, dst_channel, dst_native_type, value) print ' %s %s = %s;' % (dst_native_type, names[i], value) + print ' src_pixel += %u;' % (format.nr_channels()) else: assert False @@ -454,12 +301,10 @@ def main(): print __doc__.strip() print print '#include "pipe/p_compiler.h"' - print '#include "u_format.h"' print '#include 
"u_math.h"' + print '#include "u_format_pack.h"' print - generate_clamp() - type = Channel(FLOAT, False, 32) native_type = 'float' suffix = '4f' diff --git a/src/gallium/auxiliary/util/u_format_pack.py b/src/gallium/auxiliary/util/u_format_pack.py new file mode 100644 index 00000000000..3f33f7cc021 --- /dev/null +++ b/src/gallium/auxiliary/util/u_format_pack.py @@ -0,0 +1,484 @@ +#!/usr/bin/env python + +''' +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Pixel format packing and unpacking functions. 
+ * + * @author Jose Fonseca + */ +''' + + +import sys + +from u_format_parse import * + + +def generate_format_type(format): + '''Generate a structure that describes the format.''' + + print 'union util_format_%s {' % format.short_name() + if format.is_bitmask(): + print ' uint%u_t value;' % (format.block_size(),) + print ' struct {' + for channel in format.channels: + if format.is_bitmask() and not format.is_array(): + if channel.type == VOID: + if channel.size: + print ' unsigned %s:%u;' % (channel.name, channel.size) + elif channel.type == UNSIGNED: + print ' unsigned %s:%u;' % (channel.name, channel.size) + elif channel.type == SIGNED: + print ' int %s:%u;' % (channel.name, channel.size) + else: + assert 0 + else: + assert channel.size % 8 == 0 and is_pot(channel.size) + if channel.type == VOID: + if channel.size: + print ' uint%u_t %s;' % (channel.size, channel.name) + elif channel.type == UNSIGNED: + print ' uint%u_t %s;' % (channel.size, channel.name) + elif channel.type in (SIGNED, FIXED): + print ' int%u_t %s;' % (channel.size, channel.name) + elif channel.type == FLOAT: + if channel.size == 64: + print ' double %s;' % (channel.name) + elif channel.size == 32: + print ' float %s;' % (channel.name) + elif channel.size == 16: + print ' uint16_t %s;' % (channel.name) + else: + assert 0 + else: + assert 0 + print ' } chan;' + print '};' + print + + +def bswap_format(format): + '''Generate a structure that describes the format.''' + + if format.is_bitmask() and not format.is_array(): + print '#ifdef PIPE_ARCH_BIG_ENDIAN' + print ' pixel.value = util_bswap%u(pixel.value);' % format.block_size() + print '#endif' + + +def is_format_supported(format): + '''Determines whether we actually have the plumbing necessary to generate the + to read/write to/from this format.''' + + # FIXME: Ideally we would support any format combination here. 
+ + if format.layout != PLAIN: + return False + + for i in range(4): + channel = format.channels[i] + if channel.type not in (VOID, UNSIGNED, SIGNED, FLOAT): + return False + + # We can only read a color from a depth/stencil format if the depth channel is present + if format.colorspace == 'zs' and format.swizzles[0] == SWIZZLE_NONE: + return False + + return True + + +def native_type(format): + '''Get the native appropriate for a format.''' + + if format.layout == PLAIN: + if not format.is_array(): + # For arithmetic pixel formats return the integer type that matches the whole pixel + return 'uint%u_t' % format.block_size() + else: + # For array pixel formats return the integer type that matches the color channel + type = format.channels[0] + if type.type == UNSIGNED: + return 'uint%u_t' % type.size + elif type.type == SIGNED: + return 'int%u_t' % type.size + elif type.type == FLOAT: + if type.size == 32: + return 'float' + elif type.size == 64: + return 'double' + else: + assert False + else: + assert False + else: + assert False + + +def intermediate_native_type(bits, sign): + '''Find a native type adequate to hold intermediate results of the request bit size.''' + + bytes = 4 # don't use anything smaller than 32bits + while bytes * 8 < bits: + bytes *= 2 + bits = bytes*8 + + if sign: + return 'int%u_t' % bits + else: + return 'uint%u_t' % bits + + +def get_one_shift(type): + '''Get the number of the bit that matches unity for this type.''' + if type.type == 'FLOAT': + assert False + if not type.norm: + return 0 + if type.type == UNSIGNED: + return type.size + if type.type == SIGNED: + return type.size - 1 + if type.type == FIXED: + return type.size / 2 + assert False + + +def get_one(type): + '''Get the value of unity for this type.''' + if type.type == 'FLOAT' or not type.norm: + return 1 + else: + return (1 << get_one_shift(type)) - 1 + + +def generate_clamp(): + '''Code generate the clamping functions for each type. 
+ + We don't use a macro so that arguments with side effects, + like *src_pixel++ are correctly handled. + ''' + + for suffix, native_type in [ + ('', 'double'), + ('f', 'float'), + ('ui', 'unsigned int'), + ('si', 'int'), + ]: + print 'static INLINE %s' % native_type + print 'clamp%s(%s value, %s lbound, %s ubound)' % (suffix, native_type, native_type, native_type) + print '{' + print ' if(value < lbound)' + print ' return lbound;' + print ' if(value > ubound)' + print ' return ubound;' + print ' return value;' + print '}' + print + + +def clamp_expr(src_channel, dst_channel, dst_native_type, value): + '''Generate the expression to clamp the value in the source type to the + destination type range.''' + + if src_channel == dst_channel: + return value + + # Pick the approriate clamp function + if src_channel.type == FLOAT: + if src_channel.size == 32: + func = 'clampf' + elif src_channel.size == 64: + func = 'clamp' + else: + assert False + elif src_channel.type == UNSIGNED: + func = 'clampui' + elif src_channel.type == SIGNED: + func = 'clampsi' + else: + assert False + + src_min = src_channel.min() + src_max = src_channel.max() + dst_min = dst_channel.min() + dst_max = dst_channel.max() + + if src_min < dst_min and src_max > dst_max: + return 'CLAMP(%s, %s, %s)' % (value, dst_min, dst_max) + + if src_max > dst_max: + return 'MIN2(%s, %s)' % (value, dst_max) + + if src_min < dst_min: + return 'MAX2(%s, %s)' % (value, dst_min) + + return value + + +def conversion_expr(src_channel, dst_channel, dst_native_type, value, clamp=True): + '''Generate the expression to convert a value between two types.''' + + if src_channel == dst_channel: + return value + + if src_channel.type == FLOAT and dst_channel.type == FLOAT: + return '(%s)%s' % (dst_native_type, value) + + if not src_channel.norm and not dst_channel.norm: + return '(%s)%s' % (dst_native_type, value) + + if clamp: + value = clamp_expr(src_channel, dst_channel, dst_native_type, value) + + if dst_channel.type == 
FLOAT: + if src_channel.norm: + one = get_one(src_channel) + if src_channel.size <= 23: + scale = '(1.0f/0x%x)' % one + else: + # bigger than single precision mantissa, use double + scale = '(1.0/0x%x)' % one + value = '(%s * %s)' % (value, scale) + return '(%s)%s' % (dst_native_type, value) + + if src_channel.type == FLOAT: + if dst_channel.norm: + dst_one = get_one(dst_channel) + if dst_channel.size <= 23: + scale = '0x%x' % dst_one + else: + # bigger than single precision mantissa, use double + scale = '(double)0x%x' % dst_one + value = '(%s * %s)' % (value, scale) + return '(%s)%s' % (dst_native_type, value) + + if not src_channel.norm and not dst_channel.norm: + # neither is normalized -- just cast + return '(%s)%s' % (dst_native_type, value) + + if src_channel.type in (SIGNED, UNSIGNED) and dst_channel.type in (SIGNED, UNSIGNED): + src_one = get_one(src_channel) + dst_one = get_one(dst_channel) + + if src_one > dst_one and src_channel.norm: + # We can just bitshift + src_shift = get_one_shift(src_channel) + dst_shift = get_one_shift(dst_channel) + value = '(%s >> %s)' % (value, src_shift - dst_shift) + else: + # We need to rescale using an intermediate type big enough to hold the multiplication of both + tmp_native_type = intermediate_native_type(src_channel.size + dst_channel.size, src_channel.sign and dst_channel.sign) + value = '(%s)%s' % (tmp_native_type, value) + value = '(%s * 0x%x / 0x%x)' % (value, dst_one, src_one) + value = '(%s)%s' % (dst_native_type, value) + return value + + assert False + + +def generate_format_unpack(format, dst_channel, dst_native_type, dst_suffix): + '''Generate the function to unpack pixels from a particular format''' + + name = format.short_name() + + src_native_type = native_type(format) + + print 'static INLINE void' + print 'util_format_%s_unpack_%s(%s *dst, const void *src)' % (name, dst_suffix, dst_native_type) + print '{' + print ' union util_format_%s pixel;' % format.short_name() + print ' memcpy(&pixel, src, sizeof 
pixel);' + bswap_format(format) + + assert format.layout == PLAIN + + for i in range(4): + swizzle = format.swizzles[i] + if swizzle < 4: + src_channel = format.channels[swizzle] + value = 'pixel.chan.%s' % src_channel.name + value = conversion_expr(src_channel, dst_channel, dst_native_type, value) + elif swizzle == SWIZZLE_0: + value = '0' + elif swizzle == SWIZZLE_1: + value = get_one(dst_channel) + elif swizzle == SWIZZLE_NONE: + value = '0' + else: + assert False + if format.colorspace == ZS: + if i == 3: + value = get_one(dst_channel) + elif i >= 1: + value = 'dst[0]' + print ' dst[%u] = %s; /* %s */' % (i, value, 'rgba'[i]) + + print '}' + print + + +def generate_format_pack(format, src_channel, src_native_type, src_suffix): + '''Generate the function to pack pixels to a particular format''' + + name = format.short_name() + + dst_native_type = native_type(format) + + print 'static INLINE void' + print 'util_format_%s_pack_%s(void *dst, %s r, %s g, %s b, %s a)' % (name, src_suffix, src_native_type, src_native_type, src_native_type, src_native_type) + print '{' + print ' union util_format_%s pixel;' % format.short_name() + + assert format.layout == PLAIN + + inv_swizzle = format.inv_swizzles() + + for i in range(4): + dst_channel = format.channels[i] + width = dst_channel.size + if inv_swizzle[i] is None: + continue + value = 'rgba'[inv_swizzle[i]] + value = conversion_expr(src_channel, dst_channel, dst_native_type, value) + if format.colorspace == ZS: + if i == 3: + value = get_one(dst_channel) + elif i >= 1: + value = '0' + print ' pixel.chan.%s = %s;' % (dst_channel.name, value) + + bswap_format(format) + print ' memcpy(dst, &pixel, sizeof pixel);' + print '}' + print + + +def generate_unpack(formats, dst_channel, dst_native_type, dst_suffix): + '''Generate the dispatch function to unpack pixels from any format''' + + for format in formats: + if is_format_supported(format): + generate_format_unpack(format, dst_channel, dst_native_type, dst_suffix) + + print 
'static INLINE void' + print 'util_format_unpack_%s(enum pipe_format format, %s *dst, const void *src)' % (dst_suffix, dst_native_type) + print '{' + print ' void (*func)(%s *dst, const void *src);' % dst_native_type + print ' switch(format) {' + for format in formats: + if is_format_supported(format): + print ' case %s:' % format.name + print ' func = &util_format_%s_unpack_%s;' % (format.short_name(), dst_suffix) + print ' break;' + print ' default:' + print ' debug_printf("unsupported format\\n");' + print ' return;' + print ' }' + print ' func(dst, src);' + print '}' + print + + +def generate_pack(formats, src_channel, src_native_type, src_suffix): + '''Generate the dispatch function to pack pixels to any format''' + + for format in formats: + if is_format_supported(format): + generate_format_pack(format, src_channel, src_native_type, src_suffix) + + print 'static INLINE void' + print 'util_format_pack_%s(enum pipe_format format, void *dst, %s r, %s g, %s b, %s a)' % (src_suffix, src_native_type, src_native_type, src_native_type, src_native_type) + print '{' + print ' void (*func)(void *dst, %s r, %s g, %s b, %s a);' % (src_native_type, src_native_type, src_native_type, src_native_type) + print ' switch(format) {' + for format in formats: + if is_format_supported(format): + print ' case %s:' % format.name + print ' func = &util_format_%s_pack_%s;' % (format.short_name(), src_suffix) + print ' break;' + print ' default:' + print ' debug_printf("%s: unsupported format\\n", __FUNCTION__);' + print ' return;' + print ' }' + print ' func(dst, r, g, b, a);' + print '}' + print + + +def main(): + formats = [] + for arg in sys.argv[1:]: + formats.extend(parse(arg)) + + print '/* This file is autogenerated by u_format_pack.py from u_format.csv. Do not edit directly. 
*/' + print + # This will print the copyright message on the top of this file + print __doc__.strip() + + print + print '#ifndef U_FORMAT_PACK_H' + print '#define U_FORMAT_PACK_H' + print + print '#include "pipe/p_compiler.h"' + print '#include "u_math.h"' + print '#include "u_format.h"' + print + + generate_clamp() + + for format in formats: + if format.layout == PLAIN: + generate_format_type(format) + + channel = Channel(FLOAT, False, 32) + native_type = 'float' + suffix = '4f' + + generate_unpack(formats, channel, native_type, suffix) + generate_pack(formats, channel, native_type, suffix) + + channel = Channel(UNSIGNED, True, 8) + native_type = 'uint8_t' + suffix = '4ub' + + generate_unpack(formats, channel, native_type, suffix) + generate_pack(formats, channel, native_type, suffix) + + print + print '#ifdef __cplusplus' + print '}' + print '#endif' + print + print '#endif /* ! U_FORMAT_PACK_H */' + + +if __name__ == '__main__': + main() diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 71f9337422f..13c1a13e87a 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -21,6 +21,7 @@ env.CodeGenerate( # XXX: Our dependency scanner only finds depended modules in relative dirs. 
env.Depends('lp_tile_soa.c', [ '#src/gallium/auxiliary/util/u_format_parse.py', + '#src/gallium/auxiliary/util/u_format_pack.py', '#src/gallium/auxiliary/util/u_format_access.py', ]) diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py index dc44d67d634..00b8d4fc382 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.py +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py @@ -90,7 +90,7 @@ def generate_format_read(format, dst_channel, dst_native_type, dst_suffix): value = '(%s >> %u)' % (value, shift) if shift + width < format.block_size(): value = '(%s & 0x%x)' % (value, mask) - value = conversion_expr(src_channel, dst_channel, dst_native_type, value) + value = conversion_expr(src_channel, dst_channel, dst_native_type, value, clamp=False) print ' %s %s = %s;' % (dst_native_type, names[i], value) shift += width else: @@ -98,7 +98,7 @@ def generate_format_read(format, dst_channel, dst_native_type, dst_suffix): src_channel = format.channels[i] if names[i]: value = '(*src_pixel++)' - value = conversion_expr(src_channel, dst_channel, dst_native_type, value) + value = conversion_expr(src_channel, dst_channel, dst_native_type, value, clamp=False) print ' %s %s = %s;' % (dst_native_type, names[i], value) else: assert False @@ -154,7 +154,7 @@ def pack_rgba(format, src_channel, r, g, b, a): if value: dst_channel = format.channels[i] dst_native_type = native_type(format) - value = conversion_expr(src_channel, dst_channel, dst_native_type, value) + value = conversion_expr(src_channel, dst_channel, dst_native_type, value, clamp=False) term = "((%s) << %d)" % (value, shift) if expr: expr = expr + " | " + term @@ -217,7 +217,7 @@ def emit_tile_pixel_write_code(format, src_channel): width = dst_channel.size if inv_swizzle[i] is not None: value = 'TILE_PIXEL(src, x, y, %u)' % inv_swizzle[i] - value = conversion_expr(src_channel, dst_channel, dst_native_type, value) + value = conversion_expr(src_channel, dst_channel, 
dst_native_type, value, clamp=False) if shift: value = '(%s << %u)' % (value, shift) print ' pixel |= %s;' % value @@ -228,7 +228,7 @@ def emit_tile_pixel_write_code(format, src_channel): dst_channel = format.channels[i] if inv_swizzle[i] is not None: value = 'TILE_PIXEL(src, x, y, %u)' % inv_swizzle[i] - value = conversion_expr(src_channel, dst_channel, dst_native_type, value) + value = conversion_expr(src_channel, dst_channel, dst_native_type, value, clamp=False) print ' *dst_pixel++ = %s;' % value else: assert False -- cgit v1.2.3 From af36b050eb2c801faecbd2b239d96df71fec5287 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 3 Mar 2010 10:54:35 +0000 Subject: gallivm: Rename lp_bld_misc -> lp_bld_init. --- src/gallium/auxiliary/Makefile | 2 +- src/gallium/auxiliary/SConscript | 2 +- src/gallium/auxiliary/gallivm/lp_bld_init.cpp | 75 +++++++++++++++++++++++++++ src/gallium/auxiliary/gallivm/lp_bld_init.h | 56 ++++++++++++++++++++ src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 75 --------------------------- src/gallium/auxiliary/gallivm/lp_bld_misc.h | 56 -------------------- 6 files changed, 133 insertions(+), 133 deletions(-) create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_init.cpp create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_init.h delete mode 100644 src/gallium/auxiliary/gallivm/lp_bld_misc.cpp delete mode 100644 src/gallium/auxiliary/gallivm/lp_bld_misc.h (limited to 'src/gallium/auxiliary/SConscript') diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index 60f9c2ae3c3..1d0930e024b 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -158,7 +158,7 @@ GALLIVM_SOURCES = \ gallivm/lp_bld_type.c GALLIVM_CPP_SOURCES = \ - gallivm/lp_bld_misc.cpp + gallivm/lp_bld_init.cpp GENERATED_SOURCES = \ indices/u_indices_gen.c \ diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index 47de50bf3ec..f365c4bbdd6 100644 --- a/src/gallium/auxiliary/SConscript +++ 
b/src/gallium/auxiliary/SConscript @@ -192,7 +192,7 @@ if drawllvm: 'gallivm/lp_bld_interp.c', 'gallivm/lp_bld_intr.c', 'gallivm/lp_bld_logic.c', - 'gallivm/lp_bld_misc.cpp', + 'gallivm/lp_bld_init.cpp', 'gallivm/lp_bld_pack.c', 'gallivm/lp_bld_sample.c', 'gallivm/lp_bld_sample_soa.c', diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.cpp b/src/gallium/auxiliary/gallivm/lp_bld_init.cpp new file mode 100644 index 00000000000..6b559db159e --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.cpp @@ -0,0 +1,75 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#include "pipe/p_config.h" + +#include "lp_bld_init.h" + + +#ifndef LLVM_NATIVE_ARCH + +namespace llvm { + extern void LinkInJIT(); +} + + +void +LLVMLinkInJIT(void) +{ + llvm::LinkInJIT(); +} + + +extern "C" int X86TargetMachineModule; + + +int +LLVMInitializeNativeTarget(void) +{ +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + X86TargetMachineModule = 1; +#endif + return 0; +} + + +#endif + + +/* + * Hack to allow the linking of release LLVM static libraries on a debug build. + * + * See also: + * - http://social.msdn.microsoft.com/Forums/en-US/vclanguage/thread/7234ea2b-0042-42ed-b4e2-5d8644dfb57d + */ +#if defined(_MSC_VER) && defined(_DEBUG) +#include +extern "C" { + _CRTIMP void __cdecl _invalid_parameter_noinfo(void) {} +} +#endif diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.h b/src/gallium/auxiliary/gallivm/lp_bld_init.h new file mode 100644 index 00000000000..6a8ab8e5db9 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.h @@ -0,0 +1,56 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef LP_BLD_INIT_H +#define LP_BLD_INIT_H + + +#include "llvm/Config/config.h" + +#ifdef __cplusplus +extern "C" { +#endif + + +#ifndef LLVM_NATIVE_ARCH + +void +LLVMLinkInJIT(void); + +int +LLVMInitializeNativeTarget(void); + +#endif /* !LLVM_NATIVE_ARCH */ + + +#ifdef __cplusplus +} +#endif + + +#endif /* !LP_BLD_INIT_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp deleted file mode 100644 index 6e79438ead0..00000000000 --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp +++ /dev/null @@ -1,75 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include "pipe/p_config.h" - -#include "lp_bld_misc.h" - - -#ifndef LLVM_NATIVE_ARCH - -namespace llvm { - extern void LinkInJIT(); -} - - -void -LLVMLinkInJIT(void) -{ - llvm::LinkInJIT(); -} - - -extern "C" int X86TargetMachineModule; - - -int -LLVMInitializeNativeTarget(void) -{ -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - X86TargetMachineModule = 1; -#endif - return 0; -} - - -#endif - - -/* - * Hack to allow the linking of release LLVM static libraries on a debug build. - * - * See also: - * - http://social.msdn.microsoft.com/Forums/en-US/vclanguage/thread/7234ea2b-0042-42ed-b4e2-5d8644dfb57d - */ -#if defined(_MSC_VER) && defined(_DEBUG) -#include -extern "C" { - _CRTIMP void __cdecl _invalid_parameter_noinfo(void) {} -} -#endif diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.h b/src/gallium/auxiliary/gallivm/lp_bld_misc.h deleted file mode 100644 index 0e787e0b9cb..00000000000 --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.h +++ /dev/null @@ -1,56 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#ifndef LP_BLD_MISC_H -#define LP_BLD_MISC_H - - -#include "llvm/Config/config.h" - -#ifdef __cplusplus -extern "C" { -#endif - - -#ifndef LLVM_NATIVE_ARCH - -void -LLVMLinkInJIT(void); - -int -LLVMInitializeNativeTarget(void); - -#endif /* !LLVM_NATIVE_ARCH */ - - -#ifdef __cplusplus -} -#endif - - -#endif /* !LP_BLD_MISC_H */ -- cgit v1.2.3